From b87a03cc0838e2b92ceccef3ac7f3a77287e4254 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Thu, 1 Feb 2024 13:52:17 -0600 Subject: [PATCH 01/92] Track additional metrics with W&B in `megatron/training.py` --- megatron/training.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/megatron/training.py b/megatron/training.py index c7ac2574fb..29d2181d46 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -1039,7 +1039,8 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration, tokens_per_gpu_per_second = tokens_per_sec / args.world_size tokens_per_gpu_per_second_per_replica = tokens_per_gpu_per_second / args.data_parallel_size if wandb is not None and getattr(wandb, 'run', None) is not None: - tput = { + assert wandb.run is not None + wandb_metrics = { 'throughput/iteration-time': elapsed_time_per_iteration, # 1000 ms / s 'throughput/samples_per_sec': samples_per_sec, 'throughput/samples_per_sec_per_replica': samples_per_sec_per_replica, @@ -1050,8 +1051,13 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration, 'throughput/tflops': tflops, 'throughput/approx_params_in_billions': approx_parameters_in_billions, 'throughput/elapsed_ms_per_iteration': elapsed_time_per_iteration, + 'throughput/iteration': iteration, } - wandb.run.log(tput) + if loss_dict is not None: + wandb_metrics |= { + f'loss/{k}': v for k, v in loss_dict.items() + } + wandb_metrics |= {'loss/iteration': iteration} if writer: if args.log_timers_to_tensorboard: writer.add_scalar('iteration-time/iteration-time', @@ -1060,6 +1066,21 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration, elapsed_time_per_iteration, args.consumed_train_samples) writer.add_scalar('iteration-time/iteration-time vs tokens', elapsed_time_per_iteration, args.consumed_train_tokens) + if wandb is not None and getattr(wandb, 'run', None) is not None: + wandb_metrics |= { + 'iteration': iteration, + 'iteration_time': 
elapsed_time_per_iteration, + 'iteration_time_vs_tokens': ( + (elapsed_time_per_iteration + / args.consumed_train_tokens) + ), + 'iteration_time_vs_samples': ( + (elapsed_time_per_iteration + / args.consumed_train_samples), + ), + } + if wandb is not None and getattr(wandb, 'run', None) is not None: + wandb.log(wandb_metrics) log_string = ' iteration {:8d}/{:8d} |'.format( iteration, args.train_iters) log_string += ' consumed samples: {:12d} |'.format( From 1302c61795e60c14de4ec0a5ab3ea1c3214cf45f Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Wed, 21 Feb 2024 08:27:08 -0600 Subject: [PATCH 02/92] Update `megatron/training.py` --- megatron/training.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/megatron/training.py b/megatron/training.py index 29d2181d46..ef32cd3856 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -1066,21 +1066,6 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration, elapsed_time_per_iteration, args.consumed_train_samples) writer.add_scalar('iteration-time/iteration-time vs tokens', elapsed_time_per_iteration, args.consumed_train_tokens) - if wandb is not None and getattr(wandb, 'run', None) is not None: - wandb_metrics |= { - 'iteration': iteration, - 'iteration_time': elapsed_time_per_iteration, - 'iteration_time_vs_tokens': ( - (elapsed_time_per_iteration - / args.consumed_train_tokens) - ), - 'iteration_time_vs_samples': ( - (elapsed_time_per_iteration - / args.consumed_train_samples), - ), - } - if wandb is not None and getattr(wandb, 'run', None) is not None: - wandb.log(wandb_metrics) log_string = ' iteration {:8d}/{:8d} |'.format( iteration, args.train_iters) log_string += ' consumed samples: {:12d} |'.format( @@ -1091,6 +1076,21 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration, elapsed_time_per_iteration * 1000.0) log_string += ' learning rate: {:.3E} |'.format(learning_rate) log_string += ' global batch size: {:5d} 
|'.format(batch_size) + if wandb is not None and getattr(wandb, 'run', None) is not None: + wandb_metrics |= { + 'training/iteration': iteration, + 'training/iteration_time': elapsed_time_per_iteration, + 'training/iteration_time_vs_tokens': ( + (elapsed_time_per_iteration + / args.consumed_train_tokens) + ), + 'training/iteration_time_vs_samples': ( + (elapsed_time_per_iteration + / args.consumed_train_samples), + ), + 'training/consumed_samples': args.consumed_train_samples, + 'training/consumed_tokens': args.consumed_train_tokens, + } for key in total_loss_dict: if key not in [advanced_iters_key, skipped_iters_key, nan_iters_key]: @@ -1099,6 +1099,8 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration, if avg > 0.0: log_string += ' {}: {:.6E} |'.format(key, avg) total_loss_dict[key] = get_accelerator().FloatTensor([0.0]) + if wandb is not None and getattr(wandb, 'run', None) is not None: + wandb.log(wandb_metrics) if loss_scale is not None: log_string += ' loss scale: {:.1f} |'.format(loss_scale) if grad_norm is not None: From de60d86cf2928f43b894c3825d8ee90872cdd549 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Mon, 26 Feb 2024 08:22:46 -0600 Subject: [PATCH 03/92] Remove `assert num_datasets < 255` in `megatron/data/blendable_dataset.py` --- megatron/data/blendable_dataset.py | 54 ++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/megatron/data/blendable_dataset.py b/megatron/data/blendable_dataset.py index 2516e58415..590375a0f2 100644 --- a/megatron/data/blendable_dataset.py +++ b/megatron/data/blendable_dataset.py @@ -13,16 +13,39 @@ from megatron import print_rank_0 from megatron.core import mpu -class BlendableDataset(torch.utils.data.Dataset): - - def __init__(self, datasets, weights, size, *, - data_cache_path=None): +class BlendableDataset(torch.utils.data.Dataset): + def __init__( + self, + datasets, + weights, + size, + *, + data_cache_path=None + ): self.datasets = datasets 
num_datasets = len(datasets) + ndsets = np.array([num_datasets], dtype=np.int64).item() + len_weights = np.array([len(weights)], dtype=np.int64).item() + print_rank_0(f'{len(datasets)=}') + print_rank_0(f'{len(weights)}') + print_rank_0(f'{ndsets=}') + print_rank_0(f'{len_weights=}') + # if int(num_datasets) != int(len(weights)): + # if f'{num_datasets}' == f'{len(weights)}': + if ndsets != len_weights: + warr = np.array(weights) + print_rank_0('\n'.join([ + f'{num_datasets=}', + f'{len(weights)=}', + f'{warr.shape=}', + f'{warr.sum()=}', + # f'{num_datasets=} != {len(warr)=}', + ])) assert num_datasets == len(weights) - + # else: + # raise IndexError(f'{num_datasets=} != {len(weights)=}') self.size = size # Normalize weights. @@ -34,8 +57,8 @@ def __init__(self, datasets, weights, size, *, # Build indicies. def _build_indices(): start_time = time.time() - assert num_datasets < 255 - dataset_index = np.zeros(self.size, dtype=np.uint8) + # assert num_datasets < 255 + dataset_index = np.zeros(self.size, dtype=np.int64) dataset_sample_index = np.zeros(self.size, dtype=np.int64) from megatron.data import helpers @@ -92,16 +115,21 @@ def _build_indices(): # Load on all ranks. 
print_rank_0(f'> loading blendable dataset index: {index_path}') - self.dataset_index = np.load(index_path, allow_pickle=True, mmap_mode='r') + self.dataset_index = np.load( + index_path, + allow_pickle=True, + mmap_mode='r' + ) assert self.dataset_index.size == self.size - print_rank_0(f'> loading blendable dataset sample index: {sample_index_path}') + print_rank_0( + f'> loading blendable dataset sample index: ' + f'{sample_index_path}' + ) self.dataset_sample_index = np.load(sample_index_path, allow_pickle=True, mmap_mode='r') assert self.dataset_sample_index.size == self.size else: self.dataset_index, self.dataset_sample_index = _build_indices() - - # Check size _ = self.__getitem__(self.size - 1) try: @@ -112,15 +140,13 @@ def _build_indices(): print_rank_0('> size of blendable dataset: ' '{} samples'.format(self.size)) - def __len__(self): return self.size - def __getitem__(self, idx): dataset_idx = self.dataset_index[idx] sample_idx = self.dataset_sample_index[idx] return { - "dataset_idx" : dataset_idx, + "dataset_idx": dataset_idx, **self.datasets[dataset_idx][sample_idx], } From c11589da93bc109aa98be19434ee9d984783d424 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Mon, 26 Feb 2024 08:25:50 -0600 Subject: [PATCH 04/92] [format] `megatron/data/indexed_dataset.py` --- megatron/data/indexed_dataset.py | 98 ++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 25 deletions(-) diff --git a/megatron/data/indexed_dataset.py b/megatron/data/indexed_dataset.py index 219ffe8031..efd3310af0 100644 --- a/megatron/data/indexed_dataset.py +++ b/megatron/data/indexed_dataset.py @@ -47,21 +47,32 @@ def infer_dataset_impl(path): return None else: print(f"Dataset does not exist: {path}") - print("Path should be a basename that both .idx and .bin can be appended to get full filenames.") + print( + "Path should be a basename that both .idx and .bin can be " + "appended to get full filenames." 
+ ) return None def make_builder(out_file, impl, vocab_size=None): if impl == 'mmap': - return MMapIndexedDatasetBuilder(out_file, dtype=__best_fitting_dtype(vocab_size)) + return MMapIndexedDatasetBuilder( + out_file, + dtype=__best_fitting_dtype(vocab_size) + ) else: return IndexedDatasetBuilder(out_file) def make_dataset(path, impl, skip_warmup=False): if not IndexedDataset.exists(path): - print(f"Dataset does not exist: {path}") - print("Path should be a basename that both .idx and .bin can be appended to get full filenames.") + print( + f"Dataset does not exist: {path}" + ) + print( + "Path should be a basename that both .idx and .bin " + "can be appended to get full filenames." + ) return None if impl == 'infer': impl = infer_dataset_impl(path) @@ -167,13 +178,15 @@ def __del__(self): self.data_file.close() # @lru_cache(maxsize=8) - def __getitem__(self, idx): + def __getitem__(self, idx) -> np.ndarray: if not self.data_file: self.read_data(self.path) if isinstance(idx, int): i = idx self.check_index(i) - tensor_size = self.sizes[self.dim_offsets[i]:self.dim_offsets[i + 1]] + tensor_size = ( + self.sizes[self.dim_offsets[i]:self.dim_offsets[i + 1]] + ) a = np.empty(tensor_size, dtype=self.dtype) self.data_file.seek(self.data_offsets[i] * self.element_size) self.data_file.readinto(a) @@ -181,15 +194,16 @@ def __getitem__(self, idx): elif isinstance(idx, slice): start, stop, step = idx.indices(len(self)) if step != 1: - raise ValueError("Slices into indexed_dataset must be contiguous") + raise ValueError( + "Slices into indexed_dataset must be contiguous" + ) sizes = self.sizes[self.dim_offsets[start]:self.dim_offsets[stop]] size = sum(sizes) a = np.empty(size, dtype=self.dtype) self.data_file.seek(self.data_offsets[start] * self.element_size) self.data_file.readinto(a) offsets = list(accumulate(sizes)) - sents = np.split(a, offsets[:-1]) - return sents + return np.split(a, offsets[:-1]) def __len__(self): return self._len @@ -203,7 +217,8 @@ def size(self, 
index): @staticmethod def exists(path): return ( - os.path.exists(index_file_path(path)) and os.path.exists(data_file_path(path)) + os.path.exists(index_file_path(path)) + and os.path.exists(data_file_path(path)) ) @property @@ -251,7 +266,9 @@ def __getitem__(self, idx): if isinstance(idx, int): i = idx self.check_index(i) - tensor_size = self.sizes[self.dim_offsets[i]:self.dim_offsets[i + 1]] + tensor_size = ( + self.sizes[self.dim_offsets[i]:self.dim_offsets[i + 1]] + ) a = np.empty(tensor_size, dtype=self.dtype) ptx = self.cache_index[i] np.copyto(a, self.cache[ptx: ptx + a.size]) @@ -286,7 +303,9 @@ def __init__(self, out_file, dtype=np.int32): def add_item(self, tensor): bytes = self.out_file.write(np.array(tensor.numpy(), dtype=self.dtype)) - self.data_offsets.append(self.data_offsets[-1] + bytes / self.element_size) + self.data_offsets.append( + self.data_offsets[-1] + bytes / self.element_size + ) for s in tensor.size(): self.sizes.append(s) self.dim_offsets.append(self.dim_offsets[-1] + len(tensor.size())) @@ -325,7 +344,9 @@ def finalize(self, index_file): index.write(b'TNTIDX\x00\x00') index.write(struct.pack(' Date: Mon, 26 Feb 2024 08:26:33 -0600 Subject: [PATCH 05/92] Add debug logic (+ formatting fixes) in `megatron/data/gpt_dataset.py` --- megatron/data/gpt_dataset.py | 204 +++++++++++++++++++++++++---------- 1 file changed, 149 insertions(+), 55 deletions(-) diff --git a/megatron/data/gpt_dataset.py b/megatron/data/gpt_dataset.py index 1d9b7e1c1d..f9cf5ee6f7 100644 --- a/megatron/data/gpt_dataset.py +++ b/megatron/data/gpt_dataset.py @@ -32,16 +32,23 @@ def build_train_valid_test_datasets(data_prefix, data_impl, splits_string, # Single dataset. 
if len(data_prefix) == 1: - return _build_train_valid_test_datasets(data_prefix[0], - data_impl, splits_string, - train_valid_test_num_samples, - seq_length, seed, skip_warmup, - data_cache_path=data_cache_path) + return _build_train_valid_test_datasets( + data_prefix[0], + data_impl, + splits_string, + train_valid_test_num_samples, + seq_length, + seed, + skip_warmup, + data_cache_path=data_cache_path + ) # Blending dataset. # Parse the values. - output = get_datasets_weights_and_num_samples(data_prefix, - train_valid_test_num_samples) + output = get_datasets_weights_and_num_samples( + data_prefix, + train_valid_test_num_samples + ) prefixes, weights, datasets_train_valid_test_num_samples = output train_num_samples, valid_num_samples, test_num_samples = map( sum, @@ -69,55 +76,92 @@ def build_train_valid_test_datasets(data_prefix, data_impl, splits_string, # Blend. blending_train_dataset = None if train_datasets: - blending_train_dataset = BlendableDataset(train_datasets, weights, train_num_samples, - data_cache_path=data_cache_path) + blending_train_dataset = BlendableDataset( + train_datasets, + weights, + train_num_samples, + data_cache_path=data_cache_path + ) blending_valid_dataset = None if valid_datasets: - blending_valid_dataset = BlendableDataset(valid_datasets, weights, valid_num_samples, - data_cache_path=data_cache_path) + blending_valid_dataset = BlendableDataset( + valid_datasets, + weights, + valid_num_samples, + data_cache_path=data_cache_path + ) blending_test_dataset = None if test_datasets: - blending_test_dataset = BlendableDataset(test_datasets, weights, test_num_samples, - data_cache_path=data_cache_path) + blending_test_dataset = BlendableDataset( + test_datasets, + weights, + test_num_samples, + data_cache_path=data_cache_path + ) return (blending_train_dataset, blending_valid_dataset, blending_test_dataset) else: - print_rank_0("Separate data paths provided for train, valid & test. 
Split string will be ignored.") + print_rank_0( + "Separate data paths provided for train, valid & test. " + "Split string will be ignored." + ) train_dataset, valid_dataset, test_dataset = None, None, None # Single dataset. if train_data_prefix is not None: - train_dataset = build_dataset("train", train_data_prefix, data_impl, - splits_string, - train_valid_test_num_samples[0], - seq_length, seed, skip_warmup, - data_cache_path=data_cache_path) + train_dataset = build_dataset( + "train", + train_data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples[0], + seq_length, seed, skip_warmup, + data_cache_path=data_cache_path + ) if valid_data_prefix is not None: - valid_dataset = build_dataset("valid", valid_data_prefix, data_impl, - splits_string, - train_valid_test_num_samples[1], - seq_length, seed, False, - data_cache_path=data_cache_path) - + valid_dataset = build_dataset( + "valid", + valid_data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples[1], + seq_length, + seed, + False, + data_cache_path=data_cache_path + ) if test_data_prefix is not None: - test_dataset = build_dataset("test", test_data_prefix, data_impl, - splits_string, - train_valid_test_num_samples[2], - seq_length, seed, False, - data_cache_path=data_cache_path) + test_dataset = build_dataset( + "test", + test_data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples[2], + seq_length, + seed, + False, + data_cache_path=data_cache_path + ) return (train_dataset, valid_dataset, test_dataset) -def _build_train_valid_test_datasets(data_prefix, data_impl, splits_string, - train_valid_test_num_samples, - seq_length, seed, skip_warmup, - return_doc_ids=False, *, - data_cache_path=None): +def _build_train_valid_test_datasets( + data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples, + seq_length, + seed, + skip_warmup, + return_doc_ids=False, + *, + data_cache_path=None +): """Build train, valid, and test datasets.""" # Indexed 
dataset. @@ -195,10 +239,18 @@ def build_dataset(dataset_name, data_prefix, data_impl, return dataset -def _build_dataset(dataset_name, data_prefix, data_impl, splits_string, - num_samples, seq_length, seed, skip_warmup, - *, - data_cache_path=None): +def _build_dataset( + dataset_name, + data_prefix, + data_impl, + splits_string, + num_samples, + seq_length, + seed, + skip_warmup, + *, + data_cache_path=None +): """ Build dataset. This method is called when individual train, valid, test datasets are provided @@ -211,18 +263,34 @@ def _build_dataset(dataset_name, data_prefix, data_impl, splits_string, total_num_of_documents = indexed_dataset.sizes.shape[0] - print_rank_0(' {}:'.format(dataset_name)) - print_rank_0(' document indices in [0, {}) total of {} ' - 'documents'.format(total_num_of_documents, total_num_of_documents)) - - documents = np.arange(start=0, stop=total_num_of_documents, - step=1, dtype=np.int32) - - dataset = GPTDataset(dataset_name, data_prefix, documents, indexed_dataset, - splits_string, num_samples, seq_length, seed, - data_cache_path=data_cache_path) - - return dataset + print_rank_0(f' {dataset_name}:') + print_rank_0( + f' ' + f'document indices in [0, {total_num_of_documents}) ' + f'total of {total_num_of_documents} documents' + ) + # 'documents'.format( + # total_num_of_documents, + # total_num_of_documents + # )) + + documents = np.arange( + start=0, + stop=total_num_of_documents, + step=1, + # dtype=np.int32 + ) + return GPTDataset( + dataset_name, + data_prefix, + documents, + indexed_dataset, + splits_string, + num_samples, + seq_length, + seed, + data_cache_path=data_cache_path + ) def get_indexed_dataset_(data_prefix, data_impl, skip_warmup): @@ -273,7 +341,31 @@ def __getitem__(self, idx): args = get_args() orig_idx = idx # Get the shuffled index. 
- idx = self.shuffle_idx[idx] + try: + idx = self.shuffle_idx[idx] + except IndexError as exc: + if is_rank_0(): + import json + from rich import print_json + print(exc) + print( + '\n'.join( + ['-------------------------------------------------', + f'Trying to access {idx=} from self.shuffle_idx,', + f'but {len(self.shuffle_idx)=}', + '-------------------------------------------------'] + ) + ) + print_json( + json.dumps( + { + 'doc_idx': len(self.doc_idx), + 'sample_idx': len(self.sample_idx), + 'shuffle_idx': len(self.shuffle_idx), + }, + indent=4, + ) + ) # Start and end documents and offsets. doc_index_f = self.sample_idx[idx][0] doc_index_l = self.sample_idx[idx + 1][0] @@ -283,9 +375,11 @@ def __getitem__(self, idx): doc_ids = [] if doc_index_f == doc_index_l: doc_ids.append(self.doc_idx[doc_index_f]) - sample = self.indexed_dataset.get(self.doc_idx[doc_index_f], - offset=offset_f, - length=offset_l - offset_f + 1) + sample = self.indexed_dataset.get( + self.doc_idx[doc_index_f], + offset=offset_f, + length=offset_l - offset_f + 1 + ) else: # Otherwise, get the rest of the initial document. 
doc_ids.append(self.doc_idx[doc_index_f]) From 1906d31689b81e59a3000362f0bbb0dd8e7d0bb1 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Mon, 26 Feb 2024 08:27:12 -0600 Subject: [PATCH 06/92] Update `.gitignore` --- .gitignore | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 64270f0752..74f73f1cde 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,27 @@ +*.bak +**index-cache** +**pbslogs** +ezpz +*.o17* +*.e17* +*hostfile* +.deepspeed_env +*.DS_Store +old/* +**venv** +*.json +*.o1 +*.e1 +outputs/ +venvs/ +wandb/ +llama-logs/ +checkpoints/ +*.gz +*.txt +*.idx +*.bin +*.log __pycache__ # Distribution / packaging @@ -20,4 +44,4 @@ slurm* logs # Data folder -bookcorpus_data/ \ No newline at end of file +bookcorpus_data/ From ee7ce6fbc82f5020c82df15033fa6dcea710cad4 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Mon, 26 Feb 2024 13:08:35 -0600 Subject: [PATCH 07/92] Update `.gitignore` --- .gitignore | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.gitignore b/.gitignore index 74f73f1cde..c972bfbd5b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,21 +1,10 @@ -*.bak -**index-cache** -**pbslogs** -ezpz -*.o17* -*.e17* -*hostfile* .deepspeed_env *.DS_Store -old/* **venv** *.json -*.o1 -*.e1 outputs/ venvs/ wandb/ -llama-logs/ checkpoints/ *.gz *.txt From 634e37b3def714aa5c3fc4a0bdee18f96ddff078 Mon Sep 17 00:00:00 2001 From: Xinyu Lian Date: Thu, 27 Jun 2024 07:43:05 +0900 Subject: [PATCH 08/92] Add steps and results for running ZeRO stage 3 withUniversal Checkpointing (#383) --- .../universal_checkpointing/README.md | 23 +++++++++++++++++- .../image/uc_stage3_char_training_loss.png | Bin 0 -> 54650 bytes .../image/uc_stage3_char_validation_loss.png | Bin 0 -> 41972 bytes 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 examples_deepspeed/universal_checkpointing/assets/image/uc_stage3_char_training_loss.png create mode 100644 
examples_deepspeed/universal_checkpointing/assets/image/uc_stage3_char_validation_loss.png diff --git a/examples_deepspeed/universal_checkpointing/README.md b/examples_deepspeed/universal_checkpointing/README.md index 341b0d113f..14169c9e22 100644 --- a/examples_deepspeed/universal_checkpointing/README.md +++ b/examples_deepspeed/universal_checkpointing/README.md @@ -116,4 +116,25 @@ Repeat steps in ZeRO stage 1 training above with the following modifications to * Set ZERO_STAGE=2 * Add `--no-pipeline-parallel` flag to deepspeed options -## ZeRO stage 3 training (**Coming soon**) +## ZeRO stage 3 training +Repeat steps in ZeRO stage 1 training above with the following modifications to your job batch scripts: +* Set ZERO_STAGE=3 +* Add `--no-pipeline-parallel` flag to deepspeed options + +> **Note:** that the stage 3 universal checkpoint currently supports Data parallelism. + +Below is the visualization of the `png` files generated from ZeRO stage 3. + +
+ + + *Figure 1: Training LM loss curve for first 200 training steps of Step 1 (TP=1, PP=1, DP=4) and training steps 101 to 200 of Step 3 (TP=1, PP=1, DP=2), which was loaded using the Universal Checkpoint.* +
+ +
+ + + *Figure 2: Validation LM loss curve for first 200 training steps of Step 1 (TP=1, PP=1, DP=4) and training steps 101 to 200 of Step 3 (TP=1, PP=1, DP=2), which was loaded using the Universal Checkpoint.* +
+ + diff --git a/examples_deepspeed/universal_checkpointing/assets/image/uc_stage3_char_training_loss.png b/examples_deepspeed/universal_checkpointing/assets/image/uc_stage3_char_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..4c6758e99111638b74eaf8c328d1640a41eb2bab GIT binary patch literal 54650 zcmeFZWmr{R)Hb>{jWkF|qlkhu0@5j9qO_=_0@B^xh?Iz+pn!;qw1{-aR-^?{Is_Du z?%15Mc%JwD-tYas6F<(Mb6w{Lm%-j^%{AAYW8C*W?lGfoTvw+!#(WGx5QqP@sZ3hci4^!v+$Td?}M_UJ1+XrTx?)RNt9yr*G30@Es zxqzwfahQyUq5e&nZ#XjL5)oMZ){#+q|z{tzOg_Ca1^H)6k@mJee>d zCHbJb8BSTL{=8;!DK6C~G4+kz;2XQ=@=F6>lYecFrn)@-=(n4;mUhaFDICKUO07)t zK)Bg$1W_h|e`r;mMYxat&*cCA3(153n@M;blp=cn*qf`*v<5ofeQJuJIwc}P6G6qX zIp;@5M_24PpRVX?*h<0$NZ`i&K-TRt``|(`@(`sEBCd1i$U=xH z8rzc2&A)nv_w(n^A0Hpkg;TJEzkdC?(*F9ht2}fP_QR2VeL8n)eNTATu$!8i{_f7i zZ+&zkB;l00^2r3mB!H8OP5Mcl9zR3Y%`7#mv06Ecj^|;oUvsyoN>V8t?z@C_$+#~V zR3B^(+1E)pjLI$ySJ0igp60l>@#M+zM@%BG+UGAlkaQSrp6kzb8n4Iv`t?hZCy|Sb z3v)_6wmFoPE<=ZJzA|c~bF{|CX|9jA{iR%0L8{h+(QWw+OPHkAz8vk96)eY8tjyxO z`I&fTRg8&T=8dyOZu8%@Fyk=J_@pGliaSQ?0_OzIoFV?2ETr!1TirzzNJvQNxloLC zoM}I?K0iREDZj(5tfG>ToE#BCOf|VPn~sfPkrZ~Bx!T}n-ukE^5QA@0^3!p;jo$W0 z5oT}mZ_}G=sluj>1nY|=I};{A@1w&Q`^AeF-3H37d*?SuNJ!rG&+gY9?kS|ncu; zm9c=siNR8v>;jvC0=C84J$sx}vf$n2Ux}J#P2m&<<&IohT3XwM2U}wS29>V7j}sFK z2l|pKw@)>g71vvo^nY=Ao1H!J(=z?yoja`G3+_gzq%h%Z63=M1!Yme9cD*!oNG^PL z*<_M5O!?-it&OKoo;>06OP_O2}gI!z8B1dCtSc0eome~O?bD`wZS}2E_`jaYi*WQ-j``>#DfDF z&rUW{HZyx=BKY+QuePv7J5!pxA2U3M+HB{Gl?GxC1X*2M6A8fiqW5K!ac{osF`{ez zuGOov4<1#-_J^?R?{3W2sE^R~7t+%`HeQp>r288K#^r~&y(@$PtcyG6dUN%KYVW#D zz?y(tos4CZ2`?Mc=$3+~GB8`oC%$rT#aJZx@O z^vR2NOVw+Xd3w3yyN!*?`(uX(9F?wr zf?*gFEsVy^J8QEw(uKkT0u(MY?MiX%a@`d!mcs#u2lj^~_LPk1#nHL)=7PMZt2AYuh|A=R(~4Qjl_iKy5g;STnu_IK7&B5^MqOJPj5C^ zK>Dj!CuMQlQ|KaCteh8ARa0v;5jKJQmV2%7@Myk##B=Sr2{x83Mbd%2&)O}=xDRqq zm|iX#A~|WJio7?hA(pe$9pZe#p2dt*dEf`CV~j4A8~=hd(_gFa#&7tMgfHnB1X5*K zrD#YABV(M*ij#Nt@z>EgUg@zE`**0!{CnQ5;KJ-TZy+5luLZ1o%dz_|E8VlSoC+dl zSG2X|zn*p>h)RANsi>&%+?h@$95`J6rYX!EgYHMJBcl 
z1Ds#2`(I{sQ0?tkVI@$WOdvs690iPPS^aj`8=9l&cGhrvO9~BJez*8`N1qFu;yF7z zqhXc*)`#Y)fz4-@FO^`(%fl{R6AM2}w&BAxG&Dhndv%9XUC-e4eR3F!ty-#Ofj!U) zIX&deOV7rs+2v1?qn`Ni$rLFU9#?6$z`20y(b3T>Kg{Dyx0ffFq@20k7l)%D5>k

_yC;+;TlWm15z5Ijk|5Pb|9B&!qd(qG@+|q6t%2saTvw@Dg1J2NBtcd*p_rS}|fgBPSy zbMym@#=Z#(7U!8yj3uXGxRd>@ci@H zzdF*S64C4QS(lENH<*|}YT7w`7#&S`a25U+l40ww*;z4I`t1`<5zl-UD@eI=GdG^ygB3{kk^;03vv8)MrVKhnF|%OMCl0rq&f^w?DmM zU%I*&)Aj&Nh-RtB36?Dl&kp{CcYJiM+4O*!IP}4;Dl6ly!4^sODt!EymWPWg$ZPe> znXi7=En#x50^)kcB5A*j1ss6o=H?cj|DHb;*O9J}{%5F6Nc+YO&T<^UkNcJGf7`aE z;3-VdHv;?XL!XP&68+Zme50D@BB>qiVqLOLu5*1+KkEYO#(fw4*k#=L%|)MWPvz?6 zo1x!NoQf_?32BnS>KmER`+%{|9*uCnX1rQ!`Nx(z0$&ne!HQ|Fx)f+eUm3(+=rfk zN{;RYYiI?Vr*{s0A#wifPJc&X!9Sa=-kC`~`aLX|Wh_J*v3sAXEtba{_AR>-m-Y(o zR*5=Puc%eb#y?k5so(hgIpLJ)dFEoDEqkl;%n%ITwoi-!$l>JXCV+K+QAr7NXbf+H zhKcDj)fHB0R}zNku*3O;HAoasE8pG`j4){TzX!uOMn(poiG&q630Gm?QV~o>K}WDT zd|AcS^*nHhCXU1}_fU`qu>+tBN5f8*T8y%l)!D<8bFr7MT*+PD5oe}Ao;>d=F>NA3 zQTAcihB)TO!et1#J!KBY^5#+=i^HN3k8Q}36DD2fzf(YjJaOX0eIqe|LmJsyuiv~` z#R?i#M*`?K?|XN1zZf1R0}#QD9G&1Ut#o-mlTyPgHIijx{xT;|o+QBP@X>$x@S%cD>tV>QeZrsQ&nE&=Rj#F&llih6sV&b**bK#~%HUom> zCqxOzn@Eh}Q1towbtp$AYzD^0rlt{pA*nk3I|1qbyANP0h#{CrL0X>uLT!Pq77+2-~mygMR6h3}W^u%J_ zp+7FgfWP(EmqfYBx-*-=Ga%rSKBNr^4o2C`y?gh@SEs)svU|1qIXc_09TQ%>_*=S3 z)74GwC`Su<0f?A6>^)&7@*=~0$eE!-E)K)xL9k_&N>fYr_k9sX3k#mWUU~;e*dFWnDUWD&R4d4HDk)G2&`mW=zoc!3bE$p*hf#l+Cd&J4D7 zxByr?%galXM#siRQBQOHx2tq$Xy~#r2PA3OqzK|rcks!?pT)8AC3jhMS+_C4(00TFw@iv9!=Q;Dt5-Dn`)5>|m(TMrc)eEqqZJ)oQ zQ{1-q^`#sa_JDmSR#{K#DQukj+Dr$!(jJ`dHV7`;0J(x3_9;BYm@0o zDJh8%tJ1;CcV~Ai@P2eLY)La5KdzXsXFk;$^P}7;Yskm35DO34^WpxLI6f);f;*%y zu17#}%LGz%|K&inmrKJ`Mh2&{FYzud);#lUO*KAR0wQQ;a$Htc3W5XyXD97 za;AochR^Nop$7bd8#_*aU2ZwMxDdvvC@a6OX^yMva&}XA3lK$cLyi?d4L}$j&wS;~ zvm01lE30=V>{jqrUS(xnp}6SdvJnJW!F6%CqR(m3CHdL2D2Oa^wM!68qP^B;OuI9b zP%2ME$r=ffh8RY&VdxX(a~%0DhEic5Uk|IRWyzU@0}r|8+AK=AL`7+rYPMbu6;|P> zSDG!;{leoEa7-u(dI`%uG(23HpTT@(vL(szdLzqxK~iZu!9Xks3;qYHlw0^u_qf~u z;xtgb`tid6muw6%0OH4$kvbR)a0<=RA!CM$JCuoE2*OP-A3Qc;B2vrhv8(^Xd_l%o z`(;m-`V^41#tN6&_$N<70TX8F7a9e3MRZuGjMe&~c%-66J(@v)Aa10lF8_8}D-bSV zr0ff0bsVrc8e#iKKNGVw1Eh`oxPJ*1Ok~%)RWz<&SJTiS0lfCoZ}aD6A$?j_)~Cpw3R@4$*g0;l$VftD1!WC=H4Uw17$SkX#aT6x&q($WAcRZi~ivU&?V 
zi)`iS#$F99?NM9yuAy{o2f(gPFv(X03U!U9y>wqTjfmb_^f%I9_oG5YL_|_#y+l#q z4gu~Fn+!v`-{uLzK=DU2uqTw1lspeM21I-|7o5wSm;v}g29)3Vm8fN0V!*%ecpVZJ zSJLHTNMj9B!=hB3+_aZ+>Kw})#*C~NiMCYUimIsL6Ok+0+ZUL~KW{N>sC1jBhU^JU z-N#ob@F1x7ttLl9KpJH0R}N#fs7&X7c;Hbs;B)TW&0E7l&amTaHBK{1@_U5LQi)q*BrM&M2ZEz zTQ+uOQQjD(900x|5B9dEfW|w@5Sl_D+@B{bTGJC?AUk}9iUg2|Al#qvjOFm(#+*KV z+GsD#5Pe6EC#oT=>p2dOYz4~{j4drb#jOxI1YEDlhh+w<0Dba+c8ijV16I9$py{X} z*bsYB`UPdGgq<=hZ4O^}T@H}c2|E)K5+b9bg1+2aMMKvUOkPAAv$ykiKw?)@gfZvz z3z{KdbVG25NxB1z7*)=`3e`Q5SuDrDdGlt9v>RXT_9Pjq62P-yAxRWDjLC;aM3CWn zInG;A>*W~)B^!B@f(TIVxpMN8?N2T10wPJSU0_No!2a;?@p~YVJI}hJ_XFhGTZdt@&HlxuX7&muf^%(3uwX| zneK0|Z12t&0=uV>ca-y7=0tP%$`m%sZs?PdUnj_~6Nk8^!)16KuVA{e-_WCED*(51 z#-cqXsMs9)3J_fP{02H&^!+K|wzC>}34wj7pmZ99iqD@l?$mhGkNK`qKN z5L9R&@JZMWjss@6;JIv)l>Fnt_q-D*&X?U=C_(pn9bF_Sbb#oKKyl)uppl1xU+~=) zM&ZUI4h43*!H-H#P9hLtLr~TS%Ml4gGfUn6D%q&_yg-_?+p)V{4;ki8#A>#Z?<4%=3#hCw|0E+eij5tFAQMS^!R__J}E{)Mnt-yLXYsL^A-#5Nn=! z1Nu>1n)gtVX;XKO_L=InP8n3nFf6qW1Jaww(X+DdHVl|9y~jlYLcZ)uGu@9*b}T4d zKn0@A%*^fSWaIdkFJnRYTFJUV?>Cm)Z;gtwr_>%Pf>?k8VpLiN$%F2m!KIE2q&rUF zPB}H3v}--AfCv!IsA;1JAn2lLg}^4OW%J3&%W>4S;JFbxfj zddTgcg&vOpps8MMJ7<*tYYIibaB;)@?{?|{BeB9)tRS#Z? 
zWf#No-Y53~vZ3l@?f&W&fOY5XhnckBTx|duttEYJ^{bEyU#X0AB8;aV1==4C}N0I|u~{=qrMsX!tnbAR5p+w--vf2yFEA*8K`~_(Xt9 zvR-gR0NLO*30uKNHS+l@UY09Ny+kYGCjR}R?{mX^HPED1fC-O(Ig_3@KSSM{aR8_m z6-t+zs1!0_6T6M18M-YFOQFfy6O{3qrwz7|Xx3>5$)SsxPT2(^B_~qJF&`d&$1dmH z=k7wnMWJ&`A-dVjDSH+pd9R=_3-l~Bld-a1t+JtJr@+twA)5rc-wL%9G`|4CMw!I# zfkKXhYz>keaS)wbVITK^J`URz)yDv@H-TU{T+pL2yf=1;BMxZ@1g$mYDi12OK_RIg z#y6XEzdSM^1BvH^!Wrx!RJ+#8y?E~)H^_NyLX=lYxJtP|@h7Hatp~*j5AZhtO;mjp zvuJ-e|EB*ONbilw#{N-&nCh+0zXIIpxzVpb@2XEf>0?F7iPkqY@Tat05R~Z{5TjUXB^_f%vR<<716W4d`PwH zA6hheLu_61sjnO?HYe`w?Ip;bc?n7Gs3>)?>xRm;D6CikU3vn_QCrGiNzsAKQKdh} zzW^c5->DA8)Vh9352TMMX9E@yaAd z%d%XxU2jDL@7pmL{d5`r2|6wkoW!8Ww-Uh!oyd6xb?z1S{3uZ>%V;<8>LE~#D^H>WAtqP#Ec(c{@1JgRDciA zk`e~KW^Oj4X>1Cq_SQ_|tIyq9yV!@_?Hb!7zQJ)2RJTR7d?>%`b2xW7gcQ-`OBy4nShi`8OaM*kZuqEn#+s0{q7ruNO!aX3H zW3eLSA)pjw0^aJcENht@p?cGAy|ydoNAbT55;s~`*=y1`2AO*tH1%@-1L*)rGAd{J z_!491r; zIUykk#r-*NUp7MMM-a#)^-!-x*BcseAe$R}xJODv!K`Y7&}vNS;n$MiIxG&{820_95GN-SpRQ_%@m#j-$yTY$ zCzC8n2XEZS*l4SHttA34_=dr|jiDV%^zBvz?Cex@&x?~#Qbybu*4sD!U` zza|bhs!)HpXYdmzLXMyKg(T(;H3ri_LJ{NYW2CA|O&`?fhEKw^k=S7|9a1-LqI*{o zJO5g%*qCVfzkE3f_XTQWfoj^3@$rt#l!Ev*e4+7t#Rho@tAZ^lSF^sOLh}9t`wwL| z5r|GF-3?mP!~ICPa6$fpx{vS_QL95dQTPl(tIYh-|DaOWvvYr^(6sNJOs?zUt&*3D zCMKLu`FGxF4M1jn0l1grrO#pupPa*eHUXB+ce-+IB)BDdY@CCbm^jO8?WyN-LqvDoVFq7m(Ao-iX=-mt(qy=NrF64q zq7Ze0oVNnrls&%k=tSIj?H+!J+PNJxJ@V9Eo4= z@7GODP8ORqoJ7d&M=oO_;e^B6sFxhGRlaxc3AQUWIXOfD_XAuCn^r>-q{%|Y@7;|T zs~qscDXQ>c%c#e(H|%cMBs_Z-Bspr(p8l=a+_F7whul`;9K^v#y1DW#P`E2{|Lbvs zFNs}Y0P6j~!!dxt*vZY;+O$Nzz3o!Hd|A?f-va7TriVB=sBNM00D*{7UZ?mlfRJ*X|O_B|f>sq;tu@-3m#L9fM(!5C+QUylal^t8*~9 z;mm5*h(BK`6}nvtc<`w-1Kl?!0FQhnyWLjj6*HF?lAXRj&41rl5E}YPx3ZuTplb{u z4h#advM4B%aEfmXNRuJvX3ShXZ)gsoNE1RG-C{o`iQG7Qe18CH`ZRQOAcb&XJ6{Yz zN#=N5dYRk&P|^Lbf`~0cFAIiquflBTd-fGcs4AEaeKKBIC=o)}1VT+q+nD|>M_cqs z8^Uk6&P>U+O9oJPZM$N3ldaxzXw-MjElnZd)8Jo+-q$(fZlK~FmvF$5F4a~s%?}i| zs8C&_OG{5lY2M1cl3c(fX@A~--^n})zn)6rPB1AQ59x>M*K<~s8C24q(quA}W6cig 
z-{vcML(hBKe9voMo;ZLi=Yy;1IFfMkKOr0e`|Jq7ysyG;a|GlZqa0hqVzh-d5*)x~ zgXb+fyzzvpm#^mR-K}swp(!s#p%Ss#@`QJ5YI#B>cpeh_b>HO%nf)m!ud`{t8IA*# zGMS*xWBQ}W-*bN@=RyE3ZCpA=EhLQ080t>=U=eTx^oGYG;XBoHqwHL|7J-5 zZoeXU_IC9)1*>d1ho+o3F=Pqm0^_>yrMj^j)3uiON0jf}xsy~-St^K)7 z5t6vtLtR9r)%NqtccR47*xSk)(`bgMSY&vU^tIW@7_ZT+cs%ON%f{}^GP2mN^h4%D zhur>$u%Ap9e%u67ewKr~(p&=QRVzQ#9H@})yjw14{KMh)IDt0m&bV2ZGJk#Q7ps?V zh;KqBKGXgzNX+tjmdortAf@yGSX;q>6C|_9M1lPNSwh8gt+H#X)-YcJXqZ(gv+d9) zLSX+5kZ=F2Y;>M~PV2kU;tfO#-(<_j7-kk&-da;7ZB+9-A}`?2&b))dnp1K;n7PWs~^ zAw*I}d+**^@f#G8}g?s~*b$Jq^UUkUT5_1-uJ zoXjWh&d&X?*~e{}NH;Mr{SyrLM#>U5PqkW(&Pm}L2Jyg@PPC_PvF>hhL_%$s#ME@5 zb?w_*1@f#OoyCLW#|ZCq3n#g`RhLudT`nHv#cpr&Y`!<7VHSTlF~(>N zg(6jO@X(4rV=XQ&CL!m)+c5~Sc&6Qp(yW-Nr$>Z@D(+p#Q$5P9h%2h0MQyuaqEK>l z6joN&0cVF(4xgze|J_{-_(C4G#!TJN(!xW)NF^sy3di$LTsoI&( z33W7pR=|%!7F+fpv0fje-w)ongd{gI84r6#*0B%MmbO1v#l9CFVmeaG-hFpDuY;~$ zEUO|j9?wP`+(_9-GMZ?BC#wg^(cvSZ^!Phy)JZQcB$ANtIZ_uur54MIAK3Af^}>x( z!bkY{_#$?Lu0YvO0s|!mhn3pZtK{t#VxpqGI~z1{Y!OxE0G$N1W9Ys1{?kGGC|wL03=eoV4>VCf|dNWOPA*GWW=+ zW|xzb6IrS*AxS)=5IQ_;>kG_0{wuw1<>5C?g{Zu|+w2|0YvJ%`itq%x?EbR3=dT3p zS2>TZ#W4&rP-q;|Z#~6v=5*<7G+X(L%e99%{pMB(XqBR%!-reG^g6aSQ`1Jgvj`#( ztMob}@~O)(|EWWOv$MXxAyVek23D#3<%)0LzB%r$2NW3!KtZe#3eHeg;b0Oram@a1 z@at-dBNYHx0T1?fRT~#TYlh-ePxh7RWQmiM=<5;0w>p2TLHJQuyD?Yj1vJ(|GG7F{omviEN1e>d!47bVBf?Vs!EWGbwc z?)qy!n~$L&Lbase5S7b!jp@oa#mKc6y&!Ae{}k}2w~Ly+lSmVYn~-qMXCPll5yrs4 z-~tpOSLzOHVgO(3{n^s7tJ_l5Xx3ThJvsfs(C+NnZav>u#Q!jw_@w;SsVnaIh`g6Q zVobk4RW8rKVC`pZA1k@z!%=~|Z^_7c<_ujzLUH_;Uv4A(g=?id=KUfrGgE-h3_e{g z-+ZC4(eq;P_ZUcI;d=RlCX2&f^yzo--w$D(rqrfDh~4^0_%wG=q2y=i{{HRzINyhn zB=36}VsOm?C&?5}CP#7M-8OI#zIDI{7`!h}&IhOH?#=-45Fb|BnEy}^D2zdX<8$ZV z?685_1ysVcAs8&F+V8o~og*U(!GI!EIR7SIS}`GvYLcgi<*MFuV&Fti0s|EnhMztB z`0+fTT@Q?%h@1R9_Y^DbMB{I%GPRbZH3}%W{UL2O#%V}#6;oT z8BKQLdV3P;|4P?bZ|BTj(bNQA?mQF}9zZrE?07DYt#s2JqT2mGqig)bWEaHU$QXqD zLjqVq(4XnJJqdPJ6 zKrZ2}W;$3!?TZZB(Ex`#@F_BBkX@gbL_qxr(%~cL9SsOL5gZ(RQ%5HpP_aZ`2$ z{jpm7ZQCJKl>kGcUe&?1=jZ9G7eSnYYLUXkhzFzj!EUMN@5~3mYjkvp+x}nyqlYSB 
zYn+08o(|%<_t#_mrB5rF=p*=IOQPoWriPF`5v%U*Ik)$iEU7^GhoGrUvVgNQ3h2_* z!q0Ch@&`2JCwhsYW+jJ=f747ho$DiIlCM@+tgz%Z^0AX&U0o#@=z6|EVB|UROlzyk zd0O@ zrd^#?pf#+aAanh27?X@xdnrGc~vCY@Y5{kAj_~bQF}CM%SS6c;K6Y6 z83k)j9L*^;RIEqqMMcmYaBRtMd)X8M&ePo`@WO4lOI8QNe)Wy`Y1_);P#rwvzyW^D z+YdpL#vlMPh3~hWGWhW8lR%=O^H_oWnH^`iCjh;eQWTh}lZ8FopBFb)wanx!giCFr)qM z&t*h~e4Fq5dB%0@V2Pxc$AV!aOfR2RMRa0fVss&Rpj%YcOR z#%HxvUk9hq&ktQa?{#eu+ssy2KWIpIT}*ofX~Jx*cEoc!NzxI^mW4q8G&j7ywB64=sHa62VwaCnoCj<)HCPILt065u-O(7!+m2>kl2Fql8UYy+01)`#?0pb89Dc5jlwrN3RCZjMX${rzh* z3|cHofKg|F(H6$5KIIRZ4t2(3+{g>~KBw!-81fkXCH6l^k;=`xd7&f;s#oeDS2 z*xR`FL0 zXNtpPX_O8Grw`+4K$J-a$38v~Y`HAD=;%X@b|@8G_9qIkiW^Z%ZMW~cwD*&1!k zzx@o#eB$!-1Ow{w^oZU%<1Ir&RCyhkKcM zI($%eK6?QQ{{M}j(ojdKLRd9rikY@lr2l`yQ2E4qSK7`wpnM#hk@b~s#-Pg)IVmjsu21TP z^l1>nW;q$6QF6Du94T$}VA?UDTve4MH&0>rHyv0XPjRb;fsH8nJ#9fA7+49a-0iTB zPjjqbjirJ+@&nrktAz944Vp)VXQ7#tU}*B?%T5{Xx4yX)!2OsfSag)lxS~x%l{w zF+>wG$EZ;gFfcH9`^GT{7_Np9Lx~ODie@{jaiH2dM5>)tgU(l@U; zSEM=A>F^1bJgyQ1VP@Ix+!aSh$FSM$=Ku`UTwTeTCGGhyjJ~-RJn(&PZz5s|2!C>l z!eQxUU7ExX{kcdH`2CO!Ez*J3Np=@wk2B!FMJ*x__9(-YgTO<;V(hy@2!iy(q;t8F zCk*&DI`7ch=L<)vJ{nh4cdxfL=hW}(IGHy(Gprskz2ukoE&v(>;(4u0E*W84e5P9x z)Uq&IIXcv=i-V;#;kWf_n^iwex`A5FB=uO}Rmo4=3yvAT)#(cH#i;x8h6$#Mf`U83&&FD z$Cm1rK%lRS1G2oDZligL`()uk?55-fACeafT|k+4Ww6G&V1h-tHiO`uNa%jZgdg3? 
zLIZwfPtQN+{oUT_U`R-BD)Zynl4p>_pOQUapWAhFK!@*eE=~qFiXAsO-E3BDj0>cX z{y8Q~8HTbzP7n+te^#I zeLdhX0mN?d1wK;Kj9o$GbjGV8-8Wy+!4z z)?$O&>ILq0a2o-)p8q{qDg)Xcbk4DrVhU_jrAvOQ0#zlUpjiKMQ{MPSAzpoV_YFkx z(k0SYS#0;$tV<1--85rZrD(yF+6<9DmlYYbCGWGon^fR-Iw3Msix3LJP1dDKyvfRz zA30^p2+V}?CKt)>?EQ{|THl?+^UU}#8F_lGZ^M69H&~c|)@-_2K`Nl$-vBbTtYA>?oiHIvj zwm&1{zfJ>x5dn#c9!d_05enRfWc9nG9@G4ASv-vfsFgwt0y|ExQ+ZE7oBsI86 z%YoE@hZoFOZG3U;+@g_~K0|bpLtVeM%aA(qxuZj<ed9pF@Qzs;c)Yy1vvn zr2AL0A~Nml3=FZMp{n~!0S@Ls6it{CQoB5N4)TB3Fh>1Y_yFEPaH=f-Fjprq4gkdm zgsgSZ!FFiS%q%w`uBml(H_Se^Jam&;Gm6Q>CVv|#IOaL8hpml;{x2r*bVBZJ&?m!( z*gn79DYG=by)u==hsS&NtRg_Cl|h?AQ?N+uTEFeLR#6K2G)X7Nne>o`ojtTsS%9B` z<)yI`5_VdxdiRFp$4B`G0-4sqHXH{Dbz2GZ#_sMI=v<>Yaf0kwyTZf5w4K5I;yKXl zoxttFf19WM*ztS;@RbDK`X%EgOwu2ve4L@21fsc;_uO2K>AShM!($^1tDsB?5t>h=y z#iLW^v*~)N5dY!@pt?cud5o8hxkHgVdd<2+-Hzx2C&&-K>FF< z_Czg@nzA={?5wQG?jxjJN&S%G40%GRwYVVZrk;NgeB>=3g~cJ_@B5S2#$)T5yr1+N zO%H`zefnsv92ID~i!2p)FMt3G5{$+*@VZ6t{*6{(;l~zwH^seW_14-1%?}PU$xD&Sfu1cmW7e1nLX~(?gGMV&gHY0_tli5?W z*WNaLtZ=E}cCxJmO`hIi=gIB=G&y`YFlQoyLN4<)F8J_{U*)9-AP@;3i*m zu5h0W4JUe4ZZKQ+lm5Sck2dOwXaM`sW72PuXY>U47t~*<2YUMD2a^zhz z>(lpL5;v+Iu*5Nbh?*k$#g8TUF`=^zrzX&$GjYs^zNqIa=E!qY9RcM%o`fR$B)!zb zj=!AfRE|IiBI45DL<8QVBNiehZz8{&n$6(Jz>m~Ot>^wPGrb4NO&|5jF)FX;397h)m zgW=4uB8GN4q(>iELZ4TPIFNyl0R4SrGJ3h@>ljV}LKXA^4(J6)!vo=^|L66hR8K}; z{CJ%F=z`I3!7~Yt7*6^ibac%!Fnp(BSw)tAW@Pd-ytfOye~D151+`EX(v9u$uZKI4 ztVn=z!upr^Xc!LYFD^C59t0j4q^g6^t7e`=uNdkHE^YbPr$I;KE879}0sOO2GlCMU zpCq*CL_ndhTy6=Blh{a_Qy@F554#QA{ZZpnrvATiafPd8tynNt$D*DO81B&j?B&XW zGY9Oueu62~$ZtdU5#&2)DLZQEhBFbMh5V_H1MDy?yAL@i@L0HI9NKamaInGn5;8CI z@2D+}e~!zDOujTJIQCnK|urK%Z3;)KXAOZNmy` zAqFf1l>WX^6_}ktNLX#)K-T{bF+wZPk17v`4Q8VdE4Cx&i)X4PVIX~lHz2qSFuVqX z4Wlglr>&Zyp<%^^qej3T#)(@FlhVeJs?{(h?aHlkc2rD?wykTF^Eilc>>rBL^=-Z{D=@gG#N$ z-r<$YW%|5tE%*_Yc@i@L zK2IC@pp7mf;iisIF@=dOGyMC(pn9+2qoLitT+2y!%8V~&YrAWGr)TAN&;9pHqwyli zn;r*V?BfE4AM2ryg#fFqt=$7}=JWKbXd#=h2?B^N4z!mwENU5s8ZQ`-i{3oHV`nya zEEe4nMl;$c6l;vSguXmZNWjFXH4*#Bd9N#~suDnnBnHh)(5MfJ{27o#P=Ec=aRYv$ 
zqN3;-1F{=^+9~2T42TJ+7a)zDM!y3hIVGG9a^)$1Lxxg_uRDbz62JPjzS~&g-&cMN zSzB9M0fQ-7tW#VaZr291WjYYCg23*LI>m<3D7pw?FkJ!Ha$DLkm(ny~4@2Mf_> zJ`*{Ls-sF!jmRSZCuXA`oi!i|e|S{Im}En5Q5^a2 zzwV;{!iox`wU7UZcmFXs^o#2M?vtbNR!4na0RJeEqLCPTsb!PT8{PY8{U*))1k=A?J(`%PP`~JMsX&QvrQ%kDvQFj3v^3k4f2!zxp zPDFv(oe`|mp{gf&w<>wH-;6`^L3ZrhH*df}_g%by*qVk6PmxHC>^6n%`CIevFQG%t znVq|fc&`4sgs%6acZU;eGhRfr#!AFgdBN17DlQDb-tNSe(y2G_p7`LO$2^B{m{kA$kOa=XQ{8>yUx;GKq@K9uBCYg1r^l=;7FgXn%;Jxx#|JD#Q`=I8;#tT?|}` zj4C{drchHr?+elepIc#lz2ecCIUb97&OZOUfIqlBwaFX!`CRw1Ot6(J&v6Fpn$_yk zoILqmy8j-V|3}zg!cznf7##Lt4(I-S{`^@H4#eSSAd}x;RzW*1uMgfU_S_12P&!Dk z^@}g4R9Y;c-QA#zrYbXMs%d;C=j^F9Abq3HeVm@23;YVuDup&!KzGes!@nx` zA!0E-ibdkxk0OhPZhsXb%*#s(tz7SytI-A)=mnEF1R-PwEOghdN#wU`aBp&%yv^t` zD{U|<{dq3*2=2+M_o&E?r{G)BuuAp_AI16ifq3=aij7nlGJJVvq9X>Pdjm0`DnEyZ zc}I=za4uaHI46oz(3~XY-~CER7`TBb5G;uPx?+PtgY#>?vq&Cs8y{ZgKk+N)7_?zx z7nNEI#d?S=V{uf1 zXS{amD5&fFL5iUG9m6a>$#Dp)5JaSNsN)n$(P$^wY*+f0^(hwVsAe~k6t+3>=lo~+ zMG%BvkL9Y)arBv}sdS_fog5uFng$<1Z7DMDum!kL*C0 z+x3)?cVa0t?62PSe?oDikt1wesMUkaCIR3p6dD|v%0}E-kb3JO?b+Q^nKdM! 
zZS26fenG4$ERg!?b+d)tSsotp3W|ixwn3Y-Xxg0%yG$I2a3+2kCh~$_IFM=#N?9(3 z-boI+NmzVBG2{zOS_gvj=ciB?`t?fp&e_G^Kl3)3FT=yfduPJS1Iu9(NaTdr&84w8 zXdyNSm;`|^e}GxtAUh)PCpl*m(qc##kj8?iHxzf`>Ur*??9j`{hQNESLVHO#;D9eQ ze!{sz#0Ywz8`|BzWvFpk0H2Qaw?CHZg&#?|Di$|(?^C~R6Z*!qgmk45*#Mj@+NGxe z=QW8nn~6Y$$r9X3TWA|8x`YZPK4iMQz<(Eii8J}B|NJ(2Ml8IB2X~LMX!%W8HGtEB ze(XeH3uH+}unl{2st-{^E;eq~1q)u;p zF(wd+3c#X^eE#wUoEMe^WJ)%rhLJ5vwF0~meSbE7i3S|I^#{3OS)`aN8Djba`!70s zaakV&$J-r88zSK_Jlw&SF}RPQHw)S$0Bl0lR{Lo5?08Ry@D;MPiG=xdqN>gCKN5-c zZSDjh|Bj%r7c&lrj=%v6c(5E!0NL@J&o85ej~7)$%3lI#{Yw{w~mUcef!1tFar$3kkTE3v>@Fb z0xBXUA&p2$2ujJ&T}lcF3P^XWbV{Q%h=hVli2mRh!=AnO zec#vhsS6Y&je)HQ7vAe z*8;;UPSnn5r z1<=?;ucD$NY{N5IVIB`oM7jqR;@}WjT7`;(z9wjPA$2uhO{U|*SpNGv!`nbBj}L;o zG>ZNq@&&zr+4zAET(HkD0j@@6U0rgla~4tnX=3eyXb`Rg2R_}{3JtISTXSDrC1#lV zj*YY&B_X2%gOGatIg{y$cM8`=L`|G3XVP?~BeADTo@L2z(#mct?uLGGaNtr!=NrfJ zjDV&NL3j(U@V~3bWN)*7*+USJh>DA+6Gep+GX{OI;aTZ_yH<`2F~Jvxy=L4+?GIYXf-Zp1W4FI-ffejvc&p*!lJ7o9kS7oV_p$Va%2e$KNB>RNn{r1k_DH zY2S+lC1U8si9xFiL0H2tu;B`WL_h$C;yi44j!n)0zF|$*4d6?LFw4u!*D6+8-slz~ zdYgK`nVFCkjZ2$+xqjKI{X%j$T8{S{xkkQ~NF+f|Q24vO-eCAVKE@zy*ZAc9tN*E< z!}i*BV8MY+rw2Gl;yAQY7J&_l+K(TUDBCS30zUgczt!Qr1?4|EU|wv2l$r_)DVZbK z5|b9_8^j{I*+Rncz4u@8=wlYEhL>q3iS)TY%bDAx?Yzi=Xls3hDPX9Ih7tOINe*CW z`p;U~u(+1k0u*^WpbBFV7pG27Plqs#jS?Uz#JA3E3wvjPJ-X`|hgRj2T`S;X$F57j zfvak3Yo|{EuN@8q;^7dRdL?vHgtb-AbBJ4_N;H<1eCEXI@VGeX5eWOB#XwM*?*!z%lkW1 zb~WDbxjZ~UQcMmwU4e(*sKJF1Jfa{u|369IPl4AR(JYgRY~f=5X6UL+}tA6-%_o-xS873S^EF6ehzQ~uPxSS3?v?5ApL_4e15^rtPWr! 
zxkGKXo96MtSipn1#$%uja>CPjXp_49>K~osa`6H#1^^wvPP+V<5L&t_Q>xIcOAs0{+29Rr+!vpkzs> z7@uIR(m*64Ez+u^6~5{U<&l%|8HAPLq|~hai&2!zpddh2n0?x0p+xFwyb?9)pWibC z#5|iyB&cj(>uGZ3{fooQmpoQwA(~iuTpw)gsAjJ-vrc)l^_{GH^ly6#GL7}Wph})L zsKPI08LR6*xjA4&Y1~r`L8y~q7PU+NoX2Di)AUQnLZSZUIg#}ve?bjG6m*sp6!sTB z*2M!qP>AD?swn?On%w}lll}T1Y^NWnPP6|#AZ>RGQH=m+NPb(Sak&E=a>e5;%Z&3u zW1O=Df6I6Q@U=Xp7?U(PfRZ$LbaeEE%cWhMe``ZNKrnzPv>w3qMuK>W+<=R7ELjsq z%!mT{sGm03^}8qojhxY-{>>w&+q{DP#DQE0yYYqt)C`-#3Gmiskg$OPrUhW!!*6T0K(aL+V)Qw6SA^%M!)6z}e6mactS8oA`x3G7 zCO{CZ|9q2!>6s>=b5$yZ&@p!OILV%$*|0_QQ4~*pD7FH?o@XbU4bx2?S7vJ;1cN4x z3Vhsx3v&@$q6Q)~lNYgj;f+CevdF5#qm!fVVQbJRJ@2cFSi3hkU9klZ3WWm3dunjN z4rDm91L41Bj<_+s3Tt1YhosT*I&I94PYf$~wdKH?4t}>>8A$R41s68b7Wij~z)pyb zLIzIt7;sFph>6v|nbjyRWYx#H|MdY360Z^Uoc{~29ePthPY-79W4|jl3Km3C?s&lF zK-~uJ@C3_41z66c1s+72N0L_#mim7?BYh zXv9~%LWONBTCh(OXo(7ccqP!Wq>2G1F_AgD#-WX`lf!27CL+m zH)=fKq6u^JEg!aT|KW%$XyOFAxaEy~rPIfvtzsD_wx%#-fri#!{81Og8lBOk!rmvE z;WUp$4a)G3OV!MI5UF{+%+X_0a?jub`qj5j(fJN&0hq=0GkbAtOJOKOha=~)8q+I2 z_~Ap5yLH54H&vG5>R)w8qGzd6E|t_#T;FuN)bW?Tev_dXe-4c{K?o?SZa?=kxLOUf zT`mZV#<7Ckt5V?a?&V;x-l)U%jXZCjD%Of$y41HsA-v0ODSucrZJI@s^*M`DJ_4dh zqH(Be*6-$rUCK#Ncv$;7;-6ja+b=H24;1qbZyA$+0srYDHtFcwKq?spMF>diu>x(Q zJy;}7Q$!#jgO-*yKQ0?2ouEQ5%IK#qbJ7fC(?V57A=~GTAvIHkiOCfnzU5;r206N{ ziYmwx>Mnk7HHUt}(!(B&Y4;Qid z%Rux502^y;Y#}zlB1zM>X(}G%`^L-tDO^l5z2~DbZtiOGS~L!cfNUuBNFga(Xm4Vl z1KwwMb%kvP0BhDcW9hHlg~$IY?R^^2gFq+=4)Ab*q3tOT0k=Ak_J#xF#{-bLS2n+% z`}%c4W~M12|2}_Jq7(Fl*L?k89ZvvC(jt9jC5)kKrsb@u8_c)%!%cYu6X7GWByaG^+UXX+dgFs9s7CF28>{mDsR^G>_W_Di>90ErOU>5Z@qS8AfvPNx!B!#3Vprszu^xs7S5P6ULJsI{XCcQWeqju z3GB8~PHPSlt1vQmF&X~C)rFsN-W|f~ELiJ!DSmFbOm3D2j}9>Ux}1ujiiJ$x>t3p` zv>hKm`OXVa@&5y|-Vq0{4XA}E83q7wY-!25yt2aUSpo93v2YjAw*0ocAFNA!wk7 z>jd2paimI+I+J%^`P(}sz>>~N4)e*SbSR4?Nr9H6FwoIw1s-FxahWxoUx3hXuGVi= z7oDybq=0*pI7!d{%}foH1S0mJQU&cB4-i9Pvx|Xgn+$*(Aat)so!LKR$F-NK;cuKp za=?7v^-EYm9mo&Fu56IW2YRnikgTj%g3~CMc)i=6f+?aDhf!>^B}QHSiXxlc^R{^@ zj2~qGg{aG-TIqfzi_VYpT;12!@XEKuQ1FIwtLaIson!hGwzwcN1Dl5`2xY#R836*U 
zQhXNx#es-_{`|QXqhrJt{Wx9rsUKbVuuZ3v?M_$LCQEWe)-}wAx)L}VG`K@2Eff_a zVS)V?4TrpAlWw50H;0ch<+#5~F)G#0kGg}ce!@bYN{b1ak-y#@pC(R-P?hkJ!#}wg zLO?W|T18x$_5Lf>7-|iad5y!2tXM=EVsB!g^MB5 zNe!__Z}$-U-e0ZJn%<#R2xZ4a)#78;1KvffPGq$Y|`GQKknUx4-&xu zYogJ$Ar@$w4K9b24U9;DQ`(tds#LiTB7*!VD?f6>Y07nxiL>%z=L z1aFB{aIw$oqYM^|>ykx5^o5HGV%BfQGAysnwGl12gCc~b=&pncTaLOe8fvqt-+ku- zVR|gW_HrIxXB>nBtQ;pU->H}!a`Sq!x&I)=@ zo{Fic=MAI{Q*f~dzZ zH+9=DbT>DX_B|MXm2E<siD4aGtP^Bnv z*5ONa`#>Eyq=qiS7%9uPg0g&c&V*fOYSCI7L^IF9r2I~d%+GA=8ouC&oVFi#bK#(` zJE0~%ejh0#oki=-s}co9)?FmNmhSFy6F)6CxHA@Lxn!Ez)IxpLby%hJ`uQvXK z&#!kJuRM{`y=Vz!_A)sU3|m=+xu*-*dc$)3`x?7$;0X#`BUPC3fIfMM9<|;g zUOdcwkl#}OZPC-|e4Xy#W#3NY=c(#(2tyUCJnyo#n3j<8I^9W?Mq(0a*5Jz z1fn7o#sDQE)YfYF%d&3l6#M?}Q-jq*BTkYq@F(-!4zOs3w;f6B-qh3Foh2J^>!@J_r3KEBB{e_0KapMLb63ir)*q>Q+~ z31j9*g~Cl%)pEsG+sGa$s!@;V^)hb zTZ=}t#SAQG#;S8{uL+v&pXH_PzG+_b3wF2a%?7JQ^hB7)6yZ_noTmVshJN|LPaQ9T z=x1g_;k)#*z!|IVkIXlh-pLEVAOI4#=6L4lh85}@(kiT-XEi1n+WE?Qf-=?`YzKQ9 zKL)wLqSt-?_X?VPO)YX0^y`tJMU)D|m@DlA4(Tp1w0((x zv+{ZS=5m3*p=H(Vk(rdShyXkE$QZAuNAUETIONLy4sk``8@9?m^%5ZxqwgHKSV398 z?%eQz%&uXE2Np8lC*g;2``)F!<=Qz+RYJr$TMzUn8vI~g!3f=qWoUPtkVX${+nlPZ z1E0%cB4Ysxh-M$GhqW!$%z#iHGN*(m8-B#h5}zyY|HAwNH<`{qAnR=?{R~H?5iNo{ zKcaqQ!lQe?)26=Nq83$O@-E#hsCC3wq{bncmvb<EZ62?XpY+98U_?&FrEca(+fM6bbnk;!oRgitYv2_Y zG#<83d!Yt4XDZW3bdJVlcRt))(Ou^66@9Bmc+GpOV2iEKt%lQ!WvZnoIHZuyYEM5n^l>7v z`BZ-NfCdC`U?HI_rnrI)axCJRgSdC>qKWQ5q`-mRUakx@2TdZjbz8HqTsqXMoZav^ z?tmYi7?+B@Kj^juf5~WW7(6{{FtI|vki>Ph<%hu~uEMRC;B8P7#SmSUVy*_F){)F5 z;HI$r#jqrthLUp-a@aT{B2L=#H*g%9;6b4#UT>V*9B$bPVaT`8ZdCMyn|V~vzhU{zFl4z%+H28n)Rl}3(V!!cRm zU1q64OMfWIS(GE1o55Q0v7RV-oJA=AC<*4Y6Zh~-%y(tx#lZ*0)XzwD_xf)gleQY9 zOJXn{c%$f=&>>yd_5^EUmJIF{6u(q`RLFFZk@b_qH%IHOe0jDt>CLOL`h5c9Guw{K zhga}0%~ymIT$njsXiJn-BFL#fMvcDGw@JrkfpQk36-rOZ1z{*|pIS!*DCoNEETo|K!EAOzZy=P?T6g-_*6L6%X`~HN#SX0Xbio8V7`IO1YZsc({SbN3TGvDJS zz7e>2st09rjmu~`c%d+RgGRi`u~dXATW!NgoyTO=4T`OwQ{)6UI!9YUqg8;ocr4)}`AXB`Yf*Nw89WvO@IrB8fNUawnwFHjYY1L?6VlLc z>r!+gfoKTTP?E&S^&<6Qa7c&oEQJ2ayN4n;8=5yyymni}@ 
z(=()DheQrU(f%|u-u5#N*6`x5Vz(u{)6JAC-*cGHCu(;6gWjP;H?DqyQXxSR4a1Kud%3mDHs#)e>Qt*uJU}e1_RNf{vh>m9k|XYA8yDO)ibU z8XpsAYXfOLvXhG-bWb0kwr0i~=Ki=#t5BZ3Ro_#Brtd`7_vNx$uvu>4s!kT)PX=LY zcfh;6*vM*Hq=G2QJ8i{p%)z%PakxV4p{=(TrtPUl$UObX<`COcwGNja>q5)NQl@bcm^B~S? z@538o09%{@oU0nEm@5Jxp_nTx%!%dB*s1)1DeYw|GC*Y>5;?_ITJ-AjjakCdJRV-L zq5KZKYNXV(5l4kA@w08V5aH$?DxaK;O(`h?MWmqgsZ#SGB0oV3mUbi?W)_a=wjxu) z`6i&>*5T2(PvAkgKRR8K(KHopLSB#;MXNA{i3h4=T^*p))Aa>s^J1fd1 z8eEi=Kv^KzsC8*xy0Xr~x%KC>>l{I>voyY|Wm2Z8wQkg4|9kL`u&~i`FM$*mL2Z4=nPQU}Bn~dLDjd{DHJmR(*bSc_2E+ky4Ol5PCa&VIBVg z0q8v&zt!i2OrC05$pW(FJywrw&hpUuA(xaK?lq&Ki=!11Yz8tApkR|uv4Qw``T6=! z7&`@zPWMKzI1Ly_0z%HT+Wb{R>Udz{Ul~(p4sF+YYG*T1tdC{iu@i|vtQk2_ zNIwC9xGak4`jGBSFgMvW&*D$_VF<4;-=)M+nU@O(W_k@1*x~)mlvTs)6>-C_8Qjua zrv3Yc{&d0YaGLK)u+z71Jfm{chZw!KqqOjr{2s>mC#K15DWf&32INmq@33IzLE{9-vrK`YM zcK(H9m{$HVE(>^mf^BD#LY9H2t!BY0F8pl)SJ(D>KYG;l@sG31ygz021cw7VyihbP zF{K2yXw>#1_wlT#O0IaPeYHWOCv%ZJAL!u-SnEm_Y$4$81X&LV+N=apFL~o0Z%15e zVDS!D%WVgm(qn5dvjIde^tSk)IFz?{vEQ%`fskRsA+>R|8XrK5Y6A zN_tf-^W_W|Awx3YMmb_^4GEu9o2K*M8f{mr#K_ygK6!{7d4JJY^s0GP4e(h?Hp0P_ zU#SSTkc6K=>A46doM9&sVy)^N92{6w2Bc15us%w_NZHz>PG63(94DfU!Aq~Jxf^2@)#3b3vZwyMn_$}a&MD*(OP|}29 z^T7dDojiMazaxPI(%A6b)#RCe-XOwr(ZQt&sxDzJ$dZ5?D5 z3UUBWQJc!7$fH9kBs<>?Zn9EiescvNVdKVknzq?fVQ7M9dyc3#YxYMK)DLb1tzEkt zqBTU}5OeQjUO#2s_sM}KOzgswsGx#!u}=N1uOG7u&HL&6ik6)ER%+VReDhLH_B>tv zSh<4Mi}$<#+eT0V9s}@0V$;PIf##$aNaJ+$^g?dQ9V2eZ>>^r#K#B+7O`n4*nkREx z|1OXIel&}p*^NUXo_V3iSNT-#uKGUOPMCB3WhL;|GlIX4lA++bHnQA~D)+im;i6pC zSR6*5jmpxff@<3Bh_n2Q?#lNjKJ<@CG*oGOp>OleQE0QA_>}6gv9H?ImMT>Dfth0F z6~UzM+Q{3u3Jw?TSKNVxgC^I^pAbaG%80s57Z?JTn+}<3Hr^Pg9*W9H0 zh0yLq%wLUgWq+IXVNuz(>J*!2KDzGU#nH(K@HYIHO)1%j8WI2l*|0&$Ajkg58kpf} zKngw>=@|pIuEyq@`MN-$*lsU^s>T3E2e2BydZnM?YgZ$rQ{t)rb~0IoEliUVsxu~W zq@9BXTP&3-1`EQwO0VN+g(nnI}EKaWklZ?g8b z;3GP?R6|_XzKH{abat={!PqGMpwZs?1Hh3=)kT5RS1^rVjAqm04?e?59|X6UeMb z#fgF(a}X2(^s090Q*Is?0Lu;o#L2H-?A*r2#zqHQVt%Uar-Ee$8B^Oo939@q&mTBI zCZxJqYYvRmQIl@3ESxaitBvt{TWtK+`jtjb_LoM^S>1N+SW6A)%k2s+>+Zi5nCDU{ 
zF`^tlK$(+4=_;lUQ{;&eM9^Lo`czkVSijRa7#Gkc@qd!^+s@r|UkfW2h%0Z|$#xYl zFDRE=-F;ZwVNtttcOdHfrlL<`&)1sl!`VOgY8wY(B0nGfPY?aPr@I?F6fVdbuvgf5 zNPd2P=~E!j1sBK=r(67`0|uSMtU+KbNc~cXtxFvzTDm4ET4NgJLFrava z!E26RHyKjWFn%hTDm=|b5bJ-=+>jCB0gKXOTjqToos|+aMnA)6=|>^W_x*>ri)Nm#rB-onQn0YBs+Szi9Sm!);pdjFbu*ur$fHDJu z2?J^~R?)uawid;_mvx8*My7elJ(3E$Vz(g<2uw6f9I7#-l-?F+pr#Gc zIH|1IGsc+K^DQtjP^HjV)+9G?kX>o$yM3b{HyGe!VPE3&SNdi)ss&0Y>OMUaeTkPW z7Pb`LK4FZ4oCRPx6ChBJHP3G19qS?x#sTq^HArYJ-U?f|m zai~WLcHDpGPzIxEN+^ug=I9PA04TJ_6gK^t8%Gz&q)Bz->Mu}d+kAcC#@(D|UYSY%C;56ec*k#81syMH2qK(D zQgFHRktDZSDyA`hb+48opl1OmX&Yz&)7yf`w>P*=my_C8>Ko@yr$oVbSDUl5{65j* zR^*5lAT&w+K8SpL*PuYL3MGsY6W%=3Mi$*AI=tnwJY}OR)Hq~?<0uF{D$tjaX7H1Z zE{k)fAd84AV~0V2=&oF|!Gl=jW~&C(Bm7O8-UziLYaT`$e&vMIu%!M{?xGxmi@Ye+ zkscZuFr?SX5!+GaUNF+0wX@cF{a6NH{QVG-xyOu}VL8k~nHkT{6s~|3rX;*jg|X~v zu_Z=aMckhJ^|EQWvk@BQ5koZ0lmwGxedwq&;Q&o_WA1&A^GtBcS-Yk_sdc6r-j3do z!PmJ@0wtr0tRJMIa2jP&E2s;xJt-lRVJa4iCyp_0PB%09%ru%z3DKFs+rsW4m+2s5 ztk-ay4+TYj`8g#-)gVfsYO1#>aE*^FdhAW4wb_T1DxTVU`Ff)n`v-azt3f==3!C)T z{hN`b(W*#7yLf35m^z9E3Qo-n6c4v#9QHGOunwJ_aR3?x@_x#@maE3L8k4yR?XaEkkQx1bZ{+)WFR! 
zc^-CUF(3ck@A%$00D~xNp}kTSNE= zMBMz4q~kkZrI9;6mamtWOM|^nGYBb=n~(n7`^oXFNmYPWp~v*F0-v?3^){rTL8RdX zWEkXC$RVrgI@mi7HD!cFM}k-o!0U1G`&grV7ExI^3c>zN%b~SwkV0JZjr=f- zKMVZHk{zqI9sTeESLyy5ubkk^6_n8`$T|`@^#ko73vLD0rN6*9fa8ox#Y$&=s0`?XCl7FvJj7AQ$4_Gc zde-qQ?H)LgyQZbcg~|5s16vlir-P78=}>d{O`S1dq`#j z!Nj2VmLxJwPMoP@UwP`xgd;r_tt%|N7+;nXnc|HCD6V_*ElC6jY6p%wB|lO_mSqMA zk1G!Z-he;$6zz7f8p3^zficLUB;Hn6=-Ec+2&XTr+M_~pGM-*KU)@NWvX$xANe=g+ z&Gvgvmrd+fq!4IG+(V?V@6)e%Pdvixi5TQ<)Ycs{s*+gq`&HGA98SMI6JQj=8y+t> zHg;O34Gv*K|GX>_B+DqpC=pIM_HXYCgv3b%3v_z%F+&@3U3Cx1w)}WiLZtw~N6Pp{ z#fIyo@hz=FT?&z2UUupu_{#FMT>^Ar z(>#s;TFm42LcQ_9A?3N}Zqg(;4?bJeR(Dn2XQbR%=gJ3$8{JN@>2=||rB{{WWLXpE ze2U`~I*`NwK56v(5J1f$8m>O!Cyz|98NqRnRs%bnIErNP@H78M8q2yFP1yi70s5}| zy^E=x&c@brPonJ#dE$)NrD*Yvpl^Pbs0LND5(&!IOd_a-&e+kMkIv#yj99mNs%e<9 zlrOfKpRLjZX6P(=$AQZzSKNBxe)!I3=2S#4Z5UXT3O{Te9OzPE6udLb4A_oS^zX~7 z<0q0aG{7siqg)Jg0V`ja{(ZL^o)V`gB7*&`xa#^7N<5BUIFwC>z@+Ucgy4N(C5vto zRHzyt;~#;d2KUM-X4Loh)gk4A2y{fAV8(`Jp_Z^X|DX3@gFcxBfI1vd9(k zjU!KbvaCh!x(z%qq=;2ab1T_|bgW1qTg45jIQPGI89T?pPw$d1)s7}7J;X!V@mblb zgK=*Q9Lt8Fi5a<}(KC<#?0CuC8{f3*rgY zGD|S99Y+`r7QLg_ai>*;f|L?)>H?XEhx(^b5h+F53%9BcxIzFugtw9o*_(*&(Rdle6@*+slAKecDu4m_?4Zj_PKq$~y9Q z`4=7_B$R?mo6!J5PnZ$Lz}lU@)v9#0+2?I8B3VV%jd!_265f(BGhTkOAp`bGA_}-?J zt{RvabM>lG+bm^8Ji&oP7@sJ@$(p zT^Mm1g$KfH&4IIFLIISk;LaijZ}*2Px5aLBmK3u{Dg{#(mi8Q23|lcf05&^lLrGR1 z!C-4Hz4JYk0r0sWC|~^EWWaBVIooY_en`E+3IEOqRGj0;RY-G`Q;vd-EJxVbmE%tA zC2;tksNhXHN%!7ok__cgV|bT9n27v7E=3OX0iuDHG%YBJ%Q|s87QHulhBM)98Ip7@ z+ryLqF`{jAR6k_@I!MykbVk@+9iO+W zT8Y4#!9*g9yzeyQY1Fg!CHV+MfCAz={{r@p~Y zukYrP#;-v?qo+&nSIP}+PL*_6iTw(Q)UtI?V^M~*D`|e;^Ceb7x`AStRZf#Nl|_mV z0)z-l&LaXyIhgXvVUe7EORFAR&mx6adK%D-3cR6S0VMQO##R9WP=>k@cR`EoWCZr) zj3H06rY((KXldGn5F0Usq*m;5PP~) zp(_xm&EqwzIZ}t3c6u010bbLo8qLwt+Zsiz7Rt1=3K@}r&R3u>}uYGTV)lvtA`o~ktB zVH7oK)qdOe;CoRL!K6pX5Yvz$A9=DAi7Q2+E3G_n!(+F`J8ed#LXs$j4d+F`se3?w zPYOoks?Of2jpQ~{yadNvcRe6Wui7iyJ^F`hde6GM>c?;n&P1c9<^H1Efb_rI72Yj9 z1bR%wKI@iS<3xkiii?#>Mm9G-age!eC+ 
z4Eio}3E)||I|H+rp;b6=dlF%doVKcCEl3zj!Nr&0d6rKumSI?G3+FC(2WLpT^Ns0m z2EhI=Xi|Dg7c}qJQyrzCLZ$A@hf8Z)-+HaoP7JF31!k5=&1z%K?nRP755L?QLfiI& z9tRHtuEFudjk3e-;6gndcV}qbTO~l6t;dJo5s?LnDvnjNWRk}Y+tB|hOLEV$L z<^TaL7TC55F1ZNVyBvvw1q-B_L2w(;ErUk&n02vG$7?3}8eE|yS5`OnF-pawxsE;AcP*>P0yP|%;sglBDDf;NCKe>2+WRNu8C7@NRZiquz7YTmPLWl( zT@xGn@tSKtJq{@ORXNn27E!f5AkH~EpZD_q(X`<%G^|*-ZZ&EO6{3=W{c+dv${Slp z2HLhtpp_uc$i=AI4)fNW*;W}jOZbbLk6xKV=iC@P_~3BOV!Mi7;&F4%#}SY%LHUZu zLdg$IgzmVwpBHq$5Tv?c4&@$Qm z4y*J|`sX{^IL2*BZPTWYL; zC&~f*@7FL)lP9Vo>kswMOcQ(g?_LmQ9Ei|(J}Q+MuxsQO@6;_ZoUbE0hW+AYht#a5 z(<~vVB%MgE34Do;`U=4_Df04dr8oTKDL*cF_e43)W2+2bMt`m4lGqja6RCt;k`-O;QPY8I6BhWY^*5*WXwgY$7+heG}rMie&JmoLhsXd>;a8l8D8=Ew_s`l&n%_ z*vn2+s^HaC$bIip6M7LeVo%Fdr&uip&L`}>c1^y%`HX-xXqMgvC6^%#-Y}{OKajKf z^ti*pdd{h)^00OWuAtU&P*k%oqSS?VSyR~cc)nXxHojuu;y!-0*g1(m%}gwMk7@zLFvS=ErCUn zIl*1IjbV*-(e)`Y3*hC#Fn~?y8)61W_~}0AqwsQZ#e5Uqa9f1?+M?-Ie$pH;{WO{4 z{aKCaNFagiIfuLRg*Tq|h@FsD-WbHiY&7Sy^$NO;r3WYbFqs#~Ho+CU4HM3{`(we!Ny754RCKoS;lR2iIG)D2xk9EQcyv$QeQ7``sYogk?@yA$?i9>7wAU1lR z@f{zuRgBsmQA0|>A??K$#4N2eBJ>u?t{%`PZQFDoi$+(RCo(u$_i3vwQ2fuc$^Jf2 z7%9CUaNOU%z;52N1$Qd5;c!jJkGN>~(+URHWP}p&fG4i;6X`(2CdN*C`w1OT1_!hW zqJx|^){8!rG$yR}6=futoe=t-%NcHY+YZe=vbCn+JK3dZ(cr&$+R~QCy?Wt1YQPF< zw?@su&pQ>?VGqTkj|i~EUma&5IgBM0`!W({Q3bAsG}%D7Ky$){vos83r4g}@_-Zau zu&m74SkvUKCGEg+R4Z`7d@~HuB;V2y403n&!j<(Bq!~h9l6)H3TXGA)-+@)thc+9u z3RRh#4Mc2(YVvSicBRrrXiDJTucIcv*fI&i82`e#(oYHkR>7hs*Ih!V!tOJ-!xaN^ zZ!9ca8)j2GasMTZ@64SbN(59lT0)`LDV0Z+M9I6WTT^wNV+QyhUSPtFVoevA34 zmMaoQ&mbZr&N%4CF9-S)YYAv``a->eef*;s%lV7f{CzFBMSXaL>|t@Y6*wkWevK(( zWCg7@23ke5oCqPo23mxwDVuu||MYow`3c|6dtL%A_nPk&IcmDR4EI`*v2i*>LpqtM zodGwB2P$@a(qGD6>CfaSeCcKIWEZi3JRy6PT5s^AHPesirM`3T??ri?y#395FEj^` zz+5*xM(5n^CC<=N75lzlTYi}g%Xfi^nqcnP_k&j8>Pzf&XAzfUBO9)59YIOG0c6&^joM(N%u4Tnloi7{0qFyVur zUl!-yvf6(3JqJ5@cF!}muT)8mvg-Gq5kL`Q`G!wgpOt3VvkoasdvlJ5c2mAIPt`V7 z4;aET%dcqo!7xSd*`n{-IbB*wWjFrg{@Yh2j|bY`AIv(A+J^)XlmXW%uo@|WbVguZ 
z2I&x3t`@iVtNS70B)QW2ktSr`dmamgG~g^!t3-8lufb(eErl-86te%=UN`|NoGuMga)ub*vTc=`5_vX(y<4W| z`tkMM0Wdbqb$MJ1jiPfJ)bz27)LjX^O&>UZJj=(lI8zg}Z;(II2sra87(SR!!G+v=;Th@l=^vu4cBcl-GAn(4J$VS*eM zH1z21xcy7|F?49_U=2Ml0i_3H5Sv(Z^5YdctK8>WS)FOqi(KTYqxAfzrZ*pJAIEuT zybW^7(Jxg$D|ID$wxfVV#8o)sOck6ih`Sm+^`rF_gcD@i3%{Z<`sIe8_aBa-_y`lgOCv3btL8^EE=e2D@qJmPZ!mZ^%M#&!Ys}K-Fu70N^9@G-q__Ixhf)w z6W>va7MxXTPmO+Po8nzGW2G9=M|Ko}`i%e5szCNdw999{21yuRFN2E7^==Q+b#iN0 z^qg^QGRfj)yWV}f=7&TuYKqFv?Yrrcl$qBZG@D5v;G&6Y0~tb$Qnei(4PX*8NH1Uk zb7q09PeQ=7QHPNKa)>LBFg~fP6G18nL|N!Fa0+|pB;1Sd>TsEaG}alsI;bebM@vI46+-!^&P?L*wXq~S`-+}IiN`{DDclx3qF4UMkbsgwR=yAO4s*EbZ^ib?5=)l0h zZ!e|6h+3?$#m&tPD^LINj23Zb=nIcx9vGuO|IyNU#JJQxmM+5`txE`D6oZqDWR zxlH-mL>HY$Ud5vE4FnPmK^c)l*Y0+Ac*VCq7R0ny&Xo3@+`&oji(1a)>#B2V&$h0a zA?+`Ok&#l(AwV?e8w{61MkqvPf?!KYaiPB|h#rNHQWFf1+2pgDA}As{#vc7-`|5X$ z?&vnQVfqqJ<^S-iC;jG+&w{2>K@npfEQys`i%ZN2I28XQCc+@o;;vGq>>L*+tn;|LqM{dP^e3luBfEF<+cTf?GOFJ>j;BA4B(k zv9KZIZ=Z^L+Y-T2Qa-r%8RpL-4T%<6UI#@Ga;`DgFH8=Jsd({gahPh6SJQ zO0iyk^y}%%-rlXB9>v2&pR)e^digjo{gCZ@q2uF)dj*XAhSFxBe<9OU-N@DAR>Q6M zS{kbRq2XAztl?c^Q3OgyKc`6j{Au)pU5G=xqP`##!xxWMUt2<0AFjWf_2x!Hp^Mvg zH>RQYrM|49&CpTKyU%X>hpqYkH*^WxKEHe12@-~DYimE%19K7P)aTdN4{2aPITeT? zBBB=q)8XnqGr>nZG_DIhTBsyU8#-%UWgikc{;_o$wrZ^N@X*3Yv6#Vajw!z;%<^Ik zuL-5%6D6>9n38JSvk)+8t@Ea6=*u!ubnkKrv%?Z0QsdTGZ{79lLKaUfMZR}feeADb z$I;`osm z_tb^#Q4y^Idk#v%dKP)pE^=@EV`6q{lrrrad;abnM4Vq#(nsFq;XB9OzJlRSu9*@c ztkk9mmZyvv#D1vW24>dn)I%jp3(;4y_vAlvWNbtitl%sv3)th_%=GzoD5(98E?D4w z@laJs*Y|_c4(s51Hj0d5&T@l+%&&H)XR{uMW|zIY#QFU)$s<$Jd%kd#8anzxLCR|! 
zolVCGN%Mi@WM+PK&1e+wY(g%0I8&RZB!{oE`riL!qEYu`h?%C;<3})D{WHht=-aAV z_BDA*JJ2FtzGN7o-&p#Z=j)G0re|jlADAbO<5ZTGqQUfDkb}43{$+4bTl<9w0q~8= z<|)zG*IwZ@%m-(S+f6O&9Z6&+0K8lTkrK&zSDDA}YRsHj?+vL!Ga)%o&rw zIP-^5$QdVgu$ue(Crj{_N_)zM3}v&j>xAY?H?VKM=U#kLKOqo_gI1JogH6cmvpd&{ zyD)NG?An3}-Y+6n}nfkb2>Y;Lb#5=LOMA<+*|RX20P|y)8;FNEIdwnU$rt zK8l5xQ~6aRm&`l`!~Fj!Jx~t(Bp>uxyjFVq&8)Ez^dyP%2`+zio=>LCsjT*oDn%xT z4_D2Vw;S4B9%B~}uY4Fq<>fh<%QT^@H(N;}7t4=e&|+z^f}rC(ZT8h5;~12}hwWH; zNA*_K>*TML_54nNE~Z>IR1<~6kV7uB8Mp3q&M%tb316sAtHl{z+p`40K8?UJ`bxKM zTly1yh?49{HwB`qcf`9~(L?*`TWNVFhH*`SrIz3tZ6n{auAR;#;UvXqsE4Ue{qPj!ug}%}cqhsJdu zpIuA3liDHWCHBIxZ{&+=)fh&=d+&R}+{2e|T&anCA7WLj3A3lY-@qkXnU*L`;ex`u@;F3%N{~9$ab*tuqd*W6_P`XFw%C2(R*153CuXn6XY0URG zi|D?_Zl`~FfBE`oEopt`qT``l-M$IBG2`1lo)N`v)2~B~XDap`7C==+1;#*_l-x|ogeIDda>HeOZc=T|Ca zjaL%}`mH*&+OH@0S$+|+J@GyOF-Ziazq{1d!!`y3hASf96l)IgM?C1{U4iSdrG1?C zj@@CotP;!?!^)X}roAnczFVU&(H9$JqNmL6vv~AV7Q!^Y)pcR_p7VNNaekih{fX&V z$WXf)l|Q}UDvP~Eiq_c=e?g5w_uQ|DMu(N>g5g1dX)MH)y*S=4UWI#-($^3OAlr{} zXrTm|jhBRUmoD?}O&Ca8uV2LvVr^1khU^o|l+6E?{|F72GI;g5I{`cO8ZjhEnX-TD|TS`Dd1f)?yT0%hL(4nN#U5e5j z(v5V3)S*L=?t0eo`8rJ19|96RbxL5ojuRR_w&e`I)N1EW^zuw#%nT+)_)2M>tBY@Aa(Pgo!&4gcfMz zhpc4$T>ncQEv&>2(Gp;S7R^AQWKnfDkR|LM8si1exB7aqvGMW6rkg)aCGS?1#m-kg z3-8t+?g3y^iX$4YG|~0PxJ|2(0+F+3v~-qky5}iByn>yb1W8P_g%^*}9gMGceJ6eV zW$0%(Ug()!-{}7$J+I6{h2?4dK)`<5In~73EpJlg3E%N+m!9N^e%R$mDQbr%mPG{~ ze8Ugo%&&RfOQ_G%^Rro#5l3hYL4JzIa)klr!!RXM#xUaNGJVSC{qz`Kt_7RHkXC7;1t*r_NAm_jIEX-D2rUy zXiDf28~ad!$!W_1rB9==q7+qvo8UTWE6G6NMtOQ7WK+a#&o^H*`Nx(}{Q-;uJ4unW z>4CP?L2Ioh?$qQD&eX2qF(K5u2&P=Z3_v&l-FCmPJpoUQZ&yWGc`4!!nskM{3a)Y8 z3rwLg-eyeyDLe=0+eFpOWK1e*e0vq+sr@Bnwnx&;=lHz$4a57Zv4j(tm&j0G6~Aa1 zgaS_%YGrd@EPeDKF8eEvHl#L>Volxno+B3}+*9NzFKFkR8Q;tZhp(=&nwIEZaMNzk zfN^uUCV%&;pF=2>Wi7*(6f7T2R2zxrU%@JcWO)7~Bz|o2{%;7Zm3f)Z1l%xZP^xUt zADz|Fn>4IBo+YR9LVMDg6(BH}Dt-M6*AlIED_WGg+F z=?z2_h~irl99|x|>kq;9Wbs>~U}x%5&h7TFnYY%N>wZMc1aZ>td0M)VkARFt0-oMt zh@FykM;dO~o}g$3`UCO2H8gVmVRXG$<8^g`o`TqJvwWLg^#d$Amv4Dd6LKmbVl?R# zEzgH+|J`phmbgdO8&c894G#y 
z7t^5-US2{62jaC~$7DCBhlI-|Gp`qhtGAB^uAv6K-WN<%zizQ12M2Su3XhzegdwKV$J*Fdl>Fv6P%}-QY_lQi@pt=7ygCi1 zBi<`I78#U!a%o%Vl=?JXiB+@8VcW5|4JH;bmpHAwFF5?Z%S`U~t}YWTUa z{V#XIPybkpykH;nfBA#avTfpJHCK^sR)7plNA{UXp)~Y7&pNK|1gmDMp*+-vi(j88 zCc%v%fT;1*FdDsvtv&7TOZA~T&eW#b@AeoEn;Bt>L}(G_Li*!Xc^}ZRiC(3Y-Cp7ap#e z-g@EDx_nSoLW$83jl)H?(bmG-nL(SWZ6#apQXrS2LQ5Cyi!BvceCbR5Eh1L-u398X zR05f{LwVuk4-ty({<-S9z*dxjLf+o49B#CCavU~ta!xP3um)Kejkd-7?0#U3P((pY zSbmn$b@IVK_avA2CdU83hkAcq0=a|^quD3Fe6Op%I$fnODls9zGsoNohII2g;WH)}H zV;)a*CX1ZYMx2yI5aTJs#LFUare{(c->&({8|&U_t5qn)hMGrv@#vpmOr=U}yI;%O zcwhH#2=9DboI1v6M^i};tXCBa<~Mhr^GoAiNjWaR;by)Jmi;$(h5k}#Jm|hh=u8l{ zAA=sBx0?t~n}DDEi-N&)7rYhAd-C$cT{{x1g>`&neX2G_x3g!Zu=u>SY zlo9+B7AqAqtkxS~$e0o-ghjq8vDrxmPoGB3ZqrnK>e=f*7do>j_I)f)k`fv!P*h8< zi!$?8jaEcYVz>T$l+Uimp4Z)m5o-Q!(!#}cIM8D2zhbeZ`h3$w@gvR)O}!&JeBBZh z38g@r?*w<##Xe#CHY9A^(8OL!U_x#gu}%%|SSBLv7YEsbiW%7|G?GV+s~<|=oo#x| z;$AY}dxcV&*!Nx|zI7m%f`sICmDdd7+JSb z80lr+r86R8rzzLhEkFFdlPyGP>@V-VJD$@<9r4yxU^)ZqTh%iv&1gOV6{LtSvCb_k z+*CC}=hw#qvr*w^c`i_??CZz*`O2_Dt}tc3MCaLg64>#I9>d64Tj|*}?%N!;v{X@G zp~77&yKGUyQtZPVJsAB4K|&M_3tL(&_gro^stQ*uCYY3Z0z4*cv}5e}Bg_W>UPEwW z-T?kkT~STUa`%nV)^of?f6`A2bl0V2cb;%$!S1oSO+pdAtf?a~$tG!we2qZ$ywe>p(muZ?A1`e)Fj z&P`U3!*_C-?+tMOTY(}nv=1PGiwG|GT{9lY=)fr_cAg+&_e#CMmI z6g86&)!VI%RUQ5!iH2(~W5~fO`&DrkAIW@4?yq&r7|ofy!cy;mubeC!_d-Mdym*Wt z+XtP*6}o{3e@Iy3l3%b2w_qj;FIOrgJRf!NSQ*cw_rf3ht<&@?I$l4#GwT5&(e)9P ziZ?!_h)z=DEw2S7Jxr_|WfI9UW!a`+4GOHY8`9vXdIDc(;`eOC&SJ{^`mpX5 zzq3pc3l`V&zx?SU5gC){)@~9hJf%Bo=kuGYSnvA88HOTq`!26t-5NbGV@NCAezZl; z|Js}Q>Gzok=ctH2W236Ad>I|DObYWGz25&tiVdsP&CN22O}86;FZ@ij5{w{EvQV`kpYg1L|CJ~jTl;&+eJl++}DGw-7c-@A#cnwxr=&{kZE z`_|j&Y$*<~6w|T6hk0RoRGOHpGG8D83Ym;VUD$mxZef=S_0EEP7?j=Nv)JXMow3Pk z9$5ZhMdbwsd(Onv*tGD zqCk0IcK?e^?lVDRaSUuJKZXzj4NJ3XPf-%M1#aQ+Cu89ee&^Si5JR5IZXPZql`gL1 zZTst&tVYk;a+OePYbj+3+)ocHHi1Xt8>$SC>HxrzU z=8T$~Piar=L-h*fVnMgNq1KaZ<8TZv8e;0EYkn29zjyXe(&7H8U$XPFdlu#nv9dX(>O=H=)Nu;5V0d|yvq{OZ=p?I%pAie^Q;5K 
zQKpcyAo>SWngcvcPbB!)7(Rm{GQD$#mjww^s#!_%CjVh>`|(FD_B%`j)AD9Z688Ow=(gzCWYkz?_W0M2 zocfk>_!307sDkar3O1;{DtNjaEwy!E&|eSj9a%=+{pM<%fyJPd!A^}^@h@rEX6uJFFa##7(<_YZ4DL6rDaq@pG)`% zgE&a2<6C#-I4Ht72Z*SNeiVlcJXP-dog14_GM>aq)QX~U)PC$BsSlm-lu5E578UBl8=`WJ@8Yz)G5cv6g)K@BkK#E zw}Fk>NO%yEJf>44jELmHS)F{?)U21&VyZOjhuOYXgbIa!w?-+k+;%@x!g?jtUph1D zEkN?sTa>zqLw`|b`q%ShQFlQ(Y&L{-^c~{~8BMiq?UQ5NCni1ic0oVRO!sI?`btO| zQ~$uE2L61rDGJOELU-WbXLm>l`gNU~Ioe`1eLWWBMEy1NSi=cLhBz{+U3L&NYEy#y zl+VE%D|6}xT?|tPlRG&Y?6<>MQH!ZBC4s{ol=&L(UwYc+LwDu{i_f$0q+gOi&0JQ- zW7b4cSL9`u7Fko#0(X;>RHjmTo6K(|OUR1ief*n!xi+|6E+r!ebPuG6vsZOj%bXWy zqm?OH_){AqNq` zbk&lb>A0*i=9^E?Ao%Fl!#c5njNinEhW1si-5Y+A*~3OdHyX4oY&ouMY0e2UWGXiT#U~^X;sfnRgdjRK z0+%)I#A+l;Bq`Jmn-k9J#~6!kPSzEW;$NMm6ix1Qjm7=S=DfC!%g6V+xJjB#j5TtU zG5+`HE6#TTeumOF30y@nUjGXB~Z!{E{hG%%iNdE&3jmeA?zn zAcAWc%f?BsK=SL$WP)PGEvJo`3d^{AJz3eNz;gCpTxJs23)c#zTn_bAMR~>GaKutQ zXQFsBMB#+LUJ4Q!l1#il17~(MY<3Msw42_hQa&lnv}K~D`zX8|3#EfiT zTXS@7x`#U~{y$D_fV477o-W4=z4%d!*uj-xwGD~9?I$v=n1SEsqW;=}`P_FS3&tWy zw5sKEeqlf$jFSE+vQK^BMkn(0O6+(sV5F0X1TVVqc{rnY%6;>JKuT9Gryq}&B_e;$ zuRPc@dK$lM(1AiqK>dl~i^jWqWmja|R94dnevA!@TTy;ZpSz>483Oy#!Grc0Y{UL-K%AKqR;qcdyr&6l1D)iFz7F&*QwKLA{|bUK6f{Fp@a_|s zb9Jg`S}N1~)P#j+7+;%tVj1-PPDo6T{16w1kb;K`JeT?VxJ!SG9TNs4^fapfz1UzK zzWzk}PN&^nbk@N}d$4H$4}GW~uai66)@5b3rg}v8UkW0XJ=)9utX+Z}F77KxQ$XbV z$Td@c=1V{ot6Bqrl8F(wXDcI%caGxq9ORd-UF;CqpP1(vlqKM`b(d>z$pwFuZ z*5yA@VwhUynhV}|r=>;`eydrO^;U%OpkPYMArRV@RBgi@{yMR7DmH_)=S{kcOnXIbVq-Y0VRz2McKXh>!i{*-xoftIWW|=niv%=BG#gC~zo0UA` zlvsQ8ojnCgkHMB|x=)2&fb-Ecb^s3d>*qtY%NuN*r>up8UPt8C zeIF{B3wMGn;NRunZ9xm=rHO z`9^N6LvUzB>xxvI`sP2|$h$7OKP*x(ry&zzCH9d~in`^Ir<3win#{P3c++ zqADyUmrSiZ)b}&!NFqD-!sT$Lt=JX;`(2e{B;ROUN#vau{eE6fsuwL|CH$-WoqIQdCRCB0 z;Wj)UsRTbIVL%#|)8blT-cylYgy0S2ul7tSx|?(h4h3?#NoQvh4stcKRvy-H?2!qH z08^d5X?fI#&+*?isaG8C8;|lh^rZCElsfYZ8~SQ8KA^Vvs3k~iBI-p&#T^jU-u94< zaHu@bc7CS4^B!6z9_BAgJPfSp3c2}qjvONKC#SSZSbhu`STI9WMR<;?H!N@D?P3h+ z!Nhm=2D62lzZnzMLrmW@mr=0TJtQ2fy0jqv^nfbgT&ex^+zlI9Z3&GQL!k$nQS(ZO 
z@Ekr=?`GyrV;sy}QtoS72DBC{Jj4-sGr`_XUZu*835>kb@2)Nm>zCUrOsy2SUW=dK zGaKudWNMDtK#^$=;+fdI!~zrWQMtZKd>N_?+I~Uc@uzcB?kq4Hpiqb zcpmzcKdYN+-f&HLwjl@Y_^BZ&8Sk*`GbL`dn@9NQzZ637$(lS1wKUqb1P?!D70cAo6C8b`7{~4U1aG+tuq=4Z2 zoLEwj!(rcVvhTw)pYxn_-dX2&S5aNhPTyH$cL#-AM$uv6J8pga99=ToUiNVWziRXM zPukG%I|pFkG_PM}cR4S^c%w0)Z+IOV`8wP(l@W8YkMuPd!K=2`+<4WFAM)AzGi0S! z@1UyAws)LE0hdl?gp8uqqBO8=I5vv`zXS7lL2L&-)JG+W#foljKuX2&JGI(al$kaC zM1c2JvO4M9_igJN3>OrVh8u^+B275~+W9EOW~Y5CqGX>RLp|S=CS=Y6WU`ky!$g% zU#$96^{voY{OSork5cHf&~hT0C>hx=6xV_k1~W&x=G)R`HWQwUVU>5TGw_rjGR?gs zQWCauZ64r+!=5S!Zw9;@!OtU4{3w;zSl0iR)^CSYY*!3xx9S@VN?GQ&wtqcy^p&3l z?KlQAhRAP$gbe?!pc}0xR$ClAa$oN1 zKx~J}_b>LnJ0-e-vLaT~F7GgNv$a3z{gAQf5&FDLhP9Ol@8M?KsAzEMw#+H;gw;Oc zu|ITbh}Fe7HXg3Qmn_9H7VV=H=b(>eWU2Zg!%k^PQ`l5s8@oL8wZ~VXP|=MJ=jm85 z5tO4S71U^!|Mhu`zj}|n;!+Fx?65Z%ks3XW>5fc)B$4>jA-BXDt$=IR!c8kIB{4Sw zEWsI~y@xPLI2BD7;Avz|6~0+LI+J}vwDoJbNIHe~zd$XszkNr#Yj?6XFcna88^|9Rb`?o(`m1GRxJIwyK}doSbC_} z6GC0g*%IqA*=m(P`5|K>*}Ub`dOh#cdc2iN zM5R;v*k=yA))rxuF58RFpiDDzpu+^-=Kv%2+bkWwzhX8qsPXZKNNpm^R9JD4`g56` z@3MPGkp@Zh|C2T0@g8)l%WGt|l~|Quda%uPKCIF4=K4O625l?38NvvsbmmSl@@6?H zWm7u~5AW^?w~B=Y3Yj<*)0};wIWggt;>-g!CBk-JZ8KOn2ZAVl|QX;IU0fK($J^ ze?lea=#Ms+_z0OW_W5so5P9JTKGdpX4bOd zTBc4x&Bt{&t0$vFO89Q&!<*mB-v;5bW74>iTLfNnxsQnrf{~>bTJ)C`4$h8Of4>h! 
zY=}wk*ZO6c-^912sv@+01hp846`Rc_?Z%op@F$>uH+~*p{C#+HMWm@wQ?)VIFHCxnoz(`@qOTZ&zU=L*+J@XksO&0 z?7TBil~yyRIs#-?1$qOR587&(yw+N}8xdszm-{rBpBpa)B5yMd;2&(7TCb~^btdbf z&Vu@YOY+x=MEfEs0^US^oA*}-{fG$-%;`D_RJvHaVK+i0SdSA+B@%)0(va$!?kRp3 zZA``C7aPoGk3?rp^!bgggvBDJmSxQ*YA-_soROS*2K_^V*3~9|K3j)irR)z$qD6F( z{$11>yL*h3#C%TrOW$Ajc?f97k;}%wrL8btufEQ#n-8OTL!ySu-rdhHo4c})aQVy_ zLVWaF$0}264{PNq##_Q)U2ZOy0$IVT$(UWqGscSsc{dj%8&CQ-E&@W;{+n{52DC0m z4fDuta18si`S8+GXy~hJy+>ZETD$}U4;PJEXja&gHSQ`u@vfmO?U=f8z3O&c&-8de z5)NeyRnYsUB$>8K?4aorouQnF?@GVqZkS%FrG_v@Hcu+cg7#XzlW>|!ue2cD^zB`2vQJ7No~UIX11@-JSH0ZEiG z6YBWhKy+O4?yj-70!YoTR8jV6Ch&cvoddH_9ze!!X#$H9*%;r9DSTizTWS8JPV1}N zao0AaB*Gy`CRoho)79!v;yzw)o00RqV$khpJKKa|ej zsb?ZUUYV%*s^`30ZFsq!(RR1KGVpoJ^1RVg_$aL+n@{$1Mkg5se(>C11vaL%9*0mt z#kVj@oIoVvqc8Ch<^{nOc8VG+7%YF%6i*6Jfn!o~5T>zUf?|KA@#S!|zHukTj$2n! zu#D(Y_erOha@r<5Iz5MZi9X4V={nCQQ8As1x#DIjqrnZDe=c$D6LIb3?IjNy8_UWb>J)1Cr93Wgb2Tr&>njC$H%WHl zY&eUwI0B}p3TA_-0x7d_YMR}teW*-Jo6SVa=8+81#kbnzwS+6=c=)?+q`>foZr4G_ zgN5K9jQTfxR_Gan>usRH$nktNUN-l+%aq{K<@=>}>`znz$`Xc#h7l<#DUVKW|5z=6 zNxW=mLPfSp?jE9AMqK;eNJwVf4wg0z78fh(o&PlJB5)H+Bgpo-ubLu!2dJb0g_YPy zbbo2&x-PdElt^v^?`VOa5?)u-!$lxf!ctObK>Hv2-*U1C+8K3qR7j>RB$YjEe7rU8 zww7ptq=y44*wXQ&(GyKg673@W=^0tpe;3)%jCl@;&=4RAC!puY@)0bTEsO|_acv4< zoEeme4LvrQr)&gCDJYOKHd^Qb;KmFguqV%PqyYR0v~3jQ12gWdF#`!BxEo+598c3w z{aY*TkJU#mkfjQ8;mrB}3v<#$?a=@BPoJ5Tw*E*nJ&IwHCjIc2oP*S}sbF)8ILYi_ zbYUHHh~^R%JXNXN^yTR!`avh#wL5f*|3xq?KT6{va?of4Zv206zyI?Q{{MVJc6t(( z0S>@8Nd3JIzz@UpZ-E2M#&U${U$|c3-#?iyiAc^5!2War|FP&S!soVUjMP+nY2ZR8 z9DxcUe>U>DcF|t|fD7ZP5|ak;74CAYQI@Nd>ZWkf#<$}ffSfIUaESz}0xtLy7Z(8} zR~|I&lTa9WI@%Ld&Qw$@Kzm)1H+SLupn!S0$jFjX@#Uwy7VJStN*?#@{we*|0 z%)Gz9-)+0J3)$z5<}Zx|jv-+J4<9}HU@c-hGEHN^O>uKE*|Y>OD6;bM=)imdN+B3| zFL`g) ziMs^Ub1kaMq6nSv5d@=x6^BG3q`9x%9h;a~0wPJlz>Z`I#m6Sw{yg-#oLPuUPX2hg z+eD9KeJ*|E*+cjfW_|sNQ9bXGKWn$t9$EvyZKfA%tE?;L5?011)VH=QL zS~2S>i+B=A*MaPB2L~c#N#zw|NGDl_1%85u=&(J-ha|&Mg<}9zN*fY(J|Maa-o*Tr z{GQrh3arn*@tmcoZi@pcX1I5G?Y5a4j|_A5NLW+WHIjQc_FZ%J6vIFB<=QY^XaQr? 
z)cc3Jle~`}k${hy_wgykH~^;Zk{>R2i0DFXRZyrHea!Xy1T96m^aCI-sRHx?W9IDQ zBOCTRsh>MKmTGBgj=o3z!xA1I?$|}=hJaatiHTvrieXY30kFrMWV^cW)xi^H$PtVH zUBj6a>i`w*TDiJEML$g=xOB_p>cEu-Pxc0=oYwlHks#qF^{+0UJWrXFVgN8&VzaE5 z`=j;e?j$cji9%i>*WjmXPgexJBsUounG%?23EcbYDFbU8u0NS0yGWe+R^KUUepQX8 zb6>f0J_R%|bAkI&298YpC+gpOp#>)Q7J6PAwpE>h0Q29ks%)YoQ}eA8x(^wreS&+? z&cOl^egC%|JP+eay;U<@a!xo~*i^%Z_Y3$1$spEt(mvqk4(eU?1O1);ca9}k)zjp_ z`UL&yL{6TIt$|bJ7KA9iQuN^kmq@l%-j5%xfH{8wN+iE^(HqV9jijB~S`rWffTmc~ z3_N%v-&v|1g!3izbJ>>NO zt;h}Gi#ap~7$vCN7^z;Ym!nh||HTua6AHvGLodbdPL!-3nq=G%T;lv@+cg7$y3gEc z*70KpN^sjS=KU?*lfQExfj??LTS);pOr3f+J{(-!XD?p{gVO-WPS7sk76A1nK!V2B z^!4l4h^iU8Hh`!DQ56Zx1-!`2sw#3q@uR4EyTVC<1BvMg0a6?jU_yWrPH^PW;3+me zS;`z*lsLNiY5($*ajA~O#!zOt*srv8pPO}`W=EQ$+BK^41iivefk}OTznn{NRgCJ)1|5zxXU-YEKlEf^-ixt^*oAd2k7n(V&r!;6soIYgUzY2aySw8(~ZEdY) z8F)#+{3KKudnBByR-jjj1DgH^#ST@28@W6h=qL=`7zyZstkwN(U9j{aX(ES}?mO;t zdc|N7puuxqP5!S5PfvXWnBZxfh>sB%F}tT1fGxEM&@ioB)w6yNifXb(PmtFGOKYUi z_SHhqO<9m#HFdh@0XFi|filBZmrhprGan@IDi%zPWKl{Yt$HmoYisWwA0OLZtTLfW zq!WT~sNm$}r2pfKuheTW3~P7y$66d@2Zx83raE)2|I-P7G@>$?Xq#OwontX6>}j=hk}`kZR7(2Yn(yzS~e7Jo@J-SO5(Ua93Co={%b#e*^Vy*eW zf;6ODoHhaMRt+{%N^uWeVWRQ74Q=fp01F19!4jD#>#bC-C!UmSO>Iu-2e56kIBtWmaB86?p{8?u?PA05-@o<5 z_S;E-X0MgZ(x(3MVlaItYr3Z&G7bV3E1=iKOHzYT6VQnvBOa2;+1=kSZ(u-+jMQ0; zjdbeb$65>c^|(m6U(Ii7vH7f?fWC@k6g$khOfufTj|w@=*j6v5efSFqJuoE17uYre z(c zdNTK{$=RikK|06?JRSgOBE9j&6}?Sr{~f8H11BI*wgBe|EBUYo{AhMr88NUU0Cq<* zwE&!(Fy&8klx1XH#dSPVBc^|zSf@{4EVkhbbO%dzsrICgsA=Q zA)qLCG%SF!4p1yZ6PA%`pB^3_)x{)u7>r8KtG8YU%P?a3LE#FcJAdBCBLhz;Yg#in zNzVKJufAoao|d+@>0p{TrNCQKq^Ks;e7fKhnP1#U6)Ao~SyX@U%aEQG?D=9MU!XX4 zavIhPuU-41VgfJzw`RF;-=kq@(;3_738>$!J)sOOj!dnSoAi3%NY>Ln5P;D%tMD|C zp1PZvnH5Kdg1JRb@djBO*x>-7n?}SLYtrxoS7#(WI4!y#3Bh+}s+Yee+qHuE;`?tk z6AnsTa-vCxJbq$L-waY`KyxzzvEA*kUx?3sUd;W~FUoe?1~Cf zz>IBq6K@a%NV}ckx8N(a#=bNR1%AgAFxcHdwn%*NpkihRSY?8+qiJ$F0K50BH(hLj z{;;O&9s6fM#u$)bZGKlM*?mAX!;pUAGt1Lf5THwKrXOz4R2TQV(@A>MfRaKKiOmAa zrAtWt@P*x7hXOQdJeqD4MIyQ4;jyuR7x~(YARla7jW^={m<|dY3J`}TjeNw9Jj5HX 
z4phXhk4C{bkcqj7xIIW!X6=OHICW7XmP@>^J!V~JY=Ow^hi}?#02yuqQtq?yVsCU% zBLNR++MgtFw(?m$DkcU60*;g(nFrqk-l}@em0IpuJE%cEf-FX@D=sb$qfq-Jpr;v6 zm%S;`E3~bm7PO%R*5Z%c?@@{4Y{^mj3`vnkd^p)|0XaI-o6LIrDFm%j$%3}6VPp^X zep?rt0Bh%4M`rcNGUMlNBQ=r?3TzQnWy@8o*nX4=$bvF!GOxI}s|~P8b5-IyX?zZP zSTo1puAWZnJK@uLY%tc}T>L?zSVfP|^NWf)fkH6IULZ&|2(JYhS%FqI4YDo^_KKvJ zj6gyQK|%@fx@>8I6-bG!Qb3XpLF$s%{G>SOK>FO}^_w>^2zk;H?B9$isf+=KZZ8gJG>!QCIH*dcON0v;l%Qlli573=^_K0gk`p;`k59z#!wz zk=&Z?vfdKAxw@L2Rd7J%YL&+aG%kMFspO>!ciX9)bZEmDG9Jr&mKu@F5ORZW3oLrr zI&XYS(4Sf_OftR~9k^YPf42QO&Z@7kA3a2#h9>uckSufvsd6v6BkFbGta}A(&U=<^ z%ulu2=*M6Tj1lmYi#Tr}<2^(Rpss6wTgQY8SWh58fRqFB;1ai2PRKy{&YH#-E z1g~?BsDRtF)jN!!6AuF!=OQ?-6wtdGc3x0hTRVD4K*i4nK7l8aI>3DzeiV6RYhx1t z%1vN$%fBmK3dF%q(lb3h9I&kh>SSZEg$T0zwtk_7Pi19g1lgY-aBx5wZ997)Eu*|M z>tO@LdJ*n)FJ8RBj!XnohKt-i_6U!QYs6CfMgnpRWPt#(N(~JS;IaIHSn)EU&zWq> zn|v-(9QouflAd=iEM7m6{|mTBUa*6hr^QhZHWP&%5YSiE#1XsnZk@jkr}M6ds9VPu z1jW?Tf-J!MA>Y9bcX?hPPlC|Zntpp_flSB`_MJMO#{#new*sYo&ERj539`;lcm0tD z3i31PH-**t`8kx}mLlQhA+ugoQ1Inae*R-t2zZ*0s2%|?jCueAYh8WO{lp?Kw}Xoa z3sE^~_$QySBSV9Y7d%^J3XxY>7#sN#xLFVzQ%_Dk8qPqp|9!=|6e$GGhC@+K_p0XW z=m7Q@;DGnQwr9(ZYgcNQd3guzhvV^=X7f!4zaBsgFKHREOpRs9!Gv9W0b1kDrltP` z*AG^E!DtmxH?7lFvV1Eiq=S}^hi9B{>7?!mGBAmrPl z5D6Bb41iR^zqxF>r3R&p|FlgfaElREqhCz`I^9cL4C+HU1-a=GGEQl#iw8>kW0(LZ zhb#*Dm8U_rgWMznE-MVM8!|!Sh7*W_bQ|EQe*`$vUR$7}kBnqj`;k6K8t@&Qz`ZH6 zNJdFXdZcxO(S;&IQ*r$Pju;&wfU@)2%?fksmNEUULvBtW?+6k{V&z<7upZmMRcDq@ zgBU_q0+0b)+WGS=eAb#K9&K*nn}EGI{|}ls+2;TN literal 0 HcmV?d00001 diff --git a/examples_deepspeed/universal_checkpointing/assets/image/uc_stage3_char_validation_loss.png b/examples_deepspeed/universal_checkpointing/assets/image/uc_stage3_char_validation_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..30d6f72eb879d5a698fcd25545da8f59cd0fddd4 GIT binary patch literal 41972 zcmdqJbySu8_cnOw4ryr=kdjb9N`+g_Uq=uXsm}^mo9O!@+|5(@w&h95MO`4H1WQwglD|5sQP}@W*yD} z^KFwtE_L$cY*ya{_JhBn&*;A;lFMZtY^)RwtZX(7wp&RZ&CVS3mGSk3kjvq-g>jKQ 
zZNOp;Lw>J{C6q;8BIl6DVTB)ZQO~k5;nVnR3ZFGGZXIN8WtznhuKMpU+?9 zK@lKtenCtf^v`APMEw8$GJ6WLvM5YU%w;B}_?aeu1`iL98vno5ItTWT9z`m~(x|v+ zQAfwehi7CkRXff-DEf`rQf&Bferf6Q-s#l0Z|3b0BwBNhCI?4~Z#3>l1)QNpdT-(5~l&I~4@`Rure=5kTy zRiopB~ZVos#v$v0JYm=L94rJlgE0%rukUijN zh1NTgo3{C6hsYr1!+7;XiAnoxqpy(n&N9Vdcw}T~-^#eK)JydKenfS(Xh=wi;KQCo ztqB5BQry%&#Vl$6_cKr7uG-q$TbDlX+q-Rye^?vN#jEr9%~RfB+8nU#S>f_4-JBOFZ@v+IH6@bS&(E)TwzjtT1OyiZc)7S*YF$_7S6B7dMK2}cOL}f9z+??o zSgCsX_*l#~c;D905GbFGAYqtaSqazsaG#Bp6_u2fw2ek5iOr;?q(oTtrL+zXl6^4x z6cHOs)O7TNK`QXfxZ%t1@1KOcw#`s2zrKC!EVvn48cKkV9|XH3Q_4H!!eym5xuYMfe4cH1yaiGrdc2Fhb=HeTBQ7#TjC_4bxvtP50B?6cDR{G>!Qe6k75H|Lj^ z!(jc)dtY#moqd1uy_!T^T->1IVYC8TzxZCR65UN7x9AmGfd^qQ8^|{nXs{SJ2Q+p_ zKvYS)=_@LA^JeD;leAw=htJW`5fW#7{;UP3Ts3Oz>mz#3sn%BSxz!u@+RE_cWOvZX z&8_2TcNOErGaZ6PtoQDZY?FEqCuiqYGMN+2#ZzLG@4+T=0y61FFJ8PzvEdgm zV`w#J^wD~t4Neh;?BYm-MV2X6jd?rn8RRl&!5feGWF`A-<=_Pq|GJu*sDtNlxEz~LY5b4>+?s3hH@C5g zYG{zc#>Sq9q0k9gVDnwQ>b(EUu>X|^5iBZ4Q(;0Swo|v z9T0Yfta_=8Yh7@TkB?C?F{|0~ks;=EadbkMxVX4A0jClH$3L$;w6ZeqPZL6+vNfZe zm%7-5g{k1nS_|*xCT3*BCnl0N_#IV@s$zu6vHj?O849Vua;YnRdw)O0D*&csaS0A- z{OR9=%hJ-!aP)B71yM1CB@js3zJ8?yJV9A{0QV4m;BUy|<9PJO@7|_ z=hblUH({xS{EywF{Ew+%zlH<{W1Jl{pGQx;gj4n+O(NP_XpcvFR3|lA$7ffpkevQeOR%un!+T97+~) zLVAp~?0%XJi+Z@zCvbt8`6*<9OFTS;uSFd&HAlI(;S1$WO4`)^b_5g`!phLWaG)eP2XmPS;1cCbgsI!KTUh_;|`q@14Akvl7#0{_+zz?vHnW41C*a`m1iy zNWvgmR$X2FD?#Rr>p@CZ)*xZHT=brWg@u8#va$!P@}kwk!b0(f4_AF*Q&SwpF^WH{ zs;aVrNfs#gTm6#p&z}}M2+$QFA(-QHb6M^l9@enwJAZ$hnyP?9X7zPsq+{bjgI5yr zIRVoqSZ_I!o$&Ok_iou1wwaB`T`=>J#r zdvjpmKlkCeX&mj*`&aVC3WT(W85tRUezV`c87`(#HA@59YL8L4gx+asksWVsy>+QO z>9TSK#1ASNX{UJXt|tti0B`4mEtL6W-DzeH-7!!(W$4qgEGi{{2i1ze^Fc7 z-+vbwW`>}-T&+VuFZ}8-oPz_g+X!9pXfU%JnQ1taoqceO+5vrMs;Q_XO$JO=+CGLY z(c#S~?U#nklIO30Ud-v;caBzxCOG`U^pIfO0a8mIG40LpKl<^r=O(>yP{}gQ116r&0 z|GWQd`SWwt%Dvjb<^*Z|lcwXJ1zGGV=f@kR$RHT#>3iY6AJjPAIyu!(fiU_J=s>Y7 zYoQ>&mWUf+4e8IZB3cNk&MUpt5b72n^I2t{0kq+(tg0G@QRU_3MV(jK3}e#M(=W0K z%gM{Lk>d;c9Xdt-R({H8Xp!Y7&BCZYtSZMvbxA~o8s23yl1Gp!?SKEL)S;g=Bs;G1 
zUod@wqM{LSJ6YSh>q_3}!A1>F?*_m9c2EnFf z3C*q^+UR#g0Z?;orv8)Hw{IUGAy{N>ZS8hL3IHTFDneDp*Ra{z;W${lgzzHmcc{5( z>mwo}fNimn0)Z?OR+&5?-t_sF}LKNJyxszrWbc zH##~RVH>!_bSOBU=pDdZ5ai!>9A{O(&dhuYo3zsQ>)YO~a0^|32OArk+s5_%tGfVO z`{X3bWc3*s;JyjQoCOyJo%@E~^78ULUw?R-xd_etEv*A(dc<6 zF^&f^S*yk=2`n;qq>!-7l_UMg(UA;-3&;wwTh^qcq!BP|1&o06GylhvWf){E(tOSX zFvD*<9^LkiNl>#rda-}`xHp-1vcda0qE$GX7~5{0zppg?HWBgeoo#eAuz6&1?5fV+ z)zpYNiKSD_Np^L0!DL_P9;tRP`m;8S1&Tliq}*ZnUr-&W0WvQR&MdS?F7>~>ez^bL zaTu^GGKGykdve`Rnd})0DsnN)o}vNVL)wS~pz8+l{)FvVJn1&Jyu5s|ac#xus?1jQ zyEu?2%s}^`6Sg6LtDZqOrBziTUovmT z0#z83w^+nx+NrnRF3 zyYxZh`w{C+J6J*4DVx z)YQiH9wb0k`Sz4^gh90;qvX6B^!hc!kHIW5OH0eb!ovBUq{|dHQz~>AJjwNS`mYg( z?rlsg!9v!!tPnn#t_c=*{b2?RDCV`zDh!(y2nT?vr5^*Y5g-ld>L~<+oWS!lgX+gG z`&|nM3sJ?Eu-PMkej=AK_-L*6asxs?0Ya6hM?Q*&kl3EPLHb;(vyL1hBHZeyf{w(^yvoR z15a!m92Ke{_h1lmurVFE1~g z#L=4Vv7sSZU~0pVsi>%^%zzk8R#*|i8sLGJg5X+!_o+3zQZh2ly^VpL5}uoJaIzOc zYFHsHF`#Hv9YSEpscYaDcqo@Re;00{5%z!UdWRF#$@pkG|6`2Y#XDOA36A zw`rG=YklG55S?4|DfiHS2e3r+fh+7yz`^mn`X*c-W9{EWx zj{o z5(MSVrGON$1})I0fWN;#Q49kAM*n04y1(ru1qz_4MaH5zvvlW*i|WaJ8P^}g6ciNf zDb#Zx43o8$U|~CeFmjYAQ{rP!Hu~NW78aiW@k8&YKSoe=Gy!CIjx%ceuWv~JEhIFb zpVjwY_>(`zdr!C1bUBN(Rjc?g?e^|y1mt~?TAd*7r%SjK^6>C*;J)|`V`7%_Vr^2| z&<0`*>Kb+RuD2sk)?g%@X@eAM)9>2%6WRegaPW&1#yB<|;R57Ill0_&_UzdmsdM-P z4;eLRW{~{Iki7w*o)3LeP|yxB{^{uGsMefneq$31vD|uqe!9jPOXg%-{#~Og9vSnC z8X7xL`rtbkC&EC{($Uc=^F4SHElSEF9S1r3-E<>?&eqKbNsG)4gOLW@zZ*4QAuCRD zx-0JL>RMA*w+Jv(%zK9&C?m%vy+D9kK!CWMJE0H&DjTA8fO%m<5&T2i&LXj}B4jFA}Q(;C{d zT{f%RW13`UOgiK53N)$!X`G#%P1d>dLYjzxKqs%Lhyz@*6>t|q>azjf3tqd{2Ap|& zH7n2pwl%POETBEXAf*AxGhIw=f@2EGZWw$d*L`E$>u84qgjn_fHB55Tn!x!M3>!N; zz@Av@>gpWhe_u2=%OIP}@S`;w;4K;&8ZpmJ#7k5q-rX^u9r$HT#@AR<;yiS zH7!F!9*tqaFnwbv04 zm|`Dca)Y!AA;byj2BF4<6lF$%;( z1yC%Jx&nerBZEOcBtd=qBY~+{up4-G^oX646IpADLF&LKFgRv_u{Z#RoY#if=;-NF zYJQYmd#jQ46r!FvXfjA19~l`bdo;)_Y(Gv6vghKSg!_6J9QJw85rQG6J$wGV+;L9Y zz|b(c2DcRIHTWbXollu0%`7Z1RaI3h?Z)0Ke6QM@;@XEzs3pk?k}W9|O^E3PQEDlHP5fBkxVa(XZ~G%`XFC_R`VNelA~d0sgb 
zmxP>!B>~W63$W)L&mL9|$FeQkD8jOWk2B+Dn))+LvHxMo})3wk1iOK5W<_jl0jxvhUI|`?sWXeR@iZt5wwcV_qs*wQ@Z!=wZcA&0U^|~H3SD@ zfkB;{Z7t55SFh+1^%(RlMB(n~?Ue&Dm&ct+!yV>w5d=vw;PHqY1nl6FfB+fr4`gV7 z-#9kIKB&dgTnzy}_>@un5;J?f%5zQ#Sx=I?nTm*6SXc_*yyyoaGEgk?YMEXv|N6eIrw1Pdtv1;9P77@~NZAKC zL0NhEg#>wAs=dV%bTe0Ji!_VZ_JXJ`KO2KL{*jYwE1M38><>eV5Ahrd?s~Nb#--g5^h|u?f9#{$<#qeh8sYl z{}qi)?7j*SeiVio@OLCyfw0=~(HS5TF0A^CZv%Qaeg3ovG@rn#g-|GJLahZU#1xy< z>oI;WGjBIDGYbL?q*MC=HlzjMQV>J0Ld^pQU3&8v62F*Sx)k*1$WftJ|SI{jNefR*X?SxfF{t zY?L?yQx6kH)dgbwP`RZta7Kf-y#OYNg3L#lnI z>vZv9|8}=vNwTJjnoFD-vkEokKpOdFV+1M};X$Tl1 zGM5h;eHP%_*Pg=n;^EL)7Pykv3okZ?dz40VC@3GN)LvQE-oDmE9L(RN~&je!;6RTf38QydhxK zo9dGMCBRDL^i7Utjky^p2{@4Hf#k%HWddyl1`!WCw`FXss^3As*Z@OYTl?|%YBec> zn#f4y_{9s+26=f8Cu~^UnG(8#o0>SUU2|b2f05qOBz^C#F~$m9{zLN%Zqr^}31VX5 z))gL4j6$b^ARJ5#!0;SA2aC0L`ZoIedwbhpmm;E%Gg|o{J3$;(35rq7i8UuDr_6Kf z0it_Ag#vC$3FOm-#YL?j9v3cL;Ns)MPUvn2>VV`9geD;+S&(e6TFWArxnxS{NjNz+ zMvbsUASGos!JZUof%>0;J`p3GBL>V79)8oP{8X39jSG}Nzk$@&aS&-iQAaT zgu|F$d9iu#<+T+;F8!t#_NV!GkF-h@?o(}YCYF^6#xdL=h1eYe=mK)f_IJnT;O-@J z$aYq<*c4P$VQ^bH5PKon<$4{x60t|{$s%C5m04LWniQ+#GdZn)%n~%=8yLn7UP7=7 zgy5K>uL1t`^7UQnh^E{=I1q4`1T6#=R9|1;;Q|o$@wW;b$O2#^FiBp5HF)@NS;t#a zv2DfWO>XYfxHx0{@V&n|mR7ygi4U4CC6QwWlgrMW-uw)I&(Du2E^hD%e4Nbv?WMYnKGMtfl=>9q4wEZ=Xl@<%0W~@LN}E6I{FY)1SEn~HH9x2mXTQ|AWr7|gwizd zR0LrEauO*Up(;B?Osve+t4!XJ+T2pi%CH!*DAnhsSa_cdRgB;nKDAGuQRhxCr@Oq+}k_&{2cdk?3wzKnSJRf^^ zw-!^R?^9VB>htFr2ku~d303g*HjhLg$oH6E*?=pk%*Rmx=@caXn7;9d#Sa*pK*59M9^+jUsGQn zPxH@x??XXiREr>ST)Y~TuH(>4S6W9$i|_pI?CJv7!c0_@SIF8Lmw};3+vZH5KJWg) z){@Y}+ootsX;W$>+kgD1{OL*KP^~L%G((Ge7O9-syd2sRv`=BXQEaGBC`FRaj>TjH zxhQg48wwz16w4c*W!R%eJWKhYtdmA}FLeh-w}zfVtzQFujfkkvZY!`W&Tvd(jEb%X zb84z8&~+ePeAnD+j1H8}Rs5n2} zCF2-VeH@&l8qY1#{mq&1!$UDXV>+$}1y+4}X<}&KCm|2lJ@u zkux!c!x{hx#N^|B)>|gD)TO4dF<4z;|)mBk^?VOzbzT z@d+i!-W1Ym(c{b)l=9Nj>a$l7`WA_`v%Ul|&nvjfB}$(qspMYF!eIcT z1|CRy*h^jh1S~8XjN&f%D3C^QjL}y*+U{#EfwD`kosP|?ooMdf$`^o@<^VOt-WL{bzZnB_3 zV`#sPQ#Lk!oV-*>;OmQrQY;WE$^jzC&0h)S?4k|{cYy%Z+Jqx3Bjc;3wPE>{l7j5) 
zXwb~qq?_!#sU!o>h!>ZZ1T8w&>>M=i0l(B{}A#DCT3H|&WgQ|`#Q1T;Wjf&L%1v}muDbmFe;X2Kyhm}0H4^H zz~3LMKjSaf$k(sEGyEJJ7XgWNl$qy3CNJdz5}Pi5onwlK*fRoBM+}IWjT<+3d2?&# z1+Vc)oc=xYmuhG*=7`bt=H@?%i;LrJR5zHsSyNv>|K}&`H#`HCCr>tStE;y~l5O=F zD??RENa&UyBh-q;pL6O;5gVzii;6nj2Q9Wh@29XBH%kU)N$9JzpR%eZIEJ<1QE_UK z8OZ^i3>UPN%lKkLV&AM zD>h@j7l(&SollRc`!njvo0lcQzMr+JxkMQ+@n4GtzsN=svv{?NDLk{=MN;(UvdmNqt-H#MYt3BZ#&D&|Sa zyi2XiMaDF%*t2RVEFuvKXbL8bo{r(QUj->quq-OY>H;-&wA$-&s3I_wlsIrKma2Z- zq+;n?UVa$;S~ungJ}r`{!Q-Lq;zH4MyxtsATG0*en>lmaZ)F{S6Gp*F>P{Zn{>Dlh zD@=)2 z-QnM;vJVVRNujB%avypVY#Uu4#%p#N-D;44;2`3y$V*FyEf(79_S#aC+xi5TyEjZf`>$5grs{1E>Qrl08Xu z9FQ@}vdZe|(fs*y#vsQD@N)zyw?rRyh3b7U25!Roc)ErLvS5%o-S*LYjuK}oeC(cq zfn;?Gk&e~fPV|HbjGwyeZGOJ)qa}g949WAB$w_Rm$x!q;Jbv8iiJ?CM7!<4kzL3V0 zEW9;`gs^d#z$h9yTzGGZzAC#IN*1}5qq=4e*|9y7Zeh*pN~_~EJEYNxcg*Q<-}@aA z(;L5rnh{cfj%f#{(Tk7azrKB=oy1U}-1p$o-+gry1p0(Inb{?B**5oQ4o0SKKPesTpa=mL%W{_tO zU7(HiV`T*x2!z`Dj{OQvMwz~fm6Fs~ue7XBHYOZ{n*#2kU%Y*%WnG>-5{nhf`T2GWq4(4gRbiuJpaWpr_c=mj*eQ1c7hsW=tB9{p;C+?@btS>h@ zqUS=|7tGpmj}rW^@a{-d&-ovaz_w}afBBK;f#IFJmm4j?{(?3`LlTlLF7B=$)^MAXx)CTUELa5XM&Pux=f(jthvr>D1NGEHL-6AMx_HF z?iLpw$4ls&x4h3RowwZ5U$$IxFN|EP*hG@7(tb5tr=XsIO}AkBN(*%meHLDQgbylc zyYS%=t~}@!i9W%EvN-b(O@!+t{_UAa!sS6x2R(Nm9$*o$97%jFP-y0W(6_{$dl-c% zdODjo!A%ljmuNKR@_Hm0{~>l_@CgO9?Yy!Z{XY_thyu==5H4ww6EU9JwhQzBv}E+k z!)s*@g?PHu{39QUw)xpqXBYnI=LjAa3nG*Y@4mI$b?mjxL$Gj9#pdCC$z62{tvPRR zU8(Go0@r4}u{;$^5S@cp`X=;j>wz21|NI&L^J`$SUcD|gjRzTEN?I8`ZEY}%nN^KRz<1x=J4zN-l#*vZ8ODk#rEJl4zKhjJzuHlHXcNZoRC zqe39l!RGn^-YI~*2;!f-0sX5*avBZY?}pwMM1lvzpbre>Ti~irC!w!MiOFU3YkL*X-~9hcsbZzWrZE3=t7Z$xm)@b6-g1&xj0_Cz|{&$$D)^H{O*yz=WD= zd1O}#02ln+qZf@t8;R5$?54s zehe4|St*i7+evdB0t6VYtK!8Dk%N=h63?J7*q>@5aq~{Ry_ASlG zh?evA`~#4ADd82coiHaq+5M6cxD9;b&u>X`QPF`!prW9>f?Rn3I)e1}QH&6ow+U=r zOzupgLw7&iYp#TEKHL)Ule6m`F1L!d;^cgr&o2O`Nj%-LUjK_XY)2~ZJA>;{-@g8p z-P9=k{{8?4>dBMe+v{UQ>C*mMPjKEut6-(236sk}!HoulLoZ;{)VWZp^ax1oDIlTQp4dK|n_^Ka|;o9Hz(?XZaADlZ3kZ7fo&L z?#0gC3-mse8IqnK+>CyI_oO4G4md8w 
zVD8J8Tj{B>stm-8lX&2DE7rn*TC>KVEg6NJ$l;L+HqN_;^q`3aeez6w#gX5!MlXwF z@g_GSg7dx^hLpEjdn>sX0~i@&fKbl={Cb7%#-=Jzz{Q>T(Irr z$TjiB3-}|Ktqjw zwEOyl6Vy=YoUE9baQAR-WUq;aygVfsW##<*#&*E1{haFz1zI5~D)Ggia=TUv0v!}j z+G2e$iz0_^87G^Ou{-GvJ$~0yAds@cNi_Fz^*UsVZ`k3mk@1Gbz2kQc5=mG;nC?L%g zgGVEI<#bXtJ`?(SO3tn34|Uv*4^IDjd3o2~p(@(ZGCl-DfWA*gS(z`CC|ZAhy;VV0 z_6#bDN$KffWGu6(wNEv<6f7)44#iN4GA?inMl1Vm?JSrmdXXH}Y`YM33|>0Oi|K^^ zu~BPO_==aD?0@Prvn6*{@Ue0wnxdovqM1WqQQ%|KN$yL6%3xw~1jFk*7oZ|sSX&Fj z!1966mRWO~PYv>AS3(vx8JPeoC|UP1@x>9^^AlGh*v0&=^>J~;k0-Y!Az$Nay5)8u zOL_r~{o%g&Wr<@EaN>jjIBA=hAj1sC2e@o;xZRrkMVE`}7ArZcq$CkEQY`=x;WH@9 z8~Gl{!^=w-<5CgJSFaW*#Q(B+u%V=8o7wNz(kHppL6h_f8ou#*E~1ngr7`f1Mq@)c?IGr&Z9@PHTAxF4d_ReRUDB;JYQ;ar0l`2u`^e1caaYI zMTUk7zfcj%M-a;=FgL!*X^VXM@*=4Fs>MqnDsyYU4Q*dQ*czOFm#9j+n7|r3y4$l> zjUK-pp_mp@T}?_aEO^y9)k*jC?5`u(?TDc#ftPLCtWgEBBeLM+kDo;&-et{(d^f`U>&yJmv;P)g-N=mRrp~N1NIU6Y712kIChXu;& z7v{W%-X{}9kgU=JbE^TWE>a1>MCE7ZpNbvF=;aXkFV`W+G*y zpo_rB&EaVdtQ!6P95N|$!j<9Kquob#`M)bB|HCWM*Pzgtn7B@2Rt$)*w3#{bhU0^! zR2Dr?8)m>w*YK3`!ZVe9=o@PP?2URV#ffHM#eYy;2J!sVF%8;fMT=|cx9 z321vPnwp8GzuRi`1x(Xk8k%j*nxIggo*IQkMV8+xI&1tTsajjR(cc|6T2_nn?9ZW9 zGN?K5+|I#>38X;f0`=cK=_c5y664w^UVZ+>f!8+p;g`@$8ooYYZzB{CnBXD%s$~Mq zQTgxw6Y8dMuo<&24ggz}IjdCmf|5ou^Cd1(QCy8osV=ZzIh~zc1aa~*2*wIPc#DL) zEQFv&n$Vvd`XRD+;-+e1e2of}A@qA%q`ncvZU$efxb&I6e@$$)5gN08XJfN}a`fdI@; zSJLIN*SJtNF&`}yCETGab4x_SpK~9z6$h2gQCVr zzCL8J?5yLm0yA~jmoy4+`}_K^z<`#b_xtXk;r|I$98Uf3RMG8~iG~JCb+yOgN~-C? 
z@0tAjGJ-s=zqaO@LFEgAy)b`vDoHWuf3))H@OWdraTC9#jJh#q{S7#n%%R2~6&Dxw z_AMtgUW6j$QE(Y2&vt`!P!2XRsCim$jISr)aZtr1K6_?@j?voRm*=F}Y#?eP#AA5t z5k{!tkx?tfjCXwe>bBZIiKS&*-#Rb@E#Nu@K@Nq!7;fi zO-QKdbwT@xsDXfO@B3I$suO@}Y* zN#K=c1&a-sa+5r{6iBHyskXr_V)o?8Q|KpfPW|}bEAv&0_80Y_GS9-2#iOC3oeCkN z;B7?Bg)`=bd!L;i=hSrau_^eBj)^#JPTM{PrzLzT zu?HA7Li@}U(_Y1fym&<$#%LKR8mrcc*d;z(}Ka7g@J-5@G@|85Ux^9e{y7T zadj24?x%a#)$x3Rgl=W{2ip8menNqx*0z`g4h`phYDXQCPTF3ccASsBFL*US`YGnr z3DwJ4QD+I8(aEBp_!rSbxm>fvk9*?%g3HBPf8_~xLVZ+3c=!S&%$A{SdBhh}St)!1 zrgAz-4?ZX-;DJb}^+{D-y$gH;aQ?1sAC1&oCbgTtDK&MG$eLPQc=!|=OSDU?*BZao22P-R^}UEtez| z&7m`s*wfRq|JB~y)dNtkp!*Lo`pqN$6%VM0gu?R#pdS|cQ83=D`N+X31dIk27#;ge zxuhsXrHB*t=BfKsCg+D@JOo>mBiUW4XKX7j;zPstpjnkatnJueE$N>_WSUfH!&jA_ zF6AWV9!N(Q!QHFRMa8W+7+taaum52MEX%jpDcB_IAt7;e(~l0{(|>=dP8jA&Nl_7s z7yJ}`ch%IgOG^`=r#89P1gu4yvyDVIZh!|DQrH60cmTelBI@T-0+pg8_d=sHB`ChS zTws1lOlxHMvI-9Viyt{Iuo#6;nutfosuGiRg^4d@nfiV$)1^)AljSO2oqKhz^nYWfgBuS6M0Ak^i?GKjE1J#_gU}H#cIGtTFbuJ0q#GZ4>c}|JWtK z&cJczcPU@H26H=Bph?!uj1}T7;&KC%qB*!zptW5A?DZWR@ys{rp;@BFX#o@JF^bX! z&=3Mhs1p2FCkNw1|dMUX5-sac(}HYkUVR46cF!%0PKfMD8(*%x@A!A>!@e)4SR z;iG&!h@J0V9!072hv8=>BD?&YoH_aKzfS|{RrJr>3j>i##|c7? 
z%hP`z_Tdix#XW_-w~9DsI(`xrE<{MKhrg^}<3Vi@$eV~5(A$St1z#zBNBfU`$_lzN zm~9RH=+pjvNXUy&!WN&rb8qUjfJLX4{I>{A8ug^6qvaySVZ{qFP-HTqz*Vth>>f&roz5zGl< zLWAqNlfz}>p5n5o^}C}`2|u5hAz$_)1U}mHjynUF6Z!Br_;6(UW4K<_4Q{SHf=Wb8 z+kxktY_JvKP7Mt&Uv8F-(l59D&o!Z3wMQOlz9uY)e0MK0K1CqyaOFHc9n`q?00To% zpZPW$2VV3$@&9}XA3pRVf}U)E&m9AmQ26icWW9meajg)=L`l8<_sJ{&yu#Z=x_wkH zyiAM#pV=jZ!3bTKC!zcK{uV3M!nf}_|Ax>+3FKDd%+oYBan+x>3d4Bt-|@oztajkc zZ#*pH<6X5uBCQrJXqbcFjDCg=F0pc4W^rcuxB^HK-oB24ON%!V((cFqfr0(kP08ZC z(E@St?!UK%{RUH$n`)4V@nUN5uLik)7l7P57u78*n0V@e)001Pt7qMgKKqo}omi~luK zE}#xQn|Bts8SX%#$khvik=4jo!g)i%Elt9B_UfN~r0^2HL*f3J@8zEFWG`>`^``!N z?@6>gsN|>;PL_DJDh#R_Vq~?+SCA`mGm~)TSWF=?2t}Nk-V?=MClj?i9 z(p?ZERi0@8Cg!$D4n&{-4U!0+?_aw}M`s4MDge3!kcw1iN}vHq4Eid(8qT#moShMC z6=EO<$n;)sv6QH;Rz%^(r<~alxnTUGaJl6bqC|bxlLOP?^VPGzbMPWxJL}cUWa8eK z=-sRmqm%Q_=W!hY{aq}#G@N&=h5c?av^OeZ@nFNECrZdcyn+I=nwpw38V@x!rRZ5* zv2xe>ZEmi%#Y>=xV9-rWOXE6~r4A!Zz@lcxyT8t3RniW@ue3CJbo4Pkww0*>^zRa4 zUs_%!aN79nn_f>D8a8VDSZ0s-ihRy+cdn2WlUnPN78~-kqn_t&{*+{lV&njvKtdIO z;^PHM%22RW=YmBYo^J7OqzU9wkZp*e@%f@+kuhCF@Owhq$_KeD&6 zFGQ*Y9+}=xzZspkj^R0T@f9Q@4^fJGEcDoW#@c-pjJ4oJBnCwo>%ZWRD3DoQ9~S5g z13M0gHN)cKrtKVxxN3|^-P}e24G?9Qt%n;01e5DCcP#3x8Rb2q zMxlr+br-%j2}wx2hBJOga@nrPwj@Rg@rnJHACwe)fKJILk6!=)1TQ7ul+-8OQqM=lNP{ z|KI(sr0WyT_>Z0zLjz+JGt=~P+8=&p+x?RhyaC)j zg-2dMCpNGgvn(?H<@xmA~ZijFA>nzyB1l%U|OKt z;J&?h;)t_2BYa%^5=c zwRK8dRBRj^MziKUI#zfgDWVtp7SCc6@Cl=pes$+Gm&1{g=BHU(nJt`dEh`O z4J@JKroAr&R(P0jabLMnMiqN7LSqH!;?eKEA)F2Ic7nkHp6;RuJsDPGQt$A@h_P!d_NPQ9LDh3KX5y%5W zKye1|LA2}Y7m(xJ*T6bfEXTTs9ceyqVIMa>5Vy^B?P4>YBI~g~t{ii-GR(VoB;s=B zVtlz52oIBJ|N2$?`Rkg z3naSgAYo!L$q*wgGm{kBR?$mfByEGoa=-#ZcP3{1QxoLV3?GdRb_8`Ze3g0IC+!G-QyHoYs2z+BFJjp02RV9CiIrOtLW9fzrs) zcG`Afe@pD~%=0!f+_pqRvUzY}4^`S~^BGkWgDdS2$QAPV6qGEq9s)MU0&}yWh{n2j zg!#5&s`b1eF|!iqf{I%ONUli0i0Jo0#mB`tLBAGQ!sS4h^g9a-J%A@qfTxrQ4mJ#c z3&ZuY+F@mROQ&zSm=iYzQPU`%#^-K#4xeFAXEU5p{+n+~s`F8)`_{%&` zMPx{bM2Qd?3n4=p5{XPHk)nu1D09YSNP{s#L?Vey6^b+vWu`J`mNED9)b$y@>-W3w 
zzwUKk>sqVzE?&cV9_O+5vG?=&*bv23cXw}rNc8D>eCpNmBtoJ!b?95^($>TO+M)CZ zPE%RK0tIT0NZ}Zy8#s6c!8iwHv2doNP=ru%RJ7ZGS6wjyn#vRd@H1HE%gI`@*kf4#Z5xKeOjMa5DMJsXMKU!fa@Ti)@;zcsIK;Ot#!a`>01wOrXF zPD)Qp>xHWInm5=UFznhOdm!8%@Qz*IxczW_&XqI`d0y(;vQ113m*x)e+HNfR!OMo* zYV^F}F{!bsDKI`heqjm~MHtJd+}+(t`%!jzGeEh)$e1}|_S4%BTss-YDrf0;zJI5< z4YxJp!>!|F4yR8C!SqN$>VD;O6F7nD_wRLAym5I#@2b;gho;_lt{S*FIp!?3rCjHh zhdl8`?+X{%Sg)+C6rBAcvTfV8tylywz-*v6prxUaf~|+R8v=I9?z89GSgW6R_xJuZ zb@v`AuO)8B2%Z2E;oJhx;|-P|%)&uP8xCk`(U6GRJRHMSb#+?Dnl=izc2uy< zK3iBwj1Qjiu=}@)jkY&Drin)R3#Ft)XzoG>XJ=D!_VMW(9lePKVcYV^u%AbRHfAq#t+koy3DW-hSQbydWP!mDJloM_ z2Ty1D@hf`;`1z4g*tVYRtp@9b+CWBy$eYkp)+QbyxDB7u`->K4{!(z5a0rXu&R;Om zuiHrHK!H3@aVK$W4o4p+D~0(B1T_+@H8%{OsqGh>p8fBc8KJy$pZwbF5ZlV?sxG@P$r@%Ji1zy7iN@_=9Xgv`)0U1? z*2h-u9~`v*Sx0j9k4ZmVq5#3&36Qw2L2Z{O};uuhD$X2MSv5R7(KcJ>Yj!VDwJu594$t=mC`eiXy ze!v@>wKt}TYY)Xs-I$1QC2S(z;|Bp73j5HJZRlAjgl7?nOq(Y0{i)p?==Ei{*s7lH zTT1`)@AMMGGeRz)qmB&+`woc|Y3UoWeqhqTN2;gc@YBfld|u-BB1Qx!XV?DI&;Fg5 zeq4v%5toxW$bAR*reD*m@~?&M->>h?8S@Zj-9CI%gxaM<>UxO1Ywu@qoa!c`iN$Cx z-1^c_NKjC$;=HYGn#^6144svi?Y&c@r!+LOdJT{6x#t}IgKq<|gE(cFlhEv-Wvo5w zXc=!WULhgy`Rjfwimop1{>E_+A_L;R%jH-1kl7Wh^WA|Qs$LkI9==diJN2%uNJ{O2 z_kLSP35o8T2b(V+gLaKbT{w((k!TpXwl;OK2^x>&C^3$-uqz%`t!s#O@RU!gF))RW zh7GT_Co6ie9Ctqe9@G?=bYb19fo&bRwQ=8Nq`ZWj3&)6i_wJ$c6@~rcor<;5nc@p) zgETY0*Ne0E4;734XBVYM)eH`q2pADVv+mseKkBttex6?j!N_`bX^uRyvYEdh6lAK$^dI^+0V6ac%y< z=ma4l4E(oCZ-Di~LwdK`&&+UzgFThGq?2yEwCal(pjR zV~~f)7LRn1z58dI>h!Iv8~8foI*pUsfab#0Z4ZC;SG_UiYWE z?&;gZ?MC@?2}EkD6C4>?_W93Fonq3dVj+FzySfKQo z4ujx(f5c^|A_Q)6M7H8pAq7|46I=ZZyg=z#6 zA6vu$!uNf7tlb|Ew)N`}HrR;w7w5CBbOO-lPy7fnYJyG#4+sHmgj~?THY5RbgLLQ; z`|EOYa)<-y-<6poM2ZL18C>Pmgx3!Iwl;YQqe_>3ym~_uw7Pcs8D}`HyZW^1n>hc) zP7N`xV?pWNpsID2U)hMZa;e+!24w4fV0VCeN>DOezi)s4lyEi!kraYaSmm;U3p&?wWK+5GGh+LOOUcGv->c0vKV_rQ> zYgh6iJ~mrvQ^v=tD$FL!WWG$EVC1Jd}AFC^Q^7dbuq9i`PY zPeh$9aI)ixn4FqA07vCy{~LyK&#AOdXIM1Fn^MBF`^82EG;*iiX@P zYL2h>YcUzqDEdFCsm8J&Q(oIpER(h#H|;KplK?9SgN(VJ_{qnG+01Vt$!57UXQT9? 
zE49xCfmoDU;mkp8re`IxL6uQV$hqRQJAJ1NKd=$ek0YHtI+}ar*H_m2mOHD+7lcH~ ziSY9;EK2~Uoh?9I0VnBa=Nv_ z(r@oDu9DPy7+h=3rJ2;UQPAT}ZS6{#93$JO9qk@{!sK4cq5h82Z!$MJ^sG*&||hsqiPKO#|JKQ?;$?c~H~MjDBVnpE)^ z$*;!hJc=8y1RumDBBk8jPaAXRe17~S8w<9Df8&zk=W|$?YL0P@^i?=e5!h5n&eZQn z-@pgH9b;!KMvkgweFFX&`2)dH0Dq8pUfkT=PPMWN(AO1a3uH2$51k9nrbRu_X!+E< z^9-H`Z-8v@c%=46MH|WQ-62M9;(4%*^6%yFd^+OC6mh0U4=Dc1NCK%ve05f(8q za(+!C;tOV6MoLfgE~l`I!7ZzMOdsM@_Aah|je}siN1F>3IgyHriik=EFyE<1l2s&S zWfA1!%z}{+ZI;%=AtO!FTt=$u_JPA@zS}G;C=X-X<=A4mk+^*aTzP805zo3Pk-_S=}md^4vjhid&fh5VDToIbUajA=N# zxTZzLHs6g@*0fPp?Lx~aW4_)^tdu~ekAE-586bvhgQl_ZM#KcdX>R{bNq|0YvGQQ= zQf(jY+H=}_+RDT_$=H9oXB%!eL?5~zA0L+{*jU7WBBm93%{jJz|85J4qx`j%(#J{L zGfq1Eofx)Oi4u|^U;h%?`uGx^z+Rw3Kp5UpJioXAPRVE@#k&P(PeK_3lUn+&K)|^yK!XwIj~0s1#}d` zq-cu}M4*5Ggk)u9U5{r#-8i%t2tY<)IB*$x4l5521Hf8)+Sq6*Nnu{XstT}M{1Z*A%IpXF5r6cb(>u^W;vI!eHD`@mW zVfnDp9ok}4VFCe@Bg!LiY62K114I%mlOf!7`?Z~mc1w_($a}6Nt=~}-p`sUlPB&mh zg4_|Z`cNb(aWBswKFmxscu?dc7cFD62a|_O#oIH&mX-cseSLl2CzTNei3AE{!$1M3 zi9YV}NES2CZ|=^pZmkX$6=kn(_|9q^B=2t(=tR!3t0xJ$_doUnQR`1)FzY63(a_eW z#o4!abo9h;ubuCS@8b#H4nY{fI_NBOXF$^&0^D_8Js-ut6#RRK_=fcq+`x%l*z|~x zzQgDvyS@@%G>5}`BDH$6q@1fwv%g0Ogxz`~WAL9O_*T*_a~5l?PMA|s3S)+{6Lxu% zPWf7ERAgJv&-kt_^AkijG^*4N9(>q1hjipRo}&SPF<|K4xH}m*EaFLqCGV&r^$6Kx zD|Xqo!3)NP(^VAg>|y8t4{5Ts>ahEVC0%g;uea5qnIq}EpfT9CG(j@S%zu1n{0cmjq}PokRiy!AJcGe4)JC5GzzAsre^Yt+>dAH{XU5!Z6?cc0`~=CeztK z+2AdG=uGeI;0cfrn5BBKA@2{@ET>V(nhMbyLb>7VbhlkE8OyT=wV zDb!Uwr~0YJ#>d~h$FUqGMzeHm>SZ+_boixLPuPv8YQZO3H|@hB+10aIA<6qnuqO7U z$e816gV>j1Zrw8aH3N1$kj&?c|M0W3Qv%vS5H??5LIqwE>g3l^il9>AZ(*&Y+J5*R z0dj&$$RDRD8?c01BmME@yfxxfjdbyk6ycxPP*mV{f$P?nnIC+c5DgSGPp_*BP)x%+ zts(h-n7?tSzL%E*f#L&0>V{3?hUS|Zg0z8aOreC84t>rvG`|SUl!#uUAF<_PZUj+hv;8s0cJA1fs2*)x=;LHYR-!0_XJ zAKt%qzU2|y*L=$pdGoY}GE9szl2=dhJXTm>ort|OIod6$$3GH|vp>fLpbRp)V;#x3 zQ0vDLW@-S5Y=T#koRARC#k_tHp2*zY9^hSr{0(2ZI_XB#a3a1q<{$XxzKohrk{M=2>$zSIg+)bs6~LLP`Fs0sMNxGM_*LXixvx2IZ_1Tq+LVGEwi%CvaOd&|-XOH`;`G?xhSaHc-IW55x^ zW3qiu?PZ$_g{Y=QuL}l)9_`xLxk|N1KhJ&Y85?_Q}wWrM839~<}P?J^@ 
z%eHgU)#Lk2FQ&x2<)E-#Q%@8uTK^L6Q@9){5E%n2NFsr9!Ne_71smg=W(cm(w=f}o zKd0)&9kxssXDV40oP$3X*sf8W`o-R2z6_N_XWdSvEB6@+ytg=9RLFH6`TZK*H;z6e&C*8H+ zr2V2b9RYgfVTbD8C(4wE>8oEpY52sBXlG7*(Hn8bJ1s^bErkWAANwr~_N3(II&`LB z{|-Tj_J>LuRfD24&GE#QcXT+HbK{$kWx;fnI?>Ggto%o6fZ>eH$0y(TmKUkiG~RG> zUL6hzJECz zB-?T@o%*}H=Z*~tHMXhgsn-LoiUafGXTA92hTFydLy#d(;3B0KcPy#0SWhXi$_Qvk zZ640Co2TW(MnhZ3UsQ1J0@o}4@AP{&l|9T)zV{$Fq44b5CZFMuBVvo`_4~CrO5(j7 zz8bKU#Ft4*f8HteyVrotkp z;*@G3?8A=P{dH`4h4UYU;4iTL=}(iW^pOrSNFkMdTAj6j#6*}HHZKa7$Vs>OIjdLr z-c+%Ky2rFVlRoOE>niWm`wuTeLJn6iDoPD~i#)vpyU*F!?>nroUR}PWxb`Puc~Bpc z(0Kc5Nk#&)8}aLB5Ln8)2@&KA?-_=GcF!>>!8GHlfjv;2Kw%?V$Q*3*vHs|2rEyt& z*~hQFri%GaECU6jV+=0)7=KYdJFrpuzT-r0;vT*e|4~eu4yPDb8I6&7qu=#<0LSHU zKE+L-mDhqx@G0XE+LlNZiIO;Gln?~t&se8|LOI?NnT!`}CJwNbuJsF{&Rsp|`mx1g zuxIqoEZQ&>vae?IMTgtp@#4@~`lny0a|d>zxr$oikvkBLMR+#Wn!c=_FP zi;(f}7iG6ap8nL~74C=iDj~Eqd2hPSc1m&&U0IShj(5~Y-c~*+nxQZ3ZC2F&P#>(t zVC+5GR<5~hoN1T)9|4>gUkiokKz1)+Oz*wBySn&Lbt0ZEL3>_i3WyZVP7J*(U!WlA z%W{N|caSpzM&MnQw|&CCTLk+C!yTWn+?*Vp#+9iTpaOo>+ZqskQMRZ>UG+)Yd#%gK zzM%mvy!*6J>7TpNNWUK#T`=v4(IrH>g$4xvk3k+ctgbCm5%{v1nONdk^Fo(K>KxCA z03>F4+Xc6>ex3Mo61z6fsmmsFfCU3J9EMW>B=Hr!E@6)Ppc%*8uVd=&?K37en-MzZ z;A*qVUx{mjeC3PLq3LZ_b`EEF)=%B^`lqS3;zCvQ|JWf-&7Vc?F^SC8q(ug57ab_?`%}lF4oSY`G|Hbx+qXT zQ$meiQBmP368#j93L)dfqoS=`XWD1$-LuK;+31!B<%g>SUrG&qiq8xa3W(0RWljcm ziw2fDTntlizGgJkm@(9r$<$g6p6s1$6>UF0oLYFcDGNW%YA;w!?gHZAPZboO&JO>u zkTL08O<2?C*xr;qgiPX9x)WxX(Gv&X-}Wp%&L-P3e%Ve0Cam`}*uz?`?crCwR&L%g zH_>BdE&!oeNPHRn+a=k$=~Wii=>-aNnZ15j8I1p3+XIeNz35Nnlrz;z60^~AnvmsP zdl2Ee=LgeYg+Ju_yWDKjvLuKqTE>T1b28$*dv-hw&FZEnK6Xl1?}nKc4ik~RNUIEe zm)sNhIVB@;(ypC5Gm6C#8)PJciMTGg)~pzEE!lrSE={Vv!`T$!7n48rt&qh4PnvzxdaZ1|o9-ko) zzfp^ted9PF8^mlb0PcD*al{`;2!QeeA#xL|!~5P{c_~x-zTNHhcKd;5<7T3q#iV@% zOP1)JI<|^IAk~W~2FdRYG;VMvByLZJj@1ofCspsZq-G_*rBVDmR#WGuI)ll+C>wR45O_2G4=vrKH!puo}QTo zvH=iJf!?VgW1s6h>*Un-hzVt2Gyu?srvZl;6ouQjDMgCIOi6#LtRSQ+*)-ZoUhX3R^IyDiIk<*O@DcIcSq^*D{SlM00q3X; 
z@5G6(?=9XFv0`2Q?5KlMB~Civx08_wk}Aei;X=~F@*fUGHbDuG8rJ1phV(DH2zlPEd&?K+-~nhziH&80##cA{C7Q=FI9`#;KiHN!VN zqLPHzeQnX4`%(IZWf6H-X>J}Cv~L~NNF3LJ=(~;61)OUtr>Cbc5TcnbkDk7l6R%=*P9Sx~cqKtlIT<)O;x4xB8voFD1TD%o(M|p- z*DC350_pIs_(4wd;*!^1H4QgBq5MJzk(p*<8LkOvKa6yu<6L3;= zJ0x*)h~aCf4q&Bl^sI{O6Mc7N*DpNQHWd{V_yGco`k`>)3RHo3;BpK0ZwnBWAhsx< zyXrXUa84Kzu{Y$%yLjT^5i~_}*fh@^j534AW`*Q%c@SfTOs#wZ2@pMK$8V6`W_&$n zV8Dvz7shPd-Ox-Z>mjv@H3QLE#^ROtX27uyU}X^3o6vWG-?-xsH9}6m^kjl z)O_yl?Ss7umB0JCXEfF~=8LrjPlp~Xo8ndzSX;2niJ5*RV&(!t#K(n2oyd#g8O9zE z#V9UiFBxJkQkhI@ftSc*^IyhvMTxm;z;-xN(Q|$;2kB$ceGY1OM|z2@Yxg%C%N$rYTI9NMaXE;s%KGiu zMN*B6j`joVli==q(f^C2Q#m{0Z-F=LSzov8}UnTUWf>v>F<4`eXnAMnPh$8 zVer%{Z)*B+UzBnF`(-gbverLEwGCVs0${y>c>f8qNMJp{jc+|R6TnKIGL%%6sC4g0ILlI6L(6!zV18+wWcp&=!XWyVHEuiMs?fvCpxN)#><_t?`z zNMwEX?2g<_I)MzMo#8T5GY+PN0M}->YX6fHB%RWS*R zQa{|ffS@3?t*o$ca^d(Mgm#ztGY5gDUx;#V(cejwuVOoMzOpvP$&mKS3Rz+3H{x$J zy<{lJ1TwDStd7QWD8}6&PE=uBAZ|Uuh|PM?Wurx#ZN(LI0P<3tbs~0c7bMCC3?AKY zN?+~jZErE??qgF6X*pC(nw^~VBf4w=T;p_i_BWbpr}D4ZQ8e-o1ocF!YR)d}JziX9 zTk>$YuJRzFI&~C1fVq&PFMojs6^??6F~FyWWC)b^TgD-yz?3L0bZcfNiv>0dw?1zvxBVDL6 z(BcTqeqmO=yyfqer333JK?HOnD8o9~X5P4B*ZHx$kg%yOto|?_d%3WI+-CJbpg*fo z<@yhCAp5slGB`6kX)_1wKld|q|G`LS2`gphHKtdD>d!!|4VC{|X4=^PzSu}^Hj z6uJyr;$#`?;eY^RPb56r9Hs5tcHLZZSQLWFxuo}hl%dU&vOa!neLX!~e=n0$jw${x z(!WWk{ls^87}j;JnVvlGK)Gq_t8l)@+4$gLLI{pt!_yk4^W@sOw; zeZ;kXfjEdBm|9M#4VT*mr@%Wm#%l{?EQPg)GbRAd0h(e z(K7V9p3&{p+NBw9$xmD5uPjF$|J<~9wtH3kP;;DzCz=R^p{Aa^wy*l}7gh&mix5WH zOAapdAEgGB(&i=JHu(x}P3zvT7Q(xJ$@aXKbEKV#soeVHpek)upz?0!5|%bWhqm+H z+8^SHmlHm?{(4-seKn=4BnOtiRCl8-^Onl*2~~wXrTS2flcjH)cO^q$bYq&d!ae_QG_aGZ|a?QG^Lf@VX?k{Lpd+R z{^Z*tknsR5IW*Y<`dm^vGCAE=6SkT@-*I#^nU|zMJ@d&(p#!P)4IAt65w(^7eVff% z&&k4rKTv(SEKeCupOn3=O7R-c^|j6*n%x_J5U;ppeHo09g8KHqf1&8MN)dDBBcr!w z$jNfwsuu+tt}wvE1!S22dYV7O;oW}@2F^ai(%are>9G@l5Q>+bKn*G&7)jy%e zPIIcrF=S)5Ktej&?aZa-UIuNle)*|vvwC_}fOtG|q>i`$ai1Oq-G3idG_r(kc6lvi z!R&4dzmPt)*KOpc#w<#^lwD-p zsvnJBXInF6;dQ^CRww2-t$Ul*Xp|Vm9IX&;!}Un^PmuYg3k8B~?JGMA_6^tT9y#D> 
zE4at_R&+uPx#4AnQmV}#H6)(hv2eIZUu6^6`y-2DahNr;{NNQK#yyVT4bFCN_OAPF z-SW(Fm#SKb_xft5zuxCk^=OndqVQ4X9BotPt>&VgH-CgHl^A?<{Ld};z#d!`K4#>O4gtg z*rP?P*OrZpwN=`te3?N>km+kUIttNJ3bNA&AY(W%R=4x@w$pvX_(SBhfKU-DiNS={ z`pJf<2i$#_pF(4dTx zPhcwS5)Mm!DwltEhGHB-O-Bb0lqzwTLm)z6pyc+ zr@DG_r`Mz4bg5eg3k$sT$tSOeRmd&mdEH32N0paOKsE!08Ym}F@f0pRhc*b2W8!~s zDEzmzn8}_)5cP!&Lcr0}%y#JhMc2NQJFhW3SKGLkTDi666D7YPmn&OfbH3V}S9XR1 zkDRwnMAHeh;-ym}BJ@M;iJ6O#klI)qOg;Gwx;l~6!=U(-7CRaHgO~>jLGI2$*G@*b zjvFdn?s6g@2!B45x`ml1j*?xaBK-Op)9U9lS!j?~FT_fvfL z0y}d%w(6{Hop1pdD+NAZc$}Dnf`X`tsUg3;{ttujQn3y#a3)Zu_c_r2*~BzCmdEs= z>#V=@#Dlb#Uh=$0yVWPBMd+=9{LTz8n{cYS%r*Yx+ar)cPH#z~rz9s%d>9_{@#9BP zk1nKi0$^JrRgCfCReDh4kr99M{ZH+c=PEd}z={hnI`eiOA^C4y0`qp#_g;2!4<-kt zi?6YnPY!GiXFPcMhib2^Gec9!YL#rkZLj5rsdgt+q3W4V`AB8`zfUO*4GqN90N|CV zRg!nNazVKb@48Xf8Ow+1=@JvHjuHPccjmgGMV|K1nIhx#v)9?IM^-j@ah}xUy0;(& zYG*`v+w)Z0FPlWV{~9=dG&}9;9Tw@lnxXgNt4N>(2}twQtSlPLEW6l-#mI-gZ!c!v z!39h@SP=|e5A=HY+N3Dc3e`aE*MdSqS>_#L^#7}6h%mr==f8zS_f>u$8yd0o=jn4U z9KXzE!7x#CXN@^9#)Fmf3Jv+pv5yy2X_W@v?m3%`53%jNg}Ylz+^aK&HVjqeMa zC;v6qvRk?;fzV?6v%=8qo_JOs^8U@hUtVC3xB5)$U2^)WBFz|)sQ zV#Q&yCs=6NMP7r+GIcD8p(-d+P+ z@Ywl~E~a40$DXJ-lz?K2Ur0Qr@ldnP;rU?kiectY!l$IQO`VBX>%(5KWM(VMbTFs5 zQqsjx1S$~&up`mCOo2n%nk)153}zsyr5Z5Dph3QPFZ31lE3Ov02xd3x(7p9sE=&GGm+57sbTzyDZd*#8mujf{&%SKYZ#-%YUqsoq@+ zN4xVD%>KIFyW}60(~v^ABo>&v4cwTLXHmeKLu7;^Cp$|f0KiydtJ z3au8^TCRv)reE$iv7vA=6QDe@Ti{a9TJ)c0vXG0?V%=;48Qk-S8`HbS6vX#AS(Z(T!on47m?QiT&1LxL_MD$sZ*(xdUc|A8`ePL|+(`Ns6r;Osl z*9~{`ODAXfXOCW`y_AvK+b#7YI5{}y69420Hv(&d4aLxt*!pR+rDvZVmCDIdR(V!j z6O${!TJhrM$&sj!DO)M1JpO7PSlIliWd26j4LPm{Zdvo&H14d&xr`4|vLCBn`yUPo za|)<6H9xR@ip(oV^}69nk!kfm0&AZ(KPp=|@Frhqf8bUD6HN~?fYq~$9`ACSstwsY zIi^TbGzl8mU!rIf$i4rt*z|?Jfnzs{qSwY0%-AAUb+k6!g1pm)xa{gn@;31?-R6y*!CY377zh0uI5;+>)ii%Uik)28r0nrY+v zAJ-4vc%-&bvWA?!Q~COsy6S<+-!bL2+cFNPZ6Ou{eUtVPK2^%n{$#Dw?Uz2#Yi9Qi zWx4W~Z4Rwheo1X1e&*2?W*V(JjqcP95Ou9z0k7&0IxMPDuIjn@J_V^Rv=To{{G#o* zdjDZP?%RCeZX6>Orq?nmLa31-cr@sYm$SO}0?GKh>-|_G@AJqH%ymu#MkaORiH{TPONOTpI7H 
z=dEhH%rz0tvx^6mk=m(^?1E`Uk9R2Gr210d?!PYgn%va{-usLG-SL#A`QXW+YXwv< zh>v8xp0GrFHKU@d>N6Zl-EC#9UcFUGqJXkCj0p65=GQ`MOrH-ulQa}_qeoRb#HiPW z%k7*!GtS7AT~BLOS=2sRtulwtNJo48^w`?JN2Z*-O4f1PgaKX5zLE7ia1;kvuJ7Nd zgD^4S919-Bbu^sfyp@xz`Wttr-gjhA{Ncd9O)I^(+fXuT zsrPs>dGou?77R3!N+wr=^|j;HPdEYIZ(H_L*sUs0ZTrsb+wDmn@#Bm`>IMeIxIevT zveG8MnXs$8q<1l?Dt7pL;Xpe*?=JN#!H2aUu0Ov4x2vInAbQ zQ2Cdw8sY0}Hscnq-96aiPC17(-XkYk&2qIo{kFE7WbP=*KYE>Btf@5S>1xpMKA_ix zmhy=jDTpEePTs=z1CbB1%Z&weBLBS%_cPgrpYFB?IUSEC-Xe^fw;nLEt?njd;2UhK$YN{xrgchb$n8gy@-9Vk{&y?rjP^{!K21Y>vZ?H<4-WdIO{l#Gb{{)l zF!=WRavxoLCnJ03zVMpuoYfw~3>?dm*7CD+dZ%ulw83*1!pJrBs2VnsB~O`}+;+E4 zM&GEH%^TTb1*^WY%SwhbneV-AaHeo5v@pY0oNb?%dQxCc#qk2aZl#sf*2dHzb9Rb=$;x!RJH#YNT87(J2#tnnu2H zdL9N#NSh|t=6@@T`C54pJ6N5zdoKc3%qi@W8e zlb4+ILTUSv!BZ#8+UfEllM>?Rcd;#tId(HUn%kZ{Mn@YevNfdE#W3+(rE2uz-s^k9 zBpVwCi{o|YrJIWeXC}vNe7_bYD__+$-*Vw0P3)(u%o3@FDMp5IGaJkF*P>R&b0u$E z6kl$yd3%Ul6t_=?LSXz^Ia}M!?DQ=UixZO6EU9u|y|+Jj{zyK{m5m9}Dt_1hw)`2| z+nCy~$|D&+`k1ezz;N$#YDpE92WT_4E`G;AV6J@^PvuRc7^ec_AO;gQ64Ox<5;&1j z+N0`BNl7VlxgHlM;NVd)j+m~M(4il-AB6UKd(6sRt1xRk!Fe;3KC15YGo%MFBJz3nlET~7))|i;&A#gBAm`-VZvK3}_j|EX zPEPn&<}AZ~Mpo5@OX5DoR`Z=bHKQEfb3W8{AAVBw=3hQtQy?*W!99*gtFp#ZmWO1* zu)EXiZU6UO+;K&@975MzxGz~$R49YvGIld%ZvyG7J>-kXmt%ZaN3Gmn<$oFe=pa*C zy3!Lc2snUef9Q6CMTHrk{XUh?xB1P#iCLnLY_%HBUA-9a_peuBVnp^}LxVV3RnLjq z1p&S*EyLt;kr&ioT%#hzo#APzSPRh2@_%&ioLseG&iOR4S0OKR?%h@Ck`!er*?o)j zy;r!mV-;#kjA7n`+*Y zA79u+}Zamn$>rtN;X+zv9_=r(yB8Aq`f)13t1K{=OEPwW>o zb%!4IPlb?zm}vWM{*Ydo_|zlslNu8Etu5p?N7bWGx&~*q^uBF472YOcTN7`_r%F^?|srS z6O+PAgNFfh_~fut^%2KG%(f9;SX^wq@}<7wqHE#A*&j(~FKQ)9cx1e~ZZ~JTIcR9P~oTh@$X{_?$D*(o-k zzqi;lTiL~NI4brl=b1eI!{%1^o?VkKo~>=DUG}%ydyHM^ty86OT*4uflOjRo;Ug1! 
z%6;bU#7ciq8F!uXrQE*8%$`e8FJXA8_R+`6gW@fhuRSQHzI&3Q6m+1IUcGS^t^>a)RgA&JTD1z{uZ+PhBR(zow>@#<;-3G-g+>@d9$H$SLU;{cSrxk zovFFvxPMZ4^{~e3IR!HB)w#BPFy#=_`&wI{zI=ejP7UU|E{s~Oy_0TM`qInANG+sb zdF95RFZcE-DyFuUrGJUDlWhN^Q7$yc!JAw8*>w4*_G*Q(6_cjW)guf#$0BoK7<av`n6@^Xtt5 zE$_|UyQ4Rq|G>16*P7W{ruaI1s;hlWxEHhOjf{;q%R1hE)6j!s@$qL4Yyf$(Y+KB= zdE#-u>w~qVj@9^Wq5oP%?sM+i&7`HvQv-KzaJK*XB7AtoXusE6lcj*APw#XijtvAQ zyyeK#Vm+zC_ler%!pk!Gbgl?d`!>lDdIKGLJ-W2QZTtsK{frE+8>QA6q}D|z9~{3u zzV^tx!Y*OhXR#>)V;+pK+!q&;A%99?Q(Z;ot9G z+uXmBHmH8?`=tlc-`eCa?w!bY?riEQj@V&I<>YJ-vJTcK6a6i~Z75 z9rpcJCqrMA-|As#y71d!{Gmu`9%uDDOPautk@tgsQ$* zhfrS{n>9roX00Ln7yfDcthbS|6QS8f^49vVsL693A_WqrK;H0ee#F19GHY(wk3ZCC zX@uxfh<^#V5kqOP{xa3t>siGA^UM5;+wFS}8cK4HubB>wjRjO&KfBDOU-LhaFXOL{ z70-u;LxDOEy?U36h5!4*yrA4ky!HPVpYvuh1#43+bPJ2{+>F_W_&v57y1D96@Ro84VaMt3*MNA6Fx;p}QGNxDzCKdYt0&-WOlSXjI4mu3R z{8Ca`Svfq27++vrO^$ZICVVO&t=ORIj#imNKSRbyrvmMYP9a^%X(|x;Xr{ej$%k5v=OrhVy zubw-WDN1jgsh0}=Yw>*By}H(0GvUfUr|&8sq&;W|agI|QgiTh%pn%#+a+CdDo#F` z9c?>uyxg-edkl4Y9dtC}@VwQ3Zr#7%1VBpDJFRf81DH?>gYh$? 
zQ%rvbqEv6)z55NJcu1MB8z>=700zoh86 zII8{H+9AhRamkF1A8!EQ^sdrZ8sUBWHb-b0JLN?yM>sL*)=7zr-;9oCEt1tX@KMYD z7M%Ba*e%oD{`m>Sa8!ZqtNh3slU0-Z`};Fl3j9YD&yTyF!))Gf#ZH_@R2H{67MFb; zE!msztJGY7rsQlx;6=kMw02@~iA!E(!h%9XB`6s%;0II3nRb^?l0zz+j-_`je7<{O zX=x8gU^TV1hS^8Qx?Uz=KZs38sDmNl#kExhl6l_O$Cgc(n?_4AF(wN46t5m7lx>l_ztc!jHw}q?^Z3h_11$2 zA4Xbp_ra$`kmR7qITqf2F`oDNynG36Qc7uQVtcz;(Zv(IZ5_?h(|4TBZ;`^x;nAsq z%n@+kAB%&@a&g;5M3P#s;(&r-Yd_PYRQib}Fd{i|0+0f5S2D-p@=ZOKyV!qoH`pyn z970YOAUb4%mGU*T_T*cW#171RKT&ed1}-Yh{QY|L_uyb%+QFN89pE;k!+nMcx^XbW zNZY<9HiQY@xzeszK8on?oR)L-ACDNl|3Nx!xL(-WMl^cPS11>p6@R|!f zrp|u!XsfJjT-F1xoZ~0y9U0>z?eMq&=%<$Vq=)$9zSCgROC&SXpiWF@sjh}XIo0~T z&n&JYVjKx3e!pP&Zt3sK4_sTC9~a|Y{YK_&An4nM)tr&Bt!U!)%UEmf6WiQ7zkg|0o7;H} z$}Ys0O|2Ks5198{dCR6+^mUsPiHd}S|6a5!?VyGpn)AX8sk2_#D9=L$6^2j#z03&4WeFD(NXHe&}eL?VengyfS?X4KDp7TJ1$=Sg?ZnupaARTkr!>X7an!6E=2s zjZ>$#!9PX36mqf?(zDAY|C2X5AG@});Ch#@ZF>45umCJo=<kd)%x(&U$L+n*wqYkTlJD;8qP_TLwFjc zG)+qaFY@3+BTyOrkLCUZ-NbvkVLR;QfSi1p<;zv~@yR>16`O~%&ADCsBFVzS!Z3$Z z2=g01thU#be)V{O7+sXxeC|?~(fMR`4(1&S3Q2fL)J~x8m*isHd9pka((%Clep|@b zhrbnV$Mc?rpsvsBTnmg|05EzNf4<{Da6~4ov^-XXJfzKw_fuB#HAnn0_N~bIx1ozc zg~aLJDt~m3?u9X=&kp`>F1hpBe$)iV)0^4##Pmd8Np3_!~r}XFMib2HM#Q(g>=iDyk zNV#+ERftLx3n7{(xZ0LJNXs5^Elj{y-TvR1qU`@WNhFbr-o>yQB^>G}a>sK_a`mFt z(7w^y-b}tpO(yYSXe&Rz^G^rBRNe!p+tApU@%S-;_=R5-mbQDtWb=?$A|na}G6G-) z$DbjAtn+$XsI~_<6kK?vFR*`~kRE zk6K5TudOW?gW}Zf^T!gZ`#Nx@&wfgy!sP;MkTuSKSKSLv`o zz(Dlo{Q?>{Acg#XP7!GzHkmfDO3N!(_JL}$38Oo6M}6MV06%~IrtX)u2E1cGc42V& z{Xy+3`8o;=xc-({R<(L=yi6=U;`Y6J12@xdJ02l6o{@i$a2BJso)1G~SoG7P(olK0dPiq^+vwXf{S^x0i%xBMT!vK|T zl&!6!V++eLzQ|WaCrQCBUhHUcrUKc*cUQ@z%qLvhngvc9-9_yw} zZ=ttGsK>B%f?+fT(*9>m94K1FsNq;3#r{qwRKme<2=5zwI4jrv_(|HnMB?-%bLaYX z^6qQP&JV8s-d8mJr~7IY(%!_9?Rtq-NEW`~Su8r;l;hFVgA9VmO^BGcb=$T8$m;JP zE0Q`@AN{hVsj(5`(F375C1ESw4CyQEqyz}X4;QL`Y^?EZ1kco;KQzR&`EV|&v~Cr} zefy-a@v5v$V#vm3HE5=Xx?aj5&?_wsrDu8}EHiFVO2lt(U}Lknyl7#_j*mrb!$|EP z9Tg)H9~Wam8qmvkvOIYWXM#lpQ?f*PS3z{ky%&EblrQw(BrMh4rFho#CCC^AlEN|j 
z>b{)Ziqztl(K&Vw4#My9?*03QckgmMDupz|C53%SK1;J?z1UXfN_@r&>+~A<_a{hL zPR`EC6w7ik;vtiA8!|ASruhgxH3*?2U8?(SZ3QPP{%k=`_Y;p}`P$!QNP6R??A`)x zdHljMd~h*Hc9G6BVW79Dm{{7QM~JS6~|z@p19Ug>~yBYGL~_~t?cUg6JK6sI-|}B z#W`l$zdBY?_U`vKJe|n8X1$a#r;k__7@A5PjxJuDwwyQ&COmcN=+DVpw>A`QTbOT$ zF#==I1m)z~`V(=rbaCd^h+Q1evtqh`7}I`vaY?s{h!6wn>S~kY1TdUVAKCWWsso z%%H;iU5oL#LC-4q@l%+G9qms+(?2ru4sn1m03-W)X?Hit48L}%Zi1%8LB_+UC+YoL zHCi&_oc$O_T03@5&Y5Z&e)vMHjIUg}lkEru5Mm_2UFJ_(O)l@oHj2p*Pjsc9U8=&2b9RfC91YStF|IzV}>TUYQX= zgnL-5wr2n<5x@2pp%3g#>A1lkKQLpV+N%Sz9%pcaS{KcmyvmWZlfHNec3&G-*ts(t zYZ``PT|8)ct5`CvVjUO^@ogTPFPYvEJ~lP^y?5{!nVCRnefHZ^VH{tD0LhHgc_Y{3 z{ly`{dZG?b=014CQLVAA383SO(ER$u>Ijeq$RIl{JL2)yotK&s;13|vMSOdhkr9B& zEWjKdM(l%Zog$}g_-wH`wCe%z2yjOPfswKXmk5<7Pi{fre)Hu^ zEYQMtD=uKeg&0LhOe~B@_{N|2U~d)oATbD#h$_pmudt>PRbB*W4~(aYJeF$@Mj<1Y zq+dhq_7jRqDPGjSaZ!FEowI)v#sLm}m*$4-Qmc>|jxlGu=t5DZ88hTQgqqKrU5>6xe0|0zMO b`0@vM{o1p=6N7z8q(cV}Yvik0U;n=VO}o5N literal 0 HcmV?d00001 From 527957e38b6e8b8bcf49f3a7f1f9cbac93c982a9 Mon Sep 17 00:00:00 2001 From: nvmdava <86460216+nvmdava@users.noreply.github.com> Date: Thu, 27 Jun 2024 21:22:11 +0800 Subject: [PATCH 09/92] Add Zero Bubble Pipeline Parallelism H1 Schedule (#396) * Zero Bubble for Megatron-Deepspeed * Remove schedule variant * Integrate zbpp into the megatron * clean up redundant lines * Rename variables * edit README * Undo readme edit * Minor change * Add an example to example directory --------- Co-authored-by: ufotalent Co-authored-by: Wan Xinyi --- examples_deepspeed/zero_bubble_pp/README.md | 51 +++ .../zero_bubble_pp/benchmark.png | Bin 0 -> 119682 bytes .../zero_bubble_pp/bw_split.png | Bin 0 -> 59010 bytes examples_deepspeed/zero_bubble_pp/zbh1.png | Bin 0 -> 225315 bytes .../zero_bubble_pp/zbh1_pretrain_gpt_1.3b.sh | 367 ++++++++++++++++++ megatron/arguments.py | 10 + .../deepspeed_zbh1_engine.py | 110 ++++++ .../deepspeed_zbh1_schedule.py | 148 +++++++ megatron/core/tensor_parallel/layers.py | 11 +- .../core/tensor_parallel/weight_grad_store.py | 
34 ++ megatron/initialize.py | 10 + 11 files changed, 740 insertions(+), 1 deletion(-) create mode 100644 examples_deepspeed/zero_bubble_pp/README.md create mode 100644 examples_deepspeed/zero_bubble_pp/benchmark.png create mode 100644 examples_deepspeed/zero_bubble_pp/bw_split.png create mode 100644 examples_deepspeed/zero_bubble_pp/zbh1.png create mode 100644 examples_deepspeed/zero_bubble_pp/zbh1_pretrain_gpt_1.3b.sh create mode 100644 megatron/core/pipeline_parallel/deepspeed_zbh1_engine.py create mode 100644 megatron/core/pipeline_parallel/deepspeed_zbh1_schedule.py create mode 100644 megatron/core/tensor_parallel/weight_grad_store.py diff --git a/examples_deepspeed/zero_bubble_pp/README.md b/examples_deepspeed/zero_bubble_pp/README.md new file mode 100644 index 0000000000..2290860783 --- /dev/null +++ b/examples_deepspeed/zero_bubble_pp/README.md @@ -0,0 +1,51 @@ +# Zero Bubble Pipeline Parallelism Tutorials + +This folder contains examples and tutorials to enable Zero Bubble Pipeline Parallelism ([Paper Link](https://arxiv.org/abs/2401.10241)). The key idea is to break a backward pass into a $B$ pass and a $W$ pass. $B$ on one stage will only depend on the $B$ on its next stage, compared to depending on both $B$ and $W$ in 1F1B. + +![BW Split](./bw_split.png) + +Currently supported zero bubble schedules: +* ZB-H1 + +## ZB-H1 + +![alt text](zbh1.png) + +As shown in the above image, the ZB-H1 schedule cuts the pipeline bubble of 1F1B to 1/3. + +### ZB-H1 and Its Variation +There are two versions of ZB-H1 implemented in Megatron-Deepspeed: an official version (the 2nd schedule in the above image) which does a uniform B-W split, and another variation (the 3rd schedule in the image) that does B-W split only when necessary. We provide the variation version as the default implementation. + +In practice the variation version is more friendly to a synchronized communication implementation and combined usage with tensor parallelism.
However it changes the ordering of applying weight update of different microbatches (E.g. for Device 4 in the image above, the ordering of applying weight update is 4->5->6->7->1->2->3->8), hence might result in slightly different loss curve. + + +### How to use + +Simply add the following flag to the options to enable ZB-H1: + +``` +--enable-zbh1-pipeline +``` +The default implementation is the variation version of ZB-H1 mentioned in [Previous Section](#zb-h1). + +If you want the bit-to-bit exact semantics when compared to 1F1B, you can use the following flag. It might be a bit slower than the default implementation. + +``` +--enable-zbh1-exact-semantics +``` + +### ZB-H1 Toy Example + +Here is a toy example for using **ZB-H1** inside DeepSpeed repo. + +Firstly you'll need to prepare some sample training data and change the `data_path` in `zbh1_pretrain_gpt_1.3b.sh`. Then under this folder, Run + +``` +bash zbh1_pretrain_gpt_1.3b.sh +``` + +## Benchmarks + +The implementation has been checked and verified on various setups such as ZeRO Stage 1, activation recomputation, flash attention, tensor parallel, data parallel and bf16. By approximate measure, ~10% acceleration was observed when microbatch count is twice the number of pipeline stages: + +![alt text](benchmark.png) \ No newline at end of file diff --git a/examples_deepspeed/zero_bubble_pp/benchmark.png b/examples_deepspeed/zero_bubble_pp/benchmark.png new file mode 100644 index 0000000000000000000000000000000000000000..be46817d75d3d7a9fcc4144a7fd330f7459e5b89 GIT binary patch literal 119682 zcmce;b9iLk(=HsF9otSOwlzs6n%JJ$wr$&)iEUexOfa!+n;o2f&U?=J=Xrnc_uqG2 zd$;!L-nCY(b=O^0t2*+#f)p|WJ^~mR7_yACgfbWyEEgCU#BVqV&?iX*fij>caA##H zF|e8`!V}ON2~$lOGkJM1deCb)Fz|3oFsOf;fKGhS2?hpP01gHTdItaJvjT|!dliU3! 
zsVRZmtKQEKl27UF9jZG2Pe05rjEv_yOyshUBxr`6uc!s=WBgFdkH%)=hTV;uCe+|* zSx`mnf!DT;iQRQbmPO9gnXL&J8NF0rcIA@4i2pV*3!xN&V2$mY8H82ZN1(d3>NhEm zMgfq5|K}r+mRcl|D=Ltk_dmA*2ee5A04@ZvT@u0I6aLd7f{9TeK``8xN%)`JfWfa* zh(taTBX&p!K>f#qGz$KEQLOQ{T-<+d699FIULfHe53L94@_#(ghM{&P9KHVAA0k$V zFrUiHXZ4Qkc5&PtQplk-+6ii(E0(tumL&m@2GUYEIDBp*TWj#jd@<-B&}ujcI;gV5 zu-|Mmg1`Mu5igKNYP5LH%Vs(T-g>}@ax|Uic5qufnoQ|iVDuM}~QpE5T#s7nQiHnLHT2v zrNi0cSvOf`LlL2!Eaz4*!wuCpSkK(&_?Etm!$V7^T{BP9EY(ye~ zA6VnN|JJaC0R+F>1;e9mwIw@_)D|O~IcUTK(E_~IYCH%11Uuh=SG|ED&Of~0mUm#T z)SZ8P`w5!^WhvJLn^6ut<~P3-Rokz-=rHDuhq8v5c*Kb znWfLyL+|(IF~ht!fetUQ54Gt748B#X@M72xnHl_wa>-yGjp;qYA)7{iz>Hy5G2Lvf zF`XmC>AyICLjg0`Xb^vu=UJn>Lq0rDx*KO2ebj6{bA3OCfgT^lD&Y^2V)4_Th&BA* zvj!D6@P|zP?^KG$%oFm^+b+#=m)IvA`MeIW4_S>WW`mYCOA&7P1QAR$4{{l{x4R59 zN~Hz!&PTi?21W2WM$sUN%*I0&-|a6-6>=kTT0K|$+M7RCirNd4ET(h$gLytPSXPq= zQAvy;bdR^mYILz<|E+g?^u7|K&q{R}w)>+7&c__RZZF@pM?n>HMq-}()|tK1+O@We zSJyLYNx%zECZ_q-3K!BEm7 z@aLQD)id7qzm0Og%2%pZ*OdD3SE#y%YuEoMw3j_HXGou@eLZh9W#B=T{sWu-kNVnN zM|1wIJ^@3h8vSm1-j<1aLBCy>aC;y!E?d8H9@2>l^7&kk@{97R|I6*dt2|#Er%+p( z%9ig&)ywFW7__=9Ps1~5V2^a#z2jW69Ux*1YARoN?N^+4Z>{ol zy7lZS!x2Y~eh>M`iqoMR@px*Lya z!Q9zw`d6aQhuX9kiTk`bPs#KvyZy|PYQAKXliR=$?@OMS?NZ)*3F`(WnhE@Rk8h=I z<16gb6?7nykyghC2Zx(YDLcOXW(TX4rm(0+N0oP+&yY+Sok8(d#Fo@2tFZNB!dMUb zG?qz(DulwC?k}^T#D9-(Lf@c-;aytpo`1e?tHh^%yOZly;7`HVm^OzSHi1reyF_Z& z+fMUO`~Bte49V!hr>&Y-@$@vAwVglOB>Z_lh%LW4r;>pS6u;Wx32EXhc+zm(>&Tx|p;Wwej&~!U z`RFz+U9MfLQzMkeUknmLbno(T%x6%$&wiB8YB`PaY#{jD)Zc+#;=12@dX@ZPaNH;_ z>?I{eC3lTGzjMZI_crf;95e<9>RNLP`nP9tr^#mLcp$oC~&hkq=ce+Nm zoq9|FWI*QpKRw55yGN0)YF?{yyw2;T?hdD5o|>&MH~JdZ^{NWwf%<_WO2(nuC6 z9WU-zbF)h#g%A}pZL)ckASUv$2;Se@MCBcswR+SAPe=oCr46TR{Iiz)br3e&B-a|S z^pQ{o4r8sYyYAl-2XzG~dHj$L$osUKY+(NleUxa%5X>5$J+h;Ul(YVy@4b~vHMv5* zb~8zwf6aGzKU!B>?LYZ1)9oC@izf5A`9J!RH{3>Rt_63(-7Z&KP1kC8?=*hPEoXDv ze^VWfIvo9><6*bj_0uKvsjyGoyZJ3THIG4~B!e-Y)GWcyi1boH4>m&F-+sdhv2$5Y z?jQLxzJlq5_`U@KT_xMcnU}0*$GHPTA^}E@n$7Kg@2|s%IBp+}#qO&C0o=#W&EPC}zP&Zi9Kr;(h=Yf&uUwlO52fB3rmsGg` 
zPK-Xxr6qmOLJ7R3D#7!Wg%;C|pcm(;{-lGmjtAVZVOTluPJ%!9jM>7w#a6NI=d0n3 znhtKy$8^uIPtmF0o=!h9&*9zfm+9aUgY7#|nno#v*4#t9`6c}U9&PX$Iw7W$iNI_U z*rBL{Bk{XmW6G=CS6&q!swW`VQwIsUT-qjN=XymeV?%siB*Me%4TaD8KS~YSvUnlv9 z1YFi9U?rBDK}yZ}bqPI=^C`T`{X)-hFkRZc@BM zhV&5aFVg@Ecgys4Jv>H-x*N(Rv*-9c=jMBz%Gx1ZkL89{v;=`}4iXmRKQt{cb&CAo z81`lkb$c`>wDYM8xL9$bsY$!;>Rzbk}jbTz7kNbx>#nMs$0n~*`n zK51|S*~-o4DKWE9JbdHM+f`6A;%HDycs${!uS@*%&E9Y%M(48%^z~Ksr`niW z8@)}*q;8A>ZC6Hq^nnG=7k$^mb5G|??HobJTe-oK+}U3WO1mczS3M6=LlXvo2r{TT zNzF#D=3jM8z0V~-PH@QeubBuo_^+^NRXKL+S;I19`liJQSTPV}<^LNpOGXo**X8v@ zAZUHb7TJEB?773^w%%=5MaBEOA;0BaUw~QPsn)FSycStpN=^(3^uTd}K_y2U-tP1R zTWTSFJmRO`sL<};t*H$d5Sc0X+%`ZZ<}1l_Nf41Eb=Vo;-5{^BdeFXFeCOFM*6*|$ zM$PR_@j$@zZ1%3Ch3 zJp7ZacRS^zI+Zb{sJbPP{p>wg|WV}+LWy|Ol5K!0aadWF-i~nOh!|tJj$;2;r#{REIefga} zr{i@xlrdZ&nXkV_=`SPG+bmlkOw%L=U6E}&lEX{jQ)7#!L6zCwp>{Z#m1Nrf-S4|_F>`azUMwGp z&Gp?wP=j7X7mD%*wrly{7#jy^QvI&BO-A-#iqP=Qo3INw zQ-o|F=&m+bM}c~EEC zkxpe>F=slR$mL`4Y@nI%)y>>pTa@=~)RufeAN`}2XVCbAa7z2lQyPNO5D^c$p^{$!Au-FM4wbL@VEcQc6>a=bF5P(NQLMm~OVK=XhikPsrI zGNP02qX>KlJ`D;pGEVGic_oQBKrPnMyh&(tzRV1Ghg%`!efI0 zLn#Crm zOfu4Hx3SrIW%%4fL8n4FhMD$d3?tGP_b!DS;;yx=sfFW&&4ax__#e>ap>(*v7S&Xz z-O*0e{onxyeu4i`{BPFgClNTKybg=MLimYz=41*h(+6*T_-eMGHSPy5YYFoI#&3~% zjOz#GFQ?9fm@@y9+xr#d(GvWu$PT6Y7i#@K7d}ECG+V#cz8c#7NB21Z$`}*;TN7(V zhnD7lGdupzPX!)J1^{gDD^L%^{>7dDpOB>ge~Yx8=nH(5tR-aS{%=+zS}cfjH!)^BH;szW-rdq)98_hu{$rqfa?9iQy&4{+V62ufq#+^~<3xrIFV=XvLl~l9r!(Q<&Lo;mPi{4FD*k(~s>rex&?PfD!rqbf% ztWkYa&NaIwGU;SrQ?o(^y`D+Wawkdmp?>LQT49;bgb}h)Q2u8nl7g3lw8%##$^W1A z0}g=W1!<8?Y8>u=nlycMphgNZGyh|#L`(ss>fecMV*k_a9KHnrs#TPmDE%|DBnNf+hi8!-q239uvpgxmhBTf zEqHluo8#VMnG!`oKp~r7-6dN%Dv5rbA^5m{xpHv|ok|#j*w9`kkK^vapY*w3GAcbj zFA<~IrmS+=To>mnjbq6)waUd&TM`!|cZZYwUiTIis^#?RRmT(_swhQ1KL(xXgM53a zxQLVb#J_HzE}UVMP7Jb?gRH#s(F6dxIGfFPrcNmt1YwKqzR9%GaWWp14)3oE?d&B5 zn|C-~Y|e{0#I|B*?GBeQW4&<`SrqPbix=Vq>?Vz95wGLTKl(;@s&%-Jrk)`wW^+7m z4<>TA!c8p{t95%kCh9)g@33st!}6Gq?4Kl$ z?5S2V_bvqN@;JWz=DyITl*u};{L*bait=24)}JTI@Bi^up;oCKA+fnrqt}_R-X9E0 
z!5kURc^D3+x7Fjj)qUrFwpjhUrlfV#3sL6bWKIe&uvhjhm&?c5kR0dn<}qtMQv{u- zNnDFZBHriWEBQ}F4eSUv*A<(SKC zi_+AYXgqhiuJvftZUjj({48M{9F+_FMzEq{R8+!TtK>w_g+l(V=>ZYT=sTuvU*vM*BjsOaGJEP-blo zu{#FUrnaw}Poq;L-3GqTeL{_;@~3m%bJ4|!;`IxSyKP<%x2*;(5+W9$*b0waCjAA- zGop?q;k5ca{n+T|OKcd4Jt#4uqS<7TxFr@g1cpK2y#G6Jv`#ktODN?)1_u1ha8Ac& zufIRY74^D54%F&nEuDW%N@LV&Q2P0G=6h+Qd8!cQb_f`=ElrBWT>*w6KdKC}(>x9! zCME>R7(dW@sYWsH^i`~A0C9S#sbO^gR=?NL_M^&@^!shQ6Kke;3_TDOg4U-~aoOMQ z!I{c^w@2Ir1uIA62)$ZtLX(OO5^~-eUFQPyYuqocZPre_yxx^Ym~u4r((7gSu+(vUCdV%qWR}%&%xnI>2Q1Xt_#h9Eot8+{iUfVWmbb zb%@2LdS-V3-1RI9+!Cbu8`y>^c`DPICz^-w+qJmEpFF|Num_$~AS$d<7VAHz1r&ep zRsUpKVYqP#JkC)(WNL!qs@9=E$7Z}$1+iqCnY6x@M)Lyujt288;up;U3HyZ_)LwxX zL`nH6?rH<}$Mityv*bks%c$g$_z-@T#dF_q`G5kY!fy)=iU$&Q>5MrlH^WOqNY3{D zA2ZEu{WR6R_bqXvc|M^G-EVw{J8o}XCkc)<>1b1*AWbiNl3B_?pwk3US=`{z4&lBO4B)AGNSn-U zEugM9KXOYq&%9;ZVFdYzn&XC}Et9ZM;gypaMv!0hx+36Pune4{TfD|ye3sjwDOVR5 z(rh$aD;yGy59zk-3;-7+u>|gy>yf7I0|NtfsR(|8BcaV&t@c~Q#Kd`D^g4WAZjIt8 z{y_0D{)sT1LQ_{TVh%+n;4<7w0?k&#CkmVIhVj^$--#jc7>ASD+#uj$LsYg+V4w_PLfIXS zM;&dkU0bX*h!@mPV8B$D1;GolL=Z0ozySnf866CT9Q1QWbXWBzqw3Y#q1%~kru6z< z9zjs>awe`Op(;Muu?w9d0Pl9Nsboc=m*>6sN^n9N#XO1fM4ewKe~M+(2VmM4ss=_p z`}5oG!qSM+81Uy+}RM3rL9aI^d@^~DL{|+vlnJLW2+61%HBc?*Izhy$ZM4({R zd)}F0k~Fe>mw%+*Y2C(Q)|Wv4u?wS(_>n{@QhJ%Th?Oo8Nxq#x`(icb_ zei6*rXsVbm6stPck+mMp*cAsyZihysjB|)O03C+O}K;cP%3TU#ae4UGYCkMY-ypcya+RIDLEYM`>BP_?wYo$)g1pgL+k8 zbFeodLTF~A*Eb+x_n%`r9u~%aLBwv>DRRb3-9s%JoS$2PMR-Ocb}rxBMwV*yq7#V` z4s3r#@lb3|p+^n|vNG1{b%w=vv#^O`fcOEf#C53e)#UmCSP=%&v%(C)Z**b%$dVzZGdq*oyZ z?Nln`d!?mg@;gI_^E`3)wAnwO&zFnQq+?eROFDrxfb3?6f~Li0&2@Ww#zt%K_GmIs zAoGZ<$QfeJ{5`Trt^B%b>}a?6Pd3-pI)NWr9&dv=L?5LhB^R<>>mVc9?}syP_3Di- z``$|6coMI8pbu{nb^BG}S_~K56Nu%w+iiI{7(9c_EV~)#x@-$(`K(9u(R}$HaPaAJ zs}uNGtuv-lJe6HrFpOJol&Juvn8t?XBIo1q{uIXf%*XFvNWMHJd-i@++N2A`Wsfg#afTxNG5m_x$I$ZnLn|r;kKJ@CkG*!Z%9y*ZyVL!0wTH>B%cCr}Zeqn4wm}1HF?^%j z+57o2SDMPaMqpNZBIG+WC~X<0nB{^uoh#UFd{XpdOa+tfQ7&iMabbcv&_oKANZ3vF zJf4I%L=qK1?ML`fS&dmf8trai5W*AaV(?5nVTLSj{W}rl=6i2sNray5uH!9TLwXJ} 
zH?(^0btKcoiAzr=GPHIlUTt*y&U9tg`J6NZ)#(Es5fqPypHq8jlG@g3;z^PRosXuF zE|wrKOY)oJu^3uByxu}@qgUF1kLUP81sXfxKLuAk9z#e?KeGkBxnN|{z(Bckp?Tyv z571Jo?Rl*jMgnJ5=bF5aE~S!9^Lbn`#|D~BW*Nzut(tUdA`LK_;d+{gP@6%*Al2mz zlaZ%4;<|I$EdR9&pjpUdH_w8P{5uE2=Pf_rr19qfBC{b6?MOpW&~O1rE;H252=s8b8d@UHzNeQ@o%!TTfR@Z&lUvE9`a-< z;`4yscN{C|F9>5{ALRKK`&KrAVC+zdjZ*}?=QDoi=?p@I`>1JFjhR@1JY92D0!<9@1RuJ+etJbt%P z9adUs4wfOqiHZ2`$-a@F^(pulQ2o9!ujj$E%M@5Z5lndBmxJ^)DM*>&s|da5?K;J} zEocrHKIuOAz-}G$ovrFS0)dZgzCy){6m@XKfwMHuWX8wj(TDM@6~0ffC9!VQiv0D@ zQv9n|Zy14_R>P5mFxRVOtc1Vg!#|C9k7>>p{XvmEvv_b%?~7C*%wr@56)x|yPIHLK z`>g?0yU>=u-K)JGKVDH?&g>R>fv+W}rYoAz7$!AKa^(_v)RahuNz7z8brd?M5MNZ$_!X-`I zN%WF6<)Ow=);hz2RhHb)vS%7sSpIv}pgV2VSK7D{rq!A)=Wof67uR}M)^0@hFT>nv z&j)BSI?9fDBnfm6KD@F`ib86}Z!yDWcHHNJwu(dt7rYiRmWS0&>Gr(en>!v@@fxLQ zru2+pmdqCY=P4%v%6MEx)Dwh&x+Sq2{)74$qO&d^{$vx+j9d$_1<@kG~w4XdOinx&+B__KZF304=sK+w} zAXe)DhMk_>=3=#Z0ar!f9`l?DRrXdOhP7z5%w%UEGy*D~+>9R1ZVb01)JofF{NtL= zakmPNJIh}l5k(pjKYrM#HGIx%4NDIVBZ|2W zWjbQHlBS_pE=2p%52mdcSU83^ZO_)y@>2v&GQ4|<1BORV;x|DaQZ6PTzltSgL_Z^D zLx?4#5V8Gzz4{s$vC@2O*|lz|=igF9ORQ@gbWs(lJ^HNi*TMdBWaYN?G)Ro=itRM{jG5htq||Bf{o*p4`^JfYT+J zd^)ND!XMRAKW1k=Cb)inODkslKA795lpXM;K7fz)?si6Q+_lLj)B6YAN11iwFF3e* zeJ2Zlo>pKV06%%9MLta0_;!CxTi>297e53PO1hBQ|9gf7hY&3+h#CH^X(}*#I9)oZ zvndxxxL|X^GZtvKywSM@>_Av|*yB3c8O}c3(w}i7!R5d55T}~a_TnmMbOGf*EG=Pp zBu-R0Bz^)QyZ?}WdMH`~ShB(2z*Pb=xU!;jW2RuE#up>SskG&%CI|#(C^_;Hnw-NS zO=P(^ym$NHM8p4P3HG9FEqRMPso%ZltT23gWCT<@o6t3PwajFvYq{Y)tiUSKc8)%k z0I&yGH)H8YWQI*lf(55emytwsp5TdrKJqCjfNR;laEVTv54di$f_!bv< z`>aZ;UxQukjhPU;pwwt(@wX)||<&dN_KeSyA(aOq^!m6ESY1L0Hcel3R~uQSiI=8 zpx&{^#-q~f1cqqyO#_0uUJttu5J30W+w1PYFSQQ`0lSC~b1kowI;*cT>W;;WsX4n- z_{)pyA%MLyLVYhp7}F{eL$ko?lum?Qe8F;FF8NWjojuKl#e|Ci$MF$gKs~rp!g>Ui zpsc9QQMyk~_Vx{w@j=(6TBWY;na5o7cQ*((36mt2%hjgR?#yE-&!A_*w55YkU;f)P zbYr(MQYZBg3Bhl_zy-f)vTcDYf|EArN8#5Bb?;HuM^@m?5m4$MiA_>1Tk(9c(b+2Z zG2jB9n;62q+@`mKr%T7f#f3#h1!%cLfBA@fzLfno*~1Gn5CK$;I$-40_izUF$R?7 zDhNZu3nF7KY2UH=3oXSIfSS}Yp%cOpV-7!&L&RZ`u>V(rsgtaqBu`s~Hl-q=q~RQ$ 
zs7FX#GZHT}wVqR1)pN>+_)*>A*e(cD37nrN0IP>#-cmxPMDFIsDf*$UX(Wh)}j=5mG+ndrMMl;=}NG zZ@cQz3NX2p5;_>^-N-7%M;4BW2&5zlbZb5vm39g=q{gx#+-ijy10D1iw-i-~uU)zPgt!#E z7#^S`UEM64faArnH<4=8{`gC6ycg>6W|Sfo6iS8KNIkBVu{3kuGbhz*KuqyGUn8;P z9nWbaY7vVy@wM)mp3V<0$eLU>dZ+2xB^WR6Xij#FpnAOgUS`_X^^QOG|{R1V(fDOZS~) z1OKU)V!4x2O6|7+lB9zL73;WgX1nV`AV%zym0D^B6YK9Gog6XMtl0O@33QWA3Gc^i zr_^s`h2u?gcTyS!JW1dx-`%7|X1&hjmtu-bNV+M)xuJB)@yK$&7` zG5?q=sx)xZlchvqX@|ma9&iNHC9(1}rbKVx1KEO+dO79I-Sw74VrHk4bv5ex>GbEq)oA6IV$F~+%* zzziHZRvqJn$X1@vG7f@-q9IL5s$9uV<1@1VEp^D>)K5ZDM@q<&g7bucUk|(1HPjXC z*dR(o@gnNu()_)`RCa&U&v%}` zNlXSJnW1@?rBW5k{8w@E7otPh9M*WXqR?Lq$q|Jn<9XWG{v|G2V-+RJb-(A>v_KMMksla0qaw&chtlXD3EkWAqhK)|9Bnau z+qFWtn`5{rdDwK|@eD}`6TGRUeA{!ix*-28FPb$Bh~GYnUYf^AWacTqfy|YlLUqVx zyDE+5i-yHH9wXKJW#02kg{l}E3sdlyHo9M8rU_j;8i&0W0KR2O$%s+~98Pnpy_A>l z;gY<=%M^UMZ*UvmFaY87x0)g%K3!OZjcb(C`Tig6c##6F1$f&xgQI(~LW@O~|_S_y_I2uQbnHuXVzx?g`V8)0eZ+`6&C zaD3-TzCKfr#O}^N*sPYF{9VRId9l9Jhr6QA_Sbw;dHaK$rTLC^`qd5Cw)w#3=w!dp zb@Hw~JM5r(Yqlv7#Dbz)6dG2;Bt>a*5P(YLLK#qsdb7l6>CW`=i0&=o8`M3$Wb;=P zLILhM!9wz3FbJ$<0+tiw1C6AfF75fxxsq-eB+$oHd*FRzX>$K|$O+tO98;53O6`J- zEsDxd4ShsQx{TZf+rOhgFboS($`Z*ThWIsC;`XJKB&t`h=zW*{slgs3X`Msms{k+_ z^mQ_DTx%iDJASSXVffc7Nh0>b1zBDDY_z*pA|dq8fCeaNt?@RWerfjAeqa6w|2-H^ zd4GF566M@7zwfnjyf9YOZ-w*6DT1lVd@dIz+5@py;;L3BtLOpSw$encF-SV2mI^Gb#{ifN^)m))`LJ;>;sQi~fcirf_V|;W zr0==$S=IoDh_~>JY~M}MH^^}S=8s71iefybd(oTlW6M`>XJDCoL^dGRmlyHr2fJDa=d`AmnDK})g;hC9 zRTHWC0gi|Ni!av{y&@xT(TE>Whd=}`>^hQZx`j#R*kJ6t=XJD^3Mhy)p|UKCFwK+^ z6Ck`^Uk6c=GhD?thh&F@XG+RxK1KM4Zlax5wJhPJd@iftJ+jwEJdl@E(@>S0u&)O^ zLl)5!F1c%B8=G6bcO;m0FHX91$!NN_c=%VAI)kjn48 zo&rSx*bknFJ}0~rU{)wP1ePsjG$ZaPs{?3#$C3T$$6%VA2+ja43oJ55aHf$CbOhq~ z(Rh-084lxkx6l>m3>^5AML_VVIERVz9_I4#4??0IZzS40G>eJGfCAJbHct%l^d>3+ zmh3fiGR#M=#P-uV`VRJZrze)JXVm~%9S>&tH!V+ z!Jw9_(@o`x7Q`Kf$ql~CIg6d}YphhS&T5irb|ki?)QY+f%if4)?y;Z_#PRcL#riA= z%%0)vZJPWJ-p=%3aiter>F7NO!+PEaU#MPMY%TPFV$q9J8M}dg6SqWMYCy71KADGM z|3L}&#lc3qxS!p8iqX!c{nXNN+Z*$yUKQLc9-ZQ=Aro-R|8t|EH2J_7$*#a+EbM9! 
zl&;dg-0+%9zY&*SZ66G*&s6i#G$FQPj?!2v@rT-&Vixh?54t28Gx9NQHvpx+=aXUs zVwj9_NsPw~@o;vAlTh5LcpmGA#Qhs@eLk=%~u5(J(Yu zqxS2dc3A~_2>hbK3EF)cH6N7RGGS)qK_L>Nhj~S;Z_reFf)c!``p|}EBn5lwK)yuv zUeF{`DPayGXt3XVy*%mjIt2mCaQBZFZATtTFcTX)x1C*6o;;?PUm^18)Wtl0#5{Iy zSBK zQeV=y>pb^XMTLFik)tWYMu}wu+M5_!2v0dJs-9E)3duoTQ?0lkKTA`sLr#BQucHG) z10o1{+K)7K6v_zd9Byhn)|;55S4>3gFQcF75$_rXid|;@PC{t;h0}eNKS?k=`qAky zh2$Yt=5^-V`n8s;*{V}5rI{f;vQ!*`WwXOG*=AC8|B`YgrF_FA6;!w~nZ1^pObff7~!t7n^<_O}(YQ8J>K(~Fow!pIA`9+U_v(ITPEq4)u zyx`{9LBvV7dA%6g2l;*RIoVjo%yfx;+fM#Oowq zV9H~5>S4;~>hLy03v9de_lUaE`hozGdGvve!LJ_?268jL6>XK-f*zzkLj^(?S=`49 z)qCRy(VT6=iC=;{X@p&+4(f6qJa9*M8SbdBhwSh?2ay1g_Vj8-ssO}{%& zePg?ab7&5Ea4bWlD2o+JuFR+@J&!BR>u_&OX{@+4dDu^)lC7wURD6**$^q5$)d;>2`0-67>j8hm3#5jhPBLN`lkl7>1!IzP&bnl~ zpk`0B@xSXMo*R@lMzSH( zV{(+Yts?7VeLYNiaf1nHt>_g9A2;Q$3Y`wZe*NQXT8FVVG~dg!5_I zu!lu|RO;o&mhpB+^G0li$`W{5W2;@exv;_#=c;n`->M$j1^W?O;a^*m*2q_evem7kz)K0X*PGVHK?0`_51>q`ix+yu zrQGA4qxt#0n!q1S(I@y*_hq~|&!5b;gQdRTUPOz}^X>Q~xkkn;GzxR>pWbj=nJrK; zaiO3l3Xc7=4GxuPxmDO_8eH|X=Au%Aaij5$BvOKY-qY`6<0=YrGYs#^ND9`8}EX#Wwp zOC0#qtDFFsn+u*c`&!lYIjVd~d99d1HuGQ~q+mV%b&my7@e;;UY^e?xyTtoN+tzWH zQ-$Qd71i>~2O2ZluFt<3q`UD9E(2wS4%~JxRDPf>Lv!1?8j^&gI|)6)#@%s?2CnEP zcigsK?x&z#PYTIX4^tCYTfW7^ltOS>a3(nVlM$qr(d*?u{qGpp`9vuM1lCi8U8a7n z>QIV3n>>Ee-v(6)FB`T{e>gcdj%`!)rSw zDHAW&XEz*)sreT4DD}-Da)48|MxQG(fPsY?Px#C;ll`>{JxbgF zE8|1c7B77;i_=<8v6BE530tUcrsT^lL*5+~sRa2#&8 z+ew~BYPD`MIMTuK{`xSu%Fj07m?Un`MRwxREqOUeVle0-035gX5?I_BI}UX*ye$A_wjF!}!=i z!_d32{g8UFI;Oa2zrz8del(kJDUp-%je&7!#hb|_YhOXs8*e;~tBV&arhnTw1#RX) zUjHshhP{RyN6b%^#`0WpCW370XtRa>MQ93M4x|6NuA`<4po6S z`RsPw&u!WaVe_Y{{50k>05|0LY?^JS9=egTe#iW7M9kJ__uWqD-I15;1#P0eb}Rm; z0~~KTXwREvZ)Kng>v|~v{f}^W!EAbYKG#a| z9~2StXwfRgE%-Tno^p&vECm|eAo0$-8D{us>O4LeB_Q5cV@|V4fT9OE!q!y7??TN!adZ&OA*@UaLrK17Y5)1kK?l4za^Q7XMX0DZk zWZ=zD7d+mVRk*(%=Peu&Jo}m(Z5tm_m2WW@5d|*a8v*E#6LhU~9 zK3$tG%I69-6GPGKr&He;t-CujIXupN^9kj2YOAjq)OudL0^WyBCs%{M`4Z7XL-Rrl 
z^D$!H*mH9(XQKhw)fnRDa}(JFh$cITUR9O>!kP2%vI1;7Kt>QoA6MjPQjEx-4)2!ha0$W!Xu-}sK$NA7`#ImJtJX9gcHx=Vf71GQ9nOQ z1|hIZgJ|0X0qpQ3b}_yOj6FFEdVA-OnuLRwbVB%$#KqtFHz%%}vkp=Or=Lkt^4Io~ zztT2Zu+X#YctVmynz+Ul2pgt9L?_cI+KmM#i|Z+hA5I;oLP!1`{po~e`C{8G`m+TS z1DA|X1Yy)F<`9Tlg#i1!{9`&?=%6dHvgh2fBJoQ&x#wIFNbUsh>u!3s_})kinp4Se zaZGTGoQt_=Ml^CKTwQ`|o!%rW8ei%XGM%=}VWsUf*@@a`yRU$R$j=^eL06K5e zeN6;?1b!>G4XtCF>2U(&%SwO)a|w*|DtAiV00i#Le*KU*?KXbJU# z8YsT1uTRUWO`e)&NIYFMFf%kf6I_$hq@)m<^H|nXAFQ@J`<93Y7~W9fC_tG%SL!t% zI_ibpE-&APE0FIQk?K}O5(hm0q%%1_o`=@e&Uhdf#AP0t0pcpsc{!l&}o zd#nG%i=0Z9M3EKT`*2w1Km!#7J_+@>h;83z-l5f^x9H-fpH}$81BUWhZh^w-)xm6t zVcr89iZ>Z!IwuP-njKGrPnFOqRB-+P&jN0q$t3a7aFXV}hCDFR}WvU!5OC@A_-iCCn6^Ojg1y}b>OQ0&=^Fxa$|z&vn)pPdR_ zW%gu1SqZS8gxBZx1d!I~mBE%OyOl0X14a5G+7;#-Y5u1zc;kiH?+r@$o*=S( zDEep;N~Fxs37g&XY{?5t_DQ|+MPEd#!}UqWHdoY=8i(HfIWO0~TNx%R_iF~H)Fnh{ zG&b|YTTweZ3$x~X?$OcXM&=3r;!kl{dd&u|L>zPy@O}Wf^isVk0ZHQhNMP1yOBhrz58Wb;GKnV>6In<&)rVNfv5)Ty}IB{~D!RxFXc?G2*q5Y%cYhgv1Kd;%GiE*hN)avVtSq zx3IoLDRy}z&G7PLo5;anlEka*FpdzTlBw*XCYu*d7;ixO=rGMu*01qX>#c4T$x>=o zsXu55d^ft`j9_ES8!VguDRjpA@ZHnc9mQL<4^ELbn{5rcvL{Lv>H_S5>3cyyoKzvX zj6?VJd3~fJCXVhAfid0JmyZpdk*=4s2^@#NvBBMVz=u(B{GOd*l0(L>NAp5OrmVfTkb@$ zPrT9l_KMRBDo^QND2JKUc&{7Mi=e7s0jweVGffRND}kHMQKHp70=M<iAT5Xx;e6Vw|NGGVwPHzO-fDNb%&H*0XZC8=Ug#3|cmf-Ua?2VAg7WU zR$cD}IaZ0Z#d@1d&g8aM;P9`c=W=UTdk2ds(tyUCEycLDN5RUR^jh|v_jw{^|JCk2 zI{o2rrRv8Zh$dk35sMoXB<;E&JI8VF6x#~b@|A_;_T`!D8yDB^(nY(-ly4pj!0RtT99E+{*1ke(L{$t8?_~VXV%jq^PfWaR=4;00mFk0NO`yHIFFPm zsW$WJ<8f?q3%ow_T8leK%R8Kc(q3AjjUu!g-WZbT@h2o(Rxn-tUI1iDu9r{By4ghW zdBT=ybwGeJ!ZM0gxF_PsUda4(#(G!t{Ds>?gcbRy--FXVS9##{Pg9@6lgZw8#E{j| zdbZ}rPbxJ!Quu_SXjaVlPgV;(N~1(GX_cS$T8O-Wk{P}-9O~|a$rMtgV%dYI;WS$A zuMsAdTI~pX%VT3eVb6TI8WNI<4e`uce;EJE;Y?0{2IF{IQ|p>6lX9$tqCGN*DBzNc^8VUhwL z<{_mn3qaAzr6t((Ih>rXEkb`UAhBA~oU0`2t6pPrW9wq_kA8M%59Op#=|=4RtU8Fe zqjR%DSv|O@$rd$`#U>kqrO#w>=iP5i(*M;$Wdr2LPhurKGAcf21_tJWIQgv*a@1W6 zzK#L7!f3KvkJhh769BkD2&^Kex8$Z-rR|cP?6jyTvADpIXrz 
z{T}|5Dd689TWNm$ObRhc-Lqp$2oiCd&YvnpT#S~XKvhJ!W^2Rejm_5x1B$Fx?*<7sR1|!r{dSH+0HB<^HQY4>3Mr_hsQHbVZ|L+ZejnX z!XmyfgcCh%&x?MwmSCqgyg9GEDMI1Y(0z!qsh@hF1e$u{GRcwRc$6s;n}*pgFpU;Q zAVKzqDNh1H0m~Dp3BWNf`KK5UoGHj6ZPsCw*)e>!eOBe-fnx9Q@ggw0Qjsv_wtMds znKPd=wsm)*xlhhvnQuVBPFJ_^;Bwu?omoUdv|c|Unn46R)jQwwAlF4R|Dmd@aq&{} zkHpaKmm@#ww#)G2+RItw8|w{C{$CQ)NS0W%s`}MDKki@Bo+=b&B{8TCILU*66wV$} za2x#yO&oO#=`^03ffZW3#z#khJi@V;=1mrwZ#4cy2T&?3mRrp87x4ZtILQ1B9r5Q% ztL@J(%beH*xSbC<_UEAD^QCatGB1UX>kSBlE@n-?7ucmwW3Ja`+MWM6J zO#!GOKV^qw3(HmW%eH2x1J1?zvv&#;XkLAfYdPvq&Ra4YFFYHMdB8ns zl`ETM#C=|$Zj67@I`U>N|1?Wj)awt85QW+dRDjdJn%!V7lwq^Jyl2<0OX9Mn;YmkV zxi1Xsy4j;nz3b3jE4kpRv$~ux=Weu)Ap<;(+|F9dM~d-*47rNOD1sisZ=i+}?B;6C zw;eH6eebGqEktdqPns)69(MCv7l2t;5n+du{LqK9=>QXzD*y3Y$w}4Vy7K15U?lAu zleeJ^EvIUW$+_5ta-~)=hEf#bXsqT$g<8juUSO}b7^3M~9@2d+m)&CR6l@Qg*{fM& ztkIY0V$B#DUC)1Ts2Jdk1fcxaJMDZd&~Mvy~nB2wJZe0h<~O!o;7| zIHTE$IsG-^c0v@Sra109JV2C|KuERsYduc-=;y<)i9uL2rBpLQB;`#_YRi--M|0K7@my<3-Y`WmTIyuI z7;aALmEXtm8ka1sp9t=c=OjCq9$L*+^RP6+a*^Dl=-o;L)`2)p+H)`HE4)KcLq%b=QN-hxW zTSFB=OzI6p6h@LOOjKrSInS;@D|ILzaTACjK%Se1+`1Zbe;hP^ELU%A$bu>QkW-F? 
z%LFk3<~i~%bAxkNsmVgiwnkGhdQ!D+4ge^0%{%yPt}Stm#@?Na9!9) zf91dny_**JCRVB?JVL{1z!yb)%930elM=A%Y&S15TV26tYJ(A>mCSfu4;#-Bt=eJD zeU@2miRI?goR-6~^CiioORt2FcN^2)MZ{p`MBs2puyoGxwTGWs&~iiML$y8#?%V28 z`GLtA=}jkqkd*(U>r%=n%$&loDJRGV!oUJbq0teW4{FSLICKdX#y5eIrX4-Bz>muU zr1Te;y?=>}O%lPlco)8p6>s1c{A;46SCiddifJyX#WOc`&Pbo)jKJy?B{V`Z@i{)b z)iNUKA+zCg4(rL2WbYr_l9fz?uu-pw5tn(WzrI zY%$M(W|N;O+5}K&Lo%;}diab$-@oqkg``u)xN(svSf|-#SXZcnF5e6l?z`XC%c>&eayBT&B`g)gZ9&K`^P^dEBZ(qa%`P`z zY+No;;mi|+teFu1(8YotsWxue*Hjt>ksKm4dAX}(oQjlH6do31dbf}KL5?+*zIilA zK}Ntu<^~~!`bf4X8zA-{r&?wri4T$}3mUw8mw%yoRk zu-X#eFl-5yn^uP_Z7lhLBU43VqtMa74{LU+u@0t389hZ8I< zHzzj~McQt!<2OWiEsvYCF;&eStQT^L6oO6w@6_~T_c_K_AXk~l;dHs<+iDblh6t|* zcNrS(yc;GWhD@yr_2%VPFHYo6n9i65CL(-Pad7pQ2t3m1t?su3Wv%XYFxmP`e{gaI zLO;|HP1l=r%rsHTCj@(_bb-=FqFj~jXIYuO^8$Ar5%o@A*)sEsGdTc9lUCFJ|)y+o9cjfsRLGOBaW`$8?v=VOgJWvslL4`MB%Fe8|Qm zNEW3_l}el_w!NSD%>;9Z_SSwE^!u4CT(W2w)sGyN?9F!o zY-B!BgCuf5HkLn1bN>sr0(;+wI+QE1Zp`>c|6}&;A|TM~F^oWgKss4uWivpr00gek zygY*C`_W55^kL!0@Q~_aMp=DL7~}BPP*rs0z%-a#VN+G=iXSb54r|DWk1^7Rf!MH3 z`PPZ_;m-E^0KRn+qu3N}Wqekc?kj}7wuy4gEL7SN9!pP*>?O z?XCG}7pX4+OA`kFxpYuRI_2r6!@hl0MAPqe&*P?psIGEx@&jPi%E9j;UZYfwwp6 zDH081HgI)CV7~H0k>K#__sY0gfipEgFLZ}TfifFt%st!Q>6-X_!;YbLhw#>oMaa1f z{GtuA^8IQby@an@{pI!(=qCuKJQ>Xv15_0~55I@w#sDR^tLZ%;x+s&z)Hx{gaVwKU z&e!;Y=0Gh+MKGl54x8UOofElX8=?Vq;exQcp8ib9aXR1eZUreg}J2Tm79EOPwm%I94Ybym+(_t7k$ex86b zv6%s-?p(xea-(POZ!3!Z#7!HsrqkH8`|>^{RQ0AxL@T;mezzwJl|sjOj6?NY1bKC& z{Tf6(f=Y2@K(XN7`dYK@8MFt z-zi9-#R-qLn?DhY1;q_6$zr{w7LoeOMwD29~N8j=X3f$Q`W+wHPxvdsQTK5Ii`~>x3FqM?S^4b z3Wq`X8{L_Oyq8+He6Yhgq*tnlCPs1h!kzj(8=~(#hOki!{_L=Tlxc2(O`nF{*!zM( zCoff}Kj!h4XZ5Y@3B>mPa0^`9^EqZL(6d=B0zh+ZZhR_VksPyDa&cTA`5$CujXt=2h)LVUq2S!HL&B=ZL6;RCY% zZ|2G_Qfj;l6>20iWB4rX?3y${popQ1MJxUMbzxfcayW*&5()!a=c{2h6}a=r7x0=F zUFW=9YDv84=~as`{ctpZkbn6El2;NHXrVz5C5@%#IM#qY?u- zY?bscm27aF70$^dO@bcXk*$#~3+0lLUZpp*ARcx4<#}RCJb>6pKZ0GV^RrSkW4m&1 zkB=X}N!L4i7K*=1u#d^Gtm7qV!T0 z=vOb1i(+~StyxclMW%u}`-+!-tRr0|Sw<*ux?sKG4nzR&tINB}xPbx3gn~a6b5_o*GkG#uQ(4sen(= 
z?zE$Huge9VY{YMqMdws=l7b){vS55C($Bu_IcI!T_Ul(>QCr|KQLYQ{i278_P~kIV z2fS|gEU#NwULSUe?XDPCm?2cmSTBQEwt`JMfo z?)r}jWCmRpL?Yd9wUc}^7AgeMQLI87ZPlWfk^+`vpm(q-itX@8A=BT$JYi4QxZvNd z(l>P8A10sqvwGF=8VL0&8DMMlvfa*0GUUacur<^O-_|RxVsU`-v%p99l`VA1&{!;g z$uO@yxQeCX#m!MPzdrq^emt6I^m&0A&0SS%|nQWSLqxm5UXxZZhT=ky~{Vk7e%S9p1~?9a1A-PK|APde81QuB

lPI$i@8bs{&A=$;a`Mr)1IKgk6+X zDd>V2RV1R{xLS>X;!st-`#V=2D2tBAXMa7tHCPXeKpcOGaUV824T>Ue1)n-P1v(u) ziLV;iL*C6Tlt}4~!ZrhC>ZA36WqNO{M!!>(^YfDg34|(RD!-DKm04}IA)3^e^okJz z>#fB_=z704Q%H%r&2Z1*(;EjdyHB{6G3>m1z#g#j*s^){T5F1*Z`SfIjL8wTpBbNZ*eJ^3L2dV=MT zEI&INGyCtWT<4OE8Gqq8xs2}VbmE})Jqi2@-)P1k@3p%aV2!j^wRg1^U*^2_E3s!{ zz=9!^6VG-Bcha({Mz~!Mz2fY!VuU7X!rn|TLTKI@jIuT&Ce~*rUS0pe>Ku^Qx0z}HK(_)Szi%(GammokVi2kTvkbm<zI-e?PM(d#tHE2fXsr{|039>xHaVBW#vVGvsLm)LrLOv`h2Qg_E;7uFLt*uB9lHP zrqrn@Q`9@BU0HQixsgF*FC%A{LEbl0V|qsZBS;-M{5ASfi1f0l**K9i47mj}nr6A1 z*K9bVTnp&G+&EqG_2K@L&8wuw&ZlxUJIiA2bLclX|W z5Nb9i$YC~T6>Pa7wS*cY@a9MMd;`-M%j~pIgjd`9k|*&~w`Yv}V&1Y0wEs%5w?Myu zW-6pwC17*V^CN(`29sN_6YrJ^$NQ+Vr{TbKL<^o@hFhz2FHjBK&+aHU zQwlywW47N*Qc9^4da}e^*p^JIU88fB-k*EIW~IBV!@679ae}Ckgs`6PaRTK3>{z6K z8KN$3H z!Gp|w;eu@Tq}HlE3g1yr8G2EVkr{-n`{^^%+AY6@;l>AJNmlALg&X%|L0`ov+Ys4Q zp+W9n`LKPNn_?n#8@nm*f##A@f@DDvAkWFqX7eCBIKyD_w$OpqZzB}QFEt4s2q7Ly z;w8>#WPFEwa~|szOIMKvV}-GBna`k*M{J|m;mV*`>$LYFkK962(OJ8m zK^)c z=p~QoeLHz)B!n7%hYG*$rzSk?nSe^Oq5QKv6Ek#?3%po7G31bO(LFk37rBM+)8UW@ z!8(J~AzWn-n|6oX7%kseKG~->#ZMut*YRxV?pcr_VvyU_>8`d`Cvnp`T7To+KGOg; zKnwtwJ3C^-)ydg$xw+oL%q?8h|7-0eIr|p zDB|ZQ<@rJMR=&N+@Xq$%NXGP+qSYD-!MrqEob`wO%%q9`WSN3tW^JqUk%9 zZ#d~Qd`Iqs+{Hw#zaFVFz#{Lo-El3DQjIN?I&0gk;sHR2f6Bc&%=q1P-m7zi;>cw{ zyVq7(#?*z7Zv1R#V|t;H&XHwem@vI{1l6rkwVG^@PD{BVxPr*rwS zRu>zcWZJ}b3S}}uSJa=n&KaErATCvWgw|SYnGI7|3KowUjoK;}JDnb`^u+}K+wEll-tmCMVYIUK?)ktoW?A2Z-iMdZcIS_4q}FT#-cFJo?wxi=A|GIep}r z^wtZ;)ilD*dd(y|dS6}|c~|4yS(~`>G!-dhls()f>dZvM1R!Y8ECqB;3GBdtvUQqg zxhx5F5q|-{nRRIp8Ncg&@EIeW*r za*l`hNKcusK7r7OTC{L4^+69K8lBUMy z6nFqIPs~+~SQL(!2sj>+ z+(0*4{*6Q)0O|Zcgggjg4aPI@PhKJkRvQN!&jzJ64{ftWqd?Q0>C}0w_iFm9EanSx z(cNYO(aQfJuKT^>J(bP!Hae1$!aG0l)k@REXt+i{ z!zJ__N7S44(-%djtbqQOMNkKt#5FwWC=UyeV{yQaIdnjiF?jNJvR{&4gGKVSfSdq6M#Zhx_`yPl%k&TW^mL zFTHw0*$n~)tr|u>R*dABi&|ay1ca{%=VEkRySaVE49b4F^VRmWBu6;R-+~Xm_<$J=C+?8YQG|T~^Fixs&|mW+`{=<4dqrBeWOJW8^zwdhnz1FBd(mkpd52#y$lrZA0Op2-# zIf#(T1A_m{IR4it6#x_bgAK-DgChMKxK$PfxCt&a98d!JZ@TaQ45yp5L!$K;VCU{{L`ae}0bw 
z2iznq7-Re2W9DBj1Z4lf#{(jK{`xqKKdf9|(w+aNK>y2yPZo@94k8>r_+K@eDF569 zDf<5psQ(&JUGKF{SetSN->~0oor0o?yVo?aom9x0T3cx+YB*@lfiD{O$89Va331{k zDC8I8q9w_hkI7`hXhTOq#zh^P%FkyK(;h8Wn*Tnks-S`Kbe}cE_SX*nXB5EUQ~#~+ zym^7XtA`eC_^UJU`D4Peiz*?1GrUv4@XC29YY_jv&Hou)Fn`R&r)S{n-v+r3GBC)` z1Y<1!Wo-Yq?vubjLlc8d>2Dqi)D7@Z=E$KIfAvscfBZ^TRW;h*yuqR$2)UY@tO5St zT8{F^hQR|O68_dPU4($;E&GwK{ENZ;m&YLX5%}YPDJ{zW>Nf8Iw>eLUu>|$EmP3RJ za3CW`2`Czn|4Y}vghBBA)`AZ!{IEZ{bSFB(FzF2fwEk3rSok`rg!9WUDh!st{Wru- zup;*M_6X;EOGK~`A3g{q5!JPy=JDAvTBTZ!cpp9q7i^kB0xB{v@B^2Cl%boj^q}0NLY}&l3j@UEveJ zUpgNyHniJa=f=m+0NVCvfOGTU5QakHyY$QR@p20|hy^Gra729W0F9i6 zO0k4N+OLo(4dOEOWh8+*fm+$o(Gh9Fb4fWE1wf-iy{)>rx>86bp(yhJIQExT0#aDuZ2IJJ#&$HB#VG^(PvTFVQs@b)b#tV9ljxE#>3Hc z3He5}^<8$75PY7`o1QCo^AYCO?jGMcxBy1Sr!V0uiD|S68xBsbKMv+S=qLR{;tHfs z(x|?b81Qm6PjoJ??L>s(XLS z$rtF&70KnABmY7ohw!utaXekDM)~f|gz>cwSn8`bT4_cR^0u|L)nVf=pNu6l-|P~6nq6#Fvpc{@R_LC^ECzx!sxiA}zC2jQvnV)cR zaOfbsHI$VLr9$+;_yD~52VjVNYBd_i`&y^Fn-df` zNym%z*TY}A%XDa;q$n__P2f%6OCTO^-b%GpGq7O~VbVA}sy!BBu<18u6C2I$ ze-~wIqV+}P($*Qcx$qxy>b$f>xYZ+x!tJL$J!H`&Pn*H(5^=c}n5Keits^VClqLOg zb249V6blPRy;~k%j&O5B#3}qS(8X0}D>r8f5!rQw{*G`szu)a)t;qXjENOhj!F(y( z%^!wsm>$`^Z4i;D%J?_M99X!93p^4bFQ;XT)Uwbb=&{vga`VJi zL6RzSAcwV`D+|)3Qi*IiBp0euvw_&7psYc%PB!`ee6_4K_>j^(lE*7NC7RQmj=J~D znqWH@!BQtaA7rRTp@?~aA`D^8XK`kT!G{}1USK4*IGlv?!B?~X*dMc_1B_uJmA&$O z$zn2t7w8c*53GsB&6nf3Qhg zb^ZI$+8oG_Hc`ken>GgaFE9bv^>4gCa*UFFVan$kP_oV&tkq^pJ-*|rzxaowP2&_; z>RRMN-KDVUl2x{1S;$%h?|X37Up59n)=~&(-YtAPsB%5xaX`K5?^+Nhpws3!k#F8z z`u;xiWbU+3u0i%())4Jz@<%;1NCVXXQ$UJ3Wgr(@?+V~=`y7=5;yxePERt>)vIPAW zDz(ER1QwR-&BNNw=1LAGGkb{y>*c2b2?w6$5X|VBw(HBnhUQ2;zxG663!#a}W}Q;iK8=xHpl$GGculjfeBQp4-RiV!fC{ zbrpHD0>tYlV9g<&qolJ)1RR$@;!Zsf)nK?q*_?DcY{nvhg?%vri%`4$Ne5(>-7el7 z=6^rASmTmCZ|tG;x8gR6eV-nS<~I%wRV#hAj^d5V3V?P(x;t9eNU{87RmBAfvC?qF zqu~gBXa0Nt*XM<>6~Y#x3b}d~IChJ5e*JT*J@Wj;93k_~fD1paaF?O3lY{d%bs~``Rk?W*+RoK9g(JSRpJU z#@V-Y#f(kV36vuZ;x0gcaKrJ+`R!o(v>#DvDjMI^x<>G8q^n0(2oRn{Zbrdp zvxs&?l?*$m2K1_Q+0AB){GgEo8Q}13%fw}g`6Lr*1Q}>7{3nU^!R@-cI)wt@03kZ8 
zAki?aknG>9z%2FhbmI`PtO;>2LKnXwl^p(qlz~t#^j9&SUjvpU8cZq#apWrCR4p>N zC%Hsmcs!o#0C*U{^{%@nq!WP}koH0W%}d3m?EqYG6H%9@kG($thvg%?FZi}GZuTPh zQMa~R#+(15doqnTRsaU=^7gvp64>Elj+1S0p>_@N%O4@}T$a}99vj|X%@KY;T7wpe z#DHuaV&U=}&W4~a%sMsMknBw~SOazGcg zdbBo(my_fcV$dr}pT-wYhiB+1M7%rAzF*|cZWpS8HELF}R;VKVTs)OibasBPFw4KL z(5_ZVShPA_X;w^wj!aXjkVlkDCKZcZZZO~2z-Q?v3I2`!&TF|$T!Z;=W`S||Evkl0 z0hNLNmtHYJpDi)Ok8WQ`Kw724>%AjE@Z<3fkk$#0O$Ufq(T^qXr9`;1+if>j{DMm*@pYFG1EJ@iy$1mc028twGC>M^jae}DYrq#!(iMHF{G;*E+5Hvy!YV7a|0L$1$_Cb zhLdG6$H`3I;*oEN|2zVh$uI(IJbFQhP2&M5ehWME{GEsc*!f-HRd=Tl>ke%Y%XZrO zWk8?f^P`jMb$)av8(NdY4b`+P@e0UG9GRrgSd|gAl}3Brk1$smBK{sZlXH~q@fKAo zo>%@I))JL7r*xTqv-5I9$!P$5wU_KujaZc)o1ol~stgOmn5dQvFP0!WR-An2cq3oK6>Y z2cZ=VU`^)BlFu=|gya0G)b0#M?M!FFS`cD6(e`>xpwr|^FcXlj0`S<;v2o;*6s_U} z^VRxdXU(bgwqFjxf`EBKxk7{Uuw?l>Z{+vGMSpYxItjV(<`;<=Qqi=uwB2a;96Aw| zG9q6=A=UvrrejLA0ahU#8oSYn6jo}(k@(~2`7T&OZbt<2q6*6wU?Sg6B$vc0=l{p8 zL|^3sA`%=65$@s+3V$FdePhZ( z&X?<}Tt?3HBTK*Q?5pqSo0Gp~Jf1k5(O@Wj2$q<{G1;B0E!Si+U&w+iDhQHEMHq-^ z$lreDhzjn5QZ`#zX`%e6pRhXD+KyZ2#6;l1$>1~&9vx2Q4)Z@8k$b`*S`2#URBx~+ z!TC{$n9P>l`S1!9dh}1ZuD4o8M^qV($4Vo3LIs1+`BU0QAL#W37ni0|mB$<>qrYR& zDXBRroavcx)1uKAX{UR}4Dg^D00sN;E?s z$PF4W;W-HZQm*6{-w*+zfHqoEuknn8?+0T1#B^A~?{+WpwdMZ9beSc#$XcjI>5tkv z^+LHYp?*oPr*LhU4G~(~x6AgCJj<=sWOQ)@pIEEcz@D?kt=Hw;I$?a!*L=}D&gMs_x%~bND<=2IAAfI4*ujTTx&QQI5Y(;+e&Nu(_Zmd z&5DeG1sfdutzuF1)`hM33>91S8&@m>8HJs_-j;N_L0WQ}#TIB*S3F{>2sa$>~ z7%eR=X5?zSGK=kQ>_}VPe7ZgU1(^8|woT}tr2P>+aGQ~ASao-L?#@<}2`-x`q4*^L zFJm%OfVh6Rxd6G3_-Fq|le^gr{Rxz2WieYAx(!Fb0D^3J^EAaH0LG_Bo4r=kdPy(M z6J~h53>@Ce5$wg~eW{XoPzDT?G@3zhl@#&^DK;n;bg5=;xJmBRY+zIdd37~w$>Tdl zAQ>!F$o!i5Y*RD%@4Cdtj1R3*bXr!AaX$9Y!NUPpPdC`O>Z?r~64vs;7_NG&ch79} zSji{`yo{XwGI@d~Yos`U2{TpcT|emust#4E5f?-ohtupa_?iyNx${;;x7Eb! 
zv@)Fbr9W)6{IG657ehKDOAAClMOj(^d&wnxOw;0XSUOL0L}c!73>G$4h7OeUP(Tjuasg<#Y!o!|wn!Lo1A;{og+tJiL+qdxIYTjw^fu9t;JJV* zP>TLqUTIl0)}mZBa9G4QKw>85qu92{bip_Hek@p`Sv%dN`47C$jZ5gup>-0(rzirk zSOy~9Pus@KF4w#9CcP8+$XCpYB^tp|{@^gGV8CwXWU00=95ne**pcD%6&e-rHppP3 zMiRucXh~ezosu(rL*nY$U);*j61f!N^68-DohGu<*NL@I8{ypC2pa_;RC1R`3X^W-ZZy8s*y) zkbtANy^NTGNvF%$urMvFS^<;yHO_N3kJD@+RxaO&S2D;~-t%gM8DE)J6n-czno}Lg z@_R1yL(D9$mvl%oQDIJ5na@AT2N0OsZk(0wi z=iMPx%LIYNte{h9dE4{VW=1`ng2W;at+nVrT2CO2{_*)a6jV@2ex9@oYq}Xj?YR%L z8%`W%LaoLAGaV?4`5vqXmIT!-9paz0@8$Q9G!hP{m-8C57ecnJ%Q8T5(ANhBt%>$kp-~CHi2MDxP-~4o4~QYMf((Dkzr79 zs3DS8cTyxs-4YYHmXc}Xe8b>`wH60-eCq6@mhU~)@drb?FMfr$-_yN_9kU@w%2e<~ zGBhBLzkW=*Bn_l{^~DGchgUbO0@RL*{0S>ns)asa8qdhshrYm08Qa3dHO1&C>w+R8 zfcs1q)T;=A>H33oyrbPkX{!Rqfv_Kq$7cWyN*G><%QsBxdUA93=i4L2l!7T{p~(Me zgAv<6Kt-j|J=2tYw7|+PMlsd}#9ER{kBe)K5^sMw)B#672BwmddJsVvA=L4-z9yWWTecz3;W2?Bu5-?oX9k8+=;r*P^NUUw40! zHHeo)bNztqJM+BB!bAar>P{FBG2m`A>HM#2t?Y|;%}$NxCs1EaS=~tL(h2ygS`Dm> zL)`m%zKcpGh}u+euoSC0o`nXCa89i~J$DGDLWaf*sL-hi%Lmv6)DW(BO4dv57?Xf_ zGz3UuZY%`iJRLjWCDMGCi&#C#ScqN^q$h(jpS8>zG4;0td!|(5p(1X=^c#S1u~2!3 z(<3cd+Z#*%?nYro$B<)g7+G~sxoue;24K-CN702z`^fal@h!&s#Hds>b+eqvap_7v z2GKB0A1^n%FFgjK^cVsfV6$@mxnFx!6@81;tLLsi9LsKo17H(Cpib#lb!ZScYg0NC zr-M078CM2T)!-qII0a5@2g7mw&|vCS;G|gf=Di=oI-)B;#Yl#Pe*CP1;_w)fpQ@>z zz`tHe2|YDZ*C&B}afP@X71KH{(`c3_$9%ezjxz?L4Jcf&kB^U=Sn%}yj6AEMo9AIm zK|%)!+eLq(0K{1k(LXmQs{=0;#Y?HNBjSRU4M>uBy}li2VqtZV^S2t~)NDCKSQ6y0%wjEIj0iTfww!V-bTT z*Y2B5x6H&g&&ZmuUV=VCBP#6H0-C!m&3H;}!I<2U32a1<&Q4ZZ$^>7el2p$gp6%9T z2)Jh}C8_%kA`dslKRYbH1cdf}np(r8S44WB^3B{R#0)diPpjf?ENZ-%t?!e#4UIqu z%*>AN0$jriTN#CRmHZwRA|bCTB3dyU)ZMYS@FNrwF1KjDkP;G$5G`oC$V!KYTVAAM zcli{3l`giNk%|?3sr2xi;&mxyppgu6C>c|cUdbE(`wKYqnVIs4c1U&!8D(Is392zw zc7c()--I*xcC-(RcU_45;sd3*@J5LiA+C^H>;tei%>D`pS_Re~N)>%_za z4jP}W5Aj{jXRln_d=Jmqu1XP*&ZXT~h}bmZnYw2m`RqMUPfvN1XZS7U?XDjrfYqad zInPBnHD2Rkdd&j@d#2BL+<%TS{v##pE3Gru2RS`-OuhrZKT5||)8}ZibG>^6znd7~ z8JY*-;f6D9nY?B+u1&qCFXm{yT-9>@`FlxMXn_8Rf@wr7V;okn2)x^BChv6KSI}2w 
z9?{JHSUq5jiBi(R?F`lyAdqHTHbzFYUGS!HIKZZ1(=2fmW$M1-Sb>sH{n!+T9#jC1 z0%Jo-Gj$^>ZD+7a*{}ktckr-N8CR~4^KbIo4!Th1%-Lo6Aj9GbxGi<5e^OB<^Lylo z*H!r~eu#}@vs${!o_y{9QINyB%WSz)UwIt=#e~Af<|4A9{CE_mblwh^%c_J;vA+hA zS%#3=zZV&womYC*IrD`tv~a_ipON@OHpA+BfWEI9D4HwZ9UASIc|Z_TM4Pk_lnwS= zfTSjmWSlxj&~Uzu3ltS`u0r)AupN*EPq@s!`xQ`Wh78#gm`7SqT&UNRm5;?__-vKX z4ueXna;>muqN=B(7@{g%{+b_2I$HiU3cgCES(3e8&~Qi5lw3g*$1H(?1{FqB!fsB` zBQZ;@{F4Pue<0!jAU+jcB_xU{?6}|}WMe&7l5p4{o2jS=osRD$v58ILjg4vN$lPI7 zV64{Jv1Luz^RCFyT8u3DkEx?TL=y6bHWu1x%XPTB(Cic!%tGX_Dggrl2wcJ_LNNTi z8sqG>2Xxn&K6@xdRC%1ZMw?Wd^DcA;NV4aJ9`)$uhaSW&s;F{Ft)gUszcxCK+o|ZD zUMnudBMpaQ5697%_P9L|h4|4fS*Nm=(4n0rRQfc^$7F(%dc$u`=Q%WwuvTbsMFsT6 z7T64>cG~pxNf7b2(Vvk?8PM{m?gXuwH>=5k)L~zEE8Xde-Ks2__%F=nI zc!d~6m;utbR=1N%4L7zlcGQ7ZWmz$WgN5Qd5sf!RKolYFyG81A)qm-2ig$HDpfIQx z%afY|o6SULiM(`m{f&-AynkYy0+;B{4-*`NjMAq#1?~E%44>OBZWeEgs06FZ7a8?M zlmkY@nVW?T#?ZCou}GDc^uxBWcsMw?`Fe^-ZYJJ zxt_oE7N^(d5vh64S0l0c1$+0OAIleQ!#9Q(D#KN=zCjPfP6_Mm?{*qVGYyO)vy zRUW?`Pf0_%0d3|2OH{TVwis2+WJKa?QfaISXJSFlZGo6or4t%S0~1MKy~1y)N+r46 z-$SrIzW=`HB>6U9riy-(0zQkXFyXxq=+HX=izwjaU9%#MWF>5=++a8sr8~Q8y~9I2 z9gL@Xgc-vICprVwA{97(IczJS)uCfUlg%PvCg*lI_J8;^9gL<+uhAsi)2P7i(Iz{Y?H!Kx->xVM>gb2+?UQ~%^q_zHor3w_NLjdV^> zrlPd0q5Dr)7LkICvwWk1rfGM$-zXir3MBwr-dekJC2mz^!X?tI7fVQIb_QyE1Yj?`-n_) z#^8%uUz*7bV7+lCst+T18DVp7Yt+BC@KNUzfBPl5zYPAck3VO#kCm$K!6W zwL#%Zt4FA0WH*@N7e($kFu}3}W^?vgfd(8!w!&h=$=QZ#LOgVazv$vwz~|CpMHd8L z%?eB!&nbst@K~gI1GKb=bOXu;dn?Uj~UfvmPyAih|tOHWfejdXECssE;z3IYnblmms0Qtcz}knJ2(DHf?lrJC36 z0fat{-Gd8{Fb?nJ=HXAMu)jj=3P5j4T!ko2)DjJjrCrPg>&R0{hvoS+*y|r|y+ddn@T3TV-m7rC8DhjITyN84&REKW z(9Il#em#X25$F7f=#NJVznDtvhr zCM9(V8@!CiqVEZMkY<;y7C>@?v9Na+?0(i&XbEY=ZWz6rRIj@Mc8}T2ZPE<~SD7y4 zjd!>dyT&CNP@wQ7mhSV8TwiX$ytzDw1_8d`J#70R6r@5=-0 zrP<^^2BnC1OBAI4DW8E+gz^uAOwy5)lh;z#DJd!KIl5&Dm`eR6%L$C84XAI>tl%1< z3fZ|?ys~dw9WL~Ed)BniVS2$7+?qqu`rEHE3qBmTjZD}p!k{B*JHN`Ljlq9RJJ1eZ zuYugfdbUvc>Q}St8?T!okDrsDd=V7!GW~=;RsI=D;u6d@vt4l>sI_FJ=r*NI09ahz z;VbbWgezM*c;Th`GHNXQN@(opL7ZlXT&Yhk50k&6R%F7z>lOP{M{IpWEz#w)qg1Ku 
zbzJAp^N{JC6vzE@ui3_H9| z#{%0VHCZ0?ZcI}~P!^ph(J)~Oj6P31?$(}SzYrH>dia7?CK`jKt^d6)jaCFX&G98mAW*l1$}Z^W)qw#QUb!>Zr@D9_ZgKrpo1{iaMRfxz>+a zuwIoJwBk7|)&?BRw|Y3%x@U_~7eHVFe@4GDm)m^X89SuHT>oIDILS=CXb!U979|?y zqf17WvSw#O5}DbD=|$Z4a5i;o=d-=EicPg!CMG}7>xO$sC&CIoSg%x#U~+oM&r`0Y zjHYGT|HQ94mFm+eszdMSBs|6Oqlf-F5wpo0X5WY8hH8JqXHYsJMV(o6xhks3kZnQ+Vw0)0XbcGVlL$r~* zq+-@dZsV)x3d?5Q4Bq4{)=9G^qCunEN_|EZ-DkoU;}4WC{_KPZIIrv8Tid9VUu+>; zPFW7MILxsdAkWYE3Vm%8WWXDA*ehVPo@lpi^y_gCvGJJQoZxuzCuHSMVI@a|rU{am zM7x$bvW2D@VkHxM)!NMI#x#6B*N7slBj?! z7e=-msgZf$vI?6_@ezN}{yLu=C#6RwNj%Ytm@HLSRzWIRNEP^g($ZN2Z>Q?+6)6yK zwp9FC8O;=Vd@8ZyHTT1pYXB`!TQSycL>m=`v%4gSXpMl`+hhZ>jAjg|=?4%GQIT)U z66x%*szL&8L$XlxY$ihCf@DQ~SEnm4D7XjqvOhunC|oahvLeR)hD`;CJu<5Kg_lTh z9DxO>rAEBf71}?STm1Ami{IUyDU}PCx~jCEG`m|P+(W_%Xd}1UJYCu}Go{|`|Ekww zCJQ|P`37pVXXGob+J$4?*(Pm6bZE?DfUlV)ybKdd|NiL6{9A`K5$bTZV%6?`q)!&V zv^=DQF*EdIzp+43j?yffCh<2 zav4!_sZuYmKet@35&`-Jx5qGh_bTBro(=_Wkm?>U`z$#tiRLwyhSLB;j8=Iy-&7S zA$#&MbWX^Vw-z4@kpy~QwXS58$MHw=k4sUq_Zmur>H>T_n&|2dnkZ#_Z@={nuWT5` z_9CTC@eW~`Sp50MVEo5`8wJ} zyEhHMB~qH7d&wO7=A)>_UBgYT-g~=GVY;nXFcT#FT->dVBk=!d;s5iX0xw3e0&;BR zN6Hpj)DR&&2D)_jwd=Wp>cqjusbnLnv|{(Xvs%pOjWYbnv;j>6IA}`OmX}4JLach( zA-ebHb0S$QCVRf$moMi??n&vMj=+1`HUH)M4#n}nzf%#*0{oCkdIG+pIcr+C!ET{I zG1bn|WFVSyp~k#+B{Spi1fUp;h{06hmw`XO!{{^WH`E~pWf(;)GAOQp z7z-ENX}M|(?Ob@wD&7g?zebx1oo9^ED z(_nmr6BH7fAnw@i3KLXt>pO-dH;rVU(cG*cCnRl!qDRdA38h9$AU$ab9UATyiwTGc zxZ{$*DbJNq!$MMr(>Eee;<0uKWpkeu)XDTV{CadxG@l;eQ(0$&n`?E5_60Tee|%B@ zb#DU7pLEr75q^KSMvOh4wB=Fp|94-BYH{zn_9h*`5n$RkP$WEcO=H||^}c~KZ0SiL z^YtAzF4#qrC?!6e{A9K{8p{+xhz8Epo7-FPMB;r1+Tgl+y4$~5=Jy0J9c7XNK%WW> zf-sAf3=``IUKcw*x3==z34jRoX&@&5+k+yx-hfyEAb9ZmbGti4%w@v~Hn7dXxc#B# ze>U&^(@n*&gJXg>fL}tKSEWE?GHU!cL-;uDEMgPl%=elwYnSDt%;;=i{fVWfe@1W#vwcj10@WB}JGrw8aX?~}Ez zPsBX>1_l6skLm^_DY%ZtZ_u1lg}?Zo#&-^cB|m+@cL)?8a{y)#Mkg1xTWxQ1Dfkcij3^Mm37gz=eYW4+Eck)+plcEXhBf#~$|^fWLniOY5tXmxE7Lix@H zT#1-<>msUBi?Z!PI7f$vtsxt1HuQn^) zEAl*;XP;I~Vj(T~DHDbvMp~0}dbTBh5d~?8CZup!ZUXBoxSfzipV#`aClsC62w+kD 
zAIU}9tqXtr_yNFW=XQrZdO1Key6#TW)*)%myOpgKGf-JGX>v&H*bc_h!d6?%fmN7q zWRPN@Pw`R&D6I&Z_(Rc1gA9O%`6OQa=1I}2MvFqqkJAbSH(fSnK)&%m53~O~*n&a< zTffreY_`$=Y2pg}oWjmN# zb3@9stD9Z7Ee4{YLqI9x?&1;@_!SzTMSr}d>+<9z`qb@cnb6j0a3Wp6V;v`&Q*>X@ zv={MBz$b!J;-8?)73(Aw_Tk0CEE1gq`Vw%36}L~U_Ufh*!-?@V=f6fJa1q;et%-%f ztW@kV@X^Ade-eB@k(^V@`QvkSlz#AlNUFbtY5y;bAOEqb%fq!Jh4Fs|LwkVqAEGt? z^_?r2I-q+X{&u=qXF+PS%)0Z%Sw5AaJNS)S$SCI;Z0FFnG>Pi|jDy-b`-Z`vsrvW* zjq3y?`-`atVusWXV=R42_ znU+Fgj#oR#1AAOFfFKu$&#%FDG@8m|i#)oBcnm4^%T}!4OXAIq0lK$v0|0)qV<0XA zldF6=psl6R%m-up0BZ0zx7&{sI}K=5-5D4A@Cu|IWPN;np&y)O z`i`-;R4GGHM>iEfN?yU&kt*ZhTmAz=`=bS%5lz0z%3;KlK~_L$Z3NI6)&YkE^%&Ib zckxN6!SRT|mxDDx4!T|pr!#|?<7*;8#;j3huls z836lWHT)|28Hv3_#iA#ar>I9(0E!SNyS@tZTZ96lT9lKPY$l+RC7N4X1zh^zTLg*` z`P8T&#GF=HqYMF1YDGmkjfNe525B+PxDj-cdT)?K4Pg=x*1*RbFh5ZwBU%<ub-3)UJ<7{L@kO&jwDBsDM8D#X9S$JuiqZ85ZUa+A}Ns1+DrA zIZDU}k{PKCn^L%L=~sbYjd-R*>LQpB8XZ@qYrrs*u~NY*ZGHLrWiT_lx5d<>F9MfV zEGhq)!lP` zfB9CdV{0!?u*w(3?Ip__1jo5?=E2bwMa+YCgK%$8BNIjPq_m_fp?B^3s022g;ewj4a9L#n++5!pEolvPwq0Sy(i-{W7#y`9s@ zmKLCTuvt+!#Ah{3#@mylAdi&v-WtL^qzr~?w3)F1y@^^FQxBKT^f>=GaD4yA*7|?G z00gRt9JBBJwR2e7=X|BB!zH>~CLA@UXb5=F-xT6nCNN$C5y;{m*gO+>r+)9xB{6ED zy|Z)1(SgxOfk|M-hhf3not#CDP6wF;KtM=6G~-ncM5y6CK#&-nLkZ-6+IEUsQp_6# zGp@jyg^WtXnA}BJ_}OU<$XoXqT$28MGejYY2=_YY6VA(66e21LHRvMjP6oaCX$LDd zG%`W@9_=b)vBSmsdI@nu-Gv#c2XWXCxhFeNUM`3cExG1&Sk|6_?0R8Z*fLCjs^|g) zB}>oh4&;J`s$Q>vI#2)(%~#Bg8bR&RHh3~i^|ql2K$67(T``mfwbE?T^C4%9YTD!J zj7jTQ-22-z{EO&y)Jn~f(^n%mh8){w-Vt$H#VrLCqjmVJx`zrb+^WW?At9D zQffU@X+$21!L64ki@EiFDrw~@TpFqKCjj`tij=%Q%zGWz9fDj~GT*1d_q;$kv%(%@ z0mbX;h?&+VEjl@77Al0`9#ncoV>aOv|MuPnV$fma(Jw~eGRj>-jWo#|EdrSON3*Nt z>3Scc0O~3B6e@+~_cXPj-!;H)7bD{T06+ymV!+s(f%9%_97sFd>W31U!69Udi}`fM zrT>UXLjvkLF=neKGBPs76wa<3O9V6$sKlv4!5%S^YdXu)DuBFrvm2Kgym*Q4 z<+D;ZN|g#7%NG{^)qP(xpY`}pi?vV!Dn+~%F6vV^`mg_h8vP<1O~XsaT$>PeT4Cf= z#;$l4!Hc|CXRy6QQH7Y#<&E>yvz*(d7?zci2G%9}vfL2d9*oPi-n{`xoz-4JUIdiQh&2*Zl%0Z0@_-j1}=S)mNB1o0nO4CNL2&f^IPR`?O;K+&%t 
zAoY=r(@OF|&ep-f0oo?<`@jBxe|_*S1&TBl9^18+M_@hxy%0uNgbxfmDDw2FyVQ~S znQGW6wCuq47W+bR&#_SZcPU8VaMEkABb;5SS^vx(!|@Z0C5Se!mP^*mXi#Ge3yn^L z9!_ub`$N=KyTj?@+4hU9_tHObFC!y^7rFE1+4qn_3@^kJZ(OemDQ#k;4sVmgh~b;F zmh+ZSMHoR_EW}ZO86WnakdWT{=b(&Qc2z%%?|fkVKsy!pVot{NSsm%^qduI^5h0%4 zw)M(1aDNICjPnZK#cfKKL4wl4ef0*0pww3m0+*OH&fe1Q`cpfTdqV5{kISkpCQ z{y+E7|Gu|YNv>@J$pOY3h*D=te{X#Qn7yjWBOS}P_d!$>FXR&ymgYbS6HBLD!JPd!4WlT`Tgnqx z**f5e?9+Y2o#7OWh?yf9Zkk)G(+C=?C#V<{w}c!PKu=&d6#sIxr8@fGZ^nQgmVNr& zka6fSde2c*eglV6J!msUAv+5H=4tqAu?13K2xGWwhy2}@Wjk4(fRbm_#Q%_T{P%D4 zzg`D+K81S_*px~9-TS?z#Q5IR)5C&VjrlKb6e99p-g4x^LDy+aC*Q96UUymGgQLNq zfgz0jrtzEb|A9;Y=F9#6GfYLbN8$+Di1l71oJ2+~7U%WemfV(=NFyE`Zo7Fl{U)cU zzuGS~2>CwkA%Y$wdV&GG={?8fJ|bY}d2*(&?Z&r7Lm?gjWB5O; z>HU?GFgIIB!14#{uO}oVbc&Dtmn|K*WA3)bQs{~hPqq!R5!K-* zTMUm!BFe6s&RKBJ-4%I-p@*yM6)@jB%Z28Bw7*Ti|BN<+LVB z-x7UupJ3(d>uZhz3y*S5NW18Lb@Wus6^giQ%7)ePqh(uO?M@Yf8~pD2!~|SqRIUyX z@dsc$XliOI5l8PbSXuy&kxB@I{!$N`dXRT8oruqtEPCu?GiY%BygO;051GCO=lNOXDMK3UDZMBWv4A#1}i z|JnLh>f#SQt~=l9aTcWYZxeXBH9$FgLBi&@(%+^V3KSaO@4ZzL6|Cz+s^%P)mux$E zS&u_Y1OdalQz}7>525M@t9{mf5qRJ!i&{8dppV3H)PB!w4Sew*?sfin#agUGkS$Te;W_}} zs;%DMVl-y^vnuv0%X&Ow9_Q(``eD4hvG@e6a=difrCawEaD(t5UkxiKYh^ zA9mb{O`W zUJXVE93^)Ez@Q~E>!<&KV;4M)G`CC_>UJl=MWEcD0bro5Z8n>7hM-&~P-^mS+<=>! z21@-|{vI+;D;3a-NM?on>*Vs1#puT@ zvGe+EF%r-Zu>cSiT#QleCNf;)8|b|LybdYMyp4c+T)R0Zv-%Z+jNjk34{kB+ia4U? 
zraQXWHRa6jg>$XQ`&r2g9ohUZ97MP??;>ZDiYISZTG1cS4sIf~q=q;(Z<%yO1&Z7c zdT$P%`N;rFXb7(D+a}IK=3BdU$K2d2vn67dp+ZlJ-aXvE%M%DV z2hGA}HTGB<*wZyg0cQc<)i>Wa0Y=+U$KpR&>7->xtu&t>;^ve{y?-*8v|nDgz)Q#+ z1Oagbc#jYfyv3X602TJ9g#kT^x^oTaKD_TPnI}otlxZGFhRMZ7cy6y&{r&URe>Lws z?c@kFqOIWQ`Ov{a&m_tm9M4e+Qb*7i{D|v!SYZkHnx@Rga+8$GJ3RJ9n^&LbVzeA4 zzkWbAlz2#^jI}M_>kzsW48kK@=9@0`cC~JhYVF#CJN6Q?TDA!|a4-<~{cs37_uanT z*-!qmY*+Pqd%jtB1~(i(Ybl3h{h*|8BjX*p#Be*hGAb!BI$2HEtlF;fn4WVu!QI~r zGwZSCnzs#UmUMVu6<7-NT$tM-W(vODJvtpWC_L1@`~eXpTsJL@+4%~lSd47zV*$Jo?iBYTDr~JehT` zO_S}H)v_r+w)peN%c!>dZBY#dMl(8g{q~L`=l9;9ryV1M{mA|mwxwaBt-E>>2mKlB zQ==&>7Ww61o_@lH6>@)l`Yf{JC23uK(9{PMhGH5f_m($GlFDE&cJg(W(9aeR z0X;LYaR48FU=|eat=G;tge#K=3)VB7;|{=o2uwc`|_4~j6JGBW`5zRw?Tzj&TX-#ws1-Pme%3Z^!b2zokA z7pZRbW-pa|p$Dmt`P7p>2>g~|+^`HT#&s@|245_g-V&LYYvb*oWPE zf$!7CLn=jBZ*jHN$ZPT3T*LZT%5eFYjd zTMOW08AdYMk+z}(zA2<9u}I*}%L*-**(V9!pgMlUkdKT>)qL|>7W0HIkZH1nDr*B| z&I}*{fbbV0CW^no^oJ!h8p=;tv~s}kppi{b6t>`l7~LCdlzdo`A>rmw6ZuT7!l3m7 z2KPwPw^!;Gz8aK4Dj4oT=EYA2$!Eor`SOMG?;{l;JERYQyzH$cD9<7CK2|SIjzDCmX9KaM+#k~pMrT0_|Y zRz#K&g-4%P2eAV1soGdFOD^EwK9h??m16Ta=-cxFhc_9_z(F8W-43$Xm*)iN85Ili zKxFae+u5s+Mqk;^NL6l7!C9CuV7~bngY~9lhIIX?QmnIGv*6%u4|OfVWM6* z87HJwc=_W`crMU!x@4BQUjqOFY@4br87D#ip?WRUwM5SkAaPNXA ze`l=6o>dmApo;O}^LL?vdbW0p#N9wgyWwX@B-0@)6ImFL_qs#R6$yHmfWWYR{fc34 zeHXR|8&#Z#Aq@t`1r)QN?z9z5J9FVs>X3Mt3>Ya77a5v9eRD;?V#*Q|^n%+_?QWkl z5MWr$P77d~4-xw{cD`~uc)HQ|J&y4_JJ%9Jc7OY7VQ5ib`&wSys@77TocgxWyx^pq z1tKV-E4l&XG|0a=n0#Dr_Uxh6uCAV&4p`2Pe6W%OGemvjexQ+YE&guz_@D;_l+@_3 zALX?i5?|sbrzL;*-hBzbP^Y;d-VKF4k4>u-!3kp#Lo(l{IK((pRPIi%S;92US=7jg zH|nrhz~Lum4fIF32p=bXKDB(R+d{&oW7cZ!&p1rQ5D141s1?Y)&t)JK@3VoYG;D>G zY3jMnr_QlRFvd#|Yq63}7RG@Y1h}n-UIyLn^|;oIFx-Y7!3#f}U8Twon$PnUuhLHv zxC))&FK;;L?8eXq?vAVHSTbeaOUd28$gZ)GH3h~&C6?uZ@Y=W+%Edy9E%L`!=a#+7 z(K{R8&=*}6DdjtJBqLL^f%R8Re=5pxcQQYDvqJ9n>X>ynMC`Zv9)(q8<{Q!&R1`U^5Tqj__hO|P%4OB!1Ro7bhdhikom^>DNRS>q{rma8+Ho-B2_SvB zXk7ukyj~YpD?+8ST`g_Ccb9m~x;s=EPTryHW_%XKkpg^lbaeAOMc|IJ$t5tD3-O~P 
z88+H2P|es2D$>gVJ};U6G-Kw~5F@b#S}uisQz#XARYQQt@?mbla*+)C zP%_L84-fbBz?SgTx$~un`1@_WfOnw4(1>icFB1#Bvl!??KqFXPpmI{!MtO%{+vygv zOdGgH8DQjb_52`s^LY*@UJ=BcSDE3~iYqhX2d)|%y5hAd@}52kZ?ld)VK|G=UFaizc-b!}kf zaVbxG|8cm@W<_Z0ymeZ~U2Q&z!Y5ZtrI9bM(6&}?QTwDg87CfGozO`rZj%^FA7e!k z8w`-S57TKy!U7())!o(Gyxh$hVH;l}B13fKAkEy~edPZ4*}zI)o?~GXVgT6(`(W}i zoeSmjKEj8Zl6n_x*ejDT2X!Lyw3uGhE1`KOTSQtMucC?}z++TQsr`hC!54!A%9v@s zr5fboYOvhb!=Wj*%a(sq8WMcg1MyTpes!bT)H9EE%Lv6T_@y*CYFZUtqzqb$2{|{86@7`U|HuN+ za1RKwLe6MXm8PU3Wh0QZVe}`W0l+ul5Z|xjUR$teS3zN3buDn+Js*ZUaj5`vXFb^i z@O7iDdR|4axg2R9Af_(FK`N~&7gF7ge~u|pT>Jl!YKP+qj!b0I!Mgg?jltvMa`)UE zySgr!*W7EYGF37}MQ1u>Ib0-ZPu%!bVqG(cF8~j}7$xd55lRz>)-6;2heG{kZyEgo z0JX4t!i}UzH<%KfT?7`v1mhH`iaGZAw$}xuD%^|S`~~V7kVNkyd-S;^m6lLMh!L`!{L*_t0yC76svqkz#U+m$vT6_7Q($TbwtY>!wP z%)sw+!4&?fTs1$wLqa?j?!Dyx5tq&Ka-gqnq)&s{)kNPgYK)MqM?kXvjBGr8%v7F> zN-JBma)WzewwhY;a^toz4bOOs_2KW-t3ijOv=5nH4uKQ_^bZX6d-Vt72Kdy)wtLxQ zLW3@A$%{=K8k!ug~cfj*LGhxwX1Wq=>t~vMY#y zKA9=xOO2m5nQ|J9PAW?qF}EX_(kMQ~z%+qbb=o3d4&o4hW;Q%~LrlQ4%yvjXJgFIG z^5=Y}6yLXlMH(T2d}7wMFp6jY*mae7>se%?q|195^y5eGUz^S9-%b%Y)o0UyXHhog zzSW4xLeJQQ4ZDhf`?K?!60&+4+Xk6yBw!DysN&V*FD1Q`zzG zYX7sxUCvlbQEilWZpzCQuRB?NFe-zWxYBExTM_**8v$Np4Ao>S*~?;hAWxZ6KaFVELH#oLZO>Xl zXT+USFrBQyk}|gII0ot3F+48h)AQPy%?x5K1Y4hCm-N8u*RKH}?iO0b_!EWAiT}E{ zyz08>BSKB2^8IG1xHWc@N>qSOyu@P&TVQ?&%@$C~a0yKUz8b(;wh0b!> z`Xw2Udz0h0O@?Wyx#R|OZ>yj^v_7REhBo?sBEahFXMM(aw0<|V5Mi>q?~qST_B;fxKxqf0=OMwCs^t3ir2xnNBZsmoe%W$J z>hFZQUQWGjE7qej0dx{%Hjy;=p2_Ug&VuyBmp|NZ=42^M~8xyFyFua(#?w3FoeiVv3nTrj&Z6 zA))_izp}A3)F)E947aPZ8xF=l?SAH-cFM{0;l;`a+RphWAv1Vuu79Om5I#_e?v17~ zhuW0Lje4bfJ&5^gLRUM?d#<>DYHDr%{@|_9Hbl#SI~<;6O+xz0OD?VH)ZM|tLk9hb z8uF0F$r$wdOc8k6SBJ7%MP!^6}HZC+yb#rNLs^jh@84WCNm={^CcrYR0{ zp0dK8R&651#QF=QSo)((3XgT>zy<7`mDa|1Gop5yA}Kj&IJ@;<%5>9Irm}e7I+<|_ z)CV~jUZ9F?_6d>mPc}U6Oi;VSA`C*jWD;PWJr~_f*i}l-xE~XFw6g8$@1@o0e6CaH zbc|@P^=1|TO+}vC2Dpr^uR@-)ZqD!^bXEM|d`bq{sADmKMq>mpZs zMFQiN5(d**U^{!u@~iB1`?JmTs@Od-2g8Z@g*SROruav0KXWZ?(a42KZXG`oto~RN 
zyHwjHt;1q|e!5A~*4e2ON$ilszN(K$SnEe6C}b*{WT>H=i+2Z=DpgfjD{!!#bp z3|_M{yobFmXzY!Xl~gv9?kbZW4FS`v#LcSMX9h8mqC=;K*x@j}{X&%deL`$+R1{uH zP(7oyjdMcDn@ltINyE35>zijz$;^@22j5nv&j3BXI4{cK?n%>{p&3FVz^=Vov|pM zaxi*fb0yDFvBoh)E8f$dOXD&vU~G9`l55FO1{|&M(^<#DmGs62s_4IA8_-&Q7TAbb z*e9~pY_>7d{j9y%x`d?vuf&fiby~!7((zWv=0FTcI)&ZEXV7q@3;m@)nltKWJDV9I zMnmy=(uzh&ki$zqtw}ok^3dP9NwKwuktMuGF@8Qx@-9>V{PNS!FDsKD6zc7XAxXIs z-xNT!8Si#ytjO@8yzz0==oB5%)-fuJ}3mU8_ z`;pTM^_@MdJwcbD>@8GGQZ>gT8!u;mUOMeI0oimw#Hu2Yid%6mPuZoV}sjZ1jRpMHC4dHutmbctV85yKR`qs^@ zci+uZycE*0n@5H9B22h+F@|Sv9?~p6O{s_S1wJHt9;6Lm2g0g7YqTAY&lW4CZMe68 z9xRXyo;Dv8Z|TWc9P{Fp-8s zB&ur^x0$pH1?&4uwc4E+Aygt%_Nq~WNW~ml^9@sr*mxHUs|<}^zi(0RQ@P7D>tj!g zjh2|~@32&qmtRqe&Uw~7PS%3D+c|1?o7GQ^QH`c29%cpa_r4xKPKjVGKf-}{ru|Pf zDYqX_=_8!4t!#BeXfJ6`E)P@7_|A-X*{t7wE2!u4V+RQfO35 ztH@#TJpqYnXldu67VN8@~qm%KatYngKEx z0ktiv&?n9;C^JaP5N~SNss106MpTnTJdXOL*C@)|TmjiWl2 zo7T<0wj`;4Z(8l=fFV&>QWZ`_kvubfj?AgK%s<4;Zfe0kf!bVkZ-9 zcSBqLg9$gCTc?Bc`}bUk$+)B2C=O(WjY^tLTDrsE@z)oFU@eVsi`W9q#b(b3yQL2^ zz2zFsa_?o7XY0pSj48(RuZ-^{WLS-a$vN^^Va#G$v$R4jY~PY3VDL<_8)r9z6y&vT zlV??wW`*=pebd;qm{fjocl?(GDwWQR&Jr7_sf```&Iqs3$EO2VmTa3j*_U9D-)D9< z+7|2+PZYOc?I)3cXJFH8Ynf{Z5MXw1eeaTAy8f(b7PTQou5!q^mX`adahf@azXT;+ zvh>FYBSI}}@wBQxYT0RotV!^n2)bTUyZOFPyCAxkt!v0gQq~+&X(;qF;6!NW>!K2@ zTZ(qDR_m{_2Ga6Rf+*Ti(&Cz`XwW!U>GO5M7-i(mS~!X{G4;4+nemU&y@MA8v?JX=!x6%uG~YpP znQEj!!lbUA!C*lwteCg#LX)dRlUbF13_g#p##*Y_ zbeuW@0Z(QxSTGPvlRmO0v$oD-QUxE6_iq8u+5j{1>!&;|U+=oAUYptbdKasMfg?p? zqQacqJcVKyM!cw(>iJv;Ahso0uCAmj43pYze}3TZM^CtVQWnNL3xwYs3O2J2z?fX3 z(~%m_=~67{v8dv&PGwa(g~k~O7yi-ZW*FZhqsge!Dh30#7^U~yaqX!uO&%%Ehr13ZOe)Eb_{VzU3Adxa zMIKlbbtQ77+&6{T4#Q|Er2wjCdmFpFO346?bDph`KBLN)_hN!kOcF8??m@2=OBLH0 zF<+Qg|AEDWVOsi7wRIj~3E#f@Nzxm~=?QU0wyG(3bq~J-)Y?LOz_Xl^GH%GX`*YDLuQEXbRNDA)bTbeAxOA6?) zO{fVjmcZ?>=UOovPiHGQaYfe;0!9IJ)hBibsR~lEsW22;mq6gRRzE6X;ff$j@sL_Em7JkhF*JG~f zWkO4|KBdrxLoLd}2OwlhTz;zr9E_Ey5VnZp(naoi z0M-WLz~580SbF}`VV)+yvV~*2Vr|)VYas7JCnV>;hsgA^*-wS^$>ENLjv#4OuKw^? 
z3$~e!ohVavkFl!M?Q6IgnPVDfv05Jx-gRKv|# zb2Xc$okUt({wQTWHqBz8@-Mm*OElo zC?@qsTTK*)rk2u%zrB;*7$#=AH-GE7g-qz8oA{5o7{pHbSf-FN26$ZU?c=q0IXS^2 zZGDZ`S|P1nqF@CPhqXh7V8WkI)CXW_*&`igR+T`>t(B!mhhXXG%IzN<95A)2$eMrB zI-D?W$fhxsIAL-KV`n~nTo6FNrjNKnf6PagjeGH7+}rUv10LJ~YCa+ZDKutY7)(S! zM1Wij6}_x#1XYonq^OkGE(Hd~MHKy{z_tJSCQpU;;1)|WP10s7x$w7x1A)T@#aCA= z9zTAjCcHZzBh&wC%?hNvZhuT5^$WSGLJzgTTSONF<0Yn?EsHDD+7Y ztyX(uoG4!;$o-pBe|#225EFWUFZ_U5Ece7{r+KhDu~pDcn?kw)DVx76+uN;u4LC{B zr$4c%&?FWU(<4svdf{4<6&~}07I{RnKG?>^2;HS$%BzXO)3F5g-bC3;DyKn&EHg4L zRe#M7mak}Znp?~U_UYD0l2X9oAvn}ZqEnO+wsB=(9GLx*%Atj+oUl~22yg8}MqdgA zJpC6XqHmZldU_lC#(0Ky;3^zhX7fqM;Pu@M-p>>x_R6kxhlC~Jk?7REd4;r1mm2V8 zsz>I*=~wF{?@~(I$QLT{p_Zub5bU?$RG4mGU&=@Y@dmFzWD^%}DVjpz{$KM`fB}9(ZN@x%$9%GYwRpQ2Ff(WOU zQ!mFDl~Y#cfd=KTFto(loBS z42w%{AU^dG1hx+22077hZ!AW-mM5$#ulwe8%1SNPv68Fc6{xmHQ!Rxc=@x#d*dcD6 z0Ru*r-{l0Snz%7mJ2^5*Y}l|p%#87e*yW|YEL;?1S+`AjzM>>^s$(Q9pahP0^1dVy z_DRNQtWf+3);5ofl< z-ttu@Ox$+kHrr-+6s4{6o&hg8Kj$RYnljW3EXGrW)ZEXDGrIx$2! znDyC$eOqSBmc&K1VbFL74@8`%(-L3)&J3ecBA&z@PP%rVk;dwNPVBPJV~F+sru4R8GNBsFdH5 zB5(VI%xwnxwn~U2FW|B2^InZ!45r0?8Ld`XA)1$5h=6q5-g7AVUpqWd|ITQ*p_%s_ zN=szX>iVUe&ep$In=5j+*N{?wKue4Dp%VYPF7z_K#okGV=K_y(t z-f)EO!^#zdH-GOjnu18~=RHridAa1+_(d*E^d5692@)@YbD(BQWL|2Eav~qmAzWmE zi4lk+70?meG@3$*PRg%wYZuXS2TjhDikf`2uON)Hg1uts8jQ%Hc+Rj=zskzkBK?_c zwDgMH@472e*f!#vCkF1efTIM84>60$nS;CfI9k3l zbvrU|kJCa{wbBZnS;Juc>I(%7_*V;@-P2QT-X4}^Y=&t~1+B@aWOD@G4{5K$&Hs={ z>9;rb8)i}&BQEPQce}>~)b>bA(OZ4bT@8?3h17;7pT}|-+<*|FOQ0BYHcYv{(Z4Wq zIjSIR#=JK}m$9+W6@D1I`v5ZR-Tj%n&jU6t_iz=1YSInKtR_WRGiZEgvt+NwEcM*k zKKVWD@>=e{rM=5l=63nzT|n;j71~=mMj17U#i$Nmpr^<0!GygK;?T^j{9}M%9pUo$ z-S(%W{$I6~_^s3YdYa*$v`*^nDpZJ5a4viO?xpGW+0&m%PumB@wA>9{3T#&&5!=Ty z9)}Vu?MA{9J7*#is3CJN9+Ac5-H=JF?IFEpgu+Fce9T7GC8JtY6G(YTcIeVkBs%$W zz307=UT-5!4a4q-hNzYOl?!x6ki&h3?9i>T%=DRbL_+9ZsMBgyhP{!C2P`lS$H&vF z-rOf2CwGJhHYVAG@h#T24U$fu_ z>yPlOra!%FhMh#~y-9L`R*{x$zTIPpI_&Uz^6Ysyngb8L1RhUfnIde+DXRmBj=DnB zLTk9+n<_-FK$y=`TuO%bq-A@?Ln$0%tFtob8bm*aZOT2;H`#*Ue7*CZrzt0N9zC%m 
zsr8`xt%(~Ge76|uYjpK=WhfZn18E#n2AU=$@h#mlY4YUe>Nya^ogR(z?$U|WJmipeW-KM9B7sQMc!F>RsCjdUqS^b3F%Iilx~oc zMx?vDyGuekBoC=H(%mT`Eg;?99f#((3`N%mPWUG237Y{+L(SiP$2_Er8BlCX?~RIH!&k=1rswP@|9#vzxydL7o~Zl&?IP2j(};?|%x$OnQ8-$7xH^*x2Et z*+OdQccTbdA{=Ye4>2eFkE$n4#9Qxciy0jcTJ01%KSQ4%7G4R?|7z<~&KOLi?b&k^ zI2u-5xoz4U(wIsdfMhwgA;y&|R(~4b*;RgEKF(S;8_Tg6&9rK8+{PJ5J~kc}Hz^1j zuC`})cZtu07b7)UUHCfPWG;5vzA~ODnzrgsht6xCP{nZQ++Pn@CEZWc1kfM+IoWpBa#t>L??3#2 zineq5c9@urSGiSU!IP{-kt!vC?4@o<0+U=*?4}Hl!t2S;6j;*CX;~{$2!+wA3IoVd zh>+$rvtFuBO;349o7=>IQkCxHu8W4o6HE~nfXae%^#7^E!cz0v#T$5Cs);y1QGY4? z#wFTIg>A)zJa+bc9%9DF_knQJ_5xzoJbmA9SfPlShW|mmg~^ta9=%#C5|Q^6UGQoy z%dQh2S@4o{W3^Ez=C@IDQdscn4e~|I)DK#{+L>8niTWy%VtL?tixPlaRC59vN2jV- z{7&BtWv?w&40@*YvNJPiql2w8IAvmN!l~D-uM^EjgQ6qTtie!b1@_%TZDS}GlDizs zuECCKtGCwxZ=n&w1RgdVHxM-j61b?C(7m64o}`YHSA3Jz$YISWQ&+xjbau5z&zR&% zgTMGH(3fB!)J+s5{7CwYpQy?c`vE7N+33Yr2DY+U_~*t${uOQ)qvT7JA2U_k z<@onzGv^5mF+R14y<%eLFdX8vS|P>EYFoqgno#?BKAHDmsrk@&U=c}sFj<*+H0&0C z7Vyp{;jR3XVTH$3cGRMGi4&u2!gl(P?}eGJz?j>wq9p?mQlfr$B8_p>4U) zro=Vt$v4SuKlr5{Yu(Ew{={2y%ipWf<^K8BFw=|^X2*N zmzF#gV1m=E*^xAj9CHrYo%$?Y)^dMsvEUTDn%w-Zw4#4Gg3mjyDU1ESY{&{l^|;f! 
zbRZ#>O#xo2Qv_b2aW=@LjxOX=Xn5e30+Xty~C*e6AtjGtv^hTrB%)Lm$-&xtU*2NE%>1*oS z%MCjFJTu9X3=2yWFOMdW%>9lwS!^_;QK^s9t5=S~?747s z<2ymk%8F%|a8(1*nk5fT+WOJ#E^5lXQKq1*ap9vve%}M(9a#mI(x@^Qy1Xr8cgDLYIun{PMtY$z;r$6dJ>Ac%_`M^dlv3+vmT=s_ zNea80EoDjf;$G>m z=1XSv+)6y8Q7P8!-bm_9DRoh8)#pa7+;ALT@MnNa>!yXFHPuY)qi_0cUH>B9gh5@t zSWL!*BsB5u&G8>Km+^*S2%c85ZZ)RKfKSD-kMkY*b^Gq!7sox~p)IR{Cj_~kG*=>Ngz`rsF$CDrBt^j#XYmAV0hecvZj zJ~m`R;Cc+Q&jV%BduJIa$LblhF`6AoXL=eht_eF?|C!_I4-UFbla0<=boP`AblI%J zR8ffmKS26uzYU+t9f3@H(v{O>V#ms2&AxsmWdUdy?SYp%ne_S)LkQaBoZRny6xvLcA1>J8<8 zVR)L&u0#$SHV?Og ze@Ig)b|t7uFYD6}HOv?hFr;cs543r@1ee&JnYoyRY`Te!+JWMYxn^E;m$$x^NT&cI2ve#DAE`1c^fNQ_T$at*x3$fDxXJ* z>Im%FV8088h6rLS0^I^3nb}ikD^87oK$}^~YA%QMfmjYoY?u9=aas@Oz}3s)!~7Is zq+FTRiS1kbtZ+U=LM}(&fhP+t0d-kl&<(DL1!b{2N_@ zO!`uD1iyEvRcsBb34ZG?z&O<$FVLQuz|g@cwh}}{2iFzm!9BL_r~K}FA;Dp#=(%95 zDHJ}BH%GZFhVw4-&^QTF-z@5A*k$VxXVCH!`ntbe}Ve0j378A-(BNpc7*qZoABc(cSyy!|z1mL5_}@R^0^w9zO`(l#Mef`;0Q z9t=`{d+Wb_00O699PLO28eK~G6>1f5?X(c|fBJ`j@^=~K6P6#9RD$ZrSfU^123tF& zI{R}2jmnL|6h;l~^0d3|2m&j#7;BKHZ_dO_X)6i+>yI&l1pDGmQ`jt=hxwqFvh~2M z^nblO81z`F77-;3218cQpU(7<+p!u@Q~;lp-FjaG$y8YQBvd?<{G~Uuex2KCV0ON|McD^bqTgPV1}|-4H|p(wD4k@Y za2hJT{!d?|1tVHym-fwc$bTK%f4dxi|4+d-L~-y=pZ@=TC-K$7?|UndHt@iDU#;Wd z7XSk`fDf(Kv$Ynsg0Abc?Xfq?fC~$rmn*G>2UuX`gaXDiu*22>Qjp5+rZ_;iIFA2r z2CV^mGu69*o^*A($#^jw1Mt3Ed-05ad6~dMhheoAAxO4An$r*Z%!Gu5_L{+x-3Q#` zfC4cIF!3pPa)@WsCK+A{eLnYBo1pa_SWPV&CIM**KH3yGQ)j_^v5%Z)VDy|jK)V39 zuY+;s8aR%`xkk9>sGO&TN`)88Ur3Y-K6OC~s{W6A{_pn%JORv8?%+BK0L$+n% zMu-{Wah$cP1+D4U5BGON(mAUsf~u;xqd!JWZ0eSS00t}tTL26N`at4!1}4f5p?>?p zVfwpms}dKi&NG@?)#gf|jVF;_V}n&${a>rl-&e7J{`~y-)CTi(z+Sz!L1reon$&X9 zic^WlBY{5(_1Y=OlOE(`#9Y?}%{icJ%XF^p9R#>73BiyOtHh;e!1}a=Wp@$A*un{r z2w)8a4daWoHaZ3dl4>4Q|NM;q`3vb30=VsKc)pI_2nWLjsJ#AGaseDaGReWJKo&Np zS_uew@a?ZOVs+S%j_ppY0Sq!R>EUQHVUSdZgtk$soWW^)MEvh7%fGEM2pF&js#Qg2 z4*(Xlt!ZldIwSj-lFxn(VYdhH;Zxojh?%rT5%C1|tej8kWPny!6aH<$!2+aOF3=^& zG4B{#FQTM46e$OeiL7TYGH^>7^tgbr$x|y>hJidrA-sYEV|9m@j-Z(y^q+nC@D^Iyq7K?B&7xRFw%d0>> 
ztD9rdQ~0;GyHQX*5Ks*O&X*$ZotZsC_)_1Wg8y_=){5MWtLQ+qzCVtiqLxW6E18>K z1-5Yxi;01arlSrdAa|HVTmkxmWv;{=cr-x=DNuAzK#fUyh_X7ebS6 z+raT@lP1GcC~S?#CAi`4Vpia0uM!|o8s0*WvCPiFNxo;Z4{Qa>|jWEz1O3CZ`tL44j}Zk zT&5F%_)(L`?#Fp^dpgXhR?)c;z2uez4nP=<&yX37nD>{%3TrTP#3#Ah`ahYXNY4V5 z59VqIvEDON!}!n&*Mc<(fDOR1;1GdR39d)-r)#W#yEXs&W~fWOlwfM5bJ%n$Te&c| z5zQ1CAse3{ean%?zA#oNs0?VsMl66G1G=K=RLk1N$?fU!39lGDbYvd@rVlJrYF8Lq z!w5xjqQ+iJe$SOlG$rJY2G5F6CKVmUhCMs7X9mE@9#ytxL5!9BqZnYBf*ICn5XHBl z_F_Ku{Dxhe_F2*ThX}TWvxJXN#Lm6%&L_!g0Mo7z>n6BEF|b0S^9gMn#SpVJYL$~p zY4lh5Dj8fIZr{c;&xEdHbVju*KPTVbwPPhS_+k>wX&Ji#l*3LVT(M!V<1QRK z9V438jpt)a?fz!y$B9qxx9kM&&SKFej&-|~Sg`43lUWh z2fhP4R^^BZB9?CrXnK(UOfOOpkm-l-l|U?%r25>pjbP@FK?bJCQ8GVafCAG~t&BRf zpCnHgZQG^+xUGbNU|3-ReyKu+-;=lW?ceGv|8vp&Ta@?ti(r*@m8k|iAE_&?7ghHb za0o`r$XjxKJg6`kCfN(vt7 z>^?_&y8LuWc+j*H%&B_UXhRld9~JF8XvMsLVkT$S--+i7=RI*bfM2V&rnQ2K`RB5S z#HT9fR`TVW%lgf92khmVnXT?2PZNBZjIHO?AC^ti4CDTh3;eI=3PA{V#C>E%#_UXO z%#4+j_GyJ99|MIRU*m|L3>9?FAxfgkVsZ##Bh+I523xkkH0-h#W9S_0US-!sW>52E zFaD56ME&RunWtgs7R#vaieZ!Cj`Xy>slKLr#hlQ1(9r)UTto^sWbRj1fJhlf7p`qe zc*)?K#)#u>oKTq*c8I$>Ak3esIrMpLj~6CN%*tIjuxtZJ9v5F)>+Ok@qn`5}^G%1^ zjVdKB<7YPhI(CMAB9}S8C>G+uk#R9O*Ua!rcFK?)g0!5Ie?|5G4DbK`3n?yw$gb!k zHc92d_6sW)Jh`pb$!-;xk0I+p;Pi)jK#5ln)CY57aQg2ZKJI`Oual|kY5Xwg&0IAx zhb8T_!&3fMMO4YGPiVm)pBYd*M!9G1Rrxdlk+CfYY-gA;6GoOs%AI|H*LxG#;f}5H zL$O!MDgvKPlgkpp*ipmL7M0Q|vy=aF$=_9wAWuEfi%!;Iy)O`2@+Np=p-fM>YO;30 z9f@rYFJ@xm7T`q5(+Iq<@7zJp@x@n+{~fXa<9|T&L}t8cT4ZYO?vQwq9iDU?kDa_8 zXog4~=jJm{yK2b;p$t195PW`isom+)55}q2hr@OCNG{iS=Lyq7lr!&W_BzEZHY7Ru zG`vLob_)wSS(2VCqiUY7ii9r=M(R;#Xg5C$x0`)IC3=R8tJ_qz9dypyIVB-x?Ey`!hWh`JFH+eBYrjpQ4;+e2I>8L476kd?L(}zDcdCa}bq--KIEFAAx zuYl?P%(_h}6F2KnVHS{Vw7RQIS5HAW1t&LKCv{sw{U27{c12_i zU2}*nuKFeOogvg(9N7%=Su* z<~xQhc5limfQA05K&Z~o!D|2M0)YHr!$m&mHq8~ub8;GRGKc#>r`?n!-uSw8@8jm1 z%9VvMk$%4u3#Ai&K!yd8w&Je?Tg&;4ADaJ58u({g|FQ~xg>qyjtu7ea$FAud4e!Br ztZgL=OEX}t&e|7x1ENGOjYkwI*SRB>Q!>|>HhZ3ZGL`B~5F#~iVV;PTG+d`lCe_Oq zz7aLILVCi#8Nl{VHG;7*&Ly~o-sd~jVwwwzNgJPl<>2sPvksJ7Mml@jK{qpY>m-1> 
z@$|dPJ*EI46Mx69Xx~p?%NY?9g7{h*^;5;T31A&qpR2(#E=}#yW&VdSpUY3tZaZUs z!~#SzHN>}6V7IcA!=$nU+^g5KqN;nNIbrpVAfB->FhBwzvJT^~OnMFvx2JL@#+iBT zLkXIWt6(^x^H-WpKt}_UlhfWAJR4D$F@83d!EB4f4_0UO5;IE>Y*QiJPQHM!~h^6GcEFPyX221O_J_?ZB`?~ z<9W$sV3@s^3%=f421r{YLaQ?0kB5=KQsqd_P-?`GRSnFa;HSnGwR zs{lmIEw~q)qT1e9MzIe^KheNMCQPvVe?u(mw?$C11@d6P3|Q#K`as5)PXnTb8pOik zuUr-rbgY`gKcTt*LrnVj6$??0RMrpGRvoa7c|TK^zc1G9!ToqGnLuY&)e%5~K@VIN z$K^f5HybH7;#un4H-d{VUcEIGnF7y!{A>Boh2b4j9|InTM1wGs`Z;7&yrENlot2Jm zz&hV(4|6v*qGQ1es+FbKay`7fCm}A5@VTP*7^e#)GHBSI2B3#i=ohR8{ajuDMVtu8 zIH#z>8=IW>)z1_*sYU~m%JeWb6Pq%O`{F|GdcfuIpRV&k&EuPI`!H2!lQlcT*Y`Uv zW-={--RdBX@86ZF{}F3^4)vW4n+Qs*Mr53)B7SYL54QL1Dm@hb3L^QBy^hcF0b4lj z9ShQtRSYgcXy98(%u6SA9I07l0P1l*n7g{aIZPqEWtD*q!9J8<6L0|1sd6kXz`g;B zm`t)<-;DfJKr?<%z`8DG`YUe6@6jE2Hfo7@S=~Cg@e^w|VV^&NQjmTWn@RQ2clw__ z3#gxmE69>eJ|cPQf|yT1sI$Sks#53>{01CP5`M1gg(gz*r?|fFF8O--T2xRepb20IU)^q&c}R;+V`sJD zB7UVWOVxk9tQjKT|LGdIoj<5C*|CHO^i!*cqpKxob);6*RT_AI|F#5Z!nS!vOIwxf zTV<6(DM18%+hEZML3*zl_=Nw7h>fl(;BGD-luM}Z86Jbay%D8fQ>e6Luy!=xHs2l~ zD+%u*Y)yv|)n;o;Liq|7+o#TTpnT4Aijwfa`jK81$clykl|wICf%d(S?(s&|8Kq5& zcvL>=c6=bj4|6~211habMl>_2Eyh{A0|TcmpG zO8-d?1dq*2gm4@y&n!b>x`ff2g+3s~j%G^?dsnr|XW8*W=G}LT!7Y2zd;+%L8*93M(}Dk6q6PxZN8Q;Bbk3$rgt>xA zWguivtGX^)_{(s>7lD!&#tJ9lXka|^FILjOMG_)FB`o~#{x9VjaKmCeDl{U(r9uDU z2t}3}nmwSQpa0K~ih?gmLpdG!WZWAfc-ec~YsQHmh7c^6iTOH)v2x$P$&~)ZUu!jw zvhsJ}<3K@?*otGukUaThwjL*vTeL7velB> zRWM(QUi_znY*PU;v=&tFPE;Ibib!bnuXeE9MI_dA(+vW53qgY_0@sUS140{UH6 z1F`S|&{`gaoD}PZZ4iz|wExNI2hj5*U?W-dc>t2(IC#je6<9ki0PaUBfsV{>(F+R7 z4*iQO$A&%tCkk3ully2D^Fi6A-qpNI*#j`UfEomXjfh8yW$;XTZ?V+}e3~aVkSstj z0F+p0m3$-uCQ+&S_p-;`&e21G^A@Z)nG@2N`592qG!?71D<^R02>QrUY+C9;n7wuj z^gQX_gNEIbO9M6_3IQ#3eK4gK(1wmDL(Q@M3g@d} z5KofHkXXPL;>StzDoLGYF%;iyaKFFp8q{#HytVQG5VKHh#A3x3L%^)jV&|3-ht-kG z@Vpo($iwH5%HZD-I7=+mytcZVx$|(&k#y3Uei4s^^)}&cyj-LsC>_+f@Be_7XUiq> zn;bMvT^j8*%W9nm3Wh`P&bDJ4T>=hgD^CHd{R;ezYQE{CJLJz^RlV!+ZREb*Y^|ki z3WJvE_spEW1gVtnJD&EL8zW1pH!*FG@h~brD+0YgagS^!&@Q(@SNwe&92&fG=6s7+ 
zBRckna9nhx5Kx(KwOVN6@>d6R{x+%^Od8p<-AT0VJTQ&cdwv^id3->I!*bh9bHNbI z^jeQs0p>WZ7SF3KN_9v0peN>=uelw!jwUp0j0Tg(A60B{HGz(~KT%BJb-xnENI61X z?2(x55vHyJ%FRKK)7B)aB(9aar>?*>1Ax+Z11M(Mevl)um=)->wy-E{uSt@A6gULg zOVfVMY-(&dKAU-!V!uYKFOKWfcR6G3C3TwYf3X0RcKK5on%p_mALlcZTx!!6n<7mJZ$7MD|SU_p#?e>Fvo;mEk zySbp9un!fKf}govxlRa_|4-jerFf3x(iy~|c@h80@$>)~ z8jo`X3MVWtx{y;?OosVx6`vOlZw$r5aV6bqd7#ccxHhi0S|61YL!L&r{m}8=S@<59 z_#%-}W3**kK`cWQvQE-%GMwsG?&e>w5x@1e+q>OgQF5lDi@1D z>4V^4Yvcp^>K0&l+z3@(D`(_joE{y~ReNoOoLi`HTO9RIseRlQA$v)~I3u!cH)<7^ zW8(Qd>*2BBVH*j%kNYqkC2&6_1y!9i*0KO2yHC%?ZE6 z!)*6lCF5|Uno95aJ+F7gV3=2Hdb8mMN7x51-eQFtlO~s-=%*Sr2c5aj#XE8WPOW)% zf%a6E8&ilZty)c~W8g+7K3>Ok9j>fiVKXwDtf)AVkS6gWjBL)YrwO$tnA-EWN? z&MVN!xoS7MF5k|UIXSB* zo;EXZXP0*81_h5(V?**2c(B|&S<4=Q-!TVx`0(JpoF1^E8Nv`f3$C}C5xMW~;lah3 z0Vt*zxdBayD(xpbpMGNq5|$r`zn_(GqT@p6^uBck=bA=32{ei2l)I9Z*{@4d5R?4^ zeX_IanX}prj=fFyYc~0m>QdBPR4c2S^R7`CR1`)#mA%`?zg54I_`MhINqLVx_Dycs zncB-BvM58&vv$-9<)wCG)3tVg?m4qbsI6nf!BhjE{jtYg&n~OS#_`s`FHp`LZEMen zZgaS(lzSEsB+^MNlg6!7D?7L`@L)H6Qhhm!fmUm|ovHnHBu5Z^3Xk~0eG-S=N_Dll zW11y*nJG*rPi3L*VfokV;Mzr{8-Keo?ix#xdycU@R-4&gDXQuneysIzmj&19wc4HE z)faG4$yGYt^2&wMhwz?Fhm|d<`a4&;dvU)%i%MSui}*8ClF6m^M(04AJL?6ywHsu- z%)52l?UOFMte}rCho}91_G}C$W;p&(EJ)#7JpOXHQL0u?sH4uUP~~cNa>vY`He#}Z z5v-hPap&{m!%p99((>KfpG7hXF8VX8_kPg#K(udrEiKLV+%n{c@tavYb#)k zoSR@!Z$OBiF~ztn_7!?=KHhgGkZ1xpD5YhFm*`Tn%DuMHAV12;J8m6Nll}es_m;VC z(&L;8G&I2OVd@V5Tx|^AvGPJ>PoIL>K{EN{xIPSA68W0=UUAQ|Bi9c56OU2 z_n996C*8p~PaIP5SbKu^u+N?{z@tr4CSKNh(?dh z67+*L8x^^UUj}K&MvZjIcp%rG?%sA;k0Kt$Gjzq2DbR zlDpl5I_r*Ir36i1g6HeZQr;+i_eAE=l}qD4gc|g`BNo`8&l*nt?0K=X;J)>1*nYF& zcyzrV9N8sBnZZqhyOVj+_350WL&=<5)|h!BT{r@I|MAkQ939M2gHSs5! 
zK`ZEn#9RMQv@X!lcfMW&gH@e-E&3YMaj=ukZFmbX;A7kw(;G zj0PG}7$M@MJ6G7>#npUFTDeDCdx$-d^P)bGM7SdUq#WpvR1!Jp8Tnewl-l*?9MsKL zvlIBe+=n0Y!N+WW$dkP*6Ru?OLskC)JP3lD$G$p($$V-lQ51Q!0+D&rd)ltcPdmD& z-IKb#Kv}=Dwt|Z?38&?_n;^YiWBQw~_PmeJS~8Z11`@mns;4XtW8{(SrXcZW+>U%M zwhTHiDmqKRs!xS1b)9bvax6L;w_L7-Yb~8z>rP2I>;h?8VvQGndxCEKl?7>Um}lolTHZT8F`=gj zv0~K2@?Z6EU^XD>=ucEp*>u}{fyOcWDYz1JCf$Nuz!DWvNF6LuBouD)^Bn?6U<4eM4R{yMlbCZHlzCQ)cd2c;1(7qBQNOFv|AU~ z{*xv3^60n)(>wd{8+t1Vmtwvu*2;Kb7`lx%m#rU1$ZG)~^=ta8txM=1w>G0TF};%J zj$3Bu@!T;GPy~wfcLx8yDOvCY!b)HG36`E+U!3@+RUXU_a;IC6ZIy7`dcHZo)t&(( zURKP}(*_%Q7<5`54YRecPPz$f6j=DW=mSgZfIt+|$Wl<8A(ADAzKg_})PSe;Rf1{% z9@L^=Cv9M~YseSw(?KFCRSQkSuR4j)0=b-NAP|fjI*Zu0rmwgWFC)kHUj0w5y z2uEHRCK(w(eFZI8y6xl1T3*G{gj!jBIDVv*Z7z!5TbD1cL41BhOpf4@&p;&w8^wDf z4(6(%qhCIa6iK1$$$!s$89^xW6_=Ug2PzVt{ej z3?;T%lu2%Nd|XzAK11TX&qh9Qt_?vjw|l#gq`WEct}vuLE!CvO-iODqK}y+y+2D@> zKhB*4r-L{Py$S5?yvrh2UMT|sV;=uS!hxHz{%4-X6TA3ot}`e|Q=DXeLD*w^bsl^@ zJf@uE#m(SLv~FgRxio5C6N6D;ET>x>V)S#G6obW0G6@5)Lxp_M5a~_(4(6N37(<-r^4q&xzrK8K zoYt7g53<1=Vy(}KRpx%l1}K z%Nd5>&#lU-RdPwY^jABqI$Y75(lUU8ZLdaQ6sQRWr;Qfx@ zc@LMn`W?`Evn8T!i0s`W7K z`gY^Et<@lC2#dyC;@Jc}GNLqB?Ub2P(NN`iDOvcB>#AnxX+OTEiv1{erD-E^%)}rd zXuRz9K-ZG7hljm69b!#_P!={WeFRLpQ9|e3oq(Un6C%0#7r#8yZS~-(mSVYT5qRwdnqzPc3cr7%-1hsrUG%Y@~a^hgMc;Pa&Fl_J4=uyC)W{?zEl#cY9M z+d;5p0*m}Za32nz$IrAFWb|^`F^|rV8tKZu5l2=Vg)g!CbGGh=lt{#R$uois0>cZZ|+T@wp0Rca1?({r=L z9wwH9_SRRh{0lCOMY)kA!pLgK2L(5}4OQFCs>D;ZUkb&`k&K7yYSnwK_mgGu1fJFI zF3itN7RwtCsl-T0OQV3yz)Ud_BivUbld8klj9@>?Wr}&zk z_>|#L>JABrj2Wc%n6BF_+^D~GmnI;-9#dn6%pU*9fKM2}+Gj2YLK1K|d|7i?dS-z! 
z;-wyO4ptG-TekP!BWYIIuxAu1YR1iS{8yBW=71xA*NG{oDhd3y90`}*9Rl6{lA3Hi zuI5U*$!#gP9u0kZFIG?@DlEyfx>VzlML<>lsQFJ5#WH>1ycr)ZcP>A$^<}?=?HucD z9c?Uz1w$T?~c(-^r^{9(UY*MMXLl+Rn#|cH4@^wnBANYwfY}6J- zWT)!PZdOrvz??%E|Ng|%B*ChmALQQ2TRUv_`Q_1PcHPkAbF@}t$eHuYOJ}NyG8GPj-DHu785bTlEF9CA7 zON3jRV)^TMf)=Q*jXyrZulPUl-lB%5w^1t^lksHD=6)2iNhM)Dg()KQ+}1ZCiYYuh57C#n;q=oUH{{lrU4P%!F5 zs<0I%JZFMSoMYiO$8pEJ&)K_YGFg{IgIO}rA>$tG;onYw013(sy>N!1Vxy*O<(HWP}B^s9oZKiufB4$kAAff!ij$(da zrhcOKV*{@|?CRZY*7Am+a1GscwMWO*@V&c~b*<(#O6Va@nH1B|`*?wZF?w z5f&Jvio`ghHo*>n=odr!*H$VDYB_Dz^1i1q_VksFz(;s&$lT*RBx?15&Y?A?GT{m- zEKiBM9@m&fX~i&hORWrs463KixJj6y2ebLTeI?~`@UmdkdH;9;#jVh!-$mp5C=|2Q zcd8BS0EW=&pg`YBJGUUZ#5>l)gB0{69)y^QgZsNRgW;XUS9Kw7xHIA~Yygy$J zw`!tZGaT1Dn-v(L)6MTJXk1RgWU$kDA(O#u!~@-ePASBwTRB$e$i{YSyMLD&4jQhh z5IB@cuC`#?NohoAn7p)i<)d(ANQBFVvULLWtyY z7G=&PoZ7m22-o5$Rn1*Cd8AZ}zg}Kh>LXI7n26h(25FDV{KZ#w3C)gn;+-=7=Z?IH zA|Amd80%qzOfLmfsV0c|+TnuR2Az4FZqx%8xv0llAP7O$g=nDA+3Wx0(0y7n9q;MS zWj1=D;>{uow{XQQaYtAna8uqt`QFXzg6nWzEEMPU}A^$3!o2Nocq7O!|g6> zel9xy=`@a+sIcfk(5TGYx++rNtGf4A!P* zpu%m3IG03gpO>LPlNHSFv16Fs?~ps7=DD}2k|D>OfX0jOz$Dxn?zQJTa7)#8Ga@g1 zopRJ3w|}+a5s_{|WI#1txoB#Ye*Yo5I*&5nuda&HIml)mWZ@=!k~HLg58_qIA;Bl( z9Lu9kE?7D$21{@7zuGLyg38K*np|AVoif96TI=7ek`AkMO`OyHNKmDgsib^_A#S*@ zk@ou`cuyIxWH39mZRrWKRN%?$pbH*gYNfO~T0~w5B8j^MRS1_0h$qeYw=^$_Ci=*v4id57~rJpCT zkJ=7!6@jWj(_EL<$Zd`o>Q94_A?Fp<%Fr<1uqO*onvh8~lLmWceWQEeSq*5JCY9Bv z{uFEAJXkAB4nkOZwtd>Hn**-&3pIG2cTaiSmBshGTeh(=7$LmJE`OavPW}=J)|0|| zF?U2k?vGX@zvWoM(JdLryqm|XTRk))eq;0gluez2uEpyxEY$azIe!j)2pMsc8P_)E zEUCNAE-bA@OKsaDxDny?LVLPoUcHCVyCj=7=O@!t$qN#>GLz3WsYPWC&(z1&qyh*+{7x zr`576;&mamb#D7}1wto0FYNk&=orroGVbDniB5Z}edH-~hOT2wm5;Nb1uwiNI7xxy z3xPv9!zeNg?M*WqivYJl&u$I@@)v(bGkFXH4^2UC#$q)sG_=~-I@w`+Jyt~$90;YzqbN9V$zzY<#*_G)pM6(o{q+Fr#!NEL) zz%{=T*u61AL-2HhvwOx6+#@)Q={^Ncu^B$9(rWt8qptQ+PU6Y2BX3*-));juXC6^C zit3H6ntno}GFIMrH&ZZ>E?U2b+XfnZihiGuWOk@Vd~suWa2&(K{5Jz0Dn%mw@Fk#w zg26yp7D6~Fs}EShZ6*`v#OZu>;pN8@YdOtacWb~5EWTSb-A#Bjk3Oz8CzY&b2K3d` 
zXCkZ2dB%E)^U#DYoxAIE2lW}N>dd`(1(_1na`FXM%U43-S}Ls4ncPWS?~n#l<9m{* zg-YgA>BS6Mk*7KbrXIak=}!l>9Xy}gb_pI;DT1mmH_Sy|O>0GJ8X^+XYAp#Du#tjc zfC*z-zsz#~=926D{DcLRgj#@FWGRgwCaEX_?@6QknaE&RbLN$_0We$V5?7|dDJUC9 z8LoU|ay`;zKqtJq_dyNNs%)f=?<^GA^{`2UPdzpNn`9ddKSgf2vZmJaz+G-NpNfn( z%H_dqy&~=4E;at(Ip#PMP0Vu8FQH%@lW%7c$YZW(uYGrV*_+{`CWrc?P~N6c=}n{M zmAbuh>AYs1GD_ZY&?31DP2qtOx-+&sw;qCGp?6OQmyXp;*+yH-JlXd^%Pbs*)Enb# zL)qSe4b^dL6kbe%Nl}LQ*BS-Mn@TE?I^87|+z~u5_r}dL-jK#nWI%_$;Ttct2 zExX=YC^O{I019Puv9ZV>EDy`sir#Em#riJFTjrYc*D=BHpXz7W7XXi@0x$72hr6Ya z+WbfVf?H@M%?Tmb>n7zE)@QESz>hzfJ+0HP7FwfO7;Z9{Y@~fj5raF4k)R17FOk3{ zuA@$-RIws`YO-!8!NSMqOlME@;`|#0m`*;c_>0|$GV|<#Dub(Jo?)V1peRD#7vExM zdF)_d33*kJUpO61;6^0px+E@`P$rN=B&7YFRgika14a;-(7JM0K7Y!UuRi3NBTCAN zs0e0uIVpa*d-_QAE18{F{Oo|W6A#QL802jtu(fJq%io2+NM^_gK^eB4*&5hv!F;${ zOfF3AaI+mS1tU);rQ1VtvKwu}%s=A!%B#S`X0~}@+?w+^=HvBLl+l^ zg)kd~%!MbtxN`HMr_^ZKMZl+#Q%J*YCxm+ej_6Ozrt;Xb%L2jft+MRKu8mkjTdf)T zcEg!|<1hLkZf62tiejQSPjwDC=2DeQl@~T>(UIZPtzI2hC$FB$^#CK${wXQG1}q*}q)qQ?*LK1_@^-mP`E2a_lcRqZ!&kh<1t#!m9#sRfYni% z^H$r4k`AXa1IEmk1u}O-X?S}r5s$uBjSPBi!sY45ntpQg$nWHkgeUNxc);*Ucwp$g?5l6{U0w<<6JNeU>v-1-+KQp)hs{8nIZ`wSl+O zNT$Ur%&h+z`?89S*$li@8#I9=6{tk`#nW3gn(H3$nD&Wtgj1-6?C8OfZC*|XOo{5P zN8KMXJl)Qqs{!ALc=Wpc&$XGZnsuyp>H44P7gQ>)Wej>=`ZWeK^>2R`72vSBxo&~` zaHKR-z1ika>=A*7b1V~EXr;lu86BN=?B0L0-F6< zeZ+|kiiSza+GZ{!l?uloAtWddOY`2qAIu}KpC@;peOMTy^FY8q**87jScpZhLrVLQ z3dshKM+KiHCSt-s&z93JDDab|b*HsKSn{4Vjm-avZ53=E?)Kue{HQbRa|69?44pY` z*&Ix4y=S4%H#{sMN>k@=w(nvlpCz7Z-OQPYm@K{zyzSZH=uKf{MyS^5wj=pw9?awS z0MGr6dxCWtsTUd9?<*eK7ptG0f=hp1#kJu_>!#4%+5%^Vc=C4IDTY}~sA`_dBo(!f z^Suc}1QE|f5UY{=tC51>gFe>Xi`fU1b$_GBs$E!Ua=m`ni_XC(*061^W<#>rGNn@} z0}XH#gjv)nGG9I_rcvlS2k#a_urNZ~JDo{?6ov4H6@k%=2BQdrPfRcdB2AECV3L2T zq4oC_^+d1;ppVUA1$b00pjf5-oW@gbTfW#GkpE|&;uBcv;L!KzTD+NH)FXKMPdlh! 
z{m9$qW(O4WZXGxa*VnH>8_Tcv0c=-brlmn56EDU@iPkPxgbC3r2Am;$*z}9S+NvSC zc~{XT{f;;Ijv(3E7m4$8U|JXZit4JO^c}&K#@CqJeaga$r`hRxpx&9Wxl17LE;06b zBH(~EivhsRoS7iEMZ$oN41sd-##pvM3VWiDESwObC!5vQ`Q$?*lFOp@{)kE9ec9LG z=hYftaEy)J_!J5cBAl4p1blkm@P3ewG<#RCJ3x-~8?hPLu>lhBlj6sB#)He%c zGB}Crzkbt?7az9p=w(?>EH|-R?~G}KhHcE=)ag%ux))W`%Q0s=!J)HcQ1i$$`cl$- zvA=M&J1p?>_?uGJ1Kp?A(}kM_Tu!=)?xD|;G%sLkap==X7q&p>;PIB>PEdGo6wM1j zX)zAudFEWN5XZNn@%){Jylk3E0sJOin}B9+bI8l?lCywYpw3@APNT*=2Cy0wU_KB0?GZX}Z2 zs@bUh_)0e+y)5T!SH=j=q_PIFk@MMqN*6|d|B9hN)G*XAxt75XNx((DNa+IeO?#`f zs)fsP;$Hf>lK*bFQW&FdvEbFS!i3H_^;d{uX1Hc23?6^Y2dtx#!PDKz!riKRGKcO> ze1YqJXsi@A1JQhv`Y6Ak3ltHBH}&za8m=>(BnR&kL6uosETP6e4SwfmIv zc^yVV&DqSkc|JBNPDkYpH55*G4M{Tsc~&mD5bI3enBmHZ(7=j0I8x9NdbiZVH}dM3 zmgFj(v<475+MH(8jzWTln|SPnlK+^cLA>#Jklv=fhA(jtbrEGX1c;Z)W*lwT_Re>L z8dVk-&5p@TMFXLiA{kvde{Lpa8~kkE?A}j(@9)3o-6{k1^$ibH?*br0o{zeJV_4p5 z(EItUK_qNMB4G;D2EVjbyC&Ipa(*JWDz^=nAxL_`b0d)&aCBrVH1`Q{nY3wgl`ctj z`*d@-vKW4M)%GO=@WlVfzdxc>UbJis&?mLJNUMD0xDg_5)rV3DzKUXl;qxdLOUy>v zX`$Jk#n~NtzSB}U6rN@YDLTnPS3y1;!al?9a_)&FqEn#w`oyjoFw3!a$vQK?wKf}V z3}|AJAZ)fYBDaSp&JSnaYGZ~)76c>IA?zV)h@+xbmUwP%1IHnhgtx&0dcRvb{IO0q z7)a{i(bq_(X<5*K{bJ>+`|MST#ec$WeAw5|HE5qH931f`=I+x-S-)_rsbhplsO^@g zES=VbKsM8=@O9e;>3(YKe}{h*r)^YWr2l#a+}VdrICTX~--vNUwPaRJ7~#e16LKc} z!yWUh2<%O^goHH&Gn}$uo`DA!@uuRpfG4PEz$RdW(V4|+fOzwiX!H;)yY8}%L}U5(%5aJ#T4hLBxo zOAyCX(767I1f876`tF%JB>8-uS84_vi-7;;HYH!^R>41e*WNRv%eaIFefS?I4=t`D z5P4&jOYww&gMJ$1!%E zlw|9B#y=Qse!gku98gUMs+m-h9-#J7UMnplum5=2;|<1Najp^f{YD+v^@C)ezBWBfKQalVN-aW@xNWJ#9nw)NdGnixdQF~tG`>S`PB;&7-5j&|JC|TI}a@L3Ie0% zGXK}9bn%LfB)PR>|N2y!k8L+C-#Wxar-kBwPU}t!mCnro!_Oe%Lvr#A`T9R>jlb-k z2b-7wP&#n`Lo@l$T|oPvyWl_X0x;4)=Z z0n0Ej{n5_cjQ$ljG0MpW36c2s=oaALd1H!eCLt+FTBlISjEn!~9cZSm$XOA=iOdbk zInd1xLQ+Nh4a4t8S2NBCXJe_igZ#>IrqSsQoa@myrAWp}@6ErWr1+*K1vp(hQFmJb zmy6+57{pKXV4Tx4$3{Az%&Hf%(C9K__DtHN%6jv_@8V(Z!zCJe^UAY zW9%k?ErCG>Cg4BeaU9xvphYvHdQ{!6|Lz-z`1ZjO*qi~&!DT8hF33T%7z5xVvkKtp z0Xr0hxzQ0_Rl3sMG`|@>v%x>W`9oM`ohI&Ow?F!b(hHd?=24gVN`gIxD**&|Gzrxj 
zjUhJY-O*dZ(fl^WBy9xI9QuHwWBLg8-qQNQCrhhX|8U0V*4of?G=Hu$rQRWf?Vo<1 z!m;=7lEm~d$+bquR!*3M-|$=|_feaVAOCcN(*~>-dy0DO0XTQ6`4oqTTMv7tADO`k zXyKZK^L*;4Dz%69%}_zob=%kyo=wkb839I)^Kn&=yVItC>p}@hc2^vS8lT?efxdao zUpju6QtrF!5qMq_*e2sBuK1&*eUIDW{i&HtKGN-m>q!C z+CUTdzs{RpLwe7!2TwD>3LRjiqS$?CFI)|P@dLm^04US=gde!CntiT<4$@$oFGm0Y z0@VLSax-A~mq#_7V^HuuF*yw2azsm6Y`^)Y=py#+oG36Y{fS;(C(U1#P>NuC@zwysdfnD=(vQeYjx0JM@Xi+}uF1=qz@x@Rgz; z___`qNo~cii|3HM5p$ z%UW zTkh^bB8JO7R)+IEFpXqYu4mpeB}F=CA@D#YKI_T-9<+%67_fDQQ}fv#^!q3-e(jei zIi&fd(*wD@X7_wdXPX#dhI!SOtA1iUgG1lg7xpP^>~*T28X>*Blc^TR1=+?VJ3Glg1Q;1gb)yVC@czTipjl#6D0 z`r+4CaK+iZM+yqk>50)dECE)gEtov%?{<1WbJNrmLHnbrRwc($jOCWDp!KihyYPQ7 z(r_$v3fk=*Y*waKVl3Cby^_Bzeo>A%g()pSP!$pG_PvwQin_Q>Fay#MBHFu7+x_(S zu4yoA;h88V4VTYLYNown%kG6kdqZ8eVR6FGn?4?X3{}{esfEq^IfHtiRkcfgdCsp; zR#M?AQ{e0J@?5mT{LO9C6UlS`RjGKXjWZIO`nJi*IJDCdB~R9uCR)=d zmVery8yFgY(20`#Z8=#tc61w_qErv{nsITDwsVG|#~W#Gdp&lq;sBtn^`VQ_my+#9 zQ<+xVG)*b*{FF6M!1rL4K zfr^o%Y%Pf6s;q-=6mxh~txHSt_Hs`m-!AC9kaFRdb|#)p-{0>`C`{3J&6?PMWY-{S zzdWqdSl5kp!}N)BoLf%i@o^z6L;B){eB>3`HDKXL%Q6yU+k%vX-Cr$d2F!41E}y-Z z?#kI%2u$|NyT`O&_{O(_FpqKRzY5?HOIO*1M%9W;PMkDZbv1?Zba(i?Jp`YF9a^a7~}PIeo_S!)^jvvvKExyIRqvRcBZS-C9ib zb{wod?x=(`x8LeFy0Er`2Ep9$bqUSBUn7sm#$hp+m>hYm82#Ex#~ci`P^Jpo9?_7uWi zDXKS-v%@`$Nz8S2^X69G${_`BPyExU+CL{UL07Z7#E)IhVmIp__;n}14<0=~jY~s_ zpLdke#Bo%RY-V@e?+Byty`2ib8W;f2sJ(e}M-3 zn2XgP1^SV?Cu`4LiG!NboSkYfaaaR_=#2?UGQ|b$#}+2PNirr~s2gt0Y2d_e1Z&sW z&I6d3abN0m{4Of!=2Z@sc`_T#A4Fe&rHPAM3jkiuk5nPJahDVL%fK;V_oV{L4*XM( z%{jH~K{L}y+N-$lg0^(CAOOJyK*3rAd5k;N0hqnqxQn7%+UNxuB?~T?oV{-0G*LJe&;U>K z`;rA)w;2VDv}->a7gVOuy%ar2z~dFLVG$IO^E%zVfv*-3cjFeNeX@{auzTk!_cBhF z(Udkc&8$o0&iFR)KyCSiEZuCt%}5I!8xiexd7uB)FYs$1-c1po$j!5DaUOj&@++WC z8;Hyu5!d@za1Ov0EQ#t6e>-1#JAUrh?P=5V>xZdkhORQ)BkApyxNNVCQ5L*_BkEwiY1f!Cd+W zS*yk@^;5Qez6_Xfp5B|PGL9-^Rmsr}Z5OVx$=70j_QWbR$+|a2xTIq8LB_SIxoXGA zsOr&2^iI#qZlso42|Mx+Fprj;jK~FzTj|ipmfDKU&@T8lir!?=&G=ZEJ===}bbWuq zm1iG*{mOPV@cLpZw*YG~3KawBkPC5lXIw7(!wb993(0aQGd7lG0?jB#r%ixAQGQQb 
z@`w|APg^3EMUD}Fz?UI%1B=j`c;yt6ec_&rT(el7c{*jc&0XT^BERB}SaI(?5o?;I zMDsw!W=9_xDV@{Wefuz0d}hVYD)JWhvnfpxs?GB5i{=Yl4EYa{FsYLlhgS$BD$bd6 zI!f{6KWR4cS{+t?{3`11R86WD-#T3LN@psu>|~B$Dk2RH zCSp0I1VSGFIx&Y&C|GrJbreEM(0n+LTGTB*;jPTl)aqnqA*45v7X`qDrJ(aLS4%sM zeRWuf^V6}sb2Lsnt)a{vgS3?$FLRZz}0nkhk*M!j7EG0mQ8)| z+-~9Lr9?l>?qtH8bL6(R_Q9<X;G5j1KzE`24pU*Ql*=cj+W zKCC^|u1Z)qBtTfGN!7hrY!8FilliLCYh|~_enH-NMFCK!npN4g3w1%kbxvy^ky^RJ zP8)fG<9XSc382Gd(5LQaFIJkj?vqAAjV9p}NW~i(Nw?aTVH1GmBpFK9a6-sNvt(eU zWP&yFxyx$N@>io;8@(>h>wI`+1oQN9MgiG1={g&VWOX<6X!O}3hPD5GE?FOMk{kaD zyS{&ef$I{vPKobUD;SAc$<#2LdnLp35DA^7Hq~AtjZwcJpIkNHZj47PcSkyQRf&TC zO(g&i)?9gJdaalh+Vic2j9y@BiCrtHdd;E^^X`N5%G;c8X!m%=>!#iLGLbCbq`k(kRCB#T1UL4?|01?MX5lA18WIuEJ%EQXgv}YU~zk9aHJ{ zNpno5*V7t5n-(G<=xi;0Vb6_fvWVP|sQG4uXo1Q{9nv(7lNIAxl%AZaTpVm}^sS`J z!#3E{Bp=onL44dLgm8%~KEFs!M~f0X1BbT-0ZMP8vMUDr@wER*4qZ?-QKNH*iY^7dp7T6yV5c_|pHyj9>0+x??+>c9LTXxw z9A;!)vmDNgR49B;tqNVxBnSX@wU2J2^Nq;UskhBdVaDhRh7B-Gr%?C$LSG)EFGRxx3XR*d zO$K)6Jd9TBMGB^GRocGrWzG0dF5xDL4m%3Jt#95{(a}olEepak&CIOg% z!Z(I5?`N$=1#HNx^-PAkZdU>9l>hfl706uCwO=NC!QLn$LA!;|);85-zjGS*+m*w_ zf=ka>`mpl>_R0Uv0stW1%^XfZG`nP=0a#JgmXJ=qFZH-7*Js!TzyOnVj4_k;-Vxc* z=klKoPad9Gk-e{3vp>E&BrChK&qevXv7xqHRIA|4xJ>+)C z9Gyv&uMa`!Jhciym4vD??lCJyH&aoz%tuGH6rE@UU)`>~|09X*a2{X&d`=WTr@~6~ z>2%>FoRe}yCx^NG?4T9B-xQz!suFTHfyep$^i}YfdB3`hVv3PjqbsO0Q&yYCoqXBo zb?b~|+UXO|@X~3!gTk^OU4=z;guA7E^K4ejwet`#H{iF{O?z(Q^2^7gz;(-Cw(9(+ zotBn=NSx}=i-Y_{%CWOL!ydM?ZASO-DgTfd`2@BwE1CJ1WXIc!H8<>zx*y`;fiQP( z&%yyN$91U?S=H>17gNuCQ|twB_3_AJh6tWvmH4?hCk|~1SwgdmelAmOX8s7oM*Hn- zoSyF&yCi(+WBZsp;tvtd-)=$`PC@dmTq=hL0tzd1hkruJEz|^*P)`r7?8qzf+aaz6 z4-!;e3mU&r7w8)|c7J5F_C#u&cYUjyX|;6WcXvmXt%M$T0-mADi>e6=4P zicd&EpAsTg#-sE`tx<2HfPyqF2KJ;nb6rSNt6vJ~Xg6K#kBMujo$7Sl0Z(9(T>ckU zu|mu>!h0;b#Wr^v0s7bEnF)Ep${H$7hC;EH$l-)6UCR#`ve-qVp>o^2`qrg@r5bNk z$);=3>d0&%#c0T#;Uu`ae}z|>&R#@fo z*e8Z`+uG5a^^Z)SeBP2Fe|~$~CqT;-;;}J$9G-N;hSNbINoWabk5IsVn1I|C3V$dr zmw-o6y_zzgzc(a)h6xwVYtiX3*C2|eqWf;-Dw|Ho91LSR^-xq;C3FzX1~nYpKc~skN!0;$;M8G-SidgBmb3y 
z{je-e8ZDm%4ZOD$ECMsKk|Vg2d4+EB#%{q=C?4@ngf~JjZ;4@D?#_Cd5(KR@cHnyj z_vSHedS2|PRw8%osE9dT(w!RvcsN?RBP^@+`<-KEotUkv(Y{13Z&Z#d%jhtYF*cYi z4%8{rZixbfE8A+)bdC?=vo!&t0cqolEX@+^3@Gc4TP45sh|e2PPMV%gVQASA z5*=KPd6o$eixo;#NKpq3lRaZ2F&IrlSs3NW6MwhqXSedo-453~9n(t+QB&m5=W93B zOXgD6HdAM3-M2+wwUWDNJWw%c9ebw#N_-^AcN%nsJ%hro#bcoD_YFQ}xaq}#C4JI- z#b?bjiI~dtx4A|X>?iqC73La;kc-07@;$)^=A6GqKKCq*j-5DOBX{T3V-2v_&yJ^P zWVbHlgLoA*7xNhS6h$AsM0=06uA5mh5$8AOiT2-nPTCgUlk=KCH>U^e`H_5gIam%X z7rHu4GyahCWx%+t@Gh*I=D(X3c@lU3tbl0#Cg{efaoUg4TTFkQ%)Y}zR_2)0@ct*p zDP)kvf`Y&t#dCe=MM&Z_#BCN}@#?grh1^cN$0(dMjYRk4Li7)bX=Vzsry) zQVp>yn2&v~OmlOW`3B#5<^F zibjOXj5>C+3$p?*u#gzs`XzP^D`c&6SY4R<=zOP7e}RhC62oCO`=nH2^N`@aDaWh9 zrg*^-!Mxa(#0QOReQDU%+Ag^hOqhip60v(2m!@~lAMy&jbr!{>WJWZg4}ap`9=*%d zWwr4AHvt)IhVxLfBZmrv3xU%VLTuG+K+hC(1s5&-f?c`W_x_pD^pnmSamfL8+?%w- z0p6O`tdqu`Cu7d;hlfaR3Kf44ZU_;{J5KrGyWDly%XE~4g)o_|A9}cwlhuuAVN*DP zy{&``&hBX*S1E)J(9&cWINi+O_O=+wwKBPywY=+05MZ8C;_H_9J=3QufdJE9ryY^c zTiS~ejwN>+N;l83&G1dN-ozfFpysh$23u8K3HvYW50qI+uU48-O;2JGW^o6Jk{f%f zNsP@-jQ2>XDy+aV{Pq+H6U$c7T)OW7tdibNg?CNQAf*$YO`B^`pXoA9Bu8r!N&&Da8Bx&z>K+EI(V$FeDg8C|A85<;a-z zd!ka+5~!y8(yNjFS+ZwNMT1s~srD9Sn8nn31pmd&Urx*FV?rLUvp zf0UX=-E(I$F6>dWjg6YNNIo)Z4xa~iMjaA3!HqJ$wCcdN8`>Z^HYVnpcF8SxSQyK4smT>Da79#eTz2^iHNL}0Oo4}`&#CewJZ zb{*f*B*7_?{?*2OPWp~6s0mH8c}%D$_ZFelk4L_*^`BX44ww%Z`|6w=lVy%%m30r5 z0g84(Uc&nzH?34056M}1nJBce0P^*UDba8%TEyOOU0W$wyuc@*G%#Kw;MT(_bFyqRGQlH}yD~tUnvFM3WUm$O^Nl2@6S|sxn1oF1T4-XWddTuY2YA zkS^i$)n420k=2jbwhJVR33!E&@r?xECf}7=mHYsTHpONqw%;yr?d>0?5_Bu?EQBezcGid;8Wd)x zv06RRKC)i15gyqM>=yeAul6C%8)nWbK_7i` zKmN&|tytyx5ukVP3iJP7~DV!@4tj zji~$NDLWqVi#gAmECLpqHLH|XZu+HX9o1)!DouCdWveU2(MHb>zFfg=VM-SWPkm++ z3p0C^e2?-Qq44;YPf%J3y*4JcvB!m2Kah{sbTlkrY@>Lc{GPf^olnf}&}Y-0>f@0w zNMBviEg9Dtw|S9&AmeQyQL(Oeu`n38 zQJ!$Gz6)G+UtHn$UHlfva@Gn{$PkC_bQ#JU`DA7J_&2;~`K#w9#jxw|EjNTiMgq-k zPsBHN#)G-LYGj#Vzieqd_F~p*O%C^*ZW4IjjmK)6W`+`I6wGP_2KTQ;tk7x0mz2L+ zuT&`npPHwb6utdag!)2e%)42CC>Qml)tmZJ_bu>ZJ-HDOewL>`!%-mrm3u%`z8J{$ 
zey(t6`IYDMVVpBj2>%`i$`A;8}pS-w9oasUPgfvR( z3XVA8c=kig5?}YM&0)}o+xZ5`8%Cn1B3J7qulT$pSMDzWS9v)Jj(Ju-N|&e+*s|W@ zIve7hDd)BmxsM$qxW7KI4?>QTNqW~U7OT`&q$8@H`Cem`7}5H5D3iP3Kv1}5bUkWI z(@tll?v+C8pTNpiv);Cl`(q4;x83?|Mv*4-H6+)s4b8ad2XNOPc=Kle ztn3+vZ$*I}=Q^SQQds~)H@=gcF--N<6WWaB{Sm=01L(`qam@@YZ=$(b;^K1m|7f( z!bxqKXJ%_m39VYJC3-3X>0>qoRSwdOf%cuIvN^7H@K{@ohmzo&&Ay%=_(*OrQ|35? zm=;#cQH*MOr^5JE)MLFSL22+q@%KUW4%^8Tp^3s)@uT9YrGd;>TDW+kQi@A>_~+Q? zzYda_Jg%$V6s+kZbMczPyE!0c8yAZI*c+@nX}z2M*wU}TL1{2Ep^^HyI3j3c(T;jp z?fr}jU3yXn*dev9gP0tiJ`tlu1q08M?1V@ehj*lI)S~N{u-?r|&oehsxS}BG#(Vkd z>+@D@HtQt8gQkLwgneexvy2y&ny)s|FdMrB9WyRt&;<9*`6mK0N(KQNnQ%J=~4k zTdcA?PANi-Couk2s3A4AN%q!rH5rks^ryJ*$(eM?#^`scQZ#Y)`)Xf;JP^ZMgjGegm9~ac@Y{QzPq;%MGt| z1&xyu#(FvQ&W!0~?0jNK5L@(WbZgy~;_*8?va<@M1hvn@n`5;g0U*i5WFT8cyD$+T z6pz-1I(k45tG=453e3~-I($P`rn*ixSB3pPD(;Ep)L9qQ zs?xtk$w6eFksarfJerhb3CH9~Mpc9`Dy&$H7Ckkz@bbW!6@=1s1=l4O5y{dPw*9b{ z=Czs81smIbU2$wDjk!@tIY|z@OtBxYCI&jnH)P)~B3W@+B@*nnkAt_mv#^(xK{v*e zQ;L1@zfV@yrQ&G2)3_0BHnf0GU_!xS7C@c0YH3M&_p@u7fKFY^ruYsJzrO$ZW$hkA zFfr_sv*c*;FtP{t%9o-=$kIm31p3}$KAUMHKEnImOGVD?E4?RwN-Xq3maD91 zlbGYM&$=H8>+Nd8Ml?QY`nywFAHvQZU0P22R#W0=u2os;TDWyZ<~Hj&V!>En3Mi5v zm$;(&y$n~%R{Bz=Ypg5H2cew{4;vk;zEl0ib(0!znBV~`eWQ6L%J=OKXKyCQ;@~+6 zAA6@xEVH5W$ZG&i>QGsv4m;wh-4mkJt#fE$;O&iMPg{Ub#yoaL99n4Rms;%yWHMS~ z>y6OiR?FkMCj>&m8K)p)z}0#ExGfyzjQ!=jq$hD(a*>8ejFyaO7>y-NvlA z4S(Q4Z6Lb^m5l`|_i)wHxD8jwEJ6O2QJcM$5v+IgVre6X(`DwJTYvHd>R#g5nNh5n zMTvq!dU>M~P(q;fj_jsucEHM{k*`j9l!ZI=T3qv~O|XOqBU-BC?QInu7q7A$1certr? 
z?HI&9O-Q9VWoIaa6eX#l>{|)pi8Gf_ec@OFg~aD}*2}*lTP?!@gc}qaz+h^oZvt$` zh^$T$n^vTz`y0?;BJaBJ^NX8H^p6gwIC%QCS|&nsP_Phk-U9Vs!)wr(J6aj?oNF3# zvkyYDWyv`yG;B4Y!ysa0BrmL7 zhQ-4Ncb4fbYca?;Vec(|w+h^0Aff6*RyDXmod`#h=;)7&-_@nM=ag00UNU^$eq{(n zH#KG*h!i)t>JcIiTD--4K{YU@S0bv6)IQ03<#w#Fv-Qwx=Mti^k>R%xt|Ir5PoC+G ziQbzXx*k?UR07KUs_{w>)EQtIZ#$Wr&1GXEDk#ZK`7)}Fi8D$=SY(JY`S7hh1ogbgh*o8v-gaDC!9Y$E321{fu+l5O`f_=EKqMIr3oA*oufVf zR7>h1d6Psvg4ZjnO&a@A))AS+mAGwkVo;|3!bKf8)DwhIux6*H@K`|hF&P#fiPfl$ zPwfz=@J- zlXnLjb;DkR79!*QTlV*3Zob7a=Ih`59>|gZI#RZA;|V#a*qRK|vWOSqsurl^Oaghc zVZT?HjeUK^vZx%_-25FIQ2BEeV#wo%>NB&geYA$U8#=-$wUo{VJC))2v|08g5;mQz zAEj|o-d94*OMP5mP^D2*Cg2HGMRy94%Xt_ zwt}M*NR`+#LWpQVlxmRXdGMXO+prhk^~?(O0QvfIN2;hDV%X#M7O9VVVU-LNqi_hK z;6Vj_hLAL}oh%xau3P)jI_7**ol2JrYD3^4ro@_pUONw!MpW$(r=Y^2T=V z^!4ETTrGLOc`OjCkUYd(m(pR!#0!;%fhedb#?%tZxWsZ_xw~Q|9X-1_^?L0+*2%0; zyq7iX*!J}@bGfjENQwK#xZO;ez~a)1?uq$fy5N41pnK5!vu%a=^S3{KR^=FDtwE&i zh2zpkV-ZtTRF&>vpO_Y6Yz}=Xz$RZQG^MGLYyW)r#he(y_{!1v4-cx);=9LM%`-Nh zpE3`cJoab3iIs6>v7w3H@x)OoIn_#tY~u1ElSt3x&B6He$jebVoW?yk}Q5yvP`~Wdg4oQoF)~L`UCsrEd-$f zk#Ih^okf=rZ}*~h70^|+QbrJ0IGE!#@W&Gk~@_AJw@JC9^`DA zdyBOpY3yD)H%GP{**|oCvs#PBKpn(}v(=dQEn?EH zc1s0CqmebcTZfW6027hwfx*kCoOKmV$?gfIgH7f@T_IXsQ^mx(#dB7EIt2YtvOEQh zh1Pxagz!eR>|u@!=sty8@k%2&va^V>)*OH4tWfAcqWUDZ%b2MjkHZFEjR z1*!%?n)o*yURAKXMf)kfv`A}A_8ok`nouaDxsMhr>_E|0d?K5Us!)>Anmds}yQSL5}M`@exx zx6_91Mhqj9S^1y*q3`Iup%pyB%J}$9S?mjLWTNlYSTj*H zZ>axkO$D~r|Mk^5ag99*l~HpCYy~ru8vEZr;mw5$ zY|Qr=C;#Ux;7GG1OM0Lz3;>tVr1f@-*Xba$wWdHP``a&x5fEn{-VHYS?=xI}d#4mX zlv)%6NWo&MG1PoZT}Bh1HaNMHfbgwzXKavX71!Rao8WJgsZXGWH}Phxo3S9*-{;o9 zO|Zl@elId4{I~k)e>s8Z2Z3L`@k6ivHv7`j;<@O)eW81u$>(4G;lF)^mdkDgff8Y8 z*k2#Vp##ThpInW)oXG$F$NupvpE{Ev?e7&LfZu_L- z4u+N63y=6Fix0C}8+_l89G0E_-&g-1ugB&0q^e$of=KLFNAaP7FBk}@#6G>vvP+Yu zcU8XC=0Q#tCrGRG&7F&utIH#eZ0rm{dgIR(U9@)thdhCgvX7*&UZLqww#=6|%J>_i z+d|XP`Ga^P+V?x1+ny0jSP00&Q&%)&c|M^e<*I0U&fNi=*Oy;A%58S`} z)SNSpM1?1Fw&Y)2mdi`ktObrEWCw`;>bimN?bKhPgEU1A(*3*h7lPvk*`|SiJuo+q 
z^}LB;vodSM|L*+YPJxxER!IKkz~SJXK6w!|y8Yb&6rcX(f4dxkV>WPHi*@qfJuja3 zX{AukszN|n{L3rwi5wg+B-{Mgo96F7-$V$W1Yy=k^#A4-KwLg48o^F~`Qd22y=Sl8 z9KfVR6J2hc|LOx6Tpnk8CHz;%E{A6u4DXqu?0<&;Kg0hQSK$BqPyd`39UUE_D*IpF zUoW|A_xF0!`ES1VZd@6FY(Y2&ujF2fokct5E?h&9cu)EO4^Whiq&c3lPxDU8+nH5uOoi^QEq;nLa z=o|E`59`s?lU{Sxtxb=VzA>o8%A3KBdmezDrnOb~@<^c+R5N*{K1 zX)4rmy&UHkmU2p(Nc8&*iKMgd`!t=`)!Fp*iV9bWlkk|-g=&|txGYCLc-mNjE=Mi* zP|k9l%6b|9;e5-WvF@M`F(0FWdKuJy$|va4%A;Zjd#2-`Wsmu-XG)U{?@@=Hm=6A7 zZJm0R&4ue_fJHUWOiN1;rJS6p1Ce47+8zBwa@lksGthP8#2)IrJ>_)V@ALV~w?Exc zT&}_DhJ5>~Ygk*TV%lZUqKlqc!Te^X*dCZKULyn_(|d#5SCG^22jtm}mUQ+Cg7gZ@ zAD{6-FjWQo+cR9W(hC}rXigo?GiY~wB=P2%JyAt!?Us+zD|#!j$I{^&{TY4%mr>*R z1FtEBuhJ3!#&r!I6>!8@y7-+tI(dXXFzm42phNL*n$Eh82WwDTh7tUZVx;CRk_#&+ z_Tm#TfUi8+>UUg?%*wSiK6SCXeqcSFsm-ZFYBCQOoaM6~(5YV^E|Lo3lh3R#P?vH% z#rkzbP;H2?Db$5d-)PXPka`Hsj#yfY5`QpSQcrQY&2kSqO7l8j8^~w_r4O}5i8d#& zXg(~65q3SzL*FGNV|{uuRMJ>&XcLe7&ET}BA!FOgQ4+}4256ktw9d#5*Jz2tAtta$ zttf#%Yg^0@3_W8}LP+#>?sPiyCJW`COO(Nz(uhi`!*29SMJT@G7D@e83{80ZA%uq- z_aQ%P*s;lU0RXqvB74D2W4-`KP#w1w&lW%wF9-E^=lwoI$xtAQ5uB0F)Jp=v#4Z(26eD=()YVjFNVGhZ@)~Z z%}ZnCaml-O$#zC-F8K>C2e}+J-Sl9J%M9c5!K+q1)SNa_?yk7}3 z>z1WCqnoiy;f?`H!x7^15r}^%)-K61o2|Fl54WOh4_?9~HpF)Fc@)OIFPWZwv@bES z;A(w%ij0gpOmyH4yWdTH_Wp!~ZClv3=Jhm@PSxg`_=ko4Y(jAzjx@7(vn2*iPmfx) zZJeo1ELfy;-}Qnk)%POx)W6d5=nHe&Xy#-DbGV1ii#v`pl?rfwajVnWi}RJm7hdPC z*^(h+9+sb0n^O^blL2EjUGJ-u=o0agqwSfQ;W6v}Za5CFGnqiC&6qcJu}wRR z01~=7UQVt*$OyP1;^Gu&#FK_~;c9DoSEtWL|s*u6rOnp)CxB(b*r6rgWYzN zH2OZ^g$mxco*+N(&Nk~u6T_Rf)2%P?a|!^!WOD+}9N!yn|BCj_HDHgm=xuO~(t?X8 ztXY$f7R|cNeYJS_G7F>O?%>%iQqH)8J0ISeO&xfc0!`lgfT7iqEoIR4u5+8__Iw;&*plI-{;7i#c09M$;PIuRJnqw|m?IzE6QEXFRjh70oJth=HMA1u%KG)jo$n+}$r z?UNULo(PcL#+5^8zoT$Kq4jI;8}KcGWWmOPoS`qI(&S%92qriw$3PmCZ2fXysv*TK z8vc==(8_#08|O`AwS{mDf05p%C|$eW?OY=CCc)X$5{)LA_HL!b0oOZkaY!EFZDbq1 z2eXdXHPA$49@`K2=3PIbJuaAWIAFl%6hVWgXJz``i3wS7vcvi`K?q$Cqc3W>cB{p) z(MB8zvhnn_O*J|qPB*zN>2`<-EXT>}0CKjwIQgnG7UMed+WyVrb&ct88017y4eq%} zq&8k<)Wq5VIlBg3q*5||nk0T+F{)icju0f&;NP8io*#v3dEeU@y{-LjK+7QBCK~N- 
zurO>1X~H1GbU%N2Hg!uf@z}tHLV=d(Xo@l^JjFz)mEw*Z-4hA7sCVd zTJFqpCG#Fil2CF(nv5I@cHwn{T}}F z&9oU@HE$Gk{1_vN$su;TX{=C7=_)I_f?DlzP$9z0wXmQtriizUdHOxH%{=QnKv zvQs&CX3~i|luoy5anX~9sAU%9{?Dh4?YFi+9vtW6?dr|q!%K`hl#)J%=cbj$u<6#g zOjwS>H0=I-Rg{!gG!}#nt)zB|b7A#6%<-+(%C~45E%d&lvm>qj*t|@~s){SF64Z{` zOzy21XAovjCnod&47Hbld0wZLF!~%@(j=eb&#fcG4v>3$)^U6}IUUV+A)BZvxd~?< z3P(zE*Q#zn0CY3C)xX4jCKa*_p~}Bb7z=Cqbg~fB6*q;s;QaG7|9A8f5uu(3O|L_Z z-th>U*q21zOxhygeC6$^MqwfCXIrJ zPlKSP9|%55V3nk(&;OC#rIlEqOd2LVZZCWp^l2&VA#yK!d<0Bqd7?-}t zY`*bh-hSEf>pc(!s!fZJ^S@~tprfa3Z9c$((9q3b>OAr1dMa*6Qv#F?zVgcm4Hl3F zUK=?Co!(Cmri*iO-h3JC)k|FhMTWJ|I~$H?QI92KnR65$RKA};3s+goCyFeA?pYDE zgmHGy;YM0w9?%E==S|xWe1(8C1@c8-MBD_af{OD${DIV2<5Gq)GC#XF4e$eb$y7$- z8@RyeWG?6&03QbjT8C2!1_^k9AoVqNZZpRj3oOWg5 z)n=_)oubYy&74y_9W8q882hdytcIpL6uOdKfn1!8Z1p3~Z2>(1!e)^<=rE3r~g^J+2O4aT0co zd7aK~@Pm>lX*=_iK%$K^>y|k2PffoV@|C;v{a%a4?oeyp^cS3JIdku3jfX}jfQT$? z^rFjv7Q9R2Q!S{(kTSe6U3a~M2h=7XTk`m;KsyV20YaIX6%wPl3=m*NYAOiRccONi zaU002KC>U)%KW?H;=O&jfv_0_>@u7CJscOz7RuKxf{2VcUh??2oTsYO1sX*ARe@K*p-dB33vAFkxY-v+tw&F5@x5 zCZOf_5ADBoWF5=RVGdDD*R1yb{FV-*SSqnzyMcp;tNjjj4t0Udj*PNwXNXX!=-*id5EyfA^}!>KIac?k*T_a_W_* zKU{6jc>FNUZ60M48#oa!D^3tEP_$X9(=;z$G|dAR>xVP>Xj-+|CUX}O$?)b;Akbaggz+ZUk5s5mn`DpOIUQu^oQ z%FPujFxfb+T9RIbdRF)o<@hu9N4DX*CkLzCaIG$gfoJj8*wKKAak=cc`J=6mjBTTG zcQ0%-t+xieq@t=P?_l3ecG%xXtrwo7xWtG2)~tEwIcY)j+V!8t??^xk`w4DvHyi8s zhgt?NML~-IAbdJ)&%mT^U|vBEFKxPa#CP~L{}?=ceW@c-Q~Tgty3rZA0aB`F(V8*(PH&p3d+Z3em@M5E{vT&y>>$jqfwwS}i5uV%K%d6+dDk-Ny}3A08{-c+ zpm@%5AZY{tE&O+N3qi;tP^i0NUIM)u%c8O{*~G;O<4plD*Vf^a8D>s!vYl(0(h;#t zj={bsAM@t;kQ5`G35g0xdH`^`RB6%IB9U!gf}Dfgj^mLYU7GcX7K*WW>Vwt_eK3GT zXrMPW4;A37G<}CNsf~BZ-dwh%W*ou1i>B2xG|zHO)Lic*gQne&UlL0i$BJedY+C3$ zlZi1avrW&e%_BEqvX6K@I%^EH>h4Li8*CcklTNgQ%|5q>e$$qauCzcP_fl<|ha~}6 zJfWxL6r}KCIrY`i+0RVQiZY8^+ZEgyUV}EyJllp;!d&u7^IGUim19$J!y;Wy%cH=d zD3t1V`7`@;VHb_H355DnX$*tIieqSI9$>vMx99OscS&=+)BR$JLsOJw_CGWe}wfU z+o)#m5XI915iyHDDSx$mK(Gs*mr$h%j!ss7nQ?t<=dmaol+hU0eJ4DPzl>pXtv 
zaV)-p8iMd}@@(y8u$5o@&Jc#6CW8J?v;H-H59D+yEUvXP5^dEol_gwjCnC0w{BDr( zBle5VdtEh);E$t5T{1^DTCE!sCr$6q7RpsExF+CNPD4z%27=|LRMQ?W-YK75c3}KWrz#dk+)n0djg=ni4DKG51MZi7@(ygaCQ?!I;OCw z($&XJB4&^5dBvo|PfMW$|F{m~c+H1Hipjj@`*zL1CFL%oOd$f~0-JZo%clxeT2k8p zy8b5lK=tQ2EA&p$uqFT126uv+A{BgNe8BOISejV%38`U=`=^NhyL$wK zJ5zKiP77UI!dncl@A=ZGTybvMBmBPx{qX~5mzwW$~X z@VNm@zZRs7dkSRU28)N`O|iL5jgZ)fCz}&Q65A8UZa$Ad|HK2XBHd##tDj)kV)+w5 z(#rE2rW>*ZJ8O-iV_rWL6S~9Geizw@e?HZNCmwB;fvr6%ZKq5%_>f&ksGjY&{}txO z6JOJ*(45g$#J0HUoCZ8vM>T*7ka~O$fdBjkmFyEeJL+`VO@a9HGEA*%%_8&zWMppyv62=V)b9XRG@)0r5d+}7lSP^=SoR5boVw%L zcAH7h>+Jx~nQiA!5dKdQ@b?Y8CywXExP$IH!b7!%b#EesDoyRDn&4hge-Ve}cQB(I zsi0=nX+9rO7s~35d|84RRF8L>3X8*+JoUDCe>Pgw`#dcZa z0fngB;%W3;nqZj$?5Acjf#Gk5PuTBdG3DaY_6x$x zgT)Wih|UGYp+eTYFq1L*e$7W(rDi`(^0eklwL=}Y=d`li;`F^o3 z+ay#HesJ6QPCEQv&rFMS!7igax+VUkq}jAQ6{`|gC;cUE)a@T>ea4Sre()L*r_C-m zZiFGDY~KKQUn6znftDGuuh@27KiXiLbVdS`9wjjo6xesWIJV5J@Yqk)Y)#I#()=2G z|6j|!6bqHSV~vQSF!X`h3YQkzet=i~dl3ZA?%8OdOZBrR`@hD!{1Gkdi26 z2BCI7x!Qd~t_a3 zQ0aq?@N22ZGG5(lu8G=n z+^Bm@4Qc+P$Dj`h1t$7-RJbt88Q2)nV@^)(lKi z$%RW?8u6|33=80}volY8ctMO~2pHZ4m@arxtIfKcJ7wR!frw#Jpl^y{?X+MYetqFI zWq5*vA)XoPGD|;z-&qs!zk#b~mF}$^x&ldv;j_%{Ko_xSS_=lg(?fp6u{=(NNC*mVp!0OOe zV59DP^AEbe7A@a`Mce){#IKL`ua9TH1jorBH{Q$sB}NeR>K*MVd zfj|G6L%9DL$V#^lMptM4=J>~J*=Y~!mFHgzemE9jU;oATNZ8}AkAE-;x(a#s?)e_s zzg>VJY}tjCp%R~ zfB3Ecclci0m6Vi}uf5{G5om9XufNOB9Ernk`1BXt=@?!ClYk2dT+w9q^lhkiP2yXO zf9!SsXNCN)pNJ_l75a3xSwey0UskSBf` zh9@W$n&?|jv$yuE9c$uRyA0+1r&a9whr}7m1 z(T)KUSLT4}DezbU#w#hziiserwmp(ovRF-ZqDRku^3^$!!-}%1h?}O2?V4`!A}QvVF)@8U$;^f=CR6*u22J)EhzWhX6G~K z4cGOIt~73knNtM4gFc;zwVUw1K#L<)NeE`aLvz%J$7jJyfbe% z-S7gwHt2`f3Wh&ldtU9hSg2Hs6{@%#vkLtt0u{^GcwT*jjVi+&u(nJ9|@X!)zVaKXiIlVz-!@i*u{ zV~#Ap;sraagg={=dy|E!n5NjFZSU_;@2~bdE>(jOmk9d#+UCWPBF%Ed`9|Q)TW^CI z3j6ng)!s5PiOA+{rm&a)T=0s9OE%60OXiwQD`HwThJ$NeFq6C06dw$ z;=fZ+^ZKCgZ9l@|`oy`q5;i^?hu5A^gfs$OUNa408AU_iEz`cirYpd3%K@=L`spAI zX+|yo^#mJIRl>orIy)jLUp`6Q$Oz$T-BaODYar0;y0#pG`S8YK+S3@R{UnFeAa 
zU~D!v`vEIIFlA3#xIG4&2K&?CFs8M>qiM%hFjc)9bPN0KPEYXeyE88$gl!)s;{r+H zbm6CX;*rw8{= zAG?&89n4{>qzg^KH91_ip!OoLX%CyhkIEbZ^uEBIt=O>2>at+?;T)J&mPo!pel)ky z$&?Gr5TJe8?A_UIADR(R`O&7%?xl#=mQT7E=<@rW#}koJ9nYBC#y`$dgbw6YndbLh zzCk0fkYn722p-#G8k>Wkg;~^NmnPJj4IDRH4Z-NSP{sHWsZs9pb&pWLg)^4v;WCcA zB*=R{qdmGnfKT80o&A^@V4)Z{vD!go>NeC${W0!)0>U8RQ~r6v5V%Q#RvsmXj+F^p zyFHlHvm^kIW644rPE)n0&#@gCtmDus%p@Ov=CS|G>+3}`_OkhEZm`k9?-){_;}E%@ zz`(Z2)ry2s#x+5cNmbOe`#$L_w9OZyxrjfRnSr~t)>^GLQGJfzKavmE==ui}$+!zH zd*K@eiMFit5pCOVeQ>b(-Fk&qO?dhoo4x#Px4seFyH`hdcp4IL$@!^5rW?7E*uf>t$&Dx;FZ_BLZsNhhEjrg5-k^ zU9kb5*dTJGbiUymSt9JEEgpYK>%v%r#nC|cQ-k`@O5&{fXd)oY6iN5&8`qmoHX8)N zsHLy=#%l65yY3z5k<~cXd3D-30249T3Fp>S-4U27)GlB3YmeOSwta29k>$O8&d$O8 z#RGD6!gh%e6Dbylcurf4Hz23jki1AjeHI$3S$Au-{Iu#@>o3`qSK9BIjCMS-0bpVx9_B=w*xkB$dyHv91*78mqFr+P?ArR=X{U@3n6= zqPRIUSPNVzmVJmDybkhl6WCR#1RX@{>3Gfjzm5Pm`|4}6nPY=5I-|{M?X?$^XY87f zohQosJz zi)Q&K^ORWrDU~5?_q@L@{Kg$ zwp?bErPq?}sX}n2hELXVgm*x;+E0Y0N|Ew>Es(kYjG_>5l)%XLFen=Ffr@1DoQktN z)F3e9BouY-?2hw}E&7T4SCbHza>EvRB5a@KWw3O=5eqe{Oz^;t5L7+%Zq!(ROjMKb z0)Kgk9=^`W`SLJlk~3P+HLNU_T8LT*3GDlLpk6NX_X0&cE=|&8(rOeB?B<)k1imhN z$yNRV=i)$ht!nKQm&Ehfv`V$`@>k___OS!kw*I(Q(QwnYLW*c!(IlMDRjgEi^r6zr zBK_VDE?}tk6|EUi|E;#@JHGXhh0_?vrc}9ylmwT5ozmJ{TgzMB9%R3fZsV*Xbq)9W zI2bxqC442WJ|*jG`2hAy*k!dJlIsb(t*ksm!2f&P$wQQz>UqL-NfT>n2ud29peXTk zE8y~<()~{=<$jq$g~}qJw5{e5Ey5@YW2dV!lZs+dyH)#~-$c`4Z^S*=+WJvOZwG0D zu$5jN{(e+T)tnPv!`(b%XpPBhA%{kmLlfpD3-*9vO$7DKvAN59|8|rM#Zy;Whp8lP zgAo)6wQxw@uiS(*0-U%HimD?vhDHV}Rh;JFqD9B&ymu2@UgNP+JOhZT6yer%i_0`v z=dxe8FyF5$j}AsDpDl(Q)D2ZT{k#y?Ouqm6L4OM<+cc@(xdf!v!gH&^_x7u zghmx8c@r@jH}0=0me+~9O_68Wj#n8YrYSm_+zP|-xSJ32i!Pj8%NyR9Oy35$b^7JH zl>N1Ppi9S-x^yW_tn|KQSuxg+=j}a3ePs?a4VPeP%wM@^lt_iobSH5eJ>lP545=g& zw5}qXLUrotR!x0q1~4B_gNG$`hH~5^(`p@By0E)^5BzQ)RDzF|EYa0TZE?fiVnVrA zt^Mj8|Av5uUd2Dm1yP?HJ`hp#P;mV2Pb0QpfG}EI`vJcAfAA%4kd91EOkdIl-?M#= z^5!b%Wd;8J6n#S%kOE;va4a);SeDT@m7fj!Nlc0U+~+ zZJlVscqvh@$huD_2r_}Gm$$^StVNgV4Yf|Axwt#o#ZiXs4=K2f)SrG^{91=-2?7Up}dn16tmn+SbNN`63=e 
zz49Ac>V+W)Uv+$cd~y!!2Z2Cz<B+|QezGjOCK-`hk4%sYJh%JK7Xky`Zw zcd_%RQ=9daEn#Qdq_V-#oX})}yY!KY_kLi^U1Hz;abV#9wq%l+ShorOsPI0rapMJo z*7LBhr@ZQzs8uISUos(J_h6-}XgogYlj-iyds)V@^u3(rRy=Up0oy7M52H3n*4F4Z(I` z66Z((7w_(kXXuSBXXywdRH0aypGoSZ*|RYmMp%lh&>59-CMM> zhABteP^*3T(55N8KjJtwbodxxa#$I@%|C{V$k7+p>mJ=aHXBPUo;^t7J$n4vlAiYT z-oAN-;NMoISNER#)a~W1Ob#uRVW$eYT=pACJv}Bc+MF#`@6}|9I&&7XpTT_KKdUGP zd41M_&fwC&9Nc;#uwEe}AH(Z5|I@VB=@UgSU&sfdRe=D3TMD4ZEonsfF!%;W3{&QU zUAb`?Ii|ORY8VM~85&iUVKjfufxH-c_s7GRSn-!HRc{->-C;MJS9(A~@JFzZz~;EU z8qp^G2z;WX{ey(XFfYmfxR@i;yx3ZLeOv??%#pPnEY;a9>#bFMHWWthx94~ua^V4r z*${-xIt2-UOhz+HtEwQq<#K*^HqECodk%rwNzPm;_t~$ zY0~den_~sDG^n2iFcAV?Xxg;k&h!#j{VuZq)GlVypyZjXPkeRs`a0fJsXAWm zlMTh18PZj|A;_h>w@hmwxc=oen`S{5q@r-_MY_1#%whBsy@sD|{BntlZ-$jr$rYpT zXSoc3osq1bgLWKi42S7^dI)!`zU8&x{Sy>u=m~o@H}Wr=JG1Qe=L914jl8L;Vh29O zBkNiX<=?zA2z++y!PAv{agH2 zXRC-)b5ZK!V%fOy5;*@rzRte4j>eKl(0BP*a&B#%0K0zZ82ldhW&#IysyaeDE62to< zwxG&#&rZ6R7VGdsg;%v@I(R#PslxYgFN3NDZcAmp(AA!_enp8%mtVX-t;r`ZuE7zm z)+<5sbv|PsC%Q$>D5{|oAo~>;sn@Ib$kMs(Ou1-fci zx)aY4kkx#KFCr@%u(Q?SR-&E5(ww|z|2@>@SXFr78VsBwe=Yv!rY6L(MO1buE=B1> z9dPWq=1=V;?_k`eP)jl8g7TxoX@skkp#pbzPPwYbkL-UAb!g!%s&`XTjscVl>G^CkMZraR{Niw{S5GtFw_Q(D zLaP^S(#IT0&XxG=Fa5U5PtHTd)gGw^RWPFZ{8@eQo?kRkJTmTnXT=ghDPYmP+IPW} z^ZuBhajbB7fb4;I>MENWsnGRCdsY82>9tM~lu7)DnD-%QCH53;?CrdMl%<>@j;3~O zzDq`v%7j5wdqZNE=xBNQ7cq`OrtGSqMen5yYHnS8acH@Oza*(Yh1u_iBg;7_Ll)5)}ui4=(dyLWQYE{_mcznsivdNaB=mjUT8fqQ$7A6 z9`^3R^JQ+C$p!Ge7%!L=GN`}?dDjnepCBa78o`>RHc7SM}{ z`1oG^axHIIVvFq75}lNa239eeT9lS2Gc-Iep>Pd7F|iF5JiT)+r#%YmzwejE&*53J zZcY^X-1^L3#E+Fhf6}+U%XX=t*FQ_CvAW>x#qA?Jd@;skTA#z(x9k}0>wJB(Y}mLMl7ZW|H?1Iq7(k$LoE z<>ct6&;+aq;{tDNG})N5vR!xS{yA_GH1WnaGhoQ6?_93S@p72O-8({4Mz zJY0C{BDX5=HE5%3r{avy5=I8^E*|VGP<+^7`27RJ^41u#GJR*POWJ;8F9Y5zU4K^~ z*@jQIZ)+MDI~p{-s}nTcab|~wShXmO-NA1yB>y1DfQ;Fm<+)Old*@-jIdZ&rPc!+J zcdNwl@Ek6AbY+XbDd#tzek=zwGJ7cn_J9K(q*hj0$jlN~o#*A2Dt7yo>&W-r)v>a1 zxd1K7@`90k@#h?S$*|iYr}ea zeM*PRNC*_junKVoqCph5F^daygHL`F>j5L@)n>ZyxY)wP7o9scYX&1j&TjcVsgMS% 
zH(V_8^~vsJxY%TBC)#P0bl8pGtK1YK8HW!zc zf%~EJIg9@i2n{Uicbu;skJrQB{%)D)G*&t$Z;|22n=Yi9V*y)%&1`6nx%1AX-+GU( zdQGvX*W#`*92GFDh){Dt?=XU>sbN8`6Un7}yFHLbZTgG^jHvlk1o;pla~rFEXcLWV zhwn`px@1R%2ufZj$Qz2>#2Y8ZHeCe{aKn?4BURW~<4zH?DjesJyA={2bxIf>uBZ$Y z5hr(MR_JCuFy0X;3AJvQ%O$^w%CQ>N``$74NtB-Mp0_}Wq5u&QH|n7Oq3a@Vs=)q5 zBYsQ_FJn-s?jVgY-N%WCuJJLxdO;gK>kFUP3_?D6!|%C{_xsxXa$4my4IqQ@@+>#a zHWa9)nwe=~GRq)tVC!Yp5XK^T5uVJ{@K7SkrHKT}@$!eJKV3nrXHE7R+*PvCS-UiAGvc_C*G zju-jfwE7$HBo%-sFuuPBPc*K<6n4msl)nZ`ZdwAcWb^+5o@m|ke89xSlmM4J_}@}h zE^QEM@TPS^_2I~qpiEYx;f?GE;E%j7{hlPm*@hrIdz4k~f8COQ{*#R>x@<FK;?m z3{ykx{*xzK`b*-f&zuXaKBBtW9#u<&5&v{)<3zJvvT?D=Fxsk{WpHWZdrw0GwHNFH z&BJt-o_E`2(odwpt&Z|Mw;NZ&R0baC&8yJgL9F=aOxOz;q=?BmVD9 zQ&u@J?4lXW_e&WAb{CB-UdKF>eZ^4sx!+*y`c&v3tDuV3Zjt4gmoHAtB+NJH1KV8HRhS83E!3b#`52ROBM5ZA$@j zj1L~v*-buR(E%gOz~Yu#Sh{h!&VCxmRIOdDt_}_kAQYHWt1!AZNam?K>e&9w+zrG+ zgHbpze^?57zj{?>l|`Y-n5l*fh7%z^m6gYL4XZ5J)>8WYDJM|?WJtxum2kh%dg+-P z3b#G0VxGk#ms=}@dt4C`JoVvR7=alKhP2FZXdiZ^amY%AIL=>g4h*(sE)cV+4W5L6 z_-u(Yrloq4Aq9PZyR8<@XDof`gb4qmk8s_X?0#|=u3xWnKicYYcYc4)qCeC-s|Z;s zZ*{$EH``nwH&0PjPZL+giWn5PGPVNqNu#U)3eP9O!r7~iD&j2nFiYonKC4mdAnfVGWM zq3D^l6sVa}8kAf~ZenW4y#iCNXw*qkhyUf7rBn^uI~6`kUb(egU`PBw9Ylvp)vTi; zC+Zy`Afpycab-#LVqoDZr9G_$nP2BhL+jdY@%<=(O-$80az6*ijsQt^!Ur3Gs~_f) zWRE&3JROXbw>ufe=0*p-_;fE#+|L{2a~~z=-gR;gvT>Wk$8WWw&$2lB2-Usn_fO_y zxVsG}n60#JMZBE$pi=!~csIFOb)AnHy&2iE{gh~|9?D^Pe6wYmXlt55^Wk)1Z+0Fv ziiplp)3fJCRFj8;y(Sk_o?OWH)oww?;ZsT{t$+pSZq25c(=Scc)lth1Wz}<&Uqe_D z9liBk!cON39pPhgeed~^9RW{Izn(e4lp<4weeTdqHrcaq$tsGf%-1N$edhpP^7FS( zcI;djraZQ1D}CsQ$hHgFz z@$1(4Ly z(m~*UTyssI`tO06a@|MB-yc6wa3hY!3`{%1IKjVau_Nf)E*WMST1&)0- zE!=H0k>DT+2UaYL&{gx;1zC^Ff)iy%@S1aZ;Pl3ZflZk9`_Zfsh-#us*nJOldVcpKyx{0gAnvQq=Xpt#ThBTKvK!tHm|L7nWJuO% zu)M0&FwT}Z|C$@edr5N2Td{wU*o*-{{hHw2YvwK}XB#Eiep+C|N)I-MvOiL&(PbIdSHXW;TLXT@5Ub@U@ zz)9^RF$g(8h4z~J4lT&!AS3?}78XM+|4~5R6vymGg~RMq2m})LCNO3P3y6z35phQc z2XO`M=1swXL;LaBk9>m)q_qNYWvC^XFXNSs-`sM!>zTLYFXl4#`k4$O<8%Dixu$#; 
zJuh4RTWGXFHau{-g@B~ssajjmWY9cnKAH0osJ!8a29LNvPxuiqFpCNwi^Rik9cU~#+KhW0 z-1Ob>ToVr>3Wu#zmn$SQBL!8Vm69BsagUo%zPze!bRLX9D5%5Sg3x(7y`aokj1~5t zA>d*yrIq^H(AJhtU-*^YPxPYx8v{}RUFjB}HcC~?thrdnSD;)28HK{;uUx;@cm^@r>j9tTz?D>tLc;e`V9{^|5%x0dFTnK6#Ezw;TUIzFe5=TKU^_k>X6ZU)az^(^? zgYbri!s^5Zm?%rx5WwABZsW0tc8;K%A+7#K$%0k`l794=Mi{}gC}2X~5lPD-+w~LV z#9hbV=7Kvc^r|gzW3)t3f5$#KQ>P>m6GP?E0GRYXnSK(1B6Zpik3Gx1!gds>e78;F z&1KZM+?I7?5;gS6Jh3n1vjl`r-m))M{y~LU)610yFKgYW9pSL~jR@!y#Zg8VDmel{ zTOB0dATQ}xsjI!b^Z=D1KWya&a@w43PQ6NXW&~8U^qClPRo8g#g;>Bv#*uuavO$1u zax;teecnaGAM3A#6-->{57!DXiuy%0Uv0%FlPJes^ls`*mRBpCeN;9;P@l*WwtAn% z(imiEBwT(g9XwOs)G3^j2~WFFfHkpit2eAobd3cr?9e8%AxAg&P{)qS7vnk$q3dy4 zI-xs$O(&G47EuZETo08FbiWmLp81(bSg>o-q!K=5NRU&iGd)3U^mQfk)z3Guu|H|` z`?SoVaWqxqo~J-?wpG$}Cq;bkJ84xi^C!h5<6GuBnDWS1bKkI!3wfIhguy1rdM9@9 zH74<6h~&egI6bR8keQKZ zp0Cdp_uQLAZmb^#K9#DeLOj4@2zL)jcXg#mcyi#U{5!9=_H)GLLO- zZ7nD$K!jyUhju$w04j2}#YZb%z@aj;aIBk6#8Xx5^av2m*dOG6G*>;;|k*twe4x-FZ=Fz(}vQ- znv!4G-aS7&c~t#bP^b3dQODTTCpG9?yJFGzbR!Whskyi!M}2f(s*rQrxrAeycnyKW zs4LuETOaS#fA?TNN8H~GE;5?(d9(YKy=7a_mdv_iV zXcA2;cfNjl(bcvZsJ;%}jGH`-?Y$)&BSyO|c#o9l%Utr8Z?W>VbvGh4jD($Eb|`E@ zLNcoTQ;sh$q2luoXSZMi*!_kE@B^pzZf$$_G{QR3WwyBKriPaivs}}zYv{0yp3uzO z(;LWn^a)BAy6bjmE+^7(BLG+E6f4^Y=YI4SMw*C6;+JRpNvlK>7x!<)#I}f0U4H_B zb}C|HH*!7=3 z_aG6upbI7*#Em!y4ml-NvbURTmb8~Rta?8+`pM>jN?N(_<(jY})6OM2Kr5&q!=F{9 zfH%W;XrH*Ixi3WTn|o|xGI33NC*4-h{o(tr=T5l_f`tak%$};nZNpyaY>>*T(o-fy z(NXJx8iga>yxX`xl{22`a4~7ajlboNHQILGOYYH%j7_nB|571eRC9Mn{UxM8R83-W zDV=>aL3h&{9-HH+zucF4x|)I7ICNR=qaAj;=$LYozYb}wNX%XXE4T~uawam z(5KSV%yHCK*3j=AwMcChd3t1_y|J}AD3bM?Z4SX!Q;GC5U@^hSKwc9_(&*kF1KqE< zyzK9oHHC&ISd$rjSZ2L>fVD1)^F<$p(upXqTJ9(nB$Z}(gl!?SU3krdIOSP}X|B^< ztjz5$=URN0=lOASjT@br0p1bI7Dp0JWlq~!aC*mGNjZlzYXSt=HlypdsboPQjyh3} z&@l6|;a$16ZOa_v=9G_wC1o1@hgx%D7!~hvH(4WhHF(#w+9EDnaGt%zSwcumj z*=EN7Kr8OdD{zeHJ)MdBlC?3hsa}7WZ~-{5zGiY>Dv#NYH1+~b)IB#_-7te#nqZ7z z3Fts83Pqx`IhCJWN}}`Q+vWg5VpMaLmkWqHiD!7Rhv(j<8SL``r>pMoWq{pAt;wFK 
zp6*6Hj9UBb-flA_?>)0jX^YY>RHvGc#f-H?SKYC3F^I^bvVxMC*16+p`Hx;|^^OD} zVN}?n5@FOjvh!8GcF0qZHar{}M(2OpHkXI*A8*3zXJ0gAPRgZvvy;qohzI_n{93gm zBgP>l+yTarPnh~cZ8%^BlJFb579Xa=u2mRFfFV!9s+QRXf{~cS z!J1CxdR>?;*#H-+R=_keX(ylM1R<|2;JRB|*QePu6Xc@dP(hAF<5UPv4!$60@pT9j-zQdYNA} ztrgH~`RZg_@U52r&~4+FcS{%Y7g|_*Cj30RB@a0TRa!A?+vAoY99pd2H}qXiM}r42 z+p%v3v2N?We~53+DXn8JR^NVLvB_rk$JR+47bK;dW8<7ZLGBO@s^?%I54~EmtdIQ) zXa3T%EZ(cBB0f!u(1*qQF%YG$M6MW1E0XQoSQ?7pX}9SbeuRMo#4^t8Y#Hocatvy$82ZDJMaZ~yfp64l{lR?xWI#e z6Y~svP$l0fH0|AnqA!I;=jzhD3lGqCa`W_-{yAeuAH#*lONK?Js*LvhZ!BpyQIa3o zJ*&q{DEpS#?MM(IIWc3` zMsp&1@r>MkbANfng_dAky6BSm?u-`TYsX|%Li*yJiUs0Id)PV5+2c7zSEX&^>*iTV zof~x$V@d3cs{Lh)R$;(L-kUu=*@k+ggsvjsEI(l$LO8p(yo%O#QPw}gi)yecaNJUm zdo|pYU^9}7IljU<+h8|LKfKC0S;gY?yAap;!}Ecstp_q%@q>f$^^TpKkSDr?@vAdP zT~D?w8SP7s+KsZACdZCu@>W_x%E!Lp-CL7cqwV;2+3g+Xw>&x~j}!OV=8xyYaKP5R zK_l!fck9AKhW%~mxZ-DH>tEgI_-BctUI+FU=+n{{w#$AgS0>~xNhllS`_qhYVPB_t zgF7W9U6Hn<_K$tHn1b)9_wZaxT`O)`y5Nd8W00iM@|e74iMJ;Yh{SvCk zhzM=ru4y70pf?qeMNcI6&^KB3(+Q&h<@=*s(ZOM?l0;?wi@ZcrV2^;UOGW7#vtp>c z#j97Dy=Ah~S_c!wrrMG^kf`tsZD?l{{rGAe2>4-syeYd`PXU{|aNK^#C=@|GHdABs zo=TBp*&GMf42P3XG#!U7wsRu!tl>;z{;u;5=ktPZ?CTv2hx5BiiW>z}>eC<3&Re*DY6y*9TEO6>Sou?L z50tFJ;e;RzsU#bCZ`>gHEdTtO)~Lw#7Y+w%o2C|ks?IjNVcaoxzW&(D-ir>ro^3E6 z$r@2BHx|=dWApGWxgp_8gL^(8Q;Ggx#6AE8kz^=#aCYRaUnUp$9gg>}y#`VgX= z%;*!9R#EmQUQK>~32&w~;A$<)FBURXV3@p5ZjV=EgZd%}(YswwC)_Y@p(_RDje<>q zA|~fuH&gulAY1RBCsL#zWDJ>PpD_Kx5Xrnoi}JJ z@6&36JmO+(aym{yE9ZEVH(KXy1usu!Z{-2H^~(43M#5yJ?0h;pv>nxoDIi<9z5ox;3GWVCs%--~Y_cjY+30X%WcvvLf^HWmtdlq^V*& zYgT_isw)UD*7|S^dKv=i`pFu4qnrn95-OxLpCp^eqKj^ic(t6k$L82=Na`7vP9c|< zWe;KsOHzchWca7aADSC(Z@LwCN9lNugT2xDvVytxV&@r0uu8~^&&qmKQ%ecCW|*kz zs!muTEAr`nb1ak7Ntrp`E3f4q^8Qp3)z!-PKe&yWGe1rSNYLN)epPNRRi|q+9>+4j z#MSKFbrH>1u>Z?%W%hX14)2%$hIN9=X1;l8SfgLH^(STJ`@4K+<})Q1R>IgPXi!FM zt7z|amf44%>;l^QYujDO_dC@Jg@Wfg70<1A=c5vyO%{J$j}y-je^aUu^@qq=Z~e4& zM6BE}xPrrn%Q-+7{`~<1_B^l4=plT`Hy z%wNqAQzheN*mN(K7xxFo{rS*Ig>^NA1ZY=6Jl#ke&^(hCz)`=~yo8KD1z@@~FB28Tmhs#dF@!}(g)E%s`|uURc!OEN{2 
zat3wl(abJ7e$3B;62DaMd(U-}qzKkK60xsrOWf3ZV0V`7r9NH`n?pz_-b=c22OTyu zMC%l%X5TSBxnHFlN5GkRdY)O%ieu8F&60Vy&9R8+;Y6Z*)k!>kU-!$c-I~)aK)b*+ z(AsE*+~JxV0CiYL%C3S@-n&@rx@9wL2;Cf+F&;ud*(af|t?n2F+Fk?Yw(T1RZ%Zu! zRND&-hdvNTJti`UV*L%G`HY%er@-}b$iQA$0}8DYF- zK|KP?XYYxbUH&~7N8$+T?cYw_*3}oJEZ)60-tu&o*MEo zmX|inPzCI4VkIdd$cx*&Q+ox;!*+zJm$hzfrj2;-{k7@DtdjYj8TXT`J~q6xX?b7V zgAuS9{>|oKXFkA>-}|#dQo!K(a&nlTCjUVht{F0Y$*fHsH|eU$IxUecENxm~d!|m0 zg!krGzOyT6;&X^h9^_K6!!?cyS=i*h7_rD>AJu-aH8a^S>svY`!8HvMk^2(} zhlu>@;|TjgHUBB4(x|-xUMW-F3NbM;d+)7Unw!DE&thO&fg&M?6L69mH}=_v?;If40<*nGwFE}Myvx_uw`ZfSpulpx%%~N8Y>@1vLnpm=Z~ziG6+AsXLH@Z1*ki() zuP#w^AdZ$_RtzVXD9Go;gxQ&WHI~EDJ^5u@pFFwmj&@jwLeK;?wl+_4LtR6Cl9JW$ zUJrb;%b+nXV-F9P*+!IPQq+Z3*C3;}1=506NUuDSUuU|g^MmbpHWLm9a;{9TwFN1g zqh~7g({8(%c>l%@yQ(3J!{!Gy1H{|BOCMh!9eYJ z8$a@%d>6MY(WkJmGSDjN|2ntI*yks*{%QHsiULAcefK;9YO6h3y5M(IwJcKijX_;M zUm;be}QT5*La9MlGztgO=OzCEBZ+51H zVPdO6ldCmYfv>e7xsyLhJYnx9_(RuxfHgLdS6@KPXQ#Bs9@Wz76iKj~I&2ndb3Nzq zA)>`SgiR?xloU_4C*3dB5L5X~7Crhiwd0oOG+Zs7JG#Ado@>HZl*!X|ac&oI9VKR_ zN6q$VPReZCxc>Zy6=$E9J<@C$Jh)^tULri_D*WsCz7=CstqpIrZz{xJICg*HHC8P$ zdLv!qI8vNR=P2QH{;@X|_k88Q$UDEt#Kh=~t3@|nk>bR-T!gi3j2ebb@yB;%sKv8t z30ku|SyLaBFZ|1T-#YFYpA3ItNg4C+T6tN^=Fum3*?Ou|6W&TlgkJ=yRcdZOOr~u* z+7MgQ^dq$H{BJsMN31CCA83))=CKv760zLPo7?_z3B#J1!YOiI{{`wPpBWFD6ytnG zc_lq7b=w#j%#Z^To!Q(=E^!VxuX&ZYfThGon^seepP)4^8%dMA`7^=ksq0#W(@!ZB z|0Z0P>gMX*20AzCG&>*~L^+L_D<(};n<9EsZ@s{W_b2ms&sh4|v2Tqa+A*&7b{vRc zRFD#xJX&hB{bIHA0vtwldAT|B5y3s7#cMMWitWMuK-~6QY;NyEt~tG`Hy$Ah+IEpP zW9KbyB2jBg)dE;{0^E@B_|!#azb(M`N&W-sW3P^%qM37PtAGe>4Ru=P1?v5|ir=-` zJ4TV_qppe3at3L=RnA!}(u`4xfr3xo60KhSVKTk;tqEQ^UYn!SM8&72@kA1qlmQ(a zwD^xd3eZ9fReB$^{b1Y){zbfiP$1Oac=DjF#qv2DI^kziL94CJukdgPunc$jQDs#1 z-5d;X)C88sJfwq#`_4OoWY(ER7YYs?PSteLC>>l`hUhUY651EGW4PT`Gn$rtvn?o( zn6D0W4rP>l4~4oF@^yqL{C^%MyG&L!%01SF1_)%O8#Ypr|KTd|^lAR*`XmDENO-)j z6!udW1X{f&r6)~Aj=zupmhYOj(0^)C<#1MnA1<3K9d&;Ap)`>tSOKUdp-`LpG$gOt+K)7fSpZZkx` z#mM?UTe$F&)b~oRN!G5xcf$P&XUcw3+4)`O(h0C00}px)MlP`OuDcci5J3ulVgHzcxu^?aLFREB>&>YQyM? 
zztP2>SaUna=wi>oyv`R~6kptgvC#OyOV)Sp>=t_)jhTSuum$9Dwh1{*s$5U6yh@E=(#eMM_kRTMVx9)W`T`Vk5odIbECfgcorC$5{J&~m7t2?Yg-q9`k+?S{IUeDgj*$7Dz3wN^hLf;%ik z_TTT&AqGhfsPQed&CR>)@P;$q;;;v`8{>y_l39SC@b)12Rx)y1d)c;`k~Dpmj`~K6 zo@HtL`R(uZ7t<@PE7lJmemLpd+!VXR-#@W>;EpB9fQ0?$l|`Q__&`%77=G91->)X5 zPax4GDq0lEzh3e2C|I2M*Q}oYrymBSOz7kPbm0S6QSk|k4ff@`jYos}uUBeXo6!G5 z7A7X7Om{0Xf#P2y0s#gF%*p@L0SziXerlSPd8$gpzvcFU+t`#~{YUC3pJ31lAFl?J z!haPYQ2zbK|6}g0ppyg=mUs03mYN2Y%+UVYe`juL4ihL>!HoX;=HIG{0-CD)ZwGih zyr2`N8(oY49TZace>*_?|J42eGIjSToAwupR|-{{rI{`F2Iju|d&xS3!E)aoz44qD z74MTgtgo-n7u8oMU$>&YxkTk3lNrbVG`MjvPHo2WUmK4?HXt13*-gH?;X!{|OlXP; z)eBU>oUL!NN-81Y;RpNs`$fyiy=FK5F;#tfV2)$@`5gHF76_S!H#hV}x~;!Q(Fd-L z^x2-)(fG7{jX=(dsmZ=EXebJmC+w`~pO#81lN_0)6 zoQ$JH&lyqiH0V%2yXu)2-h23GVl2?+x(@Fg2IJkGV}-BIT(J7M#sAsd1#eK|unjE= zOCeQ0r9&QVKh48hHz$KVR7)POa!7G9G%OqgIv*XaaM_xmvJwxwMR;Gr<8*VT-XmW- zcc@%$*e~1ql)yRBsymi;oQhUl;HBSV_Xd?W@MtCAqcrUz-2W^k(hN6V)XH|&%kx>+ zMd$!`djz?i^gD|$b=P;!XYcIPpUw2_JY0@`L>rjRn0=ezxeqLtHP_I?&@e!suqP3| zDA%NByskkGP3w~=%yDP?TNih^_iAy?g#7i9bxDFxuU&B4)ql_dvqL` z8EFU9eCJZ2fdptUqPN0PqKI~0cfL|yUY_)Jb0F=N7Hf7=M3n*`wYg5n;B{%M3BJVN4v7qicu*Ag$z4m&zJn3$qpCcEZGgizpvS{JN`Iz@&C zA^6^Wm-8*J3E8wA4;(%5a)xO#L@ww6(%`p8#xu9x1SV zw{!SZ32|3vGCa64!^Lro8nrL(({-K<1miQiH_Vi&nvw(CDqs?AVWB~ODB^(a8lnbV z7@jKXj?H%b_Bt5Vi%b!DazCnS=A~U<(rwM86rziL`C{QP`tn1EddshGe|}gBzsC6D zplh_)#e-1t<}I@Ojb5J4ZTcN$8sTd9}>ULUL@!JeaN=j7U3ku5C zx~N*@OQAb7UI;DReIy+(Zs)RdgcVA=GhOp7)$G;!Xr9CL=L&-|>$u|kE*$eLn1!Un z)-QjqF}FWdO%Q84X10P12?q*lj8Q&l22LT@g1{fc=Sb#o-r<*$8MzP!5tE|3cFFSy*A>K3CI2q;p zZj%NvcRPWR)v?0&<#_Yk)3x07`)@y$e(>17oh0Ej)%vNpnWk>!gt5o!LCsUE%kz_( zH+PJm186hC6`uGQuGR7Nn7}%Jxg@H=I=`Dg*Bhm-YE`L_p}@2twH zyuV6{=!4cTvvzUC!N`5cVkr@FSYvZNw3;|<66=y8SFN!BLBTF8TZ95 z+CJRmcHqW+y3x+bFPx>$zdC4h)THuKuiOKCIs^SZ`ygS!3&Tmj*_}Q7&+Jj8+vA@h}a+Yec zZmwo#DPdPgy#BY>^*T}1;_m*9j@m)v%mu4w>-oj9@biP*CH|e)FPPOoa2S^75Fk)* z7&l&C(41FZ3iFaPMVLQYDBke8xHy`X+&ChYPS42LtMYEVe8;5T%vDe{U7MaiMOH@G z{hP-kP$ryQI8fKi%S%%-j7o%&@%n3jU7S|tmo~@mcj{OdfnP$?IXl|<-S33eC1~Cn 
z6!lH|BhHm}cl}WwtK!l4`r+WH^QMSqRJa@%?onvF%dUT$+PQyzo7GNCzb#F9g*$kq zG|J;(GoaC-elM0_Wx!Wv!P%ZO3YM*s>=ZiNhzS+`sR{u_L|47pp1=gMGD6c zsl9Im$ibZ!e%y?l^;~(dMu4!I>JB3jg25h^GmM-NsZogI65t5@s!cdi^JRYPGld8u z<=nUU-p%0YaKFC9qhnjo*j>(j;t6)_i8OnQe(}M9bJo*)TV}3<`ehiR@#2)+vgadd zO2CV<07)*2-6fxhtveyZd#*nm$Z-`C2mQ3L)}l)?+UcZqbuy=JXE2te11m&<85$-5 zb6jTnig8|@IKN4Ue*S*4yL zfrys)&d0Ez$6}JEM%m(ZV>9d6Y~~h@#Og0pwN@jkl1W-~eSc^406(0jmb3WByt`Gp zQL=7aXq{0$y7`=InwqEPr}c9AMC`f+O)eZZp(l*zZRexR)LxpE_YF3K$%t;-w*+FX zx{wJ9sJH~*O1hNtc{S_}dwNEloGntNS8N;~8<*Z}WLjIfwTpkj@L6Qv`H2qK)d)!uI z-i=Ct!*7U?65!yIy}SNHn-?vxcS%}i?Kbb>wICd^T0-=uZ#>cyRgbwfaTNBBdUE;c z{O)S?*wDg_a1Xa<*wf8q{DNK;5)1{qad5)GI`XE0b)>-sonCTYWa3!Rel$Xe;`F?u z*XV(z3Aeda_kDq}Ek)3i zsJTVD#ea8$yKx~(pWdepwwmBH!ZRDQhg;7mYP^a14ra1>Ot)Z}IQVwx_3m`lEG>*_ zDeM}p#e1UWy`?Z+Ccbk;7#lOl^(H16OkuG2-(TbdRAxIOsp%gb*EHV~V`-!Fdc==H z)u8r4j8tLX9V60%5o-?T1+aIy5JD*g#pB@D#S2m#%M~ zHyk$-Qzlc=`uqEve)HXvczwrpJE(OpmV`E_QS$QGVwgxZpzi6C5^&{qYqOdmuR4aP zy>`>~-$_d$OrIeN48pK6-Xi)LQ8RIxj0cw3V`YpsyPIcoa#+qO`K^2G9O{_rGtuA6 zWiDvv>tg+!sIYTNCkWEhd%itY6XVU!OU>8}vX-FU95(|ZQBi+?1^<>8sm|#4$)t_T zti!*jj^^pGpW9E4eY*L4rtZaRN^Si_VY~(Jxj%iY>U&`+{c1J zLcgcFe=FYqDdYq~rFXNyPW{mb1{s{dSbZ=0IzkKHFcRS=qZ@Rl4}~txiK|66wq_hG zVrcMMT~hlcPwFa6?CJ#MDflii^81fR^}T;z2uILa{SX-sv9SL>^ zl5oH;bhT8sy@l_YQIAEdo=i)-jcwiBg(H6Jkr;veY`QkFO5|I2cee=c)W{zGhf*HK zQoCGMveE03s&p0^VG#Tw#y$|_3ce83WhrD z1xCZ5z;Wg}2@EdvThtPDzni=U&&;IJrU%cc8zi5vYrAc=U`ocK2t1x}?vCZuNBDdU z#3l_nVQ202zHZbzRr9Q*1-+eQxhqf>6ZiFJ*PV#1;wZ*nm^*w#FK9o>yvC*@%lc%R zEtM}M_1L|B5JUO>z?bwD0^Zk{q<)v5q+Z>iE8R1xGk$Tpzqa+k;ix;&q^`dHm7m{} zscK)Y2@&k8obou%rz=ja^q4^d2{V7-#btc6EzxTezcU)wp4o}ZxYS?9Jd)@f-8zJ+ zQqk~I&p+5aEa>viryepPAXm&TT$9YAiDBi$07?W70ij{BR z1+pS!1s>V>b$*9&3G}oL;8D$&J+a+6R1!{7mg%k@`^h|9?{QhZ$6-f@QoFP|pW$eg z_Io9~!BNWTywk}ITY!rRHZR|~d%Pz3rTv}?Ylcy~1*?VdKH4`t(?Uh1<+lkMgiBqF z*tfvJJSPd|$mZfsjY2Ru+@N&-_RqH1Y;6Acu5-b0(`>JyyVLB=+t*?C&%7^BR&XV@ zY9HypqqjUKm@IECx>oX=+}E5Q4zF{;_ul)`{dmUvwBii&>#Q6_K{~0VbzM+c 
zBgQi4@r%*^A$9cYwBRg#J+0$e;VU0-P!v5_Cz;8=?61DIab$ZkR?2CpoGtMJPC8c?T>uGFVF$Xv@kpDvBbjp}WYlD>H zI*pIuMKcD>I4WQoA?~rixi6h9A5Ly|6NbrS_GIa;Sf$sO+|(eHNHXatibr$LjRJ8M z+ZF0~Zyb`Gn|tog+L4Wl1s-Zp0Jj>F&&S5006RQ9tjSrT zUiDr$4o(%4;wg&dG`aEPrcsgP#UfbBBO-1Cu{iuT+ON5(i>hf)ra#|BRVevoBs$&J zYo#^9RF_6lW2#>BJ>S#xH)p{|NO-2_~d3`Y7hQMcQ3tApDcZ`be%8qjGG)oYgLc zO>Csvv-#%h1v>eL6?v9zX5FZKZFC9Q%Wik(4@dL$_6*5I!#&G84L#%K$KD#92vY~V@B$=9yI_N>dKhkZ!@LFM$Jy<|Xf zoBtmp_-neKTwqCqrH5qJCr}i5k7BGZCT4!T9R41|2zMEvFMnavrniCaI64w3oBNRY)sg zJ6%)eD_!jb4)i~({YNuCV!+6isSc2wK(vB5l<8U{hv`zwKaxa2*z4`|`71^6pn*Px z(d3z=|EQn~>a#14+g2H1dtzYzMRlSX?SGnTn1V^5heiE+IIu2F-e`_%bKWm@iJ?Em zCnBdxs^^;Kz>ZB2HOu;k{h%=c?H7%NM}R_`L=g;3Ozrle!I6OH04BJ?Xs5nRg|xJX|Eh3jXQ}ACC7$Niwq!opCwbH{k!+ z%b=<<_g-;Uh2+kc)?mKd{5aoho;_UL?X0dAf&L1aCqN%TbFv7PDRE@e` zX%{jbsuXcr6oK8oO@-uDd3wbk5)(grU7Y%)qTJ4rrF=@~c+zae(Q$Ba@ch8BmleAO z>Mhj~%~3wz@r2(M&rJx{U-6=Etlg#c6K=l?nFTQ$s=9iA&~DI5ig$xXh3T?7GY%EDN z>+U;dwM*gnMrdat<$?5HbNF(Ak^AUC+c-gw-Q|oTNL=Gv)iwxw(xZkIwxdNlXUD-m zw6axVYJTSFL;~W#=zv26jU@=oPFZ=xxBHl5icgY4@`iK)-?O=)6#gzC^huq{vNK9l8w!p zJ}1j+O&uLKe;iPWdypu2)NFJ+b~T!}p?@~wX{Vzm-JafyiIf-tIiL@`6mKp}SCoVBa&=vzs3U@TgYpA!M>d0!>{@*8J@2<{()vl2tA%4o3DTWdT*nC&M-BJof zQ>%Me+J0cygrE)7*AnV+h3e{qvFkr|@|!^37w|(duDW3$EFxhiPlITfkx^LvbOQ&;q=V zLV6(v>OH)2_9*Z=V}L7M33+`Jj0C0>7|W68s|2}nC=aMw=&~9_4{ch)i2L#5ho`6K z1M&M9$~XVgR}DU(rKV1H|KZPkHW3^TJ`1pj*mOitmJWxZLl)m82~71u#_u;2Y<*rJ zHpeSE#@dBA1bW22^6TJf(EI8Z8p`=Aj1(AXg$F!L0AdhG1I-d@-qQxUN0$6aC-QX* z^Sf<2S!Tb2XHS&SI|kGk zJpB@9IY-moZOU35LoXt5W<{a_kyz7WbLfFYZ_)18xLJYi)k`kSUV}4|3}~ad<^T-d zNpn0ryv0nSiYv>c2TnT7RO2dl;m4_V+cO7#6F?PLeiNAqmu``M?*3qfEBSw8R4XW> zRmf6;2}P@@fc;0#P812VM!O#zil74)@fWKoQ0YVanQTH?XV!{CJ^R39(Zm1f~ zD^?vi4)=@o)AwJJ-&m2z{0vX|J6^-Wk`aLuQi&T7xL!Q>J6ZSJ{Kg632Muv&;^Iu zs`+@7t3*Vj6pW0)u)A zl2bnLKF1aoo+y;JVML4&n-WhHdYorV#BnYOrg=e)cb)=Qft0Np_!bQ)J3buv*RnLL z<~#-->&19_g~-%ZG+}(}-o#9$8ywEo&<;*Oc)vsX0QXIbA zlgRJZIhUe$dH%H{C3OTB7x$%Sth=pb+}ojg4=zWyAox!Olylf@n94_DMO 
z5e=x>fJG$)uptTzOY-_}dM@`RoIU*(cN3c;h>uzPZlb`Ww`}PB(#uLf)@N-kuNeN1 zH@VwX3U-A-tnjlkO_-6;wdS)h`d!xj70o)D8%_aWyUV(K) ze#At|^90&=ffzdaYaFFDkoT-X#Kdk#cAI)D4)1=v`z^guaJ z3ladg?js5fC%TcGDFh{t!tBOO&}j(|WU4lJVJcTq3DndaZC~Yso!qF^t{uXS4}V?d z(4XMIhGSEE#K2f1Dj-Ja@uS8j1LK7TWQw7V?psvzT+y79T^rH%8n!xi-7_TI3xtdzBu7~4rLcWwV+%D7-El=oI~*8 z(UI_rE~rN^S2^GtRps8UfgYofhj1}*ar$QZ^Yimk$sP@$zK6^jU-DrM{a#f4pdaWM z1ia&tK0h;f{~V8H$3Q}iukP_#FpxguxUWHZA>m-Yz0KfeGp~J_KwKtv8mUkjIM>{|h5{aM(ej1pdnn ztsl3&7^s7SPQ1s+O7i zUSc8jMGC;koy#PzfZl$}0qy?^2xkWSQ$HFwLqh^#evlt%&_WhvmRZ)h3I2eEjb;4lD|tJy}jj7_ul~vn+6M z)o_GW0{Vm9jSXFO9+2ig+QVv}ZK(B52nrWWescagBwDdf;L0Ik{yiu^uK0B9SNJ$y znd7DOj?zqDW|9InHK{jSgIPwPhs#!iu&Z$m-_n%W)UNE7#%5Yp@B7Q=yT#*W*0mxv z(VTiN?d7DrgS=jsUd-xANte_6>2e{u`vu28rl`La8&TtsIIf*%I%K`f__oHbms@B5 zwRBXn7~3A-?bm5IBDzb+3c-nUMxTEwbz%Q zMF(pgZcTryllui~33@){?9fgPH4f`R>K~pUp?RbT|_$s#Dy&o2$_jkKVqtJuWaRdon&`b+9q^EqkLQ zjO?AGcv_{ymSp;+ew|~9x?sv%HO91=ld_c!;ey6zlYJ+@mOXTLIhJ&7r&gUhBB<=9 z%AGbFzTckhcXEBs{UB?9gjMTkb7r{C>KE`^JxLN}mcehA*rMO_+x2DTs9qLc%-=Sh zs2bjtP%7>+4qfIp{MAjC?wE1+L|gU%QWe3jQlPBqn5Od zB1C7mV7KK+5@K~negu>eH(V?mk=Ls-?kGuep035|G$QN$?aK5Ox ze|pl@jzyl$kR3+Je|XqCSz-GPuU93ngFUWjq)<O&4ysDRx?-4mTvw|iAaEVg?$`yY5$+KfvJl;O20MHlN+^>Wx+ zM0@-Et~q|D)tH<&YQwHmR1R2I!F;UV&C{5Tcgzxxtn(dl3-rg!orV=|VfAwCshNix z3d)L~jvaCuHxE1rvaURcECd*1!TN*IuWf4^oyGmFDUSwnP$@Ec&&-V7-A`K;@pon$ zBP%D$4Tf7vUN@#3d`wVH5%ugiq_nY4ukEZ%K$W)I>3ZQ{P3}E;JiL&?C&=&iL!Zan zcjSGt#EZ{nKLXPeVmKd+O}9|?ZZopX>pALAYxKg^*-FN=PdY?$f)#-F+cvjV8 z=t0kKv&&*Tuj@J8#9ZF|C**?n^)1Z1WZZvQkEasxZSDk!4!rlVK45n4_ zfu67(r5XYLshGZ`%c_LnZS&^uXs>Tk#wHrDu4r0y#Ypi_5|#B9k9uz_{aWrhdpzA( zpI~FHOw9eDp6#qwGvh|MWA8!{gL|3%j#7F;aP3eIZid>WC*Q_{N52~`5)u=f_1v?p zE6rL0^-MekLHX85&(QNVNI#QxPQMo9Ldzao8xz(m)x=8k%W$(z$B0ddiFLW9CTyJy zJL-vgEgtufs&6Z;_6j*nB%M8A6}(-cRKYer6C=yt@<7qf!p0iB|M{&agqYJf&HTDR z*{t_j{d3a|`}Yae?7`Md+e$Xp$B(kU^j8)pyf>(Pm2y8ZLE!jnW&-Igucx-wKPk?G ztYf$4vVD3B25w3YXRCH3OQ$XSyciwJKYnG5cJ$3y+{5>af~SNZrU%*>u|GHh&^dNC 
z(MCC~_}J%{JRUD#^jG}BOp8NFv^ex6(=+cB*+hxqOA;Vc1j=RJhm!jX=8w_fREc+b z^G-Z_Ot%ZQ3RC)`uwXS2_((FZ`dBSJrzRSc+0m5kew8K+Z&P8 zVu1r()s!&$7JAy2PT%_@4L0urRcYoVe36*Ov~k>8Gn3uqFib-Dg7uB@s@P&>MPfuX z2X0Ur|041?vh!3xyd58>Ve>a5%*1QYs*@zSWyoiD`_2yCZnE&KZ`dU5X9q-a-99?3 zarNlBCFX=Pt0+PWRSMCnkN^-Lx3U=l+N199Y4%8olspI(=(}dO0uV{*_zL7$9#SEgkii9{`Y%p z4Gkhoad>V>(-i1-s*(#XjdUZU5876r-XVGb6?I?3It>qUbPLM2(MLhO+Uwe_@d> zS|l%g;f|QOTkpEygX9Oeh<9kTEZCL=SbT6yBU;Q;TbxDWI}Hd@B#*NXiY(&guLEJ) zmc%ww0_mfoXQW|DS6?xOi)JlY`aTKqu@BM>Bf$kQi}x z8J{#iBxWz|x55YC#s^9LY^*aHsjzBPH-v(GtOK(Nab)>7hH5Y@#q4l;3yvo1aq+!z zgK+$1kQ8t_3CpBV0vZ}YIGt5z=k-!8H0DJNR8bn7Q)J+i0UTJl9F4&1FwImB91Sjb zdRSD7-7gBu=m!G)14x=q8l31R^kyT6x&Y*0Pm`7aKaM<&4Lz(=CeU=MM?(Q2>zOu2 zV`}P;NKZ(!zSoE#=*2w1t}*ywZG(n0z=i`FloNH@+_sU#kvQ4Kmqph4g$V`5-aswH z_OBZDzw7j3@CJRIT#$mp28i3hZolZNqE>BbB-1+GXO~y^6 zBnql03^pii|NBGa9l3v`h+?QK@LzC(%P2Xt9gZh#eCBI&6-JUvA=)y8^Naxjz*N1P2dSSu~B9(qXx zJ2i%Z$7y|l<_hn0^YZn8!>70% zMYk!VoTz;S?{kukHHP>7pr)ONUXjRM(9Nx%I0$@%FDR#>{gL+3#v0UnBVMj~ zbJOnj_1@vJp{4QEPO-6ea}JtBh1E7p5%QjS&rtJy5X+KK3!q5^esf+CP8ZE$m~K`d zh@2z;h@o%wVH-c$x4x?HnSRVI=fDdYh6ej z*UY2K9q~?$W0&T~2o{)mx=0k3D`RpRmC;#+JPkTFX&|X33hxu^rhulrn0y!wyh&EY zCm<;|0gGG|(>DiO7CD;gN0VBLLo&n(=VQBv>7`MK=$W1CXc?uS{Bo=L&9c@_$@GLJ ziDTc&yKJ+ss6REYsZ*Z&h{Z2T97|!*&)`n)n8K2 zWOB`JBjtc4ya@{FGSMalV)Y|V5IMhPJmab*ZK{PF&84(_P})Bl9|Mse!;p(GQrOP`A6yj6)oY}mr2>8K0h^tO;m)G--Hjzf@MAx8zMV{lQt_y0dY*jGilAVeouh^UgTY|BkMp^7EuY3 z8=CwwxKWp7Lg3SJA^u#dUo$4 zkw27QEMz5YJzxbwZ!1;%$?|uv6)|wY!`9^ZyDy3?r@&Y+;l%8jn2-NFyJi68CRUU0 zm%Q}&o>(sw!8_IK*_VcTRPG$}ZufpTD2i_U5;@ZIl=R%v);gVPN$6P0?&bO%vlf5% zQjz7`FOk)cmYsI#Z^TyH?Y;~4!SZ_V+7$*}=ZKF$v3^k5Zcj<%n;k2o8po!^$Bjzj zpa^S&oJ$;!63b4H#SxE*Kxc6@S7{1^^IV2TMjl-eQGymiDv1zof)3rA zz(9%2zJkB0Ao!b#h8G=8j2s-;y@ia!RKbCr1it$G6HgAmI)F3;A#g!(RH)nb|Ip~E zC;;CgM4?g#%n6OJ7*5m7HT&e(JN{y=R)A3T6@%abF*9=p@Lrk`5O#k^<{Tdsv7a6G zp^t_>2BUX%(Y^HvKmcwiusDFvLg)$u;Rf*X&%e-#DC7sUzi6SK6v?;%jQF62a(R^g zH_s;~E@W|q{g~kZ_`Nx`PYgUVB8%eHKCeKIojj^-6B`4-B-53?WN;bf$HGF-dn_$x 
zNL(1;2y{@~{(_r+BL5FWB6D9JfZUz<0B-t%0oWby59a)M5RgJ}1Hd{QnCm?d60Ztj zx~dhurG`NeE{qT;1C3ZvAYAx=8i5pmG$(-QKIffF`#KVUx2N(T1_s#v+Y7W15I=wq zrvCj3Xlbeg4ARl@vD+`}mJvlT9st$nd&-{0_1UrsbOUTCl?OJJk@*Qd<1z# z%Zb*RfCNMYT{FGySOM!p#$0j$Cw2jvH?@l#B@|#jrY`@xZY1cT)&nl=$fSY_+2g#d zY7Rh@_|O4DRQb)Qd?r{m0@+mSYVxu8q7urE;Qx%mH+>PgaYf_qOxF2_y zx?`1}&IFttZBI^4&Ng_S9fdc7t2V{X&z+qCGvqv@WvVg`ft%yHScDKc2khdlhQ<|8 zS%!nNJ>C1ZddMz zP*BrhAuheVCAxQIifAv;sG|~q6jQcj+hZZ5G0ycu;|}Nssu;4r33(DzV+gFom<1CH z$^bWC+6N;jMENY1!vM_Z9(}-r?X7#VIbPO09w_kquz;1PK@0-J#Wn&A!Db_ItP-@|_-yixaLO})r#w$D-?*nlf7^C9rxY})EnN?tcnd&#uyK+P`ZS?!5CLS{ ze$YGl)PRkR`O(n`xLlNa3>dK#fWV!tOu8PMy!MNeQHee|IUy8d&7xrkm%pe8D9SB5 zuniu`SwX076Lfplf-D;B!@EuUfMTEii0LqG4xT9 zi8YBN2qKRea(Qu2Ag<_SHs`0Nrl!ryoE>c4 z)C<3z9P=U3YiCBB;ASo*+gsr1z>AolpRY;s zQ_NF}VXLXB1!y@_`r``=_T_>nGj$1T1&x=d%Knyu<$bLwtVTtfT`}zZ$;~NB5Hqcm z%et=kA7=x`mW(O=E>i(~gj^_bIJG2ImL69&9DY4X!jlZQ;ryqv>Srbwlg;-I4JVTh zjWMhyOv488bTWuKVrx|mk*}S$K2}P;l2%all9iF5HYjmHLbWt_=vTKwOo?F4^&U3A z2f+Q8M8<1N%oBi^BV^Z0rGRcp9ZkEY7}C_;XQvaRd+N5>S**~PD7XO%55eWPkUfK1 z*Ubh{e-emAjuW4hBxc*n@*^Xv>M?&$AXNCRCF0E%0Q|v^e1gZ|9?})*GoT4EM_`oz z8FaL*ZpB%z>IO>u49~xJ_+}323TyTGiWJ<-VQ}#mX4ZhYyL{q)TcRe$=C9+uqd)3jd(kP!8+|J=wGVJX9 zp@0?!VnQU8dS-YPT1)W#nZg2q-*VUOHUn~cUhdi@|L%HWw)Gc;uKYiAOiNc++X4ux z`uM;fjA_5;zyK4cfHwFeyQP_A56U|(6ujo}EI#>2s$@rgGf;!5J6-$6FfZ529(~DV)OI#$(I(_9~vImj+Xdyd^8F| zduFEjlNl;UdaZyd`C{SYxlU#vXqd;?ByN~j=uVUb_Lww$muvc;f;^(GA@?%h{IJ5w zDy5|s^(P%~ZE}=AITG`fnvl&vQ+8}*9(rFh@Ef35N=1p>b7rdYb&@)qnfyQ zpi{@kjSVge-^THoRGfdIiQT1f6Lf~x35qxi?Mrz6BK+i2YfPRc8$T>vj0g1&KJwR& zw(s4$=NK$i{K_61D7h3$G)oJ~*yLgA2vF-K1o$L-CmITcGVxaA2@VM0+QZ|+50^6V zfV0ZP2T>Lzh0(@1>rlGV1}0(Pv{Y)ND^0`4_f&uJ6YaIvmTdh4QZl$y`1tQwlO&fQ zW_wAOexw5(^2UVPbfKWeL>;h|qk6m;Ic_i$?+d{$9`~Nr{J0{efZG7#%o)tsEt3l+ z(xkcm0uKxbx#bsm3Wax(!SK0BaT=DU(H5QHE-mQ7aSX^xGv9_-^Af70W_ovn97G!R z#w{uiLK%INow^IPTH*;sUxGzNNC*S&5v4&$v4f)uONvNFXazo|8>m>}lSsT2LFe)Q z@-mmCJC^$y%K{D=8C!rx6ekoUACGGAE(io*6GwJ~}B2*wjsM6Fu3A~FmThl~E 
zpsRCpSKgA5GWtglwt!oD`LfPl@S{0Hst9a%tAWuPgXx?w`!?ZUi4b|caq)+t(vJ+O zrZ7>}0Cd8l2aQZ28f~@c_c;hyb!ERWrmr*5NWy^0Xt22}3mpFLRKH`p50yv$j=RnX z=3N8oo?`FY1-idMGQlbnQLo*&YZmp$N>RX)3uw8AJ!qZv4dnYv#&Y=4a-fQWicyj7 zd{bD;;l)T*1ULG?MC0?FBMCrX2po>a7i3~$;(Hpe+TIrM9v!~q6=Ibmk>f47L?mT{EvZ*s4uiqc z{nBHMW9<*JJ&yj`El6Ep5nuB}bHYf`1oBpl#KgyG#Yc@(iZaVo`QP^04S6hZ&2-z{ zx9Y&(o_-J5=}q&+Jn^u6rso?Xt6HHOnwvVNVk_t}L^p@+%WKt>s=j4E+P&L?PQ`8I zi+;3yY>j4pb!h2?3g}36Zs_o`qZ-wFowhnL4!sM_${t_m-Ji>jLKFBs`~DV&!F3F) z*{R~1zKO@TNDRZtM$W4Bf}TyZq_4&2ErO>uZ@*leo7-~<41E2SQ?8%STK(;x0s$J- z+hFk+vnM{GKX7LKEIi}ZQq8ixy{(n49wXh5e7*(l?RtO<>y@5Hte z^qtN>?hVmup$U#)uE1Q+w@k1RQX6D>(lYA_9J8Gcbkl z7I1&~;HS*t&AQ8hkB8(Gu@P_ye62M0$Oi3b8;eX}H+RWG`iV-&RqerDu<6mv_*k#` zf~#Hx?7H@6hg%Q(K78p1x3w|Y*BAx972n^9D>*@RRj`oR04XfEUlmR#myD8fDkEKX$3}E_PL6~hHW21ioazj3nl+(+&=BXU|Yn)nc7mJa0-)lF? zZfY{2ZqR_jLUQ$aLxmcBbUYT>|DEDoY1f+ypd6FN6`U)duC!#FFlc0{kUA}cbX5~? zI9uJ7WBz-+k*ojRJEg6IPCweg?X2|Y@5eMP(HTqy;YIiB`2H$~FU*qVK%fzb`eG1s zwc3Y%Ef+~2s7QzM1O`*+yIpv(x|ry}7Z=bDeSW?Xi(~2(P9^@?q|xj7`iN8~P=996 zrNre?NDO-*CMjJ2ICR<3IfBDe=lmNGPR6uZVgumlaY!3|mZTRyAGZYz&9!rW_8Ls0Vn+4+(G{d#vDCHKwmCC@!KTvoqJp@Zv>W3ni>h(S>HKAwO5 zch67_VU?UiZL(vn6og47$`c)3D?0H;7a-{HBt_Ng)<@i=P z%A7#w+K_>;@74(>Yw6fNz>03h*Be1?R}7 z%vyC$>U-9DdU~`;B2MOuId4F588|-|Qr}e0F$YJNqJ^%9gS}9(Os`z*8pyu@28YEt zTwq@5n~ zKOaIX4T{&r4WZ)o6IzXimoJ%w? 
z;qGo|GDvVQ-8oz}FT8N3UEU`D8ws2|-1xkFaARvGm7yt9#uz<2WXW6J3Ai#AI-EIlH<- zBnqI4vWM-Mm&!DZf?F9EmpicX3tr zn*$SDrDHj8rpsYmzYDWv27tvDXVc52*O3)TQw0~*y9zIR<0|Mh13jayGgQk;M#pUe z=HYf-YAH{$2hBu+oKpJQ<+BGDipS(FQa&)n3VrEPsx0XU{`7@!VS{gh+@4b|+AvuO z6eF6A-YF`5M$9d0C&=HBVD84TxBBH-vDd&*v6-w}wcW%;btCpIO8&u2H>bjFTF%1e zyo{GC%AgKfz5^c^DTh4Rp=}!c^mxa&dJp!&K>4rlu#)tdG$bNtfk`j%^j=O?3e@U%BN?v zQ?=cy?6)^w-AI0t(e~7QSFHHBqf4U7ttyyR_FF} z5f+HgMS5S?(CAEEa(g~ieRxCgVSh?no6c~@O+D2&`vTw6T~g}LVqZz>g(iGkN$tG zy=7Qc;rA^n2uMq(bST{*Eg=mGNJ~q1$)=^4#-ZFzN2*zuj1g4YnIX?4!&L^QL~jJiH2atDuafS1VNN;}8^t zH(uDi<_J>DPU6}2j*_~dUk<2~%FAoh!J=tNvgL5KIWR2osGLSWb!Z>W{~9h4+mAc7 zG4JmkGc$5Cpq4wNg@{IG5!x1jvxR*8l&GcX7BhfeOD^mJmEXBY{0;*9g+G`Us+pN4 z-Sn?lH47&*$~tHV6FtvoQmu~^bkSdZ61|%3F8Av@`Qr9FsiwU02WJw)6{&s+xhg91 zXI(%4W`5SnOs&L|OtFyMy!pK!r-M2~JNgNM1T^me+fMO{bLpmZ z&dWQMmz+s~2`ba3Aq04T9`E|7?#F8N(o-(+oHXbNWa?K&k8WEKZSbclkks;kmNhgV5etPug=giJ^e@v)#L6 zReNWc56t_QaI+xbSBa7nwu4BP6F02q zRZ!vgoCDO`%PR%SGAmns#OK2F!u3;3O0L3Y78Nl1;BwzPwp^}^A~DwQM68Dif5UZe zHPgP}e54nF+@^OQC?*@nT%__lTk*ja-RvdnpO|i!w~j~aISW^0dVR`s_FMh+dAFE~Wwv8|DMSScSQBgjT{E7xjBNftxq@TGawSs-x~MyZ7p4^EHYs=B8t znvtcA?z-BShnMr3#13LN4ZbT-lwB}JC#h2kYOwby)zs>u5^MB}oT*-w2Hbo(po&%+ zN%lfnlqkEqn^pWgEX3s=@<%yJfmZM=4$IK&&vI%w*oxjg!lk)v`QC?$qfJS#UPo|s6|)2#APLJopP%aX%TvhysgGmU zM{Ot}MHzNj0FY4n{fG78cUcbbJF(R*Bev_B}W zCMO8n697;qp2z@IkK`Xf^u7uBhrL83Xy9ZqIPJXRRyTo?T5CD~cW?)P9Eo{Yfi=#S zgm$?#B9Xy;#wzSr;1Ds3GmxECCP+B!Of$h5xFZnn_82h;4)9&`*dVVEBrc||o!*1P zp`-O$I85@2RJiVNb&Pt!VAl3&CzDElWBYP%u3+OyXE5PWROVhi3Ea<4M15!#M=3}z zbd2`TV)(Z@#4{G@G<*uI6+tHqJX&lHHNk)ooW5c57>=-LY+b~g!J#egVrGmgt9M@? 
zPU)mp-`eBC)O8h&l4c_SmCPL~0TdHw_{{5`h_fsHJ49w?zie{9sTIlWejS0|fc4v@ zX&CK|kzy8(x|j>aui(U-F^i>FK`nD&O>n>839*nj4m_p1vgngp7^tz5e`?ksrxU3)#ltjpn93qJ%SCT)kg-0V!oyhN4@j)n50 z*eCd3jlX{J@*1>Vy_G+E!%H}Fz@pPy;cD{vif4tcP?{kv%`$#%M6;h%;*;6V4-(jW zO%*%3(lI&Uo0V4k$NL(^gEIH)5za~XzRfC*C44)!<1P*|9yFlbIoIS3Ey%17dqqNG z=d|j!KdcRsoie<3^g`R$7e~#@%vzOix83Z#7v-A*#mtBr(Z&{R_BfXso8TA__YK44 z=@LvJ5godxJF9xRpJVrCr(=yF5dBwMNW^l?HxRYTk>2z3#Hs{rZbtbo;zP@7;U_$& z+uH534?Hx89W?ASCJ0Y>S=;pTBh)1_Jk8o+L3XmkM4WO!9iC zd4;iXEJeuk;_fDNd7o@~zO}uF%)?auIwmb{!k+FdF0!E0Y4bNjCslyWa(yCeTJW>M zwDI3vQ~F@t*Hqc=vX9u)B-dH)uBpE|e1(r_T+NyqTRbzU(^M_Ckh`^{jTSKNN%kg% z-K26|BE7Up4{&t~07UKDLQP&5S zXtG3+_>YgtT}?vl-W0-G+7fn@Wny&22|)m!U{4~P8DWH}bb z%o>lMlLKO98yEI}2&SM$oUx@fMnj%7e|^>XbRe1DtlTv{%{{bQ#J6Ba+L3%cjb5ns z7~fjTs9w3)YPaa#~NFQGH+DcggJcy@B5-kMeTcDVCOAaUn#bOeH0*|1b>i0xrS^eZeIJ(a3z(xpymDPk_Asa;Y#!6VqW8F1k_hvSALq* zJ0=5g&nw5Rne&d9jeR)U@t?XA! z{CBqEZI;tchORpDlh}3w@fYeGs%m~8u?X~yaNfFezb9;b2AviwCyTO#+-$Uqzg$kc zhm1TA$S!xae%IV)@%dU@uv;fh)NummoMqg*nNxGFIs2n&? 
z`Cs!htq_#_?F!zY4fvNF-y7EE(=vk5cmt*d1E?93Er@;VIQucD&|0AxyOai%Q~+}H z(jh81u(7Aj`?>J((@pIktL?$6cV?Z}c24ad{~iSh*KY9g=;(#4;m2{CaqVhqX+cQ3 z-Z;#MB`RFbe?8P?73k%0g(#$ac0IKD~t{Xbx@!%v$Mv-Le&2R2KCrYKv7-I1bD+&Zg?+SKMXde)}A zg`t|5!xDbT*y2@aJhzQzo~98?;5kll8q?eG!exXW-`N7E+eQBglaa%)Mgi;5JeotR zHFlwBY@#T9=4Q@jPG2r?l@M%787CA~Xo`cxq|ytJ2gIcg=F zW&&pjIm+XX5akACIyEgVrs4kSyQUa)c#M<3f00oU&reRs(f&o`OdukMreZD!;c`7V ztN<&fR;tsU2yCy`A}PfBRo~nV7O3CDMJO?A4!ML?YjT@?Y%Nr)sFoa8ePl09x?Ui< ze&@Dxfw7bI{$QMRXEPgBthuYF?*ZZE@BJ!?h*#Rv6;kO6->PPglZ2o0tV!nQ-s-=e z6|-^`($3L%g#@fMp2H-lm*#vvxjOQl#4VaZzy0$Rf>%)|@6UqV=?h<{1bMNPil(S_ zve83lsPAEPFzkb?Jo+KFKh}Svy|ZV7+j{wPPn~AbG|Z)$bT1`zs3v@S`qr|0(%w4I zthH>X89$y`X)VgS+Fp-Fp)ue$nv>4uK3SMPc@YFPyjK z*=J@ZMkz_X=)!6?&sq25cjzu@^UOwU`t-4n@c!REHZ2RpZgry4i(7G}#+nT9a5z|tHUq$@8p;_K-Gl}@28=I`5% z-U7GLs_vFwioh*7g5d2tJTV`us`Uy-twE>eO@?;vO3QD$)sKNZT&u7 zUvnsxeOw|mard=dg*(qw_tD92)hxYcO)ZPN`0lzW_x`K|$djtE&1IS}7li1ST4L0b-%zM_5oRJX@z)Ves2|*&{E* zOyk5|VEn7|SD)-X)v1BosX`^Wt5H%Gsz;MZmHFQde%q?Yr6X>5bxcW*Z}CUGfz2cD zx4rGUmGII}{bM?TJ!^a~cYdThp(M3Xy57}#@Yfz%Tyxzis>WnnImZ3~zJWvj>H_mE%y&Rm-+Flm+GEvu1wwkj%dv*lj%ROjaxN ztIEL5q9UpP)nK9JIIzNVauzuilC>$F4xdU z5%8I|;m=d5t8PYG6G$5GM{0^c(lZ`#ln6?g1XxiM>dcfUoHX-@F7I7*Q~8CCCC4zs z4`BVr76;?XRFJ{xGYPW-4cnZSQnpL8*aFrr9r|vP{l}%s&H6^1R8O%wh+qEyc3btm-L^l-(Q^; z94XHa4Gpgf`yP{U8xBo0HxjO~n}K`4#sua;*xp~*p^q8)99lK)wmh6Bl-y~Dz>x6F zlBs^(%eNhCx6deKuFh(Dc47)?857M~^vNzDb9KE>-Dz!EkeOMbn@L8@v#jTje!UNpT$u3BTe1Z zN^x)(|6{V?U>@C*s=fV5gvkutuBLg?7BvhFHb}q!u!#5tuK&gxTz?rPQ)%AFl1RTJ z{Eryy>)~Ea|HpVukORs80dgQJs{ANGP^l*ZvLx0bvx72BP$dEr*O2`WU?Kq|9{=lV z(6l{pm&gW7t+}y-K5XxY7d@bRm5getMxjFumG$8H4HZv{f;7vN(gfpcq&JD+zUdn- z^ZCK}V?qBP)aHRc40@^Jj(q0-Uv2>i=whdaMWF(^NN`~-4DKo=sEv)9tbA1pb;cK{ z@b$ABaKL~%WPoRbx>w*r)gQR3`k&&V=``6FDjJ7{jE=t~l8b9RGn~Ej%4Q0FcsC)Z z{v7++CrjmEmW^t5>|5MRQtzP9X@@gFEOB7RnHf4?{!SDRT^gCr7);zh_ohbpJls--VT_ z55GI5r^0#{Dxmg;RBM5unCUFQd22nE!({mS_Li^lEpZfa)j!uPDsXut@q|H{ak!IN z?;h7gfjQY!KS+&Ze!}U<>lBI=2_LEtxe(o38wq202en4}^3F@~h0$nlF>sK2*MfEO 
z_cTN{Xn!uxq(!s6ql(=2=)B?2A5)64dbDDcs)Oy0aJh1ce>dr1FA9Vr(go7^V$THN39ufn zKnDdREi5G^C2DiBp?~A+@ADnc?O*GYgt)dG*?0p3LvG7iy?KKKH)cf3Ym3gA&=x$7 z^17<0z5TKb94?%n5_(dTk(@?ERQggS+NLkT2=Kx1Y3^~p0rNi6>lEqw`kMTk^zm|Y zcsiE@Cl-wUz4>+dn&>y6Jqc(NVWJx2lIBJ{ZZSHG^HhH1l0F&)a%V5NFcGisacpnc zfwBe&v{Jkb6(z-kt5;`_64jOjh6fY5OdRgF=}A>+<N#z0xB35Ep$Lhh{0#hhIorv`g;!MntoKK->Y~JdhoLZV7kE_`ER(2YY+uGOXnmL$`|*yL#Y0G9_FhQ4h}opT0dcCnahOahB$M9u6L7RMv|a zZ~u2^G&MOJ>|(jP0h*`-WB9XfQbihZjq>Ucjn5t!%O1es7MoTsekx5{^1@N&vA zU{m-^DbV`N!Ou#?fuEJFH8V}P!P0{`pIPy10+$<#capL~Rzpc(58ZqT6W}~TcEmW9 zg~Zy__@0M_K@t{E)XSop@h61_6h8G~VRSUcv*_wn;*GOnvGT(r9)H^&B-GW_HNiKj z;z&aPFR<6|nPl~s@zG-7Bk`^UQx9_GE;i!B1bt>L>xD{VYCT@KKc*TAT9v>M0(P9b z?c`|O5?|A38jfIjsW>^JJK9SQ(RMFGjwopgc=`15z@i?f+5FKc--6!tfR)yC7#w3D z_zlK9CJk(GQ)d5r0pxKGWvfu@eTTzX7G!5+x6{hvCHjyIT`{6yZv0=aNWwH=COZvv zJxv0)qMdXoS~Qph?mF4&YzzX8=MziE_}ummfB?3On^gE)Q(#^o1aVD&uM?yPi!72r zS|>*f-wExXUo5L1^c{-^n*oJ2xSF6=eFCUfAlF6gbrjzR-$zGBhmK>7EAC?eI!U3( z)8*lURAf?8NEIC&U8HO*eNI$BAoQ5lLj*5{#}8{F@?9+=Ym-~8EgP6b?^-a*mFQz{ zl*)0CQw8+@=8v)T)NuD$p@$<*l_OhN@i(okon&@`bONWsX(ZfV|Nrh&e2o?@vz?-7 z6rd)zouXX~;bkJ9VIjqCA}txxtomHWIj51P(0p4K8b!Io-7p$4x8tAD_Nrfb;2XfA zvJFh9Gb2Qr&WT>fy^Lq3pgjU*!3Uw6O~BzHPq7>nMaX1-gt)jgD=9*E25(F7#;z=3 z4ehadp#y0$6htRQeh25rtDC2gIv-X4Tus>H_B?*&o8pK>!kN=XcX;c}6A32i+R@Q4 zukM~Z=a)nU5}Y?sGo-V?)YHIDDrw1tsDa8F^`1I>8Co1UQKxIH@*uNzb zi4C1H*yc?EaFvu8>io8S<1{6jHCR}WwU>2F$#wau|YNWZ=wB5whqJw+WP+us$o ze-CbR-*lyh97>0p4GyYT)1mC&6Vg?Z|XY z#KDZfeqTz+VH~tNoX-EWy5{TU^-M8a`Pz7lhe@mEV5ThHmBlZ9>Zf${ZTm%Z1nquP zhzywQHyB_M+*ET}B$>dK^Pv=}5ObPFn$T2|V@1A%8NOjx)f$)?<8!}eCGQyPXcY@U zwWHgGrJ&{B23;RuKVQK4NbIy)E!3i+BBr$JeFFs`@Vc@mm6xCzq{xJ&^mrU8Tm~x= z7lxwA4Q#(gDi*47aB?zjLrZ8B#VOh;@|wsTuVEahPtM0C{OaMX1|nXQ;HTTyjD!$T z4c}m$#gvfiN}}wSEq-1~3E|H?BzZ+5^mOl#i5*5NsEL943imzw)BV}YID01b`e!ZE zsanpgaWoJO0Pb=m9MR@3IaE{NHs!q5mbMmeiyMOx2NU?7Q&7&7YFK35Co&-=uy{uZ zKfbM0+>8*u&)4y%dJ&<=D<>}A6`IE~c`;vEvF&cQ!zdQ+PErEgdjkCYg5;Mg3?)(J z)=q$x^h%X7Za9b{o4>VuJ8AFYOhh`V;kI6UbFE3EEi*pQkBV~vO(G?Tz9PhHuw52) 
zQBAL*g;BJ4x6b$fYvyrSFUej}qo-&b;UyPy0h;4zl<{&oVEXt5E8m{MUL?|(MqNv=pm z1O2=XA{o*o5*YFKV)yj-a&D?uoctSRtVo+MFxQ%A+e7<{?`yOyJiql6UouLetN~`-u1ID z8m_tab#4@C;GRz0s)l|u-nxF<#2f%%K|4i%;W)vpaTGuk-BpS6A z)PX1tr%aRUd_J&9wNS^~=*r>hNc+W$7mFtqG=inSvk}mDjg0WKIVY%^+z#h!&~6gr zav!QgfR)-a4ure&U8i?AxDYM4m;d}loBT= zFK;7{WKIEe9y0hH@>ruoT^|ODnwNSv6X)+X8+QyhlPuw_Yr^YayoM zdY;5%)?zmyq0Z$p{>5q`4b+(kGV|4cJDIl9!sSifTP|}JK4`#ou{o$EByIV<2@#&iSIWd>*aSU15kXGYykS5q4P44hfXf1GVQM z;0+GMVW)Wp;*<2`8EZ|3;;AR0-Miqg7x;YI$1 z-TjZP7&B%V8#T?uCX4s3a&C6Tx4W&ndoJIcXfHUVS3EIitbYitDRsji(@i}YkEE7Z+0B${ z^H|RkkdTyk$OLw4j^$brAKrKei4out=28i&#L2FLnlxov1+z0_;xLEY&mS-uLYQF5 zgdvQiN|D4M2AG*x?&8cq!x6qyW1% zwj`h=6b6}d~ql*%ZcQH3 zGJ*^-=#{7I>+9>-%94yvUWAl5B9b@pKmIg)$wUFOi*dc3Y=bc*YFNqVUINNt^im6( zeVc0wApG~2{5GMtwa2xGGj%Hmnq}bBd?e6By6q$E zQlqn^`V-hd4JRSsyG*&W6^5_fM- z-x~%`btpmTIS1zC#a!%KD^fhfm7QDr>zXv5U`M0PY&i@C8-dfu@k8;AzrN2zA59?| zKDZ#vV&{-dIE$UrP>kT!4YKx%$j{732l(p29TEqJN>lE*JD%jteBeX0FXveFs@Q0Q0@v7$So?@?T zOE^9y@W6x_Q9RNc6eK!xnF<0R^d5UPC=Ln?;>q9?Uhz|ODL&*cNP@D-eml9n`|?zy zB)@01^xsK|holb^+tCmaF%(FIzrA8%#Nb63s7dCsnqCjbOVl>VASI?WoWZ9?M@8j4 z!J_5BhqXSkzUIKUFoPC?2_V^u%cdO;YbJ-Z%_*C7c65~cv~JvywBGJ99NNa@(kI-= zBzRf4%v7)$It?epmruEv0QEkXU&oIhKgjheE@F$+K-_V*sAcqN7((u3(GR za1YheHQ=q85=Yo2w6NJr3ZQ}@A6TT#IH>lyv$GQuQa+?*)$(wu<*LyDZ%#ME(bV`D zcYf;9AVj=hP~LEsP+^FG_G)u$Bj>3wJ22w0{5+6Y%?7cL^$=r2jVL}SiWM}E;8JLY z+=|w`cz#7Qy=ov7hziUXt=`K@ziR==r?2i%!(juf{@a$TxH#N$7mv-f`|l83?o00O zq4b7C77%;gC32Vy)*;i@ylO=M)lj(YbqK0Kh)wj<<0=e$_E_xJO|Xf%%sK5hrPO%T z4nFf_tUSZV$7lP|2sv!`E%eaCuoQDEnfbdlFc8D!@ObaS#s+^z={0*~hh#^j1)HFT zYsv&3{x|k25mTs*fhsEtqMnILm{F>rGoHVEnt+CTq1){rI?-n5y>U90Fg-#g8W7xS z)S}{*mpqzXfNlPi$8*`eUdNTWy;nt3a~^o>msl0gJT+C36ydDA7zMdgmK1qD4E?v3BDjr(J=UQ|UsU2nWpc_WH0U}4LaE(WkvdTmw?HL&z{tlnI> zEN-Yk1m2{=GiAEEEtU;sWyhcrDwu!(1&_In?62f%I|$JUipaMObD^u}zj!H8wSCe! 
zw4EMoJ%-l-Y7QnNBy9HLmU0Je6wjxay4H*8R$R0u$&~~iyTlYBpy7;oqR$-WJBV|A zfBk+^>J}%oa9s$lI7TMN-FzVF$2%PiDYXN+#RkxD#gFW~Y|Ca5i`bv1gNo{F1X4I6 zm-X=AkRu$%sy>K;5=eU5d=o_^q@2U#=jP`TrqKEs!vh|Z->*0xk9l;RU~aq)pLrXb zECyHExn(9J(btRiUgw{AGIa*!JIT-oz*+@^oyozRY2KV2m=&EE=Dp-cd&O$o%}5nN zFfqoikxj^J=;s_kgy-K}qSab5Pm)FkrBouxVI)Xm^s+~RcLGc(6z_Cf4RZ;Xc*pw- z48+hmJRNy-btiDU%=c}u)D5y*YYc`%X&LaxUe4>g>i4lzQRrn@gP_RRudiG@;md4| zg_>nGE*B><3VA3li?NA~^VzmM=m*fru1AkT!Afn8XbiD`J0O}AJp}|3G;MkWc=P4O zcwqK^0Whl(Gr6OEY;D=sO*8-yA3+fj&JGSm+uB1x?^ziD54N?AdmtaEM=;DhY^={3 z4g%Up3P8K{m5*us1L@^(ut6UP=BR(@#L-k6UG95%tWi-?fnYlBLDmJ(?0^!%1jLjJ zT+(mwxi_w@l%x^EfWllV#uBM~om$Tk7G(U!46~b?n!?gJ{?j$V5OZ}>#4zfM5EQ&; zB)2FmTlldzY*yS_cklmxmW|3Ie>i+as&BI({*t$ zp`12lQX<`k^S_C^F`UGIt`}IW$l#tN+I!`ogG;UqJ6ryioA2wyt$bqcDzK;qs2l2i z+!5y&6BA>p)3f>CpZR0DMPd;LvK;{9#4~G$1c%R+>-!26jc7)D(Lo23E}tFhsy%+}nDey9;gm@L@z?OfQQwf$J5Q2HrJ93ltXKnp%KNc5tD zM*%*>7XR0aP-%8=^({i*!Mp!O?rlP8jvXQj4(L4R1+R%#au61L<%p{H(a;|9t0wBLAEQ z7HrsuRmk`$AW8lV@^CZ{cYWH%|1TMEHt;M?7B)`u+1irb^Lw5x-GL$^3uy9;4NB+9 zDOXbB$!36JCV+i{7qGa@XS20l{t+g=N(@K~RH6dEs$_vVZwLcr`Cy5nRKOPU`kmy! 
zdU66TryDH6OA!)4xMb)=3r{~*Y$nsdAD~o&K8)HQG<=|Oj!(E~^k6C&++;VD@o!il zcLk=07(>J-;JJD_SgG9`TA#?$jyG^=|t zI_duhbQ1lbHPK}dvo}t%#LNJ`37_TUU~k@Kh;AXD)MN#`*Z){#K2=!0dVRW~Rcn1( zxXoobTkg&p^xiny3s92D_-wpLQvPkgB;xk`Fgk||9Au5o$7x3Fhj3TNE6%pI>bFZu zDafT{VT~z8MMYoP(T0W(`FSj7M>EA1-{b zq^>M9YOj+cNH*cCSBv!;hR<2_EAJ`w42^uVCSuHPjJ)6me#4F-pb@u~8_OrZ7Z*3x zx&}pMmP@rjSR5WDPgyfUl-2O{Mzi7XR;0abM_lA6h8S}qn-hYLaLJ94{C*v;$2^9f zT(k|`e=#GDFcdRV{RcA=l@XlKC=ob`d_Zqeo!?fBmW6#a)=%?JM*-GFT<`j1&1%{D zISa~G`v6n{)?6$gn!a!FpHnz9JJup@kemz=aX?(LytVadbWMzl5#snX7DmW1-!w(r zql3pT;C@{mSptxz#A70f0b>#9bqtI<5@|Bh;HX8(Sy?~YR=Zvv?0#C5?kse1*cJh) zqgNZQ8oeAZbQ#bVAPH`;5*MisHs}suHTU|pC0i{YjJEhf?9cya5_3Tbrt~VVez`nbu31U4%9_5}s;8-Fo-`kc^6Whme9KghP>j%tz&bN~ zwnl*@75~oBu{ylQTu1o{$oqKmS+vE>=Jw$}4>W!cCmdKSo+&u#O+nOKXBb?nGKrvk z;HqLtX~w`N<}kPp>Q`ZC{YshqqA&#)IqW0hHCyvalZ@ z7v$Z(ek3bMPO!N95kY&kJCO$fjNsk*pp&uE;IrfgjamDZ*+1%ZKEqM+wtpQQmn`t% z;oyoN&A8GKS#;|E==5m|1VRLuX|!1@G$uFYW?MY?nL$gZxV3oD9XHTFprO8=;9GEv zGO{iugFhC(GFj38cmc@nlza-Xe`umkk}W7#($okK4jv8%f`1ik=9!>nm(A5EP@*`J zAk>)?CjH9}OuB{w+OjEWa2_Zye$pg@(Kn0!J!vkvjviVN`ikBjP=$U>NFpL4c%L;; zE!33`8FTp%(+y+37AR&1M1D|G`tiqfjRdc$EN6xJcidtF-4=lG3GP9!R)-1GJ~W~q zo|r&fck{%{IvJ@1;`Enc-AhoNWp7or>ivU4u(5~=KW_GqRWj)j6JXRowLQ+zuH^oM ze7luk#JJ$Qz>1dB4<0=GnN4$sPM*ED;cd8BZ^vF(UR@Ld6B*QtXtrR=xxa@n6Dx+( zXg9VI77#{A=-XCLkX;i*kPTZTq}eiuHLdTQ0VsOvd{yAZlxlBgIUIMO^#yX(M)b|e zqZ#>4`U3q#No?7Ni9s48oir<0f&nG>W`or5ZudFMo$O!FvTJOszf+UATwW(O`wDU* zCIqXswU*s2m1d-Vje~?zC}6k6%!|H9ra&{^1ISk8^INY5EFSAcujRS9xe;BC5V>Kn zxZ^&8#qC&aj}#f7jO`t;hBnmqIkXnHl@d?8b%y?Gb@}-qfrwNX;QN}tc2uGl7Z*!N za!7=JBEnT33o}JFDC5Wb+BGR+cQo;XM)qW~-eckG)V1u9P{$7sIg!C>IC{Cz4h#%f z3e9mLP452c7HvMhTtT{+$H6!^)Xh@et-t1SM2l`G7+-5EIV8k_vTJ4$s*Lr9dR~&g zBw&4ZM^g<_0JW(>o4lTn_rR-mO?Cztlne@d;?>+mwnRj?VLE)Dbdqy-Shr$4oD!@X z2U?t&@oVzlX_^s>$uk4i;Xm6J*m5;6-u&6&l2Exys(*r-Tpo5hRaug5C&!bj_3qmb;$@qS`pz~ZKVOEgKIn5TWf z#~n}z1~efeZISGMy&L@@vhz2a2NpT0FS#11y>8m>L3jiWxgw!QDh6}aFazu?3gUZx zwy9RSr|#}<14j$sr*n(aV{r!}{NSh8I^MC&gk+}GESLt_NH-QK&2D$h2!32K9TXtx 
z(j}V1i4>GPrk{bNH5)<5Z#>#2q9df^TFj(Xdr)IB(*?KD6VcL4|dAX;Z@nb|U9LzM-4`6dPDw)SxLPA0z zm79x;>tcYV6$cx!1EiGUL{hY9Xr&s`d|)_J(Fw-Y)gjIm(>RQ=Ugd;K0EBz0Up_yw z*Ekx|F8oB7A=Mlo;`v%31+hnN6cL716?CY^!;71HbmD!~)S$n2FUFrS-J){G1Ns

-LX;;_8{bTPdT~Y4MXdYcMPL_6a3XF0rufID41W~7%Y&&i5L0Q@ zTGD+)lwIF?#JqwvpUqg~w_0Jtcp;HPNl6J}l%#|N^?8+x)i>X^fu7Ig=+&vMW%W_w zr*)~M1SJecAYhRoaWR#K>*8NX;dLlJtW5hz8>ZVnCVH!wEwP5B?`9olTB6;ES7ly9 zg$Ap}+zPU3pDCUCe&x^@ycq&f1vv}{jn~vYqDce|gR!~iazeO~CV{J{#=LT`yUDJjKgjIq?@T(Fs$nb)@m^9>A5iuDw= z`bJbg)*5{kS6&KTKdW3d)o%UvP&Fb%4BBlC%h})5j3>U1ZJ+>XZ*g>^1`IcKBouVb z^dTyLE)RR5UsE1y((HQS_u9A^v+Q$224NM*N^JvHa~1j7`8|8&jK_pR zmT$Xr#dF>){<4EQ!x0#ofdo)(E@qt#Uz%eRNL{S|d@IhXTVsxE%wcfqYHUH#C9p;Q z#iFP1&iVNHaf8DStIa45f3Ll$lPHQ&lQw0{_|T-686Vy^0%6htMf5QGNkT~ zr49;f-vsG5E@x(|vakVYPuTxD*Zl{T-&t&q;{qG2zZi0Xo|mz0Oy7sLtF9H!;; zA$4D=Q3Wau`-gz@Pr3e4ANK1g(x(nKF@v{KGiBQNkCTQI6oXV>FCFQ2DSyfBA?Ojh zK0Q7tTj!D!Eyp{wcKHsBwBDoIAnT|O))R!w5^-AwSwQ*Ni&t+`T-QQ4*>ZER2j%PP_G^ohrIH2R$3?EN3Up5{aBLGOB=PVPUSK` zh{@zq%Wna;CP0UrEJmymLGYmwW1~`z=C~S4YsXux;?54=5K>%<7)EWZsL*6iD_D)s zEa)27=Z6dUI@kh5pqq7P_cDUa%MTGC5&NCY(a#qSKVLoA;3>!B##8){&!9$2PKd{4>3D7$E(^@oUZ4fEmb0LBzBF$eb3pt7JDAxSsMBIg zI0nB~St!i1*D&3t4Cq|)HyrZ0gXLNU5GITqb$kZ@y{01v14<5jR%Lt!0xtR}_mEI5 zu1Kke$nSBJT0xh9ut?ws3&wNG#|&9Nc{o2)(I=iJrKA6i$Y{`3_>Hk_t5%3|0^b{m zhA2DRDl^-{aW5Hm zZ|6YfXnl0-RK*hYqAJUMeKr^YjYzNk`Q<=I_eVae-MQKP?$HxzX~mM=bxA%beCM;$ zca$X+_V~(r^W|5pxeJPWNi}M_W+HRF%GcUjE{|p_+}&FGaMp7pN9+U&B!WOFS!DZ# z`QJuE2;=9x(&{NJYLGyAscGylqx>`?{Dc?)58sT3ilID^@v@YxxS|;q_S3tSqouF2 zUDC}G($dl$nZx=;=jV0=z=( z`JeAsuXn4SXSEi8`u$rX@p83q;wQt)P@GSn=yZu%RAu^0))l^o?s%G&x$CDsE^2a> z{(cFdSeu2g_BdB`+&@Zc4l7a`;+zBXK>&z*Bsg5vuBL7C%XLs(52XkS3JONm3Y6Yw zp8`DpOuj02$kDp;Q{@-w6pz1~!QCqB6slqfdCFZgN?nwxp`>4kKDAX$Qt8fliuICPecB{1dSd?w9RGfa*k0t186L!#xuMf| zaN5fplP)dPSsIkdX|>$<#O%-fj=_<7w+@h`bd$F!DPYm&u%WliP*fZ*i}?KGy1PK8 zSw+K15|;(}3phAa&P$*N$Q(-`ORyK*s;g-gVX<5Sx@q^o(Bu zu77tRXv|Z*sMt6xn8*WN(u;H~KYhYMPv*4#`*?R;wEuo?Vgj4JRuqpb%l2;D>Oa|# zB;YNHvfw4!J*}hua=JkFHCqn3p63@tM3^!Xdoe1SmuCrWd$>hAz&237!|xVtZ^v$I zTjle8?V;L{pu4dZgIxZ50wlCMl?`>|oYZjnWW1r`5>~Q#Zn8s#*)5f!3${@eC2?7I?J;1TNh?;r|WmeEid-kyx!TlOH&^o?lg1QRm9RK~j4L|NK)#W4jnT5VUbUuwe=S=w{+RCG-NS#8^Kq!O2Be5xK6p`Sx{RdI 
z{CM{r@h4rmw8eo*MV0X|vJFmJ3FFtJ#hcyH4|yMwh4k9I1`%K?V=1srLcJm)Mz-sR z)C*!uiUyf0z!b~>)Ew!0B2bYBYSkh@>&ZisgUo|#{hq-+twx>0P&H2C)teIo2ajjp! zhvG9W1C5{q&ci*&lE^;LCxcV5@v>$<2@LgqpCo^CzKB;BIGch4eacP8DD94Ny6F;M z=`=v#Sy)E>0SFn&?YEWu3*=FfIWk`p#_~S;!NL2@6dm5*Q>gLa|&v*WL&krkY5Qqs3wk&+eY#XOFs!u=N*`VhaQzU%Y+BHr5pf#q% zi!G(>et+5%8qat%(fFZEr@2x#%Vwnoz&5Wkf5|Bg2VQV-3XY4IYB75hRnX2Knl)Q!4^%c;4|m4Ypw5uO?t6vk;?Dz%Wg>%K6V5-PZ<84N+hcP zdJ&7U3cdc*Q8E+BczT;iB^&wIeAVL`86i7Z8C{kSjgC`;onfp5ng#4K0 zRJeg50y@E4$cdhxUzEn-P}wh_&Mq4=tw2Q@Oy$Sdnbs;hy^Z&5-mVZ%3UE45>#L(R>A<`vChcrlcH%Kbo9nwgu2nf>M-7O&^ z-7Os=ozEVx>-XHxhxb2t?=Sp@VVHBDd+&3twbwe1Vv`4~$Ab%*Ev`i;JryJMV5 zZ*RKkhaVLU&aR1xnDxn+B_p&kO2q>X&U9rpnKU(;EJ&8M>+Nd>d%H8-rw?>JXE@%j zc7pI$&*BAZzM5+-Oa60R9B$f^^<6ZU-|ax4u6{{LNy$j0A_uZ+4Ndx*V87NKHXoU- z`?!6R!mK^Nu;2ltdBsDJx_g=I6MX>2Mw|V#i~wkjx_Mom(n`muAolT^oTmJ!E*yam z%8`l&c6JTJ5wrZoH-&1>oD}Uk387|S$^ss)x93=~;kP)QD#6i9*I&IdiAyUDAk#W@ zs>}qkJm>4*?5q9Snot%UEyp0;DeHlyYJx94m5}VAB1VA8$1_szqtX3ZUmPGdI zL2i!IynhnC^o^}uSy|~_o4kWkyVvF{=Sw}3{cjqUBEF~gr;EpSQ>C+C-4pz7T+m-_ z*$?gv)$L2Otai@zk9$E?YNd!&@Pauw3w^j621HM(y#lZ zICH8rSw|)RrjtfdTZc--KgZKlO10qWw0xuUT_H4vgpUEpwE|Yadt4sh-nVx*SY*7V zh7s<$w67svBDy-CiERi_Hfe&|S}u4|oix6hz?U9WP#GBCV%bGcjp}yz3%hF_Uir(U zak1@r51vdTosof5fv4rec}_)7chBO|(z9MAP;Mu^@TA~#{NnA6K;rahFK~mxC+UgF6ZRu}F0gJe)uJ%lP^+k24FDO|d!nYH) z$CL68IK*)ld zOEu4H?~WVUwd`Xkmka7Us0iaN6^#`m(&~=JIwV&1{7?I9_OS>|Sr=9mUWbi-!c2{v z{wSL;%g2~24s~Q%Vb$z27JhpI*L_{hnN{=q$&SI+3ZTnUJOWzU#fj&D&a5g8r*ikPo;Tbg`N zx5f&FikaQ1dKoR|@Ls3v9g3NLg+ZP3_32h@`itq8eXk~P#6aH!HYg@$z5M|$`8bzh zdhs^PQB90)@ZK652Vv|9_xaxRuzd&nx#HO{K*mWx4$Jz_jES?bE2@NRAI_{i2&s%n#JhpTUGsm^It;mu`=c(i|R5aexbcE7)w5p{ZVskISxbaXLx zZ2q_83x+k#Sdz7pId*Z30**{Xu=^Y3GertXPxRi}x9^x1YhG=PY1`}l**A{+!6wRb z?eDts0|$c)Up4@V8zBIgwUSiU^w#m4s`KH)CXVOh0YoYMC*U*NuJ6SmKOM#%E{>%6wb_1V`JMe9>S2PD#c&Xb!+4SG7vb|zA>PF=`0gh zm^%4435I;fv`YiWC>ZknYb%|~H))LQm$6QS^-w|gyIFwgulW-2F|Q74&7O;MVfKSd z81ExaQ2KMeKh1|Ai^)(;LYXWRc1k=RoaOR+ksol(N)N2rt|=o71?vZ>)#9FEpdwL; 
zzQd)}o4U8{j0{fCwch)hK>2;y>8~`2KH~BjyUwZM%nu5^wm%FbjL8q^1))5ZB-fqC z&Wb*IJcpbQ)U!nle5c>y10DKhj8QX(U0{VxmrWLxegp>P%$RxPDOB5)>^1=;(2E^_ z!;RW~bq%l8l+>ZOC4|q_ByUS23I>=hKSSVA8XOm*UPpm8*G$@){7$4)rlkj)-;+qa zDL*F%Dt5P|Cub*f%iN;=?xMimm|SUcLq>asO~X;>sB#o5`gRqLt7}!qy}=rKg4NmE zw@60aMPBLiA~jp9SOdk5n%*XpGKo^y8`jF+l+EUkSznc!UFRx%R_v|f7=05zVnJSx z4?oC0)%f3>eIn9&?eUD_#%1Pv_I-0Qki^B{dg3&La*yFZ4e_3wu#M^wIsQ7$H~Y zEoO?tHLvkW9_Ir*{C2~Ur{yOLx)DWbnfW^XI`5i8K?ly%uvlbza|#SEP_H9w0hm3U zAel?5`^p1k{Ao}fkDYCw)q>tggt?Zk%f6QKWtc)P`HLui*|Cc%NhM(+;i!Yu&0MWp8q-6O~YbDhl&7 zq-A+6n)lthq-qiw{F9kRA>Um7E-{d;`v%l~{?|)(|m?I={lB6oqfLhCxS?-fWeuDSm}>Cr57n5Io7z zOHT{0@HE7Q->kZ)eODiOrXKoUsyVofD5oNTK7Ubd=h)~^p4&|q@`K|kp;PVPDH8*% zJ5%i`q99rdZ4jt4qD(o?*Y4V#E2~4ttox#g0$yijgnXctHY*n`zuL|UnADE6NR{!v z873hGmGkBI2o|`@hN7IUeHHo@4chU;#86gm6Z$NoAQKZ)wVcH=4^?a0-jq;-v8ea% z-l*pOgMk0}GLgFZL0K|!U@%(l1QNotR`>mj%qPgqnq^vZ-%x$&EcNtm{uYEC9Im>W z!l8=bA(bj+b_62oUGWzgG;wuvi$@mzX(aT@?C`$N5E-XMa(y$w$YO3H-`Fd#yZOm~ z+-2E|c4T6}dNbLp<&s<|A$jvav&-lWnok11A8Q8dQ>MPSO_iVURmeV<`>dHc)PPf5 zOE=5N^PLN2Rb0JL2S7Hb?0_b!clN!Mdzqj`bI6YKZQKUBTK*o+t)VO9y^^qB_rZ@`Id}_ngWV?LyPx{PVwe%x5 zwtKVBpLN94*HKBHnq&=TG_e%Fb=od)XicfFM(v()@p?A8Y-74x5BarX#D2*OX!P#e zyZ*av{!1hEQaEzbvTu*TZ{yZXDyt^iWjS4A>KiWQcTQt+f5nyf7C}a1B(<>-+*You zqZW2leCBuSi(yrG&jMPkUQJXAur7P5XLmvgxvB&_>2{^}LJoZ{Hx4sS=O(B+ZtG{G zBR;-ov}(K@R4rX{)^5`YH4ir(jOanqnAP^ZjxAZ^nEr8QbYe z6<=>M{%Ii0rEJ5?eJ7;xFTlmagNZ}GKY2gx)G+OI*mB(7@@}Hv-|u!bQ;_VrGNq{Q zQj0(+=m<+qTy#@MeAZ7>FICTcH7`iYh+}gWu=L>Z`-}wrTaocoWD>IWXZCWka&nyC zhJJ7KytDN1VEG$%`|Fz@vog`M2%>mw4ntwFFr@ytT_v^0{Vbdm;f#Zd=NNjyWf1pTQ@yBm~#QjWaZ`M?>is5kg2{Ff}Y+HQsI_b48Kh5 zBo4S1+c)g0Py=*sUak21g_X2FuKAS0?*jqr98q(C;Y@mF_%c9Tum`NdtLtT-<2PA3 z{jUE76IUJh)hZ%$)O=w#Ze;1tDgY?o zbN9DQk0NM#v6seVhSzqz&o(9$;Bk!@5e<^A>+hZclIB)BgS0tj_q zsB0A4eCUp5!0;-rEH{{%;53dDm;L09B z3GbLTwp-J2ZoBjXt@@LJvrUsnF5lMfY?CfaQQt7Xs)RSe7rmT4kU@pLs9!p5{ATJ)v887 zq5AlW(LTfDY)AF{-5ZNK_ffsxvMx!_j9=g3$$?I*bjy>((?F79&EH`*?T+)5iw0+InpW0-p#R7EI9)y0>h;(EdW*=uVX9P@+buv*F(mKWb3^3KI^BUI3)6GJg9F%ioXv+P 
zw;L5P3ZDQr4Ju=rqlP|0NCUrSvnC}CFl9k$CIXz~tTFOo(xK~#=le6t8O5JvS$x&X z3tI1gEId*?U&mYj^Ncbo*dDo1yHtlJ)MLy!HGfe42gHDMAtp>sS0XSiq%!EcHM{TK zE`9@3UX|Ezxe2_~Jgw*JlcEIVYTW*oQk+`sIg$gyuHA0>J*2y|_+MHtWc#sDbf`fZ zn{c+UG_e=OB(jwTXLiaiXy&5D{ji(+Q$z%^``FKGNDC@B35Lx6 zU>EId9l4WSubRP7V{Y4n22*AZ9K?!fdC5Qz!)W@Gg7yu1hNnVyq2AE>$|Q0a1!~_R zf3Bxzg_MnK|Cd)O6_#vM*%@>R9iWu5#U7BV1PRBf)H3-Ko}Rt{j)LzwQkQ;qz8@vP zBMTYj1jXWL)fYWc7EIp&AKv3z_T&(h=HY9P>s(@+&e?gnouK-uIRW$zF;Nx7F2OsS$xAiDGdf{Sg)TQyeKacyrS~$A(JX11WS|yWc z2IVIPNXc5?^h!tWRoqU9?VLXhPs|$D=>2fK7@JR9j#Fqg*{VBjD(T z=l*!5cn`oP&~C1R%6S}>VCJpRR#FITlxg{02pvt_`Lw-#Xw2!#0_`sAz2CRTGJybR zLrs14C^Wlkjm71S{QPr7u-9{cqSgihf>dByV>pAR!oUl3TaHt4P}%<`*5Q6VA3^2M zC>f5;bpW~(n@Ne40~DY%j?OB`>$Q|E)Jw#F&B%&x3M8pPR2cUj+LG}`jHD%k9qTUjnRDXWq;C^(# z&tf#+;q(Y2jl^P@B)ZeZJXg<8ey;v|^1FIGrAYU3*KDrOc6rAA&1Fd1RIBb^kO1G- zmf{)G>_gXJM7TbKoB*7A+)gf-D$zDekoO9wGnr_g{!6CE8!Ij#oyD6Rhx z|KU(9(gA#-4Ww}A+7pTHWZ+uHXm-Lp+>QZYrb*j5q8Y&201gjj+itq_+PFLIabR`^ zbd+EMTGwAy;Dw@{77L@0-@5>01OQDZUHs@EO-nyJd04uQCS+41_)z}R?}i7xr@iOe zE>UjZE@uR1*>Cta>-%ssz`!mOqwbmM)=Axq2S}};1Kicgt^-_iZy1n%kP>XEF-rEb zNuBOm+jFKuoO=U5mtfH3OdLg~udcPEUkZq9V@UT@dXQi6b{=$roVD#SlqG+JYz<&z zg?-K?(PMIRbCaGaJ1*3-wwTKP?N^FS>pnTcb)x<(1EGThM`$y?f0&tx|54+}XYF@( z8R+ko*aF@3a*Y&dI21#WFeXz-PXTy_Rj+n7z$Cy#Y!&1rHD^(9CQq+Jh$;7{%X_3Z zXMaN9>< zXJH#RscivzB}seyiCx}_STTaT<4PnPH-&(!Nv(7dFU}#*Spa;K3mB9LmxD71`I6FP z>Y-_3ThSn{&+7Q#Af|b4cD8WlEiggQc=%W)8Q@P%k4u5h!r|{dLMkfb`MTPhx@6Ws zIKn1Vs*BA{3$#MC5*U%92%%AWSWQ3m99$v+H3(?0NJ~nOWQiIxA$>}xc>S3t=sVCv z$XOf95&|+hqL?@+ub`1MnZXGULTw<8J1c_bY;Rwh-jeQlvCy~Djf{u&{3AQy6FS5# zlm>)lUQfrR!eow+fYDDKk1K_#TDhVmvm1+e+q(h~S)N@^E$b^DG~!0ucgT>agO!@c zF}DCDWMtZQ#N=YXLL;&rwJ8MCl{8S+q<@Z2<@Q*p`Bw|jhz&q`8uL&<@gs-0uw!;Y z_~U0<_^UBcP5FXj8|NUCE(e%bfQ1J_XLYcN?`Q&7H_XlZc^#);21M~as?nxP{>b1a zI{*0RI%eL@)?CRXo!0?S{t#lpvz@&E9MoG-TCmwGk^xRZILPbR-GtFKVk&sb? 
zy3kRHSgTy`I!QcB2OmvKI=d2}PUXXTV8 z0yEX&H<+_eUQd4A9bntwKg>H8%rzyyk)z&50Ptgi0UhVeXnupp)u`Vl3=Xvj5JHk= z;n5-QfwkP=-7&OW(FWCAeF?!&4iu}#&mGW&RTx)qK0DCccs{@mhmQ_oXs6+D^%LLI zgLP0*Y_P>VF9GU+mQETliFlNBf2}ose3z<0F)g(Y3!5Jv9#U)LryqLc{JtfgScHpW zev{2cZdc*gBiBooQ2$th0V+dvTQ(l`(3oh-w6e3r*LS0QD+Z08G(s%oRNqPEdU&AF zwWSDjN-np4jaYR;m-ixXNZh)F#)i|_Z?CR&q}^B=38BYvACq*vtvTI#(+1_UYqgmE zIQ3`sO^(DcCze-sb9^Jw4$R-?gJ;geolv<3f{z?nkdOE8v-z)L&HH0Hv7+DPNxVD% zH3fRC(LCjUf+gP}$m8_OcOyR*pr_mV$cjIn{uhLsAQp%6^Ys{~fYCpGJQ6HK2;q>U z98A1uX20OvEZPT5;p;}W*8 zOjR}mzzAk=+r@=V4oQ@B0c6BK+MEtPEX11nQi=|qHUtzf3u8c9LVUp{QI?ulfcILm z+QZFBoExBXfw;-KE~*+Wpoaj~=sDSB+Go?+f8ybiz->$7|EC);-9OL;IZ6-Ro&_@2i0I4x_UdwgOfo!zP(Oe*Ba;{HTfMFc>AGwSgoPwS}v0!lF z&OsRQX;3USJ)`dFix&hweVD`?V#4Nt)B`GI*yiSiSvf3-z}2vYX??{2M&kcc*nVvUpm%@( z8>xvNM4R0(Ha2dl)<5Y@wCNEY^)pzDWyRUVgv7+B1@blJ;A2+O;2HY#f_M~|N}y!* zA4;TnC=aL|ASDG_pa6JAV!oo?y}R9aJ=q@EWCBv)EV3-MHp0d6+~ay5n#J1o_V((| z30DPA-&~C~jlQm~X0h6HZNw9^zt2`THfs1CxwULR1!(F#fq777q6&Z#?gxF4z#d`Z z9mj;NfoBF#vNGE4qQxD=3<3{b-Tq`bl>t>iI&a%&V)xU{@5Py#f3|^MitLzDoT6PM z0(rDRxOC{Rt@_@2%Z$_5&L{%rr~l!s!uBcbD+i7$P@CTO)a5|;3ka1Iqz#jC5TBS~@T^&zD%QHWtAJ2HyrFfuEL0K5_f zkARvL<1+{^DyTyO$S^P~OVyUqIxZtLEC!7nnD68@)Dl?#Tix~Y^aPDDnUx^YFqWaT z%n&+^Z>a>ZCa~9*GGC2At3NhDTo0juQ-e}pXJ@WW5Q9aNqE^$dZfy|!r&u|MYtkpz zxdU739a*p;5(38o&YKH13_(bp?G)~w@6Jz}s#AclP0+Nfz4{;w>|&Z95jETf9VG(ssmh&t_QzqEfYwek?am9R*82Iu2e*wJkCX*Gh@mSW*P=q7aT zgZa95qZEP#YcMs5G_#VQ3^laa*x1ywt>9 zXI=Jdj_6F-5U%>c={L$<{JC(nI(jg5O{+-i0-->6AU&MtPS8V+z?EA;Ikxc%i%7STTt1N$M#U>_xpTqeE*5(e?zv{GvgieO24_?@)#6YTdGk~&n0C}?;~$%54VZ$RhJLs^ z@Gh-@_r6bnCQoPbGAN=5E1r&mgvV25f!Co#t9-s%Q^BAG(1`%qF5J31nTrPOT&_1Z zTHNd~p0m*}D_yXg_-VmPS}%I0ETZJ*<;7BP*WddbM&my}oz{0^HL4|^=)p!*I5N-) zFa)z>NJOv)vl3vQw68p2l<7<`RhL3tBN8leaKWh31Je2ibviUjun9~uvCKb(QIAg@ z1Ew>9pd>H{Bi}M?=)u1KzcPJnAbLDz`)~C46d3KN|835U3p28~r6U^4u$K^lyEFxWXlPxf&+}DBX$aza5wLo~X^A2UP#e@A{ zjGAz*9rl76z6jf9CY$0R*9_wj*fMH(LlAiG=15Td^E&ZjORLRqhIlr%2EKp`;tMt$ zFIJkq0(?cG5+7XTe?w0KZb}~TeZfgCa0}b>?QvJ?sQ!-(5o>-;v)8iJ9VkFfPMCq0 
zfuEnh1LHrgE#Wzs(@!~TfISba-v4vfSz&DA!~<_)cj`<7=o@IZ`Ck=z7GN2Z?I(V6 zfIp(q1a=;QSRo`Ngf&;B6bVH(FfhG^>;Xvky@uUFkRp&XVOHl60;fqah&0Wr-m;th z$ZY>8DH%PV74U~}U=@Ej!cGZ(5MEfc*PfDJk3Ia1axp-q*tH1Fnw?z4bGE6`B`6pX zL>q0l$4gw6!v;;BTdNNtN>X3QQTgc{6zre zGy$Q^r5d03j55OyIZ~KMf}S^%Z!V*biagXj4j1!On4$>S-Oo@g8K%ELjnF z1_+WkEcayf4>1qWEcVAwXa~+T0%R~y|P8j79u&a{Ow8m3t)@{@b zjMY0WJZ%0+N@QbSP~}m!=4)}e#hh2&v63Q>eu?icCTP^*ZfMOH9UUzgu;vR|AP3tb zW>buhDrE?1W}AfpKFzWGNOO7$tNL{R4=l|TK?B8A<&a907$mHKQTHCr* z{YFu9U{#>}MM)r$uKwE(NB7IPJ@@RI^fjx~3w@^zeez$_phe1VtD()uXRh39?lkV; z#|KM3JVzo9u^p@KLx}*smbVWON22Z}#a6>F0w1LK)H@6<8q(L?-wD!3JLdm&HoUO8C{hOA#ErsHRu(y18PDIwqpj~F@LYOD)ySp32l=&te zOLG4@x`X`leTTev^QZob#2=2STd<$`kJ@r==_noYq@thqmK2LL)@6Q9O%ZhbxEjYl zvbqUKadodLNvqg<)ciF!T79R9+w$pc3${N`pJV<*Np217Kfx#eePyO&{P5E1^w{0Ea+>Lb2>X zQh4n4yLAOdZ5Dm_rdB&I4?Yhl(5sD9dXdbIBum=00@c)(_p~35uiC_2pqN4WEID5-_h`LsgA}W5}H^NShjEF!`%M4x1 z!gv(pcN}>+`1)zOnDj&^MhFK`VPX`l-G?K=s>DfjqIdLjE?Fp(L*nx1Q zD#7R$DiCJqO2DmfvI&$#NSpuyeZ_CNjz36^rTZygQeRB0MJ$pxIOZaF3=WBJ^h zMd08s5g6YE1jb0?H|rK$3W`Sxq9iw#tS&>u8KD{QTt}j^Cf_Q%zD67=z`@ZE%1MfA zP@q7LlvLK05J@~(U74Vb@#2v_q!x=SZ_`2o%c1l~{s|%bNUy>JeP|IDpV2hfE~9b@ z@KT1+ob8RH4S`facnlwfXAdO%zU45xmk5Dt`AW#^3PEDyeZNY7Ol%O~^fC;RF4><) zO{t_Z>z60`&_l1hpXSYuMhOsxoq|-Q1pk{mxe=rG`k(hQKs*kF;Pq^Jzh9L zK^-6%moH#&$Lfkdkot~k^fM=#bA}$^NFGx6KH|kEcUR#08&Gc;K3Q%T-yXaF|mopzx!p^q+~Vvl^&KDZmzD=v4|NU*;f3 z`6>5Ple*A@`{lxphKm2>Ic=__bx2GcvM@q+I*qV}tLmCIN(SNc@t|zU=LzFgs%!{r z+HdAOhN*jav(vlua`F9BY_Df<{ZmfPgK*s><$cSjza3rWh+~6y5JO|wX?4UqXYihf zIfhr1ft@QAuI70K?=>h}bbk_ZflKNCGg$WP-!WV|=HUpc?DSpVE1c#}4-r(0KBjW1 zSfY(d`@UD=h}Yu`AVCtA@(b0;0pL3P?5-rr4T_v~bcm5Oe=dAQhoV>3SF;(H3t7Zu ztrAeDE>D;1lUW0~E<7BZpln=5z~LcV6gy{814J~&Kc4{uY7_+nt#8guDthe{)Qu!| zx3_0q*YYNf?B{fmQKK72BwQhPkyooP3oE zn-B-7>-%?K(9r};X7BTTab3WYXXMOBumrV2 zQXYFa`sI0v$Tix*hZpy>q%zcFPo<)$1LBz|6a0LA6FQfdm$Sb<3AHtvN~r#9{elio zS$`qM{p_#Q5Fm+0|E9?c2ozQ9RwH5LRE$92U@ug{mS@+mXGtdJkkkT5_!5xAvF02M zDdFISHffdVNeG&~g_8%M*J4noGGQQQd*|xfyQNvIB`$xww=Apj6%I)fEr&?+vK*6s 
z9%pEsKDxtxfYjf{z?It|%n_+1f$OUloSLl%_h!6fcrr0l*Otuy&^a=fWENs+1=SW$ z!&n63c~lInK3!LC{M@-wRDy8aCG@bIbQ)ip4KtZg|x`j$W%N1UC92Y(4Y*iKiQk&}(}V}v6T zyt&F1E_l>R=IiYEpbl`Bf7XlLHQczjh8@FecC1v2n?I%0#W!G8dryrPYxihoW@ZL^ zRk+p2r)2m3%n=(C{YjPfEnU25&q`~$ctI*-erI;a3Z_)-#+^sdj<{pR2rh`Y<{+Sk zx`ZxKWk%8vEe~#Q$EuK`nU5552&Hc(DOamA4~kg;rBZMs4E5iPF+^t#ip=P9HI_z8 zxsI$zmP!qpoH?+1$|~vT=yH|X1Y{!{j#a>U7l`)-`9-W^$A_X!oEew#M{2WaXYfYazselK!`r82gy2Jx4 zsHqF1u{(EOF}R~X(9ouOe0DE$G1oI2(h2=`vwzCaFfGUu^jbrippqc1oiG z?c6cEM2M-QIUR^wA6;}bw7Bo;H7FT^4HAw3X629?`k$$M%ZKJaH~O^~o2R&X+Pg^$00x6)0UeWeYn*OJ~?&&^LTND+!(cI zwCYKXAoWBCeoI{H3F&49_LedvlY`6|OGj+cy~f^3#YMrVgYCrvwdi?e`neRh&)>zjz&q-qqZVbpOy9Lmi|8;Wm1}CU zAMRxT+qxx7Z7`gzc`71@d0BChl-xYn2-RqpQcbJ- zRh77l?0t0gKP0Du549xDuqos%E8MDfooHsV=9dSD<^mY#m@Q_=W*FWkL}X-^Z{tc`U=~@%ofjU$@{> z={b1OQrCVbd%4Mzl=@)oZ&8web4q)wy`i&`MzyQv2i|evAI&X+Z@(L41Yg7YB;fjk zGWNAL_SJAaxvUcRoMs@qXr1WjdW5XPJx|H~JJSp+-u64_ag%c1P5WJK;pK3=YsCc* z+SQg!bAb}~8{L((ELC$EIfX}dQRirNgn&B)y}`xdibPZd?6*aMcc1IOR+{+78Mu2- z<{9Sd`$L?wi*j=%+erot$Tk`hQ9V!w-ITmqYIAE86qQ`H$96`qA*V4~zZXX|oyz62 zzW6_orecf1!8bq?(0f>E#YM5jIyK;bwWJGF_Yz2e7oeQ}iZ(FtHF&uYnnMDCQvZ~N zZ%UQ&zLcJ?B7vMk6PDX3iHmW>I;9{689e z%`|PG*bZW&8^@Ws7#8@uJ#6xX+a-x`VUrh{5%M%JkQTn&hJQVW6HMN!4w#U*@3U}0 zpfSYpasu`~ju`bB7{7!l(iyna2qAS_%iM;7D-8~Hn~&@)KO+SC5_ff4T*iP$8a5QP zgykhp5;E$a31A>(xMso-XsA(Oq}I+rASHER;2b2^qr%D8uGrSd_fOx=^xnC)v7g&s zqeff5V-thfe1P&T(D*yMAR!^)!&s}c5{SGUvK$z~q!L*KRrO&(L(njI0^BGR3!Xu5 zjq`_OB$zI|gtH@hdiq3M+TZgZf1Y>8$~cnDohk?1sQ2wgAqh6DpA%rz7u9qXIVnp? 
zNoBlzx4z_dEFYpD;AZFQdPbmaplWT-BusZQZ&zXYW7OdMck$WxUCKRPLn*kRN?em1 z0R}34!fW4Y2>qeA3a^&Pu!xJK z9A4R5NIk);5UW3Qs`!^6>y zl_RfxE)G82_hQ7Dr*6?6EcIpggbx`XUu0#1rGE)En6=h^3B{oWi~H6TGIQDFufCRl z>+iD?cGy2s=G3@`#LUrjw2il>ZR~&k^A*5A&btc`mtw~fEiFsubu+fAPT2pBu4^R_rA5%?$cA}2KNMi*ybnl!ZTrm4^1_A9!=33v>SWwvC82SXn zm$BK>$19~AJg+)B5+qZ3-3tnO%>%BFOUM2mYc$1nmV0}1VaG|rp$6dLprL{Yzy^o< zcHbmKk_SAZ2yC*{&3>+C&QF_`_FuLB=_j+CTK4k^xO!GYM8AuVzKtR}?JxWK|KLa$}R+jiVv6f~>4fVp|nn8ZdTC^iRb3u*Jaq zHDDrjnt~Jo^-l!w&|$^Qb_4{JbODh{$M-F3pR6;V3mQ#_#@Ff+2|G6L=65cclr?}y z{;2FvP6ITw08YEM+hsA56dvKTGPvuYvMmzclAzW@c`hE2&-Xh|TqvCSV=8gr*4 zv+qFWAdc8qFK|uDS>0vz#X1G;H#I(&j$ZGy&ho~A%v=zkZmo@KL>xtFL`!KZ=ph|x zOHub`gCPq;^k#>)q(30XZS&;AkH;ymt)Uc=^vWF+*cHq^?$AL@*}xS>l;OI;v-B01 zq&9K0efZ)sDgCsLW&;LQu5;AZ@3j2!BR5&P5hDC_Gj*k@`!f}3!H@Sr&MAs}^g-E= zB4uav$(Tn?ez0BnViS#Y88a1e2#%3m$W_Z&tdf!--^jf{+j8TA|V&`Eh@ zpDs7K6ucN`Vq}z+m;Z%v)PC#Ov}rjd>yF`dreJKB;?(?y!Rtd>fbfeWB48swxo~g2 zhHI^vy$_MX!h!0;>t|V;mb_X8Pg`f$zgg<(?d^4tT8Q!>d=@KjTUj;L*^FD~ck8ZO z`cUN!n2(^RW3N<$VcQp@^m{YmLLLeO{WP z8giQvm{SY{1{bH*M>B3%GVf2~bbqxt&9!bxUPqo-QG0_Rs16Q~RT~Q{;}snI6NJ*U zBhHzAVr+%MSA}=w&Oe^GfPQRTY;KDuI6cO?7eg5n$LL-aJ7Ry)WW6splQ)4*-EUDI z8A))5_NWPNKN;{xK{OO3jSBu8Y6^15RxG}@rnDYiR*yLEqoVY(Iv(~bI#LbdhErMh zJuL?wOV=LP=*7G*Pe`3|q&i`NNGreN8F;E&X~DY;Kl!eytzALb| z(Vd5dg^8TAG`|K*=tzchV;IhkZXLtbyml0~^GY>u2b$Ly*_p-<$KpXMCu|92ZoxqO0NX zpg)cJ3w#4+>am%;7{%-1zX9+5dCp9{9nb?Ecz2MqI5imC<@h%$ys{G#yQk;%ouDjF z59jru{~##p(SFCr&CUH(?O}|n0qu*BSM^(kH83kn%c?gKfgT9?iz1f5onNi}S>ELm zhJ`D5t0ZNWWX_O_!pW<2_10eQ^&Y4O0p?&yCZH)8r#x>o__}TpX0axE^IbLV zGUg(;@E~8~2RRf3*nv{&|Ib?>l#T;W_WyqLe`nx-XW;)oGf?aNGc_NC)CLfRO#N!m+Owa+ z0dPHBs4^EC9p+HsHxQ57wwFP3CYHA!0Z{??WLrQ`2Yn4&-1Z5{$kG!Ne;pAWLY(^a z5EOB*F7};{oA=A9{I9xDZKq3h8x))A-v-(}R`Ig4E7!?^xUuI+fJ(F*Vf)Q#36P&G z(gb4WU+(t5+kSvRj6n3Wb&iw!XoYGIn2d%G&Ha6VG9S_JFsKFEKXrmY!026b4$g<= z$J^U1|C`-Z)kTzFP$AwwnPMecWy?nQ^?R2o+D3D$k_r#+cC!p*;>h2;&4Gn`eo3XJ zEN+shP-5y%9S?E9OC{8r!!*={b@EAmc0y{G6aL`7v)x`R=7-A6#Hwyr9DK_xP 
z7BBpOkgtdpVwLbtBT@0d^VA~%H{Vw##$Md}4Z!^34sDEPf4>%8_48#ZSy8kXHlinC!6#%XT_rQW-Df7mM?@{GQvfySSu?0S};WR^G& zF(184q$!^)v^Z7hrGNfhJcN(U@e14u$%ZUG1Dd3?wEN-o)Fw0%BM2KswmiD?u~Dr* z*TUKwu*R>K+_wGaXyR|V$5F@MznirpAO^i1{aefQ8<1Z9ohd8XXj%uX8R_D8_i@cY08L4r=Y9Fbv@fB*>QpWb#soY^Npp+eB@AorrS&ZdM(U98h2D4MOw zW@N|30}IJhvE;Nx{(SiTj>5OCiaxc6WHG}pU%q6bLe zQSUX#RK+^6fd>sor!)OGJOt&hK=bP6#}=#p%~MMRMS>GAMLPgab(3aN<}SX0wK&q{ z*=x;HA=Um;I(z@%SeGL5O+5u4`=lB8(TaHLdl{I!g*FG$^{Wgd_at5k`!)T^jLT10iEqK>V5@)<>l%E?)bbLwVwA9$KFTHKEaCk~tK zD%ABsvp$6sO;hmP7B33tx7gNzl+-9iABup_jKxH;ej>rrJP`T&hPU_9_DPJ*MUB|Sjctor+bG@0c<@IL4M;i&*$D? z*pq?H5~SOak;2R1aQuJ9Ufo>E@U6Qcn)OTc@2t;>A@g6am?!u6+3fSaOP93nd+~Wj zn4@Fkt=4-!p+OD@cz`K5tw%W!l7eFs{>}jx75E2yt}_Y z{;Qrnuoa1`_~+mr$xkqIh0f2l_{ zaNOSV@U9(WMMP`E`U7Vqje*5{;{99mr>|bM>dD>6G+Gcd!%e%T~3}~ zwsTc?oTnsXGwZJ}FBjV{Q(dq7?(+WrBhkU3i|@JLS(&{2UTxKDj!%4gTdywk=c|qT zwlk$}Iva2_=;CZ%Ih%<1B|QzEpuU-uZ<1l~1K?1@Ggjp^KHb39s0F~dXZ%J_~fHi@=3-1l3AyoCU8JBrmKdD{_RE%D+WOi*Zt1dRzsl?1+$etU z6cG8~>-W5uynoD>voWscHn{ld7ssV~P^%j_isKOmJOXRk%l4nw_y1d}ALIrc%C?QI z{<;5jzpSm=$5oQkr%awS=@09d=j*fNFRxm;_x6Xm_4^h_z2Xf~Wo79ROjwk9CP0V- z)XsMhUH?5dc{ zz*1nV#^ufZ-uC}qEbhM&bu)4PzOCo0KC&|g0|yjl0_P%HLCNsK6eeJ)o_Ff*Mtzn& zsuw=(DS7)zNvE=SV^_kV7S2^~f+w`^4(UxP1>VoQJ8F&Zn{(d-jyfK6Ve~5ycAq)} zT(ZoHdhod4{#yCX)#3W!T*dw3<*lbLb-tdN%CgS>;M2zo)=%Qu1zhJJ!V9V>LG@5j z!%=d}U$!dvZ~#~chfHT)l5ze1 z!LPuKx6*e>!1LtTDrRuStQ{36@AwNi-UQ4~DUGi>-n_~AnYNNo*>a__(p4c=mP1Ys zaywq#TDqDmRJCmW3_C-It};)R$pZ5iYzt!be0=isiz#fZDB)4Ckm-KS*R#Ndy1?1{ zZ3P#dYU8`bbT5f(DhsGFeTvih?{WFkr9awVXIV+JRIKs7Kh20~o)|C}`SZ)q1y=?c zdk%2x@A&{c@bKF9ztiLER@SAj-+prL{M(z${r9#RF(oP~+`RGs*Q5B_rrZyF4wXB! 
zt<`U*Yya)kpC78Dgj#0L;XGlp>QP4P;@mppzveS%&g?T=Xb3E|dL#@Fsf+ELX|OGP zjsMc~i_WhWT(k9;aJ=2yNS6Z=jCryqz(S1$TvB&1eL6S)YX17TU!N{r%RTW~&LzF< zZ5m_cDzClbr(Zn>?&z0(Q;_odgr~6j%d<~buH80$>(y)x<%A``xVh7j7dJL8 z4*V#!`f+wxUJ(7`azwBS3j3^P6V%rnjwr$&**tTu#Bs+LH@6~oPiVq zm8-2}dsPI5s*N&lTLY|?;IWSUu}e)}eo;glMdUz6oPWvdz~cyXIGN34@~+6}aPK=oGeF5zi#`{~%VYY*UkL7WQCD zdOG%y5=6n>D1*(HXjL)8f<;2gPf>3h3*bghA-0D+6Ec9M^%KBvL$`W{pw=Qxjip!g z93ZXQzkz)@`uLdUSvYrK_zm;-haCl6K5{D^voBh~j z@!=6vZVkVs&epIedP!ti((77=%MXOA?7Thq1wa{l)y#2U^1srfpO^8EOa9vb`4^D) z-C+k-P%yF85C`1Opd+kUH4g7raYnczaceE5$GTt)cGa{d|2ZvHi0Z^({O!)J((PVq z-0L!gafYittBYd=Bd#Y$2)HF8cnUwf=J%8zb-pEq6WbB{ zzCRf}hx`SvA%(&5vD47G=_$d!$$uD&mwR&hj{DB|`C@Wxa}2oQBklKX-TJEU64*#= zW6p=X?_|PaBMha(rW?F&ok2EkF;ka>0?hXPaZ}U$lLq7spcwoxL>q=9uff(4qWz}u zm$mF{)=lW6Z5{)T0#y2t`(y;McP@z)V(){=hySqt#VaZk83H!;Sm75#;zodkoc~5J z*%ECBv66ZAo-!E_+n(R# zXldk6Z3q3qwvf_ni!s3!^`Ou7c&mDlpM3EVVMP5gV!!~s!fM6@T1NnS)NzPAeg^Vy zW4%~4xR&47dT49V&mkYS$=RVN{iL=jTc9ud7v|t~e66o>?+Ks;1x=xF7=*_WEe3wt ziFCw*5u=0(?#3t)ru6RmCq+KT!>C5$faUObvT6&}$%+9mix>*7lp&b|^bSw%^oH^7h${ zwkwV|R(BNNQ2R|a1Xgd#w)i!PEfhrnb)S+TLvBQkzio)^_sc@(J`cj!zxb<2o!dIn zB)^eO5R(zvkSs!F22^$x4G=#uHLH0qne{N;9wKoJe#Jnon#J#HATVHq|?oOi@_yA5N@Jg&s+{&`rk{c3Md7Fh@ z`7@@h4fq;ZG}%oFDk5AW9>Qk}ITq{|w#-jh4$PFyr_5Dm9gD;k8WwfTWaen*NoKgy z9eFE-B8zo11{QXWzb%waaTePu7xHD6a7%IWGmATgJ(?fCUBKhSMMcV%!0Vd$PO@ey za>|`xY{X3#-5uYZteuz~U$hBpYidVXH(8fn>TIBGSo7fUxO5VCnz^I858tn1VDty| z>hCiqF+4I^Gw`0IAJ^Z1dkKHAxUW9(p~v`*XoOEs&QQYAVs1N@%|*ep#>JxwzUAdr;x+1q(&m#I zmYRcQv{jWg-kSWCZ>uu3}5=JufcwCLU?*xc5gMq8+jhSN2{+w&%C^c*TQo5ZZ<@q62X!aCNymxeK#cvNCm4 zbWU`zJFGp<+=t!iZg{VRuAv`O_Z;Sr@~ew&x~{yp_}(&J?O&ZiP7!rDBp8ie^NK#O zK2;woA5CDt`Q3Yx{xtF5@muQ|Z^z@0;d@U<<)Bl0sIXcfoH^A<)i6KFUqd$0d7!mZ zK)^yHP(YhOFF?!u(8hZqA19=ys^F~PS|{=pbyb8QAY?QU8L*0i4jwMub&P(*;<30t z5jQvT+<*B?rU0)nTQbCOoHApm66uC$g+min75#-FBSo13!O+UEcmNku>K8Uwlfn62 zOROE!-sIri9@*eklutHdMh*3jcb;}0cP)nmlbI(#c{X@IWC6yHHD#MH01; zwIFsjZq~WrAp#bs`L`CxHTq_7&qk&ulisQ?FjN%TfebgjfFqRagrAduhr`R|#l-qY 
zbKF{d@5pk@%!J|H;;qh&|5N#MCb&gnESZe)(MS@IXjU|bVLm*S%INs_3f%U{G5 z2z8+@TJCw>q_q*BI6hh^@)Gjn%rxeQ9_9smTHA~sbS+3LTjdXRn%roND?P0=-IUz=Im?~K zmb`T6w0!CbTk@R!Z-NG~c`f&CI+`d<6(@ejPZXx}S^czXt!MGH&@PlQUSZvB^0S(1 zCbQxLh6Cx8xgwksE}!ZYJL;`e*Q@4~b(EFcBVFjqMavHsGChW_THDx+Y=j~eCl)nC ztxkArJ(kw>UDr{64E3&~&km?%r??_9@=8zT98~!dyOVWP|=(=j`V$_ScCGMN|4T ze{?oY)L$&`#SABB5Ad~qBmnOhm{#hiE?~3fvXVRLJrovpR_z*?D(+6l?C92XC3GH~ zP(O0thJ%)zs}bHY?~e{H6D}XwX4&3sk~^3@oA2m=-Dw^M9=M%T9$UcNf~6XlC9$jM zBVH08_ZhYarW1!}bP!#nE>Sn{@zmK>vTfnIZ$8*}?6g$qp0!M{X~379eh+Npj9XfE z{E*vs&ObdqeWRe=Kd#{NU%3VuvNj-kc$Y2x@Y&8$#hwE2|FpHju_XRw+|WwYQgTHm zynh6CGky63F~y`?@b>0~4FYvr2KR1$%P|YPg)c)S>|nBlR>)~D&m;STC+x=eMmvn> zkrUE`^Sb;2Tc;b-%9e2k{JeXx8>>nDmXQIY{Om)4eF-)LgZS)y`FyZGA22ZR9Ps}! z^8GgF>;LF~_5SB&0ioI@7#Kg8gs^~;>zDIPNKYl@&G)IywA6b^m|SEi5kUoTzgYz( z;TrjK;{|Q)Bq!Y^ZCx*~t;Y6CA8jX8*eWmHM>TONlAN3Y&T;$eAE*2Mha|JtU#p^jJO0I2{Z{4f97-V*U?xpQ| zne1={!{eB9a{ngdxcf%t-uK>WUslj zvE91=s}ACSk@*SLhWs+*gniWox>UqaVBJY^er9G*WW)LHY+zszm30#;5{@>J&Q_gQ zx0_-S#np{-(WTex7tQyKk$iA)WVPBr0eoHGv)RG?caFY~g7__U zhqEM7)m?|DXmuFeuGjk%@p8^L>#Yvi9`CL3ULNDmmmBT6^pAc;@yR;wW}coMoxM>^ zAmCMT#foqc*T35xE;PgvbZ1c!DcC{P);f#ZtOUte(x37Y`AFt`!u+gf5Yy9IoMu{; zZ|`sZxq;M-jEthYb%KAlQPXG$KZ{Ce!hEqv@5dDr$ip8U&q3*OaHdzAow}ZnB6k)B zhKA{yDx3CqG|}9*;`ZjZ{Rn*P#EPulU1lF^Hr+Dr1kk~C)T$~f-&OR6cWQF`Ul5MB z95G$~&zkgqtJf-#@Ahb#s;<{VW%CB5#&4uH+ZjR6G@)_vDROdJs0_E8L3G1ipEQ&D zXA!bh-7W(T-eFHB+n%24?)SF9P>w^L|I!-MVE!?b;l(0VRKhj#pd^blGO6=NoHabx z?JpID-gU69S^uKtpL5I6Mue(rJTxP~aM(!+MJ8kIJ~%R-PS3XLeP>ZCIW~7HC5#@P zo1VVjaM|i?+gASd{+8O^u2prUV|p?xL=zI0{BLgTb37pJPms|>t>N&d%ZHa0xL(hn zFLS@8EnrTEG_B7UE7!gv@cv=4>3Tlod%r=*tc{EiPv6~!|DTrszmw)KfpSD~qT}M? 
z`h#KtcW`hJ2t&8`U$XW9#}Qntpsvo~^YKRR$l~!}y>JNrkImTlIUR%Sj$P-m8%nn6 z^YO<2=i%YO?U^s#M7G^Ct=H3;*9jZkNd8~7HOCm-q__uC?yhpJ33rNzh-a@bS%>qT zd+L8*$^YW|8;HPj<5}Q>^6gF>$sRpz+P3+2nNp>FE$-dD?!x8J1eJ`GR8$tnQ<;v& zQ{z+OmS52>V6MW#3H`ZDRaFY}A- zhT0W|7yMu8HPHATP$f5W@{gufin?I2jmGtojqCi}NgS_(!g0vF#zH}VC#(!fuFW3e z;;NVWx(}D9K&QniaxfJM!3qAQ^BNPSS)oPY_83)H%+-iHUv4(v(FxijpLUgoupcm+ zCM*=7WrGGEhoD{sn{Y=w#f?`-Jv>XAP!b(VB|qsAFhZ5;Dd;oaoXc_kmxX#Hm^7Lt zI?%0)BU1kILwNHz{zn}m66cK|v@v)eR&oJUrW~VN-~tu3zL|m|2XwMI#KFGwtl%f;Wq5=<-&yT6oWK*f{*N^`dg0|9-=e!?(^i$C?m^4(!2nUy7LX6ygBST z*L1=(2E*@<$+X(5j-&}(5uN>U!9{Mv72yW^l%C^tLco-!NkohRB_`1O6qTv7k9*&b>a?(-rlF)XBS4D9 z&mLT}Mx{KyvWp?1`_dT?o!KUZiM?>ZouX4A3~uF4nA{1Tf1#%+DDhU-NmXm*bA35EQ>1PAyu5vlblL?n!an86;Sk1qR|($x&wXc_ z3NFfk9_ag&OhfuGD*ja*bs3T0{A>F3h08ux(!Nmgly_LD+dCOj?mHj;zbcEClj0~^YU|Axh&lWflY*gn)smrgtTVg@KFro9)lfniYSM z&4}QK&QEHH(^zD&BDY+)W!)#yQ3Tg$@E3Mp5H1U>C)5v7ibYw$=XM_q&%+Kh2^WOk z65-rgprYMJKF=A2r3Xs>EU}i~3Z&Ei;P_39I^HQZA{FNsvCl+)3Fgc_<^xa2tx7JQ zD`5BCJAIh!3qs(QjYp}8m*ADn&l4~_8s>|m=ie8c&E49;5qa;fZ*sm}e~MtG_^jfm zKGrNZAqJzDj;EeoIpL08_n17|IlivGgAZ>fN|Rs2TKjNB_s!+Mac^0Ze=z3yB5jhL zEc~cmtn6Oy!P{Sx;sJ#hG{^h%2LUBSnJH$jMAyEPbO^NcYSCOYYW6oFclFAVCL$-y z;V5!&SgGHY4rj+5WVcgsX3|npQnIqLvavObUEx|~sZxznqEV|M;^4fUwfk%w`exJf zYers#l4W7An9mO;>v;CT3VI@?9p-9pC88fh;yC2e53v_QMDus^glOhmoHZpT{90X> zcl%p%M@mBCa=EEm=U5kx(u_hfpJZI4(c`&PLEg=q<#Gv^INgog#UYeP zsu1wJ)%_mn+&!h;ZMAnRY&bFP;BY3qBuep^oO~SuAhxsJyYKqN+COnCV=#$O%Y+lAn0Y`g@ z87x&+0uj`O7uE}2HTjD$Dao_Ba?PcLV~oPZyydsuB2K8XPMI6`b&+822!*f-$v5cu zrPd|Aba*E(5C*TMZ8%WXYCbieEE*d#mz`#H^3Ho<>dZ+gVc$npw|2`M5U0p#{D`r= zNXGoM=Hwfji`<1ylR;fxccAQwRY+cKWXapHLBmp|nSptoo2a<>V<=s4aJ2*jh@x~s zT|w9E78l2LaDSDFMw6S-{P!V^?#SPl_*kElXLQjkI0Immj}rRBN7|Eof9hh32Vfub z!ChC?p8yk<%ey5;gKyd}`zpx42&Yz&(C_|K?Bu(GVI=3>W{ z959m_eKLy$lpBnDn!0djgx;|0sjj|~ef8mB53n~il35{|N@KBDK_7cBd1W}SD>9cN zTt9&HSq4rr3EHJhPN99zlp27LQQ&^@Mm#|{>n8v3jFm47u-&%A9V((uik&+&y2p&n zrA$WILoQP3m`GapTXwO0FWcW^3jh!V+|^85A-4@Ue3wgeue21tfcLLJTrMx8Sb$wh 
ztKPKI`mj?rdi&PzzFIh+RFJ*&&*?f2EJ#gH^C(t&>x#zn{(*?RUQ`<^-l&E`(yL}j zXZ83-FSpgHpAzVT2V1CAQb!82@Q0<%uBTze%YoCZDVY{D?8~R!dYO&arXZx3fs7|| z9?eha5akL3`?8Nch10+L$ zhm`ktB(E%oLcU#w;_>G1vvjs9z=K_L;0E%yL9axORRNVi*Tz+(kOU`1x^BBUxNa?a zNgcwv?$BH^Ii`av2d7Ze2cZU8}}8%f!gUv_{#Z7_isCbu^I+|$pNfziSPo4!ec=!_Upl00AY97Ff?eDS}a>E!cV$`!Xu z-iKDGmI{xqE8;3-uRaw(2<+91yvi&4Q3i}hWBZGT-F)S7BE&Xd8ry1S2M!9NswJ150O6Q}Te1l*&L7){`8rqA^W~J<0yV-lB z8*n8VA$v*#+oEZ%xX5g|;j|qH{B>*xcZ|n&vwkTC1N3=Mw3_%Gtl0d@#-<&3f4oLhWt>%b|nxSVXO_lyTV+X>P;At3&tPQ0mWC$XesSYdkmUe^sfxp#Fomy+AyC zKF2OJx2w%)YfTQe5Ri?==6W0eQ+#mU)|mHtEq2wxe$>P#`JL`O;jN2(n7dpB{tgym z2#?eFm?_Wm@Hge}WFx8!p*J|gZkXc4`z)N3n2xaf&eAF>gwqqP2~kyV5CU2k1q!&le~$TS%9eg+Z056iR2NLVgYX$caOLP^Z_ z6SQe3Bwd`*E$yS;!Nvs2fTrjM6`AtGLoyVpl55qI1Q-*Ylrd;2%Y$C9)n z0FT$?#*74yWm`l;#ZSDqd@Ut(>#bUcCk*N0{25%>`wZi;aASfOlT;rcAdW0LhAU#b&TBk=i+iv%2@GM+pQuvx3fS2pMwld z9&g}>%J0#b^&rbH9ShFDEp!w?V33&B$4??*ChlMktJ zmH<)FYJ#nak9cybGK#V2vtI`&p_MS~$84p3QG^#<6-gJ(>)uN}1w-VmXdJ^I3iP!_ z3U>UjxdU^b*aDWBCT%xR*}SLsN5>jup=;iL+RWSB;l@X3r;g=j3&GLu7cAAgwG0VU z@Q6Cm*QIs45+Ro}|kMh_CJ57z0Xl@=$mFf8`; z^}Aw75AoobqhMq74bc7JE10r>!Q;!sreeT|6 zLABg=Q{k#+X{EN_ec`mIdLBo8|7t?_`qpn4&+Roc@CyG`*ON-vPT&Ug;r$W&;j!PS zsk|T@YrFGpn%TqgiovdLV2=*!22&cT;WRT@>x}oOz4DX;B#bmU#h?La zB_eVEg>xw34@HrCJWe$0rKgqx_VKO+?p=F=!Q>>Ad?CL&B5?|ZVk;J5D2vscyXulT zkH*tzu{)QF2Y~m7&<**prwc#SEh)?*D=Y+1(-%co_5kcr7`oL|0aQx`c7@WV@TV-y%->2}MNh-MER z*dHa9e6Lj$y^o9^BRbHKzD1>4DUM_lI-sB#AJ%}x{gO@@nUvAqt?f&pBC5qX8JD45 zel|;PumP~lrf3$8O2die93UY1HifezjJd-+Nl~{{fIi~R;IYdg(Upgx>r`NX&4Q1- zoNJ7hs4@!B>m;&r|0|KwRiqGR5d*lk6AyKw-3vnC4a`1RE}a`6@jrDei@i15hI%fX zrkxQ@{TO-?tUg}hgMMDi9$V{GvS3+QAUEfHI4Mury@sKm5HxVd)0Yvst7~e58S4(n zF%ig)WJsr5tr|jE_97MrZQ z7Lnbf%@#2sJF~}4=4SbA8Eu^~5m$jLRS42V6bn^a6{mC_?i?%KKz=RBSd5Zk`P^pN zv_8nH_b#(DS)L=;-fw+F(qaPnowi7e@mn4KK<{T({U|FNyXE)6ZDPI2R8>iU%k~V- zXZk;mMCt|Z+=L$Evg1xx>toe#sjByJo)Xf$qSGQw78zNV7V{A{5VnRHeTE@+^BiWk)G)(tw5dicPcGa1k3zt$ChvA+KYxSwmO8g=O~HcLtQEQeLSM8 zc&8GeivPOaHzRF$c)qV9nF-D4; 
z<0+$%L~I`oLGOR(YOX|UUpi&l6V;+W3HS2wnozlelKZV!@tHXzBlr_nXA+d+z990} zMp~r~daO0nB5Rd`EvAY}=u=|Kvj}URQ2{6b&KQSDTtXwI5}eSQ{Q|+aAu1Kr=Zxn0 zo_k`<)5|sE2n4HwmYC4^RhMqA1%CaUQBe3I#BH_8O__pGj*o{6?O)fvQv2R|?#zZl zIPvCD2t3=sl>5S^J|W9kvLtbLi5m`GR2vmQyp+?__v6R2$l&yXJe{+IR02wg>gvXH z#x-H~Q+t5uj@J8gaGyH%TD{z2&pHv|Py6)WBakTuj?ZI_Th$v>Z7#^kI*i7kNw!xS zqR-l0vN-Y1%X`_2 zK2FZ`1J`rr&l)*>?df|Vcr2B2N@SFmyuTq~2LbNG(W*=B!@JarYL6a6W2q)2zn-q= z6z3C(=GJAr|7LhVRCdR{8XEYR6 zgyOv=P$*`7R%JB%%gDv0YTu6z@_!lkT=gewY@L^2TrbRzqkNEb=KMNnIUj$#M*|lP3 zNcIBJb)Zbsd|HkW_J$(+FVqH2PducoSl8+f$TFE7uD5#=Q4@=&dD4psAV6x+E`J-a z0r3VqaM}k|NPu4&C<2LhX}FTbbN!BgF|wh$f&5bFfz89}s*Nv1u13Ws@I&WP0vE15 zXY)GzLU^jd$o3}&gnZDX3N z9A(2KY5SCK{oo{)g%oA*>hnWIx6Gz}_n7cS+G&E$pSL)!iQS{MVRjK4Tspn>_adrr zeYME+V3-p07h6@(>eM&ciKDZ)5my|#m!4je;cZdmKQM#Ltm=Q}8bBBe05Exvu7B6A zTkAHGFOqc@vbU(3sG9A6?=l~L)v2#epQUd3wI`3c8+nrU~8E)!{hjf-59vem^hJv7U z^=6-)8|Id&zkRw9}|7g>udZ}df5b`YSvp|Mn$En{IgnifY=2mBYoWl5d}$gBo}!2v7nl4qfAMl_l@a& zF}SkrFLZPpXQb(XJ5EdqxD^Y+(OH*dE!`^x`m523O*DkIFy2MRQ%- ziRc!OujGW0gtRKITF+;yG%rgIq<9f>XHH?FaQgBRy#bKUoT59MO=fTXA2uLBcM*r4 z-tBm5{WVQT!CO4H`MLf^fL=1y zUX(8= zVPQ8~##L(~j+9g~+Q+-X@gLgX0UR`vV59rA6;}ut0p*@9)?0h3(2~}fCH{;;Cn6h- z=P(meQ7R=nXhvE+q%ADBo>jG{d4qH2Uj|`Y_v{Sg`9ySQwmSzMhvKvpLRP7Fy0dUE z<|@f%urt^eJ#YOHh31lxLOF~Bu$_Be_dAITgpU{@3E7K$#0pB1 zZ))COb|+It0m1>52(qepPNRqovR2^sO(UR4)6W>pn+WTQ(43cx$(I;W z82ss=jBgWv2(ciCz*N!>TBulu>1yzD>`5HW)njR%BR zV!6u>Q#gJ;Ke$>`pCO(;*+-H%DMQ=_FXzk2+nL_SBbE=shzbK)V!% z$c}Tn0LF7d1tlrQsNOtR(EGkobjywbx~|Kln6!D}UD;HtT^~n=72$_aZ_1P&X8`2t^_lFV;Z0=xMu{4e%D8C%UZy007EK0e#3 z*PyKr-fenZc4{iB{>e+8c4bJG&X$8D&90|g*r&KDP(OEu59p%#27U4bk^k!w+ef?i zoBOZFtF7*5iSfNuDto){N$I|K$fVMy-o2k-1G^TJsjNdXmTo*93YiD0drnH#j<*VO zt>~`d0gh&3mx_=?YbXbrY)Qq3wx>^_u?m${_Tv5`w4^EmwkkSXx}VeMFo%NU9ZX~M(FvGrWwEZ z2wr-fxt%uw+^pGkY1Fu~3S= z37eTnLkX(Y8XY!AL+k51-naTP2NHy0)=&{H1}4kXxko>k+DXP1P@DMpy3EHG^D3&a z+hzU=c8^u)#hFIl4#%Gam@EM;0&1jg?6liV62rtRP#}Pd@!SohvR6FvEw+I_?DP-e zy8Q;Axmc1X!?>ea?63>PJNKO=RQz{E`X&7LbNwyfUto85k+zt;PFcEdQvvjmW*aSK 
z2i3fGdUI9B0XC2avc1*($T`tc5Lf}gahz)KWNLSXZ)=|MyJ0d()@isO*IX=XP0INF z?T%>Fy8cD0b}o0qXVAjJdP(0JD>_cgU?G8Lm=`1AvE_8<$@Z>&UFa(0*sM0`478AR zgTyQ_m1)h*?>j8E#oYo#UhUpghY;c6=t3{fgK;vpe890ZEb*eZ^%I#1X!bhU~a8t&5G1B86+ii(O~m#(DfI#GhcygQ>AG(jyV@?>&Z1hs z1=oqu(kgf@?CtJGeW0178GJ&K^zLB6XBHOA;`1FWzI<7D8By2lIMns|xd002^}2C# z8F0o?pdm5L)iF)$ZGaG%zpfD?f*zw1btQr2-j#v&VjA93dm6EDYj29cU|$(l8a)h(Z02=bDjQmJ~jEn95RJHEoPwBgD^Z$hvC16J6UhU%FW zzZ&vPLqa1^ft0d^nxG`9m-|8;-*Gt<#HrmN7q_hvLDON?h|EWdqy;$h-adhAw-k&U z(Dn9_kc}R|cn%S1vT$YjO2LSA*OqfY;WjZTl{*i~V8}F{|>G9q^b#%4LY4CK-Ia3Hn z|BV2u0B_7HJwU3ur)wj)o1DzI%>_6PrLdDuX*@7WDG<^j1euD69?l8PM6+*X z{IXPHXIO(L{M{HBrWUK}{MY{yEQt=bELSMl`-`AGD8X2_R0lg*u5Fq!LoE?oJ27g4 zxJB;jp4{TGB%EP`)5E>47x%2Zj{Kn(dbg>Z^Wz{g??RKUedwscAB(jm2IB=9TApJj zz)=6!fE)>uUDElnDCv09uCb>vNwaiy@W_HS_&t`&E|1BnNxw2pj1B z%)zrt0N++NINaUM%}vEwW$xRxb8yCLrw4F+Tsot}^j0LB7SwQ}d6_}`B?Wt46}7_W zqPj1MY*%{jXkB0CRFg{Im5=vtT9Ix0M(}OZh!aTc2Vb>9vg156AkF}ZDbUCsrG2E8Z7MPaRNKA^lANO=_RsQHkrJd?6Y|9tBYvcu6+>b zfePCB^7cacy_4|dYo<{3QAr?a1Plw+{nH?ewm{(9d$%=}DN1eaZnw9KP&djq=d@|& z9TH%;*uv}=fOTYqLa|hmbR_PNN+B3%@&LivTv)nLsB%`3eK3*Yv34`iB~fFP9WouO zIBz+idRC<%qQEq>WR#D(BMf)5C2fMkx0sxkoBWqpfbH*WreE)P)AuY{dc^Iy0ta-UOf74jcpq-k0_5Vzi#@vaf-NP^&ON&PY|CM56Zbc_Fa4 z!Cd|H0fu=oM+6pa+VqN1&_*8IVLH_>vmyrKvSY@^Afrw0h3nwu<`2C{xrxhu2M3wa zT&R^?+UQ9dQ;4uIsb2K#f&o{5Q+YB()oBKljXC7|Jku+Iom&Ysi8B_<>1+D!c%&Tr zyq^aJ7e+GKDQt2Ngu%D22Z^c%#U8**U*N}QvONlukF+MbJKO6OQ|0|+qaDaJ#f_Y@ zhh1+axEKI(>yi6~dn|*FS05iA??34CiHA#{F%)IB}iq;?sb`YD>UgyC4BBuTPlKdgZ4s&mIvozj1h@GfRRSIL6 z(;3xdcGMe+xz>nOFEQ!J`=xxOT_1X-rj;y9C#?G4@i1B;qw1P@y~81=aOMsOwhD07nL zVCFAwCsB)tCHd%s04*{=25Z>h+3*lCncu|f*gR_S3&__+GeyP7IPld)H~Hs5Z-een z7*6eDKZ4%01Q(^SxL#AFOwRB)91sFJ!tuIZlBMs#AypW?jEtdsG%s$-x|!4S&*iw+ ziwTV<;vpl@i=$NFfIouy{?~7V({YM(0kU!WllkfvbNswjW#s-sA-N7^yo9*|!u{_` zn$IBN($>472tY9IWiX#t={~x0=Z#2X0kX@cyF!Ffw30a?iZn$zrFGhB43EJc@&ZM6 z0Paf>!-SbqMp%ImX=yn%O3Yw>zSH*$_`G?pP9L{Y6gTgtF3KD1U>vOsxC2B_&o<5d%W$HXR8AvBKpppk6fY20aj5(mSC3N{>J?-FL5ed52#=CFsh62>Z`+=~_fr8!## 
z4IOCY$zyW;=`%-0boiH43?TStvC&kRgQNE~k!B=c*3$XmtJKH_m$T2yOz}~+&wCwR z(#~cup7*kmLZ@Tp%;H&9Cp?$Sx}BKWi~`?%SY4QU{cnB7i64$Z@~%(K1V`{Am+4-g zWgnN1H(FV}o9ukuUqD^G*4*MOck?OPd;a;0Yv2B9=2|ANm;0G~F~eBL3M|K?OqaG_ zVsN4=UX7#66NsI>Vk7Oo_zX@pBQ{`Uj4*m0e-@pIdtCW-y!|r{dy{@b zKWI3ekowxK7fim32}Eci(IhX2HT_brl_!Tg(zdM?lDr?YN}FT8`B4MuX?W5Ep!UP2 zv3|eEe)u^9XI;2xRdy|$=(ZQ_y7vnZp(zaU(!8Pn%!3W4dCrK6? zF|HcbE~G;9^*qA;+Nd>2w01u!e{rd!dn?g8r&TUjI^*)vHT_&Y#T|ndgupdip^?mQ zI$d(0jSs6NxQG&oS*rH-E6HC){iT$qB=MKMpX2@6#zSAm<=%IvA(Qi*@ml}3h;sxo z)1Nj!i6DVE-m-KU4;ZM)q=PIY2B9b?8~8R|z?)>ry;##i2sfRowdSjEHU?Dw=5vBb z05$0=N3Ws9wh}j~o($lkILWiE7R5~`IfA?U!Bz-=N-mQTCZFQzSZNr`+Dd}I`l+go z88h|u4WW4-hq`QO(EoLl@@6O)z9Y)_txh^@u0mekTIz73U+|lTd>PugHWNTUF`57w zg)V_5oyh#|ZQ0a9C5)f#s&Q$v%$f4>^eZ@p?yNK1+Myc8 zHpM@oIOelVDR3)A*M1*x_2h&1vZZPBv9fr3^LY{ecmv+u-qnOmqpE7@*RSxt9t*9r zeWhoi}>Pf!Ws<;#RNk!YkTE($Q3>h^)`zr-sgG%SrLOB6KBN1qJ(Y3P zcISLpqtRmy(|u!Bij92Q(3QvZEeK`EiTSNTfJ|i0wSl|L0=D)80t>&vwOMoi<|97_ zA#@D?G*roC8AtNetM|LCg68`oGO!3Z<`IxG>P91!&U4V9Hl$x5*<5kGCbB8r1YEADX;Er8u6# z%m|>pp%bb$3mw)}30u&fjav|qtp1a!#}&hpOUm5J2Pqv(VDP<;M}^-H6_pvkyQox# z%h+tWVI@6#Ylo7j?l={a$-@PBN?pk10u!w*O!foMrBG%R3NUv1BP=z)86331cK^~p z_k6Ih`{?m*x0!>ooNE1+i&Q8h%Xb#Avl3%UwEvy+D2wCNP_RcjjmZAx$Aw1iOO5M( z=-^v@e!ihW_nJJ!?Gb_$E?-BtGOb@7W{T}yiiex)A%q^lt3(*}o{5R`SrumXUT-0l zuNqB|9}cvZK9AK@BK-Q^oy}#!pC&-YYE3;DfNrN?u!xE-LaD5lDcxTmY~gD#EcrDN zS%!kFvSHCO%cK^6DyCQHvgIwxX{!=+_2G;7c$btGo0gsJ^PsHS?{_=Vy!pd4Em_-r z<+FnGP^8OjRNZ=*)?<}oa(4t;;Lw^Mn@2tSny%yde30zZ0gPtXXff62pX<0-T;qK? zCNfQ9=Q!Dw&}6m2WOc-_atG97`$5=?s8ea+z5N~ zTbJJKFwy}V&PN?JWe5PEqOD(bZp-7cBC=h1n>aWwh<(8()RWR8v7khFlA*8}FDlAJK? zz2zd$siKCw^qL0SA8@f)da;dWnuT_g~{`}W~&N0Oyc(4>@tvP7!x^Q zxwGB-A}Fr}ios54RhvyZT*3VH49eHDru~~f+0QHpz0MvY67z>uwgo>1L2~OI2{ExS z7WsbOw_=CM3tCUvT06y)G`O-Nw;eYMIUZNVVyI+J4_EomFD1nr{DBppD31#JJW1c; z#%Eavw8Z`SeGw_N&I50YkM}p+Xvp%HjA;zy9W z%I5{0q_b2sMQ>tX`Dl^*LK>vFTnyw>nw-C{%EWsKmME=81juK<+yjQnKS8fiM0%|0 zXgm%CXr(B5U@~%Wa#ya*C-OANjCIB9k3O42<_S&;DpHn~zYgar`qFUwU=juDRV??! 
z;IMf(`-oEK1vEY^Ehe_Udmcn|392Py1c8W{b{&6CD9!! z5Pk%P4QgW2$lW}uv%RPb7+odaF{v7ah7|!L=jG+vJt)V**?Kg{wprbBW=<(3BkJYp z={KoY0c@`DMA{S#(NHO#5}3I2YA5A$&`nPJj&SR#~ejk_r$En;uG7v>$y~$krF(ejb$q5Rf}`$G>ihL8quD+WdnlROxV$raAoh?1NrP zrk7?tG<+p;x+^2(M9Zn@-E4KX@dW>Qhk{#=>dTwNM}}%h#64uISy8qVE8Q0sVzEpA_z`Ak?Z7J1uSHs znys#Mvv1{Ss+c6QV3JwC6CPZ5%j4Lm>y|Zs@MVW^7bCiFQWQD=rsylVOL}u`FXy*| zrT|(HP`6JFVx$y*Fhz0@&Mh^kEs~bo0cEzURWY?U#|!e16Az@h)}5~ZYKo=eNra-Lo-y5^2ShV>86l z0`VlCBPpps(^FW)zTJ7LZ2}W=Qv*+&xkjqHv_#dJgm@lYoci6oS?O9|x(KI)5d$Gj ze+TSR-xK+wA^Ao%szC)>v&no4v3Y4W*qukKm4=T;>o=ng5+pZJGE1K6B*vOQpJ&z9 z+U`nB25N;gu<_4-gI|acxPWnH1eo2JFd6sXF=@B5At3#PgtxOR|RomARs zCJEE-^Xat?I{2A9h>=G^x`K(_4_}bcp!qpQW=X{Kz6@)DZf!3n(`-vzz+@S_zw=em?xaJ zmuLd}&+SuG2%Fg^vF3~gE5{fNCYJAqRDr^^Ms1p4Qv7k6M~~? z4Awz10jJ)1+-RKf)hF_S0|I#tky(eU7xIwSQk-E(>n=_`O?X_zHT~Gm32+vXF6a%D zmWhs9L#4!)@%Hh`LR+=|ODE}{kjrdtni*BTTYj&(ZWs=^$po&uciKB)yBM`q(3GnY zLDH9mdZ5IB3b&$oR4N;v z^Rl8hzW&Is^OPp9D0l6Wyeb06JFAHq0K+8T?B@WCzVtW4^!wz(QS=N{uLNq3yd+VL zwun{YH@+kd$-R>p6k<3~TwCEX90n(mcH$&H$YdyhlSIMS?}jE??s*-k3DASFL2~;ZA*nxeQ_A8xh{$C6=o=nzKuaU8)5E zJx;5&;O&H?q1j_&?2+JIA!!8IVmjSIVj%Wi6Z8)Iim~a6dnes#pr%R*9b8CWaSlmR zg53#=&q3h>dpc!a^FIM3O`N5~e`EptlZpSo80r5Xsznz_tRk|i11Z*5f^Gx&@A5|# zzd|JuJ`Tuq6GjS`af1nm_H`Ce2?Ve~sz{xY$Zj?m6X0|M+j(eOTn$KZH1Hg{*ZJju zkMmwHhG6d|WIhvNmxu2GqIEyd(oy#HV?4??xY&H2>ttGBB?=HYnvDjpwD-(Yf+j1_ z;4BdH#4t#dh=f@nV{~rvzxD2!v3&roT8LAY8Eel4_o-0ZGVAQ($LU>iOX$R-a79n$ zwty_lhcR)-020gkDpa+egA5?bVEKL>bumYVv;ox3<8{JG{C2_+%+ z@HJb@4P=F6O|2;EJhWVP2}~E~9)koluE1i_9PBq3Pn6*LN+kPUmY+Dam6nz+FE6jJ zuj6)lHn=ZVl2xeH>hU`{-Pc@wI%hz9Mtn2={kpaQAOwJw+%v!xri{#FtYgUgu0Ndn zH|e|S>Z{R++o9-IISj*01%FXKdZ=AAv$r;DV&V`M7A0JU(++X*k*xNoB&w`)f3xAMrT3E?2Jvkzg>E5Y@x+!0Ihp(kJ!wdC?K+mtmm3<> zY|f{~=C9u?kYW#~nAWG)W4fyU3vyy%l5gffGfN+2d3o#T525@~w>eABY%G^^If9m> zTox015o@N2<4|i0YVB#=+Igy1sGyGmD?P)%7$$jY6?Ia1g9Ps{CSPSccNqY-P~B44 zz|-00EFBgtZ$TZtL2FYkXpgmtGICT(y`iJnF=ovhkAXMW>B{SEq!DnuAD_P#SGTf_ 
zc2%=l$|x#}%gP{NuhEckgjzGK?Q}ehQM6n+pF?NpbjiG2ET~Idh6odaIJf1^;|p;!SyILRhdfAo({i5!2I(1%1KCw51_C_!=V8HrbXAY zF3apbfJb>fNYRZB4~kwyWL-!WY8iw7ti8%I4cw*6%AtngNC|~X&v!#%Ao$S#KtIT# zMUqHDfXkfdFU=ifa9GWZO(11{dbV5{d=(;eyu9(Y(M>See$S>0;# z5&RH(?ECfPi;Rp6@ZhhDYqr#Y(yM{)^{c<)hS#DGge$eG^A7x0_d{VU{P#iRyk#x| zeg#dG-i`kibGrKTzhzDVV<~w#QU6Iad2o3E)7katdA56~6#ew|+-oZmAommeEp9&D z!v2k8=l=jErucskm_!YgnPyhOJ1*n7@K2r1Z{!!9N>GlJk7$1N>D3Oo$}58}TuDV> z|FQkwz@=p7hdRf>GhfsqPN7T&GFN}{ysJZM&7{t=McNnDQ|+)B@lInPQPrxkbTS>5 z3$Eu)4Q0Xsgi^`y1~FKyWDKJJz@DtE|HhumWsvY{QoPcWZb6_XX%XalIsb_)O)z$U zU%GcoUn#4hS$TS#6DXT68HB~<$m=s_@35&N<9(muq@8TNh>7oo<}~i6jW$tucN8G1 zA^12rfjz4ba;i*a%(Fcz{KH()WVKG2?dGg)kirvyTo6?^mg`Yq`8)ILg?)7`d(i5~ zEHX9b45TBt@2;}6A`MRlfq(u?%eLp{ zR8)Jd+!78R?8d#|ad~}dCCrCPpb0jeHvaW1jRq{mA_3--Ngtblq!#b-XZi`p+cxGi z8o)X!z~Qz!9yR5mEdQN19U?Zq@D!?WLuGZxOfWE-K@u@*U`=F73(QY>#9**aE}hNSj>># zlmRd-%}Z5`3bi)jY2fN^2sKFYH%=c8iU}k+ViwVd%OVWH@_>(Y@nbz!9Dn{qMj+sf zUU**0kb}wn8*URFcES}^Gt5{f=MJYeUvGsy#2oL=uCH-K+>!H~cgctiZSlu&gVa7& znH+jb9{h!B$;VK!d(-Xp@6r>51pd5H4wN^7Q|e6}7~JKb>W5of-lFj}KMl-MHq$t> z-E!y9%pugwU<@NaefJ;Jt% zdj7DrWfg}NQ9ru~{0aPBW7=%ZYP}6v7&cS-@?|c7yHN1YU4LD~rMHKpPua4fT1s+R zHPbe&0JF9-SQG*Xi60<1g?%Q}!Qq@~e7LI=4KD_R* zfEtU&D&)&xS%xAkHt-+m%9o4@jbXQ0q*Ue!_@8wamFKU2OGw;as{lVa!8li!+P3}D zYBr0qU}4^QHyZycm#1t`=>Arzad#0?11ZWcb(Jm9SEpyKLZ8Jo$@uZVQ%*b)Rn;19 za?TzF$a2$z1`?t$`?&)F{2awh4{w1MOnCp3Y+AsH^7wP{=>k@)hG~DoCh6c(RV`T% z>MZ%&yLcUo-NHra6d@!Lh08UNapL~-El)eYIP*X0rbga{^uQBKtfCfhaxlD*M71LZ zo9Assx0dKEj)_dJQkfDg@@A-f7I@G>F=1sKtWi0{Sp}pj3Bg3IkT9GZzpk*AOFh(T z5svd=aE0Z}CS zgbM+OyWHXKc=^)mXQ@J?%kKGj9xmY~g1b@oWjKkb!qbkd1$Y8S4`O6{$@Xd#L#AOijq_{&>dyY(0Q}GL&*K7u!OX7z}$)95e zm-qYX6Z<#TE4ihH?_q)jbpzL1n*|k9jKqE947l9ZN;%--Ll(zkf<{VGauo`dzRc!} z4`>Gr4YgSW5^*r8Ngd_qe0l~sf%{G|7NCXlarp)NQandtklRnIjDk{~r};kG$V-*p zZ_D=C$Q$`_j?Y!Sf6R`W77U&=yiL|ap-H@d+t$>qd}qNDKe`#A!+enCp(|WHJWVzV zjjh;uhyG1lLJlwwW7;RZL_+z&$>DDi6k&)$E? 
zCJxuXWRn>g)&`1{xJoR1Od$Wt#<|*;mPT=*s(8thIDXKxngUdtKYMWGoqD|o)Qz6VUn~dusexO z@n_lfJiKDAw|wfyrath5_gIe-N$4j!9UBV|0wJ^~AnwdiovpTRk)5%oRf;kSfy0dK zTgS(?JoO?YecO*b&ngJHYUM9mvUCPMVDz=uhXFz*vrt@U)b$$(&O`agAG9oA&$mrB zKX3Im*q5lE{l&o=NzUb2pz`_{wqRSn*3~$@=g{_GQi9@Knp?VBu~@6VG3%T^RI@jy zbay@h>{X|N0BGIS5w14#&bNYYBmtg1>op{*2-458-sQK6HerKM;6gys$g9UW zU+<<~52qD11bkvSU?I>9ZAKl}eb7MCckoZ|`s*$BEjSez)ZVsb6DFEXUg~$b>5p-d zpEn;Fa3Gvh+)rA1CB9#V-5iIb^;Za#J8)?(f))x?T!Izr$^zknnlwxf*)k;|C$aQA zh(1?-$u#BCG4<6J52DwAFXBh!N{avL%`aw%eWiPZbsbshmF(LNW@1Yo1PS+=b^1E; zUVLJ+_2|R%u_&+nWQ3NxVjX)-b+e~8EXmp-^$fH#>agyICBB3@WmGhuXA&=mnl1yC zFh>4k1@B$-L7y{0_`@ubtXXFAI!TAf{ADKw4!!bX^|Nd5TCL!AgQ-fNYZ&H=J6W)? zws#o3>JF4x?I>Z+EibQXIngSDJj3)kMVd4pZ>igD@VH2M;+*MSn3&q#n;PfKK`v-m zTys(Thv2Lr$*#)5$gqD%CyWQdgBh(7Xl?!ykdb>R=*6~L@3!YFW(bMEHIhh$n!J9I z==P48bsW?!%&rx^EgdEOw>2beDhRxm7c5P0=J<$pK!;d+!VFtxx8QrN6dChg$+-12 zar=klVqkHWV;xb}F4vFuCv;^2Ey_(@&$Ev1?Pd$5C7F@4ID8-Pv2&iR_yn&(mBHd( zE|sXl5*hP3DGo~!#rhJt~#bg&JF5*fAl&;kRcn$73y-M0{a2BwFZ z*m~J`)T#x+umVajNUKF$g}PHTGny4=gZ(kw?E3euQLs%(6_0uibflbZq7s2CXF>tP zHC9OIP_e%xevO8SBqp%1a7#)`_OE>a8fZXP-T@t~LzDaB30rU1+)`@p3*!~Zw7S`x zTuYmJ!J#wBm?^mUxsV`-Xr!c{z}D1a23BMNRSACwH&S-u|52miPyZf|5jZt5EHF zi@*MX_{Xd&f#Zv1%8dvxF4qHHh-)yog;Qe$zuG3Sd ze8e+nJ_PevY1z&WxU*%V;c#(uI9AH&94^5Ck!u*>rlh0_k3>~cgZOs1N$E>g$koJ( z{b|p;uhWXLW{RX(xT&AdKR3gzK#*~lPcVW!*NPwDzq&6ZKhh6>`He7|PWCll$3M#6 zmhz*s7xn5;>eZe!(7GKArgQU#ixq@MpyH~2zA8t2K4gwS)NgrY<;x=zg+jyS+~C!C z#WaT1WHIJT8i1l@g>)U}DAwbQaHLYMH=BMeVMvIdCSWElqeg#y>MKd3XNJI4m=x1# zNJgOZ4DNP?pyO5}!)YgKDv)+S@4{cf_&2FlA=|czW58zL<~QbaHG3YI z_PG!>x~8z#*9!eAnSPob!x zUTJxr<2HsRX!q^2J(>h_c_1B&0%THR0#T4+$;&Y$xNkm^_>=T&smBu-oqr^rRj7f?yG!Gf~TndJp}+ z>xICh(rIuTio`rytP0QzKl>|mvba57F+u-1tRIz9g(-_`5U%(EMlDr`;Q$FEA$xbU z{)w_tw$#rikATa0Y|C3@zoyUVE=7xe!*H@zGE2a4+<+l%AXaO}w=fnd{0Xh|?W!oe zU_M{ztZ{XL*VB)eOo>G%taL zQYaJjukh0x8Y}`+QA=Q+Volo9uKOJB59Yf#ddH)PKRXP=@Zrf%1 zWwqr}BT#s0C>R(jN|6|q;)nXto{Sq1~2(ORe(Y*t!7aXlYR2h95O{ZI~Y^-~` 
zN`F{QV5RXinEYz5Qbj?@0e&0_zJOJ*D?r(fGM`JYEHa!7;1hShC>FG6WOE;C-4kd5zt4;3_GK z;MauDHwuN!&u+fQGQHCuBG09usCdIlN;<5mblG}@Thrh2WNB|tukwhuT*Gl>zU&VM z{dbYI<-N7B{0W#mZ`0ljLFR6LWPAzNI(TtU(Eqq|gEQ=}#YxWC%p#RaoqjLn_I%nt z^8rlc&z&7!e}zOfbwc%+ z_Jh}qpSDO5R_`%4PgR#UI+N_}jWdelYLkWr{+NsfX4y$E3_?PNa;d-n&w{SXU1QwoQ?O)D0L_ zY`7QnTYa2BAD72kCA1k@=(ayA05GnJmhi?J9_Qy_@8nwBH~5V3621l_&pAUTgJiIW zu+H1j8IV~|wagWMhlAt$QbXjrY~2#8l_tl@78s-OY*q&OsLDZ5&+M{WCbQ?}3|Bpq z@>n?*x<+gPmfnwbLd)zUcC+SWM7{)*!P9_grnf>3AJ4aZHf#1i{NyB6?%Vmchcozr zAP5)CCCO!0t1+9TM-pIb@t)=?V+EP{4y(OnEfp0snbmqz+4qOh6g6@RioUHwX#e+Z zOJ!x$zGtZn7FgXp2b0glw8X^wTT0r!lCx^X#=$wX6>@Zx@~%^SeEgD+{f||rK|;}J zrmbGj)>8{-k6TBo&;_5btE*wSrHSs#BvqZ0z{{hfUvqxEWiF;or~Xl2o{`EX^i_tV z&d>qvur>Q}kn_;e_1xBWcIMA4`R?I8$s*{iy@Wb=$?0^TLmjd@Q=84+&)0~3E^M51 z0mlkLxv-EwUoWz!QVeZ4t#eK0Sn<}LU1e;_Ybs1{5mi*XF6Rzt?vnZUY)j+gt$uXL z`O_SKLs8+x^)VUi(imd*JkQoOB?^KVDI#grIX45Zy_sdB%OMd(Fi>V2QY{UVhw60D zM?Q35Vwo31V?DAv9@<@NT0K#ZQT=A3mdw5hb_(nb(Yd}4IRVSqm*d`T@DyDY9rj_Z zkgMYIGk5p|;_aCezE)2My`kN0X~1a8iIU$2l*d5AD<*OjHb+gej;S0!-p-;DeNA)B zae9e4@a~`jz_J3*OUufH+sixrK8~`O<4B%tY4D=!43aI0Nithp7>+ig8a3%EOg?&c z46%#oI*#+qn@rnx_mea`gOU90);vs>$ZG3c_Gq(Ja%E(s=;|mk`YbHYK!BY?D`!t7 z8|M7PTE|xYz+DD+?%kK&od*YU;70JHSepq=kvHRL+^pw$a|XlXX!N%=DFV}6H|XQ{ zmSWP+35ZDelZr%v^xLwTel=tdIj;}M?vs*ljkMaI#OW6m-eRGq;Ya8XyTn$(~&#(>Yp!CwsSq2d}iaUo$oOUdG~*Z zO-2$0U5)xiPC7i(R8*PP$UAoC%wxCBm)T{xv^RnBC&iSx^PZ$vk3|nl8Wc<8w}CJq z_`Mb}!S+$&%9+b=;eFn^c$OWsW(A<-;SJ z(${By%GdpzcKIbP1n*_8!1p#|JVcS z-r7_;f&E8x!3yU=dghK`2$KZj@WcQc{C8PBE!zp+pX9qPY#*c{{M0usIb+9QhPZ3oYfTK^y~KM*o-p9D=d`n5xc1Vc)D^zRSDcR6>UMN*-CG;6 zxJ5lO$mt{<`$7`uC+7BS=fv(1Nw0?9vx=xyB0E^N&FcqTHhEA50{TI?85<9K*&js< zi}uJRX02G_4K2$MQE1ac!&VFq%Q@L(G9@^d zb2Sp`m}Uj3Yy$q&!sFXGqP++XW=crqAHnOLy`P<;atB;fr`fMl(nz6~Fj(ELu{u0lANk;z!GaxRa-` zm@D(MI=5^HX|Ioo>T4U(#MKcQ{RVc;M(lV%h|dhTFq6qxc5Nix#^q_F9GyLJn6kPB zm@+IZY;(*DAfgS2Vs5JUu1qU<1^09I_xDY6ycf?7qE=Y&E&D#!&q#fuF{v3D7!ck~ z`-5Q%h9WIEb^!VDk^S_jhuW?iK~dPOHkH+-a8SJFGcV6Mcw0@@Yx^S!=$|h7bMh0a 
zwE?PgULTE18N)`lUGP<|FhG{lBZnzpeU+{0%hMG{)OKLSN{-6<_m7Fw7Pj5Ep=%eT zx;NknPaMctAiK22RNwuUT6bw^$MxN6M2E>UhxR#(QKpq?e1Pc`By-#Y>hs2X<}t`| z)uqX4M`$&=kq|dZ#=G(*GE^uBdy^%k(2i}Al+^Jw>KNAsclTEMU3Jj$jt3faMZmIX zoO-9%P*pbNf4F!)YECh_>lS)iG)J1lQjJaq;4`5do*Pv91&J_Z3G-~)_C|#^_9XA) z*(&#w(vwYWRCvM=;d))`uw`m~qh=>b7S{@9kBMc8(W;oMKc=a)vz`3y&!de6)3Jzc z(?x}su+KC^{NT{HEI)r@i?)HGYqZewaV_L02X~;_@ghu|vYf9;0&(;Dt!T7VPeNVU z9@(Xgj51g-v`I2@2xGB_s`ed~rA!*CqP`W^vkcPMr@Gka{2B-=xgu$Sq5Fsru5wwR zFIB$c*f`@?*O!=)9l)aRpv>`Y%@3|Rg-h=s;ZCLBas>lHMF`J^bA}qsOL7w~Z#py` zZO0WBUZ#2iG4tLMpNZjGWOSKZeEdbY*z_72K!BKFqEQm_1jJ9O;aSX`WjHym1xBAP z?Cy9tjEz9|B^$vUD{Hxkc~EZBQ5KjoQIit;1Lu(bG+>FxliS6{)P>MUf=>SA8x==J zdU#k59}F(i`RGw__}cu)@p(T1M9H@uMliE208&k=S<83=Od>Eh*VWWp?DXY7?RoF( zcIzx_I?)o$hYAQo!{_Q@I*jx*ELL|2LsUDufw2+ZkDlR8-;ZEf@9*bi@%{+m@o7in z@PJDrQv#BzaG5Po{0j*!rt^g(&KLKF7$AW_&&C#VDlfP2;&i6!&H28xR_312Z_+mE2>7Igm<7TXojcK*wmmxL&KR!wVZpMYFJ$V**Z;tzIbR$ ztKFPP4lC%xJUH6V?T}%YP9jEdGjdb@u!Mc(tzf{YWSL*iG}C@-_4PImWv+j`?E}8h z1}YGcdS$a|cuyphzxLb4(VX8UR||;@eQoRswMrBGRy+PRT_pM zY2?M0vyd$tf4v+sHvLa$v+iuV(;0FXpuxsNcs+I_=|v2@fa9auA53HKLXr(HGX7N| z23XUO3qGfvvdw6D@BPYa^)dTP;(0%PYyCT0N5jvL zGqUqlt-yta_|>JdsijeMa$AKh1~;jBz*Sx_82BFq>P#f*o6NrSBG`$;^L8Fy&ump! 
zQ4(J)$Y1TEE|XA$N_D|aoO%anV{cn?ZI{tDC@-Wy-?IRx=aPLr%8;@W=^U3)a~ZFf zy8_>?gCs+a42_%y7~{%=u9 z@qL6=QzTN&!xj;`Z^L-p)x0D_ofq0rEJCcbJ{?LDj*Y78xIAt{3v1bH8?m}|P9xqo zou2F{f-7Ag$G|6hp7(h4oyo)O3wnhO2tWjOd7xY-OGradTy_2y%c1hHQZ;2B1rDukR6Q{Wekd>f=Jfd@(mK9j8Dz^-g*?X^)p7IUkJSQtlkT+ht5Rm& zi5#n0c`zWUpmBdId)u2;)FV1BJ3H?s|L}OsQKKAmyWJIVY~_w#qKFp0Q@rvy?b#_f zU-Znu=J=_Rys_q*vZuj~yqqac2)r*Ki{Sx8jGH7HGas2buycOA$V~giSZRuGs3){T zgE6vd_zHh9;~V*B|8JPfQyPG|JR-PvuY1D3W5tTj-G46Qrfq>AIiaHCSIbxxa)_N6 z)FR)5s3R|50V<&0Q$HxmjG8yzYVbb_^e$QqUD+)%BAxC6^UM*xM>zzPPPTeokGBF2 zp(QPa_TxDr)JPc3h*DzONan*h5{uJ1Y9p7yY@I{;({A1pAH$Yyjo97sfc zCKm56sOnyD!`xYAbLnJti)=_oVP0u7osZFWzkhsm4>uQ$C3`SWXxg?u07mdfk>@lf z)pBSefK~Hb9XOIlH>Mc;Zf^tFDI|c4qFlmo+{Wyg7`YbUo`%C zy4L5-ZlPT=R}z)Yb(g+|#XQ4;Mq>5NgJ~~*w=a1;g(nxfUnz0THQ8!SJ6g5Ba-__+ z^=eU0$4~*w2_}vk^y1rCy#DM%M1bgo!E5NKAXrel3!@`nGkyWdWa<(bK7&qZZL(!7 zGe7DoCXdF{@E`3zzzs0K`&g+K9v^}k*}(jGzv?Vf12YIc@&6KEo%N&N)8~;#`eJ1z zmP)%f6IOZQ?7Nn$H;Sfj9x^Ag<8QYw?gZScIU#Dc9yL1!{r7#EscNa&#J#V{o+Af>op1X<@IIb-(;zh97$TLsO z2=&CAXlHR}62LzHG(C?0t0_Ev6e(c_0SRf8`IVwLggHSnOS673X1ujVtQ*OX zg-DO?IE9-Ni=P5>On6Rl`vhmoVNQ?Wo8k#A?KR}Q!#{|s4gN{)N+^=eR zKCH`wIFQ%ziwIPnoTi2aLZh$0k4~xF@P%U~PZ42Va2-s~Sh}YcYRPO^x~7Us#mLmn z4K$_URP)eC$-{nbi_3Uq!+_^ml@*3p?9!kCSx;&CU6p=`^z)NGKmj6badd(X3f+Zd zPVuD7n5R*5zaHMH%EB&x974l?p8OgmMMVA#vlOP}l6H|*DL(lRuA+^XnEkv`68edA z`t&rNctyosL*@F9)}KyeqlNYxT#obaV-GNR+BI1 zD`;WLi!sy8Hz!6<_f3MK5I=+^Ufm9g`+4a7+a{$+F3dC&e;yte*;?VzlOnn z=Kg5YKhPS|eVF6Q0M&ZJAnzAYKT8C@M~usCMfeU5VzNx5XEBvUG^O~nwEk|aaVD&F0m8k^`JAG5GXKm^n5P|wcVC2hh5Q=S4? 
zH}#h_qN7R~`k$rdE|Vp~zuIkd4nHb;_QZt)Bp$hHvKf4Ldr`lgxfAjKO>X%>d~w;t zx!?nrAeiZ_Z9`Dpl}zW+3jKm)BAkBzh^Cjsqaw0={l=j^D+{^(a$m#JoMTsH1la9Q zG$7xqGcMEyJhh4}!sZidM>w|oxirxC?(sKQYe$plX*2@6-$>VAtA_>?m~FHKKS_Y_ zy0^Rui^J1U6DbJzKtsPF{sac>2{0Y`rw8x;-$v4X;-nMB{`r~z^cuijWz_G8pfw2p z`*->Kwmr0vv6Uz2D?(vJ|8BEwBA|F?*}7cjf4fJ(A0{mD6k*ktD-yBAh~MBcnXbT_795!D)WyaoR)A^TI)NI!pE_%p zdzsj=6J>Yb1J?3h*}A|uil&R8f?CGKCGpj^ z3F}hBinyj*{^WWS{w*~Xm33`9q;VIl%^t8!9^F9FBAL{cN%(*hx*drx2g*a__dYW4!x*W}*rkg=RZ&&l{`2^Jq}-xAgkR}p%0D;``CgXDvV@mJbfKqOveK}LBLcvl_uYq| zqvWd@=_uo_YKJE@|MnqpRQB!g%dI)$UG6LqUk;f#95y63NA5A&92vLBGO_>?XP>jK z!Lx=~Anoe8-Nr|F^5@K-AGTah1q%-{b$sJVxkn5SHMWV;fT5#kjgMXZYeBS%<`{?6 zp^+oZX&VgbS7mTEX(frb0y}aFD(J$xJ zQ*$_fR|<}MpyyXoT#eJ+3+8jwjK?>Q!|YHP0P(zk3M6roj`}4$T!V9UXDV}X=LIlZ zeU_vUBdE_9Mq5k+zQNL*Ho;@#cVml6W^Io+%*_g?hY1SkBzT>Umo0X#G;y}WnLH2k zo2$Wp6w`PcwHmHi%dJ;auBLLb{(Ny|JbncIX{O!E;cZXx%0l^E4cY3!c{_5os?nV; zz8J5P*oqe|eyv4cX|190eV=hyxRCvDLffKc%bQAdj?$#BRBP2Z;lrCN9vwU4K9;G@ z>=|raI5nz(w;J`@uiSHp#AfrWZ+!Nn8#rO9xmjuG)C`NpEXvvdDq9du4OC)HwCg~PF8wt0W8`o~O~ zhR8RWV_bMAh{H`62*g$;$tIv%e4B{~sT-+y`$YGBBZSQ%JkRMY@o} zmYeoIW}0Hv)I-xp>xzl*rr`sb8iUc@NC~Jzs3ap&N4wzfgG)_qNLeM^kYuOgtr6x~ z>wyXds#wh9Wi}Qu5sfSC84@@OX99I`I%(z`Qb#J1+MX#d(4DYp;xx=<7XGNwc??06 zb;<0^;n2+iA;$u=)BMAGy(z?p(-cvW;&3)YcbojGHW$WxxZ~QxBaYSEXzLn-JA={X zm5rpAzxFyB7SfX|jJwSFjVl(LedKRD!(14njxK%z@ocL2$R83P&XT#zN2&3%-!Qn^ z6PDw2T%|Jz_MpOBcdjK&9@ZGHhfXTq+H(pddx3!oRxEd16)^Vh@(*!J?sGsiaHB>q z4nCA#*h6P~;DU{i`=p7+vCULFx~)2w{#ZayJ{lS9gk0sjUc9!cmWS?mwc$1f@{N6J zemW;_{`rh<>*wl-lb(1SkZT=)r8D+JuX^!Opj#hIRB{AxzRcBzbaFT-9IQK>lgv&& zNZ3#kj|T<;2?z)XfyGWv%$z?|S5nb>T(ztLV2QP>j?M{LpHV2|#|Kqiz`aPR=eO@7$Y*UMr1h2j^>)9&5yR%#o$oZk76l0Nhy8lgvF6GtCum3 zJ|KJtlkH}8KR#>2apr8UhFwyIZ8K7XJzM+}04B@ccJQ)riT8vzQq>S5H1K%3(y5t9 zELO%s{(x(*h=YLS2z2p!3Mi5h0!gGrGcyq*)yaqnp*zZrR*d$sC)0!&a(mNk8rl#b&7t~gdR<98f5hf*ius7?&;=16tGBdk7 zM=;trRB|!GX}tL*g8K{DSEF2RM0b{x{+A}$k*`MG$1&ynJ_F@2_M@G6fDvSPOw5>= zBut#eghQ6jZ+)`Cn9HDe3D8hJ)1T8LNeO6U>F`|7JyWO=txJ>M_cNlNh4;^`I2yk@ 
zTUwss;Xm*h2qEkx@b#*T{LK|p|)sI|LhZ^Fuz#5zLLnAIX--Snx#!?k}Q&F2Uqi)m^%m>EZZ#qTv-4-VU)}^-M$c{CR z#J@Q(o=0YCb)V^NCfFtte2K_Fhfgn3NMvnuFiC7|TI-@#`y`MXN3KNEo0BH=y}_n3 z8Fp0)%7PD-;OLcjWv9xnNI$A2`Mf?9e^ondoAqAU$T0>(Zy5a-z->9v1_ut*SqUG ztAUJnLTi;MSQe7FB{%mQtu-9;7>2J%Qy5TEM@}kVqEKHlJzSrUG`oR@xg53}yPFi< zqIt^(OFa{nfjq1;*j9;)a=W&1)}ZXxd7r7_}mF`cYdN#xqBSu7VhZRuag3>Qcfiahf5=0$OQ zT0Y}^Er|Hfe$oMuGz2o?-YQONYd#bw`7N89saJwQNQZ^ z^TS=LOmv3*m+uTcV|CXcN_GF-sgm`b<~lY zBDPZc)kk}?A3IocEl-@C@9WlD_Pf38V?qszvOBJPT2ijyG`C0pMuZ88NFdnQ0oKDy zkbw;JUnk|BCqQM3AGhIF*k>V0wWz3Q$4$*fMOpa*5cUsHS&}K#8@2j^CMLzi1G6eC zmJDgTd%MU;NUNuTrtC2p+U^HQ(@gl;oA$#v;^N}A-5-zWx<2$?1J~o7X=R;?Qx1^U z7P`Q8OLeM;7n=`{Td7`wcXGZ#3)?xb<7dg5kU4` z&pF@UAyn>LUhm=3+{6vcbtaE=7i`u|8pSPrJ?D~AeR_Y^Rzh_|_gM{{7A{ZoIc7$M zw9Km$=F*u>@jpBxd-Ffs8Xm^Mcc~^(zznRo{|aM;q@g!6T5DtdoD6@gLn!*H zg?g=>w_bB4gJ8Axx$>oV4b&uvHwH&Hv^Sm*Y=zOVa_S)AnYK17HgJhz>Sv#y6T2?8 zGf2Xzp2Jpiz)BEgudtO!m-)?J*IfQx*{n3U9W42_RdOeWyWZFltRA%99a*7W2-Hfe z+xv|FalRs>ul*4V!DeQ}rRp?_#(-N_RXz1#l2M9$X&@K|gDG#^grn)iQ^nzY&s8_o z_3Wy35scF~hE_&#GPKTs;TZgpPcoRW)p@&!$C7xOu zqWmWR_O!c8LFtd0SBJ@A&FlrB;dn6_EwPqV=l%F51ntb7P^S)~PI*h?uOc}gsFxI= z(8Ua*v2xUX(X=)ci8sduI2P+n?zj8?eg1;6l>f*Ac*ySbI7Hjm z`Ge_mvs|Uw?9+V1dHx7!Z}psDJethNSfylslY1;zt`J*%7yf144q_5y$9cNs`;q58 zCn`l_@_nD&S)nlcq}s^)jT!VeZa;Wb+qnfceAF>2gWj@n#itLgCE5DjUfXZQf-^U* zwN2WrswzmP<%4Ekoo)xh3Vn+Rye61b;Pdg5($!N3Yq#tVUMr*U{&m8ea7%Qi+gvtW zK(hkj-3GC6dy_#r<_GaCq&fC?T;zk^BRy@cvk1DQh3sd-X^Avd41FK>kd zqa5%0)+grvto7}*cgcav|A(=A46p1<^M_waDy~=+r;>_o+qPA)ZQHhO+p5?+LB$o@ z_W#tc`{}2rXS!!z<~q6d3D#MAuXV4_{jGkeRt}rgX!9Y~(PXaG1u#P2H{jBR&T;H- z6yzRMs%!e znbFm|-)sGle~ecq_l1n4kU|wIeg?zqj!#nUSSPm!V@Pzbl7RpHoHD=Ymiq8~yH~H3 z%WahDm&A&zOm`j_lEry)l~7)N2`-TWv$;;}D2XVOV;!zESGv88)J=0v>R(X z_qC5z<`YdXd#)tT2O|^Z(!JNb4x1DFJ>Z2z?xMS1@??)sel=HVG8Eq;sZUPx)P|5V zOIdUr4Gmg*2~o$yT(Dn;VBi}LJ}D1|2(*7*QFE=>YCQ4tGv4{2#tU&NO*o;epsEg1 
znOw}VZ^m0QPW;8~vR-@u$Dw}PqTE4fUXoB?)!-ybQvN5q>g6|)_$%4i>+iw%dVWq}o%&EI#j^r_Dy=$pGi#nltH7Co7h36a}o~f*R$gQ*G9~_<%qw#BF-HK=kBr?BN(FJ^DoP<6oMtHy>C&p2Dio0 zZ{e(mUFfYV>sJ)qjsMh!mEZ|}^PIK=C+8M^4gYP?FQK=%d{T$0O7fsBMqobOMzsrg z(t&@J`>UuLU7q4e7lyF&5i^xS|GvyA#?};qy($D*6Vc1}Pn?Ytule?A9tJ&7$KO;n z6-P5&?K-IR$S!+R=aBq)BzV3g&=-{rlPVpOeqABXUd7m4%RJEMb{EYZaFu8qW_?&I znq2BR|8T|yIxilpOmc=QjpLOOe_XECrFl(Iktyb(FOAnwdh1A+Ka!Q_bL*hczOhiO zRWf}ZV@~~rgnyWkdIqnu1&S?eC#4G*5yWyu&|@dwnMM__#r4TC-_$HkD``-L4KCK>2xW)5UQ=RJRgoDpjC`KjZnUPLY(!k1 zeyCFx&huH_*9^UH$DQ8cOM1#e5*n1-_fu^2J37v#MoB&zq#Op6v9*R7YUF)M~XEuER0)p`>QXo7?4lt=`@>2v2uT z0RSw|cHMR#X;e7_z2dKryCt2T_u>=T*D3gOPB!m8S5In6YDzM)R?BN6TLfAf%KLL) zH#*TMEO5oK;x~GPec-MRsTK?W`nK`M`$Lb9%fm&w_uyFOvV)H2)!K1qlzaD>rcR2eEX?+_Z#1sw}Q_h?+*(6L)4KebSEF z*%A?rQwIg>5)toP@$qAy=wNX19=`Xf5r@KjuMpQY#C2qi_yKr-+8m=a{qFscWk7#K zu`OoX2IJy~`4xT-gHKgZ{Rj(t3hj1xP?+9{7gIP~@JL!$|v|_kU?M#IK=x=+3UZOl7jR*Gu-W1&E zU@oqbE{~xiQkUKYSWfkkCjAYt^4*0j8kky zL1Q@c*|!AB?nekXj5S}LD2yxM&u1x%E@~Cf;oGFuuC0Yv5=yW};j!30CABFpDlfEN zHQtYXjmymQyJ%A>13+>4*$cD`7-&ipL&mV-Msaj0PajK!!=|Dcl%*J(->6px; z|K<97cI{OnUmA|_h4INdvY`cJ{u4mfxR5@M`Yypw*-mnnYStoZ;lDlVwM zdpMt^v6&Gr(&pGb0rM+E33g~ZFF}pBaCkgjb#!#(OlPQD*8D^HfDphKm|yjDy~p+$ z;1Vnx%)fY>J@(R)*9?;4TMTV=i{SGJ#?t+KT*3COaC*(4?MizG*fiFyrr2$>>j8M0 z3qFJ>e0B^$WpQz_TD_zAscfoUCsnl-n3hpmo#TB^KCdv*=C-(QYVd-@_kGQe(Izwu zD%iv{sKEaUu`!=hWHIhLoK%Qo-qxkZL$l-T54#m}itd-M8)bITV016>wYV|&<48X2 zH~AUv!2>0^>lfBkN<@nBdb9Z?hVh=i%H6b6c3^atArc^Fx@>6;&hSns?|x-j(iT zl}8;ZPH*izCMh~$|Ctg`w$cG(U)|L9x(hr>li_CLr5@|%UK$rbB&As6(%*u zlq3=ZLs>bQ((1q~Uc{VN`+d5+KWIXJ+_Cc@x0h&0d&5+0AVPKmG)Aen zmzT5$XIq0`=8oEhHyF7_k;u_N$;NJ=^>tji4J<61zmer0urpo zs^dwVGZj~m@~P(iF{2>FU41Hz=fGp(p6c;gDX}X_#hwK2l-=t$}y`7u!;AH*F z!W8q3wXa*ZaPCam{ZE8-6lV3YNZ>xdwDZtag-d*pXL|_@`sKn(( z0>*ic;R?I^2R|HI@H}~V~KkRUL3bz*ekNJ}J zyKFINFrJipl?d={RP+K>pI@uJNDjIct5Scet;*#lh{7Y-~H?8spu&5O&u3^n-Os9mM9xtN@< zYF=+yq}szVat%F5WV$c6+AF6ayYK_>@ju@QAah%{r9-HH+>YxKMN1pfWiYvy0v`{y 
zFe%owa|MB0Le!MH#u9fk<0k2X9ZW3S%Ls;@QMwUlb_Zgeq#PmT7#FX02Ol0w@a&NB z$~oO}OIi-pDU@-zkFLkkNKL^j+sMk$j^b2g^_tVD71|W5DQ2`>&%F^oJ=;^f839{$ z7Kn_ta05XiEw?qQEJQ33X1Y#I?wHFj1>rAa<<(__9;J&@cwXC2WA5m< zmHPr_aGDJ~q?9i`vOKk9zSv0bP|WeEW8I^c*{%R-iyW^Rb^FH`Sp+>DqFrGr`z7`z zouEDXYt$23zjVrfUvMXmBj;T-^2tGrL(zM3Q9%oDT@m8t~^&no!8wv z$2mCGHQ$;F)taGAn)P1xAc zqWE5op3C-7#lWn!avQ$s=ylJVNoi5xA)&rY6y2pN&85bxQ%VMGV0w0bP{i~7Q3Nhc z$0>ua*!biLpS{qk##$)q#Z_K92L$Pg8;15uU< zqd#4#-PhNL;5&D2Y_`Zdyk1we-riTViyEa$VUi++9RzgDSdZhZxlP^Y=aYW(`C?9V zshC+6Jq0>?YOGFSqw$uUhF9uuU4#jckv03!@bIv7ohb58j{top(_uK{f@eBdgO`WI zx0j-iBn@In);{?a#W)x@T0|x{v0u5Es+(_pv1u`po%><*5xRyTm=kD^Zz&fL zB;o_$Q}5%vFW$2BCrhhL*1NHMZHrV(kA8^N^-7g@ay?r!=~w$3Q4tlS*bF7=TW|i8&iWke}oiwBk8Y}iyF&sE|czOqu43j^amg7 ziX&aI4sbzeUJ@Vgp2e_9@C|cHA$ErWItW_o2|Fq0ajd?#K@=3+L24`7hJE zNJQ;orlHX7O(rNQ=yV*#KHJLRF@6E487zXw6q7&7N!)N=mN+$<*k4OtxVdr{Gs4u@ zmJt!sV&?|nWM`>A-CsXc;A+fn*1mtvm;-K>{sc!R*xP0M$zuJ1%j5Z#R7P5Qa5FO# z=R)|u`zaFh|ItsK(h#q!e_sMdweBo!bb0L90Q}v!LhAG$pGQe^Tv=sO>GF%(!MedN0m|pq~7lR*|m%cUnaxJf#qc zl8+##bMb!S!+K67SMsCdpalSNANq4b*L?v@TPP2+u1De;exdRs)y$$glVuD9V7bpw zj#mj4Y%PlTEx}*y90ZYLPzgtFpv!<)4U@}MuBR6Ql2uz-8R=!c(CK3qATP|9`_)ZdVeK>DEsh zgFDK<*9!H}pil}`PkZBDi^NJB>T z(_Kt#B&|nZ5`Y$i!#?K;d9hbsu>jpSZ82LrjOX!1g7Q>^)q0H$818<<5$@dn6{-~7 z=imA%MXi&P2oUdsPDoVt7IcxXPzSwHG}e7mP{1w8z$?~$uWR@F?z8@o7(^A*OXIH` zpMn;mLvohuYgC*(y#EYF3|vU6d2wtT z?=+fu#4a!Vjy;?R(MMexN;h3IAx~5Wdy9K(pFqUKIsxUg{}#T^*~~$5-y(iF>O_zm zVq2zKb;1NcQ8J1YsD>7Og+^$sBv7NJw$p|a1;-peIN7#+HVvEf-jaNfTVnxrFs|wY z-VC)Yuc#Q&EbIpMGRPgai%uRP7ejGw7NHEVw zVRo#G*1eVKcVHngR*8LbTY>=8PHiM~juY;;h8FUL8HfL5B0$5Iq& zqCi>{+Zha_p*>4ONSFRO$LD!BXF&nnm_dSY7N-1z#8vnz*YlZf-2M>wDm`u4X_C5b zhY(Hmn(0IbiJieDtPI6vS3bBQ$MLo0xRvK0EJNr{zdh^i)aLcO7d>z=LfRp-l?a+sk&}yyOPZ~KY@Ej`0z0??>jSlR z*G37v+Bar^nx58rix-*!?&*<{uMD1IfT>m$dwYgz@D`VN=8RgcTS~gRvWk)Q*XK=j zD;C(<>FkynoH&h1`c8a_^nKx+7oIl$kbXNRj}ol}M$f}o5%TR;H-ZEHE$GOTswc?r z!diK~MAhPLMt2BrUMAK=u<*to4;|QZ1*pHw1OS04nX<9j7s6Jlxbg?(@ekI#pR>kx 
zyrbO&X0=luELX>F!BZl0F6h5N+9Ke1J%+6K02Q+9Ecj$`h`WtAJTf-E}yL&x3L1W-hR^ow!S*Jg5${h86mk}vKB zc>5u(NfANwetQ3dLm^)zWpLJuy>2(fQv5?zZ6*3P;>(hx5NO&NGkI7aVodL5;95GH z?B@|B-9`HeiX1ZkiLsgrFU*A*wEAtpg2`ShI=?=_7#I(hqZd{d!_Qt?)xg}tdsBA^{@VfN~e!Pfg$GV zV6QTC3FOuk^fXfa*9dre&aA9<{J^_0e9qjT%_DpyL1Q)2L%RT4n6H+1CZVrjvzN;zId?J(|G$%hN4U)ymkEu5dT-F{50*C&VLrP7dVrlv zUD>z0zob-c&A)|G(v+dhw-dbCX*^cXq7}U6E2b4KZeaLDB_*s@g-O5zd`-oMH{(8f zi(1l_hYL68t)PnKZRJdEoIZ!NVVhuWhc}0W9sDh;Nj=+eBo|I3Ob`RQV`_;3koJSx zI;s6L@zK$Wt)VJ(8(MrW6@_EZGx1r=H0b!#c-=Sck9?%XZwwTNj(M=b8|QZGLXBDh zaT?DP*qLxf5Go;I3taW-3Oez+jo!q7)_=xTjfoxrFwJVF$D#ei7wxCeDO(+ z_>h%in{>BS#7Dx>)a?M=y*(L;?yX$diIqFAgP@Y#4%YCy6jKoQW{;9^F>sXND?~ zc*B7agazpcS!|Ci49keE$@JEP7gc7l0#x>X;5 z0w0aH(c|~G7msMRw`t%$RUI^^XXABi92d8I{+qdoSzK@0H7faRp ziv?7*Tfr42>5eqZ^20c2?jG5*-UFv><|<>2{@A(Nnd%Pon@OXF#@g*aseV^X^Ukc; z2PA?3;|P5f`74rO8oqg9RR=gwW(fH-__Un&qQh-j^No!t7U%Rc1wG1 zXsAT?FrJ6zo0pWGPbVw^>(Wkg+A7V5Jy{(o7*qXGxNXqQ%bp^Q! z-g(?=Ab^RVhr4a{Fmk}|t>ZUivrpfzcWu$zU=$`;<~k%Wg^>Fb=mNYfeGFx-9c~d> z0ik-_6P6ToL&z~=Kl{2aPuhXAq7HJ&AB^?pyE@; zl&p~`km%M%LU|I=LWjop(C8P%^%bQDDiZ&$Gf}fp<#c`9;K!dREG63SrA-#e;F=TU z@?UB`mUMkO=8AJCR3&FaEF5bp$%Nh*=n7y-y!{fVgFO42DKRSw1%=Rg7=%VXkV_#I@ zS?jf9@E0gC6B$$KlkDWux3KQV?Fd_^i}jL+HH!lk6&3fUd8B^Pi5y=1I}%C6 zkY3kcZ!|s?ms-p?GbYUUX>#CSC6$cozeuVu+@3wckq8-QSkKGIiWcscj&D7NNAWM) z9uM|p!S!A*vv&)_4~M+^?$j|3ns~MQ}ff_kHD1of{QTIbrl~+RX+<^e5+^+LN)B#R7#SR2zvAi4T%y8I#{O(&tw|>?G;D3CkKiowIwoN;n z$*ZXDgh`IE>3U061q60H01HCQ&x%+Ms=8iH4|Ed^Zr83(=Xg%6UGIUTk-=WTV62{= zkB48aj>`(befJaU7Vp=q;SJY4V7|jjx7EvU{s6s~utP!V^!)@`p5@oZk}AP3+Sg0O zzo3|Jf-rP{>aNWuPihv+sKT+EU7>;eE2T22_*Y6L5Cx=E{1Nzwe7dj-OhY09U-N_X z^(AIgYD)tp(Rl`tA@UkA42_e{Rzt3n?<>TY)>>QTu`PD=tiKg3ezM@#!OMo@`kotw zpdzQs3i2S;NkZdEmSKCY2zF7*lf)rq9}+7asrfag>_9%%e{v^k)saox2U&&S4(h)#JHrs8|Dk|L+ z8y1D}6E_Tqp|o6(b8yV!4ZS@^{ilnPPi+vz={g^B{Nr%On2BE!s&#R=%+Ud&N_Z4x zYfTwxVC`_2%yvVaBG7SH0)b6h{XYf&fu?4Q|3@@M2$9QMbWTWGJbXbZg4!Ng|O1>`-bVwJgc6 zyY+J1YB!0)CaerJeW+Gig%_Aa5yIS`<1+H2oSp7%{SLT$eq9Hghd?3v-4OkC#7@(q 
zU6aj2#!zav{vo;R%#{<^dFRNHuzXtm48LwweL8c4^{w&j($?piyfjf354gcpeF>Fq z4Z}5n*c$0!H{7qJXFH*yf2sW`2E7~qXS4!5e0BT0E!9vi$w316drp^Up%g~dXwtL$WG;R= z0_awu|Bbmq%lOM&p*aCYSDlPrM|ulHFtfC3(z)G@H7&f^AIFZ?+-hlN8F&wa`S&8j zP8RoOeU6ga8HR#<<`(`(IHgtkPjIT-NX>I)Uh`wyyizC);a}`kPp$sVwe&gqO>a*- z|L2soL08S*QaP=2bF0!gW{`dotpZ%uuI75RxU8)$B(1sfrpeLLTm20`jeW%qVV)Ph zrE&<;G0$9Lu`EzoJ2TX#Y`-xAxUDu@fC2GmG^|*vq+ejq~?x~xop%}MQNab zgC~9%_BP2UviCOHx+$JK@Zu(|O;B?s-B)Av``~9@r-wq04lOx_JOFk<0ihGtfEMLK zoBVzptpD+&Rh0jfC~%FliFmj>CJxy(u9vaQ4Iyptd2Ty=y(tSo+oGUI+d%SS!R1f(Re|4SG;oA$}iKN%o zKf#ag!1|T=+cpVI&totbB8{{}*Ab!FYQJkjKeoKu!@>UEvf>n)>rZ|mM##(7Pl3#W ztd*JLj}_kO7gWF7OD3=v;wTj5et4=TK5_7p-a)E<#yyT=QI7JMLVWN*|l%VFu zgzioR5paEF>%DKlCzw#NNdu-|!5eK%7<8ng?I*7hXU|oeBYM~IjWw3*5WKgP{sU8G zasGv=a^_jGSu6jmy1H@ztz1yo5~`UWgk1JVb+^dau~FlWk>a!JXRo+SVYP&{PI4v& zKEV(Y{`us=$1~IBg9^3?{JBS%l7i~Am^KMhcNZ_qKpi$xlji;h1ZLsgH<5X$`4Bu38|ulr3=2RyG|c!-xsKc_FTi23>? zx#b`hL;%$k@JJb-uc0(PUy0;jUwH?Ade$c0Ez4OBU_tTQ|I9TwXj8e6Uh+D?k@SS} zo#WMqx5^4gs$5)UT4uq!c2^G#dD!6=Sa^ERhcSv)hGH0F;${9E(ZBT@!(=A2Y-|E#Ha+S+x@VpBW%t<_6mL0YpiunQ64 z999h@-Xc)Wl`SP}TWFuvfXO5B* z78MWv#9^6oTysH5;jmtD-M5>%#I-F>exWgTvy;R6j|J;0vHc+NOS13U&A6ao7NTRM~`! 
zT3mlnY4efT%4Ci3ZU*3R#EEb-t}#P*{4Yzz=}DW01WMAAp662bDDWFJaFa*N=c!WX`uo`%1Nq9% zHX&cV@j)W9mu9$EMdnX}Qvt)t=!WbYE0U+zV@lQPTX8I4fjHpAdF5ZE6mBxOt>5O; z8KmCzEayQqtjht!C>q4_ik|AjSgHqiP1a>}5YAN5f9J!9q(*WK;%v=ftD zVeNBV((u^T3+**jX|WcrH#oK+0qO2GcLNVvdg57;`I)T9*3PPlWBC{?!#+nP)VKBX zZ*TYjer$xA9lI#5)_&x0XznU5s!+6kBu>6+c)OI;q-Xe!b{kqlWl*raV-515$w;AY zq8tAam+xo$ON7^xW{$fYaTovX{~0mDS~44VqI2zRtzg1*9LZCL3!PC-j0mD;;m$RK zn_81YU&2RjFMpZ}vTNQD*^x@+O^kncZ!B0U9x zQ^2NeNyT5Y&%gVC&zOULw`j(~#TEMxb3mxC%^z8beeS6IAKy|#+ShPWrJw-pud3|7 zn|3bYfd0~}KdTy8NBg%fG?cFb8VIn=9UTe&;|$jQ4XC+nS^>)c@hyR&hw;psQPJ@K z5r+O`{VcY43+K{K@gLu^iUc_PD`sc?{@pJ~@Mp$-4E9}QO(_EY^|zq^uAJf&f-JP8 zYUrXa%T!QGOMP1ZVoWyfMe_Zb$0SZ2wGn;5si*{~GHV75gY1ID831jqAU;>hzY2-; z7!tIk4uF6&evDqL&{lYnp+v;aEETqe$aXtpq#(1EYcOP7stc(_Lg$tN;aaTxgw1UV zEs?NA#m=>^Kn_}}+ifn^Qe7XuYHlQT_qTE4(>^b3_qj3Fv;zeQ&#~0icxOnn#;R@8 z1S@r^?L3F*W&A~6l?x-a`N%$6!GIQr?5JoRM%yk?7S>>N3$jYA46OqOYyL_vF0)av zFJaZJM;<{lt8gtT-AJ~?{P!9S{U%o?U^Mb9FsKst5@~AGI zpT^i3x*X-j7>{%1 z1cimvEWP}j2eC-{nsLz-*^*#2GJ01^=f9Osuo?C?eKYJ6HO?QcA7a);+}{bCRj1F_ z^QrhHn#9WZRaos zlx0pLLLw%&GSN-8^EV`IGV|^_{{)@Z)nNrwvcb%W=bs4Si+*K}MOPNE;LVJH$^Dj< z3FJ1M!rqX}Xb0H0$YBJj5s>Dn5QnT=G}Kh4ej+ zF8%(IKsNEBM8rSOT%o=Q(2rW|W_6WMKLZe6accg(V(K@X1a6p0XEGs)pRF`ej;2?Q z)JObATu2UNmxw-{a(_@SR=Z09mSzo2_(6fltxWJSpeZ8Z#07W4<2q^%X6PR-sWm*8 z^z2i&#|;K?4y^%Su9|2(2P{OVH#r#)Lw>`zYNi)3wfa z!rXJ!0FU{bRK^D+hlnNDv#gkI?R#!mx}}&*#0jIppH1St0mPl zvF2&;3gqzfP?6j>Oi|aZ~c<1W4`Ql;&<^W@H0LwXP zpw3yp;StAvovIKMa{uXFczB}q8C|gz{;p85Y0}0{_w;j^^Xos8C7AFD3E=S)ysoWt zVMY|9KL0)YD2?#k)w#b`2?U^7DZT{2j0xQ$Rc$wA4Gk=NgyalmWo3_tbL}3zw>cw8 zR?AaWbXIvp%T0!b~*|nN=~P8K+%dHs@3TDD3#V5s3HY5`KXx} zb$JwYWIFda>|N#^$h5sXxuBzy!eg_Rq5N-4kQ;rEWZ&u? 
zl|9$A1FpYc=oj_C3w_1W@+v2nhbXEla@L(a_)O@ZB+UzY^nhnXf8@X&Hti5`xC_sR+hRomoC1%x;nj!=IpggFELt7%75VKsFNx1v>^lHicz+d)?F;xRG#t_Ffn1S$jCRV%OV9t$-=1=<& zFop=nfUJ(p z?@PPYk8srSII|U0-Mz|_=RPuLJV-~CD2mzGHnH|N(ks-A1=0A(_QAFz9;dUTA^|?d zs5n{`%bPhpgfyAJ)!Lc%0=Mj=`dM{wd~5S^*9BBK786U_^B6XBAtT#?W?+_&nYE## zKO0_#Jh9M`*QzIn*=DjGQkhnmnIkTmt783mp%wm5oj3qq(9t$lDL+s}@=A zJEe?!E2Z`{Z|T07O8Z-OdnDh%mR3HC2bhh7%DI#+q(+cSUq4sPT6l9s?{ajJC>7W5vL9#U&IYjwKPGch&VY<3eLhYV#!taf=k%od0S z`1@lqo6qMvMNp1!ur=}8e^3H+`2!w~7b-;0qdKo5UyJ)-awY3|-yecPH;ARknKfBB znoJ6su@sN2N@g({gFqIgns(xUIg?vRkcQACmv4(E=P zi$UbUbkCaoz*Ni;?*D=v>`5i_E!mCdTboQQ185lg$4b{xUnz5jiVv~|dN7iF}kuaof*S~kien?|+Z^c+G2U?4) z+@I_tBPP=*gK}oKFPyy$JTa1HQbO)ZysuQo#|Xyok9xmSBYn#z6=|$i933oRo0S&f zQ|EfJO?oSd7og0;-m_#9STg5R7r>x-if_?K-N84r^M0<*S;|v(NdXbe1Wh*-<1a0& z3}Sp;;mT#mom=Fmv5%+WrWGHH>()XIQ}o>aMWg5E?qU#}%;_d2DLr_kSZ2+={5wj} z*BbrR{}^UzjF9ykH}fEEtC#?L`D{v}t$bPda;I5nM!~5<;u(nvQ~5dCAwhijU{J)U zeP{@KVvG-@r@SvHYOXIgj46e^g1MeF&S*EnZ55IR(Y~XUpNFN{`a}$M0B3-j7!Yd>{Oi zFycA4Wj7*`$e#nmlF6QrTOq{Vd!n4kBhx6o3+;~B3fv!b zef7FdR3BEG-yqjoR|jB)`A-gtkL&Zlc$*>~2atgg!otTx7v%MCPOdACr%XYJzV+zZ zB&Vy}=JW(KkfT*)(|>A5MyJn@uer2!QLPv;zcvAhc%m9pDD9{lyigi6lV6rYxmpFdlbT3p4kh(?vQ9ImK@6T zF5#ZOG)H<6W#+63Nc01fA=1GT3^q4NrJL(}#v?5lqc4*FA~P#+7TIAPp^Tj}Ru`w8 z`TZu-lFN2U0sdfMubG7cfp<<{RkbQ)eGMJ^U1+eHt}_DHQ=&Z6VPGBf8ble9g3}8J65*b8@XL#8*Vku_%LH1 z#i@tXt>F3C!#ksTs?gnzbeK;T&F1&>F9lnr8?GNg25ZXZfGvW*R!Qk!3S!C?;H(cCmz1wu9K>B4jv)e$05e`d2Ks zZ?Rb%FWk*tjIuS-HkT6A$Xx`6@R>|y$x^F$>z%A&6rz~zvvrH$=W98Aee?s4;!ogX zVtkBo2?|~Q^Uz9}nIEICz`WHm>kxPxg^*25U~#n|hDSqA?$6<>JPaA7Uy)H!cehub z+hL@1gfrWN5mo)^*_PlC~>wK_;E8%p#`xi$(I! 
zx}2EOT7c!uud#QX(y3U5RKo8zYdxkcl@4ciZqIcZ8@*1p>oyZHszdNS&hXMQvq!h3 zEPIDLAuJ!q$~-g)5!Va~&DH$lYq_z-hi(!LtlH5WZSBAVz@KE*r{>da!HNf%01i>S zHcWZVmXjQJvkdd%M?Mk-P15oaka>nef-n&gb2K=zkt^suXC~iB7tI07hL@8`p3Is) zFiox#23Z&dIYF6(N48qW)kJmUKC+@0c3zxIw+klNk)^v4WHBN(?Y0tz#REgt@9bo(%Q&z~6#-`Kt0S8Fps}#m z)A&!;_l9|n1xLZ1oZ#R*bh}8?zQyc>pJY`c1tPe)^8G%_ZbqT12eTx6wu90}SksYH zg_NRxTutl{ov6TEvJSP4rD1U$Aia-_5AsSE*!Disq@5&trJ|$I!Y~FEC{?&cIBP@h za|Y#KE}f<&q$LHI)-#%~V@T?HUa!v*l?5G!DYRF`q6U z_HX}o|5oC3!pSa*A!mwLKSWaPaJkz4Fj6)NnbeHXFB;{Pz{o9C_88Q(!C6&QvGj%W zL{+p9NBjA1O=S|hWSzdK_-AGHH6AqNwqGFaa4$8A(B+9|?n&^_C8ovT*NtE-=*@lVA+u}1NL zd^06Zq_|&e^AH;KRIXf#z{&7Py&ImF{C;+{0>Q6L8zKBs` zt#cJb)=t)`h23%*E^pDu{)Iy-B{DZ>lSqbR zV_v_d(UHr+3Cq&99eI3a3=VhQ#|iBYk+DhLB2LAJ*0d>mQ0?7M`~rY0uzg7#fmYa22s{yj z)>sS2*TV|1!wdb}(&mNNO!`BvZHs-Dd=|vT(azY?fgcf0eT=#802YwDjNDk-P~L{) zn};aO*=l=+zP)trtLaQ-?^rprs``+_wPN=QTVBt|FcjKp4SN>B-QzV_;-Y@evpwCZ zQ5lTV+O(9e%Zll_=ntYboS>EzGK$a*%(~l*CkUeQc9SE+D3uP$=@B8bR2>52&YPp` z{<8DzzG+F+5q+e}wYTzut)3_O z{GkRfGmOaie%cHaM1bYo2x}{_8bJ=pTa)f(SZcg7xX?6g5Cbh5l0R^>x>Uu{{G=M^ zR%d@^ULe8~=rf8{T^@Eier|0I&^eBb7;S*Q&fn`usp9nhCK|{LY`=MJSWCHl5b@VE ze5&^6*_fd%QCvbHJ=3Jv=uZw>SQrYa27JquJgf7mnx|HWGxbJ=N)2#F3fVNddaA+c zM5R$pw6I<@2Jaa4#D!TT%soB1n=S6~=^!VCTKoQY0K)mNIxrB$30~ld{c#15Brv%v z6ns{z6~i>sEWV0+U_Oum3}`{}g2wdEMs8>_JH*Lg0qZTk+ze(TCST1|#fFBFd@_Tb za5q8@P~0DzdLyV)15B;0{uSWC5VKEOir?LB%W6fb$aEC8y zOcq2~jVSRpYhgE(-df}bB&{Q5?ruvR zR__OvY4Rx0clbxu)IDK09i1l-^VTcW+A!paau4u5BN|`&lH~*qK8$*-c^&zgtbfnl zy{nuTOQ|Xb!vz{pclh}h@1CuC{_LmqTr{7-en9Xkx!Ii#(rhJj-h6-g;M#MAl)ev) zIHj};)HgCM_N&V3_IkKt6=})7d3=-9wNM?G&sBav;H{oqbUP5!`Pw>KUz09A(zaYG z5?(xDujefvBbtqO_j;N~>$&VWf%9!$H8IxCb;%~y1@$R?^imm9zCRr}G}9<`+^sMB zT=2D@rvg53by*RSa@elMbv+y$b)Gm_ouo1!aOCSjrs9)oy|;;n(MTO&eB=tu+xT_pUwmp?BEtijPIt(<0E}nWk)ApK z4mMUd00Mr*udLRKWBTJO-{Y;ac-r%BCLLG^*$P{P*i0)=o8IO(I|&R+#q6`C%3@yk zcL8CUNtJ~GKR!gQH(1z;&0M@bqBgg(br!EHnZ7uHXh2I)%gtC);_)Sx=N77J4j;j6 zzT3?ZbtVjPN=Q6I$#nFujtkGM+Adm zV^(tRh3H*M@l~xO-sWInKGcXb=F}Qc+@!8!fvI}Eb6Uw1D|#LRcQW7TEgh3j 
zCaY*(^6Y2Qk@M=9&`D|k5un{~C-+66d&WFSaaWf#JR0v8iBR+Ijy83pjK6Ka7{94h z&1qhRBYD9$#Pw8T zN9pCqyUbK>QEtx`CNluW2TWrdSSwID-GA0khdXiOdYJ(5!;jJwR~GIzvhG`khqB@A z9_Tb#Xj{GanFA8q{%mW>@q;(oH))NjGLp(>4}erCSDhLvVL@cMtCF8xIZ%4#5cohv4q+?jGFTlH~~xeyeGdkuF+rxdw!c&Z&%z2 zmT)5;jpQfCngHZu!ZPFaj{vtFb0#kpo#5kD+M4PX(7~&gI^Nt5i!}{pqA^^l4gUG> z-?*#c6?vp}=QRg`G(R^VeptNZeA0>`v!aRCDz7LH+^y`DSgy2I|G8hXwbY%vYrY|F z`uwAA#$`ZeO=qT0R-^g~Vb-8GGCW-Bp_z0$_?EW_!IpuCxs0a7PD?t8BeQN5YuN3< zf=0+YIodMP4C@3D`HkRSnjm-Ydiz7lyVM~0=+ThZIvRB-Thffe)9XSSNK zu(PfZ|GZxdEysCaKfX}s!{BDI-Q3LVG4i-~k&TI7S6MA_2r5k&90m4LMc3e{zBP0= z*!c1)$db-5G%RIvlPc|liqqEmdC-+Gnj`GY7F`hwcM{bL&V@p*=sW(-B8|&g!OCg5 zC`sY)r$&_|?-Z1-PR(9Uy`fBr%#H~18@OV%pILx4nixFHx<6g&acFHwVww_3x(9Ru za%$gGGv>2!2~ZRSegp*Qf4Y<|1A{6&J2IC-&}#wK-3Nk~7~~XjITk z`nbUVb>KvAqWhQ~P@~_Q2VL@XIZmQvb9lo9Tp9Bh^qr=@NiH_ZVH3p*lo$ZC{gzEH2+Jp8P1$tQkx0gD(-aQ$GHTC~I` zsKbAnCfGxpyX|~9*2Dj?zF$4Gni}HXV@F8hLS3GEe+)}Cn@kq>F2|8th7o5OI|3@R z?zRrvrOg%OLT+C}-&u9Qf`wQ(&M}(Pke2d+gd^@N;qkmX#wp`N<}O6LeD6`ZW|XVs zdY|uMTglKOIvN^rciNYV5tbrdS?AG#XdO6VRCMiv%n*q{EN+wcrsU)tNM0Pw4Q1+u z&TpH-6F=5Nb+vWzelqAL=ton%m-xcI2ZnA*@I^;%^io`EmS^z`UJ!Z$hbJ$%43sFCvt~m3qh0&KwH+w+rAN(y~%v}Y>^j? z2}Uks62hJl$pv&<-OK%>bp8A!uP1D2|GT6_taQ_9mv+7JakFhhTw?tF-oc}M7ZAQ^ z?y8hXl&N$nXnZgLL+ zOEWfRgE*E!VNjo$o&D5Ppi?4~^$PPN(+~l*B6>i*l8blnfB~E=FILKzLecY%0jh-T zkss8{UnqAwCuD&JSmjF_OeUDx{gjBT5ltKIetvn!JgB+Zue+P4I_@`vTfzEZeLvqQ zn&x~Z@&pUW4%!`W;&?hW-;aw^c&^Nr0qy-8j&4q_gRB9^~(^+Y-gFKsm(PeA!Sz1#y?d&Ft-Os+O2j z<_gO{3!FJ>A;H9QTw^wBefSUtQ@LWu{G{)Gdqa_G1uPa5vcu^i`Xy^hXVs3Aj|H~wwXl#)GA8S-{Hm#W4}fq2 zO7B`?mE%!$SwiLPq+jJpPVsDZ&2WFZ$YQ1W@eXeP=K;`w=N+v?1G_513`2GgK7)RH zK)`vze5Rpi8X}QM2IG((_^T6tGf<}DK$*Kf!w*B1hgvXX`kRp)x{}*;j>S>$r}Ok* zqP93ijlbq{SeXX2G`qN%50x5+h?lR?OBd69_-(m6YqxXvQ@QP7HZKGO6~5jF8l;DHE!Rbx zWZJ46`%1sp3{?j>N=KOtQ87goQKog6vvPQQ;w~? 
zIXgMY!zr2!xOKjz3vrX#y=!Ii%6aqCZRJ5DU{qiqEh05`o&<`?w1oDZNDEwn4qYW^ zp+H~TZ{MXN_!Ph_46-!}2Sq0PmyqtBDc{7YGbOhp?9_qh z=#}DXVOE`;_JVhXy|p0G9U`l%tqsYzf4{6%b>+hToE7vT%3(Ry?rh`p@hEDmm;TTh z37zAE{iTA&?Z)Ps2ASNdD3CvQta$z@<$SkZf&nv9w>VhzcAUDAk^{s~t~IDGv{yr1*&^CweCCsP>rzvVgKq?8)`4rn7n*!C&+9^UbNNv=^MSMWdW; z?Qa0j=Y2;wKc;PFN=+p8k>ooo8T!_`NjYSh>E(PCf8mQGRVuq(mw#ql! zaG`^+u)MBAi<2Y5VtZB1@RvWjrPG|`(N&82ep+M=cR$;S=~X6NP&3%gH<-$VE1-^q zO=gHO9m5%(YfzI-L7qh-J7~o$f)&6v&6ZzN6#4C{~`D zTjJH(?QhJmkC|rv<~Uu>5b}y zn-I$sJZqKfUgEiuC&6n&ab;*ZToqKtLqC)c3Cnr`*PNumiMVq>_+qjA#{gIw9iqavQ#j;R; znSo&om+63DGUMUs{&+Ic_!;!5TB4QDs^!Ff31mJOhGnJ?Q4LT%qES>} zzDuAg-sw7SdFDCHF-xrdw2Q=?d%J-wTcTd^mO)zTVD88Ea&Hq&at+^-tYd;%h*|Uf zQNwg$liPe7zdEX}Gi^rYNc3XhhIYm|1yM zI0MN=;}+ffhS?8p2W=VN8xi=$}e*I@*p%DAH+GK3JIfLq8#QzB96L_FVX^M?-e#JDc)f-1z@GS=l4I}zEH>IY_np#|(n63q)3pH1I~c3e$-*pi##r`K zQCW)Z#%zKj#bf@Cp6z|AO=w*ptYgEo9pO{}(af5o6N4Y_MhyRlmQUS?4LJT3@`pvfN0`N;W@u&!*$K^>DnKNiVTM7h9 zSE;I7q0+MU!ik*JW0XELP8S~dnFP4zI{9NVvtqWCS&AtI~6X<=M89^nUY#duoXJFm6tA2O+wK*Q!do|)qCkhzkEt-FZ5gv3a+ zqvK)z*t4a?L=2|Dp4DC2BUNQm7#rMXDtmOSUZSkM^>sfQ1Es75jxd#C9Vg9mhb|WsW*z?-VFH*|prw>pVo6Xe|~iBY3zL#Xs$r zx|9-1Dn#41-@Ki7Ja+aDgU3k;BlcIg9bQ~WcH$XR86Lw3nWgJ1ZG#3Enkff_u^E&lyEZDQcuD9r6xBU z$5J#BBlB{A!@NSaLOU$m#eRIWx4yX<@x-J^pBrQ=9LwKPH5=>DtaIt(MyDz{K+z72 zSj%_xB}3mg%1Pmm<%ex&?SG>u?OwyO6vXLxn4aX$&w_e;dn?HSnN#_7(N>FTp(eLp zl=g1j&YvS3*ECpmxV3drxj@PJJt;7PVzjm~`5AY@n4?eKmHI1HU6w>`cUAES;e!w8 zlHbJf*i;E7$+gm}`blNL?4jKqeDdIYr_f$sM8n>%P5$`TuR;gg!H>`FSR@}Hq@UZT z^A4D|1Jl$Ya%($LMExCdT3?BBi#*lun0B&9WoI0+HOG&<+ll}b7he}r?e^Cg`#Wag zC8_)MPJP%WAJP~CyB@C7RG%aeMUK|VoGX5 zN{Y61Fp6Fbm3tH`yljxzn7tzcOQ5x{4ja;0$Zm(%Aj;Z`w7tBrEsmTX#n?w7Yq_F2 zOO!>!q*;A0Xh$4Sn37eyKn1i5166QYkWNdA|MaP4Y9UY01y$h%-x` z?bJ9jW!ki(wkg|TletOusJp~Swr%Yk$|)@){YgyZA(IBA)5)6wEpbfdSe#Ht>$lIZ z#8C`yh`89=93!^1hSC%S)rXys+?*cb=BL|ZzoRi%E6n4^CKXl&lr^Cj#p!5S7Rs?Q z>fQ!A378>=k9R+O|6nB-+fA;)gQWV{j`iK=eJo2fv4Z6}y2`-#Aoj~i3&npqX`3k1 zMNmDV_b;Y)1|e+TP*$7=<~dG+)nUOnHP&@wf(^3O$<<7CBbAsn)gZf#2U#O~Y`&J? 
zyo3}`K~I&iS0z(wp`&IVopeCDwZ4)eoqw}>Q(^YKx4z`-uZ2Jq7Z zjgJVb`NdOxK|p>#@;`l?jegaUTJ8HV z_J{xGEv;pwBme20|2n3%8=MWPJ_3i{__`Y0F>4mzUTXs-W|Xrpsix{+Y0&chT#0>w ze=lA9J9ZA6Ci<-;t|Q|>mP$+CVP?&3ZDPy|!t}FEw^ef8wE5|Uti~!4w_J)pd{({{ z=$j(n9dKw^M1wdg_Ms+L-!amshfyc{aBOAVZ?#{m&kF5NO(Z}Pw;)nS5Z2lhk%F<2 z(vWXQ(N}IchfLZpkRdvQYxGm2-0dkW}huYBIHc!DDC@_TaGA!HIZ9;&fHi!);%QbEKle!_V;49_Sv*Q3HlRWQPf2J z>mTy^(V^c^!eBrUecOCH0=H!NS{z@(Yxc*au0DBw-Wy_z?l{pkedE#(d)mx z{;zk=ro9{!m%&8CKMnYwu@G#r9Z%j3CksGLlSNNKfXopA{Dg+?E!5a#pNO!gC#@*_ ztr`Fp>55toK-ttQjJr}Ifx2oz&yVzkgzMW=ysG&Mf%-$=G*31E_}%&30+`c*IuZcN z4|}m#=TPBL-r#sN{?kx&pfjFEGmXpbBOCc_M@PrUuh4&Z2rz>g7_o9{UCt`0A58Bo zTn}!3mbTp8jrHKsOu*S~{*CVXtw;C*QOKj_lWU+{Y**U8fB$}Wxepl#uK#rio3){~ zwz8?kN=j)E=(T>Le1CPQS!ueqx*-RY>9v?C!Qpgvb$53LB5OtZY9Z1eZnt}V-$2Tw z^8m|ss8XyE2p|#+Z+QQ>VAWI^nPpo*jns%{W^@{@i(bR1SCyh8xUT7s%LfU%ZgcDR zd@GP>G#UZ=frQs08l<56p_=TTNEKdVlTWfqA2;^{bv2uX*8$&E8hGh;ul`E0a^jza zycoumdps!7cv~iQ+ndY#f}F&*?r6rzHx3MO!qaqC5SB-MG|LTE%JXW@MBZDG8|+ti zgrL;K(h!c}`^T$&8^)EikX@T$*Yn5g6;+e6%j=1;A2lMDRhLbAPk%U?wya=S1;uVl zOQti`OP}GeM^7GhLu5^ijEZ$Cuqv!>EhO(F55HotSgP^qy?Tq!;c}!^TwhiFP-g_N zhY}-&=PPaeg+_R|fvTSKfSazG+w#%*U{-=Zj(#jxcL%o0Az}EOCxzASOz7J(qH)?E z&ZY}>#^ro6GMS8!l;RS%)5gVom3blH3SGdRz?~rQ5wf#m9`8>VcX(9f4oxPKj=Ns& zO-ZIgdf62yTHZB@9AYWzB!0ZT?Q>X8HJspV9|RDlzmuiaNQg=u%?kI+y?8HJC^^DJ zW{c@cDgO3_1x^Zfah#FI&Jg6pa+~Mo6Opy)bG^5o;gONmX?dBI@+s^NThr&~vxZS# zr(Nh1xtB`1bXOT3cI@`ekHfrWHSYC^-<&(n`5Q z?dGdVgVeTKH*|NezZ5$BetvdlK3z|#;t}R1D4Pug)jX~a>$+~)Urdp|)4G(OTjLTl zSgu8*zkf3+s;!;!--;uvq3&Krf zwOKBv>t+9gK*%wGSK>}L(rnf{?HfGlf^Ijvyqm~rudy_=GcJ+ty66* z0ymi|bQVfp#YwhhH+Mf@J{IBL3Ydg^cFqbY<}9;(E9HuEFqs^Fg0DI zdks{oPiME-ip^JiA$_uK0VSXiQE?zjAz;y5@7)pz7n?MIib{7>>!tUcOWvg)TMz$m z>Q~cX7yZj}IeMcm<#-GH+VuF==XY6I?VX*w-?=yGq*`?kV!~lDB{G?f{|*6#V9@OV zg`6=zTJ{dj7*efFLl^qEz+X+gK(0|IRiKT|A=Y88`YSX01!^CDj0YD zRjul`YD<@2TM_Y0$S>cnUw{wRNL&42rnoeQ=8hke-DO1>*!jSlVqzWjZ3>H)J0peTTuL@P`t66?nX^l2R|7KX>3JCmsIe9ENT zF1C00z2>%>V4LU%wZ)cpfJ?AS3jr{o=1YBodArfz$<1Z 
z*g(y+jRX>iO-Y^2crp`qBGoe|6!zLz8!u!kfCsqxc0qjDi803m8(DkvpnN$+WICOy zq2b#&EHS!1>nTJ;FC#Wene6MyX(rVv>Pf00yU+u0e!34%S);_6)ufb#XTrZF7YlXO zfh++OL2hvqrBu{>j?l%Latd{Gyd!^G|}@8Uc29}*j|pxuZtW&U7czk*<*1ngfCgm)Ext?%EwYEjHbe+awJmg7(J z%#a7>$?UF|Nb@)A@Ox&fBEXU_i+!H>du0ubiGX1aj`!hgEaFI%c2uOFY$(9o8LjBGbbdVvJzY<~Z@ zk8iIUeo@CQ`Au_bI%}>aQvhfjKAsqJ~4%@%)9os<~eH&6~^R zZt;ACcLm5@061d&spr709o$)_-*I&F(RCz^`$pLH(;$%sti_h`=5R7I?9Wx@Le-^| zz~8=t42jtj{jT)Y!>P|e{KR;Fsqp#>fuMk(;Pv6cHDE;xKcM~u z4BEA|wFZ~tk`JEPs%6hWml)Mj^@y@uqX{5k<0B(uYDx;VqBZN<<|gU~l%h7+U8nu& zq1)>Zo!&MsQEa>r9; z(PFv;(S>2SYt1m|c-&wk@#{^)K%A}J%j2)LfHUF&f=YbRpp)R^isLnwbABRJB_$- zaXemLn(9DhObnHBWsCZtQmWIW_(LXMIPpap*xEqI2|Bd;yw;_AxC6or)$kT*o9LMh z?@(M!hEo_Y-jS=yXJBf*Q{9{<&dSb`FV}8#MP!a1jmZtF=}~4>^I~&4o7|oF!9YMy z|NcDNtKNy#LGx+RuVh^g^i~O84qd=4%wS?>rbd0hcrW(cWCV}HaR)G+)z#J28i5>! z7Y0qWTvp2QRH3R`2mPgHrTN2tX$w3qSMa4F#Q>qyixELC>Rt!Ze&oP9_U3=xHVG#8|tw^zQO}tUQG&V~<$@Wjf zo-ZOcy>Wh&`V=vJ-*q~Tf2Xif5fg)RS7(ok1f{7F6gYpLLJ-_QfuIOAbv#us)?h{% zdy0FQV$NpkVy@7D|3UHtXp=ZoF1#O5QE~p5dQ>CDel~(w1aq6O-M~^3A!re((X1e8 z`Ke=M#Ad2MIWjWR>+YgXK1Znt(k(J7O0{AsR~kE0b-L1Kgr^8Eq_b1#z|z*XEC)l? z1-Xk26otiV9lr0mzmol&FcvLK3S6i#q%S&yGh-14X127o2DZfuAc2%Sl+%D$Sxh}W z-5+aKT4&1aj#WTnRk-X=!xg&%yihPI5*?`rvnkm(ZxQ5vcmfLM`SGMhYZ?^E{;lkTfkHPlnt~PiyyRHDSsRj#fxCaF--CU@1FuTbEa5V zH8s(Z0NQ032)@R<$qiX*FT!iLJxy(F~9~h8c_xy~C6eg0849wo>+@RHN zSY4Pq9(tM?cveUdI8H$(lkR+2p}B=;m`2z2w0j~ur=e0p4#pWwWEYf9Am9Gc%y!v% ziX05@dAf@(%j=MXW*#w$!2KWw9J|#c2zo4H6~AcP`AV}HZR|Zu5YSLHudL4V z)}Fa2#1Ga%#={ItLq`+MY_bf|;OpL-OPwZ<{E5>OXyDt|JhTPjDkX$+U@B)jM*v}w zLI3CXenL^*@p1S8p->dK8)_xIUm3WX71GJpb~$0xE{f|+PYJ;VX*7RQTZ>~D5shPX zm;fx@ULPY4B%2{ui@S?m4$+)t3S2gQl2lHo?l+(V{M?M0E!Yrr2^9(EkdP6Xhl>i? 
z;+cuHCwR$tYG@c}cc2C>%R}>PUE!mM&iORAyQC;|&+w>VjmQt|NX0PJl56PTpW|sW zpLInB{9cX=s?;hiWwRr)pXEZLFDVx5UR zOr`vB3f-7b3YoR?aK74Ntg&lnEStd#KnoID&}+3-C%%6DY7T}PScY=6Ay2TglbQXC zEWAf_(U4A~)^u$K77|YBEkh5{HP8!mx*UlZ2xmqeaocjU8gxP}-u^meLpzwNKrg@E zMAtRoh%eU(FniIAo1|at8H67@=uv`McrI@gcTVAI8vNf<7-8vjL3Ck!fn$)4jR&xR zpoSfw69W=shL#t=f|_!(|L7oBt`BoWBfqzf;8G#MKhvPP$iDe``YuqE(^V*d!VnKL z3=A2T(CgraVbW5;L{M3R+4rm46!@~;OoR)wDu9D2872b=fZs#obNcl{rAoonND4D@ z6^J?v|IPF!mqGHv(|m3>6Pa{MrSWBQ&5AqreHD;X^6#36=CzVdPl4Rm(Q%}mdFJd; z5>gEqZl|}MOm=a#r`HjM-Z~Kj!-7Ew6bbnfZ1NeMg1e#M-AP&xhX$3%3b{Jx8ZG+V z2Fc`x0A_vLJrko#`j)HJ6gH-hb+Bwc25lo9)4md#SVve~u)it9U`YyUbZqk<9NF^nDs zy4?}pXG;%nP3kIr|Fcflo0BzH-7>w`%XT@Y>T9!hhLD$dl~)N>rlN*R_KNDZk8g9D zqoKu54;V+*6^bOcX7kELd__!$gY}%J^K>#zvG$;~6ez#lU7ZM}h ztOs9l36K6mfGp1~co)9NYZ3L(d$o5hCC*32ZtazJ6WxA`V2%WS$^7Q2IFl>_>IKW} zzhv&KSalT{>CMK8nVdScIX>zwB`(#c^rS>_UtuiM!1B$Kkwl~w z%08Bl3NNR&w*ph)_ z?*e(JJNDps@{+G;(!cPL5m2cX3xT0`o6Aab-(Lu+{~I9Zw{Ke+RznTSL26>Bwn!== zope2#q)^mNKjp~P``~-Q0pgq?PHY!xR}$gAOI0m#x;b4JVI-V&Of`N+b7+qlNz zys*>>8a2_|F z!;jaJ)rcgq*=1p=b#kV~|1w8lnAkxcON4ZpaTP_CY}71(1&Fv{czCXSm=TDF22Y_k zl1+^rqK)*kB<^eiVE*#SWLoYWUQanJiR4C3z|x#TfE6XM&I9B~h;h2y~~cJvHE#|k-K{E5t(ZZ4&m^a*K)8sk}YK$Cbfa#pUQTY&;P1YiSU zUYgo=0)*U4|d8?SHSa$l*z(nk`X2N-1Z#GY<03AuZ+uhZ1*xR)DsJ~1d z8sA#BoDvRq@a8_4F zM`@oaFYb;`R^K>O8<)K2b?06(2l)zZpQy!Xul z#E4VgMcdE_2tA@&%vo@?Mz3J@x3}H>ah<}N@1Hoq8OG$~i>U%M9Vb!SiR`%YWyHn# zVbLwM4{hM1%BfWI8=VGJl!V$RZiqx9Gg$KKyVlnY_NB9BVm689?F))=rV7Oe9J$u3 zzbCy|vvO%*m~)s3EG9@4eH0kJuWOpUMIHdaa3l{Sr%agmr_s0go~RGHC#PWVzJEqS zxHOem3@B6vVF4*IsSb;$)rb-cwgUs>z8X0aaYdR$>vm(XiaLB%qc-6y8KNwUX1_=q z0S5zvA7gD2;O6GJjKxDyNSKAY_=fb-Z1uGAlc!G=V??MfWdj>bp2r|-D_1rzoJyja zcM-tyA3Kct+S}W^NEFU`*)-6cTUx7DW5j1{u!y%nc>nM-Gf??}no2%Y3)rA2qtKwB zAQG<_aNLg48+o_9r~$(PA6{}N1_<=ZwTRe4ivdZmQB<7bNrbY-q6+;aQ}GOb)Vp%g zV!xhl3gz7~Egadp(YfH98RLNzbAiJ(Rlk1_$<%2o^WE9rdqh(Jmoen!DzT3gVac{E?D3nGyVODwvSEWSxkOS4udz$H@m z3y%+(4k?%_%#wJTKu%M!a9_7~zbl;BXU*f?UZNr*RUfh8=X0}e+HGv)a@sdDGmF1H 
z22LF1%ZbxO)Ki;Ki9~t9j2GoH%%=)&XaUEu^TXZ}HK_(G1f`w$kE;&>rzCQRP%bk! z!BhfHgm+Pj*k#wpE4td}fPo*`Gg%uTAiMX~gK(^3yQEk9@$Qo9r?#h%w8B8AL>!eq zqNkLURHc*<5KNi7Bl^8LN13k(6ZfXa6zP%9w}v`8u!({2&Ev?Bca{j5xt#&Ny(GN6 zQi02N5($<{v`!2m^9BJY<4W$8r)%d+JH)+!tYi2A2_R_nnaRArFm7|~Iah-j7nKEZ zM<^-WHBUnos>v!VNLmS209hNALW~ z@b9RY93>b4%yhcj*426qTf4CXUR#?!W`#uLm>&Pkw1oSvR0M(VJYHbirNQ_IS0 zbS43q&K?BR-&*Q$)eS?DFR-?~3JbMoDuVU(A!#2z~v$o_?GomM&t~ zp$op%cf=p`S=|cBmPj2%qT?D>_<`@~c9_esDPck(5c|e3CgcVAIQ)bugK&e@E)jGsx12 zH;^N8qL*?C+c7cg6eRkLfQ@@98o;rsviw8#+j6l(4G_r9RxoirKS|JWgHBD!V}4Z> z#8`e%geBjVO!HH|D}MX;8&*4q8DUPcq~(kO|nDeT6xD~&9qa*7T4N+iO;!Kq<0 zTc0pugANL-{&K^@TC)*XcuBYyELhrj7Rteb*xb(eP>K$+;zsyOn@PX&f-Ll|-4{9T zu<+I?j1M5cn~dz6l;Ad(6lYe2f_>W8eO8zdz7-bB>tKG^=6&XOE0p2Ic5nu~+^^LE zL{q!22*s#(D+o_vodA@_<+!SFF7@ulpC-$iv^^5&wo|fI!lR>bKUAp{U!(L<0Jc+d zpDC0q%~c@6$>0Eg;Ijnr`^OI~O8LkfZfBp~6_|*ml>(;__V;+Ign`rP7Y1dy;QP9T`#cQ2xNAtkxJ7+58__2B|N4*C?+N*jfPSV zoBJPKGcQEE+=^UReSCLr&gN;tak;UO85~C%5w_KvQ@K4@X#G!9biDeFl0fG|Qc~~g z%X*XL;00k3>rq4Mn2bQjkfd#Mshp&A!5lzZ6NCYWVmx z9!ILMz7@_og3$u(o2AkVyNKI%kOeq#aX#)4pFIp6%`P@-$xe4LkcwTt{d+9{I7)eM zWbU@16!!B1_0n6t-CDRyj{QIiZo)N|TG@1-z3_Na0SpbC1ouQXAQ8sa@y-6+Mj>X#4UbpEr!`XdKyKIctf`r6iWiu z<$@uLrC!yAg_5IxV8CLWG3Eqy{l`fsiq`fo0r%ckf}2f&&)CeCOip`B`w`12W9~A5 z74Hi}1fXJp?6Hf8FiKLd_hl$;%7d8AXV^Tps71zv$sN|R0$m-$?TB+_lIj6e>z#{r z(`pr!(8y*jWjmLI=ed2RX*(p;nV>$G-#`Y@>guZP;b+^@;BS4o5bT`PLTGAwkBZ?yhi?a8{YaoBi0HNKbk4v29 zxh~!*G8X^AEd1)T%YX=Cr&gsYVr~Y~1Yk%7gX`?oT8ZVwZGLTSMeIdF0wM=J4R-=o zIF=|RWJiYR-ZdCiOBLniXf#?VrzYUWYIXKGPDGtZn_L{&ZRJ2j0m|@$*X>k7unl1Z zUa(~0*7FNWA6xy3BbUqd1m7L4J5=60^BGQyoG^lg&%^oH4YYSYV{LooQNe&1i0#fN zkkojxAK3Akig{>iMp2eKCsID-@?fC`0eQ;4T*%JF?P~vdMF9&|Jc(ahrOB2#iIzJ> zRNC!cT*#*t~OKEQ(L?& z{$MM=LUCGf)DS?Nu`F9#7FR&Hh5P)1@dnM&JcwW?4B5Xpd#4*kORSJtXG{q;&^u)R z0nm*2qA5TGd$!u=li8%v}AS z^zAc$ejsA~n3hH{ysm*=)Au4S4oa*zw9})bqZ0Ty6$2JeDH$n|sF*lP<@$*Ot%iQi zr0C?>(Dcs$;UpsipR2S!MmaH|P0eF+T?-5GG6Bp= zA|Ag3zCKqF@3R|o2qo#{jGDQeMd?P9K0X~;YVMHNiC2)iR8uFsm=U&+V8M5p%I}}7 
z&D2v{T}@e&CR(DOrJHGLN=KG_lj4WPB|M!r ze?9lO8p-6oAm;?O+p?;ZoktF2zk^bMvGn300+Rm{QTXfQ0>Gu6xQfJd_!9ww6J3wU zk(2jz?frlKXMhYzuZ7JKCN%Qx+V~vRsPIz}xoGq~zwZK?p4xeJU+sT=*KaI(cKdh< z+W_tpHL6HS09ZA7tH@5Z>tv$Izs%`hKh9RYvap!HT-zR2)q9;v&Jz3qoxO|rLdM>> z?l7PKQ$v6LTlSz4ss5o9>?|K{*oH1$O}Jw{*=a^y8V*=C*DLvvyPf4p4_b=Pf-g4QcLE` z{+B{vxYf>x`Yin5n}gcfFc8AP5st*s_Kc4po&k8Lrs}Qa7b1N*e6_O{Mp)ATXtvwe zD4*d6l~GN)_xal!Pt;FQwb;M6OHLo0SiL$O(i(Lu&rb9|Z~cEPv)&9?k-W`JP&%jV zzlz3x{2lPOPyi`Gi^4kplaw?7TRu^O!0YlS${MiIWw|uC%v*n@2mSY%IMVnKv|9u*k@_^l=$z?vs_=f=p8o+=Co`*%ZfB%>N@|M{wAaMKtBWwV5*XyS4{J+Zj zznz&fVL&RZPV*Q>{xrY>7;t!Z;;{XPh`wIfOE7j_-Z1S?17=?Ux5Iw3ZMTKh`%ng z{}qV8bnE{+0^uV_VSKmR?7u%#+R_K3t*uSQ%?r&)eQPalX<2lc5>33O>Edz?L@H~{ zmn0s%*QOKm5>csC@+=Zrtdd7{)z$3{&Vb62#URjDk_lHMcZsKojMwF@Dgy(9*zZJD z*!aWigywmlhez8R-T0(Ta(#MV3v6fg%J@Q9w50 zr3j-Qz+ZV>{$YdX^f{}YF!fg)+s>GvC^ob8caYQ7;2hFE|3Q`dx$$02w*gT{G9|?C z5ZwP~4F1QxsV4}gn&ms+ck%OAPrb{rH8oSbdwiCx&`a^*N!0<=KXPF=DddtPMj;V! z7t8bo_ZP7vx`BW#zvyg+NX`9K#E%^FQq1fBiUH!iUKspnE=n zC3)Bj5YLngRoK}cdF7RqW`W50D>wuMI^pSnXTxKz)$imZyiYODk74 zO)Z&!)&;%Mgs0~tC+B(dIcVkM@%guHua#u1fNAEI{TOf1$M7}DT z^SR7=Bs__6o~{Rp&kWP-PGJ)0SqIbq{9) z3y9PYBkjI|GDq%p2Ngy)7QBqL6QwpSbS)3jsntZgbfDX?Ujl5W7C<3kPlbn&o?WPa z>~yn@<9jlYT%ei?5b=G--|(X$EIzj?pO@UpXjLE2!jJT?JYCiNJpEZ~d;WPyr?iF} zTsbASxV z_PRZ1$y&F{d@78tDY#UY`!ROm5Ojq%V&J`4Yj=oj{np>y%o_R#oOi|Jx@nYu$H>UY zW!cGO2f7ky3jXKTLao;!e2GRwfu*z$=P};!VC{EYr7chOd7cY;X8J81{#W66Zb;4E;LXww4<-3am7odc z8J!8x6eOOf?OVYXpm>r$L91ZJ_wV5Mb#--iKkYKT{qQS$1UUau${d-%^v|a)^Q?Sk zK5;f9#CwibxQtwt{-|@vI~w(afSs2;nl8q^ayeKY0|hw$!vef4jeZPs0*&UhZC)nM zPAQn?{mfM=fJfEI0;&GGcpe8oU6$R*ZmNoli$A3wrC~0X)IPw>VN@@0B2hm6^WwV6 z`XpG_nw{syl>0hkx_=-Vl65zBR=EN=pLp)1hseLXwES^%{J`5uEk2hBKhfA4sYfx~ zxUHKd@>Kw|L_ru8iAu{+~~X7Ad}IvNP=p-V*jd zNF&g+X_XCZO$++5He>gnyXFb`KmzTg+~jiT|FP-Rsu<9KE7F$2^dF_f^*z3N45Ttl z4AlF7SeZzmGt~xEf*4N{c(@GM}Krtrw6-Oow1m$@B<9hJv&7ltrC%)-RHVHrC z)XIY?PsjA4?k2-fMq}S1Jnu`!POnw%l>F%(#Kqa5=)YmK6`iO#>G)JB9POj~o-1ny zk+_uHfY(4?GC4P|plF6Iit?s0YlO}eF6h40%TY($I{pq?ul+0{_c7;x-vj`%N=9G`Z 
zN!~)OSzP{(k!<9z7^+2Oa(2J$_QYOcp3dS_q-kxzFsmKSYOrDW&_7Q?HMCp_O+w^)b_CDBCF|na`(GS-l9Fb}iMMGR z3`MRM;zhcsYqYHoU-J*-qlAsdjTpCX675a`8Rr8A^Dg##L&M`Cq*XpCQQ}*wl@n%9 zWs4*QnT%pC9GZCQD@n}XVo@xgyQ&o_ikTsoYY7N+Mk8idaRh8&NQ->8Ch#?gqIYwI z7$pi(`;H>d`ML$4gCt*B1?wF{HB^7H9SDz5S-Fk&J|(nb%(O!C=&E7833;EjnnEVm zTxM)TF?(WOENg)lOVdbX!_CCSS;}0h)=Ca+X5jA743~0`v#{ zzrJMbls^@||B&U=1BJK=4Mj;Z@t-mHcYmK~c?ch#1kVi_q_VK~43(Nn%wUi=3YDc_ zN7fx7pPgvT<91Hy^b}(<^y~k`tN7r4&^UqX9^;b{I!I(e+btBOCo(VSo%?u9@>+fZ zMH()aagw8rD!8DvsufRA{!>;~C}AkBVL^tQa7+KB;V{&5f*$+KenZAg?G>D|PU)rA zK29%8GAZLwMnUxFy<8hgWKX|UL}AapmYNm$S{LxoP~hUoa0SHqgjbid5JPl%I%7ey zLdCn%?|Y;_@rXM}x=Zd0c3^)xLN*xP$(-6m|0xr1R_+|jHNK1NxJ7;r8a8?oIt$iD zRZ8CxO}aj(Imb$)F5dz_k6G`k=%z{}oS5H_(tu+x#$tN>KYYDqP+Q;k##86dy30mfWGr;uM!9494Z1t@%ysoAE!9EdwLY)M0VLEH z-2eCuzT*nD99}dpz$nQJPAU9b7LAK&9mD+lkvGLFM5&-KuN@&FHmIOoJhw*^%-viB)s3`RX4;XRehI}_}1&N2s<;k;|ahq%kH-M|fm zve|7H(kJu_5A#7R6>NTgmuyR4!AB|amFb?LblO~3GPj;@9UgqKRRR#2v+yy#70lXi z)31MVmZu}Uv1ese_Anpgs=vkdIX^-!87$wAr-e#S6xcPf`=Nm^K|w+J`T11{+OePB z&bqkfxZ3Y#WN;(*W7;lwUZ&(|;4Ps+M}wq90v>nuGR0XFPwkWb#p;cg%G$=mkM@RX zUoNrZJI4ArXL9GaYgANID(lF;D~jn_SZK)kFoWiX2WA9Bh-c5wf0~B(dczyKHmRtb z>7H9vNy$ZxjvRlg%WC*3r<4GPsv5%W7w4Za#hdQ|D+MktZIKJl-_cyN2LyTTo|W znp6(Tj>u0=u^g7Nqs>+cgfl?mK1iQDEoE4RrpXaBG$sth&wBq=?!%(_!d#PUZeg<+ z5i2x?9gLDdy+{b)qI0OF<^YWEZ&n*NSZ@R?EJ}w{PG>g>~dRB?o&0#YIgw3{O)W=KyQ) z$^v}s@;_2W?a!`X*yG3PsLOZu)Nk%hl&)VgA|5zMY3(_=Oq_o)p9%f^^~+o-%H@Kw zQm4!)M~_ipPGe)w+@-Lj(D`&8Af=R!6DoQzKa=17)V=|?+EnDJW#W4z(^gQ_ewqtH zlVFeaK-v-n0&5Gu_gk~Rph^~?E{`&ut(nz66&4h+?hRI-RbCQ@Ck>2{-(M1I?H$*$rrq3FI?}N#qGKQ^=qwFN;ZKhOuG~{ zHD7LZU-=n18QE%!Pn4nFr6bk#uWZ*9uDi?q9erM;EQ9-P1TQlN zEk_HhZBevermv)HLo*J)zJ@e$r1Z%yIsbDdi_1AMJHBtI9_$fVXS8i;HTAvu24~Sqjfcui$ zk_%FA*mDFMI|k=@ja;Ds1`2FBW8zUxNe6t188(z&7hn%c_0rL@(gYqC>2_<&ZMceP z&CJ`M{~_xDm{tyhGjBJS-_hgb@+Fkl}2*47VOp{F(Iuc2atu<6Nt>1_dom z%;u(7zSin_!JZ15ssZzTrUXRgFm9P%YdfqLs3!TxxvC6d{DDfdoveg|>(>P~1rh~u zp|!{R3o|0CH_QCxI^G%97)or>^R)u2iNMeaq8x!jA{_@Fqo$ET#U-1%bI;crZyTE1 
zP&ZT2kx%ceE}N&eTtsdilJ-Bc8X#-GBVmN{Vlnv@V#Str!}&C$<$M}QpLE*w-#Sa) zd#ox!G-v444CnE#;~fv`wT|Rwtr)Qp$d2xx5Rov(Ye|qSbT@{qKm$ohAFZs5`QJj8 z?bBGhyXO^?Wa1~ve-1}b(%-ayuReSmkH;Z!hb|-$$B^~GWua7UHMOwnt(N7rwg3EL z`?`#}kIy|&kAwwu^(2dp+D~U*t0oL5W+i21WOxpcXTAr^W!!FfUIrJtKc`&rW5MmO z0dO>%+7W|`63sX8$4`uhMLcc8G}ny#HBSXS{A&*Gghku%(98JuzHe5?aN+<7=X9w{IPeI?>a6+Xr1PdWV9CnlB1q&7_vGfs!D5z%hb^BM^d$8zCTmUI`{3v6K1(KCj+*F{!}MJIsEXXbrP2X5 z7GxXP!u6t#cKR7%nHa>ie zqGoPN#=SgR5O@)!H)Tk^C*TtqVX@Y1O15^kaeErAh&tOFQ}b40!;{wsnMLUbBAzNU z%xPj7PHwE>aMn1V;GLIIbfG%|<-Xdbzr8x|JMJnt|M z?sLat(>Q9@wtqZM4^~v1&C}#Gzr+dsVnyw2eSIddMq5}p0R_x zYq%^(!5wdTFqx**Q-YV5pTAlxxcx3x`)xRaANcX?$+yl(Tw)`FziKWsufqMbvMHv` zOv_BIT(v6uhqHyU^Sr>r+U*QIbMU(p*UP%63o}qeHS7gE9${MrwdBARAvF&|4;}TW zvAYHvUn5=D$~TQI7&i(;jr=l1f6neP)nWr5VI4f1^2rU0wf3>&|9DcPe^0;Jzi)C)pwW(Gbr4Oz#LPlD+TY?@L$crfJRwua0Dwk{J+T9vKdNF=o zWlVQG=`PGrnRqZ>l7ILWd9+`ySIE#&Z5{OoOiAaKL|?;wrYMRT@(@cSQH}Bu~}=-+ecr*NhmKU$IH;9b0_M%@^?Phil3}n<-^3vV%tcAGainZZcpiQw{orEN(5|*v-EdIJI?c7&;AGrWK8~ zEiSqSa+wK5S8Xtu1}f658v~mzi&=C8X@Ce2-AO`(!RH^vseSN~S|y1%O8!a5z>2C& zZf;h_7Udel&rBSEvAS)XSDWcj0Q-j>9^X3W=(Yc@rV;bUb6v7QYhP2tx) z#Y5W2nCX(bQSYDmuVq)8^xsC%PSd8{PTw$is1Op&LIqBK`lYiD@SGGcyGfV+^KgtX z+e*;v>~J~-GqWncBymr=4(n7ofEUn+?rONyJ6BFcM`XG9`o-GJ$QMue!Ho1>$G)}9m^V#0+%BUwbwX~Dxd0J!$Ito~C)BIGS zRFB(cR>s~bMS-4*ZSm%b5o(r3jIH-&f_U-9@U0Lxst~z!>byI@$Un!7fX~+-W_zCL70`n(Kvy*~lbEV+pvPQ!KWd+}mAKBt>3zE&kr%l)_x zLM8+KK^)!m+UR(OE9P<`T8PybktT<6Jdxbu;>XWaR*$aI%OT%oJtOqBA;-McDT;2- zM3Ls4S7t%DZr^jFd9JT?KxH>YZ`PA2PyLjE)M{}^Dw^t-_fR|#@*_=O87oQg>>Olp zo``e#mcTo*`>B-`MusxDGQD0>nC}o9rd?kXd?>^!Wu+qz497eE(HP|3&6DUSa(UOA&<*P3{JWkuYHxaUck~ z%L~{vUGP&Oi&D$-6yk4l%}@`c2eLlZj;2*WpP>N>xts__?ZWBx$%g1RdWMrlt9ovN zGneIaHV**SO?Ovo*+{PRZ{L{7$}8{9jvaQ{JJy?cl;Zjx+?u*bDz~7Z>{;6w5&>pv z!)j5f{X=Y^upm3Wn!(Hv0lOs{>wO4*xz!P`5q^Y##KD4+GTnC$e4THR8xytAffWhKmGSSXhI$0RUs(_U^o_k0} zM*7s8)5)?7JLR`eEaM`-ROC%2e$tc_l_~^u4&FX(IQ{c)agu+1jFOdlLRT0VI^|La zk#)9;wGVAC9cX4~5^xYl_ZLy+minh-oOc2sj`~Qaoch$8zg4!^H(xG1r#NWV-8xu- 
zGSRF9--lhq>6))i%omadst_1!VG`tmxCf@D%7k<=0FHh8yQsYO_XCYw{rkV$)rIhT ziVLN)##Q?L5sXA8F?IzBzfhvdC*v-M32``~$vuo5>i%R+wI9XUZ)a*4z{ewt9o8c{ zz*5dQ8&HaW6zw7EsMLhKP(^^6$z-t{$M0EHXjk-QFYSewPfoTH(|L{l2@R}K)6rfL ztb0}mQ93ygo}ARU;L0;*JrQR7o!Q&qEel^(n4ofXNO20`J)v5dTcWl6Lc{yQgG_Qo z|6d>I(rrN2Pnm#2fJ1-NM~qRzH*gnOB8nTt)2!bclK&|pAkMqyEYkh&}{`m*92Q4)}`V9=0pT^?+qn_+oDAfMAIxxJ$0mDR@J+K0Il(K`K zFk9RHEOJH6w;6^P80=#6o__u=F#;fV-`*?g_k1Hi{Z}8DpLIzD6B>wvt>v&k*2CoP zDfog~x@)l$GBfK5cB`9Bi_Z44Z>o4zUAXA?4;xNLyXSNp#!X1Ma8xfRD$`sW=&PSV z38yPFyD`Hj3KO1cHv$BLSWcMb>Cl`9&elxSo0|2IdJ0A&&2b1KBV!NE_9>KW=MF)` zzY2wk=h^x13xr(KFcF zd$*e2GYu&ayf)wUPxay(n*9#=s&$i)(`U%30sY$8d~}2s(m)*8jeuibUO8>my`_oR zo;8b3{C@-1@t-Wv7eZ{x{GKW|4IL!PWbQQN^NXQ7k7ICs1O%aR5kmFpD9a@)wR7~bvLh0ZXB8|2tDuTh{3gG zCqJ6FshS*rX{5`?J2e4>Q*Z+RC>;reT6R^nq5~r5sDvG#%TMV~Isnq@bm7C&g3`{% z>D0{H@?F05EN!km%szKppwPc!U7L$*iK%~CYJDMbaLux;+`E z7=|@Lhu;5%uY5WjildZql0lFy(uF!MOQ3d_?tQg>v+LE!N|jTZ zQ8`}ogIwW2bWY-D-1S?jSIlR!DuzH{GZO_TOhF!zK(wq>-w1)$emgiQJRiPKp@XaD?OAQ{@8l7gaupsnM1c58gnJpHDzhW10geHr;O zIpyuG9xS|B`ocg&#Dh-%(9|;ffeK$jTI#mxAa;+7O8spI`3Hdh5sA=AW*SUO-i&@| z`Bw(as&oz7AF}i+EVf=M8q|o{s(h@@l{P2{nY3=d_#`iKs}TEF&&$@HP6{62p+{H_ ztnsY$Z#!v=10`s+bsX)hSL}l9{2xU`B#K!DNGucd4fr@X|IYApX%a2mU3s?MpX{U3 z+DKk-WI`yI+Qq_UBDo8P#>c{gKjoT#g9WbdzSqOXh%Z02sG(#fG^6T|9wS%e>9Mf^ z?|ZqkEd+m$^Q#miuZNIGBcANeOi8Bq@)!* z%M2d)dC2K@kD%;kt+O6DjNm=Kdta}_MY5tZUta+)Q@W?Sv}$Ug(jWW_M1Nt}E6{9< zwEB!Po6el>6RRzI3!hxE{ai0jF{`1i{j#>1w&}Q7*_7zUA~uH%nKQhMT;yhHnr~`s z??k%%2uv5~8q+OTwXx}R7^I>AYHouPNh|CZPnqA{AOF4sn09SYKWYQc+W^)KnbLksC`45{8r1wZHUdpBwkYOrpI9Q{l~hcvi?)=OH_-SR(bz9$^kL>VE&C?PjVDQ6$;_iei#hSTX)@Ez}OxcSZBSB8|lgJ@QZ%8oDUXC{B<7uw0ATSm_w?cI#LK(Fw^6)j$+e2!aPy z?%tr?&ms~(M}JrUs@=1EHkiI0)13S=_7)20o>KA|&83ir5h6@b4O?!pygb0u{ipLz zCoyF5FqR`|9Dp8j7_!l|dz4b77+u1T+N8K!bE=HTIzrJ^QbHZ|pB3g$!^+Cp47{A7o!J{ioT7VD zzoYeeV_Rmop3TXH)Vy>*ll=mmC+iYtF75#9O zYOaS$_4nt|Ny1lvL?vL(iP>3>DAQ>ze-F2C{&d0C>v3@&^2VGP!xV_5E##{MpFA9p zfF7b_ke0}}+Jqz2)^2W&k{DCb!=$R^@U*Rg7d{e7hxqEbp!d^E(PDC~Iguar6{CIZ 
zlVcYx2a-aaagObLoxEXB+jY|#VM?6;_GD}4Kvx4wy;;B>mdpE#A~L8sU z=9v~t!ZQDH2vO=c&(rT+Pgm#bx?X)1_XG%8hm#p3W_LUhmO}`x>I7cO1SUCB(&vIc zUK$FR)i3C0ki$tIYOXn|XZ}=Aa*n#5b_{i_BEnW<*iizVzdS5N+vPZ5SIAAe+fd3D zC&-j{j1@6JG;`s=M0>%?@?~5Uqd`w@xS1!!rf#(3r2r$x?Qzj0mPpPkRVH2jETEEe zE8Em`Qt)1N75t+^C?jl4uZSLc_qGr=ckG18f54JHchUJ?qTroap#{}{L>xo$Nzr)^ zq;EC7&cxttr97~-Y{k0=oDL1;{Yb<)cC7OB)sN`Y)DL|mczCYMqFsFq`4;NOB@hqu z(+u0{0nrvXMERXL?~5m{$#}l)1X#MzPT{He)MloH z!^B@-FBE0kcboG6?OGY+u@@lUenOQP6+^oYAu{s$dBS84Hr9v?YL=rv&$Q5X$^Bv7 zb<^!F8p@=u)af7$phMqq+l}@oyN|7p;^6TyIIQdLo&oP3f@3!3k?**^>91xH1`o7X zj^YZ8gBb@oAb69h$DAgsL2~WuVF3?;FlAPDn5XO!D8tI0^muQsH3$D#%>iG?;=~s@ z(pp{m2P+(`4JPXYgz*}XTS#HDE&owWj|VFrV!VB!h86BbR3TYpdT&}|5E~0SP95At zga?qbx(^wvuD`5Z(fe?EOi=7zsCOG6vFWaH=X4SPl}DeE-7knvQjZ*%o<^~(c7M&vaR6 zX`2Jtl?1jw1Pd?n4+Jr7q%V+I3L(0SyISnDqm1cs)S_T5|C^TOO0-^nUNnVJ*EdpL zCA-B?%IMO$6^5g+f;`JTCD~yQRmm@;Z20X=R zWOkls(B%mpDSe;8291`Xf`~3XeHwwo;b~qJ4f-wd^Ep*|FR3k(9Dqj7O`c>Rl$1{Nk zfcsX8BBb+!w;1Zk`_{bb3_nxB$gC@BeCe1hp2@h^UNVR~hOyOoCTvq{;Lm-@gzBYN z@=i=|PzlW!z88w4Un?4S`tM6?=z~!?ke!LHx3>@k2=VjPW$N_U$mpW)jY9gO^;UvQ zeHEB1<2@eTa8n=~rAfzQzPCyH=7q>}gFOU9)-h1CTh8dQQp3Y@h^7ogo7J~o2|LUF zVV|$9#4<^(s@p#WzfEUpX<^8BuL23k|8(^u4`9FBp>f2^54Ifnwb^yZ^ajv(_yv7c z7T4@fPpDe0f=_&P60+nf3MmZ(U~M!-jn@}rU%4NPSuAB;jl~ElCtBlg9)#4s@dm5; zQtnyVg!2u;QPY2UKZ1@GcZ=>`96h1C+YJ6fwdzl_7=9e9EtE0^lx2%bp zY7Nl&=|$G&4Mb(gKNd;w!GJ>wLk)F3T0BTrgIy&oWNLW+0?o6PwtJTHGovt8lWRpT z!EuDp{>iC9!*Ret-KT4v4mTpS($Xv%UB&keAq%io__PM4x3h~{QM--ogKn>Eg34)V z?0ek#3VMd)0UU2Iern>I`K zcB&hV!gy%5%EYaBaJeu-b}Jmfc}QocVv zpIW6~2qGH8C^G%L;xH@gzjS_Poh<0l0lNiFm&rK$xzI*EMcX-Y^KXo~tR@Wyljn5}+^MU2yecYfdW=SQ zETrgRGY1||A7F{f@Ocl1lDEVW+_0$pb`DHBT~xp8uf29rv^{gP!dr7v4j7D*{-9xm zL_OPgrAbFmZ7y+nu;{{hMk^_ySHv(E`SVP_GZaXO}(Hl z`dlq*w4ceCrC+bln6B}x8miR%R5_($)P)$^AD?6$%r*P(hBkhG)m;y!mp;_x;pdZw zWFY=`=JS5}=f?X-V8xJ`dR$%+?U=wLlui`1(c$Upg}5(sHa9mHBoXe-a1%IraA1P^ z*0~oibA|K$aKf4onkewxBc$6czo_LnZlAJMexh7nc3%U{l*I=*#8(Oo%*@P85yE7tkmBLGlW!L74X&W@VdlO^ 
zcr1xNqIl1J9VhR8B*^#W2K&7sg-luR3Q+6YE;mbWIx$V<`;B+WRf8(a@|JOK@(vEf zl&o``9Z2FZ?L2OvTGCyF=T;k)YbfT&4byDVtxJ!);Pi8rV~hUm`ED(HAD{l{w)w0w zAAx=Z9TAX-1cm_&G;ep)8G*6nlbu;tNNmW3Sf0BZdcoYjg)(eqa~TUBB|vzDTNhLMK(&e z)l4BH>R7zLxY*>th0XIcKjr;6Ouqdp-LgEoMh9JnU6l8`uV(|3!2n_BlN$oAF`~OH zU)2k|XT3b+C5~N(#G(Hmp`uI%u}_J3qIJ$lSl6$PPRA~YZX0~02PPT#z2ItECQ9eGdeSl^K!~UBz5{JqFI$)((C1ZXjRu1ici6GTk zYTQ}x2Pqr|B4J46X7Q0-WQ%n2m!2B47!)jrnRC=kP34P6K0jQ>2Bvz|nOIT;y|fr=%vZ&wKztG-29m%l#vq+3VtTfX1J*Ckhr z^^QF?r73zgAlp?)?NpevG3Y3$)T&#r3pw=JrKEZ+asK(dqv(*PXQy<5$(da&NIh*D z0aRbFu_xn4h3%cx_sHcgvw>;}2+BgVojAGWV=mKWoCGnE;b3A1wN-0W>qwxSh!pBx z_fJ^S$2K>Dm--k09vlbG*R2xs$|{YgE1vFP(sHL8>hS4+d*P4m{49f{Hr_miq~wDdu(icQXQU-%lNIi6SOF^?*HWvl zdpso&naGFQ1(V{kK*?B8P@@j^2f04^WWR`+k0BtT@23LwIifC}2#}fr4mp=`b+YmW z65-FWD|C#N(mZE~=ooK%?M!NHlbxO^NB38jho`3}_Z9PM4k8W-YQQ%Cf)#TS1D9V} zLu)UZV!(Hw(+Wjtm`q3bq5sq{ne>THqF%^i>k<4+>e~sNZ)Y2x_X>NzNuj)&vDG*7!or|gc_9*Fz?)#6|*V<=-Vq7XZ=2|0Lx;;B=i=xA%9*2=f< zE4(-`l9ub~cs`@V`+rPkly7Zs`tQv!oq}1HuF;ymT%CkHzIBd>16iCyxfgLVykQKF z)!ekFUPH?sdOP3t{F^B?9slTB&dTvjs3#*`3D595d)IjuGcO8lFc?iO~HO?*?DpT;|qglzQ9z?&X4R9X3U9PfT zeYhZ4DbcOi#LL{zCEP7n;%t)F#pX>M0FQmVj42#q(DcntOiaEz=7nefj=0!mLpz@m zB2Pc|i3yo>LrNaCvaFpr9H-4Sh2dHS6MdK_?eL_pyvMn)vsf+R<01{=U7w?}>E&^9%C?J2(Yp zW&t_}nx|ixL@iD8QI*-x_TQ8^j@c&FGFucL%{YRwdPQ6oTHHmy8d?aCUS+#DOS0W`1-T1xA;(DPaTbz~! 
zCB$;r_iX70e)7a2r4^_?>WaL=iG*z37HZnH+6b}Pe-NS6XDfd&C0Gwfb$tnqb#p=4wJ0}ky`1U=xeVqUV4wD6X3Eo-=Api7_mE+ zKL0@EqUszE>w*WWPxm|=Ts|5huMfx#c$j4fYAWg~e<@k$FKxIGZL<*cB-wLp1@Eq` z^cm3Y1;9$tM(RAJ46npr*cW-MTT^)w>Go?_8J7s(@R>8bP3{ z{v;+vYZP%3-%S9Mc)?AigbOKGl=Mdxh6^DlwY_L7 zp-#i3@j2|xmJ(fBZ{ZdO0xx(6=tF};ce#6gCmRBzas32ILv1VI)~EJ<6e#`3xNdYTO;)AT z=d|lDn_|1o>(lp9jJHGOxd$6K#6|~tXDQ708EUkj12#`i&feyL_|>TT@_D(J_%|s} z8(=%v@;**W{cWE6vX(Yf8fj_DfIzpy7$XQH3D2#n4d)=lM}fpH)O?3|v^}}M`zH{2 z%o0JU9%detuBL}u^048Hfd@Wah3*jojk|g@sN{~6QN;FfkIN-`+mMw;eB=Uoy+>Uz6rMj;bAP2 zsnrasMJ-nWiXgkbA5Fv+NQNMaztDW?9;tSoFS2^EkYydNXkZq2l0h9=U?e3sGY`Ak zXuZzfT#O7B6g&lQ^$QjA+tvx_l~H|4ZgkUYz4e}zER7J4?@Uv?Jk4=8AI zH6Lt+*SQtN-20SbuZ(;4B864=cbU*87k{3?`1bu|?5)5ewUhAaHvRLPl?6AQx}B#N zYgt+ZGen2gws+H|+44l!_gS?C;Yn~>sm_t+b|sa{y)THqQiQAIknaz&z4cXPlWi+R zaAE0jY@GW5jVvS_a0+WIs6X_#bC+h0$RO7xYp-hZ`FHM^<@#aN`oogy*op3ABm#jq zUqKLe!C3j&8cpSCSdXYgMmFABV`pzj3L5cPqmBD*o^Ij&C0WAX-~NrN%It$$ORi@; zW36E*E-m5{tq+&WznhcJO-8fTGu5VsKa(kTU4Na)=BF%q3ETL0`T6Cs--pM@&C=q% z{ft4jc^gv!i=y0eq4BuZ!~-{oUzT>v(_maLUg0Wa-G1;n=XFU{;puGM)#Z3|lZHMi z)fx+nsi;skon7Ac@TpylcWq*FdeyyQ;r-w;=Td2^pV>0Bk!+6UHrkYenIMvFa+o{Q zQ#~J9PT23i8N0j!#}V{s;3o%kRl=f)3L$>FK_%sQY}B;X?6A0J_I(?jGGnV{2cNHU zzaZ6Z3qPG$0uedKWtdx>pt_d$?bM51WmRpQ_RqIgAt7H8ir-oBW15Gg&!F|K_tra- z-9*nK9`t&$jo03XO<%HfYIM15F8AG#XwgvA;7b}6GK)nH$W^FN5aNr_gLt+;JA}aV z2<3T*2ZkMssZH~MYz$+u!Z_su^9^)lpfC?Iue1tpeqB6z`0O$U#Q>Qx_* zL+ICeWxhxvmvfQ!U_7~O$2S2c-dQ@zmzK&UxC|^r8suJmc;8eDv{77EP)`tQ#^0Fm zZdgMRidCG0PZ=UYC@|g(r0FEmHN%LZefDM>(mNpqY$IROCDK4~FcgkYFKo!i>QAD! 
zPY9y~Q5w1(mR&l#*rMA@rK)2s^Hk0bwMI%o2^%NNWc_U{2HLsV|ut$2dh+kz!gfMOINVj9QH4)$^>ri}?zH_*rNH{*0!Hvz71>WN%G zDi-Y%Vu=29st;&VDHAtjNDdP|`;y|)D|H`Zu$32Q)n)$Y;KNfbB6^1F;G2(P)dkVS z`BO}cn2FE(m5|`b$CunOGOaG1MULnJf1yVZBjn*{IcV9H*QF8s>;rcF27N_JC)WWVWagdCm^!uWv=wrrmNS2 zMZ?$|t%ya;G2-iM{*M#z@=l_xi3uZ>b89idXe~2sv)jc!SKufjnSQppke8zJyjz84 z>)xt3%4uO9VfK#66Y)TMEgSnjH4h~QhgLQ-MobIjZfyeZeV>j?@3SM%Xs5bzivfaT z8(%Du>!D$68`c+1Ck$hqCsM1!7XhANoE~=nd22epAg_mNx!&8uHQz`Ye|-Wc;Cniu^1-7+ZT_%u zB9-U7ko@6d@uhhefBQXjF!#^*=e>TV!fF!1BYR!$fW!ms&0 zeN1z+y(tjYH7k_!kd{yGa{`1gvE?X|bqD$Tv%VSx(HlU7#b>(xB}@`HDhM22L+HRn zshH*T#1PTP&Ru}&9}}QCH)Q3|%F4Qoy&oS7D*=9ZY7F&?;`Z3l(&tdb)5H~Nm5qX) z`Ec75RYMaFtGW1j?BZ|Lj_}VgR$cJHwa@8cLie;gdxt#d-Z`czDQB5%tyQuNjUO%E~n!@e?!GI2as_B&*71d)0 z_9)@cyX|UF88MH+UpA|M!=}HhO(tuI3__a~nYawz+YEhC4!Ql5i6 z&=9e?34R#AZ*tc-$uJnVmmZdu{bC_r-Wl+hsg*>RvL{^d^Fkq#kLlbBOH#flOD{nV z{kh^ge>NL0G&I-446G>_D{;PL-NlH_8*7?dRA;cKD=q!oG$Mv6SO+Sq zk?%h_i!B(Vagi2HQ$62U5M@kWY7P>suh&Rg3K*u!Gj||KjY9bh{rj4~&kvLza4gPS z`1qQbARcg4eik1yj(R3dop?M`bieuhmm{Ylru|H$*J(Lo5ygiQtulJ3tZxCiP{2(d zuj>jnn{$dy^YQRL96Q2?d-aU{D_CV1#0L~L9Mt-joH@Zx`d)cZ&Al+!ho4w~=Oo*x z5!;D&qbFRUHWio{;6@U+Zb^dW}B~f>>AgRR3gC8-Qj0g6#aOOD`KxkcPfuu zpj~WqWM>vV{M4wcLHG%+^ja!tjNdrX%alJfbn>5u zyY{~zE6E7Gi_HfwQYoxVwQo_O5po0ZEu{chsTh$*TgWkzTQLEm*$Q((g+?+4@qKgD z0+QeM`U=^kz-596Kfgr+WN|rbf_e3KB*wx~g2;`3+|?jYuff8$RxuxmPE&Luym7DR zpA4k;VJ^oAZbq9~*G{j%Z!8*MBWn#boe`3G#V9N&uM7(%W8_N`5P-M|OcpPn<~`eNUKRmNh3y=_fm!ioe^TmDuGumR(c zAa$>LJ98>Fw`pE7XZ~!=ZmG0tUkrA4rYDd#7A4xVnyT?-U~f8b*wrK(vmm%UBzrXf zmF#C<#ovgfsnz4+YQD**uPfAT$a`=*{Y4J?>^JV)e2G)-F5;7?4Yj@Z$opAnJ*H)w zgu&>G`w~k^=%aMd(^|542U=SN8A{GgB~+_~6$4?S>t`42k%Jn!Xc6K*PyxED z54UP}<6@qPX>b4X1U4|{n`7Ka_I+-wmcQ@l_$i@B zw1|%!cAEP08=+*aN&V=Deuy>vZ_)gz^Y=!+{LQfLhqMp#&HwgouvdSmwNFO~e6@z3 zFZvf(5>_DgHN6rb$#m=g3$Gztxm%Lw7gd#^T^&eUl5m^}mdAMl8q{(a*b{Vr8&Vm*XkTxH zU(@3PhtKe2%VzP479^CcYsxe!ZZK^R!}*ikEv(&|YCu<$!7dv|3A=4j|t+>=6kTr1O?F? zsLm#p2A&_YCQ5E+XMh*o8(Bi1Ro}Tbx&)dJiz7560(k? 
z+`@y8rS*UxrjJ;PRhx@1rYrk?$~jT+Q4n}^`zg*+j|%Yr4>w6_)w2}cD;-m`#7g_+ z%2a=wACg@N8r(x)n)$QBXVOWeG@bkLpEoQjA#O{yvXMrT;$VOr<#WxyGZV5%xvrT1 zri)WhK|rg{)mt$1!~H*ngB0KQ3xY$NJ<3M*&i+eK@aM)WB`A_eiL_5xfivFWb5?@}OKc(s4KTiFrESti1S4ct- zQ!gv$4D90}3>%BWMZ&|zCsQ#PnJJw+>P$)bYxrT%VtXBD+ zAnq%v8$_|M2p{b=$gIHNY?s`=lf0UNEm~(~C*gws$Tum3cc@4m)7}Rx5-XYu7ukK) zG+8mSRhqF;IiD?#aUQj1L~N6}$M{M#h|M!c< zI&Ej;R^1CG9p2DfdHR~sAO9c1-Z9FyW?R=x+qSXND{b3eY1_6|+O}=mwr$(CZ+`de zQ?+l^sa-#3YokSs-sb3m9&bEv#QBKXaJ})5liP`kP@zlg|AlIyhbrrzxDMbyNuEsE zUycYzqnjmSTdVGD=l^4fV)a9484Zi*0L9(JbIKdOFmeqiZe-=(HVZbKybd_;;KKAy z!$UhqQCJJ0PL4sSWzrRp4UWOd9}QWcA#vhdp7KdO}RAku$k+7XSJS+BmmLrisd=b}Bx$^h@PV8~B_|1f-n^stNn zj93l~8Ye~n5s<~k{DWnYoec8+&upm-k{`wkdn%2i;(vg*{DsI!`x#7&jRm{^6XS`< zR_32^GGhX*jp9FylYAgD(mx?6*5~T{=LpFELB_}!LvNxW68s0YOflI{2&JntbpJVm z_#fsC7kPc4b=ZG|#wa5Hgs@$zfQXM(M`myl%LcT75-s zDlrQy0#nUDfYjz!0w)5?#Di)5I{~XDanHXXaVHE5A#N4?Q?;iTlzn^E}8!Vv9P^^WU@K3 zV-O*bNHdkh6@;yb?)h&17tCcNqKK6rhc)(_--MR{3>J4o8xAoXq`J)w z^FT2cQ)i>gsZo~A0(l&`k+Pgu9=_4mbn}FTYu!j1@`NA;&gTE7xx6&8|BJ~|NX5rz z<3J>L=U*yJ8}jR$a*P)W+d)!0JLj3H#oGN2{+X7`cMFs<<;xTPfp!Le>6~C0c0zti ze$D?T);8e@qT!yy>S33u;U*46p>eh^XYv*linllp{>aoIvW&N(BT(Q8n&?_R+H=rB zde%@46%eqNc-LG0@y5msSorKl5$rXp<*_a3xc!$akSwOPm-;7Q&;XV)d&+^_QLonl zC&W#;OK&iiz9?=ovpnB7$4!J_gu7(;6!W*qMrdgfaqK_ZOgyTe)U6vKq^%~JIV|^ImosGs9mw@#av2Dhnxmo8}S(Mf@Ck>gyLe)2mf~t|z zGY{ir_cw0kb_t3)voa-EZy8pXj!8(4)BXApzBN(=p<#wmZYzMt!#`zJk1p0UPsv*_ zLr9pbs~`JuO%gR?;K+;u=fYx6JIsT0=NB4#I4sgeSMrcj@O0-iqokXeWN!G8>W}?H zUb$4iy$w#y`>i*3pq@WUW|z#&zqNi+v8r}xUQ?V`a(JlDfP@oPytwmsr8lzm=-!=j zylK!vS)}r^p!UEK-w^e7VE2&{^xs)5B@qyayv*dREHs^oKBMqtIB7*kcQ&fEw1Se9 zZMue95sToyvByDgQ7+^>qJocI%2u%NlVCJ7or>1WU3e6$cqdvQW!S?>X#7;(K8m+l zL%^g}LqioZr-7f~V&W8$VY5@MnZZS7HsfdY)hS-`D*oHvnW>BmE>lad>5Jux?FrAD>2K;B#h8xF&D z5}GV#k-7%F<(6sXuV}_!tL*HcpWtjL2K9MA-qjbkC-X#$|4wPH#pJ$w9oB*!va%Po$+DsSgXTvm3k-TLy*b0?h`a!2SPFcZY8mG8#?^` zLZUT|_q|yWI}ADsgPLQ#>(&_*(Po>m+2(LHV`VHo_)PN7X45FsS=j3;_i)I>HQL_i 
z#er5s{WSSNmqGvWqU6_P1K3aocxp>(D8(nZbk~KsvM(ut(16b4#k^ft5P%IFS0ts2 zC7s)zT}k*+c=4OjD6RLg@tNfu9sTpzGvadKlK>w&<5u0rQ}J!>Hbt#Q(mUgSiAWETJa(9iK$%#lEhaNxKr{RNibza-Qwe;_k>7DYa(I-1N znx9pFjLr&~vt;B8G)HtHkN=oWsFiajBXcI+ZKppX)jFu4RU( z1?!8ax7sbCHjgJew^@Vv@&9Al6Z1cMZ#0A#3Fb@ee7lXGt5q!K zOjsPh(zUHGxr7cfsg`S}(=f4|8dygD>dHV@yl3cjhQ0AW9|UavJDTO!zoA)PIwfa5 znY>u0%T%_F*MSqI3>qz`Sq{&0?}tp_PlJ{v;Z`f{?@>g$&}~$^uS>gsYgpX8cKJg} zwjXFYTze&5XhNi2^&2qsMQ@)vE`2!&(gbA&@QB<9aRapvBfB(VWqzz_xSA$ivdO*Q zi5Nq(*iP96B{?dRp3#Z+Od(JL{k%n$21RBv|y2>b~3L$4N6sCM`D&HzVV{=`)*$?=r`qTdlj^6lBBrGaCDh>Hzh}V@A|!DiSVTg z4)2kMGgKmth|N1Fci0Ncn^3} zzJ{X+H=HQNG+_fvE_|_6<8Up_G~{)VA_?ZJzMS>fp5zncaMxQ_Zu`tNQsVg{Pk3Af z6IeZ@nV4`#HqOVbojJ2x2B539&&{iuC$3bEG(6VZ|GGC{3#^e!YwE zkzB?5W~?Kddm_|(g#w&gK-~v%3h07K<$D)W;g7(c0k~`iNP4FYi!}*nla1#SK6`4a z_$j7$)}Klc(zq{-@Tk?@wiQ33pHPggs>G zNre@46^*Bb_<`d=R95|s6^Y{agX3~>;X%%C*P|`+Zz5AjODBrUM)SrnK6$~GK)!U3 zqrYNsYl$AOuKdy^keYG;>Jv?ED$Z20(Q`~*;b8qNTOF^z=wEqt8Z4iH@7-|0yFtpD z@=OL}5{V)K=800#FwIg0J*2`fOw|wE1fGb^CPAUmBHHwo*4vt_R$1)!2Cx(-y`DGG zMJQ`5o~sg>TQGB7|Av7Uhk`ZPY-c5<2K6b=S7ZH<0IlwAJkBqhzwVN|9VO0%YG1>v z&4mOYT`#d&JugyI{$j`HSP0_GMr1o63)+g-?c57ish@qbcK zLa=?L95j%mb+epD)jn+}1|<^O817~(uk5BFP&jubBZ+t%9Z^5jE&a)hkTK~?J9Uc*(T9%&f=FNGD)62+`i@yHB zK@n$pw&Mx#a+ccZk0;R6RGy+e@Y$v-)A|%S8`Pz%AUuH~S_a139MgpE@k*=1b#av1 zo2xD53K^rPu#~Dc@@M{k%CUK`jVmo-gRTbmzyw6ge#Gbbj+}^rYWGL)eplZK>Y|NY zCk1mzT(MpIS?$OQ3N)0B;VJ?eeUP~Pc!+y3&SGkX@L`V`#YdU?fkyYlO^2&39uHtq zX_C=+1a*laRT|b^bU88&cBiLW?>1U5778kxN-AjSdbY73?zn8uy4ihZZ(h9bi-NPM zYQ~LZ9y>dO&)bqxMV0H#k?HK=XEMX$IHhuUYLjW4xI4Q2k8jO}Uj!UTcvL3W&_+ zkGFNj!p)R872J)yUJup_d@oj;n|~Pz{Q@B=gu9@k{qwWey;&c|dpvv4Qfdq1`DEe# zkW|pN=CR}I@&N4P$+(q=ut7JB{KNRrWN{m#KuUp)Nf0D$edv4V1>PmlbE9^D+$xOE z%%OofUh6N4g6-%#p<^2?OIK`hQG~bZk))Lp2WtKOQ%2W_PC80*vRL~3Jninj*?>-GvAk=$anxr2 zz~i0TKdkHLJIOHEe~V^-cbbRtm%7eN$$# zUMyrjfx8e+kVAs*pYn!?(SmMmw@9`D_KfozYpUDTtwnX9Q-U&0q@_h&5mt%+B00f5 z4+gpZO_wNIU@xermt67aB7neWwKg&{Q$k7spep0* 
zU*5l=oD*iDpgft+u~C}`3qG_k^K7PA?%0%-%8vuBWM4~|wM^l>y5jw9h$R@x10w#JJ_E~#!C!N0rQTg+ReCMj?te?!?gVA8+>z42grem2Oi04Qd^T1sbL&9q=vPbc6| zD(lcVhGsM|Ydc^LHxKA|vCi&y8d@?W)r~RK7YplE0A|78@Cv`P&;RQw9ddwO)ihNO zj*c7@^gqX6#ZmZ@S4;2_}8+;yaJSV3okpKa*o-Cf}M# zsk6omJQ6@cC~2d}9@}-)!4rI2+1Z6BQd$ZO2G;O~#veFNQ)t18gdCiXv?SGr+Nrf%fu2^$CoI_Q_Xl`HSn*LB(to9rKhPgFpsSSLZA$oM=`of7}c_G%lO4B zuc@iYR`GkGC#VtoXe!fUDDxpTpP*a!bgq!w{SiUcFB-}?9R!h%!^8(=N-`Giw09YN?!2qLk>_)fFy+ixMv9TpzvKGj37za2My(9gWK z->LA4-zHNjt+u;;Edwo07UT&%8-uF?QUAj;7V&KZj6 zmp-4#nmia`6)?7(J=|YTz=3WU)fKc6SulT`G09d;J!d{mmCnn%=YtFoPK59MgrFA^ zou6Z#h7q5oy3wQvhr#D1Wu-Z;#2vOJ(r+kg%|$CpTe~pqUN>e)PvcHoDX2(U0SR4p zGkO8*gG@uK9JSk6(D@+3!c;TDq+|_%g|QcWg&d@di#dpZN$#_rRks3>KY1N-RnD)F znAbox?hTi7$RMFCJ!aScjYU$3->|}rX{?xNR1p#hF zZ7PXc0)BY^k|4y250EW=-BWZFtOTAehq%19NEK4fPS)(vtQ`nT0_cdXd?1~e_SB0k$5 zuhuJF0Fug>D6}HDiMw;&<`WyYuWmg$v9$TN3d}YAS+IG=CMF+u?BE)~AzNY)iCdVF zQ51x&K3!4sbWYmH9r0)O3(629$QR4$Xvy0{c@5bWE}vhRI!fS6&ZV44Ohe+*GLQTt ze@}u#@}=Nj2L0DM)@|27-EerGXga>G=8T3bmCNn!$M(eaU?f(K5&Q`cgVh^nY`!YZ z`F@hsgDXo#+YMBnEj%o&tgI|=KkxX5D-pqG)BWkJp??4Qyge+7(8b6@f!AQXj>l;U z#{?*Yym4aVeUI(=FobQBBOsTZNKQ3hKd0mUdWcZ`Iu7o`ZajhUfuk0Y^M2n4{&45 zgET@4^-Ka5MD#hb01T<@{`1#5s374}_SBI8nNUrz!V`NiMz#~C0h9NO&*L~G^@Wh}4ReOY&O17!o2~ktCk46;Cgz$r%X`!?;cWfh1F-I+ zN4NBHnqs&G)ukk&M2V?G?hg)q#a26cIm<5TMe=dDI4>Q9*3+=jXZ9Boo8LyTiiyfyy)CTvb0XrzL&+55cO*V9^EqFoY%UkXm@g;Cr5c_?_jhijFHPU24vLJNL zXN`wX`;TFZWnl*}SM2&@zshI#VFxj+E_GU@ z$(wY(OCBOb62r8!O74Rcd`+9yJ6CkbrZV)ETLnC6YjRy2ueJ&)4iQR8ZFsv&Om+40 zx7F*M;Qy!#Mw57v=L;VS*|WG;)~BsQ0~QH0&@kN94L6KLl?rE(o9QhPDJUw=_I!Q5 z3r=o0OmqK|!r^*+Kmxt?~h7Jd@=Q9ID=7!Fz*bGM9`0 zPh%$iZb2y-a_TvfSSaupKipT#-=kqnLll^)g zFp6ybxmwxoay^vVc)nEi<>-{2_N9o3R+JlX4Xva0%e-&w9C>=NB{(}f?MvcMS zuh-F`j3@Zq`T$({yS!p17j^wxQo1##b#(5ZiADME;t62`LwxhJ>s@xE-8xF()L5;7 z$*5FLSl|be5AV%gFR}p{*k12If)Y?0*{I$$M9Z8aAjpI*+C-#SA&ZMS6m?pRvV9Q?95AD)s4{(xyRDrDops+>Siuxc%&VwXroHYEI?Y38b5f$O3CKiRyI1Yk)>ers zx2&6v$XiwFjZi*6_I`Zsc>v81GcGJPTQJd1zY~8TOVF8S@2mszeKLwBY?q8tedO7m 
zcVBVOCQ})$R_iPXMK52KKc)M(@`3~s4DY?mwj2$M^Ve%Cf(nz7K1aFR`>OwD0sOF` zev^H6Reu&tQ#Gsga#o6+IB!Q6NO>+bj(>@du@FssC!8{Rra5vhhc;o#m+;!cl6^vp zPv-`NSF``=yy!oytN%l;0g^^C{w#Pq|4mY`!dD*NL*uQ%gens+#@VGUUFI_2EIW4Y zBFvb4dZ^17(nvpd7yabgY_V8B9ka>V`!dmfPg@L#28%^!M8y)5g1Aq=?pK|_Z)#tA z3tZK9<*Ndw^SN@fDjU^iLl}A{`4ME(^+BM{5*ilFb?8rQ9vKvYDH#Mup4X;--G#30 z8CuUWESK-P7DY~Cn4z)ucCCEyw~zBjwm*(~XraKFaX6;kY_k#R zGS&WFss`V&bRE=FRQ;uMdmir5?*O=pPgJbmXqkhM(ja=>N9Qo)Iar`)$1NNInd(0z`iv>sa?m!nwy0Aw76S5PAsK zZI3dp~0ko?xf+jyVmS_5%&1;LVvez-n)$7EU{mEnUe9wYH5z{bEdzT|ww^BS99H(L9gtHrFE!I=cS*m>bkLUqSA-XXh@r@+)pXj^Q4?YPqA z_V<*>br>x=14Sr=9N%jXtwnP~jNn)qZ-p%e9(h9vmurnUaLpfKnO(pZD&@Y%OFND3OHhG&tmclTepmjJU^fJS2T*yAyUl;=BS za+#d3j}OleZf~4h%Y=RO?ib+^J+6nQsw%iZVo_Q0k@m}`^(D_%u&x96k2}B&pX<)& z8+1=1h?$w^^)KN%`ws4DdK^10XmP@+etX7;!&-ykrq7?kpd8xhY`*AvR)?xs7#?n94l zpf)HdxBHQcGGg(#{a~9)b_KVRH=CE_b}qe)3_YwdI4lT}lxa}V-`{bM%0^3(RH3Gw z#%B<8w}elAf51-t4Nve$5a&jvMSA z%1sc4!3L%JgTeoS#VLpfs#;yHe3>L(UQ43KVkxYlcw)>@H^| z=S*kmo#D2QXYKD8<7n+GK7J~E7FR}R_cp43B_;XgW;S_Ozeh8I{_&oEy?xta+%H2Y z0r#gS)!APj_8ZupG110w?3t*Z{DFd{CPli@v}?ttr73^_XrK0~mk?|OHLcf_mpAd`$cZCL zouu~HSiju$m-P%jmc5|J_bDN+OEfIz&l+;?8d@D3-XBVl7enk~S!`?Uo-&f2(XZc+ z(nI&j0bZA`gFAm;G@a{rxPt(iqGdu9K_2T6N@KHjiq5wOPl8j?BaS2lo)fJAt8XG0 zmb&h%2;IfYWVUH7m6QDPRdQkNDd{CH-=TNEdx`LyGFFEEs7CfEPL>^g@cyHGce#t$ zA582by#ffY3{krNI4e4!C)-kSOw;{RD2oqDw#;C%6*lbc;(F!ONX&-fO%qmMSjU>1A4H%q_uQ|JNyNzYK{l#Dbh z$+`aa@US2Sg~xUH%cw1ex@|nwPaggCP%yo96)LR_WBlU!{w_hvV7<|{>8_FD9cdFHitu%9`acKNj2r zrbSn15mNSg4^mIiPwnRZG3?lJ@`9`2Z(7W9NXvOSx(hwT6_i;3rc=z*d+Q3f$HUL{ zX4y2RRLWU^liYVVY_09DdqtooQ3B+vEVGj&x$>fMv%J+`#ChSm&&PH4cYgf5dB8Y+ z-Xo{2UNq@EPD{xU1Nkoy3T!czcQYE_LFknD*qz3!b!dqEv~J;rp{|+(dRk2b3(5rq@rMhVCzhOqcc!{hYG_eI)$-Vt9#GCi$-5^Gwj_V<|Uy`+M&(zNY@NIP_Rjr;_u_dLE%*g;=J@ZqJ)_L^w2$D1jR&_f!Cmygset z)#h@&2qfut%E%kDBq#A-5 zKJ8eJ=;VtPU&3woeQ9U^+XRVl`Fk@=Et^!Grwd-Vs||Luy#bC3j|BL^D<1=zs;(|# zXk5x_*&D@46cj9JfLL0DmBo4;;-%Zu_yd?&zBG|3JjvZ zcQ#SP5B36;v?rBKHpy@OjPF&utX>0&}rI1 
zSW73&5c#1(g5yav=bP=UGDI)N&v={N>oiJ0$Br!}%gf6yKSy+LpswxyGLxgDqZ1PY zqoK%T-cN%jmDyj(f#i4|d+BB^RdfAiBk?bE>VMlO`dR2?J#& z6YYZ8DccjhHAGPi#o{q61$mCqhTPgrhBfK3Tvj>5cC(PF0g9aE(RU0q%H7;C##UMU z0HxR=W7{OS8(LuNQ91AI2|Oo&bH00-LRyR`VdoU8beU#awaf8VMQJpu&S&Q00vjK} z__HQP%cV**eo(#mrxd9s9y2ne)ys-HPEt+T9TlQo)P1hd{7VUm%h#M%xsdq+?h@6@ z`|NG)DNGB}X}@QS{xGP#=v9$G&r(n@<6KiyAjS$LUpuCFzU0rS4SVV6W^y>xULfJ+7r^@QjVt1nz3jQiuLjqEIdAdkhf(EnY~ zVhY9wX(}qqV8gy)%g=d6a3$=)$jB;bo+_9=r{VS#voxp1VNg}LubBpJ4V3-tlEYD! zDeqrUUq;6lMI=h(_mAjcJdmu^ z=DI5m?Wo1om-Fa@$I@Y}poH1HOvTew^^kNpq%l-X;d_@DL1r2JHh+UsE|LMvOgf9L zgH}AW!tHHGKvtT`WT}hsxc|LdoPMxpjLbW2Xj)xr=O9ol>Y8%D^Igp(RBEn9LNdhE)0 z?Vkzsn3y!SqFRVf4|=|==QT>+&f63(MZEiH$a|JclBBrufF`-^mMRt;4-{YjF5?tq2#*e&*X!s>Cy*E0LXyWIp*a#j^U)8UsQ5j7 z9K#}dm4TYQ-JhqBBF%Qpl90_;X8L2`=4}Mkw{1;DePw*y>r+7((wbb9$@ndz;-^_% z=Vt_6l&jJe*d%zaeJ*4 z&M8W$Q!xLxrD}+b+0(w$cFwYbsoyqMcD|o8E$iLqTnuZ{>Gv;0MgwGv5)m38^9M`C5fDktj}4>ys&}VHQb9T#!FIISI+d{gyKVb7CR}^>$;q zZbZ}V@3F{$*S|SP;%#-=2}#LM7d!yBID)IqlZO)KF9m)VasoF^pD;S9l2Q_X(CnkC ztIFmaqnE_uNC<$yY8Ms&ppejq!}UQ@I)m2S1Lvj}Ai)x~Y{8B>Jo7)Y()9NYEC@Vt zO3SNNM9Z1tB#ux@>zDl3z~*IL5i7`)L*KKqYUg>)el}>Y@mX8p*0w+SSWPNtD6q}~q$hgFMJt@uuI3vJIz$)l_x%f7Q&fG%Cwl)w=V7e<+Z z0b$$v^DJ6O#i&8kAi-QiYI~_#ODcr%x~_Kw8lTS`()l9*Atp2?DKmRM|5v`QqkD8M zCijrx6Gq~a6~^vf2uqs=fA+*xV6R*8o^ODe=R2*LIf=-p0sIDjzPy7qD+44( z<`Y{x&60Kay~l3$3$owGgk%6O=?8k=%3DZy-j^0PB0yw(NR*6 zQx?iKZD8;CY7l00H?!xe zmpCTG0y$ON&r2|Q*^Rh3;ohn+>)9RD{rU2P8*Ue_?t9Ivk=v2rr^~iWSmr{o(k+5T zHyPY2t!9uB6US3w*(3fXlVfy8e1-yt*_nwvy-eufBNQ|vSS%qB)H%UtJT^9$8Qt`f zyZ*5FsodprR275NidWahp&@d2m=Vl9Y8&UOU%_>m%XHhcZL#TDGW#YL+@R|5E%S;o zpEybW?vP0Q-yxx)gsZDDuaD!&<>}O;JR=?J&KF{eaz~=F*?r;hR@m5e8o(SN)s+OZ?* zAQ|Y?Bqf$Q{N(^Lq2JnDF#?w^a@@84K`MxYPob`)@NtM`HsL_4+3ZIQD>Ve zEeV6FsK-*jzIbgB4tl`Y;rZKA$~Ak!$H-pCA0u$b5~@x+hJJp>i~jwQl)h4Qoq&xr zjD(v@co!@&J&u*4;|w83^V0kDek?_2xu2Cr^M187RG^3l6v<{su}M(vZj=Hr?uQq)wfLkj&9wdh)oe6K$G8 z*ZDA4FdCmks{wUCK~>eVH+=0^mA5NwRc=nxT*TP+?fZUf&W%s$*>cf; zT3?jy64zPi3&7C7ls-14P 
z4Bi}dlNQF>wucNGjwJopl$67n^oFOLKdo^xT~9y+3+u~v7-*0i(b>GxQe9%Kf4W<> ziqUmYtOhR@R7bw@O(wn5WtVVH^ZQB%U*dcgR>ZHDY6^M2(q)-v-_ldUmbT;px)Q$2 zt9J_g$_bD6wr`Tv2o;V$p*tv(GuIV*vkgAenVCDCpDlk$c`0s_hjbw)p8DhSKFl*v zJGYh$iuL>IYOF-0IxOrK3u|=p+Jmb&MCsyVukm=V-Pja1w5ao^hE(u?8MkhN!hK*j z$X^ zWZQKFOg3m)>7cY->yORs9L)#)v_$xdXeaPgKt>Tg_5tGThuG4C+J5~7ef9B_3odT3 ze}8W?iN^)uD}CE(2V9+{@iE6QYtXW|E-poQ)1Vz}UziL0!SAx!c#i)3US6it@q9Ja>G3YC%$#S{XGTw` zw{g)rMk{!UIJL^Z*!Zi#*dEJN2m~dHPj_;j>eAAvF!2lr|KoH~#w`;#&-1KDoHWCl zi6V^yNkMd!4r z+k&$r;US@}uFj&m6%8%ny5n6YWWK}xEO9)U4t?QxgG1*>-JOe1G-bSw6`;FA*!UJv zn!wlfaW@`}?~bvp)?sKDGXJsqkzJ|T%8;=8b1C(<7>Ro|a zIKmQ$cd)e3E5z&!_LHdXxCuZEr^#HT``v!i=AFCSdYi6knxeLr7l6hNdPi_)@x`i_ zz1==^C1?3d6w}tDMgc!=ND^p6>>>_V!Vz2~5yUC678K6p2n0)h8%=en6>(GdF;`)X zyI*uD>WQ5QaFw`)IuHPm)DuL$n4nw^ry2^jqh{>DHs3U(C(-3wjxR0tgQW|g#N^uC z9=;kU2Sm3@$TQ?dhA7;htaJMeJ=(1XqQDK(0&f#3ZrAzIji!$^*?Gwk?Nv=OyY#i4 zJEgEstSD<&L%osFX91EO#vhT-gApk*3!zDMSo2$6LimZ?t^+OQN3CuLCgFwVtz?L7 zoj*R~Fr7dM9yMl^k-~*`;B>D$nM9}nHK7B=WKS=Ps&mS|onJlTf%7CJHk#RPE}I84 zZkCkjg@zl)5yKlW@wQ$HE(Ml;y~YK7>9(cp?Ec?(C=@4m=PW9 z%J1;Ye#6i2^KWoz9?OD&T1EQUG7dhXa^X}y?S{Ck^!bXt{`rCoIxZy&yG)A(>j7PiHl0<%=Rp(xf2=Wg!9HD?4Y!5TbS_iGOU;y z=APUQfhhi30hDfQ*g=hWh!^*=*W$uym@Z&xd znS2K-`rwqKpIz_z4eq4m;}k}R!ATndP#u-^^6qmxaIJuo zvJ+R@dGp+7hX!WX`Hl+Z^==-b6%)&N@BusuJ0awM0d?$y(DnQ~BUExSo9X-6TP1%{ zp?cn2-(a7Oe^Y3ohWMVUmf7cq6uE25DItxm@~aF!sh1O59=w*_jc6}x6^iyHA)(zn6^7dsO4--pnN+}&VaPV7HQEP1wR50B@{J>7$jZo|!Q{2C=30x>&MFzS#pW|)2fNSCql-Zn{ z*p4J%w39p`AwzNKC@pZ&1Kn~_H169CdQg*Nr!`;A!=GTwY})_h(aAl!;079xB!i=j(oklN<(&XAs53>2;k z(tW5rDy56f7byv8X|vFkX?j1od-TwLRW(3#?;tDhA8A#$T01LxW#T{-6crJ4nV?P+ z$1(g#*ZqC?jnskv!93z=c}z0Wad4w1%Z!~5Ltm?Ec8Jx5*-P4MCelOiHfB++R}7rI zilZjpKl72Nf;W;gUJIw|8Fc!+h( zyl9rQ`rYUVl2rggg(4>OD`ip6@OICiWmxk%poG6aD-rke@lvebI3(O3@t8IDI$Y=+ z1}z|A%FA{^Xl*<7L$^@cwV13xswz@H70r=DOyHxR3T&+%P%QOWgxrq+D;K#ZJbA-a zb4~_400-WzrsehJmy$DqseJ8i#{?@f zI?BZe27O=<1m%3`3z5*9Db@Yk3LBG;M68>X*TJz?Ez!eG95`kyv3ZQVE1u?NT>xG>=+EG(8X*=7ng}1y+K` 
ziPKdLB6-Y10B%(JVxrE4+|`m7Rso3rC0M7nH;pojjUDD%&_DPYr9{@EHhSH-^=vSK z&{2VAUS+pkz?`ze%WNtbVSKakfG*tzebxN$Fx%#cr>`5{HNk=_Heeh}Y7OT2jR;jj zDh~b*t#B^oMkM>+)3gW|F4ThgaNLA2oaIc2dcWkb2_m7-7R{2Zo!2nnXIPWQSd<|A zME8R0kgTVGt|x9Vsf7d4aLj!<=nHTT`g61c@mrm7E~Ng678j(3Tv+L_#W6;Y+X!NY zFNeFDtwbC5)%`lfV7S0w&ELmt<^vb4w1-T?}D>%*P99p7{DL^!M}t(2)YozK`?;+-#>c_Ljdxb=?2vd zyo~=p9{KME98EzmDpzxhvthITLxcZ3ZA(GaV>&5mx1^el`9Jje-wporMz~mjA{Ndk zrzZX;|3BSphC$Rw{TM@5eAe*X4^)%EWp4aJDUHo;p{39s447#F}I6J=YxXcq96Q5jb4M*zSuLB?2BQ(+s&Pe&{xW zOYFAqjMR~f(C|a9mvMY#)cp!~qT+6q0mpIMxDv9O3%MB*1K zsj7&Dh-sZhx2Id621iR;8l%as5N9*I;EZ?5RHMTPJk5$E954!Mm$S9^>gAO%wv8|t z;mKb9V%9xz{d%z8hXC~3pq;nuLaNIzNgc1Zte;8k;z}KWGl*>+SHt75lqF#ymv{zUD{nx-eLCK9BN+L-v&FcY^}MRFQ1S|rTTU?X*(}J zT5(TSe7c{Mm--I$!=Cymdi5Of>>6?>rjic*G(rbnpR^k876+1LLa!~iYyI@I3ghSxDfgU0e85~5xf zG2J8%a|rcL8C@u?;{=l((1?LC)li}VjL_&DrEuiW)9BpKzQtw(JpZ#&oq#AFX45RxXI=aF_vg|~q|SOZ#s4ac|5>8`_31VhXyhVS z1UM!Ae=DQ^`T7(v;H8~J#bHkI|3)W!5!oVu>N{o7C-HB;LNBr3KSCX5W{_p{Z?td4_&Z?D9_Vaw9r0J+P8aila-o_~Z@mMG1ZVTU;X z)&cWq&Tf2AMK(VlBq0r$1a{s!oc;UDN7A^=1vMJ~Rq%O*XGOtLVb zf>=DZXnGDe%+@>l?w0G5B1hEJ7~lKYS2b}X@LER(p{!uFo`D4ral2AySm-T-!Fr z{;(CTtl(`rjxbe2Cf2Qq2nsod!5wt@d*=S$$1^N_Y|>clZXZeyz6aRvqKEH(h=<-X z5wLg_G+n`(Gb$tX_&GR)5^Be$lAkVd2+fo)FW-!K_*4&nS61hrgC3#>qMud8%+2UH z__FU~&*FmO^@lWyx)u#mUB~)B-y-M};`SXKC35~K4*|HAT+wv)b--%rjpCwrcJywv z+W(kko2F55jY2i0ys*N8vE^D|7Ujo!vSmUP?4*T-fzXMJ~X^~@ncV_E%H>bIC@g6>~&GVRUQ7jDV2Gn!42(Uxm)W!ET zMW2MihH{drJBR=R5|ZND+FD7Slv-WhLykLOvP-1Mp7zr^+T-X{nH&IX(Pp{DCTD05 z1DgwuKG>nVcT)%(#5Nr*q-9YxB^sAGzBFH3t25@f^bt_#KvDh_-QH zRvsL}oMqq;B{gPh?$wM>UHcLBZZYX!Rd2r z)8kTIj@veQg*>ExeZ^dKultOB6NH%spQ~Ti7SEJDFo0W${S63aE1WQF|K@<5jM_m}k>v1I1 zuuiEw(APWk@o+k&;0AmuxpBhG(#{<|%abR|oM!p>+47>+)i7Cns1znHuI{!{FK?>C zsqL={!;n#xDn~Y~)w`nQkz00HyR;dLh@O?OBp52;6f1)}DVDSXwyNSFC@HO+IaQK0 z8Ly6igTlyA5FcN~M9dr5Su>;T0gStLCK8yhXN)4Jmq}VGclJNe{pM6Ev2t~%qIw>a zY{6td2Cg}BygK#QaG~g#41BDV0Lx^$Ke&^~Kl&nfr* zFp0`uZL22VwRi~GCwAAn0iK=poMo)IviS4Pu1iBk;xcjNfT0HQ3TjhZFD9>UELXc{ 
z%4N6r)2K!h&-Cs3@Z7ZH{>*0a_@s(`n+YfRr)U=Q--;zMlOosw6-$Xu9)1(Wwo-|1 ze3TJm?mCl}s`A-fl_C?P{fEmvC9`y;(NB2ms7){J<8JZy5 zMQ875$%}rCBV^CUoXjQ+N=j#;e;#Rc8YUaXfEAz<5!m>pr*Rjj%X{mEKC;g4famjm zvu@qC^%R6>*IiwG$=dTFtzdwxtbwwBvY@KlMvIXR%a`qs7$v07of=3^EuV z;(9jrl;n9W3*cPnhY20_5M#13hK{;i>n`?wKnAcP?+(OD-l%DpV7WhZ-d_^BB469i zfY|R~4T$Xu)=;ZC=&_dbxaQucmGsq~lt{sa`UiaV!44u^5XADTb#6AES^+Yk?Xrm8 zasQCsU$TCdpAn-q;zyTP(FAus+)Yv0&;Yz?MWQD02=*xpZoU^60?&{7UMNEf$)KKIYdTSqPwDznGM_UJv1;) z!rNw8=Pp9xretOGT{qOwh#yeOeA`<2MMdY{VkjKY45=}h z2!Y38_=~BD^^6Dmf5CHU*=K?hu28s0Va<6ueBR^7laCn>shmn*?Nm1;Sq+z1CFhK ziFt*G>yz~uR*j#TI3{!b(>B5HhaaR%HKYs5XJ3!^)KgpunMv9eGj8OXQ{X`w)l{6@ zo6VWSB)?+Ie?wl5mqNdtgpWO5vG&|DJZtUw0-bL^IMoapd~Sk%&;5=0ZT5HL=}+8% z$eca(JoJCU$NWY67d}SJW8W5TpjNslu<^Rg4#F!vBI3azoSThH@WD0nenV|a z-3vf{gF>{_-jYqw1Swv*USeCx!PDdV)N)&AwU+Y9d2k74Xpk*5ANq|?91K-;eO4hz zWo(k+??7WHKuR^KnT>{m84Y^~qbf(@?cYLUOx3IHD4K%z(Gba_6-|TZLL{8sBwZ-R zjkPj+!AlihYUi{*LP*8bp~GTuxa^QC54%@d@zEE6ppo`mmritr8TI*93uvmZa25WfNX@Q3s$|#l_Jiz3oHd6`vb_D@wb)!%;S(O z`)R`c@qxe#ADM!gc<+y-gQBV?lJ{!oD}~evnJh%#pi}n>HvSw-1?L8)D3;OfpBrHE zLAJK_G%LqQiKXcJSeGso^@|J1+z&)IZgyQUzgU}RPk3Hl%oXHsCV8{lo<@40G(aWw z-QRv>&s=!g5tk7Wh2LYd3yIBKY_$cPJD`2NRWv5vKhKj55@tjf)oN4$6xi*07~)Jif~@rX&jjv|sLy!Ci$_O>zj7F6yz z>w9G!5&zlLtWhTZ_oilv_Jk*HL8DfyH;-1JVZpDzmTU#|DOA!gth;P^MQm!!KNfry ztv7irjbg6D%W*khuD7|388g+|AB@Jj9j?1M+;tqKBqvp?)%nk)qvrLnCVO=s}OiH3%z&cm|j4>V=ATP&Ltq;nM( zcbg(?igTXM7wV*6Z~XeDG9DGXbd6Z}A_ya-tBt&}tH3hON_)_-j+gahC3{O-+c6VO z9uxD^G^Fgp<;I9?n@ZK9V$%Y5ojo{i4VqLM*UKwb?=CTH`Ho_>=hfp&;YhvJ#OX=P zrg3spA~F!MR*7{f{`8^Py6l&=P3X>8hM6g0qY%wMg=v zUcDXU@P6T{-$e$2j)cr~!_HHw?4&e2gOj8$VoG$$)!$?$MD=8l88!=-VgC=jB~tJ| z;4Qlbh08<`GGZU@gLKVzq1lARQFRsTbp{ly=F=%axcbT3o72s*dIBah`lBMe<&6&}ov#_Gm-abJ=5hwZ9-{z1A0ws-LvcIodk1hifl_DfA zc9XtE*b0coih^MjVk$HnqqGtMoTJjb&bUI3r4@eTjY*WdI+^TMOW2W=u$Jm|37Z{j zR9U}wW;E3^|7u4!_{Jdl-?3tb;LjD=q?vFtLnqkN72ZwK$C7F`^(#>Dhm^`N0ZQlaH;s-D`Q z3NG1}%Qj`;c{a08w&uM&aUxKY^40ucjXwQ3J>A*04Ot(CH}CE-74|I3nT8ytE#(bD 
z8v+OrWq<{VA;aV&rY#j=q+#?=SDgs8TXly$JQ$r?y&lY|>)}yzt%v<=B8riEg8FQ+ z^6hQT$7iPvoyBT--)ZNi)&7~=J9B94cSzVI`_SJ9EW6E>-A2s|GRw`<>1Y##utXe6 z&HL5-l5Ee@fI+<6&jvhjN#-PA+V}HKf{*L%#L^bkro887$x&BTP0iI-%~xyP-eh~` z-9B&@b!f@?eA#X@Wkqup3O}juFM`g4M5}5M70it4#b4TeGY{-B)ijr^k zcai=VbXo-Th*ni`;W;RDRf}>ziqH=RP;RpnlIuV#pw&u*@A{?l#jr+BjTz)7m#mzl zs$c2PL%2r1>;3H>3=E#oD(&1n0j0fsAqVa{@`~Kf56-*2HGj8&6fF_yNbN z0X9>%IH1cM6`2@5WW=Fz`#M9Vk;(t);##an@bYFqgWwssWo$t z;bd+VpkaZsumK78XTG1YrMXyym>;jG)bFVstg&xAQ}@8 zRB_IfY?A;7?a6$%3Yv%bbH7nNHW|wYp#OAh7;89KSh9td>iIA+a6aOQT9lr%o&u6% zwrl=7a?IM#itFTyhX*3axW;?T)xqw)OqgGptzFNRko>sec2G;Uewa z`?2s-L-@VZ2%Q_sC=5N_`aifZBnI=$3C~hq0gDhaX(DHN93N3X8_~811!s{U4>hg_ zTx3W>f6^dI8Wxbj&XZ}o8yYa{o&%wV^`!edxMZ@8RcTF{>V`Ku^xE)ni31pwxUtrhd?x` z;}Q}RdjlgFgdo#MG)}FJazA`D>^Q`AHs{qDVK_|iv|HeHi%T|JE}$7=qH-JxWV(^v zvm$;O!5=bi{%uE@9=vV>0%P#x=L%l_Z}f}aAN{EVL30%WxS1K$hbnxSU(A^U@E0pm z(Vm1xe}8K8cuSAe@nyiVSk=MUr0unq0ez56_3@(&i8@lOZ0o_dDB&gmf)NZ1njphG zvb%7VfD#RLqvvPxHZyjc3$|bsqypxNA5B22iZSWi21~-q{Q1lP$?w4d60y!n%6?Wv znQ2rr>FYtdh4nUl?v?H+$B8T^ufO%&9;T#x2qQ80Fx+?;HRg%RX6_Y#s9ceZV-kdj zAGk!}L;?!M_{3=8YX-=hM(8jqr~W+b5X>_##Kd4;OBm0{lh?yFUb-!qj$bk z<+5en^}SZ9Nc_Y=p5OarQA*K3`t%U^|?93A_9j%@=jRG-Q-*roO1p zL)M-?QJc;i2nw(Vb+>v3Zpx2?6kT|`^d2Dl&+u<#-&EY7>5J%?70VjBMUScG0{B=l z#oXU0tiE>d#}^YlNhF6me_cb;6M}Dw$)BoQiSfUHkQNae!X1OSnjqYw1!sIN+Fl@B zDCJwvQ*5+fhCt5PYO5RZyZ;=+8^I--a8iwx>S_TT&+lz*-YE%bkOhSc$6h(4r~XEa zu0_8;!B9#Jw30eKHD#BkUMD~2l4V1+wwi1c3!`1^Z&0gR*l0+Il@l@}spO$5Lc}5C zj+)jqW95mq$0nv+z+bu-tAo3~$;>UPDL3unJn!ppdPR?*$v!83V>da0^(d%9`x6eC zcDa*q{9n;7wmScyU4j<>J{2(Xw_(}m8Hk;UDQU&|fv&t&EA zU&hYPtQ{-TS4n)xP8fRba~gL%%e(a4b>{~rsW%thyBo^$FlYx&YL z7r??-D4w)Q03v`-&=p&saq;``%c}!XK#04|3rlwME%3Lo^JACW*3#jVR}W}xR`l(= zOOD3=U3Dm;IE9tMdY~C=(BNZzyxO^yE3_a&O!4UGsp=PhXLW~7&qX1ualFRyX_gO% z%2z>1fEMFGuvvbQh=J%d1aH7eb9<)6Sgu-PW?Z<~apdst*f!@3TJJy^$@N2JJgzO* z_W0IuA!?(?6He0s)DV{MbF>wsZ@1LE4iE_$4c7x(*4<(0#(#}#!40Amn2T9LWJ!@0?e2~PmCkx{bXDGxhbH0C@q)TYMK`NA z5tRm&W9aq4Gv*P_EvXsgn$}Rt9hp}=A4-b+Aa!fG)Y*)l1A!|J>2M+`1GeZOO%R94 
z8j5qXd&{&LZXfH&Hq1EdmW~G+eAet@SBVZJ92xWIR9<&iA8-3CY>h*D*i}j*M$WKN zm!?ShUTuLOWSF&ivuMJCN>B=s= zMbKo2Xq=AM>*xt`l;T`;JT3ILI^AQ+Y~Mh}or0_DUR{BnxZrEkbBQB%-83j$^7^jT zEXg>naJX*NmEso2#6=<^dU_;qY#z^L-SG{Vt*8L{f+5e_7t&A4xf0osYU15HuCV(h zQea{ztcJs0aTFaEB)Rz24YFLqGaeHecNbe1a8C6N;XFjzf$QD(t0aL$T0%@pt;KueG^t`p8r#Lr+iJ@F>2f1qt^M~{ za#S{SY5TZG~7qf72$RUObVNVginD;?IUEJcYo&^0jF>CgdFth`l! zbFn7G>O3;Bu^%EsjH*j=&})S8wafS8mKlnbl*2A6$)5GkX&5eF{CM#0;$vjCKUTGG zU7LH}K{LC5^jtU#3iuc0r{7xeW7zdBeILrFI7JMOhYN=ZS^MhBT=smN5q#}sz+8;5 zT-B-5NQXjYxkzr=bY1v8lQ1llk3cHzB+SX0MNXa3wv7}c5#h^m?#z%$OsuA3`s@BU zb$nebVNAQlqUGqmj1|}0bqAJKWMI&+vjG+f0-U%63f$^u`m?|KDS~Wb2O!RlQ=P$9 z!4O%L)jJ zTV2%As^W0k?Pm9Ioq(R_!~9(ak@eP8O%fgUF`+ii;_3NS9s}F3n55uMttMWw6w`i? z>_p3TUd{e7Q4Vlj;va!lH=8rj-3mOxCyVj7ueDQcb!U!>S*l2(CvVf4{pL_i@q(&1 zoo=e4rW=@$NTdFq%c1)`mT@JVwWa{zVC;YX9I!_m)F@-h-mq5>xCZ`F%HjSN&n@*qle-&bSuw)K5pS;qY+&N?6zQQ0ovb7g1@O*2WxOpg2r$h--8(b}{+cF6DV-fc4( z?%vqWINY==nfR6k3WY0GEHP?^?6#D=Q0S>DbhKQlv%~^jMgzhTKt|L0p^J}Ml^pp? z970=~NP47CkhbeRT+bZlsdZcD?zZmxX^nwL(G9onI0fFI__jVf!wKb2-&rUL2v#_MnHHCQ<$0+23AC&$$b zd^*xAJjT%G#}wqA)BiRPp^sZNtg%x$Cg8&`@y zALBZ7UWTY6wvOMPb3#>QB_y9bAAVP^mFWj_`4^q5bp)Xzc{yu6=3Y8!G|Y*fz2T8V znQjxX1VxUD;Mp?7+IxzIP zQC~=qb_!A_Bxo*^>|JLcG)fcb-c&oQ_W{TCMTr1HBit;T;Xsbu>Oju$tYCC@iMzdU ze}6JJ+$EVp^UUQ0gZtaG9e7Cc4zc$+cY11$jm_yz!?y{g=i! 
zN%Cq))Od^##&F%v&&l!|qK;m6v1_fs5ilNPAr8Jy&l~BI@%cUfWV3uZt}vqNTBT@e|F_lEB;X z^QXEBpvLk*1z!pw5@gMWF0bQrtkA=IhyM^c{&;;-eYz+~=z3IVnc&vCcM;FHMjZTe5ZkWnkzcwD9nFI`Iu=U;IPxQ8CHi05sJuZsd&!U zrUy&op&njMj$v&Jeu^p`iEeP5_(1}=W%P8eU=jC!Dk(^4+)Ce3=(v@6PFWNF_!ad5 zv3gB~ypX9-daLxbeyQ+;OQ6%_km+>RkTZ|Ntr+Xwy43U`ZH{HJz`YDW#a#!fU}xD;|!Y%om6@75c?Q6n1|^H5jw{)b3BSZbjm7Lkz!N zL7$U;tDz~A>&~amVUJcO|-Htg;G3JUKl%Erj4~lS<0yq9?Vxr6$z`m2HE=k+Y5J&r$A-$ z@#Thf+tre_7g_3h@QMTP*G2*O8OyId6bR|eK=0*Ov+u5zgT{HZKk#@th)V>s+A$Wl z=-WmHQ53W*Mq!@N;o4r!g(ji(!cxXz&h`7M`{yr-B^C%c_Y&2P5LbfDZfj_R&RG2C z;LcE4ojxCEx+3xAi^HTfb2N2233c%dkJ&qG#3$_EPUF?CBY-&PKJT>7UrIE>I+3Ds zldY(3za`?VI8=Rwi~sxqF4wfQFko(WSO5@Kx~no0y)h&A{S#9^PH1=_48;yEneoe( zMs}P=Zi96EhP^Pcl!%y+kWfa=dkHmhGes2cYlHemX4#m@aKIA zFR(J(bJm)77II1`Awk3%!n?{0_AFxJ>#Kz7qceKwI6RC3mZ1sa`^=gNZ=M>gzxTpN zfpxbeL)mpKJ=Hixk{_k;^2;yrAKDpU{2~AVbA%owDksbK>15>dq1Rq|(}9hVRIaq( zpE`7s{v2&627OqT%t;ix16ULtK+*mw@cqRF>fOcqgVh% zlV6VpS_zW^DoG72_$P`0pv2K{@-Qr52+rCH(G-5yDb=N~qPXa(cL$yxMv;b1TBISf z2Zz2NlaTNO3rw9|o4#f@#{CO15WbB&m&<~Z`?~<%CY@R%CqDIu5hO9aAoEt`X8lFL z507L^8V$G5Q!rKv3*OJ$Ytjzn3nY%3bm5a!K`v`Zb~`8)sf52V&gP0(gYa~pu>ek! z=#DI?I$`j)2Z=7BkBeIA{G~IkCgcp~rAWM(U-z}ICN1qv)zH`v{tP114!kAMYbHYF zVH2Sx_gxWe=@)uj^aNm;;km>8_Z~|$N^YOhca;!^MJikPW&?$ZhMum^+EmQD^|;sGP>cz|{-ZxoKs=$9%=AocHyF zoo=Gsp?5XKq!8QVS~DiS9Xl>qtjjuKZo3qin$Dy`z>nhNg>Y>76Nd-saFIYyW6k8? 
z2-^>s`%0G$e3pDC0Xx?U-&Zi=V?Bb6L3^#5+{r`c=AfpY*s9^z)lqnu{i;%_2~r>e z{XwnSOvTEIj(!KR?{o>%S8pXil8dRIucN4#kd?4;%Urp^Ut(-z1T@m@?k>|Cs(Lk* zAFSbVv9(^|?`A)puQYu1ep0g*R903}C4x6R=`X=sv`r#-Z$k0OHnep+9`$4s93~J6 zFj`#-cuA#@fP|yPWzHEb-9xD;+TZ$GaxmNS`Gov(kYSfc{FAR3mRpyEhmO|F1bNRi zk%YE}`^8;W4Va08#q|`+N!F=SVXf$GepVJsUBKmM3|*Zg!`o05@&uvs+$1Il8Ww%L zBuNkVUfM*o)u<|Bpeea~?(QmVc3)4eGyV~Uv*R}#nGYi=R*ER3sh<}}S8f>^oNQuR z^Bp+KqUbD8mwB?;6anCOgM~jl=E)iIk`Y9a2!ad4{YC1p$fnt2u_uzV#i&d8-8|@A ztp&bsIAqI~uxZjHIk&{Dj2_R+?uXx0DjmZ^3TrSCmIX73uBF!KJ_7|-?5SXX>6se; zo~>XR2;ieFAIv0um?|bkCFQy;@ssxtG!(UStI{F4fd^tYBKw)r1}h&k(OKLxBD1$B%2{ zb(_%zZ?p!7C&*jgqH1{408R(tK&*<;#hw^YdMbu$!eTFr6ZzdbUp)k)w3N|`)@v|& zW@PnCuLs2GS8GS_WW$LkC{PitlQIM4f_2805^PXm(|7%Tk?YpF@fndBQ|9yR{CEmM zJ)NXILqn`U2a4-jnNW!<-)XZ&L-72yLDMKfA9&_hQ|UN0ZRa%iaNd;d$#h(^sVT3c z8phY2%GStUC?OK*L!0^sa_$Xsx zd_9Jy3%ErP>qpmd?Xxihx5>z5*7v?_K?wnWb>T)H76toS>2w1|_6cfSx}DbzUEe|P zk^Y10ZH4+6Eq*NyJ%+{1^7-t~0roLCfro@aUoEu%=GNH+2*Mi}&H8R=QA$m*iQ*8=}_v0_!e1Dyq z63>-2A)Zqir$7<}5+Zp1Bk)0w=37*G?A2$YQdWBAuNKyMo3{(W!aBdglFhqq!N=uR z3h^H(dO1$Gn;isid9YxgA6OFR+BF$R&5LH@jMx<~r=^)y4PPm4WVw#A z-EXo0^a1ywlBdj2D3WO;h#5JzQ+%!bxM8(?mQjD*AfTD5w~^~;l5?Y&guBTlIgcU~ zltOZHz85xJqy6OTl4>qwp-eE7s>_Na-BL71TQD6;944)6o5Z70p@7Bj_gzq4$?|U8 z``xAw^nHsrjhOk5Fj;~mQQ54r<7|Tk7-OH-mv(!$a62NW^K%k+(@F2M5RkUq<%{Nc z(j2}V*`AB$N()gzr!fhlX>8&UguW~qaj|RjCOLwES_Ibzkg}n@=1vdnkBEV%uyH09 zbG?K!|1eW8Gn~wCq2lgpq795+HvF~S)tE5E3KISlM57XsathWj;WHq~GU8EC(-?)@ z*KsIAZ_XVQ^Jh12YNdfmF;peaXVLI6b-lYFxp*lICcZ=G^DpZ!{F;b^^U8~9Jwo3w z80;1sB??X|n^#8u=1`sD;)dzbAJ0-;XPgRkXw>y1fGEXFn~a&t2^GjEs#)h}4mIXC zqRK_2rBN8BswYnGP5Sho`fQ)uvYqL0`ts=V5^`AC8YlcUPSEgXe?pd#S z#B)t@67!+0Dka!IbXfS-AJ-u8<=Ne&%$%qsyHEya9x-|-`1bBH{uvV*Udnd|Ti9MK zf>OLVYA&36VO4|yoqE7+V z!ZfHP6yu;8x@z5nFS*EMpDO;5;r;b|rCO;41cf4O(XM(&;_>39*!G)@P0gg^mYB@s z3lBpso|USu;a%V5kb=_r-+DJ(Iv7wAK%8}^?KzppoSTzWyGJ!p4X9VTBpoNU83~jK5Y_4 zJq(n7ZFB*r4$Z+rzQ=(@vPc#VV(Zh3x@*-&zi(n{9j1hOXC!VC7o7DbX6}q=ixxZL z#H~81DLm={gyLgs&ogn$vhF;cOb%XPFZsk&eh!TMw)v7Yu?}}&H&~xMOdvho0v|h{ 
zz+o#@&kxQ|QSZumoC0ELu*ZQ|nz|E7wcg;7B$2jRt?nA<59EBcH(ka_5}~)_=3Z4i zM>qGB%*pQ%j=$K*oVK^&-2xbHA&MW)M3u}9==L8kw#syU8VVn4?JA*>bcF{3a%zh7 zTbTDBv8)-z+;?Nm;2=K&d^=?$qQsvT&K`)OkpEin zXjd^u(93)`2wo1)u*^XhoS!;f7zD6)M)m^=3{3bfkVWl5^K%3pBKc0|PsZ_>p5-CFV(qTdSS?z{i{f%S)-Wh6# zA%nTv$qAE^vhTo2N>GmjiBrwC4k-}z=T8yjvhwY0=8Ps15HFrl40CGpS|1fqjfSn` zii`#4mhGDh9Ej9?0cZD0I~>lwvA!{zPI8TLwe9J%f%enYxfU<;in9OU=1C)D0HM?KWr`EVcSpLO z(RhIQXJ9vFivv! zCm0KH?I=lA2ZPmnU384c-~F&@Nhc#Gr^jo^dEa*FYlHdv)UT?+oIRN2r0UMb>#mBm zWGXc~DJ@9XYPl9VNF$AIR6%sHpLkJ zgmHoRjwFz!n5i#LACR*K7%+~tb~JOP!sf>q*qsxpT$)FX*3Q5{NL|l1H4|HLzm#{c z(H_7rO}r{T;vcqW*5w&x;16InF1ZVxL_)^JiUEMPE$6pOkE^;Yj*r=iyV+Za-X*Pz z%MC;?<&`W*>9**m$!!-FxI5ZXy(Ndy%7BPFam5J_S>D)rW-JN+Zk;2zsmY0R`cW#9 z=T*RUjK6b;9`u2hdH6{JT^_ZV6QgkI@t-vuM-4=(;TkIJ=UI6XCo{B_qvC1^E2En$ z-NKKtuOnJ&5vRKXm!#muj(Hx+nOj$gcl1 zh{U8b2v!#}Zv3l#zdyaG)Oj)@Zp*TZOb<(c#0)}q4r1wbSs%Ojdz;t&d zeC?FF-!R~WA7-y;nr_xhxc>HW+37Il)pMJe)Z;m&jbrb?9p8?<$+GiF%;7#?$?h0s z*xXA&Py?eyu z)3%+W>vJ*F;tbXw6kq4yq{KvC!` z2OS5&GF@XWcag(0U~WqPXyCSomjCfsF<# zj}gVp028XR5-yj}ecs7ONMt(K62V`97?b1#GXXte0k6t?ruFfi$>VumX0giJ9ePje z1uH^26GuY0e$Eg8IqBEj11q68Nl@3}Fb43rC7!u_Xr1#&-ASflb&~AYld!b>5?GS0 zm!(OdrT-riMRdHXu^`0=@ffb+43_T_j!q#6iZXMi4V24^{+ zszcsg?6t|M%DHe6ysa+@JNls9x`KK?e>}nJ0QQ={%2TgC=oef4T)1TzUQt5I&u#h9 zTs>n*%S?LMU3j(^iWD|%jV;!Lj>=01>aEXX_0g1Rkj~)ylMX!dc%lXqpEKV=-rhW& zyvYxkn29{3rcUlmSVGvQkN7EbdU&tDhKh={@7C`MMQIHhQal^FJjZ?tK}ce;oL858 zox;VK=C7_uxo7o|=*Z(b4n6Fq3gweZlUk6KU`WJ6nnxHDxV^gt5w0DSkenqMytRKRVLIqqFYkF|mfvFMn z7Ky10f68@hg>!`F6eOk|654mr zzOo{G90E-qg3FFzQd5(Y-TQ*xJ|ypJzJ&PZfBt@c;sI`!%Ro>f^#wt7gd~IILdPzW zN={c4C8;RD=>=Cw0+UJ!HH1`6-v*~{w4i2WKW#B88+P0fA`%k1!T^cE0F;hM&1;XC zuFZM50@xW5At4wFX~G%4kV!!=i_hbcQ(iNS(h4|C7AeMi%$$_|W`?S-&R>4ueC*)F zej^fM60&aZ!cntYqP|>rpAa(C6a{nhq}f2z;E#o*zthvH%WA^J%AxZTNK}HhaA@T1 z8aJm$#{3bZQouh14CiNUviu19p3jSDOaVyOWbRT>h-hz)F}Hy`BDY#_u325)h9dBF zo#$EX>LL3l)Z8aTTJw>w&Z)PT6K)hHy$6lV@g2U4#Gw0gPseixUp9!EI$AobeKL~) za(fn_qoBYLOg?{48%nr??ygGQ!jA_pBaQokR<>Y@bz$*WRPb@?FF7vae5fpSAJ=2P 
z#mC)FzS5u)5p9PrOPhv@vStD$o6yK`B8of>ra35qL`MrwB<)Q8Z<8Y2&!kneaLgPT zxgip4ijA7QH0!jj#1ExytfnTD=9OhCotpy#V6JIYD8kPSe=YYMOx9n~E2C#d_7ss7 zv$MjEk4&J}HZNDD;pUll9gq6S!Am6QPS!DCh7d-P~osm zlRkfQJ4+VKi8)4$;GxyrC~lDcD<|j*W~@;pVCnB5ZksMOk-sB4m>SRWaKlT%V&SE% z94dl?g$}7&)8yL1UxQNjm%h+No;ijHA=5L;xYOvbv%hNCAPi+Z%nWK^`T2TVSSxQhja&4{GXaO6P7x+j8jaecoGT@jo1N>83=A^+jM@Y;mudK=bNNp;49u* z#UQZJBAP83$DahC4Or~#L;~FyU#uIWrA>(NE&|{*aOgN5?4sCasXla`?ppBL28WEF zXTE%Ay{QQZ{kD0F#6hA4F5m{El9OkBUdZS<_(R3&v%~&6*+#T<7oVgQw(lphyPy!B z@qCG31TM&*84$G!B>j;aod0qnmI z60wDIp>xBRN_B)2Bst6LccbXmz~;RXx6=vQW4dcQ$hmJ6WrG<3lIqIHIrhK=h9Y?7 zo%P0YvydqwqF~@`TYi;?g`xc~#?CS-&R|Q|f#3v(;O-vWA-KD{ySqbhcXxLhcZZrg)gNWDsoHQ!$aDl&?6t#m5D2cvgpTkH16eB7u>wc6Yk=y{R zqZEG#lu_}Z#Bos_58#x69Ppx7hg-%cb~x;~^yGbCoF@2NG*w4)25ExgM@}L1nrdXIQPZJQ+AVt7 z56GP%IKe+?VGP9hDeeF4cYmaeyyyALed|L0cuj`}*&$I24twi%HiyLitUs7A|D`z34*S0DS1U)?Fdt1NIZpt3?o4n|#vhoj3Bi=@RGA z`vv;S ze*F_c+GjbL=kfV^Wbc1%`1fztNI-2{!mphF`~Uh+N7|;0{Zz*0zl$(P824Wf`kxy9 zJtd1MP}_BqfoZP)@ihPa7Xf*k;Jm2M@?8P%hlKKE1PJvf$zB=dO_5v{_p``RdX@@k z*5O72EKSnKI72k3_A`|mr9>$tr(K&4aoQAtf*4&1HMd?07UgM2=uxdXYr4tw1r5qO zu1H>;r>5i!q>>_s82bt-EwXtg?l`-uxIpf1^_elZX^hoV&YN?#O|Bcfr|sT#(1Tv? 
z@zpy`_(GtGzJ3TS^*xPEVOS_qr8erUt~=N55K~!zQ}m1=L1pV)j>uD)Le?`WR7CIk z2`^9q4ZrpiG?TSZXr&GZC3ElXoXMqUh!|9Cxp(8ZFwh}qS3Hfv`^r&@unyjnrnSD3 zG0u8Pgp1GoBJ*71DpP*m#Lf>*>U@4j0XbcKl4+)GOjnuhDmE-`dgCS4l4 z#3n+4k8UVUY2}wxFpNqhqdGzYL5~z4_(=1JX@8g4quY(p=UoGO{isc1*LJ#(i>A{q z{Y4pTh79|n?-0m+E??|Ww35zmPCATU?ZODT7v@9|!nurExI-KtG-C^OnG>}Arx2ZJJu|nwYlkzL6mIOyO z)Vzt|iT89LaL4kB=N^A?Cw7*y(cNi`iPJ!b#HH{d56?0uK@kx|^;y^qdDs^J+e4&5 ziw;m;fI+kY&k*xwxKy!IdPV`}#a={Q&RWkxu2dYP+&9nsa5v=#tqgr!hf)zmXZ4n4 zT45a3h^IuqY=tQG)&7Kd?@N^zDe$|nou^CeRGeE0vPOJ8wywqDJxTaX>6K!k_xPjr z(%p1)z1#PtORpssmuG>Rjl;4UG3BK*lX`pEFtgQqh`pP~ZSV&*0r?Xgo+IAVL+0V% zmyh-w^s>G1n!PX?4;%Perl}rCG4RcmtlrEqz*WAz@<6`6?YyuAqdf0dGIxrO;@gMyc zk$xMd2?;!V$%b&lv*Ldm4gPt?eWrhhh4IkXkNAIi!z>6vH*A%^?N;euKZkKK&~r#b z!2clrmp5#NA4cL_nGW>Cu9C#R6X{_%Mnq!DSHjRB%~FYAG?jhD5Cx3(++0BfY9pHp z8t8P8Ad8aOf#RbwS%VNnNI0{iHv~PuZk2v|8IlOp!Xtr!73>uzPPq>;rs$&!{Oq`dg#c<<&{(qk_@R2jdKEO?Yf>J45L{y47p( z)$2c_p>{Lylk{NP>VU*A;u;Ycu`;zk-Q0X;>I%ea;(`Vc6@H8#eH_^gRycV>JeT&f z&Jz3E`MN(XxqtcAF%0?`1$X(vEY(vz4RAcV#nNqBHuS}WOtX(Mesv=b_Xtz|_OnU_ z5Xx?bHNiy?;O1A#N4Plht5A(h^6NF$Cu~t~(bXZhnxa}nDmhx|ccETjTRBf|fbQ0< z{23Mgn~stQ*rjHVCjt4~DiovU+2ND@a;E+c3r)ME|vVWD+vFaIW+ z$_V3KNQLty^y=!WtE&qnrS7dtAMepq*L_^lw#SYc=DF;-ZD4t-7kKpoE_*+mWw2T* zEmw8CuWa4C__M?On5Uy&(Np$w`cqdRf^UXdxFLesv=%VWTW{?BG?v6-Ci7_3Y?iV{ zT}QE8KC#EFu9kY%l1r}`m7AWvk{rzt$gnB2s}mTYV7Wz^`|W#xQ)c8@jexe}U0r*j z4lN99M1&@=R%u`Wmn=q>+>^R$tlA~7>yR}iEoaTpo-Qi#8(E19Oq8TETmm~PDZCdH zTTeWBvFx|@sg91_&o2N~49WU>Y)wt3v5K$FrIO&SLoGl?UT6_U5^vo}Xt&xZHI9el zG;@+;MB=V{1ev&gOB%V6)Ws@PQNzLV*+s1!(V=Wzs|pm($;8j1i{vRq;&BOVfugqV zfm$XRKUzvEhL!a3DikA5npKk((-R#w^8P(`_P3h%=w!&%&=Cojyb zaH5U(Ks#bK)Rgrrx)%)!_ zD`DqmrX#JYqG6qt+RX_}U~7zskU76+M<=Ki@SRiC&qF6nOIjDkz}Br@(#n{2MfY^? 
z?xyJ-P&AJbRF{@#qX=GKU`QHU*eO?qG?o2XPT3KJVpz?|7db#6&_FYK_+bFL_D>IT zye%ES3eP89%kFT<*%|nojos5|ew}D<)%P@L8w%=Gpp#_DmLZ+7q^01np6n+lS8ouo zH@7ItF1v5^U7Ukwmo^uyYT3fMA2;8GY(i?$lMqW00$8ff&%Am*0HQZA2fK}>b5QC< z^bFiB%4u>!!5xaWjgr*|+{veo-oND4M0jGn;tmpXXt^i>dgj;VSPy(x?G3BCx%x&` z%%|kKIzm2PX)6?-u1{ zSio^7BsD3T^*BFwcfpFMEW}nxjLO&~Iq`G31R4HaBA3J>zhILP5E;4H)y^i_$1lbQ zLX>uNB+^~#wU%Bq55)^9JhB9zAxkX87c|=IAI*k_2r#Xt@V+=M=p`MiTpNh|#qo&7IE$p{ts1E7 z1l8b__6wfw=eur}&bm5sNu+`nTF|(>VO6I-N161r$2XL;XI+CDLz#1p7mTMf^R3J;vzXt0cG@E7Gx{f?`wCMH9ncfs@Js&6auc>%F7P1Q(kXkJeR?++_@UR* zx!Scmgb7lS4~o}g(Ej4kIj9`ZdYlS3oM54S&wQ>(S5H zi3UdYkSip8_KE-Zy>(>I(xflgR5XSH4iPxXLDUVr=T59r#%`nHzd z7g(;{(*w8)iV>u$)`uJx-gbFPo2wPFvI#NuWxH1O-uhOYQcOUZQZ>zCr{@EUdq$I+ z+bJC@t8{E`&HE$2(KV8z5?#D*#M~e48V<71ZNc=DNT?oq&sdO#?8|?QD}rex$J<>K zY3Zl_jv#R$q94-aUjDK#2fQ|he!rmv z2a`!a(2GmKv1KCTe8r%l=2f}c3%JGM*xfr;~Vxgs1m%iS@L$JK1)ua4Rt ztCg-}Wzk%>Wj+US1G5T_atHo>a6F)L)G_Ly;VfYFo^F#YVr!f}lq+^H5ro z+7`W5A{#Dm-)RDjEVxy*@1|`B<$iZ7#4SPn6e(T2H%0W&O`(Lf{E9}3@J!Z;yn|BC z&q*hIpJ176YEV%wE$b+sTim4OmC3~0rJ0!jT{2G^SqKfSpE*0t$9Ed%C~p2q^dU~J zz(3?sdUEPqX-+|}bq{OsjnBR&>op74C^3dDaE!kG#E)jR=l$GVVo?#b;BFMRBI2aM zear9nU~CTI8_!OSg|SSk4C~b=Y|j){eKik+r`UN$sC)or6 zvx(JsB;s(%`9brf!4)aLbL3Uj9EsBR%X5MY{?(}cL$4WMGU{yn!fZLY zZr|&>()?3v8i}Xn9E-=zM$~i&gcIEKdI2oyzj2IYb+g-K5*@G$${hGu@QZVe=| z!?1h8p)t<6dH#aUGS46~>=JvF!f|kGenGkPi`07wiI8xMj;!9geqP@PkvujVc5eiv zderFROnE+mP1@|W1+%nSsW-b{cxqjuCO(rAXz|5W)OnV}W-;43K^{m9K1zznxe@TG zJlVE$9Jt4c$*rilh+yo6@W*#rRCV9@u#S?FN?ZuzHYIpFRZhv`sIu41N5TNi6c}Yw z4VhVUw%{qZ!aiH$kO&2_S*Y++v8S!Nyp~{6?`_}rQQ+WIVvtD%}r)%uf4&HpFcSqi_zqxo~N_>x|yg1iblI8DdcfzV{o!@1Te!3e|#o^$w>-X z$L|@#BUADY&F`(X#2-_l5Ckkq>11{4#}KR61?Iy{>Kp6(wFgaiw{Cs70<` z&HW~#Wiw{odHR?vH4oV>hyY{~Q$>QEjmhG{JcV;^73k`+&e2mz1C^IzR4$2)%_L#eNMx+q0vay?V8IE@40r=(*RR%lFZr6W`&UaOZT{qB^qIexKzyHPdv;y9al`Lf0x*Ar!1mL@i zuOy9#$IS^cKimTQ-Rk$arLWLLye8PH7Z=0bETd06(_WCKNuCvEBeH+YBX>iv*TW>VM%F~?X6FLIUdg4*izTSOorutcM?r}`!EPpwD`BQn{)y_ z3(VORcoLKzpTqH7$>{01`{w4;e&hS4|J#f7bZ^ts=_CV>Te;;8Ctq8v 
zKqsUWGiHe_F8fXgVAYIC)1l6FekrOaRdP0>uvO3hw1P7C**XLK`^BX{Dbb&e8V1tZl8Hn(f{b){t@0fB(DL6Y$Zs9@*ogDSuUsK8 z9>7R^&JdUc)f5b5k$Nqbf&SkvN<&P7M=VBzn$2G$aVKfUN@vAnbA`xUB^{Y|Vd_cS z%Nq@-gOAX>zvNn>DidD@EXs~w2t%~t&nzx4NQm#tDg2e3FxuN1NNyg== z&SnKbz^PJo;%>47hP!O*Ybq8fsu=k9Eb4%DI@pYRwXI=sItDYamr2Sv$2+7UZ+gB4 z&Qu)+lijtzGMz2VDTblt`69|WFT7?oALCax5u2r$s8%%|Fe=FA6`ZFpl38xMkQk_T zbCPl|8?wQZ@(thB!dvoJ*+P|NZ+4?KucM5uW@<&laO#?-8dz01!l0-TBl@!wukEW` zKHFe;4PK2cTN-<(i&Pd*l)I?F_ZgX5CN}-~$*OJcZs&dO`X!*E^-W3YQUQSAShwgF z?XdqJECAgiHsQ16H)*w?9;C4dr~?_;<|PSsc2s6B+JyofADj>24Wdp13FPkzF5T-cM|&|pj8+(jBPyQPkXbZzzm~k*-YszCSC`jN zH^gcLwEV(eB#!OVZ;fPKSeA~A#4Z`Hk>fRw?R)%NVgH-!_`9cXJpD^C>dYV5JsvX` zUR@ur%gt`jpxakdqSvWxF6R>>!5n!mT_^tMBhofNGTCt~&qd$chu5h-2heH$>2fS0 z5N$bdp18QCrKYgZXkvvaAE)ZH@{NNid?ub)Sng}T+ocY3Fit9?KEKn1gx3Z6U*EIZ zi?_um#^aCqGFkmD4@VOjlhY`WxCJnk%ZH1ZMyGl6dOO|vl$xg}2=jeGUJlpqyy>nu zT-iI$C@iaj+CtmFmOU|r&2hLX8}WE#xpt&AdNhRnx6wDmwnM-Ar(z16;kpC^jeKef zSY`wXnP3Is^teMKA?N&0F=8x7?T3r5Y-{@qViCW1slKk6*acIP4dHnuaJLhQCQq~yN7CUh8{a( zwL57tg+_#+|HPeAQCF5KRztv7psX~!-YK^&1X@jZtkINPg%*!^;vM|z_(f61PQp&l zOQQAFl>ss};#;7Zgtb~~s*tMkdP$>EW2%bE5+`uB!g990o**T%(iTo-<`I4v%-%Pp z_~c>6!nM0lZZ3Mk`7&vsc(Z^I7lI8M|=M_-RI_x5NFivAk&c6r?1DK_X1g;;9p7ICH=Urxn6R z`q6HO+_mN@)Q7`Sg=gT5jN4K0B^k}PqdXWZ>}ZGqt~1zlC`j*|R!H{jRLx)ROm-)S z+gv|IUAj=Pl}Uu)#0z}aE)Q2(0apM1VSUgB?ld8b$Kr}waxcqI6Xn+rbOHjfmswrt zF+n6r#CEU!nS02*Z{!PFt;SYf!x_MiOtc}pw9kTGti|o69f2$5N%L*Rhy(!uI}fR} z?3)yQm2-h@$M&W|IK`}AOdsi!RD8|Kl$0$ED>j9HP|7Q+`xgpNkZV@DQ<_biFG5F) zkHX6(;-uUZYca%Ohw>$Ly-{Iuo!^Sgc=78AIDIC|0VJUWi7 zNqxXerK_54ETP9Xn2GnJu6rC7Ti|@AZC|^ST*0romkv)@rk%PmfcqjEF?hYzilUb7 z56G3FYOv+N7OIT6I@B=EU6d>(lpaL4%m1YBoRnDDyUhb4-At49t zeh$k`U7?_Xq`}Nsx~Reoxc83A=k(Bj*wWS)RW==m)5BDcs^e|5qKZe`(RrTV*%4C|_mI+CuihX0?FfkHOjx0`DJU60p&}lw zV2dsOU#9niu?Z2rli(e-iVnu_S=;aEQd>-cSbX)=(($B!v7!W%Vzn1hQ4?|efcsW5HAhO z+i~Gy7>m#28z^b}$WrOS?N%Xt7#jooR2D;bXVq#EDf(53qiFXBI1Or2qA2<7-kp#9sj*BIeP*KsA0Q#+7zfT;|Ms%~c+F5+= 
zWrQcxS2)l^X~`C!NF5n-8dIG#L4CDi1{uFE$RS-?beX*Ja71s8eokxeBqV~26Ts^(z5pFl<&LMc)V+Z zgOgBj+uvz$ZW@`VN1*z0i*O5G9bn=Cp6=*5eS*y6_ouM;Mxr^#^|C%i(#V0Swf>p{ zddt*yL6pMZ=!I=|+vW010ey@=_D)$4>o7_E*}7D9VBYkivZ>ttvK$({Wlq*9FW?5i-d@4E97(4h3#) zx8N;I_YzYvaY!`LiCc%mQ3}$sIE0zDLp5gmzY=12=&@d~)7|P^)&AmJ6#g&1#ZBtk zPg?uma4-@j|KB(m2oWrS$bN(L*Oemu4d*l+rLKoN>MaU5$@AI0sh5kNn~0lED*^Ig zP=E+MgwrU8#5wZu-DWoY%J}i>Jif=xXz_!vks|}B3$yQxg+O|WJgdT#uGd#-y|Soh z#-UQl&(xcV`Z`Lyr#+r_il;)y(T}KCWC~wvHaakgL=MP8hQrt@0V_&{C$qxlF=*AcS;+YyizyKeQZIVVK_rT8DnnO(s|Bi$ zl%>3vhx4D*cdT#O6tRw1tLAyWb$6eUOF_F2>ohEbC!cN)TVH9|iN8kXaq3>O9NV^U zI(@2o0GUPXpbuI|x6p6h1krv0d-gi_{)rfBvkaxJKYPcPbWZD+rNro1`A_x>1a|s= zuwPcGnVS6|wi&q3XmhD3`N|S z5;CgWsB)JGmSo?;Y#h9Q&xI2yT1-SC9m}tZD5mD}I4VigN0E`;A;8?QhHA*a^;DEg z8trVUruf}S!aZVk-_ar|;R-v04Y4rs(wnEF@iOM5*`)&b;qUcL8-SxQmzWjD^qACAHyD2^RfsEiMhqb^LWg2oT;%PcT6dm%AIP6x3 zYg}PWpuj=D_{h64Q-tgf&AVUjTd}TOP0Y@GIR2DqcsCFJwM63I-4Y!e%~ynhF*r0% zjTP-ijeU0*QtgrBI|Q!t_|E?K3(?;Cm#%yBB1hXb%=o}&oY5-kb<97(B|~!}lYR-= z?CT3#{7%cXA8n@BxW0rRXtO7hmdClZJ<+Z_Y-b50pJ%AXKD!pjA$Az2j7V>4Hw345 ziMfZ9u@DArv(n45?e7~})E?ZQA_&B}p^mw%Pe4DlBROPFIz%=36LvhWB#;6Mas1db ztl$d`f~{8T-QcN7p3mCO1c9ebW@T|p_HPC+%%iUL;jydGRM+<&MKfR(!gmmu5 zDi`$u#8XfJBUFqNmk`BD8tQkWS)fZEGYz-#od$4bM`Q07BB9^o+ap;gzGF5Sr@28| ze5;7dGMTLRZp-Fv*a48=s*jK&3G2DE`Dy-sR_-%iZUYtGZC3N@u7AmGhYqP-9_#IO z2-*k8l)R8Qd84`kALtXG8auNeji!bpEzB}j#^ReCvi#21O&iA)8iB-AL?4pBBoUF}8!SKNSu{B;~ zUd%hz?thY7oaO!#xh37p=X2-b$N1Rs8bG=I%GXq!)>om z{;D6QTmR$zr0310VVU3h;IP-X+~+a3TT?TLQZ_V(ENX5?tf91ewCNb0B*Tx%F&v(z zV-T(I$DQCe7s^1G<7BbH1E|@tK*vw*3Thq$?PsKgsQ#|Eyir0qHbOli>tGhjmguWT zuIt@bw=k2<($$b)VEaZwEGwV=ClGFhQmzAc>){OT;2PA>(304nhv%k0G+kc}!smhw z?(WXx5r%mGcNPo6Qr6dLlBfG+GSTELT^{|H?Cj#3hj7RwqXx9f1zGl#C|xRyW~kEM zCO}v$UfKrJvPdBcDHQ`PzmJ$yx4bA96? 
zN?Y&4nvGck8cs)9_QSCRpZDJ$>;4pbX~U@kPjc6FPc}&htK4E6?R+1SI*QEU=8zsc zT6!gIjjKDv0bp>5cEUv1#LOi0q@9t$BudBwjg94(t+y)FH=DA!5og5L*Q>xafJ`T7NMGl-8Yj8nF@P&m&Z|f2QX-fOKVks)6PntQ> z0Iwi;gYRc(L?dzMd}%OEi{XOj8Ho^U#g#qlB;~}zO_l340i??fF1rm6F;?ipEdtFq zFrRfvsKwYA{gn`aouGMuTZGOR*V6kc{;MPR`*t_IqLo<&2)zRpTBrB*k5K$_ER+_r z)dn+I#E-$yP_f-6v?|M0ZTpo0lA8AGQYqW=EMJe^2dotZ`M@4Es$xsRvWk9cjls5s zsgkRI0AVO*!(o*CB5AMe$3}4q0u%~`lfUB`hkHiEC#UFwg=l6inlVQ_!_XYf7mSbz zUbrr<;zj}KPx&C)04t)O4l3iVJwuJI+f)jWT~%N z!Kn^L$Rd7q9TqW;j=q~;05Mxoa1s2mE-io{W=kyH%czR%p=hl<)lu@Wvk7hR{SVaIYUe62fCn^vCW{!a3vE5D~ zrKUm?t}BxDxn<;>h!U|s)N1ZeyPU06V|=(QIyEze^TxU8xE?jlihx7kCp4ilCW$5& zEd9$h$iBlZ)@h?}xvF8GoSCS}Z&M*fr$JoH#|uZ}5jEp?D^^@2dOmt0NdgGy#glRtT| zuXmeJ(v>eg6>xlYhcmoYYmiair3rzrCEmJr;9({N%x@i*lThwK!{E-|On-4Nahd*v zG~7PBnN?v6zb0)7Qu6+aEk7Dz3>fzQ_O~E1Q9Y;h?ce+c6;~PsTPI!a_LUFmscskK zuy?=7RTWeAD;?BH%nl_-E3IBg+;l>E6NVXsA;kfyH6_I zPb$p06nW3^RaBBmh@>bBN*;4+6>C;mK&mzh-Lw9Ih(vUPBpNbh z<9E57Le=!o*1Im8Zgv|)Qc-0&OwCKh4>~(y{V1Y@x$z1_$2jhn?D?eg>{GVQpT4`3 z4;BT}Ulm{O94RY~RAMXZndn`gzge!HBnWddriKq#}8sfGQhoBTMsyKR*al4NKr1Ok`U(!~P~du72* z{#lL45si$uuqs1S!#Rcu4#p-=gRKkQ!cwWxn|u)a@*WBkwb#tM%d^0$EZ+g(Xt#P* z^!WYJZx6UC;M)rez((S8kFNc(UMpbPNL*6=Wi1?+o0QZ>bl}W8HV_(H@mkk$&3+U} zvvHrOSJh`!poIr!?%J-bqf_Ei_le(%Ql!jP@X-)~r{z>fZnDM^b;`Bf`{B51zxrL% z2_`asFJ`5F-))Dv>;gS$wRrY@~_<*mG(KbzJ-i7KJ$mu!;@*3*vH}d zI9hoqanz>pGaC+`#jo3$Ot?(CmZoAAAg`#1xQP0R)Z)Ns+X9u6iIYr(9Or||sgBj& zmrS}WmXc49uMB-UWX=gEZ}c^+k-bg{;$$~1@YY^~fY!?9Q54Uvs{xREVs7q*_68`0 z$>KNXvGwiU@3nt~3ML~V;d&5F{?7?_L9=Wyo8sGsx;!4+Oco`-4G}FZe9ni@+x@cf zNSrNbt_NDnPETk19niOK1SV9e*VfdO$3{OjiyRgTFs{`Z{sis1KLoPYKB@Nvppc4n z-9GSy#oXOTzIQ_Q;&RyO^!mLazN8XT&k#f6SW7MOfBdkRSfmuwWj?fW*NC%{jJbYl zSltZ#g0uN!jh0qz%el3@65X<*b;GrlkD+JOKm^jn9{7FZiR3T>W7Y(cm%#m9n-Si# zinf`W)w2ot`shJdiT8Jiw?gvd1~u$ZR#VlFOSg8ZTn{|_bShnL@y8QgOBK?ravYrw zdq(a`cDLhQHI$}Rhq0BHu5n!iQ!~Y|(8aFx^5nFAOn?@{U9KeyLp8I>(n+F1s#Fw2 z9BN<&$L3LGMN8#SsK z9Cs(Eb(3p!TFh9oh02js8y$z=1-Fu+8dXkD?Rw}4Cl^@BUC)nE)$GSs*;i+T%>u77 
z`os0&=Z7BhTF=(FisgG1;?|dkg@~;>MxCtkdL*tmzalLf(_QJ$Hjbc8Tt!Pn2&9ThY zWGVl(f`Noaj2?ylF5Dgm?Obdq)(u91ti6<1{v3VQY#zJGJDg-_c&G}xsd#*PoaONg z$9O}vxHL>*L;fcoAraBzFHkXs+75PBPRV?aq;oz)Axt^q?zQY}>^Rw+%^2qs!kL&% zP^)wWoRm`>TG4KH_i(Mx2$zW!rnK2`1E4Dx#R5ljSmDjkZTHB+jX0RyA(ygtXkGXQ^goCdz%OJMt#dFE~U65 zGqA3XtE`Qwu8ncxQRHYmPdm>kf`pt`QkrSm!p!Tktvd{*g=B9puW?Wwi(C$|m^k8@ zU?|{$FA|eRLV!oZ#vWDbzQ2)9T|4nGSFRpcOzW=NRTa_VdlQcpI=6vHZ_?oQ>Xnr` zP1*D|kva(8-z+P;%S~VN_ECzP6^cY9-r-}0E`Gq<0lMA$Y#uN|RL^^vKH%|pjgKl; zkczuwGtu}Cj!p@O!F4MJhujk~SFg_)HbWI@H-@sU`$WlEgtBKL$i5KIG?^`qgbr^w zw9imrE*;p@={|KR;DqL|0~8~#L)TeX??HTk@vZ^W93?MSmnl4XeSB2fc z6ARHL{g9i`x{D4PX;%Se$K<7Nja#QJ>hg3q!-N5bY{OcRNHnbrU5`t4qAIa*R{LhH#meExKW91e6U`kO;#gjEmK*B8Kb!mzILuHFl^m>| ztQX1HFu!4}BTM%o?)+jCgVeC82jXfg2ZTiCJKV$`Ex37wm9jo&{)9+4Dzz@$1z<}C z&7(oPOA=(~GM8Hm^{U)g8lmvLn3p0-h)=rc_elr2CeWoSBrHJgOtuRA(XpgyrVf#~ zb(*{Y=5B~QX`bpaEv)!L%>hwyRXG`xo>XkBc43{73r+!heDri z_61k3cKM1I1m|HuUEC!V@Ofx%7Zb{ia|>qQS6SNv#?>9nP#d2_yQS?v@N?MiA>$rI zxNq!(#rgL^6wOdx zxb~K}fm&avQWY*&hlx^qZbNGXW8H#9xd!fj=kER%Fj`N7HetoWdxx!S(V#_C`T?#5 z^9^%Mr?}S{)Tu`R6I|IK0}SyTU>@fCQ|75Zy>+|C^{)G-CnXOhZVn6x&h}&>eboK9 z^%Lpk{@nfLytOs^vrrzqU+X7hWLShaIQu8nal>GdJl_XR71zu8*6B0NMp{~0-}(hb zQt`1QN`gwplju-B@nG`&0Yw636MyUVTT;LzJJh!G6cj_oE)C-gkJe7oKY= z8h2{l3&cjJ2(;x-cp|N{U8P!Vasad#CmFLin1p8P$>0)F=Ma)YtB~*Z4Dcn8iwzk^ zH+r_`h`+gK@G@)L?A8;Ze{L)wOmVv(9GJI8oFp!%Ze%QULc_s&Dd&(Diho>urdU2f z?RpW4Y95+P5nSG?JDyq>6&*@i)=6Qvp~7LOzb@?v?+<#BOV_Z16C4zeGBv*ZdiXUW zhM~ElU+ntqO}_GY{PBUFQO$pd@bEBHjWZ@)C`r9lPt$u&Jo4>S!1w+7=z^Nqus*~p zXe~Av>}MweR&7aF2bXUc;(8vN#B0;j6QQ)2?>(9fTkRtx7L>9-s}w#&dQ=u zW5Pai|EcHopl+PcF-^JWEz-2xN!~%c&Z0KlqLH<(P@^27nqiImQL~Z#CKZPkjv`)! zvt|eStYI8T|~m~ma;=hsoB8?{Mp>uAx&}CB$cJPr(Gp^-BH3-XmXs!V^l2E zW98`4=@e3q1U;3u51Kemn`a2}_O9qD48FVa71+miIVBeB>^qg1Ek`?OjMXWLY~8~M z-Pgl}b}uKGJYR;x$0XyoNV@*YaYlOg=t*x-;74fm0rcq#X#5H9raTK z!yboG!W4?k;6F@+}%tGp1Ll`sVf(~E`oT{m zfeojY5dwBBE$nl?2b(9`7gPo7VMz;2>bZg_r^s&t>dDHd#3a|JG51i)MS4|M5tH5! 
z=ZlqsGO&QDA-F+?5((l0XnFlQu$U?0(UP6hPd6DS4>$_plD@|ITvf-;GY&f^)kS;C zVN;#z#fZZMj4LgjHZN`_94zu#%(sz{vY{fK=;yQf)7b~Mk$~mrOFUta#Lu5)an?13*lbYKwl1 z4V=oSWCgPM#|MLD(o8c}b(~4rO_R%*Zgyo$&g}pQ1}Ufj60-7)Dqm_;YJO_pQP;=F zWI@ggxYkQW<(%1MF1r&?mRkEc_l777dhYxWQ3WX(dBTTLql8uHiik=>N+abxmpexL(XVA>r68@a)aKpAEtb4Px?z4WjZZHMrTh0=-L^| z3T;o6h6o_xquULabEeA?F}+ezqIFMl%3X+S{TXTT3dKW7K)34wYp?6dhG}P#t9nFva zN)dk}3%*!5`x+=dnl4j>(P!frgUjV#AVC=F=X~S0sTa)Iv!0X8=JF8Z?nPH&wdz1$ z>v&y}BmdGnGV~*$aJJ{8^YXU$NsJROXCyAKcb_pfeAKGhrcKOpQ(KR@MET$x&BwCG zL%RlNo;8BN+(g%5k=&J7R^k?h)|&EicC5&*O~YNRurBiC43(`En}H*|E6#YnNi)1RvaNX?BPo#%Pz zIt3G8H=}!EEZajh!vvWgWx|lyE+GpV_i{NM)Lo4j)QuZiBQ2bYX*_S3;8DD z?v9D8!AS#9eA_~B^zItYTk?%PbY*Y#h)y5^KjSRN_KkU6@ zbY)Ms|J$98)k#MkcWm3XZQHhO+qUhbW81cECwKRA#xu_IKlj&n+?RLUao+7@*REBV zYptqT-}#xP9(#~k&u_QQ3%?RoDhyb;Y+EP>f5kY7iikh25+FxidrvVoye5X3rBhU{ z){|W*q%$b&3lQdUl9H+#Q7^bu*x&(`fAoOG4+Mth=EB9%P;z3yj3@4F zfSPCqnWYDI8{|I^QQ80&n0{r2?JLM=$nYU_6|&i7g0qRiCZ?(+=yXo#dohRUhN+gt z0^FNrWN)B=Nj z!k`){mGJxY@UwS(Ez^moy*iLja^5XNh8737DPiW4(5bM-&e*fI9Wsn{2Alg7U8eih zn)c&IuHl=w3(Hht9q$-?!CLR%zpm})ohEEGu76K`TnN zCpzDU-5(s8Eg;OtYr#KW%7tC?8yT-1hk_xgOi|$R6)H!(O|w^eB?f&@!U5;x_J^3W zvaL~mYwMQ+Rx|w8L*#waQa*W^Rk<%ftXS<1wg_0Lz()ZVeH4VS)=89ei0DKROsv!0 zKP#Hnn_841)6b87tmH2iI0}k9L3RzjCQ?HApBXejcnia$p;pM*&-X-b`FcTGKQDaE z853+^%UuJ{plK%oZMp|6s){e=#?6Z;me)DpVW;C!&h-!p>d8?Ywi5NHc5Iy(+ znDbb$^q zX)Ccu0!n%;)jX9ntU=+fu1pJa+&*LbcrfWK%^NCUT)Cl|6PRWJjK75U^I6R$$H+E^{A)R6` zT$V*9p^#o|))v0q80XrLY}SspN&#mP{|YJ1P3M!Sq#S{!^gQnHeGBPqtqi@N++ z%Zf4r2oc=zt9sm9>a5wZkD2V3qzzjM<}DG5xpRD!{7VgRnR^EbTB*TH;HS&y!2hO(fK9!jZM{0qI|%= zxdE;4215K(;y*rG+kXvYh`Q*h`JZ3>yA1afBy~AzeOn-z;6GLXD2Jiptuvn2W=;No z)co%X{%gpg%|Op9w?k$A?=ASRazZHDQb$5g+e?u+*$9?##?{0~wNq&cs$CZGB6Dg9 zq8aMgq-=;yV#u<|8(xE{{kxIYZM+;kxpQvK=WCaqB9GnC2NktHP8fgs_u~!cxYlVk zs%Z!Z4;tI4lw%F!uC5pD!3#ZeXB?79JS*(R_wBw5ao2nrl= z@J>VIZB}0TXu#1@)l!o{+QP|dFYe8$v)aXs31pfoE&Ue!E-uM9Fq<}9~@hQoprJO=*azPu$(RI z3c|!kX!)vB!rK`Z+=P?P;8~qPU0#qHy@GdwG@>fH$!24ZoxXNrx`8R|1-UR-+aEH!-!v8mWu8;WA 
zn`*H;)bbDa%YU`S6G2;f)@n{2?r(jWVg!OVYmC4<dN-?p1K= z@@2-zbf~{w_f;B1B z6JB|X@pNv->jReB^Yy2j`+TXwW#^-%t}bqG>R{te(_aRNxgX>WexZFoQ4Nr{B{c`z z?X2b{@C9O*DBW?zWE?LD7PHmu@nSDuKW%)0J@(sQY5_bBBN3wk_p)tyq27z(!+MR~ zAk?Spc4%4Els!*6%ikIfINx}aQN3l}_FTD>Q&CX?#D~Pm#$ClxDY=`}C<{iSwlKL} zp3IkB&jTF3fE>ZP)adDshpi5PEaEB%SXi8vYt3VwDM1vvtCOpYRvYbBo1Nj|;X1v6 z1x`|2=0Ki*ePXXval-{L{E?m4S3J*zQE52t8+^%NH=zBX^whsz_iAoiejGbC>b#o~ zJ5n_69e^3f)B1QEwWG>9eO1bTm08sF62nuPjvpX7Kv3O{@3D1Zzi`H%bT zKR!-rg7|XG1SKCl+3aS$yvo@f7W<4waefeY%CYOFqG;J)(@AmB>0iNxw;va#Nqf#V zfmODikoM8{^7c-FO%q{z-E4jPuKV^5b@_R!^?8z(-SN4eRCl?mu-W)nvGoFQV*S(H zZ0Q607S|uhFzL+=$p9Q>sHB;j)SrQ^t*wt6Uf<(9Ztt1$q|hUL!YEsbDLG*=;Q&X? zj+e{tX;qyn9}l+SgHnRwSUx@^vxU$Y%va@glP|p-Ud@2?k!%DGJwuM~ylTa}MB!dL z*F*7@oS+if+w(Q7z?CxpvB%y( z70BQZGpQyffVf;_lp;auHh{UXgMN#Ni80tW==@l7I$sH(N%LHbpQkF@r%(QZZD(t% z8VQ}N6SdUk^?6-23E){j12RRd+g);OdOWVjNpUV0r~!}=6h(irCvGq}@P}$cclS4) z5yQHc{hxl@ixR}d#8(~xoKtF&eua$JHGm=CAKF4~Ts>cJ#r{#oG2y@`mM0(}z-9ij z&0KV-|4*|AnA4-5KwSgCes*s7BCHRgE$bHt5Hqv0GV*rAI|NQ^4nns>TiDggmLt*%7h$qyRX*t&EK(fr zb9R%QgUN=kD>mwC|7=2xh)2o=3~rC@mr;)#%}3p(lCN4P6g1LP9`v z1G{!?FfI3hp*$SV9v-AwX|`EXZ2S!Q@WpaH%0WdLXMS*B8ExXAb=z9kTu3|>`De5L zN&$>d&sx&&{*7~Tq;}-}^)V0cK0hBnqlRz3;d?WM(mzj*H;@m~QMJza?^@gAf112M zzw&kjA|EKupg8=qF#gACjB$Tg7ZR4Ill<-Ro1Fo8MXxN|PW<&6gn$>YY0pT?U&hZH zXa$10yySly0hoaQ9f^ON+5e8j|Bl4}d?x*`P5l4PCcJstRO}`=zjow*XYTKVK={@P ztNuTy{ogN^dPL>x{m?{gDdS|FafBVy`#x(%w@n=0o4l4CfNqME>!$ft z>s+omh$bGJEhnchCe64@U18uT zl5^U=pqHiFm!$nEmCu^HSxApVPdy{0lNlh}cA?Q%t4t1>fjBXjt^(lnLz}_^_SyE$ zSPbO8x(KcYHOf(<%V=`DgE1Pi2e~WpQ421HN4E~jorXuHR)P%JB5a#d_R7S7kMvuk z)&Q3&lfy-kkpJwmn&#^pV>mmxU5rm_o?xnwO0}X8k=i?7S3zZD&984g8;&G}lA-`f z!ds&Vj%Y-*ZZvzfG~&|T=L~wm5#0Wq1}kMyGv4*;#er`si1nzMA{yWRiLg|NLWvdD zW(^oxLN#?ut76g?JkD2^)h*}mDE-ao+3i`AxA8`)kGI1SM(A9AOjHv@RkJSSq8S2} zTp(38YY%dEU2XVz_!zNMr}I1Pl?Qmadf$%%WQfv<(@qc*xz`snMxL90Te1OK&Dxq0 z20gbtkanQfwYqE|eR}>R_Lp^%^sirlWe%u+1{6Ra_@8dQ|K7i;QUUxS|69esmizy2 z#v;(0JOPTs^Q(yST~CP|`%?(+@p?}_`1|!|yMi2Pth9ZPfi%Wb8A2^CzQB>bj=h-S 
zTmPWq_ye=oT>BkihqYc_K(xNj6?mSIM(e`rhAn~5uT%foYu5+U_lsGxS&x!3n*GfkgewBMqw^9k}& z@S*--4x#x&pxZNKmg(Hq;Mj}mKuj@`&G(_-j)A{1U%!m4%YsX~GM|E4c6>a>)|65^ zVaF%ogFp=HNvCr!PJhqm9PrH^IL2~YQ}>(i7QZA-Mhw0qpb;0jwot-}&#i0V%UR@n+IEYr@y7=CiaS;>VT*HF} ztfZW71l1TkQ1ycLswrpnA6 z+w17)0EAa~g5gK0)A#s`2SZs&NJ$+}J7J7z=L2b0>^Eu@19O1vJ1VvMW63nL6RCy$ zZ&LP|!6p}*A08r}5B}G?{xoZp0&T#!glL`aEwDxwI@=%jF#E3&tWM_U>Y}bx!!sqC zeQX8PIUBQP36ED4>oRd!O-2zNq*MT+CP{&qv$;_a;ykgg!EotzA~vhpUNiedEB0bl zFpVM%7Hh6xK7$B+kYtY(BC|O6_>)JI$ztJJq1^-vn~avBf_u($FCkQJlf623B>Ol|jC8&?0A} zdDo0($ALZ>oY3_jYbG1$_jYaQqmXu;Rb;Dr3?w$!8M18;4kvl&?M^72ZZCNkN!mYE z)Me5%4oqWv)?#b+`4%W4Vl;5ae+!kj=LIs@n3&0iv)Pd~?$0O8wd(G&8r%;H*NZUE zs_wOBbImr=J}+sE6t2IX%NoZahGuv!saxlwuMly_o1Xm!3O-C^5M`dcq%fIE>5Yqv ztEHXAb7)X(naE#)pW+(x$M%!>mo&tWLc-R=Qw?xjx7o-NX(h>A?(&Fz@lmlnk;&k; z-4n3xYX4dRp-R))-29a3b{P^DaVw=P=QvzvIR0{1Cv_=%0qLwPN70X$PVnlv@_vB8 zYeF9}fe4zx=Wm8A!`|#wJ(acKsdzIinzyOqa=!Tg1%9?TTQI?E{8I4H`rF zPkRxW(JCBE^z<}uy7sb~o6&u$;Pgv>G~L@Xlp8@moxbpvl8!+gvvwm@JM{Cz#`FlO zkJ&0O=bLlEAve57g4iW?%V38M1C+qiaK;wVs-;!i7z-wG{c+(1OJ6C^vGQ$#@TAFbF7SgP0aiAtRcld+w3uD31kfhrxcLibmk z7ZzAW*QHN351q<(3VyFOD4l9uLfH?YOz{eExQrTJ_d_*)wD!c!D1AmNaXF=BR!^&T zVAX>*xpHQj&RSo(8$b`C)X9_HJEByYecSclsmMHaKVL6QJ(ptNuew4@+lho!;(zf? 
z70fO+ZCGo%n%n!US1L`W6X%R6QA7Sw1?p25idD`|11E>Er;b2GneCtN_M>Q|1G0)2 z#3xtBn?Q0xOcv0-n2I$YBI5BrFUJ_e`Buqtg!n>R!`E zXtJT&ME(6OuKeZEJ=$a|X?>GP8dPU#L9>~SUwbyquy zQ?IK`%j@eRAkw^W>gq{z-ZVj~q9@hGtr5xdN(#W;aMjm;K#x^BA=hkc-6?V?!yoLs zJrH4C{=To!!6q?0zC2TJ*dh%n(=wA!f0bi@I(tKMCAMcOZGLswvH!@K;pfb5%R{{8 zqYK;Jbu7Fz(PNHtVjm)-vw+`*c&unPh1a89E$E<_2W{F>r|pjOR^vH(akycJ%gSKN zzvKTkgADS~^V&hhvO@Xp?1aGUgp-(a=2u&|DUua~I+8h6hq>1Cjh4W)B76z-Pn%Yf z*2Tbxw0I4(1MW?e9w?PMtyssLDf**X*>z$BEgXO)c)U*=IAuef!NZ=3tcUru~IpHc(4w zBt*(r2{3s3_@P8*@>m_)nDx?&zey4b?9WHNl@&_|@9Dq06x}<{#k;rW)@Spx-<`FE zU1(^>rdpQ{`tjX+`c!2jXamGML)l{S>N%S~v?*_z^d*hm$?Vn_wY=V+M0V9EO~y|c zf9`OE{4apnY~`unKm_S`jOlo&p9TJ88V(Z5D2JHq!_9t#nk3DdnETRi&V=Xk}L)15S%iscP22=S=ym%BgW(OLCm7gJC);SPuE z?jkshV(rf_xyo%rqPhTC&JTo>^dA`Ak;7BY*E-ndlM7{8^d`m|mns`t_xiB#Hoa>3 zcP=}&&-y9o5zacao?f|@Q0dTvZR?02>019h4ZUVlmQx>RZDG+roJaBdLi z5WND{Bgu@08;xgC@+W545R9uyR2E^edi;J`y9nv`(ZjOX+&=%QmO}h~Lx&G_rMSR` z?M3rlhB4jx>KBlVWc=n`?W+NSYD4r{hMK=D9`plQmGTx%|AOf7@H&{s=NUG?ji-W* zq`qjfMqj<@K?Whj zN@=;3jwVg6UmHk-N)hLA&ZiyN3yUEzr;7#>r|5gvcZx+()Mi7MDB^t%eFUh}wWJ4r zWUnt-kL8;UIQ&GGZTqfqbK3E22k$tQ)u&7re7-^nL93rvMU^w2XOB23&rY0;I7Px| zV`U=Z3af38nY?~?vhyeJ8yj9jf5Mv1IbgC(Pbby&)6TAY(SKXsiwgk`(d>_9-`@6> zG&Nt>R}yba$|}{Kmq!LaZk9H&KVKKM00gvM(vr?KSf+v92;JCDnsF~Bj~d8b^i#A` zvlE)D^HQs{E?F@Bm^!k<{zEPGx2YW_EiZ&yu01szNL#3l=S#Nl99L6fpROjotuR3z zZhJ!IS0=lXeByOxu8YG(Qk=FxTv0r?;$<7|6Ez2XI7;^DD^l4!A|yaCDbG3#>5@T1 zgL%u*oPI>vD-0X3BtOnI`y<$ugk=N*K!zZ9TuY`?i^m($c>Nh{ejnYM*_RC6{;FJQ z8PvwfIq4|KE${QjB{_-HvCukV+fRG^AV9P0rE7(N0L0NP>*Suh+Fke{Jgw51B~~hi zNn~)c!k%}5nmiBWJwa$d)?>>_jEPNooTGWfHFNMJRPdk5Tt~L2jgc2`e-na@~>2hMqDl zL&)~) zs>;Xh!Yn;>V>&xZ+A-;El@wG$AJTa?cQ#o% zZPUvt%J``%^jZDb)hid}OIG(YB)ArIM>vPhm3J1RXbiOM{&*FEuw1K3VQ3d33LBRT zxl$}H?s|yrBP0A-%bJ6;>nhKC8I7ZHCU>SnKO4S|NvX7bUP9sVQ2-+hdP|iRjaoJM z6uH`$FE3xZVf`H|8O>CF%GkD8c9x0)cO!da8!;(yH>T+E1}**Yc>(hXb5H4yz+#yv z_J*VmbxfdfxYE+VWIT`3?ImRv9+}_Tx;GF8PcZ6A;&wlj%4jlELow1d1xzcYgEomm 
zZJ|xe%CFnH7lujuB}vo$xR#9Sb>FSV?6p#)NUCSBcBaKEzx4WCujqQ+7rXz2GE~1htD6+;7i_>-2B<|e zE5aQ&u`k zNkHdi^5AxVM}+ajPNTCf;Lww+&!+vn<7O~eFhJfn-?KQw6_0^7j_JsCuBwvBJ8H&H zQUw?8{RNU3286Qe`KEaGB$8|C;;u(-q=A*SU0>56x1zJAJhbG6l5y?=r~Wy)7KP@lj+ZvGkeB8os3s| zsgsNt-wJM)8d!(4i}(Q7C~&ZSoHuqLRIadVJdEQ7^B095Sm!uBZaF@YZ*Jd z3}?DsK^?8@VYePl%;^fFC{H|50q7nJ9zcAl^&Evanp0gKO6}9L2}_I{rKHwQJN6j4SH>MYr3)#N!o=3^*h!Z z?zubVwW6crF|bke64M2@r?!a~&O$DSN3+H-*h4MFGF8PLuCjfYGZ;q(O3}Vo!gJIK z8t5O(Jg#NW9s4yv?8YryzagW*tIN!d+=r@ieK9&Huqv;#4%OrkM+kg?P8uB-{Z`3F z)Do}#-ZE2W4%;SY)hMR(YIjSve&W(8l!TvAd^1q1B2P!`^y`o03Dlrx zPZU!Du%F7SO+OlHgf`pZW%78rE6VgV9gM{6ykDwy8Qmi>-5#`?WI<7A$Alf^`h1qE z?_sgs8v>|6u8ObBDqBD~fI4nQ$S8VKt=g|aJ$(<@@^*9K7tC8kU>abQfpt@ZBR1?N zIi5}eP&9u-!@rDiM5nE>JY#=81yL2c44ymvK{s#-Q3;N2YHF&fy&i{dn$z2D{r1^6 z*dGjbYg7m0g3kNC!*i&S35V{pGKDk&i6gKhDLZSec`JBq?~1rWE7+gC(MA>JgN~fk zQMj}ED1*mF;_z#wV}^b-$d(?w2sdcUzO6LWDN(nFe4TjeT9WmUB=XbmNv^& zysmZn_5u<$fuQaFC?Yk-uwkF7LNw`<*SY5tkIdMruZ}*dI=d-kC2GUD-P9__dI2(T z&f_Xt24T{IwrKaxt>^ZB@w>F|@akd<`Z+m$bAAO#!fE3^#cMlS8oh^mld(s~upjSx z@J+qn*@aJf#ZHk_fGmT_i!Z1~`s}JfgjZ!_#fHYSL4%i?Vpy!-qlSZsg$BV7$jH@f zio+#ay4h9(tl(oD(ncze?HV1wS0%<2C-Wt5&~m)CM~H}HRjPjHti1X+fokYH-_Oi9 zTmJ{h2YD^KQ$J=gwE0lJWsSK02&Ku3WfIB-Ic){gRe@A4nc0>Ot+W$ z^HtIeV^@_Q2m=mj9vi4N1A%Z+RoB;!*6mqGjZNuTi%Aiy5AM%#zjK)ECiaPQE#t^+ z5=>YmgwO_DJf$x!Gc3|bPy2F(K5So>awK3Hv?^CZj1*N;8Y{1H44W{MG_3UN@GP8! 
zmXQAlde6X*pX66S4{%rYt(t7QoBI0r%NGE_AZKbV0G1@`HzRSD-Sfu{&cipr1IuMP z6p2g_8e3*l`f~^`YO9RJ?qKUusjQQ?!Ddq`dYaGpM=!V^7RKyOS5c;`>15( zO;^cgQ33yk\&)KU$mqa}PR7PK%i$mK?l#V9Ivt>zQExoC-t6wdeX5Yq5Os!aFn`qM(?BEsb19EHj5fZRPyb!I~l%dg!Q zSGD}%O#KxJOSk_)941VJuixvkw|$Gl9bVbw?q}B9qT$^ryVJzb1P#uSJ=QgYRa)x# zuVVcgRU$2y0%|mMg6B<-yN5_BF64#+2-J+ju})D*P zbyR4cnwz-SI*#tYG33gi9K_ERg8DgqQ*ASxXJ}}CEOiVH`2M~dRFR;wjZ4VOjCg%q z$^nJ#QK;C+nh)1>)xb8rVo)En>4M)Iq=k%A$davPZ(C$sx8^M-4#&x8#wJEUPJdU) zHn^L3z#VflQ+sdaUSHSDqyG@nY0wR1-t0PbgB1U-SYy<9h&ZzQG{EN%!toIIoZ)jf zl@!(e_Qujb+7(^xkGJRXvV6J|%C0wC_kY1pnFuIAc%-1SRa>o6t-9Wk85ZD6(%D^V zt;9#WI)4(3n5gzlg(ki9{BFDKd>xo!qHNysWiVU#r5zfhq~Vb8D0VNU&Wi!*sjIy| zk}!_v=gB>e0Ls%S4PTjxWW|AFtC*AWNjI6bO-r;WmhmPRKBP|@!h^@3CBqv^xs>p^ z;FUxcA%I*UTO|<<@@PbSUVD)y#sASdlxc(djYvVPL=wV));b*6<$(Pt?v|v+SD4H6 zhpE?UsCOYED#ug0nC1In_3a0};Q`++3OgBZoj==I+vFy+5+sc2a=f+|PO9|f%=>){ zRo7bC!wUW9NgV>Tf-xM0@@On^#69s+Kxjm0-7v#*E&9Qm<#j+4C)d>s0q@BUY3Xw7 z1;P-zfLwK3nL#r&LQrsxM}R~yrrOBI$6A*dx`QMMFj7HBQuWq4kbggbbH@KL$y!Q`g(oyjLol`|!M8;)jjAQy%yE?dRL0 zpDU)SnE?9Hxv>?7mFryqgN$}vqs*Gi=ra|kd;d%jGvc9X_|_(u-lLj`L0hV|k)A!osLfHoa{4 zsF%7Xh0ccoud<9lN1IHmAczVH#grD|Zou9wAAh1x3t(!EKKAIFKIee&9_WltKmB6UIzqu87-%vorUg1(`P%9+`> zqK_(l9A4sDQP_IK9jap{B`S~z;9VA(fN%f@x?hfsZFsoij7}L>rPtrI1cf%9uV%Kh zn#uF$L4hfOx|-NTRaxIb|c@g!yhH@WCHJ-?&?) 
zjaml|;%#xLdn~6pCJwXcQIgI|Bq2KaAzsMFH7z5eueyvBv|PUD86O_N6{uD*mIg*Z zo}nHyIRS|h{CZT-ymZ-1ndd?-K6PD{FoHQj4pW?zjz-8S+k7+N?xIeWuF@jFbb6`Z zt9&=(hYp1tFtQKD(vodCiCAA<0FZ{)$grmx-Xa3<7@jg)b+m`Jy5ED6Gh+95YFFHl zt5%vxx`b9;{zGNKbXb-3esOgn@m@>DIDvGdu@HqWrUE`8pN?)NFyiGmJdtcS6u z?1j$sda86ay8TdM4~K-2`Ep^4aE{A~yVeB1#j% zs&|I-QCx{Q-E8*A_L+X@fq0(lokITp*%{yN%^ju%c0nI54DAepMiX{j2eFww9=Q}r zPZ{5wyZgwilk*AT%R$DZY{8tVPG*?Qm~tEnhlpv4mC#rvc<=43lyC75G26KY1MVxn zJK_{BHzl1HqFV{m#^=d(*4~tIQK+<-&QIhgSI38N_% zK?Y1{M^fs8A|z)qz-jFc*5&qSiC=rM-nLssFoc zJkl(Qs~md0DmXhw7mCCvkWs|;sJ49GgQ>4J1fQ*XKJjR~s}QT5Tbze-$tot(dY(qN zJZxo=|ury4g4aCcAQ}>CC3ZEyA5Gs!`FpMou`DwidY)S@kLA=#Nf-5 z4Uknk4GVz{LZT?LUAmdCKd9n}>~ybAjGdq%P$sGFz5Kcu=7DU;ae!C%DwsG%-4NvJ z;W+XXA|76^CGGmCuF-GOq<}=sr{Pftn(C<~-&W1;gxlnS<-$|+dS;=CdT-h(k~qzX z16y*?0bLHc;E(KF_0=gaDsjYAoiE3xx8;qaL`y>Nqi<+$ZRlp~dKtjtM;)V?LC1Fg zs%EbwD<>DmmUaVrJ?F(4QRiIA=7b+9Tc%!y(2bvmz5WtAxcfO-(LxBNy3W9V})WVA`RJw-;mdC{$vPQ;TDlZl2~dKOw- zQt=&zE>{T^1{rHhMNmj^tJJ#@w$?S@)osXTCx>dmgxl~jI$^p0Ril}LA51ru^9q*Q zCpsN#f0k-473xqD)dSCj2Rq51yDw}7?DuWHX zokQU1rdWW=<8{HDArDo;p8gJyQ1Sk96B}>($#8OmBWmq78G^JZFVs~;L?mhqYaAiF zk*%lX`1rV_q~s^deRv7B$BWEu69%))SH!#GvLTJv87%>e#$fvKWV%VT)iPDyZUDJj zFSX~`{^I|@RprL~C$7psaWnE^B>aNZ<#YgJ52Is#(qOM2PG+-Kr-K>^I)i>u&HS8S zfQ19ZSN`|0ef-|J?8*1L^K~(rBP5Z_(bQHAFfrp?X`Lxh*Y<7(4NH~x4zG8bCJ}!A z;LXpAn~iJ85fj|P5Hw$&&Kf2&Eyau*4 zF7ss>)pcf@8+yOJ8d8VSMqGly75iM=LvL#`*SP_hmH6G&P~Ucs+!}dsH^a~E;sEbt z&*&;@#kL%th=4_T;?vIcP<}rq(vDwG5p%cmO{=&BHs>vQHAr9xa@Lq%L{(7hYicKk zz|~@?Uo-Qzcnsy;mDH`#ZJQvSI`RjByPs;=)K_gE6c*UPV9*U_$bw1ocy!B&EmgB4 zn&yAt=ImWkf!R`9@|i7?;Ic!P!0Jd8eFF*T^?>+JH+wqIs*vMMj67ykwqWTnx|t*S z`-0=Ol>LMfn^V_cWEmTmsdPQh=}5_w2fh*Raw(bE&saLGM$tNsd?qgU9@lG;bNfmd znPh457Sk04u)hfVkSf%E6(&F{GNXH0$*wBoEZ}wl4@q-@qW}HtWycmry^maBbu z2IJSO(OX)bm58`%=law<=r?DwSbhsqTpv|)l5Q^a@of6 zVJnps3C&js`GS~m;<2$`8Aw|v?RIhn>J)zy`&JUqP9nMNf6YnL-z2M2;Gsn1o@!-KjZgGAsrjn7&T+*9EOi+CqtLM_(Nd2uj$ zGp#HtC%c$QiP!?Qa3LfedT3Rpn8#`8Tv%k31nlp8HQ=CQW%*Ti7$St1Xqb2evA#gU zH1tYgO4GT#okJ=uQkG|=DtASa<35;0ahRkN 
zzJyXZ5Q&je_@-=E92b86{$iN#5F9&c1N?0=HDs}{A^89t(l2t$x?||>SGav+W#0)oSUn*Kv|DT9)=HGi-KZ&xSdBs~VYIMz7mTJwk7ArNT zUcOFrB%fb+xG-+@Y0@r*TjdPaBMoo#l zEq2f9nqA#4WQ*u3E;|P_l3SdQA;4^*)04e;tnY|1Hxu4l`sI(K=th{C0oa!k#U=iM zEXqh~>AF;DlbEDM?=&)MyJWVOys~XEy z#b&u&rEV(OkhV6bS=&|1FA#R*ETP3|bEE0w67<2vi#*dxZh_qj!%4uDFc z?{_#V-9RK}7=U|ZHuW#=5xcUy(SKo&KEbd(skOhjKJA|_0hvx#Z&o-<(O;nGz$9tW zuIt*5POxs5Q?uOgV!W15yek%rWvkE-hxL=n%3e7?g-LBNF4^@a0HvwRD>aws_n$UP z8{T0CdTvi6^w%`yIBe#AU&yc5Jnj%G)Rz>#@NgBq3;`UR6W#x$m_qOnc8(hRTEZdZ zoHT)URPpVg%2(#MVO@x1hQD={%6&}^4U;-${cu3TGrE{kCuMw~PLYIJyl(q;V|JJc zFT(y;EtL;F_|~n4kTS~_A#7bUYtng1xfHVUB}|2rNb~rf4BWn?Z{ImRN>-amj=o#W zjx-d2EbG{cB&(khZ)(=tlI+Q~K0qDR!Nf`k)~a4y>>FSwE{)H>Jdt`7jk`Q}d7bfU z>pew(7&fSX@4?_vy_rb*IRd|cC@~Ci^o~IQR#AI5iYanckZ`~N@S8GHl6mZSBeoMk z3ujPg-xhNg15@OZ>(2ew?0t0-vLVge0Uy~))vxgn&8Gyn>U{4$6qPHHAmL90jzjb@G)iS+EC$V;`eIhDYHP79bQ%_Y-~fo`PJ^X?nt5ZH9o#a3++b~R z6%ukgMw%2b>c>gxxZM~y^LTb>B{qMpguEVci}-T9*cK#Gla>&PI&*(6rp|90HW#O+ zY6~PIo#FzxN0iw+klWER??Bb6Mh;R8ZJ9Nu{@WmthxYc^g8x2 z;9k_P+CYB#2^%}hJC9bW!s6k-0KD+IrEXcB9)^>t8Y(KsVFi(BET#YeG-%!SaZy$# zqsK~ZQ(}(u0U&JwfbjG1o_a|W^vpDl&0?rOs$%mb{B^B30r3JsZ6P~EL`LmxXY=Ez z!dV|amZ~U+@3^82nVfXqpta$+3G91dP`cf4V3C*U5S5F^HRiB~D=U)S@&o3Odul8I zSmWSW5x*zPhG+4b)C67%OCx7>ph&S708W*DqYz&gyg9DKi>YvHe748E4437urAjRA}1Iy60l);i0d!9PT>-4YIX@i;rz8}*I<=CbEU-E7EoZJc6$6 z?YL`p-UCdf*%(^lnu@ZsJ+)^Qy_qLBZ3|p8C#evd-mgnOJP=@5Ydk&RQ#FF25$Yzg z&`RaAqso6-_O@2wGiU7r!r1{Q`MVNi>6Likz#5db2?Y#l7f|*?bjDlRE{l_Dx9_Q3 z8&TX#RaO3b{^{xIVN|U@35c~k&K0!8Z_nx`Gyz(-4SQkaomer->v!mL^N$y|hI1?j z+PLE+Tw8!w`!H+M;ZWn}^UKVl2hF%yRm;Qsq+RUl&AEC>&2$A#$C^N^br&y<|?V@ zSQQ278v-rE{PhuJc{>oJ0)MwV@cPv(r8c(t+i}5v&Hv60{5vj1*e6qV8qqwuEB`6~ z4rg(lOcr#zm};$4h+`+baLh(-GR?&!t}{-9f0rA4!h(2cv{wb9h{$+1;;TAEYInzY zC&*zawmO-LQKs#(Z*a{al$^PNB+vFimOlf%#nRI7;h4!xj#uMQHbqNdx!7XoEwMrV1tZogn$V(_MwUp0VQ^bL$fqVWW3F7KwGX=(C zx$x9>8Dv4hGiWQnM@-z78YpWcIMT)J;=(FR^II7>^EAKH*P&-9ffwrO{K;1rH~5_= zODEZdsb!{(8bjFC~uQ_*z%Aa|Awbk-s>nyd6YV4LK62>Rfg4H%?c_>``9%|M&QU?TsR z=F(Uu>C0)s-rhI_V;c;R$U+AR)%!F!_ 
z4kqakUao14ay9>*sQue&^Hq0%Gxq15s#FPGfKhg8S$cS|<`34;)VJ`7Y;f@2AV9)DYX z%La>a1j+iYvDdy2p?EqR5(7M7`E(%34Y;+XG`1$cyX3M#=uzEs?`KA(e)94L{mTGV)hx0kH9vkl$ zXf?7kv!~;NVi`c;^Hrq(8{N{@SgRbPK|@m$JCU7v>Uo~fxz7kPS-sMyIysj-c#-Za zDIYpoR-x0Fd}PdCP9N%?cScWoq>8_!6?f@6RX&F!y+o`c^OMV1jh3TPPf)dIRjN>D z-yu_XjVN1sy9SU|y@^@g!e+ft!=`Gdyo8jB7WH06W}4NnX2Q+mQFsZ;v$Zp;*)R3L zt)QR7#_6yW6lKOenL^#dUE1ap>7L#1X(D8@j^n*2*GbD^W2xmZ6>`*Uio#h}*OzAq z^xoOY{>frcU{A{nJ!*h}AWjQ$WthM=k4Ew`?PVGVCqif-WxpOhB z5>+FL22(fVz4irD&-dX5>EdSEBC=~ik@rG4kynE_(t9<9yIooz^!}@iqo8uH#R5HI z4EJOhH#olx8sbL0G!dmS**2)|r*+EUk1I@iV>c+44m}AJ@lBVgX1IHo5CV4c7P`ph zR&Pujq66RG1p*~a^{eU$P0q|Cf#~Y3(sJhM_-jYez)=>|AAP^xI2)Ya5$8Dg;l|Nk zKF>11(h&t#2FhC93>^)P?6hxVk(Ie?@p-CfqlOn9qDo$}Nl9JaNpf`D*232X zV-bo66N^d$#V!+AXeLV3#Tm7}y19Zb=xEUsTyfTqsm^XS3KTL1X9nGTxLZwy2d^1>RWHMGR)dz`q+a zUj3fv@}!3*eL%rZHSv%UNM94;3TFr!c0e0;!4MgYVx&2A5OMx zTz`g>v{6pvNM;dBVG`X%m2!mru)NV@{#J;fgwGR)EVg_IR^@8dAlJ_Wq83*z!$l{S zK@F3uwfY+ZI4s0s=C$|utdEHFOb9nK_!I&P?8qPt-Y)~wahp4{)WJS2bYvK}&*0kG zZt+h&>nR@uJUG?W>@y4!P9jH}{7FcPTg-~zW@whd@ypY(*vGjjHCKG2?6^G7ST*U{ z-crfj#$M)mMtakI&MhPZ6-%}!Fg?QiAqy@~QYRM?7=HCxlc`4hiz*ud{@N}jfI zgRJAU&*Li6O0IgZnpw=ojJi{w7g@yQ`@e(xM5-Tn0YhL~q5!X$kN9nrUAx% z>zh6e)3f!5h5L%j4IKOh{udCxH~9bj18yBycp?1aEoTYPn7;6i$9A!8{;W zgT^2<=8vc$cHcy(`hbKl<4Cv&0MygHe~w^)W=c{+dtI*24c z&rd{D(l_bocU{r4ORHazTgv^>ro&qx&J}u>$9LGbJfVo2n|0F+s1KSw4PeoK`M&6Ee(Oa zGRBNM?R@|?VbUA8vL~_a16G?VKgq~|1bG~ipEIa`KIDp3#VgFVK_QpwU|21xS5IdJ z6^bb8^S{*V3&bKVPnQo7;slUj5%$q5@enm2aJFpc=L!%tk$i>d?4!(_>8o2~&yl!% zlVj@j&y>u>$P)#h>qTJIK8m)V#(O3a{J+?H$M;H~z1z1t9e2>_*tYGY)9Ki@ZQJPB zwmoCpwr$(VjC1C1J`41Er0At{&yn! zZ_y$M1PL1TN+JM~vMD`NQcvMk?!q~oj!naF zqAUiQdU4}$wP>xIW=qUQ7e|}^Wmt-&-B_g2ugy8OTFe~~SSb8Ly4qq+%w(=D5QUWk zm@|(`dj2}pdFld6oZ8Bzno1;x$V&WSjB&_TNC=@aF^!My4i}*oo7ck@GF}P%ptHzQ zUJ*eXyF&JcVh@lt`BWCtGLDzf zEAraL0U=oht=(_Sf!okw@zLq|Et4?QY@>cZf3D)>Lt}!G;zLlHOYjjn7r5-e-_u=( zm@gRTyyfthdk7Mjl_4cEV{s?}aIUAOAgTzS8rTrDGHorvkm14ze8CN1RlNMmk5`0! 
zTMEj^gb>J{mr+JFZ7skZ%Wc1)pRkt16+r6~>`*Bf3EaSXr{>3pMofo$<%pVrM}#F^ zso%k!N4!E@9{5KH3s%V&qf(MA7Krwc;7e>yv4VM5K3qR6yAqf|ZR{-@@y!SMbr0aDTJ4}7@x%YdHbY;asWlF_5&QS zkQNie;Q4f23859;>Z0LUT3VLM?P9H|xo+jMzK8~Vz44&1Txl8u6`7QkloFGZ*>$^p z1xyfLZtA#4>8DwMHacor$}G;~oZ~;Pc1QS(7g=uv0DTsKlI5;NJJT+K>*AuK1ILqr ziOh4Fjj;;Jx*{-KA>3UgxQ#hg@JxQBo#oXz3e4W3*hl>BpUmI?dx+S220 zKdfS+G;X5;;MEH01dYag5yk*QDBI}ntDxhrwM|*8stt6&B1TI7Gt@9wXenpXQwD|4 zB|%66>+qTLId$nBNs;m6`6eenFP*j(1K`oOxiqqrGFsF(wyD#q%rGE@#m*$`l$zf(4)D%yvitoaanI3IO3Zk zPw)P@0wk2FYsf`TO;mVWLwhYOaT3=!ebny(J2FLrL{i}2jKUfaoNg7j_9L5 z#IXiQlJYlQaL=pCA62*%6&E<2Eku%Iox)2O?<;|g9wH){@9`=rrs`X)x7h_$)c2dS z7#{7T+@D>sq@2PF7eiCS!Mdy+abF$-pU1*R6?p8>x^9}yM(M@@tZVOiQ5Pulh69Jx zYmHlKW_6;6_Yu<19j(m~ox=HZq%Wd#Jl35V(oiE|B-BiA2dU-5@kX286~kzhyk=^5 z?4Q7Nv?kLJE{DN!kT0%TBiedGMA{3AXwL@!$x7Wkt+Lj5#jh5V*|=P@fr+IzS;zVL zJAJ#ZlyB4`68l7Wup0nqwaNQ=CFz7UMP(Sbt#`w9-x<{#{)@dzTk(B`r)ZJsu-`_A zA!htAHhFyXk`rr^0}qeBZqH>Vs#OlqNpN3rmaItQ8Cl92(SpFBM~p200-xbJqZIbl znG7+!%N&sBt*!}fgHSJQq|t=J`#>@;H1-^CB<2WBt|gTT3m5B_MDMoL;jU+8IkzI% zy5crix-NBPv2*|R`6Rwd`Ta^N0672fc0$ zW%s)Po1+Td8noAv8<-yylIxg>f3*a6?Kxe#p1BkrnHR6W5n1toZ}OV%_Oc@K62r=A z9npNak6Db>o3NJjvz#$DO&%w5*NXhgG2?p_{F$$Z=neP6c>nJ#fHWv)pQhK$qm-tE zQfCrf@(-uc=HoEVLzHi$5i)lZOMn)?uB@t&#%p{Z6YRxW-|i+EC6yFO1M63zrr3Ph z8G)7i6COGNL;r#hD3@fF_Iup0bfbC!v=)im<1}uBwvML}@-SXHvS3#V!O`#D7;Pq- zZ|?cptyFQ4MiVx`O3!!RM$+q;((jho<33>yeh>1N78DeKpsejk_AS?=4VND;>ncYd=eW{C$PBWR z3M#E`bJ()UkBkfG`Kz8ZsqDZYM~S>F=g_1}#!U){i} zjoQ~|#ExlEVT381HU6qb{i`?|b8*Y_nWH5B8(E;hNod?Iv+1`;)&Qziw(SRH6vjmo zLozj5HM*?hqcnaF;UM+6Tb4w^h^SU_2&nz8wN7By73Ue|*&4ZBZ>knT(&e*dsMWK3 zv4rYd_!&KOnY9ASSX$oh%k4t3>Va}n$s<1BX3Dz7d(hRq7C^b{PJHl!(ykN&wB&^J zjo1JA5zmO?zR~D)RWZ%-yFKLYT9w)5&&{=)HA9-J0HH!$v+*(hCS2K=M!m8ETYTIf zI+2P5{0M71m;l&KN{#Wx(V`!y%=8mPj5ay(KPItWq5F}ax=Jhcf$1{L4>2jJ9Jbd_ zSjo@bWvJyZ-6D6{nNEA}nzwy2oD4+!;FnIf#oaP@b^JWNVJ>z1{kic|0aKe{g%T@i z3lg7?OLQ$QW-FZD@hipYL;4Jz(=6DJI5Zzu_Ce@$u;CBs8C1sE(@uM23UJsQA!pt~V&z^{nlpz<2FAX(_f=NQI*WH|9TOu%a+5)G 
zgFHr5KRyiuC8L|YXvXa1gQLhShR*&IzUvbc%g>Lc&zE_6{MR~+zHIgO; zj&_*P%UDfc<3e4BPmP7i>$|2xWX0(U{HiCmTG3bHprken^P~$y)DM}XPLBgE2XJN^ z1}>)u)CF>hlS=a6|22Ur$f&>J{AU6){b()JC~8A(Gy!s0yN$OP{Jv^1e=Zw|Cr?&M z*m66{w4#6jMfr6U;AA4`w>#7}Q3eh8Rxt$*~R;oH5PqRPYj-=Hx z9pU(1tRqup4XeCPRIWUfy}A>qo__o?^i3CZ9Z%fY@`fm`u^g3pqrN$Wo>U#t0I!-TcdWo!u6 z?$Sqa5IneJ2)xO<=#v3_wU}+us>N_8A-dhb>7R|0Efy0kZrMv?feVI^xz zlUJN;?GSjgiSx&HVlJ|{tc2z5gS15NYL>e-TZvd$@dE@!o(}P$BbTQx^L4#Dn{gWT zvhj-Au9``e=t40wRASm5MpPa>TN>>L7m^&U$n;UPy=Wh^BafmpWouoEi_FTL6ihad zl2;eV3{4&}Zf5w))U-~LD%OG`dlH!BsH7nj0*AlaWNy5UugjNMUb6?_AS5Dxr0bP4 zdq-SG<3)!u%49fcMXX|^w4xt;8xW-2=&+WWF((V1Gnu%72>Sdyen5KvI*yun#h2rj zG}-z5$Sl91T=?|{(R17MinT1W~Q&maL zOUX_@YGFYB{#(1nn6cE{{j2rcgV#-q8>_IwM@+8G`)P7{p8mn95^|B$QbS>%XIhzt z_VWl)W5G^{Nj2LI5f5enGu~-}Z9*zW_<_I9R&a7cP&duRs^*C#t#+67^NZ8CVe50} zt5)9Ec+)eKuj|fRl^y54hc&Jz8Gs5kmbB3!c{_FJDE2G!Ph4YR0^}1@gUQq(xG|mMS11!#0`;a9_$p&28 zDl|pyTBEH)S{;Ml>k|j#7E&Ese~HZ5PKz^y3#YQ-!*$Q_sXXKKbJUJIcD|O7#Me^^ z^PjE7Y-Y?UbIz?u1pm}ouanI{71p8l4vkn^`3+--l?V)0{mpi_X6}OS1!|VWYBvjW z(--@hWEuYFd;aX_BX%9HJ*QlWax3p>{Gl7W6^*EQjaEa-DDL!(M`3Q~Bw8)B_Wf&} zzo=JKS+Bk8L--z^9~yUdpZVTKUedFf2g2=y2eh{=N% z;B#|x{b$@Gk!@FhFrJEfR1Y?Tyb!pga{W8{B z3L@WQ9taMUWx8GBg4*DV!5hED;`=u|O z1KF-5KD&~p$t6|U9#&4J*-mHiVYj#x2XYr0xS|MqJPaqhJKR1j$E zs05c4DNJdXF(Rq?nxV8ufbzsWyM^V4xm4)?Xi?k zOEIaz%uwwV=7)VeHRmWaM!+K)Qce5wtq5!Qcr$>MBsX3`!D~84--}-h3~_UTp2B$~ zAe81tpx44HGW`?Cinq;}oJqc~jxe~)qiQYNG@+vrTpj0eL}7dwvo1Xozs{MdS7{%+ zu&^jzRszT=*Qh-nlh%eD;0Wa6u zyx(7TOyCjm-pp@Qm`x_jIRjH+5pDi3Tfs61c8p(F8lTT3wZ&=74FRjGp_qqAAGU+L<6KJK!+ zTF+EOyyri%*_>8E_*-&98j5?h8~$Y>2+1FCy*nh%<;IA|duhl&Z_}dTwmEN6xU2gK z;;mu1d3gi{Uzsx!WZf?+$R?Tw%JhNG zC;G~X1}DNcRtwtqBS@b1>Pa#VU9&US#}tKUL$jfju-{?nQurFG9X=-MyZY5sN$!Ux zt=9I2?Km*4)|w+OR0$8sqBuNu^ETS{V^A$u*_zqf(`N|yp0{(j(u*C+Q1a*X=9$rc zEiK-}=;hUM09cO^&>DUVPV|U3n-A_6ziu>!rG-I8Q!>)h6EX@RjccPCd}D;>5;u~P z+jiMtM2o-B-$ALI&wuMCEN(8xVIw*XX&*Hx~S_6K}E%cGy^j?@}DHCx5ax#Huo0NJ8}-Zw zl9Ryx2+3c%%kPxHD&;)s(K6{+w#+ak-tpiO9Z^!up%y+OU 
z0AUlzCXv|7`CGnt?}l_9fAEvwxE@yX8(VRWyrZW` zMA24MmE;O1^!$KC&uV-<(T&9w9CKcI;a}8Z4TXZb{=}iW-Hxxdp{@`OSeZ9LTaG4B ze!c*^oX?k$;EczLXpzaJyFWmQD4rWmOKv<_CfQzSb+%C$jCu9-EPRySlNp?-C@`R@73&cNVw@=yFTA- zWarv}Ssp`?*ui5pVKe?fv}Mc2DRdt^cfg_7pL`kl?8B;_OrQ5Not{iSFZGZ4-SN3A z>O}%P$B_oiQW%VQjk5|P9wZ6Kzao>!;7HOMK?Nug9*CwYOf zag}&PZz$t&X=;x1)(wOB%kj2)62dgia|qinJ9b2qDKv^cTEiYHTqj7W`-?? z)VUr88j0Ax#ht%oMgUsc_iLw!k&d@nOw>w4GQ;8Xl2LN56)bR8epWVmT5TmOfB%sP z7OA=(N}p=gp6YdLUUvh-5-}%YzZuNWQ}JJq&0~!nA5WQ*bsh$}25GgkQ`zoPo~ys0 zBtWN;r3&rAMPG}5FL%0RFI(T)(-=Ejw50Yr3Wpp|;V_bHGMuqa*inxn!f{LsC3=%> z)sp_BU~G(hx5DT4n)=d#I&NVAxe3BZWB2Hs(_3?1BJ(it>8XrZ8@oCU0%Y&D;Z>H? zPJd`6WH}#6`Q*4#uf1_9oK0og7YmBsK_->M$XIyrl<+&ieR~5Y;P%?4-}1scT)q%z zDFv_z8pthKIMi}D+FW~gKy@)!38S6MUYQgTg)ED)(>K}XDwyBw(Gq*AKEkJ*Vvvn-3(&7~Fo%ciSPC_lUjM-LST?7}Q6ZP%?wZ+#J#A^_17vzQS4=Gr zQmt5grf};IU#t7l)F~Z)x15BmC5cZ3{u>beMTIkr{j|M zbM^&28(rIF$CabbC(HNAE46YzQnKz@I_(a}P3ljkQQKFmpNbO_ys+%hK-_aEQV zKERI}BRJTMT|cJ3Qa-gdWxOBGG$F7s73)6O#~;$-C8n?Fr`o57a(jxF^h0Q#UGjg7zqthEXeI zPy4mQyGYrb?h#r)qEl_aBgi4`NLC+1c zP8S@Ug%#h20e8wG^Rb@$N5b8X^9A9Zu^_R+*uGx=s7; z!lc|%N`I=}-|vX+T9ZB)U%vbNJ1^@gFqdrEt5;#l=V3v68R7hK1>IQmfiVo1E+g$L zb|yBsR8|VlQy%rDyYn$AVr1)W4WkR+kUkVcm(u|H-QEJM7&a&7g+_!J7JFdi<=8I9 zT20|$_EeYsG^MK>9He+~3r{u%<$_tT1a3xyV$~Y^PMU_v`{~VzO9#XCDLKZ~Ilywg z^^iG+-Bem?rdhB0b1l1aS$^Qexdx>!}S)P-{Hq5 zajR-vCH+MdLKo{+y?KAt$!~N-=8l)*6+|u8^U#(2!*^>#mv2R4hBhBIxmw*BxhbLC z;tu(ZWrjCf_{N)pG<2lE@z9%ha)vBzCB^5Jqwo|cdGCB_dA?SKM@Xv@P|)R^+ldW+ z^ZV9k%SQ`(FwdKaacy;6oKv<>+Tk4rs+ZR(q3%hU9t83evd_(KE)24~SJPL6Yw~FP zs~>PFCJHJ#LeQg8WNnKtdh1(<%>w>Zm zV%nu3qjjcofj#&02}<-H=RR=sIlF3$$Cp8RW-%OqaB;pD*WBaXn%YkP+D^%nOMW5H zTIOjt9`#v4s9?f}nEG};wo!um_Smq=PNNfKpIq-!snULGGeJD8n`s1L+}s?xJWmZ} zWw6}b&Uo4cHjn2k;hn01HTy0b9_UeBAWrD%J1-KL>Gj)mmufkVjs_gu5KmU$iY-p? 
zk@5BZ&E8P3c7xFwn&(D~gUNBv-euRri-wAh1FN_HGMCTCo9P25hxPi}BY}GW(t3SO zz4uWr{ni^fwH7?Urnh{>?D^ZNE0ZO^7Syr84y-rqcMsqRgu%o~88>k32p_QCXyfsC z`;{0uL(>7iND2`Q3CWkCL)FR5Zf`a^W!`lKBEMrixCLFm-Y!8EZX-Y_aGXjRCHn6) z=0w=H&B%OKF2m%{x9kJ#u?8pWtB(PQfY!o6YS3T`^HnJN%{<;iy7hEb)d^PR9RcrZ zjvzgvwA2l`ithQkE0LQWgxjb_XfunkqSqX+Mu}b@ch(UGKmoD!Z|(06SR1$aIvx95 zjCgOCobk54jTVUw)K|z2ZRz$md&8e_?oEwdu2#hsb}KP-W_d9eyXkZuEy}!nY))k=axka|dqqgq~)*xAk7AAomVgH7jV)I&b z5V7JWudkhqmq>#~4N*En1r3=WIe3%BJ+W74KMl4>T^fo8s!+} z1cO3l(EO*{>h_02Q&3lQ8DpB zqRTie+v#*Z{|d8B2!?oi=3v%|ITc|_kI8woKcjb&HHa*tOZpEV^6O3~uPZ0z+kebq>>{d0-ZU>^cKkK`D|{myD9 zs?}=(s4kd5<9s<|6fL`lcAqJS@{tSe1>CL5H5+u1=(-OY>rhpI3rE+Qu0FT<{#dbU zEY^U4!Cf%(qmX_oZDvN=0_ONl_fxWdIP4AcSMkA=p->~2##$9$y|QB!LKh6#Dw7Ib#vjXi1F_CP$rCOWJE*IJz~UV6jnw~GN)i}eF&>AMP} z)b%x|p`jRp_8AqN{a&w+e?M^GDtCt0Zl|+<18bCDy?|T6#40*Y58?Ek%U^@DgJ(n5 z`Pz>ZRRo`JRX&DKotZ~GHW!*m_M=Po%v6am(~7bd(EJBAJlH+44Dt4kj3YwW$$+E0 zz}DGl9lF&{z5b9`Ig3*fx~^51Vbyo%F%ugDYwi)M%9j^nbe+RJW9~QZ1TR~f=iB(E z_axSUsg6eN+wpUc=mZY(xnYB78RkpLidBjMIqwB^2o9jT(g|ww80H zi#6x3cmtwavp#e#4zCv7$?siYl*Mku<=E5GhX(bWk?KEqsw4z-XQN#)zvDBqwypWL z+VD9mU!kQ!VJVNXhztRjwl)Lw3X4!a9(zf3E#z?{BiWvMaeF!D*!g9c$Jn)az>Eg+ zmcb)fBUh!I*A{h^rCxK83^sl8B=25=jYu8})Lp6O9j1l;{Zng2fGO+iOVDO#9Q&3>rfGc?t}smujD>pIwQ$smP_aQ)0JE-w|Vm3~R`r}5x> z#;8m)kxBJe+U{62=X5nGG%)i^!A|#mHmol>M0c!_;0r#6r%Om!p$2y6$=ev^%OOEj zfIyF;M6T%M3bg=|E!&9(XNz@0K{$C@!vn`IZ|R$@k5;FP)jKko%HM6pcSRM>o%KH} z7r#wnUNX3P<>@RX`ums0cJKwkjJ#Q{k}9N^cXH)#P!InOs+l`@2k5M5zP(r0i9~|X ztRm=X&S%G01mQ1Jph6ce%@lF6Cdjfi^ua;cR9N2PF?6|IawiY~Nm$a}-iP~VIC;pa z15S9zkWmjy3Y0o>*>?4KwuA`9`1~*0IFiQNzvl!aPg_fCBfnbY4hG`_fwMs(8MIf` z!olG-yD!7{S@67syzaK2EAnwY#J{~CjOTd_MEumo_7m42f$knCDs41;7?WYtK$2jQ z&U6YVZBY3ZY#?oE1W;{rU zDxS9vZmQO5_jCgs)&uWI=rrcZA{$5R6!yTCH9UQvm5}o7@)7Ug{W~)%cJe-15y~kbZBEslCTc4}xcKaK4-E=-&WzT6a<<*<*2N6= ziOur!ReaBFcf@zVGaOf1w!si4{52Qdn>R~jMi*_Pans4w<}y7UD=hc7D8kIn4k1l? 
zdT>ZTYUi4tS3W~s6K|%n6T4(T^?<9Zwr?}_#an$Oq>M`Lby_Ceu$ z{zx{U8rS)_P3CC!e7+4@4eC$8=i$Ql+MjtiTawA*c}~(EaMv#U!PXpC-IUS`Q zryWfBdB8Iw7XUg>-yD)cQpHrp(=M7GO}Eo}G0yM%v}ddEX~sTkU382_m31n)*sjT&o&k7ebO6m(-_Tn( zh2kZNzifoxI~Dc9ro|uw{Jt$bOhVypsqUMstep37S57v*=qCnYEMUsw=raGQ=qPvR z&B|N)980(`yG?leiq4_h8chDnv}CysMj@}JfWXk>5b7u| zx5=;F9;k-7%{Y705zJJ{ZP?9S;X;ruzzHu>1}j-^IOMe3MD_Js^&A{pj4~*Na|0}7 z@H24poqi^1REZ|Sb<-5!EhL4%+Xeb6BB-ms!b7rj>EWBW#DvX+kY0O(UuqxS7 z#M(3Urx-$o2OQbV3pa4;OVNGeXIK46&8Ae2#$}W>B#p~Ue5U0F>O!vf8=?`H3J;+E zHTXmt2`oUda$+L;y~SeK3Rfm^0ERa;C|I}(%VcNgU$7*B5=e(x{pgI|`2I@U=8SOS zAY6VP+zwHVim_8b(c<;uUJuceUzubv`NFnBNlz)D|MzlZpxHH@U_|dkDiku*$l^k5 zrZbO^7s`NdX!~@H^_7b7t`PMQ3LV@g9vsgBIV&xxvcqlXx0+dLsY$z-9E#lCbrm%| z6l_ugiKskALPtR2LRvw#1zH6px3cD(kFr#bsH8NFNa$gCS?el>4Fe@)qDRXpHh|Dd z@WjL)T#4EIm3+vM4A+$WpY~0T>6G>-9@M@^hn3A)=q#NtzR8T>YPfjb&XAjAIxQZB z^NFnDdELw9t6^7|h1Th^usd6~93+j__U%nul$jfkr}&Mq)am&{ZOZQjf?Ru4=Ss~W zINIeCNY|-e3h;=N-ay+YOa?c3f zNT>$?(N2%<-CphEQ(ohdVfC7Hk>){WF!gqkI!4zotJ?p%o|19{J;Ixp#Wv7C_<(iSw^Pok#T42`^m}Fr^&NgfXjM zf7b)$aq5DTC+13y3k>(ew~BUI^~6LTr)?*>3)Ns3N4ufNqWne;@FoWJ9x9D@WyC1U zPYnK4>>n8(-bZ3_J*nAnuP~zavr4g_61A}w9;(YTyfiA;B>QNe@@oq%R)yD}P67=O z4C!^WXP#UtAX$aZZAInOaa)Qw8PFGW&%C1rO~$t1!x@tO`z^og#`;LI-`#Y}tK~)4 zEmoC;NXg&j#93O^Pusw=&RI%r@_{Tw+wBb3deZDP~P%b7;II?f~`r=a%d&0cy%U=K$Fn31;GdWXl%Sqf~9!wvzvOm{sq z*8ujY@lmNls-f!RE>v*0LY{qu&B@EWoP3x|j zBHLGu)#&}q>4{`Fo-LQ3$$1VPfN%+ryj6|ktr7n_R%448W^Y~fMmBQMw?5BhDSJ+% zdfREiPA2~8enZ-0EdjsZCaa+IcB(nfsLtS+q6H_?*#@wfRyH!(2R&sI8yGK~ex7+$ zgK0dMsO9FX&36RS6$7bT)d4~K1^-Bb?2WE&LbqPx>eHAe$FXCQU&@A#hhrx=)U`zU zR4y7P1hzeY(Q()@qj}zjGt!S(KJTCQqPspnUWEpPRR0{$O3*i~SmJx%6MFN4RwTf6 z2bnvwWxsp3qpHh(goNp*f~S9`?h<${1biu`f^o)ed&3hBYV7Lv4O|8 zpGUp;=~0(o{!%e?HE}sN2S_t-yz;#<8^2ui+5gLyb?_atzW@rn)g%R`d$oW0SINRx zHG#7;L5-pNv{h3_D@hg}aY_67QU4ID8NV=6jrtuUc&YQp*Uv!0`;s|AJxKV$c9O^Q z{JOnF^(Pywd}}?6iFz#Co5b|2wLz@1E!W-7@=e}t5)9W}9vI#irz`#*GDGlNqVd1G zf&TG|R~0G`t!JT`9>6;6t7l-fUnYvsX7%$Ay34GwA}Ji0InHW0FBEYeb+EPvtm@)1 
z=tiH5BYK|rbzbtpH1!%7>DsJ_kG|FieB_D!ow_ZaH%`+`!G1V2kMh2NucmoaP%IM+ zDVOAmicubMU1dMdP7RC9sz%@7y!|hJErLICS6+Bu%?&I#ogaF9FxLBUwc~cJc|tc# z_--cM^uZFM3{oJ-De&dn?g`SRwtQ@9@FqeXy}#$K`_p;%Xtr2k->s-=-d5YqS@CEr zJ$78B3IY)^`q^yKKDWP_y}t~QkR;G)K&H|=+^={|VFsw7^I~Lry4zeYDjvL@({y!r zTpJ@MyO;yLc`abs>u@b^_|Ztv9GM1(aMo?VFzMWGXI%EVFQqXYtdT*|78OV~+Z)5*@`7eYO7UbCMJ58m_b@vts>m=R4byj{ zc5NSTiSh?2pT~EZuv}wt?xtg3E69IN32WL;62@=jV?WQmisjAYCWAKDh6a8^Ig$QS zE~s>sc|5e54)68n34lasJvh`*uF!mF+Y$_W(Hk3{q@bhge5)PX5qN&Vu7YBn_)gz! z+v5+BcT*z$267Z4wZq+6e%SI3eup5#W5SAV77slKtxlUiJ74W4HSRWH&C}Q?TW<2B z`Cu!rnQz)du@`MHm|USf`Dx!O>`QIt;8*^WK78V4f11}}ysS~%R)9bgqtPF96 z#{Y!k5sr+NyCWv_f&9uLBaBxXQ~}N-$e4(eP9-p`{?MsnTeO_Kz;O*c-IjpZ(_5=H zi$)uf=D#K~?YFJm8}dcrA&Czp2pjs`IZ})(Ycqx4hs0Trj z3uo;3zagAX_O*SQfrBKQrpmaGhw#T1NP4f#epLQuyR0r`5uFK`3TI(s)NEG9Ny~4! z`Q(}`W3;+njep*Gmb}S~`GL%aQnG)YgQ8KM`fd4Zzg#Gk&qAw6QKWMeKT|%nyb2uy z(;%AzHD=xQN60uV8^ceeu1AYSv|qBXE*JQ}pLy0ur6c|xVz*~|Xvh0>St}>84xbtI zyr=sW{pi(yfmlZ;n5seZ?(IS9=-I_@(VYrvk1}jW=AAq4jaS|R?bm)#6AHzBf2$5( zuUJ80_?55wBg{4(6UHnnB$OO9yl?|@!r{Trzx8dC9q`qDlf&)Q-omn+*=4qIfX`6p zOu)}EQ+*#D)}TixGs-KnrYKuvnNUsgQto+XrfbCBa1L0TazfdibGV!QSYs~#{B`Jf zp;J?PzGjm;6whuvej&qiA|`4X1J}tgypp1#tbe!;KEC-C9^D~0n6M2=*xvXQvn#b9 zb_ct=jQxTi{8oF>%ru$$s%<2_yBe+)@hZWLy8?usf$*ZI?;L8eMb>((+Tb#UsauZ(ZVX|UgHsKeok*}aJ)3hC0v9gONA`|)xkJoj3a zXTy@$e6{Us093hJ7i9i$Zb{mNabSp2sCp6iKFEuN7SXte{3GHf-scnF?^$cra!<7l zS@8XSl|j03t9xhH_bZt(x>7mjti1wfl^$7n<@9ZE<14~YYQebn(ZqAdmeQK4id7UN zDH)mFjOtXxHSwK=gGJUo@tkwb8HLL(V^a8G7j>EWH)HVab>z*RR9Q~u+FJ*_k6<~g zGCgRmp714SzvN~;u(Bw+F=5=cZ3rWcL{$~0mVxm+D{8bRY zxBdbMsp%Wip^R*sR#qvb>P$#bq!U%w0``^N&tQK}r=aeev;(6(+r5A6Q`IZmQo+5m zaskQLKSknK!JijKXl-GlS5qaUi;5GUwV%y=KBcrf`huBkBEw}Qqz2=T^U4;OZPR&Q z-i}{_#F^+S*4&nObKJ4d5S6gt_2uPj2{6;ZF51RpzjHNSsXnZcW0H-xjyet`J}fAe z(U=$;>v~Vg3qV(r1R(d;rv}RTwEeWGUrA}CY4+|6qm6U;_)f2C$S=S0MUA-{&OIF2 z@gD8Y7EuGgBV%eOWh8m^; zOF?0=LG41o@tQ6Kp#XGWMp@sdYs)}1`j2us$J5NsX~}*pfyMH`Iv>se&V!&du6#pD 
zX|bI+Sz?ZViU`bb-)n;hgCF~3{$(TT(>WauTX{HGr#~!(KPPTO57x!mXwxd6jmf;Qzku4?Hs`#4)!4*C}j_o^40XM$a znR-c291?sP3e}=)1ohul((>bHqq=gFuX>p#kk+DQEF{>c{9?9sWHGmf}#k?R)2 z#2ZzO=2%Qg8%)KQ=-F<7k2*uniE@GGfpCW>UZC8JVai-p4ScMJ7~^?kgr~Se%Stto zOQjiJ@SClq%#!PTpYBYTT{FyYglVrDp!i|M5waOO6#sXHL`xN6jRLmC4Z1g!4D5FF{b953+|FZf>; z6ue^9W6zNCVPvTFe;i$ukvY!OL}d4g(0@?N&*Ib)&<^9IMFX09_&CRRKoJ$!V?7ZgQFJ(u5I^)b$p zh2`-dpd)!{mNUE*U10l@HJmN0XO$mcGTwwV9N#I$5`&K7QNAljJU^(0!n#N=HcsO} zs?3tf(zB@jgBW|)(>PwI_61cSMDkVIELyRX7-;ZLCWi__WAzR5bFdW2MkP^GavOx2 zGqs@R#Po!fgy_>ms7|Xv=8cFDcnM_^P|ej)W7lYd*8k#Y-FW{ z9E!M6AyixegFoW9FFuUo@u8e@A3_cJ$E!@}0&9IV)`XK|`k|i7cy2`aZ1c2WM7OlQ zffp7mF(Sz14prHJShKzQtVER^@9gblx2M*dyixct5>uk^nJE$w0zLY+(<$}m4!0%x zKCfM*~-rzFQBSd=2-Z`F7(ZMY8!0na8_zh==pyp9eR3lnUzp)nB%@Iqtx#|7rh zLOnYaf$!5%zF=e5OK$&HIkp~O8_YZ4)OX47PwUs7Lxzk<0oN5_8`BvA8Z6ZOoIk%L z`7Lc>@v^s}>q^RW6RRKD{TwvF$g5(rYG0>a)!Sc%TdM)bu3NxVs z2da(OA^w2>`Z9^>x<3+jZu-}+T$Eoe_F{rHxSiFhs>pvc?ZN4xHQ2HJOI%#MEr)!r zOgh`dkO5Q{@5JOBE)Onb_swp}7$cnD*3*TDX^==X*1zGBCF8;I&Zep}zxZmcGKB0h*xgD*%*`3N*7N95 zqAFi8z&rlUvtK#GE{4=!#Y%BQOQx~Fj1&LntIZ{Mi`?q0V^9t%i?Y+jHNt@#&duS# zl*I++{fZSXYsZDqpT5~-!Ue6a;>c6>cY(+q=Lu8)Z?v80C)XdPdr@gl%8GWsaR`d^ z$hh1=36debNk(=#0`Ua+otf-01x2M-`l(WZPhhC)R{(Suq-#-nY# zRJ}qxKx#NfO|rlon@DEA?|EG|6_Na6x;oF#({PPLXa`~BimBOL$~pKL=5PL)ce$lG zQ}f%nWpW@HA(L17a1n02Iu}W_!tI|xPeVcWG07fL3^Icc>%m^~qBHdZpT|QYX6RK% zWCl{M$>!Jmk{DDslqiP(N;R=nkL}4y3{Gd969a^ERs?REJ312nN2QG={8ejg*sM_U zABDC)4W!2v!XoHD|5HxL56piOK!UjcivJ%INB)sg08C8z%KyEwe_{eaZ%p3Mzfd-` z8|KHhBwDJ9tWgHQer76y3Rz~mRe@9J03_Qo^1M&f3_?blM*h8U-qyW3p1g@|P01Dz zN~|T~zO2U%_~X>s|C@Qo7}a)&DQ+faW|S3y@r~wUhWZ-G!Hl%$uw8kBa?+yl?l2 zQ~+F#i|s$ZSmon{n3(4K24aAi_L0!hxT-u|(X#L3(dnK%y?oYDhg?wg-%z9WkAzb# zcvUN@;*?`NXLJtbNTTlP+Wj^ms6;AO;doj^!?h!6dJ`2F3WoFC-&R!*{WUC3dT*v- zBpE3Dzx;%o2slMH{7X4R8)@al;HyOJ`=|w5f0dKda0Ldm7Z8pw#CEIjWO5{EE`b#m zLP7od+7mdYc?0A9CS<6nr7l*n=Wi<&7T7iP_v9!{<@&W{acI%tI~iMyvzRT-LPEW#>m9A{t$6Ga3Lx{n60^(YdUJF0+GX>F;^p1l64g0?RL5$w zeP!@wKDQR4~3|F#%8=vu{HO%av$C2=q{-=HA6gTf(*8-C( 
zQ6=r-2O}fow2TK3?A}1_?CyXnITJon<8-Xa;{r+{Znq8P9?gImw>&<0NT{V&P050~ z?53qNMVeg&4ED54@!BUsIY}>9b+qP}nwq0GetuEWPZQFLu zx%a)9b!R=zA27e1bkp`}d5w3squf_ofvJNq@81$Brz)ACt8+EUZA zbhE~%%dnn!wu2wLP>edr{)=~Kn|7zvYYUCz@n{NOB=4eEfQ2kdhr9drX~4s#4noH1 zXL!Hj3l*7MRxx_h*52b5T4F~ARi$JV>&13cpPIG~>-QQ5$FHw!Fxm;)HEKO5o{>4^Yn7@aqwG;bPn}ySE8Y^zICgjlfonzut|!`vJRI8& zs~U|p5&`$_DT)NYslY8RcDXKEloeo}tyr&?>|z`@ncDf>vGc96pUGl;*bBqIUM}2- z%btxqP*G9o@!4&$D7R=I&p?o)Z#zs_vQlLmnZ>fw>2$xFMHI@6o0VmUGS}sv)atT( zxPJ39t5|SXB}@c^k6O{v5X{R|qp$e3##6GwbRrrPaZj~ojq5a4b)A`WKiS>8W8(6K zV9fGdus_}Pwmdx$3PTZCaJZz6@4O>? ztkHd_^Ku^2R3~Ewe`b1u*$&|W4ZN#Da#;G`3GVKiAhOL%SvH(z;YTS}B_lAQ?8Umu zI;9AK-Kl~mueMW)H{&7F)y5fB5n@~vJJ|2KObsjkvU8=NUBk7jFA#$!Yq1;jQWi8F&?JDso zce~g3k*VE64h_+)qeN~JIrxzUyk55pn_a#-cg%i9&%^p3-TyCEdv(r$5tB2Nu41sv zj8J|DZ2yRGQN1E9v=DhLDk!tFd48%*r?nQ+4k%f} zz`!7~x!W$Utk`X}yPUB}?PRq*YR;_v0IgYnTL_roFC3wusf~YKM#UPp)oKimioRwe3Iiy&Pmm?VDcK+8!Jdb zv2*yR*}DkM9*?C0;c4or$hYKG*z|7uvHjD1Qe{?Umpce$=E|Yoe$qhNd~^W-!`#u= z^h}nu%P7!Tf&6PJ2(Kd}WFE5|bG#GgTlwr`WLiu}DlLcQA1P7}bnT>PVcMSiG8His=F>nMWa;uty))ful-^bN)H~0l0i(AxS|-*6_R!H zjf`_D*PgdOO7Ics5ugxp{~Xfct2Fy6;JyF+tZA@q&yezcCV>(Qu5<5i8b=y{LKv`v z!8|PwwpRYW_+!$bL@wQn1dfT9VLFf#H1wl0eOn=AQ0JSdG<1pFZWp7HJQT+l>cCFJ zXZIL8XoX73x>DythCE8N#IB{Jd|37A5?p3x zis?57W8XF;F#|c0mBJ|st>SN2xm7bEGBbn>-vI$_Wqton-Df@9+7#Db*&TKYyxzwPh z;4lY}UJJ9Q<;~n&X!Hyqt|Tla)*>6nL2DTf838sn)=^Tv18BxgXIOVhpYP!HHXh+Tt#hz zM5x^y!{s}~Zu?Kz(n_`F<61G>WQ~H+<20p=q0{n}y8n0s;af9@!@~4 z%klw0_KVu9B|&o*dLe4Ab%V}>BN4Lq^`)ad_Ny)o7l(TnyWE-X^r?zFR;npAr$`*$ zt2nxQZ+{XeChW6g`o6JS(64Hi#@|5Qwl?{;!tP`WKa97ZdPkngntiV_fKQ%cOj&x& zPtZzOnZ{hcT-?Tom9b~0uX)XnoPI_s#?LDwk)5jOQVJlXl2-o<7hd{k-FxqU#)U`p zyv<3IQ*xQ0=F=EV`Ga!@yIkTk2zj*Y9rgYzhRaU3M|Z*L_dX~*&nb&mJJ#gK2mI&p zx7S*?4~y3e$qS8j5(ANNY6YHjWvY2idA8UKrq3OFM_)FD$vvZ)@7=DI`3aj(hwta( zgQZMa=hLdPQnbb47ZU~DxPc`&^YQP&%3{>`PQ z3(}Qe6>5KpfFEo$XlQFoMYgv=#Bo>WHW3$5>|Ec`f z)$Tb2U)bhQRGxn}A)?3ob-3{3eVn`F{S>qE%ZY#E6OS+U;Gn(*r?}O5OcBS8mE$VL 
z^^!)bVl0-=8k%}rh5IF>EspQ`^|R&p;G6FgWtEzq((lLb1|Xz7qU(Ei^klW%^4c`C za(;d;016?4!M7JicmmRKUQubkp21?JnI*0FMCMO9mNMH)%? z^#q6h^5V%;RRu!EpWA1|_==7XFE`k!WGdl_YW|hhGQxexf4dzaEgU7)mlP8}r|`1>{oknf?W^t8;pYzY z0j?3`Cv7XLsqY#syyX;(w>Z&`@H_f~uHGM{WbYUbee6z}20^z!mpTyMC@SB6O3kJV zFlU7EvNymYo~ZJ@f^|WquWB3%}NL&BK*Sg3h~<#6cPl4KFIZB zw#}*OKW?ss=m?1Y)7Kz3J}C@bTo2=J+nq1IJ1^8{u1IIbL{8+>zdO8BIP3l;N5E(l zy8~^m8V@G?A{JB>tWs?}1tZ4c+6VPjqHd%rvUkdRH^FC=-5~Y7AqfnkxROo^k?0*6 zAH>3qq}8h?8DudWU#fW36;)MK{O*zteBcU9@adL}J381*4|3$R=;r~S%`h1B{&l)_JA_3I=H z5I60@lPs6`diPpfSCuu`dmXI8LBfm?kuB^NAdF=C1vc1t;@B=u8u;_~vmbAYDoVhj z?iQ|Nycjbb7|hbjGKuIUD(BFRz#B9^6Lqo^G?wPhLS;2mdzdHpC%n(%wF!Ckk4@0} zq5t1J>#;3kehtQ7<$^ ze}#z#s1O#_<=2&zm~Os0Fu?z_t9sD^;!6_>gA1WD*?77Q0(g_p&mD+LnQ10b_E9o8@bk8Io?mk-s|0x5Fi22TVC3zIWJW}I19@PXt5y$lVKcaRkG zwVCr4D{>qbr=rGqsz_=w(~ohx?^Y4CawCOQM}^00JquRmZ>w}#@o?P2q@uCBM%GnI z!FjBU)N&BJ_kI2~O8>aV1a2vX%<6}nGXJG)I9_2zK)z>DOg_X!MDjK(6Z6|eeo5Jk zr1h#~eRv%$jEahvhlxs6pGB4tb}zz2aqkYDX!YjpI(4fDhbTRJtcaTYn)-Y8BWHBW zV5a4rNO_oQA`g9sx~Bp2Zsu3%f+A|og3E*?-KcDcBHp7hsU0_AIs<~r;-9>*6JxP- zo0@{crRX*?Mh;R!UezIX)WN<25yH-o5Q(YZiU80ob-2w`g~{MD(i#g}sh9z1#IG&1 z`cW@oSH0w-^Ob*g__f2T0&~`nyEao-JR}`#qBiLC&5jA{RdF*=ZQF?{e?6QlMg7V) zj~a>PV-gzAFRa2<933;_H`k^{E3xSq!ry(o6Gds=Ref)P88L}HpDsBL>jyfKqR9jE zCm_-Xbq%NqvbA}i$7{yY@vS+6k%0?YlpgLY)x33DoKiQ=l z;NChAZ*YbkwAsQ8Q)LK?)GtHo#<}(9Qxfy%p8nUD}8 zbOwXUpaAy}^B0Tv2zjPpDpKAfB@%JOcHCSWps_0ItHQ>#` ztbMnuy{kCtj}1m3>v?@OfkME&O0Ktf?WENi*?TV~0sLu>(7-VL0Lo^2Ax^iueSk6s z;vAyW$+U=Y9z5S?El+iu`<>w?$V)ck9b+ELhT6JeeIBHJ#;z#7@2kP*f6xtxgyeIR z#i*6$mA2gve&d(E?SjI`;&33f5J%3<)2C!em)Na%Z}3(w;H{`G)o8WARb7nRsRq)9 z%uFrlipq_L0U08^biE%Bwu+zPuyABTBL8|5;gziP5mHYHnol9(M0362#RQEKOr2V7 z{I**CMPB+8uu~b2`nJwiWHe$MdgZtBQ$$YMzvI-j1R^3*6$aXT7z~G4X3PL0+fv!~ zo!9LwbQSt9yljRFhmOLiN;*1vT?-aDJ*`MmoIi;o7_L_+3@K~D+V<;Q3QYYpBh@TX zm>2U*M;f%Gq@uzEzmM#ty`0&~hA~*?F+MT0JC;YO?Iut)gKfD@i;@WqT}jz!RZ+X? 
zumkP&DwDm_3f4^3GjBIq7NtNqs2h%e>vCxbpNA_~oXHQvU0TJlKY776GW^Df@NRYE zHh%W$R(;Fd(;))NxzDIY$Ig-lVf2?U1Sf_%dFB!}pI{*o-NN_~JsW;GsVuSr zgT~i8Jf=soPIAOv3n&M~F&cn%T*C|QSbc0ivH9^Gi&q%$Dk#-a6yI??IevwLe^Ndd zsV^GRRBA5HdFKZC>#^?C>NPVrOd{if{x-_VtrUFKK0>->2Tz@q+=e}Ta}tJ$eU zbTbr5=csRPA*R8nRu7YJx0XnaLIMq25%9Tn@O-%){9~0$xfhF4Y`#`e1kI&5T-XDaz zwaDdiveJ@TO|LRks8was{_Guo%6=`k*zK?m&FA`kw@JCCD*embF%bn4G;;RF$}$Vk zG%U6;<~H$$8C*xF9dJI?uiZ~U!tt!PUC5Z(CaVEQD!<~;Cy`d7-)A_Tb&QaaN!Cv9 z=Y`EG8NBDuC*U9TatZ$fdqBCZSM=jZh|}HIV=QTVJm^3iC()VK3Dx{;d;O{p=~mD= zQTKU-4mFzEy4qiNu;&hg=FPOk?`U^YXR872)te%qrc)gw+q5wU>JohSuuCKMrQl)b z3@5>~)?~9HmCMP0;$MHhP3$`yogS^S*o zreI#J1vCBUOQvi?iZD5%4T*&X%|_?7xyjO#6TX8J6`R6DYvNoDD=vE#JC%%hS~k*v zM2{^dU5CfSbqO(xabIF?=jWl#IwUTBe}I*2BRXDXNKmFhtY!Fw%*v1{Kt%AyG)EhB zE5ecM+b?P^IKB6fK#uzey*M#>E1L1yIi@V^RlCNpmU_3 zaPHp2eUNRQk4=}waTz3TVoww{5OYIMiqT3e{xAP)*9O^j-)wRKo=;FKg;e_L{ffaq zVplW2BgI5-Lg#Gor@*f(#CTzbmK(O8v2EcjRuxV-9Le0g1ACmLdxnQZV;quD> z4T#2IG~CX9@N~Wj&;zK@KtN=dwngV8paVxh+|@80X`1KgJc;Zx)x2GzDmKH}e*eqj zoGO-NWX=}Uh&jDnf67$zFi1sfyQjVLYq)gf>m|v0q$Sx3MIb9VmIs(7)1y|qV9oVL zBuYyJAk`m0*``uA&W_L?Ls9`plC&*$R{och9eks%4rHjYzCir<_`f$^(E+ixcm z^q=qNJ*M36+#fWk3z^M!z3z`cf>lSP6Jzqy49|L>Q}iF_5;L|Le^cQXp~$NBJAuOO zp`_+4!9RYR(9T)7#k5&$U-I#KP5InBw%u{`JG(AnMm1M~+(+_<$Z)TeQX|USTC`kM z2vor5(wY^CcF z$R79icC``Mh}biRP+HK(VynVV4LMAJ6iefc1*_Mhxu0}#8W(bk-S^QPrqVxGCfylT z&I6o)v5WhdvJ~{Lsixrb;_XHJBZ;vt-Dt!rG=`wF-tuNUj`thhu7NNCiJl`ip6#u~ zX_(I1+mB-nqCQbN%_D|(a;YlmRK#NT4P(@o&??Uj$rtwvN%$S= zGUp~SS8tXU9wlJO6r<_+s`NZ^;-%F3l!s(~e9)~?!^{PL75cK)L%Jk+hF9qMCTNlE zN)Cn-)%l-5FmQ_4(V@6KeaSy6=9-U;+hHs6-!6gzX{}2Mzo-NAxe$cKiCKwR*}|u1 zjd5h-ch!XxzQ5j|F8?_)Wn5s%!1Hz3j!d6tMOSBwo%pXQN7&<0b$~4eK>WE5%^Vnv zh(2v*K%|2*PG+K`^SO*kp4cwqYSp4`Ccx-&D^A4^4XcPAVQ2lM_E*T>5vANK{_XwKQ$dbD(_) zAfgKc1L5(PZ*E=w9e-dE^g&F*pBXFmPhYnvB`48Ax>l(O-vo1OBQ6@+fNU|+*9TY>oMawZ3;P zR1zFqo%>-;hlyvM1+nL0Aq6H^B0fI%npJ(@weJJVee3Y+4DO?MKKCRjdm~GnwZ`ME zlUVBwe7pyzfBg~eFw-m>PF*?3Ay*f^B!QXE=Sd4)Vo)OkAUY*$)jnIYWPd<8vq0W# 
z^t=UVj#sa@Y`cC0Qdwc8YYdJrP2%|4{ia^_vXR21Gpr90H~4D~qap{Q0Nk0)&u1n@ zst63?6WQ1ai7KT=bq;c6jU%cL;ck8S7rQlTnSMs?ldQ=B>%#10npwHe>z^N=YC4Sv zV)m)<8Ym^Zj_C+@OGw>6bRtL^qVvlT83=wm}h__rIz_`Y~Dqd6~-_S^J za`Kc+w~9q}=lAn#tIQ^Z03-4lA=s>C^Ph!=otrh47oQWIg<)7|^o%3F?-42X-!AUF zCaFnDyU$fsG@nm%sqAHymDF~r)~mR`7xor5X_}noizL{=Cp0=+FbL;%-d?!aJq}{n z_HQU9?4eki7%&MH6ckE74?osb4LjzGg!CL}X%noT8zgbwK6ZZKViZn&ZCCw^`D9Yz zKxT80ZD?8ey1yvOHRaqVf;XGxIvM!$%Bd9|X&0$_yVK1@7r!aZ9p zmW*xI{^b%k53n=r_6_vW@n_l!AqPD9HK&EgFL`{SXT1 zy*IsmlCC&j4@v6Q%U5e{s0*1fOCExYd3aQuwUDp_t|6%l%KOwAiSa4aZ{7A>S7@hl zZy?kYbgnrU3t#4U27@~KM(INnJ$?rVM?cz)-&;!%i~}kfqgMES9k-Rg$yleyMfp{p zwVhIaJjrNq_C9*^7|+tzZgrtQ>r$y1B9m&mpb8t#tvL33p;f*!oQuFI3=E`$u*p6dp6aHSQsEB zXV+Z(%ATt43M10S3PC7nU&8gCe`JmHcjMtN9`$zvqY?oJ0C6@T$O|1AS8-8LEh(%BtX_L3y074bKFj? z`Rly{qQmV{bn{`m%L^WdyY|X3DLFekre6P<*L(4{`QdSQUleFurdlRBbH^(uO;RGs zas863$-#pb7KOTivXpYC(2X-2wYUT9V6QI-e0{C;U_1gA88K57qqymvfO=fYiC=C( zS*u|$q9^{H6A7E=MGmI%4D$(Egqpn0<=#<~m3>XIH8Z3D*F-o{HmI76y)zJn=yIbw znZgP@y1lQGfdc;%cV_L_-fqJ!FMo*BaS|-&&U{E!IB>Xtj+8EoRK`RsLhDdJ20Oz! zV&QC74Hs!jtONZpf}5Plo`L-8p-pFmK+;jG_WL6qMN8AaDD>@36yz7z5v04NN%z|Z4?8W@eqn@#2 zd(_lq?8)hF&@c*)GWU+%EYu-G1R>_zIm<=ZMgqA%lHXV^MZp;rj)pe9s+S;C3V-)pz(D+q{NtuN zPlM?&o@tXs25n4hD~X(_=sAR=r-TvSb$J&j@95Bkgv$(%$tf&JO(4yqptJW6e#4p> zak!70Mqnh)VFLTmKs-0lk;8nruOU7DnetSc>rqV<9m_pl+^|zF4Q8W=*`_$0Pa`y1 zBgaxv(G}4LBZ?3F=Ber&7;=|g&gF5)*88!WWfL#V6E`F>g0&8caq5erpk`8_tn*A^ zhij>&Gl$kb+?x3pK0iqZ1jvXmlfs0HNA)HCx`r8Tbk9o?vw2Rg+8ZiMd{619%a;rr z^qI0N@7YT*=K8){oZjZ!tZ+ZC8nu3nN2qOk-E^@-eT}D- zq?E1xSUU5*t&juO%DldxuUtPT)6E_0a0phXA?SK(+!pt06Frt(P+baoyM=*)fz64J zy9HvsmWy`2Cw7CHW27Q_d|t6ysSEw$G?{i=P!dlCslb8v;XHV5PiFWC`J>6E@KSBj z@nB%tX2LUd>jT1#m7Jte@?c3$>W!EgtlXXcf3U?x*Sq{Q^`qp<4p8I$I+s1v-GrWn z&~Jal#hTFkW2f6gCgzDjYF7!F!zDT- zSV*Gn>tDV{m1@0)L(wV5np;_?RO<9?7Z5J6a#K+!bfHX~x5cMIr0eN?sOHxk4^g3! 
zK<|HS+Wa*ZGABl4M2yhqRwex<0t;{U#&2D)sH_^eVBApOdJBm>;dPj$B6L8>;BaT( z9m$bZT~ncWBw7Y0u#2&}g<)w4^eZCP)I(R*c52&@*3*F{fX=7h^LR7Ux~@RipwWrW zj=&v?WRlL2IrF!RXN0ocf<0$QdILDwv!}AO^6&NGc+W0f%zSw$=)Vq3L`I82>g4wQ zQ}_BTN4B2k&O(||QsBUVh<>D?M9HMCSTu3wc|^?23Qv4L*4wBqYOWZ`Dl9@@Z5)g_`xQAqjw;w^u&1%wU>Q~si*wS9oJ?h&)Z($7XuN#2_i*2(9#N;foG(pyjg`$iiB_r^F<5y>*jh#l{}ijUAdH|S!@2-Erp1{#z|ppJq_sUKNY zU{Yr%UDkC>yne9J%R&UcyefJnrG!E0pxT1R2l{Xtnc1p8LR(ZtA)EFDtIIvb2G-@m z%`x4Fz^IWgiZ=vf7r>H!tziy$n#EowN6v)nS`d2{!=I613ILY2@w^^Eo~Gr()X@Th z$=XKkaM)34`dllvwq=w{P@dWHH|)(lwpT<1x$DJZ6fbY)?1j;Z&PJ(Ko0?&~GsA^5 zH{6zYPj8gS^lDM6-s%#2oX!i3vl)Xq3E`(Fvp9=}zp>g~codjrAGNtC@v=WBOt>T` zDz(6JL&F;70!u+q;*Nu<0;>@7$TP`U{X^X&}y3 z^pnpQtAy9vQLR43q?IZmDIw;J(rM&D_)DX(7-%NYwP9;K9vLnZ4YE7W!Qr}1D)l&{ zgU=@EuAyxsVgx=-#zlNfBHki{C`Udb$Dic<22__wMFbPZB~u1_;ZtcU8!GFltoHU_ zR5j6jjX&NQ2riM$ zwq#0A@#%tPBU*d~h3B5-g^kSqa7Hj+||6-(=YuWaMDY@QicgtjPO6k+T zbto4Chi}^p#YWgTd0NrSWpL7#bSFX0s87bNiXu3Zlo~dzI3n3YLu!9DI0@dMSzcQgt@R#vn8fNhA^Lk<%jmz$wfTNrc+o7q%jNhm6l>KeDYIPj0rhF6 z&o`~akCBnBk^aJe_6vN{nEFd>{N=gtniv||vjrt*1{=Ug^Zl-9xS#v|q}e8Va`;96 z=9TQHF@~HgOf8-O-s+NClvy6g4`%0s3~_km213y7S|p#=WTBpU ztE!}?HLZ1$gxhp>)Jj@9Qrc-#A~~$Nn1g`a>#^&W` zPDkZyz01w*dTn8fx?;};`C8A=Xk;>Nsf_=XUc+2w5dYu5Dq=2e#ni@xmhXG|9U$7T zr(*W&6tSBcSS!bU+9Z~x7Rg3aK_+uybZp3x41ZjD)9dKJVL5LCWi8(Cha1`TTwd;X ztKEgd_k;rAa#*g?Ca$CSzjRf#EsjwFSOGcN8n_4p93VBLp;XNBGU~fNd0t84vh%p@ z^7TpNXt5d5749JAusB?I2rA(5Oq&C%l5&n;w$|e-`*VDF2#dpxP?2=CrL>^;&p?QjA!&94q$z0-?b?tk z7CMGrrWz^7$p(0|&UW{o8q0d4)a{WYY3y9U&QAUupyIENqR4PJD=jVPX!^s)UGH=B zQRJz0^*x~YPpRxaEE@28Hi^VtlA^;MU>@WS&&77AbcH6MTQ}>R2xBY zhF@RB$S$MjXk;cPGX-N@zeiE1(ExJ`LYMJclNmdXU=Daz`vQYt)qPY#&i&Z;J9!A= zWjSHX3yZwTd-_QYE{cH6J_9-sRmZ$^7_BVW>c9!H9tt|s=0J`@4eJ{rIr?Mf zA*{G+^>qlo1{6Y5r-(&cCiGFR{pwu;(XfFnR{oSIj>GnLOU_#Jj+b&rxyh~P&pLy& z;|=h@h>V-%Jjs^*+Hva-?{bD_KV?6cnWp;mSiJz0^--VmiO#o>2K+=c(CGzfQL*0L z`~+Xm18e`P%4hvti&;9a_iGZiZKH^2(=9Z5md$wL`Nes08*aj~MQYr(N(wh=ZWc9n z$GvbO8q^G_{A%#ab)sL&5%zevMC3)C{$}@#YT}o0Pluo(C)C*=2B*O!#$|C}gIS%` 
zrQ!%80&Kwik@S0gXDm(f?+9>vS+O`i%Pa@S_1n+Y-Eo1>1_AiB@T8nYQ7&uuZU&T^ zFyn}GAmTbu&w|e9%LB(kMvl{?7D_HgkNH@^5BrZ3Krb|PBBYGBbjfefq;#q+Pppsf z#9;6H(Mg{&m)G$SQhKk$`K(|ryJ&c+L+^%mqotQ0FSFipRbs|ifHuD#E-xm2=4>~N88VZ`1FkkpM`?KtHp}4 zGV@R+R%BA=)6E3CDJQ(DX>hf&N?1%@ZH`Om>XGW#7SZ&%YWtTidcn*{BtKw8CC0!~1enaD`} z_OJY$Hh!PFa8{EsF+qR-$wXKPGVf#3M$okG4mC3)@xA*88^2psif3d>kcx=QkJk&W zt-g1P`8hg9Ce%gIYG`zTk&?RYX7?u-1Dq+R^Df$K#x}!g z={~mq4b}5~swshJW(0emMW9Jyhcv(vxU@vhXWy=1Xq&g3EkBs@QGy%ERw<)Uk~h7b@$5FT&JHCK2@BbyU+xkzgRaq{bxi& z^C$mmrp`xdZmdor0DW^Pl=5I7b6*qzFYAtqzNLrZFxJNh?>!Cr(StTIsRlJ7?CG?t zOUIC#J{*i{A+qloM7s9+M7T0?)p5U5@>;OauB?q;6#E9JwBI~2aBjFm4~80{?BQ40 zf&aMkz2h2BmWdrmvc-Wh`Te>YcxI}sH$opBHAOj1b6zp_uI(?{YW%!4rC<%$!X({;n!&EBcjrBc$24CXC;s|;aR82GMQ6A?J&Jb`p@0`Z;07RyB7sEs zdBN6aY8^Q+{{DMLcbuM2SVe#7+`6a9`(4mz%D-0<@*jON3jjJULOxv8(3YWx`UTbr ze;GKZ%7ts#xRMQ*?4Q-azw-jqGXrsWHZC6y?}5n#r=H^62!*jojtVAw1%hiMYdIY9 zJS1iEJw7oLvJ9=M2e|Mqh=BieZGf*ldGIqqR6x=&++$Gb(SKa+Kl|lR#E+NdTR<=eqo~F-AGGlJ-!}<3+Ur5YZ!z@%7QZd~KfmmMX#ori zSkej{%Rv+G|JVcj9LQPm(-SCyt}izP{Rk}y8mu}Af-K2DU(-w99~u&qnsa#sXietA zxPgl8ijB2{2R*FWTC!Ob=G-~rEm8#0u`rvNHFvEF=4oN(!VTo&Gufz<27yAIBpcJn z9hnN6s}!;B)`G8XE*UV-mJpRJ4GMIkV~Ae0qS%q#BE0eR2v-jQ!W}*u`TpMaC+yV( zp=};H=G@T;!{zHi#20nzg^+F*%C@>EXzHx&R++oaxRAkZDp{l8zuxpyYm7xg#M$8y zH45UJN<(Ci5p=-{P(-Tsv5;{0~!R6qP@i_Kwp_;32xX>ji?UV$TWQ>FqGiTxoKnV_!oF8z9h zV~`@JIb1Bex1 zx!U>C$2s_AByfy$X1A+)-)DYyHc_NVl1v0Tg9qjj`nv)Qmquf_rQQ+oC=!3g%^jWG z{?Ec$7`5lKp%~BrW2=a{47ds6ekL3!2SKx64xXqX>J}_^)=-O^x-zt+DkMn0x3{D| zF5NPczM=}dKt+A3@y4&e4vdUsjCTnI&-}ZN1*pch!S+>OTo`Ew&ws)qDF%@0W0cc*Tk1HuDG`*BG0>A6uD!4z(7)wnc<$C+#y{fnriFjDp~>FY3R z$rb_up@tn0rT*JJFhlAuKq5KY7mHH_yQD3ENhU4BN~;l0g&qSXJ3BJYG0w5q0dTX4 zauBB*k7o-(TAU~%AKHvtlO*bCV30t-maHgH;T|h9dbQFEmweL94>hK`7BE$quc>xD z9q0>yY zm;xRL6!$1^{M|5(GZ08u&n6)t_kB7W=D#L_?*?$Zt)k`)PqeT4{yt%lt!ky%_3k(v z4CiZWwpW?A%xs2vH!8h7Bpeyw-LBUTwky*p=m+zfoWsVIy<49-GGZ`!%^tBo1`~|e z{^$Lhz^}&*-{XE1jWW~$4-?>;l5=?%dS4ZAD1m*30zLjQK({b5)JiwzLOyHp_M<0e 
ze{5u8#JQ-gr6QKYO15+YvXIFhu{lJIE~{tcgwMAro%wosujKQ6d3u9XoQaxd$R`~y zT!}RGGxi2PG(dIX%pV#g^9<)yFZ}(V@26VG^BaG`R(F*=KX0oQwtzlO=SE2_OZg<) z^c)_JM%(~*g=)xeSz6l8fpISLv-*N9UMKu}EOQl&Zs9 zVA0dm*e#!3gYPM%aukVT@o~J*d}9CKV~6+9 z0TjMnM>er7w>=6u!nP0!UisBMuUol|@Zr!;Gwx6aX8Dr)HQrjTmzVk+Fi>|Q`c-hP ztZ3B9_V)<_-!sDWhMGo-`|a_*LIDS%wi*8VJoboqR}Jl6apHCd&|_CK5tse0h}Ib) zw`jz35UA89^c>XfT;<&v|8zjxuct#{2=W*gMHQd44ontZ??ws+ms530e(a`S_qL*E z21P13#L%6KIBMoE9*9c^BG?RkHUo^??HDrVu^hU+can~ZVP>*dc)j>C2f}U?2XV5# zhQ4l|CvKft^EfBC%yUtP**lDt@5k)hTs=nG@yBi{4xT5i&!D3gAE7<6^}#BTG%zIc zz@pA|cml^9fq;54rmS%?$OsfotdEEnM&D5T9lCsYeN18xosM7*;${_u&CBSH>!0Nf z*Bo>WaO6OASGEIM+<~+#8?oa*3SfcZ0F)eKEnrgsh}wfnq`B+(MB3{6Va*S^-|v?Q zKth&N$Jp4|z`!8&lFsJs{UWcf+hMOiWN&ZJZObXO=l*lc+gJTz000^#lgrVbE!=gZ zFG1I?g7jagTJ2HL)8RCHWhe-*Wn`o)fKoT=v%p(#tZQ$4%&;o)Ql>&KwP$clv`Uyf z__MUEYTblWDPMct+WNXU$t}7tCXJL89Vi8>6ls8zm{8yG zf}j1Ws+eStN!&sW^g-VV_jXVxvuH1R+Vy? z+*$Hsers$vzNM=80~cMqoy**} zK>*rH&Nz+~$LS5!eOj#izaUCC> zXr$Dk*cxs-Ja94*NHmp1WJd>73IS6HVCPCblG{wvsV^7(b6JYV>bQzL$GyNxc4enM z5A@{X;sW3IaPPJC)qgHSEDi^k*%h`TBPT}|Fe7l+NA03XM1`?G9eGLe`hCPMTV*R)LPg8i1Ot7l8 z5CsbiYbw6dtn?5r6#s$kr)lwCSIFR%aOK}2U_oD4czlRV`L%2P`FHwk!)KTj6BG+X)-Y!?Czh*ZVwM7^$XOmj*RrdSndh^%>yOfkJrjv z^e@5w#^VVwF+pWzM%A?>{m)tPT3D`@DkocQ*+EXCxeWIz=`|5i_mw1q0j?ro-vUI3BWuMrxJapx9A-#gCi}D%WD3E3xKoK zcsR_4P}HV>W+BfawEz8AxZ3{W=IvQ)YB12QlIYl59ZP}uLN$*@Rkh)uW{%8YhCBjFXIxZd4aLjgzpGYz2@L zGieKYb+06OC(QXH(|r8&%~zS0B1@7Eq(vGOy{M57uZ-jw@e%RZ{qd|v&)+C#Y^zQ} zOjF0L?t~Z{f%@z1e!JU`pSuBkFV&Y%N`%#_*6Uy4$Z+}Hj*8x(!z2bQqD%C5vRLoV z7W7>2@GIaix_{i@aTG|c+g?ANeQ}z-dwcFL9p_K~nI@k<#lhJ~+E>)wTRXf?nnlDz z!QC$YHUw$%vI{TmQCDJsM^fH~itx^Gc9w3rx&C65y<;!v?r8q4YB&@SCvJb4JD+Z% z7u8!@z3amFqY~gs);C(@d&j;R)Tbrracku^RQx);lXNu zwJ9m5@wlZ@^ZyxsooOJZs&OLTt}Y6|>fA3LJks5R6l2K)VHO5KKo>KfHKm8ahsrP4 z`iYuG0JFsLfrMR>)u7%;ZuEyKd-q*stRh@?{7*NA%gq*f&u8#y*TCvQwHwx=3#VWUcL=0#% z-Fo!TnR^-S8aG~HPnczxP<84q2XJM6VK6{vg2Ep&=!_=jmatf}>&NdFhqvK}v0ADy zd%nM5t!2I18`r(>?7HPo9k)}lRxy&^dDymtbMEPpDV3`79Df{`AI|8B_SxwPVPAp5p!ZYM^4{5w 
zdSggn!DTDeVZEv2jL6bxn?kf)5fI6*v)E;2-djitHYBfODYUrAAD=r}8`5!0RVa*8 zR=t~GU-xZSx!Y$v1NfHl(nn}+9Y?7j`>SjDUvd(eRdM??G8Bm%Vkp^)e|IRrt%~iJ zQ$fSR&-uzf>`in0gmvYIGu{KVDm$wD^xv^U#iKqZzjw3Y(s8+GCt;Nfsgq6SW9A~C z9O-M8NSsGR{wOmlnlXaH@%CYF1}L5 zK!GPt9N}w9o@pZ6Q`&jMXL2?&DwFg1w<%QN*|w}r!S3HO6e7^-SWjn5x*AiYH&^1P z&DpBoB`H`CPvRZdNoYn&4WH)dS}7b6)!B4Q=fYFH!ZcXiweR#5{MBpZl&h>H4~)AU z=10F<}HEH5nKm zbb5)fpsYC`(OvNlaiGS=#LT*EY5I`g%#Pj|0o=v7A#sQ!yvy~dQOT4I;sib;g_F+m zrqu2?4+pY{F_)KpBtH!7uOfI#h_`W0H)IuSJlM*#q}yybGqGtoJp;UI#+S1_;roZA z-&%9!lbr9ym&J9NGrt6eNR}yGO|zM0)6&@YSyj`u zFzZCyQS!EJo|H;2Q$HMptK?S<6_&df?EBV0cSJY3tQ<^*Qc*it3HFuwl#sKQ?GE(~ zlr*RehOz%y{U85h^$Y$Ye#v`bxxefT!=URLxbFNxX!&a7@I+Kpo9lFPSWExB6{xD~ zeUne6^_KiQ^XrdI7EfjT{Ldy>Er5LVTYXvP9=$dA z2AuV3z7029=_et@`S-j~Db!5S=Y@ocD#Ef3qp)wJpYFBBS|+nejYEXxg5Sx3dJY_a z9%=~9tUgBg9x(DDki+QIZ+_GjP26u)q-ai(D5*&)U-{TsETT(P(GZ8v&b^w0U+NT9+4rfKe9+ixDyQdR2>|K znkFcG$;$dDo2pW-#7;R&u9LKE^NxU5Z?Opkw6a2ZD2S9kxGlDa)G`q_)94`)4B069 zOUahfY1$^ZdfRw4G!98OIfn_iW^%d;?k9o5Ave3WcMzfk6T$){#c6{k?lN%TOjvZ1 zq-}&MuhbPuu$dHrun2UzVFmzRt@NKZy3Ka{%a6@`XHm)s3DmR~$ec&$- zERN`m6*J$w(p+%l4sm3PZbaP+(&m#)U6>#08O)DgQ~42z-+=vkrR+|dxjTmUYx)c7 zDWmTk9Ler-xondW_mu_sSnkjvU~B%l{MN^*(Hk)j$}_Y>a@1P=+vnreR~ zVuVKUeucJ&d=JHn7=x28h#WjQ29WQ`W<0vFO-q_7Zx=7M+CjsJlpPr&ik?*4W;w(~ z%c8Up5Wv#6(Y+^o$b>NWKk&CFd;Ha31`A2)h8l1EIX_IPtCMt){&=#a3hNgJ>{&&z z7AP9KK-fw;LSftUB5b=Lw5G1}koEgD4ts_Cs=L%Ij1FtIhw9+sJITlcTwgfnw{6K& z@(<*GiQkn3g=t$>hdA}QD3I=tc7rnuLohjY-;q5~3pnIKfn)twQz-;z`Cm|dVd$>2 z5fE$f1s2a7zAVssCK*rbxBNA24`?d;akQ>^zb0M3=D6u%Z~B%gzu^dFW@%bdScuK; zjaSh3*yp|u0Y%K>xIZz)v~Y?q+r+Rsl`AP8g-hpk_fu8Z>n7^%<)SBQ>g~T`^?7!% zW)4S^nC&hBoTS^sPJ--1oOf#gQpK{w*T-P_X{MNL1RMJAlez-Zf~6`ksk{duvxih> zgfeZ|g%7Auxb#~Y9)#aCCF3d?f+8Th!K4|}kXy=*lNhuE682)kzA0133G2A4PZp@= z=CEGHjQGc~(LjCX@FEt$L?$YJ_G>UPFj>n%rVuWxMou00WSC!F_!>MOs3GN96-K#} z{eTse&YFXf4l*dKVX~jOv zHdn`?B!6)bEnf*%Y!?b82}ZI%k;_VBj*}sk7K?9G*BBb9!n_OW+5Q!9uZF&*g!D12 zVYBMwdkV!PaSEqOydZO{s<#RAAr(hze35NxnWl=ppfmLhQ6>dVdTi%aNsGAQo$?nL 
zlb6Ta_ZF7uXrp3?k5blWM2oGA)=jSA#6$;b)AP1cG}hPVXRdjl>R5~8ku@a^=T}k) z0^tEg4$T2##5^qLml!jCp>+vOs+Fu>Tu=%ry*Q`70C1))1$6ZfF0qLyYKZMlmg2l* z+9ym$e&YQ)Ra9P1q%t^YCS-5IPEu~|YOdpb6Iy=cd}_~~N*zD#zA_sZ*JCJjV5Z&7 zwXr8UcA^==-AyjV)#!N~rya}8@4NNRa%xD{BFhPH+)0x`Ws;OSrdI6Sy#-a!9_xJ{ z_`Zk;W*KTulFnyiWQ$~?bNvGqe@?5N<6!9=Bo1M8O5?(WKGMELWGJ1!UWkyf5mVzZ z_>V=a2nnG(wFE|9R;}C3;7@wkgHYh~E?Dk`q##PLe_@FK94j0C%#rY!0xi5GbJ%TK zl(#!?bi7~3s9c9H{$X% zC=8Rn@5FW;8?$PvW(nodVnb@O+H<3(PouCOVy9adyQ1px_9a!7K#1EY*ewM(Xmb6qWNr=ouW@v zXfJDUQ6!O4BJxsRBmdoz@xrUii>$i>aTcVqCY_v8fVT6ueeC$145qx`$iUfF*{;r} zc_U`F(l?wxv*WelBq`<>t4E9)s{{=ltQ;`$o)SkS?cZSan|4m4!_{sNZHgP0TdMm= z0H-kKmSZ#M2=P#oea8AeH2ATk>-Tr2zM-ChqSnA*fnZ0(yF!`#>dF>?II~WJWyNef z-8EMdUR~lq-)JTkJm1W`s7T`$+0wo|nX7sZLwG?1k<)DBhGfQjyr^mNdJ_dTV6NY~ zd2Pnrw;0UUugaX@p&0s+%lEiK71QVX*ps+%QpA-~CN9NxrzV+r^GQvD#~G7{6pfds ztK-e~hViEb>krM{wL6y?Txvlus*@O@QxG#vy`pDIz8KB->YtKYXXhmEIE|{^vsF!z z)_ZI0FNR0O%|oU|(4H}wLt)?k`Nqm5J{&rbW)rf&W4`GLi+gq*R06$V`E>M{@`7y(;toZ8 zs}SEDG9GE;lwt{lg}m3{c)|noF%p}9&aULlF?-8$(ZyOQ&+jbbde?1R$wpnQsb-1E zjC!;x9$l=%$?Kt#pBHywrpz@AVjzo5Mzh_L_ z4?1YCmW=~C;Z~~!Cd*#IeeE}5Pu6EXA?#sxRNStudn~Dh-h=xxnAgvWOw&lv+A7~? 
zdtr>%5HzL_?CX?pBTtGMOQ-@7!$xReR&ewc7K|bmra9{+9rPIsNeEvpes8=6N?RKY z_GwdUt0I-3pjpeQAy-J0@rhsMLV!ckdTA_$tc=uGt!n4dBJ^T% zDUhwbcYpiu{5OxSx2ozY`NC-(0a+blC;_dgNLl}7rU~#z4t`?VjW(A0Ac|V2+=#_ZtO~?~lo~$x7 z-;y_=?r2Bu(6>tn2=w$~MB*d%Yi_)Hwhvv*7Ypa;VaZ;+62BzC{yv9K6I8HgrijFK zgNqW_1s6sC_(77z?rP4eZ(bK-8p5sIVV$pjBu$x$nUS=y{z|Q$*6T+woqt)A25zc( zeKz7g)Ye|1AOe)&=Y@ZR%2$T`U#R?&N3ACPJB@y&@&pbwc1hw*<|_LVggQH)%Ivz$ zu==$2-L!Ud61~b|nAr~2oXr9jTK#Y-IEt4hqlV*My5{7_nV%4{-j{%)tr>}Q zvd$mm7;wS5(p}%zyGQ4r@tZ!LB_YpM3?G`!Kq<{MzxQc*#3^+iDXlMa+Zq4-60NHe z-`R}dEiodK&UeMt&>OcLQ7)68e7TUVyY5jhC;z${-5nE#zHfGW=rsRwH}9&-S*1Y= z6^<2$Ww;o>D1JQl;U`^T-nazk>Q zUBr7QiI1@UaL(gntz%KcP+IC-U+lg2iYc^}g+aMvL==!q6&0CRW$JI+dpUh=h32V- z&ykc4jR(xuY0Bd@8P=E`Jao!Q+B@#y~$nb#z|{A1qLLpR9X38=JPuLPAeifGVZY>H2=6(rWZ$=AcOLb+Y}7 zmkmP8^CTR(~-I4dh=`>Vjd_qm95aSn?V9lkgM)RMC{`q@6-rX5{yIO7g zlSJw;J#EBHgQR6xe>)c+}o{J^}sjygM7fvo^*!3d0s*@x_4=tg9qP9zUGhSRBE)p7LI%2Szk`j_GY+dkv>ih zm8;V3SxO~}L%b|buSv)%fPYp4sth6G;{U2Mh=*QR&)97bAUp20wD3Ihdfr#qC^Z54 zfnJbXZRIyNHyj+c=dQjjoDRDsuhy)VgJ&M+d?{~$$lwL0d%`CHulf47X16P2&y%@Q z&8OJ&Ep-VPJLBF^^tP*P@hEGb&NgCN;(<>8kIU&i-n2iLZQAh-_t#$^>)Sp&rtN~c zAI1oO;waBT+|57**}9z65okVL6z7;T8;&tn@Aa5mvoSFKtDfrtnGzkWK{{<}gl2~l|Frj7*{IJ+7j@0`Bqvj*U>~Jk zqY7V)!SUmFD6<~po?XaEY*2W2CW&%-33>n1C!TVi{?&I^v;Qxoys8s-BFU7QW^lre-$OQO2R6Jo2#H+ce*oKM4%AJN z+92@{YB<3!a)h9py`iV{vMF*L56nm_gOPvzT%|H@SgWwM`*JbY)g`F2?XT9qPODKG z54SDDzrT5PvC{xk(4!a!B7E{mGBv0fy$kpcB??nwJCtt8V zp2BfVmDI@E>0V{pR1!)akW-9B<^u}rwf975IoI@3W~yFub9a(MYweoWNFH&~iJJVj zZFo$*6VSl?2n?6}CI6U;&X7V*`g}AQ8JTNyRaMpbytwXWMp;?e3(XG$@Q2Rz-Cg5= zizF5^l(z3Xv}>9xxsnMkuP+GRO+eqgY$Jp&M1q_mtUo#mI{M9#u3cVf4?se3i+LYv zVHTZ|ogOn+rrzusn8OaEn|`-KmNY_gvet_0qj%nBwz;Qeg}Wr@wn~OB74K(Xo-ZX zFC|5g3Fs^Q=p*l-#K_GoEe*0FW(1AsaRs8Gk!+)&VnlBEsXZ|K3nd>4kBgWs5T?*{ z%l2H;RYd=rbcfr`sh=zStCSL{d&u{nYn*0lSgbO-`zJaN>W8(tH|U??W~@1dQ=A%h zDOUF@lj}N)^BT`Kke%5eg+FshbBf=`!tl4#hfkAuKegtFd(AHBY3ReEMqo*d)62S@ ziLE;~A8$_(?BC0zaV%$kwJ?gQ26{D$|*XHaP|4bhJ{HphLUVre9>_EVF(5+7a=md*`w(* 
zMV$;4cRsX8rx`Vb>$=f~DwKKV`BtHnlDKZ%e9Qet=IX~F9mZd(!vf~+gt;H(H-7&d zkn5k>)}EJqdrbqd@nsF3rvAR1Ddc}TMmGfjdM=>VunO%!R-<}b zAY}Zo#YXqb2_x^zJu&q4j+rvO7k%Z|;_tZ9;5d)R)#L8B$04zS{xA-(_C7D6B&b;V z-6GoM^LyKSHQMhiTqX*Wl9J5K%s4o`SMKB1%PgO6Mn0C7_U|bXrY^nebG+;Q2+I65 zXNd^Q+op!>tDtG2xU+kgMvu z`@j}&r81ReMiz^GEk|ideQZ<+Wt$iU)dFiyL!k?npkb9qUft)-#y^ZcuvwjQNgguj zJYb%&LaD0_IQOVB?kK=*f$@L+JWu$bj-(6=%9Y-XzY*OSKNq}y57qzfsQ;!{9nEAJC4#D*dSmDDF}H1S zjDJim>}Mf)x0bI0xP+x*qXL++TyUk}6195BF-K}*W-df^tnQ;1UzuooJc0^p*UHc| zX03~*NmJt8&7>)K&SRi->-)>`D4fz5er{)KJ)w>k6jD>nL0TH@+~JW<4xu21h3dY8 z_MZNVkihPdbIMmxAxEWB08xxaz?&1humDxV$ikIP+0R3G*IzX9Mm8iRW^zTm&d zi51Gfcm2@4sD_YMrI(5nJDF5bveV%`xAV&`681LRFD*~qhWhdC_Vfc!sJYFDd!X2s z_Y(;!V8SE8H`oyg0^;C1*mnqAY55J9EGTN#PKytMp{gxegw$FB0!lsKDXC$gROO>1 z1$^a%B-WYMxj7kVlZA%rEZ4j)0k-=TPUfrRq}9~7G$t2+fitb09^a2*0V)wZMx5#9 zLqmo62d%O$dO+m-U{!t*o~`&p29q`^Nenk=zfCYFVCZdnX z262}zD{kq$S%Otrz4zPF{KRNFtyd!)9=pwjiam|hS;jZwnwthQdh-U=pl!fXS*D;Fm-=+lRq4%+g8J}Rb8xu zPWL-<^HLnf2v7>EMYMEwQ5u)400;iAM(r`qcNb?ZM)m`rdOA8P5iNm!bIzxNG(I;L zyXmibj@Nh``zeKXp+p2ug)Nm^WPt22uQKXnz}aQP>|tBBNxU^j7=}ysS$l3u3{F`Z ztg@m}Pw95?l_@lJ*P_R|_NSEgelGV}sr^)PY9lInC>Ff+s5&sSEYA`KaV~WAz)>3U zcG6}CqVCs~O_7>zd}AIr;pTWTyPoZyhwSIaJ?|O)ut?-g^4ZN1sDZk-&_PS&xZ;ql zc7W->M7_2KiA)rMJ>3y)b;hu7-po`OLf3|6m^S1-*GQNy$Q`}cCcn4b17WUE5!Q?T z(?n2`5B@VQdQ38Vq;T+Jmj1?;FG5Y~7cRe`_j%@aeG`c+$Op9WqvJcs9m(6B248~B z*b0n7s`N>lX;%gyzTLl0J!HM~2VyI?eOHl`!$MwXQ622aP%uphryksseJh0z#lkjY za61!6%hjiC^6oWh_`SdG^Mi#JbJLM4!W5fsjDVYb!piaC5;B%5I9oGm@Swf5N-zUo z&~ty{>1?PVBaJ0pJb=-J()r}w@0XWNS(+u9C|{tgUu42pohE93Qs~>;o^`4g-e9yUtcel36JK>&L%RuO>p^yq)K+44`vo?4Wg+^Oq5q~&QHrr`_68gVw zaE|}Lv)iw)>dHi$KT`o=A=YFs}As9bu8;;~Pa1@N&_-=ofcO>so{O47fBv z^`?`=CLuIwMM}j}E&{ss?U3*_MH!`<^?lb%H|DyP?{wupK|xGdH5&}~u@b-h43=nq ziqui5UQO~6G)%8Fe8C4fG0;BAC4vkuWh8Yx&qUGAj_%e__r`(;D)I?ESg66ZyN&co z{RXaQE(@t93#oQa7BR}eZtm89wP2a)$d$~jB13$)2XU~hRLIV$y6bj&{O6D!?zW3>8xk|pP)#i9&{NkgX z4Ef)KH4j`r1$&apa}QdmM086C-VcJ?(7z{`6Ztl14v%?Ji?pUyd+NY?rIh22x7Tv@ 
ztm|XtimCm6#(afZF+^ydW{m?fSR?Q^bD4~GxHsR~x(&91;`w{k&UhmUuFA7ivM#go z_1#CM?8QjJ?C+<3a%J*Z&Sei;xb`c=1URqX0;r@6xoRiv6-IYH2W-#}VHm6Ms{TG}5! z2!ntliY!l+fQy=~o*~`y-}1GhFD!}}IK3VQtF*CTXy>Kx&WPPGv5{P&d!JcOR`Xrb zFh`h~(;DA0hn?t5MnSdiH;1iHxB5t#?>|81>+mL&t3@6r zu>*v|r>o8FZ@)c$F<2irIO8ujV)U{=P=122HE_PX7rZz^+&7lbNm08TQsf-PD;0{q z7Wnj#hTUv6%akEgm!;6z(p zPOX+tHJ#xv@1A|-cDG+F`J$Z6HliXN1B2>aman>0>kRhzJ@w^xEX<9m>b7+q#x^v5L$lX%qQ-!XuAhr4 zX)x#cL^PYPM3#OqW2%Pp0 z@a&CQ=+mTQ_Z{N^0&?4IwraNEhuF*c3!<#FxWM)$NtyXdo7a;yp(nSwT-(!HGgIDT zzDs%>jVcbEs%LcTRwVbf{=Ui8<)ucHyarp?j)D_W_WkqKI0Zi+Hflm^yHp zvl=~3yjyk8R0=I$ee9$VtyO6_jeKj<8zN_-K3 z(bGc*;Z4%#75=;^y*{0Si}w=OkLrpUT&Pev$Zw5uQ=^>zd{coj-Lqi5s=30sd&**= z63W?qTDk;U@<5tf^Bfw@V)Q1#pl~7JYJI5~z=&I(A-KHvUz7*y&R9NgrY1y#Qh2{6 zs#9Xz(JZkh`gn*#Z~r_d-@})zzooyf`wGa$uyLS63_QEK~|opvW%Z<504 zbDP7;LdwgzLBm7kNsYsbUH)1q%#V$Ookj5RS% zt`AzGJm7QtG=z<*TfG$Ch;btn)1kf>M{?S#U8u`!J-Y&e>czpMu-(a0$h@0VzKlVx z4KG}3nEx=o#5}%P5(ZT*CMufEKgx_XEgV2Jgw#>HP*Zk}GaH`DoZHvPvK%r!h#bA6 zPt*5J!ZG`i-Mlh$$#T%EMHTE@YTtPsJ3OP)k-wyj(-O5(K>?Q3(oI>wRs*=@I%ulL zye8^+-OQDL_+omGk-;U7+K&7;OGvh{iHh&=@W~n9LnG1+n0LH{YL^@tPe-lxxNz*i z>m7|YK8$(Qmh>)16Qffq?tqzRpJ?CQiQ9Or`R#*4J}RBD!NVmZWbA{`m`RFor~LV$ zNeYU3X`e6lc+r8G2@0w(-R{EIxQWpdD$n{(la%-Ac5RP8VHq7gouK+mC6P6xQ2qTq z&I&jKLAG9!S5Bp+&SBVFvDyzEF1XiN-aWSDtJ}RB9AXC{MFi7sn{xeu$J>e~-QC1P z{l759b^?e*aRnvpOzm{ac`I&%?+eDF;Lr~uw{xm7V#)G=_BTTbrkw0FU|wX{gzj+o zU~ij&aOOt%Ukmk+!e13t=b2!uX|*8vfXlh@s6;<6B1`6l3xYOG&EyI^@>bCo$wXW9 z-TEb?)$M9YWsoS(L4*fMvOxAyszF$F$$vPGe){A94;+y;206h-v~w zT8WTVJ~Ne&Q)lb>+NQi~D-;Xpu&&eZNsUn8BWG%Ak>?mUCod2=xbn|bUQ?RDVf>cN zkLjtOQOs@Od%Ks|AGDX^l(}?9HfN8lY_U<&ywP3#ISQWoIw$2yLDWmC+8h>Ev1kTL zF|d1sBWZf@8x__wNq!8IIc2hCkga5|n&<*!0c4w_zI!J8bCP^n7wOsdQ&=t7u_6q#0m-_6}7r3#SN4;ic?_Dm+4Oc1{R6pll zxRNr?L8giIYoOpf*#16z_@Q+UG2ITz$k>#yuFdt6`9Vi|XHrPI_{+5|On`N&2J^is zG6BAU9Q+{hhfa|IEw2jEgn0rgA(HHh+pB%e8C8qBP6T^&G)e)$lR|8`+G6~PG7HmX z_`;I+EeyYd0ZCm4jHwBEjhVR0 zF_eVeBQX2VEGJ=dR4tn|Q1&vE>RAuOGQW?yyKmfdI&N0U_uwv{sW0iG)9oInTpfZu 
zeZ<5o%wC-kbT#UJ>DVc}PRX3}L%`5)jJx1^_o2UX zxG_*HT^Agt`E`TyOwwxJ3F3Q;Py2EJ=d5k>AZZCgFSa$DUGIfC*Nk3djrs+iH6mjXWZ7!* zQdyvGOg)xTHFY>bCQiU2t1wc&-4)Pyquh%hfYQ^9UC|7*X|IVgxo;#7E3S!Z%+$op zG$<)4-;D0YFV4`Q6+>%m*cH9-n2%{mZgB zYYFZi^0?WCR7{zbJC$6SvFM)3@ORtO@yeE6e>qg<7%UMn{$9Jh)4kMMJB>Z1n3j<` zG3_)`5?$AeeXo4lWm^l3>)4aRvXi#or?b5?wyw`2b&8eCaLzwAn>DerixN}GdA4l7 zwQ4mVnH|j5{Ho`G$DnAN+kD3nc)n_PcUXp=Wt*}I3GNVhd))jKEM%#cG|=@QZzid| zCYT-Cmq*V}eT(CTxleUVZZ8OfrrD9PWS5#K3i4?KdwBCujThTX7(E~UTFxQgaHeW| ze;g>yxGbWy`21+4#9T@|&99n*^eew*{Ii3XD9@e~3?aoc)!Ey)%Sy}nYj3}amOu#t zvNRv%1~cIB&1byby6^s6tI`$I4MM!owQ~uSyUAcjLiG=8_|Ho78wMjY>+o(w%loHQ z{%>pXUdmTbI$hpD^snpC1X%3X;Utoo|G9Yo$G>`#1LiVb&`A8h4C&v$(ZLK@gL_|e z?qAJ;5A~bcw8@MX^50Bu1N(gtEAQ5T!e8Gc9h#x!fZ4n(_`loo|9$B5@ORyyG&@78 z|MB?$c}3sxKW~c7sw(m~s|L=7^`yr+s?h%WrY69fK8Ay6vi|RV|NADxLjnwz37$I4 zU*DAZeJtYK^;nFHObUnmd1#Z<(e2&3&)I5oy=uA6C@KguCCTBhGhRiz5mvDaH17h3 zX>t{p7UgYqz_D}GoTpOmSM(rO*t$)vzdZXdyZ5>Qtb}gc(STmj!303%XK$`?2(N9k zztULEchPiI_HLf~=^0(Q6`ubjXW{#G0r3}A0CME_1)L3_x5NiAY8YSvr70J_mbaWwNlDYkDEr@2)B?#=mZrSWQ`)BomZ-Ubk@IB~CCWjZJO z+knftLx541JOH(=ESu5#gch&)R+slR9#N;RX*XRZRI*Z&UN$FAM?hCYgkE5s)q0++ z=N|tVT)RhuXOcUgQ&#}A4V{~0dRPx|Oq0arRMa`5x2~<6nD@^sK)?(g>RN(OAEe#v zn0oiv^4rDSZ}=v?q+F>HMITg;*!B|fU{ljp$4U-}aA4f9?;vAw+LP*okbfg%`R`B5 z@_C&u?lE{hh9A(kUdN2cEu|x#a^Iii*+@R=*gpMbUC5#My(=nB+Ow{)nfLAvf;V;L zb6?qq#r*u|^k|XIr@B#i4?zMb4Rh2f&_aIVvfq|y4Z30TX?5Evic5?XU2--J6NyUW z)u^*vY?eKv$m6pErb0EcJUk@#(XUOed2PaJ%)+5Bf>5={B>)$wqWqJk`{~1iyIYL{ ztP(_dR4%fSDS&#W7xr{dPvS!2LTIH#3ZECw_|bfMze90+si73A8ILs}R+9A5%WjA& z_puQO6j2G18G!Sx>hyH&|L^+%%8`dp7`Vlv41knFv8*657Z=xoV+`K7*LWIlV%1gx zKJ+N6*43vEI;YF^<4*eTa`87OGKB-z(*SN`my69_#q^XWqv7~QhrPKX&EoT6CW^l- zq3;5pCb~g!)XMgX2++`WsrpRXYk>y+k%Q`3A68iro0fWpx3>-FL;PaL1_lZ=&qCF# zi0a{aBgo0gUt6~Jgmz8X{&B$G!+`(j*h&2M?KCQj62c~N)q0+Jrc4`Gnbl+*#A|j* z>ZQLyQKEA6>h%QMo(fc5PnPS&qq1C9y!(NI6ioXk2ZjF;+5G#O{KNqc##^FJx)TJC zOZ?VP=)-eB7~7MpBYdKMi%cSx5%*XipWU>!P%8oPNLek9R!H=4Z?}a!ox*SPT2Pto zSre!Q|Cg}<`?8^MPXDv6aABFr^WK*B>BMDSKf|{5C!jV+r(5X>*dsKws$)Z;38i+3 
zm&fgBGQZ75R&TgjeA$Qo(%gSEn)3_++u1qGDcdE*>9pjBm4Fb`czWGPj@<-Z??#t( z9oJT%{bCY5ix+CIJS#7 z++QA3uEXPxOCOIXQ~c)@<_o5S47hZf653yX86mXuG{7IPRT~Zd^9Z74_^ST#$1pUo zf0@L05G@qnE_EhplYn#c1c5v2)t6dxuU6dlrGoqIGg`_v-A>0%@BFzQJH*ErvjmS>r%dD-m(IL z_xV}IxW{b%-NqH%XenTP?pVsb$1F*yVrfo%DDC#FV!s&*7mcGMWbv{O8|;lI+JN}g zGjBOS6e`U{(Bsz!hkmq^Uk=n=nNTKU2bhV;2N$JXKSBl3#iA01!TvR^9sXc#u+FZw z+3IKzl(x$6dssA`+vt=Ff8{9+VIZrHo)yAjL_ZGiD*ryDYChLvm{rsqxBp7+n`r#@ zQ9PYhb1aD#TWsB{O%4BcHVm_Gp{hC%ifZw*?STS?Z{qHa7Z;i4R4koYl34P19si8p zTGMIs{xL(ct(xof5%ZlSjgXYX%!K9;=lBMFPm;Hp{m-{?rT{wEw`$lRn64x;xJ<#e z`jyrW6x*_w{SVm#Or}0YJ3qkl8E)|zdNMbYA-*>rH_=s$Nm&1DBhl_~sZV{RP6BMn z^OG+QXpxvlxX@NGWLzoBvqG#fRPM&J3B>@z>>g}DTpiVxG6*jFqGd3}5L)_1PmQ@t zum5YQjJSH^G_?hVir>?bQcAF~4ueSw^cQJnWAw!NSmthgA~mY-$p|OT2~KW2W}1mr zNNUjX)7`0qlaaA{=H_}B(BF9u%tZp2IVCGYnNU?>zGq|LHTy1sIF(teBQzZF)?a+L zb$PQHaDgnMle^^%1a|L|(CGZfKyu}M+qvWVviq$5m(#Cq6_S4UjyUd%h@jHH*^)pK z#y7QUDMIg4p1&CI*KEKraxV((lK*1Ba(NIy{yF_4o%?T|c8ap2!q|+*e8u=L2K>tm z*dvCofA5X`#eg$dU>Hi&aiVbQ0qgq5EzU>-NMV`0?i;WE`ExYN2w;zh|HonZ|8*$7 zQAgm8{+XKZQX8pcXDJ+Z#Oe5G42qS zg)vC`#lRlFS-gd!)7%~}IsmHlMaso&rW4=p&wncyDO0%(s?zDrM{57!%VwR|nyz*k ze`c~-ZaNvJZ?{~kF+1~ay&B$%KG>5<*KBdMGd116hu*rPorpK{00bIVi~|D#W{OqI zwVR#Msg-|L1BLPD&X0tYp_f3rF6h-*{j3xOi#%nz%;%A$;L+nf&m*;aX7-x3S)1M6dfKv z`nLPcKy~`NZUbCP%S(Ize zJL?>2kawjf&fY%O!C5p_F=z|9ILToqlM@8#t-dB4dk^mw! 
z_w|7%7$}QZ;C%)h=7XDC@aK3OcHvaekO`u7)lesi4F?c^``__n@fb4fa^WE)mCB~` zMI^J%U76$G;ss@uoMwrwjY7J=~iRDe!b(lI4poSbC{+##O8N zG_OT558#h)Q_owpyC zZvPpn`Jv~#YN>jh$LIV%QL*ixR2M#g9a>;8V{1CQ@LH%8ZGj_#f`?GCOn`txtv8-?9S`@oRC*~zqoT6 zy-*w>zpoTiyMaq)w-IYH8TYiPIp9pn5Fmn(J2(K+x8-IV`b(0E1z_>m?+pExdOGuY zd+Ja3`E>dHYoILOiy@KZl}=qjK|#@l^48{NOMPfoJkRA?^lY(ezXDq_9VaI?y37U5 z@y|?QIfrUM2EplYz7vv=+xr*@ARXZ?UIBpJV<2>e`(pOorPpUFtje3qO-83b5{%Dz=g0XnP0biWIiIJk zdXy}P4RqBm-px(GVi(Z>i?t+hIc5Eq%h@LKZAMVPf#kJmS{vGpj*F#~gYqk4Et)G= zB}67u8ky_{0nlJ{4AF?jKM6uf$dH!1+7%FNchZXRs7tmYHH|1x`epx!`t0TbT?GK( z4BVv@buV)hbc6yvv(h;MTH?X>httTx;_xCfdgUsm8r8<5T1U89yxeO<@;{I%7{=5b z@q-`r3q9X%$dl!Mk;(J2r>ZtK4t4h>FY_cpDN$Xpt zQg+N#LkgE+@RXu2DSWfks2YbCiPLCZlyH83-T9kQYcD_5y7d%y{lZ#C&QA*xQIB6e zK;luMGLfB8*e`TKOT*{T@yn>1RWm(K{G&T)p z>NZio0|xwy^+cJZzQ<-_Z2~Et;#mz*?#2}RCEl%T2e0N&cjr%by`Sb%&Lx5J8=a9zrIkb0NZc zG@SD&7$71AT3r{LJctt&mNVRV1F3bh{kNbsO<6~CnXEYhI=rM4=KdESh61g`djN^* z7G<4?AlW!_!F21MN+~xqYf~&P0Q%lLxR(EbN~-QatNn^os8YOmekzUIDj7aL`3oLp)@ z%@cgHX8HkcKm&go$bBh-!i(INrCKynPvb+U&ec@G$U4%<64wi|vlJB|dp!{Z^gsSB z&>!x;wt;u1_Cv73=0=86xMR!d?k06L2ZwCtPhHqthEe?G?T6x?OpapC zGki5c$eRzU4nbPScLoSd8#bdKz*`sFhHKD`LjbNJ6J7?54?; zY85Q4YP?Mg5QkT>*cb*sT|EV5gHY1hgr*F)bp~!#F7k56%UmVIZn1yo<=NhDjo1l^ z^!x$TD#Lf#=d5M=2P0kR&*G4&?KyqbTw^w-L|l6Yis-1%ZMY;7L9?M2{RCs}2Zv3G zDpH@71p5=HuaJnLO6}=k0jXPUmkLjtAnRM|-T7J-D!o;Zc(NBImm!07#(NfxOPd>3 zH=ZC;;XUM&-@rOzi{mW#E$Vrm;HK~G5s~th93$s#v;1i5w%~T`U}AJnesufZ^~ols zxpG(_m(ShJ*^sUEx)nG`p6cZBCfOvOQeN=gE$-%kak{{0cytvJl}&~S=dIz4CwBmm zVQBm9CBb||%p2@u!4Qs=0ibAbxnU36Dg%{;X8pMA!r^|&MLv&A2@U~wCL5?)dBtk_&fS#RjNSfZf7+Zy8K^!_V_5>3y*jDYl({%= zw9o1G_8yH@TNVflP-de~^#=$QYu<{eSL(V^05v?6wiWkNu%g;ep+hN@j=E4YCxHa? 
zpqH9}B+A&z-j*KKa0#kaR41i9wDy1vpmqA(x4(8>Z92t(m{#*Y1w^O!XiiQz6UdD= ztz2({xCSSz+lABBpU*WlgY~acy6<62@`P-oec&+gFj9TXw1feTU75ro&95bfdz4C?n(-__N4tv>#_?Dm7ia|t$&WB7h_1f_E;?F_YvcKWkn4-x()pP4Dk1z_W^8AX(K`&0HUNAbGT=%T4nA?lZTQoFwrY;K zNv+tZnnfBso=k1Pv~9Om&;N!1iMkqX-kYQx%wk*8i}uu-_8acDKE^=$DM`Ev?;vhP z6=6h=n7j26S;xPr<#b_Tt1*SJyeVdjBpHu}WFz0RWP@4X*&@JWNPGydZJGEayVq`sy4|`U zb_{n?N=k$+ff%eRu9-3EQ@>Cpm;8ZlPFldbwU89N;9Y`zWvH?;%RR&b>bsEL1k)zPJddCUu-AvtlpCziYWP<6&q8=dnej3MD zJRyFTqZlvJ^6thIf&VTl8s+lsDI?3Xd(Fu=+joo7%oS)67IVj&EP;=Wm`z?}MB8mu zS+-q!V93#PCB!SX=M3;3Ed(6(;`F}6Q^$JWQE6>#AOd7w*uTgwV|!rJaPk8ZAM%qu z6?fV}S+k%U(=7u(emFuh#xn{C0V%->&+-%%e2jLV_3a#P0jrfKd*<*!m=KThw5R*5 zSwQ-9Vo$B(_b%8mS>^F=LGk@~p{AQ!+xZu_O@d?eAGGO7eZ=0nEHrrdr4So3ro;%< z9efkoZC8Pq64+z_TYLce)D+>hbr1JvStGDvivw^FqfFITVo23uB&)iiQx%KAB@l!&R1_ii637rUJtV7bjZ2c@ifo8 z?<9h1%bRD~Qp8B?-A9kR2(#i>)U?E(c6@>tN@eMqZkulaWtz0N44{)TZLFHwWr?^Q zFH%%i-hM8%s_z-DMh+sCLy=mOe$teZ`sjoqTn95pEDjFk6Y(l{(#8eG4Bo5<( z&?b04@6S|XKEHnF!5`yDR!nob_i8^6&bFP77-h482%R;FJ`}~`(4UMLeAhX0eX2S& z(UWKhhZ@&7tX@_<*DWi0ofjb->!H^Ke^y8zeC;u-jaj0!gL#{~tUjz%w)%Wi0bFlbUM5nmcKlr+Gm%JS+Plom zQcV?6fwOxxPFY$?+Q-8$nK(GqG>6`BL8WM$pH7=^LFqnt*=k`v$BKs0R|OR;%mMg5 zqk^Gf8cb2zI?Zl6HC%oD3Qs<>xGLs%Wt5o;p^vIu4K5&W87Gkar-C2AINs#rREqN` z2VyWuxw9^g2))`)STM z2HIzT4YV$cZL@V(PG`Ho(nYzx` z?E>*sui`X4>oLoA)d!xY+sRZbCN}KLlby&L`LaIlkF|n)XPCfC|0)<-=(MiZD7i4I z^PaE3u+to+I>k$InOK@{ZTNuO`me6mW9#`aas+W zW+k_R$E=pGY}nx}`se`!i%+M*c_)U>bc`j-Xa3S)Kq;7-O z?VBK3QV1bO24k2rGnW^FY8Ik~eKM*QsD3NSV$T>5mUKU_o<8sxT-Z>Xn(BW}qmBVc zE-7+NmDc@v<{0&+3JRc_&wYxStPZie;%D=2wR_Abk&@4C?W-;oBfAT}8V8m{0>SDM zvOZ^O|EPPNj!sJBDy#bhSz)Qr(vzz!|MgqoVd$t$|!F#zE`lpXq`QdtB^NSS|h;l0y z3jlbmfGi9JMH86M{t#Sf*fCrd_46KK9Cv)mAY=6WhlQm@qMc!UhZ-1-juY*P9Fk5J z9s-5GCGU5~L5%Y7cQq`5`?v4leZ?bemNONWN`@Y0Mo5CCC)MWfy!Z%lFTW_hoK(n% z?ePAng)L2J)k480*K{Fi&iXG?hTj|@j-eCNfh`&@Dp@(7SV3}XzfL0vqg6;Z&wu(; zzTA99*Z|0A7Arm|%WK=&^xU0rt}D9UgB?RE%&cbNvDAUzzg3bi5W)v~<9LwTAm<7L z)H*rqs;12K=YI6F-Py=T*TPQI7JFUc1Sgdh571x6OM0m9aFGzhyhS75j=!?HfB$~( 
zPv-CyY>0=1K5D-P>`E#pk~>Vk<2rW<0C$+bKGHCVMj9fa%^m-DwVi58xLn$ z!bL%XF#lHz*rUjX?)S%{Kpttu|NG#(nIK znnAY~@^xIsTZ9ocdqL#!L|KvMHmZiW0*Z=C1LD09vE2+OIP}p=8V-f~RHfh974X=K zHs%HW5jLeaomX_ulaF^8pzdI^GvSj zSt99Tvu~;xznSE=>&_E?a4AL^+$lVPUHj$$i1bCn^AJj=)7woAGYt{(R6%!PQ@)); zyseVn=UVlPDt41mJq5_KskYyFS1UKf$J*Y&vB61q@z`#;nYR0mE;CYBvPs%F6nB?9 zys)eI?MI$B^VcVK&Vw~Yg_Z@REE=fsy6zcm>$m7Kj1~xLx0cXP^91cuRc#zRcS+-* zXQwSEDJD+SNaF_UG@n_N*&f%n=(@}HDnGF`#ahup4;ifeEUv-cqCM0@Z>^#eRr=yj zt<6`SG^8-=1fZ^s16v7HDDs_z{9krzI5}VX@8alajXkibJiR7rsNFMQvn!=HfQ;=8 z+V-yjiWkON4KVXIAPtTeNb0O!EGD;VPAe$IRSyVQ>l=H77zKd{WSx~F_zKmtU;9c< zYY(_J-_(sPJ1gHjoh*e_xTEL!L5lp4T}o`9R1H&??Mx$x!Lm-)*}D5Afhuv( zvYTPTCO{LS!t>s$a9eJhzqg#gzTb8RFFl<5M`g&G3ifm@`7kQu{d0UR1_!LW$sG9aXWN?7L8#TZUupi`}yr z6y4Ezx@DH%ms#;9yT(vA-oNL5Yt1Z}=;|{3XuV3%H@Jmk_{ok%B|3XZHP2%5_bT(N z&*RCupV*)G#g=IWR#dJgB$-cLgFYJV1v>>-#k|hrlb%Vds>7%8{hD!kkVI`o6HQLj z-~FuYr`r75HW$llWKwSpD~@`ShRyqDvZk@DymbGBU$&P;%)5hr@xr60)68f z1-)W;LO4ememTh)+mZcb3Dq!gxh%ENOuHh4*ilYtwkwhkaa!4jFMD`zg>(kHBQ4*! 
z!o2;nGnJJ%1D2Npi)*5OL`C5$9{z^)0@4z4-3D;UVw=VMQ;avFQ3Yp!7y}S9U9X}{ zQVEne2m)0s2K)vbTJ5(m`lzvSpxwqW32b?KD!;ReS?7jti%S{!V}_sOZ2&`);Q%|p zcOH69u^|7i?j}|x+RMsP-FBi|cvB)S)Iuons~x8@j*ySa{DBphVF%Mp`4-*?!9a-7 zcSn6rQOqAY9qSlTm#aOTzZ&g9Csb3M7StU2aKjE*&WL<@t}AUAh1+-B-*s(mK0!lk z@fg9bUfPeU<<@arNf*t{;*nA^=74uv0Aom`peP7PY`FLv8#Groi2zt2HtwZ0tNeT3 zB6Nk+g86twqHuxvU;oe_w}=fJL!xe9*8VNZznaY13C)cOig~``KEbwP`UmpZ8T$6W zJz{@+zm~7FlG7x|q=S&-$TSJn&}*z2Tqzl-M$y0JYkBaQ)9a_^0>F zAOrm1(fd{Jzr8>fd#2tlgbw0Lfvv`70eTUuT3uqF*#*ffOs=Tu;&laovD`L_&FEWJ z$4{gX5{oaH*mhT(b3akX| z`W^!eilXN`O>QnYks}tqD{b=z#Q7!9d{Q1s#M|Md1=#L2(KTBco!{Eqy>-#4jw|W* z(FYaL#volwY)L6B4@|LSqNEy~l)!w)yE2MkBBehZvV@Cv-8i-X;U|&=KlsX+!P2;G z`k#3w`JiJUERw;95F0}sV2;6-La{Xh$D4v3L~_stnm3f%w%<6eDXb1mAJh`kEy^z1 zhLz+Z9^78nhDz>SGwp+vBdtbArmBpiV}4|rk2Y5|>r755(ynHrna?V%WdFqk$XejD zl^DMWawiy8=qUSJpf!f2gXmMtR`0^!O@Dy30>GLp#uc1D6Z)Ui>A!td0wSYo2eV;7 zeEj#)*{^HZ37wddB|$ZT#a2|6iWQEq!jNN_-P!mJ1E|JXX?Bgv(pJ F{tt@NtzQ5D literal 0 HcmV?d00001 diff --git a/examples_deepspeed/zero_bubble_pp/zbh1_pretrain_gpt_1.3b.sh b/examples_deepspeed/zero_bubble_pp/zbh1_pretrain_gpt_1.3b.sh new file mode 100644 index 0000000000..cf5705d973 --- /dev/null +++ b/examples_deepspeed/zero_bubble_pp/zbh1_pretrain_gpt_1.3b.sh @@ -0,0 +1,367 @@ +#!/bin/bash +dir=`pwd` +############################################################################### +### Main configs +## GPT-3 models use 2K sequence length/context window +seq_len=2048 + + +## The "GPT-3 XXX" below are configs from GPT-3 paper +## https://arxiv.org/abs/2005.14165, choose based on +## your desired model size or build your own configs + + +## init_std is standard deviation for weight initialization. Usually larger +## model needs lower std. We used a heuristic equation of sqrt(1/3/hidden_size) +## from the MT-NLG 530B work (https://arxiv.org/pdf/2201.11990.pdf) + + +## We changed min_lr to a lower number (1.0e-6), which we found is able to +## provide better zero-shot eval results. 
+ + +## GPT-3 Small 125M +# model_size=0.125 +# num_layers=12 +# hidden_size=768 +# num_attn_heads=12 +# global_batch_size=256 +# lr=6.0e-4 +# min_lr=1.0e-6 +# init_std=0.02 + + +## GPT-3 Medium 350M +# model_size=0.35 +# num_layers=24 +# hidden_size=1024 +# num_attn_heads=16 +# global_batch_size=256 +# lr=3.0e-4 +# min_lr=1.0e-6 +# init_std=0.018 + + +## GPT-3 Large 760M +# model_size=0.76 +# num_layers=24 +# hidden_size=1536 +# num_attn_heads=16 +# global_batch_size=256 +# lr=2.5e-4 +# min_lr=1.0e-6 +# init_std=0.015 + + +## GPT-3 XL 1.3B +model_size=1.3 +num_layers=24 +hidden_size=2048 +num_attn_heads=16 +global_batch_size=16 +lr=2.0e-4 +min_lr=1.0e-6 +init_std=0.013 + + +## GPT-3 2.7B +# model_size=2.7 +# num_layers=32 +# hidden_size=2560 +# num_attn_heads=32 +# global_batch_size=512 +# lr=1.6e-4 +# min_lr=1.0e-6 +# init_std=0.011 + + +## GPT-3 6.7B +# model_size=6.7 +# num_layers=32 +# hidden_size=4096 +# num_attn_heads=32 +# global_batch_size=1024 +# lr=1.2e-4 +# min_lr=1.0e-6 +# init_std=0.009 + + +## GPT-3 13B +# model_size=13 +# num_layers=40 +# hidden_size=5120 +# num_attn_heads=40 +# global_batch_size=1024 +# lr=1.0e-4 +# min_lr=1.0e-6 +# init_std=0.008 + + +## GPT-3 175B +# model_size=175 +# num_layers=96 +# hidden_size=12288 +# num_attn_heads=96 +# global_batch_size=1536 +# lr=0.6e-4 +# min_lr=1.0e-6 +# init_std=0.005 +############################################################################### +### Training duration configs +## The main termination condition, original GPT-3 paper trains for 300B tokens. +train_tokens_in_billion=300 +train_tokens=$((${train_tokens_in_billion} * 1000000000)) + + +## train_samples is another termination condition and also affects the number of +## data samples to be indexed.
Since we want to reach the train_tokens +## above, and data efficiency techniques may change num tokens in some samples, +## so we just set this config large enough to make sure we have enough +## processed data and don't terminate by train_samples. +train_samples=$(( 300 * 1000000000 * 2 / ${seq_len} )) + + +## Another wall-clock time termination condition in minutes. Set it large +## enough to avoid undesired early termination. +exit_duration=30000000 +############################################################################### +### lr configs +## lr warmup and decay duration. +## Original GPT-3 paper uses 375M warmup tokens and 260B cosine decay tokens. +## Here we increase the warmup tokens to 3B since when batch size warmup is not +## used, there are more tokens per step. Thus we need to increase warmup tokens +## to make sure there are enough warmup steps, which is important for training +## stability. +lr_warmup_tokens_in_million=3000 +lr_warmup_tokens=$((${lr_warmup_tokens_in_million} * 1000000)) +## Here we changed the LR decay tokens to align with total train tokens, since +## related works (e.g., https://arxiv.org/abs/2203.15556) find that setting the +## learning rate schedule to match the number of training tokens results in the +## best final model quality +lr_decay_tokens_in_billion=${train_tokens_in_billion} +lr_decay_tokens=$((${lr_decay_tokens_in_billion} * 1000000000)) +lr_decay_style="cosine" +############################################################################### +### Parallelism configs +## Model parallelism, 1 is no MP +mp_size=1 + + +## Pipeline parallelism. To disable PP, set pp_size to 1 and no_pp to true. +## Note that currently both curriculum learning and random-LTD are NOT +## compatible with pipeline parallelism. +pp_size=8 +no_pp="false" + + +## ZeRO-based data parallelism, stage=0 will disable ZeRO +zero_stage=0 + + +## Total number of GPUs. ds_ssh is from DeepSpeed library. 
+num_gpus=$(($(ds_ssh nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)-2)) +num_gpus_pernode=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l) +num_node=$(( ${num_gpus} / ${num_gpus_pernode} )) + + +## Data parallel size. +dp_size=$(( ${num_gpus} / ${pp_size} / ${mp_size} )) + + +## Micro batch size per GPU +## Make sure that batch_size <= global_batch_size*pp_size*mp_size/num_gpus +## Reduce it manually if GPU OOM +# batch_size=$(( ${global_batch_size} / ${dp_size} )) +batch_size=1 +############################################################################### +### Misc configs +log_interval=1 +eval_iters=10100 +eval_interval=10100 +# num_save controls how frequently to save checkpoints. num_save=20 means that a +# checkpoint will be saved every 5% of training. For longer training you would +# want larger num_save to save more frequently, and vice versa. +num_save=1 +# estimated_train_iter=$((${train_tokens} / ${seq_len} / ${global_batch_size})) +# save_interval=$((${estimated_train_iter} / ${num_save})) +save_interval=10100 + + +## Activation checkpointing saves GPU memory, but reduces training speed +activation_checkpoint="false" +# activation_checkpoint="false" + + +## Whether or not to log optimizer states (norms, max abs values) to tensorboard. +## This is not required for training and might save GPU memory when turned off. +log_optimizer_state="true" +############################################################################### +### Output and data configs +current_time=$(date "+%Y.%m.%d_%H.%M.%S") +host="${HOSTNAME}" +seed=1234 +num_workers=0 + + +## The public Pile dataset can be downloaded at +## https://mystic.the-eye.eu/public/AI/pile_neox/ or +## https://the-eye.eu/public/AI/pile_neox/ Change data_home to where you +## store the pile_text_document.bin and pile_text_document.idx. +data_home="/code" +data_path="${data_home}/gpt_data/my-gpt2_text_document" + + +vocab_path="gpt2-vocab.json" +if [ !
-f "$vocab_path" ]; then + wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json +fi +merge_path="gpt2-merges.txt" +if [ ! -f "$merge_path" ]; then + wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt +fi + + +prescale_grad="true" +jobname="gpt_${model_size}B_tok${train_tokens_in_billion}B" +jobname="${jobname}_lr${lr}_min${min_lr}_w${lr_warmup_tokens_in_million}M_d${lr_decay_tokens_in_billion}B_${lr_decay_style}" +jobname="${jobname}_gbs${global_batch_size}_mbs${batch_size}_g${num_gpus}" +if [[ $zero_stage -gt 0 ]]; then + jobname="${jobname}_z${zero_stage}" + prescale_grad="false" +fi +if [[ $mp_size -gt 1 ]]; then + jobname="${jobname}_mp${mp_size}" +fi +if [ "${no_pp}" = "false" ]; then + jobname="${jobname}_pp${pp_size}" +fi +jobname="${jobname}_seed${seed}_rebase" + + +username=$(whoami) +output_home="/blob/users/${username}/project/data_efficient_gpt" +log_path="${output_home}/log/" +checkpoint_path="${output_home}/checkpoint/${jobname}" +## Microsoft internal constraint: because tensorboard is logged by last rank, +## it's better to put the path in NFS instead of Blob. +tensorboard_dir="/vc_data/users/${username}/project/data_efficient_gpt/tensorboard/" +tensorboard_path="${tensorboard_dir}${jobname}_${host}_${current_time}" +mkdir -p ${log_path} +mkdir -p ${checkpoint_path} +mkdir -p ${tensorboard_path} +############################################################################### +data_options=" \ + --vocab-file ${vocab_path} \ + --merge-file ${merge_path} \ + --data-path ${data_path} \ + --data-impl mmap" + + +## If CL is used, make sure to set "--split" the same as what you used during +## offline data analysis&indexing. 
+megatron_options=" \ + --override-opt_param-scheduler \ + --adam-beta1 0.9 \ + --adam-beta2 0.95 \ + --tensor-model-parallel-size ${mp_size} \ + --init-method-std ${init_std} \ + --lr-decay-tokens ${lr_decay_tokens} \ + --lr-warmup-tokens ${lr_warmup_tokens} \ + --micro-batch-size ${batch_size} \ + --exit-duration-in-mins ${exit_duration} \ + --global-batch-size ${global_batch_size} \ + --num-layers ${num_layers} \ + --hidden-size ${hidden_size} \ + --num-attention-heads ${num_attn_heads} \ + --seq-length ${seq_len} \ + --max-position-embeddings ${seq_len} \ + --train-tokens ${train_tokens} \ + --train-samples ${train_samples} \ + --lr ${lr} \ + --min-lr ${min_lr} \ + --lr-decay-style ${lr_decay_style} \ + --split 949,50,1 \ + --log-interval ${log_interval} \ + --eval-interval ${eval_interval} \ + --eval-iters ${eval_iters} \ + --save-interval ${save_interval} \ + --weight-decay 0.1 \ + --clip-grad 1.0 \ + --hysteresis 2 \ + --num-workers ${num_workers} \ + --fp16 \ + --seed ${seed} \ + --load ${checkpoint_path} \ + --save ${checkpoint_path} \ + --no-async-tensor-model-parallel-allreduce \ + --tensorboard-queue-size 1 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + --tensorboard-dir ${tensorboard_path}" + + +if [ "${activation_checkpoint}" = "true" ]; then +megatron_options="${megatron_options} \ + --checkpoint-activations" +fi + + +if [ "${log_optimizer_state}" = "true" ]; then +megatron_options="${megatron_options} \ + --log-optimizer-states-to-tensorboard" +fi + + +config_json="ds_config_gbs${global_batch_size}_mbs${batch_size}_log${log_interval}_zero${zero_stage}.json" +template_json="../rebase/ds_config_gpt_TEMPLATE.json" +sed "s/GBSIZE/${global_batch_size}/" ${template_json} \ + | sed "s/MBSIZE/${batch_size}/" \ + | sed "s/LOG_INTERVAL/${log_interval}/" \ + | sed "s/ZERO_STAGE/${zero_stage}/" \ + | sed "s/PRESCALE_GRAD/${prescale_grad}/" \ + > ${config_json} + + +deepspeed_options=" \ + 
--deepspeed \ + --deepspeed_config ${config_json} \ + --zero-stage ${zero_stage} \ + --enable-zbh1-pipeline \ + --enable-zbh1-exact-semantics \ + --pipeline-model-parallel-size ${pp_size}" + + +if [[ "${no_pp}" = "true" ]]; then +deepspeed_options="${deepspeed_options} \ + --no-pipeline-parallel" +fi + + +if [ "${activation_checkpoint}" = "true" ]; then +deepspeed_options="${deepspeed_options} \ + --deepspeed-activation-checkpointing" +fi + + +## When saving checkpoint to a storage with cache, there could be a consistency +## issue with the pointer to the latest checkpoint. Here we find the correct pointer +## and broadcast it to all nodes. +iteration_file="$checkpoint_path/latest_checkpointed_iteration.txt" +iteration_file_2="$checkpoint_path/latest" +iteration=0 +for (( node = 0; node <= num_node-1; node++ )) +do + if $(ssh -q worker-"$node" "test -f \"$iteration_file\""); then + local_iteration=$(ssh -q worker-"$node" cat $iteration_file) + iteration=$(( ${local_iteration} > ${iteration} ? ${local_iteration} : ${iteration} )) + fi +done +if [[ $iteration -gt 0 ]]; then + iteration_2="global_step${iteration}" + ds_ssh "echo $iteration > $iteration_file" + ds_ssh "echo $iteration_2 > $iteration_file_2" +fi + + +deepspeed ${dir}/../../pretrain_gpt.py ${megatron_options} ${data_options} ${deepspeed_options} 2>&1 | tee log_zbh1_exact.txt \ No newline at end of file diff --git a/megatron/arguments.py b/megatron/arguments.py index dad993be04..e7182c317e 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -73,6 +73,12 @@ def validate_args(args, defaults={}): assert args.world_size % args.tensor_model_parallel_size == 0, 'world size'\ ' ({}) is not divisible by tensor model parallel size ({})'.format( args.world_size, args.tensor_model_parallel_size) + # Zero bubble pipeline is defined on deepspeed's scheduler + if args.enable_zbh1_pipeline: + assert args.deepspeed, 'Use DeepSpeed to use zero-bubble H1 pipeline' + assert args.sequence_parallel == False, "Sequence
Parallel not tested, proceed at own will by removing this line" + if args.enable_zbh1_exact_semantics: + assert args.enable_zbh1_pipeline, 'Exact semantics require ZBH1 pipeline enabled' # Pipeline model parallel size. args.pipeline_model_parallel_size = min( args.pipeline_model_parallel_size, @@ -835,6 +841,10 @@ def _add_training_args(parser): 'uniformly divided recompute unit, ' '2) block: the number of individual Transformer layers ' 'to recompute within each pipeline stage.') + group.add_argument('--enable-zbh1-pipeline', action='store_true', + help='Activate zero bubble pipeline parallelism schedule method') + group.add_argument('--enable-zbh1-exact-semantics', action='store_true', + help='Use an exact semantics for zbh1 schedule, might be slower than the default.') # deprecated # HACK: added back arguments because DeepSpeed still relies on the old diff --git a/megatron/core/pipeline_parallel/deepspeed_zbh1_engine.py b/megatron/core/pipeline_parallel/deepspeed_zbh1_engine.py new file mode 100644 index 0000000000..ba451d9705 --- /dev/null +++ b/megatron/core/pipeline_parallel/deepspeed_zbh1_engine.py @@ -0,0 +1,110 @@ +from megatron.core.tensor_parallel.weight_grad_store import WeightGradStore + +from deepspeed.runtime.pipe.engine import PipelineEngine +from deepspeed.utils.timer import BACKWARD_MICRO_TIMER, \ + BACKWARD_GLOBAL_TIMER, BACKWARD_INNER_MICRO_TIMER, BACKWARD_INNER_GLOBAL_TIMER +from deepspeed.runtime.utils import PartitionedTensor +from deepspeed.accelerator import get_accelerator + +import torch +from torch.cuda.amp import custom_bwd +from packaging import version + + +from megatron.core.parallel_state import ( + get_tensor_model_parallel_world_size, + get_tensor_model_parallel_group, + get_global_memory_buffer, +) + +def _exec_backward_only_pass(self, buffer_id): + assert self.optimizer is not None, "must provide optimizer during " \ + "init in order to use backward" + + self.mem_status('BEFORE BWD ONLY', reset_max=True) + from 
megatron.core.tensor_parallel.layers import LinearWithGradAccumulationAndAsyncCommunication + WeightGradStore.set_combine_bw(False) + # The last stage just runs backward on the loss using DeepSpeed's typical + # mechanisms. + if self.is_last_stage(): + super(PipelineEngine, self).backward(self.loss) + WeightGradStore.flush() + self.mem_status('AFTER BWD ONLY') + + WeightGradStore.set_combine_bw(True) + return + + outputs = self.pipe_buffers['outputs'][buffer_id] + + if self.wall_clock_breakdown(): + self.timers(BACKWARD_MICRO_TIMER).start() + self.timers(BACKWARD_GLOBAL_TIMER).start() + self.timers(BACKWARD_INNER_MICRO_TIMER).start() + self.timers(BACKWARD_INNER_GLOBAL_TIMER).start() + + # Reconstruct if we previously partitioned the output. We must be + # careful to also restore the computational graph of the tensors we partitioned. + if self.is_pipe_partitioned: + if self.is_grad_partitioned: + if self.pipe_partition_output_meta_cache is None: + self.pipe_partition_output_meta_cache = outputs[0].to('cpu') + part_output = PartitionedTensor.from_meta(meta=self.pipe_partition_output_meta_cache, + local_part=outputs[1], + group=self.grid.get_slice_parallel_group()) + self.pipe_buffers['output_tensors'][buffer_id].data = part_output.full() + outputs = (self.pipe_buffers['output_tensors'][buffer_id], *outputs[2:]) + else: + # Already restored from partition + self.pipe_buffers['output_tensors'][buffer_id].data = outputs[0] + outputs = (self.pipe_buffers['output_tensors'][buffer_id], *outputs[1:]) + + grad_tensors = self.grad_layer + if self.is_grad_partitioned: + if self.grad_partition_grad_layer_meta_cache is None: + self.grad_partition_grad_layer_meta_cache = self.grad_layer[0].to('cpu') + part_grad = PartitionedTensor.from_meta(meta=self.grad_partition_grad_layer_meta_cache, + local_part=self.grad_layer[1], + group=self.grid.get_slice_parallel_group()) + grad_tensors = (part_grad.full(), *grad_tensors[2:]) + part_grad = None + + if self.using_bf16_optimizer and not 
self.is_last_stage(): + # manually call because we don't call optimizer.backward() + self.optimizer.clear_lp_grads() + + # This handles either a single tensor or tuple of tensors. + + if isinstance(outputs, tuple): + out_tensors = [t for t in outputs if t.is_floating_point()] + assert len(out_tensors) == len(grad_tensors) + torch.autograd.backward(tensors=out_tensors, grad_tensors=grad_tensors) + else: + torch.autograd.backward(tensors=(outputs, ), grad_tensors=(grad_tensors, )) + + + WeightGradStore.flush() + + if self.using_bf16_optimizer and not self.is_last_stage(): + # manually call because we don't call optimizer.backward() + self.optimizer.update_hp_grads(clear_lp_grads=False) + + # Free up the memory from the output of forward() + self.pipe_buffers['output_tensors'][buffer_id] = None + self.pipe_buffers['outputs'][buffer_id] = None + grad_tensors = None + + WeightGradStore.set_combine_bw(True) + + if self.wall_clock_breakdown(): + self.timers(BACKWARD_INNER_MICRO_TIMER).stop() + self.timers(BACKWARD_INNER_GLOBAL_TIMER).stop() + self.timers(BACKWARD_MICRO_TIMER).stop() + self.timers(BACKWARD_GLOBAL_TIMER).stop() + +def _exec_weight_pass(self): + if self.using_bf16_optimizer: + # manually call because we don't call optimizer.backward() + self.optimizer.clear_lp_grads() + WeightGradStore.pop() + if self.using_bf16_optimizer: + self.optimizer.update_hp_grads(clear_lp_grads=False) \ No newline at end of file diff --git a/megatron/core/pipeline_parallel/deepspeed_zbh1_schedule.py b/megatron/core/pipeline_parallel/deepspeed_zbh1_schedule.py new file mode 100644 index 0000000000..651aadce72 --- /dev/null +++ b/megatron/core/pipeline_parallel/deepspeed_zbh1_schedule.py @@ -0,0 +1,148 @@ +from deepspeed.runtime.pipe.schedule import PipeSchedule, PipeInstruction, BufferOpInstruction, \ + LoadMicroBatch, RecvActivation, SendActivation, RecvGrad, SendGrad, \ + ForwardPass, BackwardPass, ReduceGrads, ReduceTiedGrads, OptimizerStep +from megatron import get_args + +class 
ZeroBubbleH1Pipeline(PipeSchedule): + """A schedule for training a batch using hybrid parallelism. + + Pipeline parallelism is extracted through gradient accumulation and thus + convergence follows that of a data parallel approach with the same batch + size. + """ + + def steps(self): + num_warmup_microbatches = self.stages - self.stage_id + + forward = 0 + backward = 0 + weight = 0 + + # F section + for _ in range(num_warmup_microbatches - 1): + if forward == self.micro_batches: + continue + forward_id = self.get_buffer_id(forward) + forward += 1 + + cmds = [] + if not self.is_first_stage: + cmds.append(RecvActivation(forward_id)) + if self.is_first_stage or self.is_last_stage: + cmds.append(LoadMicroBatch(forward_id)) + cmds.append(ForwardPass(forward_id)) + if not self.is_last_stage: + cmds.append(SendActivation(forward_id)) + yield cmds + + # FB section + for _ in range(self.stage_id): + if forward == self.micro_batches: + continue + forward_id = self.get_buffer_id(forward) + backward_id = self.get_buffer_id(backward) + forward += 1 + backward += 1 + + cmds = [] + if not self.is_first_stage: + cmds.append(RecvActivation(forward_id)) + if self.is_first_stage or self.is_last_stage: + cmds.append(LoadMicroBatch(forward_id)) + cmds.append(ForwardPass(forward_id)) + if not self.is_last_stage: + cmds.append(RecvGrad(backward_id)) + cmds.append(SendActivation(forward_id)) + cmds.append(BackwardOnlyPass(backward_id)) + if not self.is_first_stage: + cmds.append(SendGrad(backward_id)) + yield cmds + + # FBW section + while forward < self.micro_batches: + forward_id = self.get_buffer_id(forward) + backward_id = self.get_buffer_id(backward) + forward += 1 + backward += 1 + weight += 1 + + cmds = [] + if not self.is_first_stage: + cmds.append(RecvActivation(forward_id)) + if self.is_first_stage or self.is_last_stage: + cmds.append(LoadMicroBatch(forward_id)) + cmds.append(ForwardPass(forward_id)) + if not self.is_last_stage: + cmds.append(RecvGrad(backward_id)) + 
cmds.append(SendActivation(forward_id)) + if self.is_first_stage: + cmds.append(BackwardPass(backward_id)) + elif forward == self.micro_batches: + cmds.append(BackwardOnlyPass(backward_id)) + cmds.append(SendGrad(backward_id)) + cmds.append(WeightPass()) + else: + if get_args().enable_zbh1_exact_semantics: + cmds.append(BackwardOnlyPass(backward_id)) + cmds.append(SendGrad(backward_id)) + cmds.append(WeightPass()) + else: + cmds.append(BackwardPass(backward_id)) + cmds.append(SendGrad(backward_id)) + yield cmds + + #BW section + while backward < self.micro_batches: + backward_id = self.get_buffer_id(backward) + backward += 1 + weight += 1 + + cmds = [] + if not self.is_last_stage: + cmds.append(RecvGrad(backward_id)) + if self.is_first_stage: + cmds.append(BackwardPass(backward_id)) + else: + cmds.append(BackwardOnlyPass(backward_id)) + cmds.append(SendGrad(backward_id)) + cmds.append(WeightPass()) + yield cmds + + #W section + while weight < self.micro_batches: + weight += 1 + yield [WeightPass()] + + yield [ReduceTiedGrads(), ReduceGrads(), OptimizerStep()] + + def get_buffer_id(self, microbatch_id): + num_warmup_microbatches = self.stages - self.stage_id + return microbatch_id % num_warmup_microbatches + + +##Additional Instruction classes +class BackwardOnlyPass(BufferOpInstruction): + """Compute a backward pass and accumulate gradients. + + Roughly: + + .. code-block:: python + + outputs = buffers['outputs'][buffer_id] + gradients = buffers['gradients'][buffer_id] + torch.autograd.backward(tensors=outputs, + grad_tensors=gradients, inputs = input_tensor) + """ + pass + +class WeightPass(PipeInstruction): + """Compute a weight pass and accumulate gradients. + + Roughly: + + .. 
code-block:: python + + torch.autograd.backward(tensors=outputs, + grad_tensors=gradients, inputs = model.parameters()) + """ + pass diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index 020d25915a..2245113c9c 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -16,6 +16,8 @@ from torch.cuda.amp import custom_fwd, custom_bwd +from megatron import get_args + from megatron.core.model_parallel_config import ModelParallelConfig from megatron.core.parallel_state import ( @@ -233,6 +235,11 @@ def __init__(self, sequence_length, embedding_dim): def forward(self, position_ids): return self.local_embeddings(position_ids - self.offset) +def gradientUpdateFunction(total_input, grad_output, weight): + if weight.grad == None: + weight.grad = grad_output.t().matmul(total_input) + else: + weight.grad += grad_output.t().matmul(total_input) class LinearWithGradAccumulationAndAsyncCommunication(torch.autograd.Function): """See linear_with_grad_accumulation_and_async_allreduce""" @@ -359,7 +366,9 @@ def backward(ctx, grad_output): # grad_weight = None # else: # grad_weight = grad_output.t().matmul(total_input) - grad_weight = grad_output.t().matmul(total_input) + from megatron.core.tensor_parallel.weight_grad_store import WeightGradStore + WeightGradStore.put(total_input, grad_output, weight, gradientUpdateFunction) + grad_weight = None grad_bias = grad_output.sum(dim=0) if use_bias else None if ctx.sequence_parallel: diff --git a/megatron/core/tensor_parallel/weight_grad_store.py b/megatron/core/tensor_parallel/weight_grad_store.py new file mode 100644 index 0000000000..bbd1aea533 --- /dev/null +++ b/megatron/core/tensor_parallel/weight_grad_store.py @@ -0,0 +1,34 @@ +import queue + +class WeightGradStore: + + cache = [] + weight_grad_queue = queue.Queue() + combine_bw = True + + @classmethod + def set_combine_bw(cls, combine_bw): + # For the following backward pass, combine W with B and 
skip next W. + cls.combine_bw = combine_bw + + @classmethod + def put(cls, total_input, grad_output, weight, func): + if cls.combine_bw == True: + func(total_input, grad_output, weight) + return + # Store the weight gradient computation of linear layers. + cls.cache.append((total_input, grad_output, weight, func)) + + @classmethod + def flush(cls): + # Collect all stored computations during backward as a W. + cls.weight_grad_queue.put(cls.cache) + cls.cache = [] + + @classmethod + def pop(cls): + # Execute a single W. + assert cls.weight_grad_queue.qsize() > 0 + stored_grads = cls.weight_grad_queue.get() + for total_input, grad_output, weight, func in stored_grads: + func(total_input, grad_output, weight) \ No newline at end of file diff --git a/megatron/initialize.py b/megatron/initialize.py index 85c37a5767..2a9cb3d90f 100644 --- a/megatron/initialize.py +++ b/megatron/initialize.py @@ -15,6 +15,8 @@ from megatron import get_args from megatron import get_tensorboard_writer from megatron.core import mpu, tensor_parallel +from megatron.core.pipeline_parallel.deepspeed_zbh1_engine import _exec_backward_only_pass, _exec_weight_pass +from megatron.core.pipeline_parallel.deepspeed_zbh1_schedule import BackwardOnlyPass, WeightPass, ZeroBubbleH1Pipeline from megatron.arguments import (parse_args, validate_args) from megatron.checkpointing import load_args_from_checkpoint from megatron.global_vars import set_global_variables @@ -216,6 +218,14 @@ def _initialize_distributed(): get_accelerator().set_device(device) # only do so when device_count > 0 + if args.enable_zbh1_pipeline: + deepspeed.runtime.pipe.schedule.TrainSchedule = ZeroBubbleH1Pipeline + deepspeed.runtime.pipe.engine.PipelineEngine._INSTRUCTION_MAP.update( + { + BackwardOnlyPass: _exec_backward_only_pass, + WeightPass: _exec_weight_pass, + } + ) # Call the init process if args.deepspeed or args.ds_inference: deepspeed.init_distributed() From f2d7589720c216b8e0e676e83bc5a0bc3ab71eca Mon Sep 17 00:00:00 2001 
From: Xinyu Chen Date: Thu, 27 Jun 2024 21:38:47 +0800 Subject: [PATCH 10/92] Fix ParallelMLP and enable accelerator test (#403) --- tests/models/test_gpt_embedding.py | 12 +++++++----- tests/transformer/test_parallel_mlp.py | 27 +++++++++++++------------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/tests/models/test_gpt_embedding.py b/tests/models/test_gpt_embedding.py index 700990adc2..28f8d52f15 100644 --- a/tests/models/test_gpt_embedding.py +++ b/tests/models/test_gpt_embedding.py @@ -7,6 +7,8 @@ from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.models.gpt.gpt_embedding import GPTEmbedding +from deepspeed.accelerator import get_accelerator +device_name = get_accelerator().device_name() @pytest.fixture def gpt_embedding(transformer_config): @@ -36,12 +38,12 @@ def test_cpu_forward(self, gpt_embedding: GPTEmbedding): assert embeddings.shape[1] == input_ids.shape[0] assert embeddings.shape[2] == gpt_embedding.config.hidden_size - def test_gpu_forward(self, gpt_embedding: GPTEmbedding): - gpt_embedding.cuda() - input_ids = torch.tensor([0, 1, 2, 3], dtype=torch.int64).repeat((2, 1)).cuda() - position_ids = torch.tensor([0, 1, 2, 3], dtype=torch.int64).repeat((2, 1)).cuda() + def test_accelerator_forward(self, gpt_embedding: GPTEmbedding): + gpt_embedding.to(device_name) + input_ids = torch.tensor([0, 1, 2, 3], dtype=torch.int64).repeat((2, 1)).to(device_name) + position_ids = torch.tensor([0, 1, 2, 3], dtype=torch.int64).repeat((2, 1)).to(device_name) embeddings = gpt_embedding(input_ids, position_ids) - assert embeddings.device.type == 'cuda' + assert embeddings.device.type == device_name assert embeddings.shape[0] == gpt_embedding.max_sequence_length assert embeddings.shape[1] == input_ids.shape[0] assert embeddings.shape[2] == gpt_embedding.config.hidden_size diff --git a/tests/transformer/test_parallel_mlp.py b/tests/transformer/test_parallel_mlp.py index f43dc0b467..f9fa365c68 100644 --- 
a/tests/transformer/test_parallel_mlp.py +++ b/tests/transformer/test_parallel_mlp.py @@ -6,6 +6,8 @@ from megatron.core.transformer.parallel_mlp import ParallelMLP +from deepspeed.accelerator import get_accelerator +device_name = get_accelerator().device_name() @pytest.fixture def mlp(transformer_config): @@ -19,28 +21,27 @@ def test_constructor(self, mlp): num_weights = sum([p.numel() for p in mlp.parameters()]) assert num_weights == 1212 - def test_cpu_forward(self, mlp): + def test_cpu_forward(self, mlp, transformer_config): # [sequence length, micro batch size, hidden size] - hidden_states = torch.ones((32, 2, mlp.config.hidden_size)) + hidden_states = torch.ones((32, 2, transformer_config.hidden_size)) output, output_bias = mlp(hidden_states) assert output.shape[0] == 32 assert output.shape[1] == 2 - assert output.shape[2] == mlp.config.hidden_size - assert output_bias.shape[0] == mlp.config.hidden_size + assert output.shape[2] == transformer_config.hidden_size + assert output_bias == None assert output.dtype == torch.float32 - @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") - def test_gpu_forward(self, mlp): - mlp.cuda() + @pytest.mark.skipif(not get_accelerator().is_available(), reason="accelerator not available") + def test_accelerator_forward(self, mlp, transformer_config): + mlp.to(device_name) # [sequence length, batch size, hidden size] - hidden_states = torch.ones((32, 2, mlp.config.hidden_size)) - hidden_states = hidden_states.cuda() + hidden_states = torch.ones((32, 2, transformer_config.hidden_size)) + hidden_states = hidden_states.to(device_name) output, output_bias = mlp(hidden_states) assert output.shape[0] == 32 assert output.shape[1] == 2 - assert output.shape[2] == mlp.config.hidden_size - assert output_bias.shape[0] == mlp.config.hidden_size + assert output.shape[2] == transformer_config.hidden_size + assert output_bias == None assert output.dtype == torch.float32 - assert output.device.type == 'cuda' - assert 
output_bias.device.type == 'cuda' + assert output.device.type == device_name From ea4b67a2bb5e572bdbe9c4fd3aa0e539e8e17110 Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Thu, 27 Jun 2024 22:15:14 +0800 Subject: [PATCH 11/92] Fix test_deallocate_output_tensor (#404) --- tests/pipeline_parallel/test_schedules.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/pipeline_parallel/test_schedules.py b/tests/pipeline_parallel/test_schedules.py index a6bac5b2a3..64bd7c3ac6 100644 --- a/tests/pipeline_parallel/test_schedules.py +++ b/tests/pipeline_parallel/test_schedules.py @@ -21,7 +21,9 @@ def test_get_forward_backward_func(): def test_deallocate_output_tensor(): out = torch.tensor([[1, 2, 3], [4, 5, 6]]) schedule.deallocate_output_tensor(out) - assert(out.nelement() == 1) + assert(out.nelement() == 6) + schedule.deallocate_output_tensor(out, True) + assert(out.nelement() == 1) def test_forward_backward_func_without_pipeline_parallel(mocker): from megatron.core.pipeline_parallel import get_forward_backward_func From 08f5a996903affd36b664c52cbbb8b7e770abcdd Mon Sep 17 00:00:00 2001 From: Costin Eseanu <19394655+costin-eseanu@users.noreply.github.com> Date: Mon, 1 Jul 2024 07:05:31 -0700 Subject: [PATCH 12/92] Fixed missing BookCorpus dataset. 
(#407) Co-authored-by: Costin Eseanu --- .../ds_pretrain_gpt_1.3B_seq_parallel_32k.sh | 26 +++++++++++++------ .../preprocess_bookcorpus.py | 4 +++ megatron/arguments.py | 4 +-- 3 files changed, 24 insertions(+), 10 deletions(-) create mode 100644 examples_deepspeed/sequence_parallel/preprocess_bookcorpus.py diff --git a/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh b/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh index da028dc731..24bfa544d6 100644 --- a/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh +++ b/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh @@ -187,14 +187,6 @@ host="${HOSTNAME}" seed=1234 num_workers=0 -data_path="BookCorpusDataset_text_document" -if [ ! -f "BookCorpusDataset_text_document.bin" ]; then - wget https://the-eye.eu/public/AI/pile_neox/data/BookCorpusDataset_text_document.bin -fi -if [ ! -f "BookCorpusDataset_text_document.idx" ]; then - wget https://the-eye.eu/public/AI/pile_neox/data/BookCorpusDataset_text_document.idx -fi - vocab_path="gpt2-vocab.json" if [ ! -f "$vocab_path" ]; then wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json @@ -204,6 +196,24 @@ if [ ! -f "$merge_path" ]; then wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt fi + +data_path="BookCorpusDataset_text_document" +if [ ! 
-f "BookCorpusDataset_text_document.bin" ]; then + # Download the Bookcorpus dataset and convert to json + python preprocess_bookcorpus.py + + # Process the dataset + python ${dir}/../../tools/preprocess_data.py \ + --input ${data_path}.json \ + --output-prefix "BookCorpusDataset" \ + --vocab-file $vocab_path \ + --merge-file $merge_path \ + --dataset-impl mmap \ + --tokenizer-type GPT2BPETokenizer \ + --workers 32 \ + --append-eod +fi + prescale_grad="true" jobname="gpt_${model_size}B_tok${train_tokens_in_billion}B" jobname="${jobname}_lr${lr}_min${min_lr}_w${lr_warmup_tokens_in_million}M_d${lr_decay_tokens_in_billion}B_${lr_decay_style}" diff --git a/examples_deepspeed/sequence_parallel/preprocess_bookcorpus.py b/examples_deepspeed/sequence_parallel/preprocess_bookcorpus.py new file mode 100644 index 0000000000..c35a13ea68 --- /dev/null +++ b/examples_deepspeed/sequence_parallel/preprocess_bookcorpus.py @@ -0,0 +1,4 @@ +from datasets import load_dataset + +train_data = load_dataset('bookcorpus/bookcorpus', split='train') +train_data.to_json("BookCorpusDataset_text_document.json", lines=True) diff --git a/megatron/arguments.py b/megatron/arguments.py index e7182c317e..8889854850 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -101,8 +101,8 @@ def validate_args(args, defaults={}): args.ds_sequence_parallel_size assert args.world_size % model_parallel_size == 0, 'world size ({}) is not'\ ' divisible by tensor parallel size ({}) times pipeline parallel ' \ - 'size ({})'.format(args.world_size, args.tensor_model_parallel_size, - args.pipeline_model_parallel_size) + 'size ({}) times seqence parallel size ({})'.format(args.world_size, args.tensor_model_parallel_size, + args.pipeline_model_parallel_size, args.ds_sequence_parallel_size) args.data_parallel_size = args.world_size // model_parallel_size if args.rank == 0: print('using world size: {}, data-parallel-size: {}, ' From c3a13be721da0d0de16c338d0d665b0f7d13d14f Mon Sep 17 00:00:00 2001 From: Xinyu 
Chen Date: Tue, 2 Jul 2024 01:22:08 +0800 Subject: [PATCH 13/92] Set proper arguments when constructing models in unit tests (#408) --- tests/models/test_gpt_embedding.py | 4 ++++ tests/models/test_gpt_model.py | 21 +++++++++++++++++---- tests/transformer/test_parallel_mlp.py | 13 +++++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/tests/models/test_gpt_embedding.py b/tests/models/test_gpt_embedding.py index 28f8d52f15..8a188b0cd2 100644 --- a/tests/models/test_gpt_embedding.py +++ b/tests/models/test_gpt_embedding.py @@ -3,15 +3,19 @@ import pytest import torch +import types from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.models.gpt.gpt_embedding import GPTEmbedding +from megatron.global_vars import set_args from deepspeed.accelerator import get_accelerator device_name = get_accelerator().device_name() @pytest.fixture def gpt_embedding(transformer_config): + args = types.SimpleNamespace(params_dtype=torch.float32, embed_layernorm=False) + set_args(args) embedding = GPTEmbedding(config=transformer_config, vocab_size=100, max_sequence_length=4) return embedding diff --git a/tests/models/test_gpt_model.py b/tests/models/test_gpt_model.py index b854ecd918..b6888af5cc 100644 --- a/tests/models/test_gpt_model.py +++ b/tests/models/test_gpt_model.py @@ -3,18 +3,25 @@ import pytest import torch +import types from megatron.core.transformer.transformer_config import TransformerConfig from megatron.core.models.gpt.gpt_model import GPTModel +from megatron.global_vars import set_args +from deepspeed.accelerator import get_accelerator +device_name = get_accelerator().device_name() @pytest.fixture def gpt_model(transformer_config): + args = types.SimpleNamespace(params_dtype=torch.float32, embed_layernorm=False) + set_args(args) language_model = GPTModel(config=transformer_config, vocab_size=100, max_sequence_length=4) return language_model class TestGPTModel: + @pytest.mark.xfail(device_name=='hpu', 
reason="TELayerNorm is not defined in HPU") def test_constructor(self, gpt_model: GPTModel): assert isinstance(gpt_model, GPTModel) @@ -23,6 +30,7 @@ def test_constructor(self, gpt_model: GPTModel): num_weights = sum([p.numel() for p in gpt_model.parameters()]) assert num_weights == 5040 + @pytest.mark.xfail(device_name=='hpu', reason="TELayerNorm is not defined in HPU") def test_set_input_tensor(self, gpt_model: GPTModel): config: TransformerConfig = gpt_model.config sequence_length = gpt_model.max_sequence_length @@ -37,17 +45,18 @@ def test_set_input_tensor(self, gpt_model: GPTModel): assert gpt_model.decoder.input_tensor.shape[1] == micro_batch_size assert gpt_model.decoder.input_tensor.shape[2] == config.hidden_size + @pytest.mark.xfail(device_name=='hpu', reason="TELayerNorm is not defined in HPU") def test_post_process_forward(self, gpt_model: GPTModel): config: TransformerConfig = gpt_model.config sequence_length = gpt_model.max_sequence_length micro_batch_size = 2 - gpt_model.cuda() + gpt_model.to(device_name) data = list(range(sequence_length)) - input_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() - position_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda() - attention_mask = torch.ones((1, 1, sequence_length, sequence_length), dtype=bool).cuda() + input_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).to(device_name) + position_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).to(device_name) + attention_mask = torch.ones((1, 1, sequence_length, sequence_length), dtype=bool).to(device_name) logits = gpt_model.forward(input_ids=input_ids, position_ids=position_ids, attention_mask=attention_mask) @@ -55,15 +64,19 @@ def test_post_process_forward(self, gpt_model: GPTModel): assert logits.shape[1] == sequence_length assert logits.shape[2] == gpt_model.vocab_size + @pytest.mark.xfail(device_name=='hpu', reason="TELayerNorm is not defined in HPU") 
def test_no_post_process_forward(self, gpt_model: GPTModel): pass + @pytest.mark.xfail(device_name=='hpu', reason="TELayerNorm is not defined in HPU") def test_no_preprocess_forward(self, gpt_model: GPTModel): pass + @pytest.mark.xfail(device_name=='hpu', reason="TELayerNorm is not defined in HPU") def test_state_dict_for_save_checkpoint(self, gpt_model: GPTModel): pass + @pytest.mark.xfail(device_name=='hpu', reason="TELayerNorm is not defined in HPU") def test_load_state_dict(self, gpt_model: GPTModel): pass diff --git a/tests/transformer/test_parallel_mlp.py b/tests/transformer/test_parallel_mlp.py index f9fa365c68..ac5e4b0cd5 100644 --- a/tests/transformer/test_parallel_mlp.py +++ b/tests/transformer/test_parallel_mlp.py @@ -3,14 +3,27 @@ import pytest import torch +import types from megatron.core.transformer.parallel_mlp import ParallelMLP +from megatron.global_vars import set_args from deepspeed.accelerator import get_accelerator device_name = get_accelerator().device_name() @pytest.fixture def mlp(transformer_config): + mlp_args = types.SimpleNamespace( + swiglu=False, + openai_gelu=True, + onnx_safe=False, + bias_gelu_fusion=False, + transformer_impl="", + cache_fp8_weight=False, + fp8_interval=False, + cache_fp8_weight_fwd=False + ) + set_args(mlp_args) return ParallelMLP(transformer_config) From 330f9f29e6d4caf9c1f85709598d9f733e8e9ff4 Mon Sep 17 00:00:00 2001 From: Polisetty V R K Jyothendra Varma Date: Mon, 8 Jul 2024 22:25:27 +0300 Subject: [PATCH 14/92] use split/squeeze instead of slice for performance (#409) * use split/squeeze instead of slice for performance GPU may not have perf difference but HPU perf improves with this * add copyrights --- megatron/model/transformer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/megatron/model/transformer.py b/megatron/model/transformer.py index e75f13a24f..be8ae6ef4b 100644 --- a/megatron/model/transformer.py +++ b/megatron/model/transformer.py @@ -1,3 +1,4 @@ +# Copyright (C) 
2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. """Transformer.""" @@ -660,9 +661,10 @@ def repeat_kv(self, hidden_states, n_rep): head_dim) def split_tensor(self, mixed_x_layer): - query_layer = mixed_x_layer[:, :, :, :-2, :].reshape(mixed_x_layer.shape[:2] + (-1, self.hidden_size_per_attention_head)) - key_layer = mixed_x_layer[:, :, :, -2, :] - value_layer = mixed_x_layer[:, :, :, -1, :] + query_layer, key_layer, value_layer = torch.split(mixed_x_layer, [self.num_key_value_groups, 1, 1], dim=-2) + query_layer = query_layer.reshape(mixed_x_layer.shape[:2] + (self.num_attention_heads_per_partition, self.hidden_size_per_attention_head)) + key_layer = torch.squeeze(key_layer, -2) + value_layer = torch.squeeze(value_layer, -2) return query_layer, key_layer, value_layer From af06d1449a0dac732deb172568553254f30ff657 Mon Sep 17 00:00:00 2001 From: Polisetty V R K Jyothendra Varma Date: Mon, 8 Jul 2024 22:58:32 +0300 Subject: [PATCH 15/92] improve performance by keeping attention_mask on device and run ops further on device (#411) * improve performance by keeping attention_mask on device and run ops further on device * add copyrights --- megatron/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/megatron/utils.py b/megatron/utils.py index 97294070af..9033d6402a 100644 --- a/megatron/utils.py +++ b/megatron/utils.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. """General utilities.""" @@ -178,7 +179,7 @@ def get_ltor_masks_and_position_ids(data, attention_mask = None if not skip_mask: attention_mask = torch.tril(torch.ones( - (att_mask_batch, seq_length, seq_length))).view(att_mask_batch, 1, seq_length, seq_length) + (att_mask_batch, seq_length, seq_length), device=data.device)).view(att_mask_batch, 1, seq_length, seq_length) # Loss mask. 
loss_mask = torch.ones(data.size(), dtype=torch.float, device=data.device) @@ -218,7 +219,6 @@ def get_ltor_masks_and_position_ids(data, # Convert attention mask to binary: if not skip_mask: attention_mask = (attention_mask < 0.5) - attention_mask = attention_mask.to(data.device) return attention_mask, loss_mask, position_ids From ec3f1f402e0e634f84458cba84c7e92967cf3107 Mon Sep 17 00:00:00 2001 From: Polisetty V R K Jyothendra Varma Date: Thu, 11 Jul 2024 18:26:16 +0300 Subject: [PATCH 16/92] Improve RoPE perf by using cached sin/cos tensors (#410) * improve RoPE perf by using cached sin/cos tensors * add copyrights --- megatron/model/rotary_pos_embedding.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/megatron/model/rotary_pos_embedding.py b/megatron/model/rotary_pos_embedding.py index 4d4497e0cd..0a7acb9efc 100644 --- a/megatron/model/rotary_pos_embedding.py +++ b/megatron/model/rotary_pos_embedding.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. 
# coding=utf-8 # The following code has been taken from https://github.com/NVIDIA/NeMo/blob/ \ @@ -11,6 +12,10 @@ __all__ = ['RotaryEmbedding', 'apply_rotary_pos_emb'] +# sin, cos tensors cached for all devices +cos_cached = None +sin_cached = None + class RotaryEmbedding(nn.Module): def __init__(self, dim, theta=10000): super().__init__() @@ -47,10 +52,19 @@ def apply_rotary_pos_emb(t, freqs): check https://kexue.fm/archives/8265 for detailed formulas """ rot_dim = freqs.shape[-1] - # ideally t_pass is empty so rotary pos embedding is applied to all tensor t - t, t_pass = t[..., :rot_dim], t[..., rot_dim:] + t_pass = None + if t.shape[-1] != rot_dim: + # ideally t_pass is empty so rotary pos embedding is applied to all tensor t + t, t_pass = t[..., :rot_dim], t[..., rot_dim:] + global cos_cached, sin_cached + if cos_cached is None or sin_cached is None or t.shape[0] != cos_cached.shape[0]: + freqs_ = freqs[:t.shape[0]] + cos_cached = freqs_.cos().to(t.dtype) + sin_cached = freqs_.sin().to(t.dtype) # first part is cosine component # second part is sine component, need to change signs with _rotate_half method - t = (t * freqs.cos().to(t.dtype)) + (_rotate_half(t) * freqs.sin().to(t.dtype)) - return t if t_pass.shape[-1] == 0 else torch.cat((t, t_pass), dim=-1) + t = (t * cos_cached) + (_rotate_half(t) * sin_cached) + if t_pass is None: + return t + return torch.cat((t, t_pass), dim=-1) From 354e42067eac846fdda036527fb020ac6bdd4a92 Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Fri, 12 Jul 2024 22:46:48 +0800 Subject: [PATCH 17/92] Extend test utilities to support more accelerators (#418) * Extend test utilities to support more accelerators * Add Intel Copyright --- tests/models/test_gpt_embedding.py | 1 + tests/models/test_gpt_model.py | 1 + tests/pipeline_parallel/test_schedules.py | 2 ++ tests/transformer/test_parallel_mlp.py | 1 + tests/unit_tests/test_utilities.py | 16 ++++++++++------ 5 files changed, 15 insertions(+), 6 deletions(-) diff --git 
a/tests/models/test_gpt_embedding.py b/tests/models/test_gpt_embedding.py index 8a188b0cd2..199f29dede 100644 --- a/tests/models/test_gpt_embedding.py +++ b/tests/models/test_gpt_embedding.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. import pytest diff --git a/tests/models/test_gpt_model.py b/tests/models/test_gpt_model.py index b6888af5cc..cf322908b3 100644 --- a/tests/models/test_gpt_model.py +++ b/tests/models/test_gpt_model.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. import pytest diff --git a/tests/pipeline_parallel/test_schedules.py b/tests/pipeline_parallel/test_schedules.py index 64bd7c3ac6..72c2372ba4 100644 --- a/tests/pipeline_parallel/test_schedules.py +++ b/tests/pipeline_parallel/test_schedules.py @@ -1,3 +1,5 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. + import torch from tests.test_utilities import Utils from megatron.core import ModelParallelConfig diff --git a/tests/transformer/test_parallel_mlp.py b/tests/transformer/test_parallel_mlp.py index ac5e4b0cd5..098f18a9d6 100644 --- a/tests/transformer/test_parallel_mlp.py +++ b/tests/transformer/test_parallel_mlp.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. import pytest diff --git a/tests/unit_tests/test_utilities.py b/tests/unit_tests/test_utilities.py index b35c77b58d..68c6e6b55c 100644 --- a/tests/unit_tests/test_utilities.py +++ b/tests/unit_tests/test_utilities.py @@ -1,21 +1,25 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. 
+ import os import torch import megatron.core.parallel_state as ps +from deepspeed.accelerator import get_accelerator + class Utils: - world_size = torch.cuda.device_count() - rank = int(os.environ['LOCAL_RANK']) + world_size = int(os.getenv("WORLD_SIZE", '1')) + rank = int(os.getenv('LOCAL_RANK', '0')) @staticmethod def initialize_distributed(): print(f'Initializing torch.distributed with rank: {Utils.rank}, world_size: {Utils.world_size}') - torch.cuda.set_device(Utils.rank % torch.cuda.device_count()) + get_accelerator().set_device(Utils.rank % get_accelerator().device_count()) init_method = 'tcp://' master_ip = os.getenv('MASTER_ADDR', 'localhost') master_port = os.getenv('MASTER_PORT', '6000') init_method += master_ip + ':' + master_port - torch.distributed.init_process_group(backend='nccl', world_size=Utils.world_size, rank=Utils.rank, init_method=init_method) + torch.distributed.init_process_group(backend=get_accelerator().communication_backend_name(), world_size=Utils.world_size, rank=Utils.rank, init_method=init_method) @staticmethod def destroy_model_parallel(): @@ -23,8 +27,8 @@ def destroy_model_parallel(): torch.distributed.barrier() @staticmethod - def initialize_model_parallel(tensor_model_parallel_size = 1, pipeline_model_parallel_size = 1, virtual_pipeline_model_parallel_size = None, pipeline_model_parallel_split_rank = None): + def initialize_model_parallel(tensor_model_parallel_size = 1, pipeline_model_parallel_size = 1, sequence_parallel_size = 1, virtual_pipeline_model_parallel_size = None, pipeline_model_parallel_split_rank = None): ps.destroy_model_parallel() if not torch.distributed.is_initialized(): Utils.initialize_distributed() - ps.initialize_model_parallel(tensor_model_parallel_size, pipeline_model_parallel_size, virtual_pipeline_model_parallel_size, pipeline_model_parallel_split_rank) \ No newline at end of file + ps.initialize_model_parallel(tensor_model_parallel_size, pipeline_model_parallel_size, sequence_parallel_size, 
virtual_pipeline_model_parallel_size, pipeline_model_parallel_split_rank) \ No newline at end of file From 73252c0211fad1b4808e46f986341702b962e154 Mon Sep 17 00:00:00 2001 From: inkcherry Date: Fri, 12 Jul 2024 23:27:27 +0800 Subject: [PATCH 18/92] clear document (#395) --- examples_deepspeed/universal_checkpointing/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples_deepspeed/universal_checkpointing/README.md b/examples_deepspeed/universal_checkpointing/README.md index 14169c9e22..a0a1df5b8a 100644 --- a/examples_deepspeed/universal_checkpointing/README.md +++ b/examples_deepspeed/universal_checkpointing/README.md @@ -77,6 +77,8 @@ Please see the corresponding [pull request](https://github.com/microsoft/Megatro Combining sequence parallelism with data parallelism is another good use case for universal checkpointing, see [sp pull request](https://github.com/microsoft/DeepSpeed/pull/4752) for example and visualization of matching loss values. +Note: Converting model weights between a run that uses the ```--no-pipeline-parallel``` parameter and a run that does not use it is currently not supported. + ### TensorBoard Log Analysis The Universal Checkpointing example includes a TensorBoard analysis script that will generate `csv` files and `png` plots across the unviersal checkpointing training steps for comparison of training and validation loss curves. 
From 0971e684246368ac74395162ec7c5df95635f87f Mon Sep 17 00:00:00 2001 From: Polisetty V R K Jyothendra Varma Date: Mon, 15 Jul 2024 18:44:23 +0300 Subject: [PATCH 19/92] add PyTorch profiler support (#414) * Update arguments.py * Update training.py * Create profiler.py * add copyrights * Update profiler.py * add copyrights * Update help * add copyrights --- megatron/arguments.py | 19 +++++++++++++++ megatron/initialize.py | 1 + megatron/profiler.py | 54 ++++++++++++++++++++++++++++++++++++++++++ megatron/training.py | 6 +++++ 4 files changed, 80 insertions(+) create mode 100644 megatron/profiler.py diff --git a/megatron/arguments.py b/megatron/arguments.py index 8889854850..de348504c7 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. """Megatron arguments.""" @@ -44,6 +45,7 @@ def parse_args(extra_args_provider=None, ignore_unknown_args=False): parser = _add_inference_args(parser) parser = _add_transformer_engine_args(parser) parser = _add_retro_args(parser) + parser = _add_profiler_args(parser) # Custom arguments. if extra_args_provider is not None: @@ -1541,3 +1543,20 @@ def _add_distillation_args(parser): help='Directory containing a teacher model checkpoint.') return parser + + +def _add_profiler_args(parser): + group = parser.add_argument_group(title='profiling configuration') + + group.add_argument("--profile", + type=str, + default=None, + choices=['pt', 'pt-full'], + help="Enable profiling, pt-full gives call stack compared to pt") + + group.add_argument("--profile_steps", + type=str, + default='2,3', + help="Which steps to profile. Format: <start step>,<end step>") + + return parser diff --git a/megatron/initialize.py b/megatron/initialize.py index 2a9cb3d90f..538f7fc456 100644 --- a/megatron/initialize.py +++ b/megatron/initialize.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. 
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. """Megatron initialization.""" diff --git a/megatron/profiler.py b/megatron/profiler.py new file mode 100644 index 0000000000..c98096482a --- /dev/null +++ b/megatron/profiler.py @@ -0,0 +1,54 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. + +import torch + +on_step_begin = [] +on_step_end = [] + +def trigger(phase): + [f() for f in phase] + +def setup_profiler(args, device): + if args.profile is None: + return + + start_step, end_step = map(int, args.profile_steps.split(',')) + active_steps = end_step - start_step + 1 + cur_step = 0 + + def on_step_begin_fn(): + nonlocal cur_step + cur_step = cur_step + 1 + on_step_begin.append(on_step_begin_fn) + + def when(cond, clbk): + def fn(): + if cond(): + clbk() + return fn + + def is_start_step(): + return cur_step == start_step + + def is_end_step(): + return cur_step == end_step + + def is_capture_step(): + return cur_step >= start_step and cur_step <= end_step + + if args.profile.startswith('pt'): + schedule = torch.profiler.schedule(wait=0, warmup=0, active=active_steps, repeat=1) + activities = [torch.profiler.ProfilerActivity.CPU] + activities.extend([torch.profiler.ProfilerActivity.HPU] if device.startswith("hpu") else []) + activities.extend([torch.profiler.ProfilerActivity.CUDA] if device.startswith("cuda") else []) + full = args.profile == 'pt-full' + + profiler = torch.profiler.profile( + schedule=schedule, + activities=activities, + on_trace_ready=torch.profiler.tensorboard_trace_handler(args.tensorboard_dir, use_gzip=True), + with_stack=full) + + on_step_begin.append(when(is_start_step, profiler.start)) + on_step_end.append(when(is_capture_step, profiler.step)) + on_step_end.append(when(is_end_step, profiler.stop)) diff --git a/megatron/training.py b/megatron/training.py index 19b8a6c71f..c422c6fc55 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. 
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. """Pretrain utilities.""" @@ -43,6 +44,7 @@ from megatron.utils import report_memory, throughput_calculator, checkpoint_throughput_calculator, update_rotary_pos_emb from megatron.model.vision.knn_monitor import compute_feature_bank from megatron.arguments import core_transformer_config_from_args +from megatron.profiler import setup_profiler, trigger, on_step_begin, on_step_end import deepspeed from deepspeed.accelerator import get_accelerator @@ -1161,6 +1163,8 @@ def train(forward_step_func, model, optimizer, opt_param_scheduler, # Write args to tensorboard write_args_to_tensorboard() + setup_profiler(args, get_accelerator().device_name()) + if args.random_ltd: # random-ltd requires different randomness on each rank import random @@ -1191,6 +1195,7 @@ def train(forward_step_func, model, optimizer, opt_param_scheduler, while iteration < args.train_iters and (args.train_tokens is None or \ args.consumed_train_tokens < args.train_tokens): + trigger(on_step_begin) update_num_microbatches(args.consumed_train_samples) if args.deepspeed: # inform deepspeed of any batch size changes @@ -1311,6 +1316,7 @@ def train(forward_step_func, model, optimizer, opt_param_scheduler, torch.distributed.barrier() print_datetime('exiting program at iteration {}'.format(iteration)) sys.exit() + trigger(on_step_end) return iteration From 73029edfa88f9b6fd2d1aed271a964ae87131dbb Mon Sep 17 00:00:00 2001 From: billishyahao Date: Wed, 17 Jul 2024 07:19:06 +0800 Subject: [PATCH 20/92] [Wandb] Refine wandb logging function (#416) * Refine wandb logging function * Address comments * enable user to specify wandb local save dir * Update and fix comments * Update --- .../pretrain_llama2_distributed.sh | 13 +- megatron/__init__.py | 1 + megatron/arguments.py | 6 + megatron/global_vars.py | 47 ++++ megatron/timers.py | 2 +- megatron/training.py | 230 +++++++++++------- 6 files changed, 204 insertions(+), 95 deletions(-) diff --git 
a/examples_deepspeed/pretrain_llama2_distributed.sh b/examples_deepspeed/pretrain_llama2_distributed.sh index f275ea636a..4c790e8c19 100755 --- a/examples_deepspeed/pretrain_llama2_distributed.sh +++ b/examples_deepspeed/pretrain_llama2_distributed.sh @@ -41,6 +41,17 @@ GRAD_CLIP=1 # activation_checkpoint="true" activation_checkpoint="false" +LOG_TO_WANDB=0 +WANDB_ARGS= +if [ $LOG_TO_WANDB -eq 1 ] +then +WANDB_ARGS="\ + --wandb-project pretrain-llama2 \ + --wandb-exp-name exp0 \ + --wandb-save-dir ${BASE_PATH}/wandb \ + " +fi + # Below configuration required for llama model as per llama paper # --no-query-key-layer-scaling \ # --attention-dropout 0 \ @@ -53,7 +64,6 @@ activation_checkpoint="false" ###################################### - cat < $DS_CONFIG { "train_batch_size" : $GLOBAL_BATCH_SIZE, @@ -132,4 +142,5 @@ torchrun $DISTRIBUTED_ARGS \ --normalization rmsnorm \ --disable-bias-linear \ --num-key-value-heads $NUM_KV_HEADS \ + $WANDB_ARGS \ $ds_args diff --git a/megatron/__init__.py b/megatron/__init__.py index d92a279ec6..4a7fe2cd82 100644 --- a/megatron/__init__.py +++ b/megatron/__init__.py @@ -9,6 +9,7 @@ from .global_vars import update_num_microbatches from .global_vars import get_tokenizer from .global_vars import get_tensorboard_writer +from .global_vars import get_wandb_writer from .global_vars import get_adlr_autoresume from .global_vars import get_timers from .initialize import initialize_megatron diff --git a/megatron/arguments.py b/megatron/arguments.py index de348504c7..30c3d669d7 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -748,6 +748,12 @@ def _add_logging_args(parser): group.add_argument('--log-world-size-to-tensorboard', action='store_true', help='Enable world size logging to tensorboard.') + group.add_argument('--wandb-project', type=str, default='', + help='The wandb project name. 
Ignore wandb by default.') + group.add_argument('--wandb-exp-name', type=str, default='', + help='The wandb experiment name.') + group.add_argument('--wandb-save-dir', type=str, default='', + help='Path to save the wandb results locally.') return parser diff --git a/megatron/global_vars.py b/megatron/global_vars.py index cb284b3c34..3f9d6fd66b 100644 --- a/megatron/global_vars.py +++ b/megatron/global_vars.py @@ -16,6 +16,7 @@ _GLOBAL_NUM_MICROBATCHES_CALCULATOR = None _GLOBAL_TOKENIZER = None _GLOBAL_TENSORBOARD_WRITER = None +_GLOBAL_WANDB_WRITER = None _GLOBAL_ADLR_AUTORESUME = None _GLOBAL_TIMERS = None _GLOBAL_SIGNAL_HANDLER = None @@ -56,6 +57,12 @@ def get_tensorboard_writer(): return _GLOBAL_TENSORBOARD_WRITER +def get_wandb_writer(): + """Return wandb writer. It can be None so no need + to check if it is initialized.""" + return _GLOBAL_WANDB_WRITER + + def get_adlr_autoresume(): """ADLR autoresume object. It can be None so no need to check if it is initialized.""" @@ -91,6 +98,7 @@ def set_global_variables(args): _build_num_microbatches_calculator(args) _ = _build_tokenizer(args) _set_tensorboard_writer(args) + _set_wandb_writer(args) _set_adlr_autoresume(args) _set_timers(args) @@ -152,6 +160,45 @@ def _set_tensorboard_writer(args): 'no TensorBoard logs will be written.', flush=True) +def _set_wandb_writer(args): + """Set wandb writer.""" + global _GLOBAL_WANDB_WRITER + _ensure_var_is_not_initialized(_GLOBAL_WANDB_WRITER, + 'wandb writer') + getattr(args, 'wandb_project', '') + getattr(args, 'wandb_exp_name', '') + + if args.rank == (args.world_size - 1): + if args.wandb_project == '' or \ + args.wandb_exp_name == '': + print('WARNING: WANDB writing requested but no legit wandb ' + 'project or experiment name provided, ' + 'therefore WANDB logs will be written ' + 'according to random generated project or experiment name.', flush=True) + + try: + import wandb + except (ImportError, ModuleNotFoundError): + print('WARNING: WANDB writing requested but is 
not ' + 'available (try to pip install wandb to solve it), ' + 'no WANDB logs will be written.', flush=True) + return + + if args.wandb_save_dir: + save_dir = args.wandb_save_dir + else: + # Defaults to the save dir. + save_dir = os.path.join(args.save, 'wandb') + wandb_kwargs = { + 'dir': save_dir, + 'name': args.wandb_exp_name, + 'project': args.wandb_project, + 'config': vars(args)} + os.makedirs(wandb_kwargs['dir'], exist_ok=True) + wandb.init(**wandb_kwargs) + _GLOBAL_WANDB_WRITER = wandb + + def _set_adlr_autoresume(args): """Initialize ADLR autoresume.""" global _GLOBAL_ADLR_AUTORESUME diff --git a/megatron/timers.py b/megatron/timers.py index 384c7c37a3..53ff9be98e 100644 --- a/megatron/timers.py +++ b/megatron/timers.py @@ -303,7 +303,7 @@ def write(self, names, writer, iteration, normalizer=1.0, assert normalizer > 0.0 name_to_min_max_time = self._get_global_min_max_time( names, reset, barrier, normalizer) - if writer is not None: + if writer.is_enabled(): for name in name_to_min_max_time: _, max_time = name_to_min_max_time[name] writer.add_scalar(name + '-time', max_time, iteration) diff --git a/megatron/training.py b/megatron/training.py index c422c6fc55..697d62f7b8 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -8,16 +8,22 @@ import sys import time import json +try: + import wandb +except (ImportError, ModuleNotFoundError): + wandb = None # The earliest we can measure the start time. 
_TRAIN_START_TIME = time.time() import torch from collections import OrderedDict from torch.nn.parallel.distributed import DistributedDataParallel as torchDDP +from enum import Enum from megatron import get_args from megatron import get_signal_handler from megatron import get_timers from megatron import get_tensorboard_writer +from megatron import get_wandb_writer from megatron import get_current_global_batch_size from megatron import get_num_microbatches from megatron import is_last_rank @@ -54,12 +60,6 @@ from deepspeed import comm as dist -try: - import wandb -except (ImportError, ModuleNotFoundError): - wandb = None - - def print_datetime(string): """Note that this call will sync across all ranks.""" torch.distributed.barrier() @@ -787,6 +787,65 @@ def train_step(forward_step_func, data_iterator, return {}, skipped_iter, grad_norm, num_zeros_in_grad +class InteropLoggingTool(Enum): + TENSORBOARD = 1 + WANDB = 2 + + +class interop_tool_logger: + def __init__(self, tb_writer=None, wandb_writer=None): + self.tb_writer = tb_writer + self.wandb_writer = wandb_writer + self.custom_x_axis = [] + self.custom_y_axis = {} + self.args = get_args() + if not hasattr(self.args, "logger_iteration"): + self.args.logger_iteration = 1 + + def is_enabled(self): + return self.tb_writer or self.wandb_writer + + def add_scalar(self, key, scalar_value, step, custom_step_name=None, \ + tool_list=[InteropLoggingTool.TENSORBOARD, InteropLoggingTool.WANDB]): + if self.tb_writer and \ + InteropLoggingTool.TENSORBOARD in tool_list: + self.tb_writer.add_scalar(key, scalar_value, step) + + if self.wandb_writer and \ + InteropLoggingTool.WANDB in tool_list: + if not custom_step_name: + self.wandb_writer.log({key: scalar_value}, step=step) + if self.args.logger_iteration < step: + # Updating iteration + self.args.logger_iteration = step + + else: + if custom_step_name not in self.custom_x_axis: + self.custom_x_axis.append(custom_step_name) + wandb.define_metric(custom_step_name) + + if key not 
in self.custom_y_axis: + self.custom_y_axis[key] = custom_step_name + wandb.define_metric(key, step_metric=custom_step_name) + + self.wandb_writer.log({key: scalar_value, custom_step_name: step}, \ + step=self.args.logger_iteration) + + + def add_scalar_to_tb(self, key, scalar_value, step): + return self.add_scalar(key, scalar_value, step, None, [InteropLoggingTool.TENSORBOARD]) + + def add_scalar_to_wandb(self, key, scalar_value, step, custom_step_name=None): + return self.add_scalar(key, scalar_value, step, custom_step_name, [InteropLoggingTool.WANDB]) + + def add_images(self, key, img_tensor, step=None): + if self.tb_writer: + self.tb_writer.add_images(key, img_tensor, step) + + if self.wandb_writer: + self.wandb_writer.log({key: wandb.Image(img_tensor)}, step) + + def training_log(loss_dict, total_loss_dict, learning_rate, iteration, loss_scale, report_memory_flag, skipped_iter, grad_norm, params_norm, num_zeros_in_grad, @@ -794,8 +853,10 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration, """Log training information such as losses, timing, ....""" args = get_args() timers = get_timers() - writer = get_tensorboard_writer() - + writer = interop_tool_logger(tb_writer=get_tensorboard_writer(), \ + wandb_writer=get_wandb_writer()) + x_axis_samples = 'Samples' + x_axis_tokens = 'Tokens' # Advanced, skipped, and Nan iterations. 
advanced_iters_key = 'advanced iterations' skipped_iters_key = 'skipped iterations' @@ -866,80 +927,80 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration, (iteration % args.tensorboard_log_interval == 0): timers.write(timers_to_log, writer, iteration, normalizer=total_iterations) - if writer and (iteration % args.tensorboard_log_interval == 0): - writer.add_scalar('steps-vs-samples/y=steps,x=samples', iteration, args.consumed_train_samples) + if writer.is_enabled() and (iteration % args.tensorboard_log_interval == 0): + writer.add_scalar('steps-vs-samples/y=steps,x=samples', iteration, args.consumed_train_samples, x_axis_samples) writer.add_scalar('steps-vs-samples/y=samples,x=steps', args.consumed_train_samples, iteration) - writer.add_scalar('steps-vs-tokens/y=steps,x=tokens', iteration, args.consumed_train_tokens) + writer.add_scalar('steps-vs-tokens/y=steps,x=tokens', iteration, args.consumed_train_tokens, x_axis_tokens) writer.add_scalar('steps-vs-tokens/y=tokens,x=steps', args.consumed_train_tokens, iteration) if args.log_learning_rate_to_tensorboard: writer.add_scalar('learning-rate/learning-rate', learning_rate, iteration) writer.add_scalar('learning-rate/learning-rate vs samples', learning_rate, - args.consumed_train_samples) + args.consumed_train_samples, x_axis_samples) writer.add_scalar('learning-rate/learning-rate vs tokens', learning_rate, - args.consumed_train_tokens) + args.consumed_train_tokens, x_axis_tokens) if args.log_batch_size_to_tensorboard: writer.add_scalar('batch-size/batch-size', batch_size, iteration) writer.add_scalar('batch-size/batch-size vs samples', batch_size, - args.consumed_train_samples) + args.consumed_train_samples, x_axis_samples) writer.add_scalar('batch-size/batch-size vs tokens', batch_size, - args.consumed_train_tokens) + args.consumed_train_tokens, x_axis_tokens) for key in loss_dict: writer.add_scalar(f"lm-loss-training/{key}", loss_dict[key], iteration) writer.add_scalar(f"lm-loss-training/{key}" 
+ ' vs samples', loss_dict[key], - args.consumed_train_samples) + args.consumed_train_samples, x_axis_samples) writer.add_scalar(f"lm-loss-training/{key}" + ' vs tokens', loss_dict[key], - args.consumed_train_tokens) + args.consumed_train_tokens, x_axis_tokens) if args.fp16 and loss_scale and args.log_loss_scale_to_tensorboard: writer.add_scalar('loss-scale/loss-scale', loss_scale, iteration) writer.add_scalar('loss-scale/loss-scale vs samples', loss_scale, - args.consumed_train_samples) + args.consumed_train_samples, x_axis_samples) writer.add_scalar('loss-scale/loss-scale vs tokens', loss_scale, - args.consumed_train_tokens) + args.consumed_train_tokens, x_axis_tokens) if args.log_world_size_to_tensorboard: writer.add_scalar('world-size/world-size', args.world_size, iteration) writer.add_scalar('world-size/world-size vs samples', args.world_size, - args.consumed_train_samples) + args.consumed_train_samples, x_axis_samples) writer.add_scalar('world-size/world-size vs tokens', args.world_size, - args.consumed_train_tokens) + args.consumed_train_tokens, x_axis_tokens) if grad_norm is not None: writer.add_scalar('grad-norm/grad-norm', grad_norm, iteration) writer.add_scalar('grad-norm/grad-norm vs samples', grad_norm, - args.consumed_train_samples) + args.consumed_train_samples, x_axis_samples) writer.add_scalar('grad-norm/grad-norm vs tokens', grad_norm, - args.consumed_train_tokens) + args.consumed_train_tokens, x_axis_tokens) if num_zeros_in_grad is not None: writer.add_scalar('num-zeros/num-zeros', num_zeros_in_grad, iteration) writer.add_scalar('num-zeros/num-zeros vs samples', num_zeros_in_grad, - args.consumed_train_samples) + args.consumed_train_samples, x_axis_samples) writer.add_scalar('num-zeros/num-zeros vs tokens', num_zeros_in_grad, - args.consumed_train_tokens) + args.consumed_train_tokens, x_axis_tokens) if params_norm is not None: writer.add_scalar('params-norm/params-norm', params_norm, iteration) writer.add_scalar('params-norm/params-norm vs 
samples', params_norm, - args.consumed_train_samples) + args.consumed_train_samples, x_axis_samples) writer.add_scalar('params-norm/params-norm vs tokens', params_norm, - args.consumed_train_tokens) + args.consumed_train_tokens, x_axis_tokens) if hasattr(args, 'actual_seq_length'): writer.add_scalar('seqlen/actual_seq_length', args.actual_seq_length, iteration) writer.add_scalar('seqlen/actual_seq_length vs samples', args.actual_seq_length, - args.consumed_train_samples) + args.consumed_train_samples, x_axis_samples) writer.add_scalar('seqlen/actual_seq_length vs tokens', args.actual_seq_length, - args.consumed_train_tokens) + args.consumed_train_tokens, x_axis_tokens) if args.curriculum_learning_legacy or args.data_efficiency_curriculum_learning: writer.add_scalar('seqlen/curriculum_seqlen', args.curriculum_seqlen, iteration) writer.add_scalar('seqlen/curriculum_seqlen vs samples', args.curriculum_seqlen, - args.consumed_train_samples) + args.consumed_train_samples, x_axis_samples) writer.add_scalar('seqlen/curriculum_seqlen vs tokens', args.curriculum_seqlen, - args.consumed_train_tokens) + args.consumed_train_tokens, x_axis_tokens) if args.random_ltd: writer.add_scalar('seqlen/random_ltd_reserved_length', args.random_ltd_reserved_length, iteration) writer.add_scalar('seqlen/random_ltd_reserved_length vs samples', args.random_ltd_reserved_length, - args.consumed_train_samples) + args.consumed_train_samples, x_axis_samples) writer.add_scalar('seqlen/random_ltd_reserved_length vs tokens', args.random_ltd_reserved_length, - args.consumed_train_tokens) + args.consumed_train_tokens, x_axis_tokens) if args.log_memory_to_tensorboard: mem_stats = torch.cuda.memory_stats() writer.add_scalar( @@ -1002,19 +1063,19 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration, group=mpu.get_pipeline_model_parallel_group()) # print('step {} rank {} after sync opt_stats {}, {}'.format(iteration, torch.distributed.get_rank(), opt_stats_2, opt_stats)) - if writer and 
is_last_rank(): - writer.add_scalar('optimizer/variance_l2 vs tokens', opt_stats[0]**0.5, args.consumed_train_tokens) - writer.add_scalar('optimizer/variance_sqrt_l2 vs tokens', opt_stats[1]**0.5, args.consumed_train_tokens) - writer.add_scalar('optimizer/momentum_l2 vs tokens', opt_stats[2]**0.5, args.consumed_train_tokens) - writer.add_scalar('optimizer/weight_l2 vs tokens', opt_stats[3]**0.5, args.consumed_train_tokens) - writer.add_scalar('optimizer/variance_l1 vs tokens', opt_stats[4], args.consumed_train_tokens) - writer.add_scalar('optimizer/variance_sqrt_l1 vs tokens', opt_stats[5], args.consumed_train_tokens) - writer.add_scalar('optimizer/momentum_l1 vs tokens', opt_stats[6], args.consumed_train_tokens) - writer.add_scalar('optimizer/weight_l1 vs tokens', opt_stats[7], args.consumed_train_tokens) - writer.add_scalar('optimizer/variance_abs_max vs tokens', opt_stats_2[0], args.consumed_train_tokens) - writer.add_scalar('optimizer/variance_sqrt_abs_max vs tokens', opt_stats_2[1], args.consumed_train_tokens) - writer.add_scalar('optimizer/momentum_abs_max vs tokens', opt_stats_2[2], args.consumed_train_tokens) - writer.add_scalar('optimizer/weight_abs_max vs tokens', opt_stats_2[3], args.consumed_train_tokens) + if writer.is_enabled() and is_last_rank(): + writer.add_scalar('optimizer/variance_l2 vs tokens', opt_stats[0]**0.5, args.consumed_train_tokens, x_axis_tokens) + writer.add_scalar('optimizer/variance_sqrt_l2 vs tokens', opt_stats[1]**0.5, args.consumed_train_tokens, x_axis_tokens) + writer.add_scalar('optimizer/momentum_l2 vs tokens', opt_stats[2]**0.5, args.consumed_train_tokens, x_axis_tokens) + writer.add_scalar('optimizer/weight_l2 vs tokens', opt_stats[3]**0.5, args.consumed_train_tokens, x_axis_tokens) + writer.add_scalar('optimizer/variance_l1 vs tokens', opt_stats[4], args.consumed_train_tokens, x_axis_tokens) + writer.add_scalar('optimizer/variance_sqrt_l1 vs tokens', opt_stats[5], args.consumed_train_tokens, x_axis_tokens) + 
writer.add_scalar('optimizer/momentum_l1 vs tokens', opt_stats[6], args.consumed_train_tokens, x_axis_tokens) + writer.add_scalar('optimizer/weight_l1 vs tokens', opt_stats[7], args.consumed_train_tokens, x_axis_tokens) + writer.add_scalar('optimizer/variance_abs_max vs tokens', opt_stats_2[0], args.consumed_train_tokens, x_axis_tokens) + writer.add_scalar('optimizer/variance_sqrt_abs_max vs tokens', opt_stats_2[1], args.consumed_train_tokens, x_axis_tokens) + writer.add_scalar('optimizer/momentum_abs_max vs tokens', opt_stats_2[2], args.consumed_train_tokens, x_axis_tokens) + writer.add_scalar('optimizer/weight_abs_max vs tokens', opt_stats_2[3], args.consumed_train_tokens, x_axis_tokens) writer.add_scalar('optimizer/variance_l2', opt_stats[0]**0.5, iteration) writer.add_scalar('optimizer/variance_sqrt_l2', opt_stats[1]**0.5, iteration) @@ -1046,34 +1107,29 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration, tokens_per_sec_per_replica = tokens_per_sec / args.data_parallel_size tokens_per_gpu_per_second = tokens_per_sec / args.world_size tokens_per_gpu_per_second_per_replica = tokens_per_gpu_per_second / args.data_parallel_size - if wandb is not None and getattr(wandb, 'run', None) is not None: - assert wandb.run is not None - wandb_metrics = { - 'throughput/iteration-time': elapsed_time_per_iteration, # 1000 ms / s - 'throughput/samples_per_sec': samples_per_sec, - 'throughput/samples_per_sec_per_replica': samples_per_sec_per_replica, - 'throughput/tokens_per_sec': tokens_per_sec, - 'throughput/tokens_per_sec_per_replica': tokens_per_sec_per_replica, - 'throughput/tokens_per_gpu_per_sec': tokens_per_gpu_per_second, - 'throughput/tokens_per_gpu_per_sec_per_replica': tokens_per_gpu_per_second_per_replica, - 'throughput/tflops': tflops, - 'throughput/approx_params_in_billions': approx_parameters_in_billions, - 'throughput/elapsed_ms_per_iteration': elapsed_time_per_iteration, - 'throughput/iteration': iteration, - } + + if writer.is_enabled(): + 
writer.add_scalar_to_wandb('throughput/iteration-time', elapsed_time_per_iteration, iteration) # 1000 ms / s + writer.add_scalar_to_wandb('throughput/samples_per_sec', samples_per_sec, iteration) + writer.add_scalar_to_wandb('throughput/samples_per_sec_per_replica', samples_per_sec_per_replica, iteration) + writer.add_scalar_to_wandb('throughput/tokens_per_sec', tokens_per_sec, iteration) + writer.add_scalar_to_wandb('throughput/tokens_per_sec_per_replica', tokens_per_sec_per_replica, iteration) + writer.add_scalar_to_wandb('throughput/tokens_per_gpu_per_sec', tokens_per_gpu_per_second, iteration) + writer.add_scalar_to_wandb('throughput/tokens_per_gpu_per_sec_per_replica', tokens_per_gpu_per_second_per_replica, iteration) + writer.add_scalar_to_wandb('throughput/tflops', tflops, iteration) + writer.add_scalar_to_wandb('throughput/approx_params_in_billions', approx_parameters_in_billions, iteration) + writer.add_scalar_to_wandb('throughput/elapsed_ms_per_iteration', elapsed_time_per_iteration, iteration) if loss_dict is not None: - wandb_metrics |= { - f'loss/{k}': v for k, v in loss_dict.items() - } - wandb_metrics |= {'loss/iteration': iteration} - if writer: + for k, v in loss_dict.items(): + writer.add_scalar_to_wandb(f'loss/{k}', v, iteration) + if args.log_timers_to_tensorboard: writer.add_scalar('iteration-time/iteration-time', elapsed_time_per_iteration, iteration) writer.add_scalar('iteration-time/iteration-time vs samples', - elapsed_time_per_iteration, args.consumed_train_samples) + elapsed_time_per_iteration, args.consumed_train_samples, x_axis_samples) writer.add_scalar('iteration-time/iteration-time vs tokens', - elapsed_time_per_iteration, args.consumed_train_tokens) + elapsed_time_per_iteration, args.consumed_train_tokens, x_axis_tokens) log_string = ' iteration {:8d}/{:8d} |'.format( iteration, args.train_iters) log_string += ' consumed samples: {:12d} |'.format( @@ -1084,21 +1140,7 @@ def training_log(loss_dict, total_loss_dict, learning_rate, 
iteration, elapsed_time_per_iteration * 1000.0) log_string += ' learning rate: {:.3E} |'.format(learning_rate) log_string += ' global batch size: {:5d} |'.format(batch_size) - if wandb is not None and getattr(wandb, 'run', None) is not None: - wandb_metrics |= { - 'training/iteration': iteration, - 'training/iteration_time': elapsed_time_per_iteration, - 'training/iteration_time_vs_tokens': ( - (elapsed_time_per_iteration - / args.consumed_train_tokens) - ), - 'training/iteration_time_vs_samples': ( - (elapsed_time_per_iteration - / args.consumed_train_samples), - ), - 'training/consumed_samples': args.consumed_train_samples, - 'training/consumed_tokens': args.consumed_train_tokens, - } + for key in total_loss_dict: if key not in [advanced_iters_key, skipped_iters_key, nan_iters_key]: @@ -1107,8 +1149,6 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration, if avg > 0.0: log_string += ' {}: {:.6E} |'.format(key, avg) total_loss_dict[key] = get_accelerator().FloatTensor([0.0]) - if wandb is not None and getattr(wandb, 'run', None) is not None: - wandb.log(wandb_metrics) if loss_scale is not None: log_string += ' loss scale: {:.1f} |'.format(loss_scale) if grad_norm is not None: @@ -1433,9 +1473,11 @@ def evaluate_and_print_results(prefix, forward_step_func, """Helper function to evaluate and dump results on screen.""" args = get_args() if write_to_tensorboard: - writer = get_tensorboard_writer() + writer = interop_tool_logger(tb_writer=get_tensorboard_writer(), wandb_writer=get_wandb_writer()) else: - writer = None + writer = interop_tool_logger() + x_axis_samples = 'Samples' + x_axis_tokens = 'Tokens' total_loss_dict, collected_non_loss_data = evaluate( forward_step_func, data_iterator, model, @@ -1445,26 +1487,28 @@ def evaluate_and_print_results(prefix, forward_step_func, string += '{} value: {:.6E} | '.format(key, total_loss_dict[key].item()) ppl = math.exp(min(20, total_loss_dict[key].item())) string += '{} PPL: {:.6E} | '.format(key, ppl) - 
if writer and is_last_rank(): + if writer.is_enabled() and is_last_rank(): data_type = 'test' if test else 'validation' writer.add_scalar(f'lm-loss-validation/{key} {data_type}', total_loss_dict[key].item(), iteration) writer.add_scalar(f'lm-loss-validation/{key} {data_type} vs samples', total_loss_dict[key].item(), - args.consumed_train_samples) + args.consumed_train_samples, + x_axis_samples) writer.add_scalar(f'lm-loss-validation/{key} {data_type} vs tokens', total_loss_dict[key].item(), - args.consumed_train_tokens) + args.consumed_train_tokens, + x_axis_tokens) if args.log_validation_ppl_to_tensorboard: writer.add_scalar(f'lm-loss-validation/{key} {data_type} ppl', ppl, iteration) writer.add_scalar(f'lm-loss-validation/{key} {data_type} ppl vs samples', - ppl, args.consumed_train_samples) + ppl, args.consumed_train_samples, x_axis_samples) writer.add_scalar(f'lm-loss-validation/{key} {data_type} ppl vs tokens', - ppl, args.consumed_train_tokens) + ppl, args.consumed_train_tokens, x_axis_tokens) - if process_non_loss_data_func is not None and writer and is_last_rank(): + if process_non_loss_data_func is not None and writer.is_enabled() and is_last_rank(): process_non_loss_data_func(collected_non_loss_data, iteration, writer) length = len(string) + 1 From fc989b8844b34489fa8c71aa048f2529ce5e397a Mon Sep 17 00:00:00 2001 From: Polisetty V R K Jyothendra Varma Date: Wed, 17 Jul 2024 20:21:02 +0300 Subject: [PATCH 21/92] add kill switch file support to gracefully exit training at runtime (#412) * Update arguments.py * Update training.py * Update utils.py * add copyrights * add copyrights * add copyrights * Update arguments.py help * Update arguments.py * Update training.py * Update utils.py * Update arguments.py --- megatron/arguments.py | 3 +++ megatron/training.py | 19 +++++++++++++++++-- megatron/utils.py | 8 ++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/megatron/arguments.py b/megatron/arguments.py index 30c3d669d7..af39f0b0e3 100644 
--- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -680,6 +680,9 @@ def _add_network_size_args(parser): help='Untie embeddings and output weights.'), group.add_argument('--embedding-weights-in-fp32', action='store_true', help='Cast word embedding weights to fp32 before embedding fwd.'), + group.add_argument('--kill-switch-file', type=str, default=None, + help='Location of kill switch file. ' + 'If found will automatically exit the program at runtime.') return parser diff --git a/megatron/training.py b/megatron/training.py index 697d62f7b8..6ba26f3944 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -43,7 +43,7 @@ from megatron.optimizer_param_scheduler import OptimizerParamScheduler from megatron.model import DistributedDataParallel as LocalDDP from megatron.utils import check_adlr_autoresume_termination -from megatron.utils import unwrap_model +from megatron.utils import unwrap_model, found_kill_switch from megatron.data.data_samplers import build_pretraining_data_loader from megatron.utils import calc_params_l2_norm from megatron.core.pipeline_parallel import get_forward_backward_func @@ -128,6 +128,13 @@ def pretrain(train_valid_test_dataset_provider, # Initalize and get arguments, timers, and Tensorboard writer. initialize_megatron(extra_args_provider=extra_args_provider, args_defaults=args_defaults, external_args=external_args) + + args = get_args() + + if found_kill_switch(): + print_datetime(f"Detected kill switch at {args.kill_switch_file}. Exiting") + sys.exit() + # Set pytorch JIT layer fusion options and warmup JIT functions. 
if get_accelerator().device_name() == 'cuda': set_jit_fusion_options() @@ -144,7 +151,6 @@ def pretrain(train_valid_test_dataset_provider, time.time() - _TRAIN_START_TIME)) print_datetime('after megatron is initialized') - args = get_args() timers = get_timers() if args.deepspeed: @@ -1358,6 +1364,15 @@ def train(forward_step_func, model, optimizer, opt_param_scheduler, sys.exit() trigger(on_step_end) + # Exiting based on kill switch file + if found_kill_switch(): + if args.save and not saved_checkpoint: + save_checkpoint_and_time(iteration, model, optimizer, + opt_param_scheduler) + torch.distributed.barrier() + print_datetime(f"Detected kill switch at {args.kill_switch_file}, " + f"iteration={iteration}. Exiting") + sys.exit() return iteration diff --git a/megatron/utils.py b/megatron/utils.py index 9033d6402a..cbb7aa6426 100644 --- a/megatron/utils.py +++ b/megatron/utils.py @@ -381,3 +381,11 @@ def dump_weights(preamble, iteration, model, optimizer, tensor=None): p = model[0].module.tied_modules.embed.word_embeddings.weight._hp_param fh.write(f"{get_fingerprint(p)} module.tied_modules.embed.word_embeddings.weight._hp_param {p.shape}\n") + +def found_kill_switch(): + args = get_args() + if args.kill_switch_file is not None and os.path.exists(args.kill_switch_file): + return True + else: + return False + From 7d23e33633e27cd7798fc798c7c268c7fa286120 Mon Sep 17 00:00:00 2001 From: Polisetty V R K Jyothendra Varma Date: Thu, 18 Jul 2024 19:47:04 +0300 Subject: [PATCH 22/92] add support to run custom Hf tokenizer for training and dataset pre-processing (#421) * Update arguments.py * Update tokenizer.py * Update preprocess_data.py --- megatron/arguments.py | 2 ++ megatron/tokenizer/tokenizer.py | 12 +++++++++--- tools/preprocess_data.py | 8 +++++++- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/megatron/arguments.py b/megatron/arguments.py index af39f0b0e3..49b3d8e4c6 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -1299,6 +1299,8 
@@ def _add_data_args(parser): help='What type of tokenizer to use.') group.add_argument('--tokenizer-model', type=str, default=None, help='Sentencepiece tokenizer model.') + group.add_argument('--trust-remote-code', action='store_true', default=False, + help='To run HFTokenizer model from local path.') group.add_argument('--data-impl', type=str, default='infer', choices=['mmap', 'infer'], help='Implementation of indexed datasets.') diff --git a/megatron/tokenizer/tokenizer.py b/megatron/tokenizer/tokenizer.py index 43c251bab1..7edef90b6f 100644 --- a/megatron/tokenizer/tokenizer.py +++ b/megatron/tokenizer/tokenizer.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. """Megatron tokenizers.""" @@ -40,7 +41,9 @@ def build_tokenizer(args): tokenizer = _NullTokenizer(args.vocab_size) elif args.tokenizer_type == 'HFTokenizer': assert args.tokenizer_model is not None - tokenizer = _HFTokenizer(args.tokenizer_model,args.seq_length) + tokenizer = _HFTokenizer(args.tokenizer_model, + args.seq_length, + args.trust_remote_code) else: raise NotImplementedError('{} tokenizer is not ' 'implemented.'.format(args.tokenizer_type)) @@ -540,10 +543,13 @@ def additional_special_tokens_ids(self): class _HFTokenizer(AbstractTokenizer): """HF Tokenizer""" - def __init__(self, tokenizer_name_or_path,max_seq_len): + def __init__(self, tokenizer_name_or_path, max_seq_len, trust_remote_code): name = tokenizer_name_or_path super().__init__(name) - self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path,padding_side="right",use_fast=False) + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, + padding_side="right", + trust_remote_code=trust_remote_code, + use_fast=False) DEFAULT_PAD_TOKEN = "[PAD]" DEFAULT_EOS_TOKEN = "" diff --git a/tools/preprocess_data.py b/tools/preprocess_data.py index 399f93c10e..c34d9dcb3b 100644 --- a/tools/preprocess_data.py +++ 
b/tools/preprocess_data.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. """Processing large data for pretraining.""" @@ -193,10 +194,15 @@ def get_args(): group.add_argument('--tokenizer-type', type=str, required=True, choices=['BertWordPieceLowerCase','BertWordPieceCase', 'GPT2BPETokenizer', 'SentencePieceTokenizer', - 'GPTSentencePieceTokenizer', 'NullTokenizer'], + 'GPTSentencePieceTokenizer', 'HFTokenizer', + 'NullTokenizer'], help='What type of tokenizer to use.') group.add_argument('--tokenizer-model', type=str, default=None, help='YTTM tokenizer model.') + group.add_argument('--seq-length', type=int, default=None, + help='Maximum sequence length to process.') + group.add_argument('--trust-remote-code', action='store_true', + help='To run HFTokenizer model from local path.') group.add_argument('--vocab-file', type=str, default=None, help='Path to the vocab file') group.add_argument('--vocab-size', default=786, From 13f2673caab46152ee7ba8051b8b0a9602270775 Mon Sep 17 00:00:00 2001 From: Polisetty V R K Jyothendra Varma Date: Fri, 19 Jul 2024 04:02:32 +0300 Subject: [PATCH 23/92] improve repeat_kv GQA perf (#419) --- megatron/model/transformer.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/megatron/model/transformer.py b/megatron/model/transformer.py index be8ae6ef4b..e79abea3cf 100644 --- a/megatron/model/transformer.py +++ b/megatron/model/transformer.py @@ -654,11 +654,16 @@ def repeat_kv(self, hidden_states, n_rep): slen, batch, num_key_value_heads_per_partition, head_dim = hidden_states.shape if n_rep == 1: return hidden_states - hidden_states = hidden_states[:, :, :, None, :].expand( - slen, batch, num_key_value_heads_per_partition, n_rep, head_dim) - return hidden_states.reshape(slen, batch, - num_key_value_heads_per_partition * n_rep, - head_dim) + elif num_key_value_heads_per_partition == 1: + # If no of KV heads is 1 then 
just perform expand operation + # instead of unsqueeze, expand and reshape to match query states. + return hidden_states.expand(slen, batch, n_rep, head_dim) + else: + hidden_states = hidden_states[:, :, :, None, :].expand( + slen, batch, num_key_value_heads_per_partition, n_rep, head_dim) + return hidden_states.reshape(slen, batch, + num_key_value_heads_per_partition * n_rep, + head_dim) def split_tensor(self, mixed_x_layer): query_layer, key_layer, value_layer = torch.split(mixed_x_layer, [self.num_key_value_groups, 1, 1], dim=-2) From 3af2e255ed9cc1b6b596cd232e12a84f8cbdeea5 Mon Sep 17 00:00:00 2001 From: Polisetty V R K Jyothendra Varma Date: Fri, 19 Jul 2024 04:03:30 +0300 Subject: [PATCH 24/92] acquire device when required (#420) * Update module.py * Update preprocess_data.py * add copyrights * add copyrights * Update tokenizer.py * add copyrights --- megatron/model/module.py | 16 ++++++++++++---- tools/preprocess_data.py | 2 +- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/megatron/model/module.py b/megatron/model/module.py index 963ad2d29d..08cf7cb553 100644 --- a/megatron/model/module.py +++ b/megatron/model/module.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 
"""Megatron Module""" @@ -10,10 +11,9 @@ from megatron.core import mpu, tensor_parallel -_FLOAT_TYPES = [get_accelerator().FloatTensor(0).dtype] -_HALF_TYPES = [get_accelerator().HalfTensor(0).dtype] -_BF16_TYPES = [get_accelerator().BFloat16Tensor(0).dtype] - +_FLOAT_TYPES = None +_HALF_TYPES = None +_BF16_TYPES = None def param_is_not_shared(param): @@ -131,6 +131,9 @@ def conversion_helper(val, conversion): def fp32_to_float16(val, float16_convertor): """Convert fp32 `val` to fp16/bf16""" + global _FLOAT_TYPES + if _FLOAT_TYPES is None: + _FLOAT_TYPES = [get_accelerator().FloatTensor(0).dtype] def half_conversion(val): val_typecheck = val if isinstance(val_typecheck, (Parameter, Variable)): @@ -143,6 +146,11 @@ def half_conversion(val): def float16_to_fp32(val): """Convert fp16/bf16 `val` to fp32""" + global _HALF_TYPES, _BF16_TYPES + if _HALF_TYPES is None: + _HALF_TYPES = [get_accelerator().HalfTensor(0).dtype] + if _BF16_TYPES is None: + _BF16_TYPES = [get_accelerator().BFloat16Tensor(0).dtype] def float_conversion(val): val_typecheck = val if isinstance(val_typecheck, (Parameter, Variable)): diff --git a/tools/preprocess_data.py b/tools/preprocess_data.py index c34d9dcb3b..6e117db31a 100644 --- a/tools/preprocess_data.py +++ b/tools/preprocess_data.py @@ -235,7 +235,7 @@ def get_args(): print("Are you sure you don't want to split sentences?") # some default/dummy values for the tokenizer - args.rank = 1 + args.rank = 0 args.make_vocab_size_divisible_by = 128 args.tensor_model_parallel_size = 1 args.vocab_extra_ids = 0 From 08b93769c9a6df70ceb2c74ff3eaaebee25fe074 Mon Sep 17 00:00:00 2001 From: Logan Adams <114770087+loadams@users.noreply.github.com> Date: Fri, 19 Jul 2024 15:04:39 -0700 Subject: [PATCH 25/92] Add basic compilation test (#426) --- .github/workflows/python.yml | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/python.yml diff --git a/.github/workflows/python.yml 
b/.github/workflows/python.yml new file mode 100644 index 0000000000..d519b65e2e --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,35 @@ +name: python + +on: + workflow_dispatch: + pull_request: + branches: + '**' + schedule: + - cron: "0 0 * * *" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + unit-tests: + strategy: + matrix: + pyVersion: ["3.7", "3.8", "3.9", "3.10"] + fail-fast: false + + runs-on: ubuntu-22.04 + container: + image: deepspeed/gh-builder:py${{ matrix.pyVersion }} + + steps: + - uses: actions/checkout@v4 + + - name: environment + run: | + which python + python --version + - name: Install Megatron-DeepSpeed + run: | + pip3 install . From 3afd267e1e50b1410beb606c5625cc232a55417a Mon Sep 17 00:00:00 2001 From: Logan Adams <114770087+loadams@users.noreply.github.com> Date: Fri, 19 Jul 2024 15:09:44 -0700 Subject: [PATCH 26/92] Update yml to be valid (#427) --- .github/workflows/python.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index d519b65e2e..2f9765af24 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -32,4 +32,4 @@ jobs: python --version - name: Install Megatron-DeepSpeed run: | - pip3 install . + pip3 install . From 8822a5ced6ce74d926fbe0f49cdca6bb3389bef8 Mon Sep 17 00:00:00 2001 From: Lev Kurilenko <113481193+lekurile@users.noreply.github.com> Date: Mon, 29 Jul 2024 14:40:40 -0700 Subject: [PATCH 27/92] Update/add GPT/Llama universal checkpointing scripts (#391) This PR adds a Llama universal checkpointing example to examples_deepspeed/universal_checkpointing. It also includes changes to the README, some minor changes, and an update to the TensorBoard analysis script. 
--- .../universal_checkpointing/README.md | 22 +-- .../llama/run_llama_bf16.sh | 175 +++++++++++++++++ .../llama/run_tb_analysis_llama.sh | 27 +++ .../llama/run_universal_llama_bf16.sh | 176 ++++++++++++++++++ .../{ => megatron_gpt}/run_bf16.sh | 2 +- .../{ => megatron_gpt}/run_fp16.sh | 2 +- .../run_tb_analysis_gpt.sh} | 2 - .../run_tb_analysis_gpt_plot_only.sh | 31 +++ .../{ => megatron_gpt}/run_universal_bf16.sh | 2 +- .../{ => megatron_gpt}/run_universal_fp16.sh | 2 +- .../tb_analysis/arguments.py | 2 + .../tb_analysis/tb_analysis_script.py | 41 +++- .../tb_analysis/uc_analysis.py | 2 +- .../tb_analysis/utils.py | 26 ++- 14 files changed, 486 insertions(+), 26 deletions(-) create mode 100644 examples_deepspeed/universal_checkpointing/llama/run_llama_bf16.sh create mode 100755 examples_deepspeed/universal_checkpointing/llama/run_tb_analysis_llama.sh create mode 100644 examples_deepspeed/universal_checkpointing/llama/run_universal_llama_bf16.sh rename examples_deepspeed/universal_checkpointing/{ => megatron_gpt}/run_bf16.sh (99%) rename examples_deepspeed/universal_checkpointing/{ => megatron_gpt}/run_fp16.sh (99%) rename examples_deepspeed/universal_checkpointing/{run_tb_analysis.sh => megatron_gpt/run_tb_analysis_gpt.sh} (96%) create mode 100755 examples_deepspeed/universal_checkpointing/megatron_gpt/run_tb_analysis_gpt_plot_only.sh rename examples_deepspeed/universal_checkpointing/{ => megatron_gpt}/run_universal_bf16.sh (99%) rename examples_deepspeed/universal_checkpointing/{ => megatron_gpt}/run_universal_fp16.sh (99%) diff --git a/examples_deepspeed/universal_checkpointing/README.md b/examples_deepspeed/universal_checkpointing/README.md index a0a1df5b8a..281d320e99 100644 --- a/examples_deepspeed/universal_checkpointing/README.md +++ b/examples_deepspeed/universal_checkpointing/README.md @@ -10,12 +10,12 @@ This folder contains example scripts that demonstrate how to use Universal Check For ZeRO stage 1, we provide bash scripts for bf16 and fp16 training 
examples corresponding to the steps 1 and 3 above. The step 1 scripts launch a training run of TP=PP=DP=2 of 200 iterations that creates a checkpoint every 100 iterations. The step 3 scripts load a universal checkpoint of iteration 100 and resume training with TP=PP=2 and DP=1 for an additional 100 iterations. Users can modify these scripts to try out other save and resume 3D combinations (e.g., save TP=PP=DP=1 and resume TP=PP=DP=2). Tensorboard logs are created by both step 1 and 3 scripts to enable visual inspection of how well the loss curves of the initial and resumed training runs match, especially at iteration 101. 1. bf16: - * run_bf16.sh: step 1 - * run_universal_bf16.sh: step 3 + * megatron_gpt/run_bf16.sh: step 1 + * megatron_gpt/run_universal_bf16.sh: step 3 2. fp16: - * run_fp16.sh: step 1 - * run_universal_fp16.sh: step 3 + * megatron_gpt/run_fp16.sh: step 1 + * megatron_gpt/run_universal_fp16.sh: step 3 Please note that these scripts should be run from the root folder of the repo (i.e., two levels above this README). For illustration, here are the commands for running the bf16 example. @@ -41,22 +41,22 @@ NOTE: Make sure to update your `BASE_DATA_PATH` path in the `run_[bf16/fp16].sh` ### Step 1: Create ZeRO checkpoint ```bash - bash examples_deepspeed/universal_checkpointing/run_bf16.sh + bash examples_deepspeed/universal_checkpointing/megatron_gpt/run_bf16.sh ``` -By default the script will create the checkpoints in folder `z1_uni_ckpt/checkpoints/gpt2/z1/bf16/tp2_pp2_dp2_toy` +By default the script will create the checkpoints in folder `z1_uni_ckpt/checkpoints/gpt2/z1/bf16/tp2_pp2_dp2_sp1_toy` ### Step 2: Convert ZeRO checkpoint of iteration 100 to Universal format Assuming the DeepSpeed source code is cloned into the home folder, the following command will generate universal checkpoint for iteration 100. 
```bash python ${HOME}/DeepSpeed/deepspeed/checkpoint/ds_to_universal.py \ - --input_folder z1_uni_ckpt/checkpoints/gpt2/z1/bf16/tp2_pp2_dp2_toy/global_step100 \ - --output_folder z1_uni_ckpt/checkpoints/gpt2/z1/bf16/tp2_pp2_dp2_toy/global_step100_universal + --input_folder z1_uni_ckpt/checkpoints/gpt2/z1/bf16/tp2_pp2_dp2_sp1_toy/global_step100 \ + --output_folder z1_uni_ckpt/checkpoints/gpt2/z1/bf16/tp2_pp2_dp2_sp1_toy/global_step100_universal ``` Note that we chose to create the universal checkpoint in the same checkpoint folder as the ZeRO checkpoint. This maintains the normal checkpoint folder structure expected by the Megatron-DeepSpeed code, which makes it easy to load universal checkpoints with little/no script or code changes. For clarity, we show below the contents of the checkpoint folder after creation of the universal checkpoint. Note that the conversion script creates `global_step100_universal` folder and `latest_universal` file. ```bash -ls -l z1_uni_ckpt/checkpoints/gpt2/z1/bf16/tp2_pp2_dp2_toy/ +ls -l z1_uni_ckpt/checkpoints/gpt2/z1/bf16/tp2_pp2_dp2_sp1_toy/ total 48 drwxr-xr-x 2 user group 4096 Oct 21 08:51 global_step100 drwxr-xr-x 3 user group 4096 Oct 21 09:28 global_step100_universal @@ -69,7 +69,7 @@ drwxr-xr-x 2 user group 4096 Oct 21 09:01 global_step200 ### Step 3: Resume training with Universal checkpoint of iteration 100 ```bash -bash examples_deepspeed/universal_checkpointing/run_universal_bf16.sh +bash examples_deepspeed/universal_checkpointing/megatron_gpt/run_universal_bf16.sh ``` This resumption script effects the loading of universal checkpoint rather than the ZeRO checkpoint in the folder by passing `--universal-checkpoint` command line flag to the main training script (i.e., `pretrain_gpt.py`). 
@@ -85,7 +85,7 @@ The Universal Checkpointing example includes a TensorBoard analysis script that After Step 3 is completed, the script may be executed as follows: ```bash -bash examples_deepspeed/universal_checkpointing/run_tb_analysis.sh z1_uni_ckpt +bash examples_deepspeed/universal_checkpointing/megatron_gpt/run_tb_analysis_gpt.sh z1_uni_ckpt ``` The script will output the following `csv` files: diff --git a/examples_deepspeed/universal_checkpointing/llama/run_llama_bf16.sh b/examples_deepspeed/universal_checkpointing/llama/run_llama_bf16.sh new file mode 100644 index 0000000000..72e79d4f1f --- /dev/null +++ b/examples_deepspeed/universal_checkpointing/llama/run_llama_bf16.sh @@ -0,0 +1,175 @@ +#!/bin/bash +set -ex + +DIR=`pwd` +###################################### +# Change the below configurations here +BASE_PATH=dataset +DS_CONFIG=${BASE_PATH}/deepspeed.json +DATASET=${BASE_PATH}/my-gpt2_text_document +TOKENIZER_PATH=${BASE_PATH}/llama-7b/tokenizer.model # offical llama tokenizer.model + +GPUS_PER_NODE=8 +MASTER_ADDR=localhost +MASTER_PORT=6000 +NNODES=1 +NODE_RANK=0 + +HIDDEN_SIZE=2048 # e.g. llama-13b: 5120 +FFN_HIDDEN_SIZE=5504 # e.g. llama-13b: 13824 +NUM_LAYERS=24 # e.g. llama-13b: 40 +NUM_HEADS=16 # e.g. 
llama-13b: 40 +SEQ=2048 + +LR_WARMUP_STEPS=2000 +WEIGHT_DECAY=0.1 +GRAD_CLIP=1 + +## Activation checkpointing saves GPU memory, but reduces training speed +# activation_checkpoint="true" +activation_checkpoint="false" + +ZERO_STAGE=1 +DTYPE="bf16" + +# 3D parallelism of training +TP=2 +PP=2 +DP=2 +SP=1 +WORLD_SIZE=$((TP*PP*DP*SP)) +GLOBAL_BATCH=32 +MICRO_BATCH=$((GLOBAL_BATCH/WORLD_SIZE)) +TRAIN_ITERS=250000 +LR=3e-4 +MIN_LR=3e-5 + +# Debug +DEBUG_MODE=1 +if [[ $DEBUG_MODE == 1 ]]; then + EXIT_INTERVAL=200 + SIZE_TAG="toy" +else + EXIT_INTERVAL=$TRAIN_ITERS + SIZE_TAG="big" +fi + +# 3D parallelism of checkpoint to load +LOAD_TP=$TP +LOAD_PP=$PP +LOAD_DP=$DP +LOAD_SP=$SP +RUN_TAG="save" + + +EXP_DIR="z${ZERO_STAGE}_uni_ckpt" +CHECKPOINT_PATH=${EXP_DIR}/checkpoints/llama/z${ZERO_STAGE}/$DTYPE/tp${TP}_pp${PP}_dp${DP}_sp${SP}_${SIZE_TAG} +LOAD_CHECKPOINT_PATH=${EXP_DIR}/checkpoints/llama/z${ZERO_STAGE}/$DTYPE/tp${LOAD_TP}_pp${LOAD_PP}_dp${LOAD_DP}_sp${LOAD_SP}_${SIZE_TAG} +LOG_DIR="${EXP_DIR}/tensorboard/llama/$DTYPE/tp${TP}_pp${PP}_dp${DP}_sp${SP}_hd${HIDDEN_SIZE}_nl${NUM_LAYERS}_gbsz${GLOBAL_BATCH}_mbsz${MICRO_BATCH}_z${ZERO_STAGE}_LR_${LR}_${MIN_LR}_${DTYPE}_${SIZE_TAG}_${RUN_TAG}" +mkdir -p $LOG_DIR + +# Below configuration required for llama model as per llama paper +# --no-query-key-layer-scaling \ +# --attention-dropout 0 \ +# --hidden-dropout 0 \ +# --use-rotary-position-embeddings \ +# --untie-embeddings-and-output-weights \ +# --swiglu \ +# --normalization rmsnorm \ +# --disable-bias-linear \ +###################################### + +cat <<EOT > $DS_CONFIG +{ + "train_batch_size" : $GLOBAL_BATCH, + "train_micro_batch_size_per_gpu": $MICRO_BATCH, + "steps_per_print": 1, + + "zero_optimization": { + "stage": $ZERO_STAGE + }, + + "bf16": { + "enabled": true + }, + + "wall_clock_breakdown" : false +} +EOT + +ds_args="" +ds_args=" --deepspeed ${ds_args}" +ds_args=" --deepspeed_config=$DS_CONFIG ${ds_args}" +ds_args=" --zero-stage=$ZERO_STAGE ${ds_args}" + +if [
"${activation_checkpoint}" = "true" ]; then + ds_args="--deepspeed-activation-checkpointing ${ds_args}" + + ## old argument for recomputing the transformer layer + # ds_args="--checkpoint-activations ${ds_args}" + + ## new argument for recomputing the transformer layer + ds_args="--recompute-granularity full --recompute-method uniform ${ds_args}" + ## new argument for recomputing only the attention layer + # ds_args="--recompute-granularity selective ${ds_args}" +fi + +if [[ ${ZERO_STAGE} -gt 1 ]]; then +ds_args="${ds_args} \ + --no-pipeline-parallel" +fi + +options="\ + --tensor-model-parallel-size $TP \ + --pipeline-model-parallel-size $PP \ + --ds-sequence-parallel-size $SP \ + --num-layers $NUM_LAYERS \ + --hidden-size $HIDDEN_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --num-attention-heads $NUM_HEADS \ + --micro-batch-size $MICRO_BATCH \ + --global-batch-size $GLOBAL_BATCH \ + --seq-length $SEQ \ + --max-position-embeddings $SEQ \ + --train-iters $TRAIN_ITERS \ + --save ${CHECKPOINT_PATH} \ + --load ${LOAD_CHECKPOINT_PATH} \ + --data-path $DATASET \ + --data-impl mmap \ + --tokenizer-type GPTSentencePieceTokenizer \ + --tokenizer-model $TOKENIZER_PATH \ + --split 949,50,1 \ + --distributed-backend nccl \ + --lr $LR \ + --lr-decay-style cosine \ + --min-lr $MIN_LR \ + --weight-decay $WEIGHT_DECAY \ + --clip-grad $GRAD_CLIP \ + --lr-warmup-iters $LR_WARMUP_STEPS \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.95 \ + --log-interval 1 \ + --save-interval 100 \ + --eval-interval 10 \ + --eval-iters 40 \ + --exit-interval ${EXIT_INTERVAL} \ + --${DTYPE} \ + --no-query-key-layer-scaling \ + --attention-dropout 0 \ + --hidden-dropout 0 \ + --use-rotary-position-embeddings \ + --untie-embeddings-and-output-weights \ + --swiglu \ + --normalization rmsnorm \ + --disable-bias-linear \ + --tensorboard-dir $LOG_DIR \ + $ds_args +" + +WORKER_STR="--num_nodes 1 --num_gpus $WORLD_SIZE" +run_cmd="deepspeed --master_port 29700 $WORKER_STR ${DIR}/pretrain_gpt.py 
$@ ${options}" + +echo ${options} +echo ${run_cmd} +eval ${run_cmd} diff --git a/examples_deepspeed/universal_checkpointing/llama/run_tb_analysis_llama.sh b/examples_deepspeed/universal_checkpointing/llama/run_tb_analysis_llama.sh new file mode 100755 index 0000000000..b807fb97a7 --- /dev/null +++ b/examples_deepspeed/universal_checkpointing/llama/run_tb_analysis_llama.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +OUTPUT_PATH=$1 + +if [ "$OUTPUT_PATH" == "" ]; then + OUTPUT_PATH="z1_uni_ckpt" +fi + +# Training Loss +python3 examples_deepspeed/universal_checkpointing/tb_analysis/tb_analysis_script.py \ + --tb_dir $OUTPUT_PATH \ + --tb_event_key "lm-loss-training/lm loss" \ + --plot_name "uc_char_training_loss.png" \ + --plot_title "Llama 7B Universal Checkpointing - Training Loss" \ + +# Validation Loss +python3 examples_deepspeed/universal_checkpointing/tb_analysis/tb_analysis_script.py \ + --tb_dir $OUTPUT_PATH \ + --tb_event_key "lm-loss-validation/lm loss validation" \ + --csv_name "val_" \ + --plot_name "uc_char_validation_loss.png" \ + --plot_title "Llama 7B Universal Checkpointing - Validation Loss" \ + --plot_y_label "Validation LM Loss" \ diff --git a/examples_deepspeed/universal_checkpointing/llama/run_universal_llama_bf16.sh b/examples_deepspeed/universal_checkpointing/llama/run_universal_llama_bf16.sh new file mode 100644 index 0000000000..334fa3eaf6 --- /dev/null +++ b/examples_deepspeed/universal_checkpointing/llama/run_universal_llama_bf16.sh @@ -0,0 +1,176 @@ +#!/bin/bash +set -ex + +DIR=`pwd` +###################################### +# Change the below configurations here +BASE_PATH=dataset +DS_CONFIG=${BASE_PATH}/deepspeed.json +DATASET=${BASE_PATH}/my-gpt2_text_document +TOKENIZER_PATH=${BASE_PATH}/llama-7b/tokenizer.model # offical llama tokenizer.model + +GPUS_PER_NODE=8 +MASTER_ADDR=localhost +MASTER_PORT=6000 +NNODES=1 +NODE_RANK=0 + +HIDDEN_SIZE=2048 # 
e.g. llama-13b: 5120 +FFN_HIDDEN_SIZE=5504 # e.g. llama-13b: 13824 +NUM_LAYERS=24 # e.g. llama-13b: 40 +NUM_HEADS=16 # e.g. llama-13b: 40 +SEQ=2048 + +LR_WARMUP_STEPS=2000 +WEIGHT_DECAY=0.1 +GRAD_CLIP=1 + +## Activation checkpointing saves GPU memory, but reduces training speed +# activation_checkpoint="true" +activation_checkpoint="false" + +ZERO_STAGE=1 +DTYPE="bf16" + +# 3D parallelism of training +TP=2 +PP=2 +DP=1 +SP=1 +WORLD_SIZE=$((TP*PP*DP*SP)) +GLOBAL_BATCH=32 +MICRO_BATCH=$((GLOBAL_BATCH/WORLD_SIZE)) +TRAIN_ITERS=250000 +LR=3e-4 +MIN_LR=3e-5 + +# Debug +DEBUG_MODE=1 +if [[ $DEBUG_MODE == 1 ]]; then + EXIT_INTERVAL=200 + SIZE_TAG="toy" +else + EXIT_INTERVAL=$TRAIN_ITERS + SIZE_TAG="big" +fi + +# 3D parallelism of checkpoint to load +LOAD_TP=2 +LOAD_PP=2 +LOAD_DP=2 +LOAD_SP=1 +RUN_TAG="uni_load${LOAD_TP}_${LOAD_PP}_${LOAD_DP}_${LOAD_SP}" + + +EXP_DIR="z${ZERO_STAGE}_uni_ckpt" +CHECKPOINT_PATH=${EXP_DIR}/checkpoints/llama/z${ZERO_STAGE}/$DTYPE/tp${TP}_pp${PP}_dp${DP}_sp${SP}_${SIZE_TAG} +LOAD_CHECKPOINT_PATH=${EXP_DIR}/checkpoints/llama/z${ZERO_STAGE}/$DTYPE/tp${LOAD_TP}_pp${LOAD_PP}_dp${LOAD_DP}_sp${LOAD_SP}_${SIZE_TAG} +LOG_DIR="${EXP_DIR}/tensorboard/llama/$DTYPE/tp${TP}_pp${PP}_dp${DP}_sp${SP}_hd${HIDDEN_SIZE}_nl${NUM_LAYERS}_gbsz${GLOBAL_BATCH}_mbsz${MICRO_BATCH}_z${ZERO_STAGE}_LR_${LR}_${MIN_LR}_${DTYPE}_${SIZE_TAG}_${RUN_TAG}" +mkdir -p $LOG_DIR + +# Below configuration required for llama model as per llama paper +# --no-query-key-layer-scaling \ +# --attention-dropout 0 \ +# --hidden-dropout 0 \ +# --use-rotary-position-embeddings \ +# --untie-embeddings-and-output-weights \ +# --swiglu \ +# --normalization rmsnorm \ +# --disable-bias-linear \ +###################################### + +cat <<EOT > $DS_CONFIG +{ + "train_batch_size" : $GLOBAL_BATCH, + "train_micro_batch_size_per_gpu": $MICRO_BATCH, + "steps_per_print": 1, + + "zero_optimization": { + "stage": $ZERO_STAGE + }, + + "bf16": { + "enabled": true + }, + + "wall_clock_breakdown" : false +} +EOT +
+ds_args="" +ds_args=" --deepspeed ${ds_args}" +ds_args=" --deepspeed_config=$DS_CONFIG ${ds_args}" +ds_args=" --zero-stage=$ZERO_STAGE ${ds_args}" + +if [ "${activation_checkpoint}" = "true" ]; then + ds_args="--deepspeed-activation-checkpointing ${ds_args}" + + ## old argument for recomputing the transformer layer + # ds_args="--checkpoint-activations ${ds_args}" + + ## new argument for recomputing the transformer layer + ds_args="--recompute-granularity full --recompute-method uniform ${ds_args}" + ## new argument for recomputing only the attention layer + # ds_args="--recompute-granularity selective ${ds_args}" +fi + +if [[ ${ZERO_STAGE} -gt 1 ]]; then +ds_args="${ds_args} \ + --no-pipeline-parallel" +fi + +options="\ + --tensor-model-parallel-size $TP \ + --pipeline-model-parallel-size $PP \ + --ds-sequence-parallel-size $SP \ + --num-layers $NUM_LAYERS \ + --hidden-size $HIDDEN_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --num-attention-heads $NUM_HEADS \ + --micro-batch-size $MICRO_BATCH \ + --global-batch-size $GLOBAL_BATCH \ + --seq-length $SEQ \ + --max-position-embeddings $SEQ \ + --train-iters $TRAIN_ITERS \ + --save ${CHECKPOINT_PATH} \ + --load ${LOAD_CHECKPOINT_PATH} \ + --data-path $DATASET \ + --data-impl mmap \ + --tokenizer-type GPTSentencePieceTokenizer \ + --tokenizer-model $TOKENIZER_PATH \ + --split 949,50,1 \ + --distributed-backend nccl \ + --lr $LR \ + --lr-decay-style cosine \ + --min-lr $MIN_LR \ + --weight-decay $WEIGHT_DECAY \ + --clip-grad $GRAD_CLIP \ + --lr-warmup-iters $LR_WARMUP_STEPS \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.95 \ + --log-interval 1 \ + --save-interval 100 \ + --eval-interval 10 \ + --eval-iters 40 \ + --exit-interval ${EXIT_INTERVAL} \ + --${DTYPE} \ + --no-query-key-layer-scaling \ + --attention-dropout 0 \ + --hidden-dropout 0 \ + --use-rotary-position-embeddings \ + --untie-embeddings-and-output-weights \ + --swiglu \ + --normalization rmsnorm \ + --disable-bias-linear \ + 
--tensorboard-dir $LOG_DIR \ + --universal-checkpoint \ + $ds_args +" + +WORKER_STR="--num_nodes 1 --num_gpus $WORLD_SIZE" +run_cmd="deepspeed --master_port 29700 $WORKER_STR ${DIR}/pretrain_gpt.py $@ ${options}" + +echo ${options} +echo ${run_cmd} +eval ${run_cmd} diff --git a/examples_deepspeed/universal_checkpointing/run_bf16.sh b/examples_deepspeed/universal_checkpointing/megatron_gpt/run_bf16.sh similarity index 99% rename from examples_deepspeed/universal_checkpointing/run_bf16.sh rename to examples_deepspeed/universal_checkpointing/megatron_gpt/run_bf16.sh index 0953954222..07cbc30e72 100755 --- a/examples_deepspeed/universal_checkpointing/run_bf16.sh +++ b/examples_deepspeed/universal_checkpointing/megatron_gpt/run_bf16.sh @@ -3,7 +3,7 @@ DIR=`pwd` DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'` -BASE_DATA_PATH=datasets +BASE_DATA_PATH=dataset DATASET=${BASE_DATA_PATH}/my-gpt2_text_document VOCAB_PATH=${BASE_DATA_PATH}/gpt2-vocab.json MERGE_PATH=${BASE_DATA_PATH}/gpt2-merges.txt diff --git a/examples_deepspeed/universal_checkpointing/run_fp16.sh b/examples_deepspeed/universal_checkpointing/megatron_gpt/run_fp16.sh similarity index 99% rename from examples_deepspeed/universal_checkpointing/run_fp16.sh rename to examples_deepspeed/universal_checkpointing/megatron_gpt/run_fp16.sh index 691fa8a8e6..2f1b994079 100755 --- a/examples_deepspeed/universal_checkpointing/run_fp16.sh +++ b/examples_deepspeed/universal_checkpointing/megatron_gpt/run_fp16.sh @@ -3,7 +3,7 @@ DIR=`pwd` DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'` -BASE_DATA_PATH=datasets +BASE_DATA_PATH=dataset DATASET=${BASE_DATA_PATH}/my-gpt2_text_document VOCAB_PATH=${BASE_DATA_PATH}/gpt2-vocab.json MERGE_PATH=${BASE_DATA_PATH}/gpt2-merges.txt diff --git a/examples_deepspeed/universal_checkpointing/run_tb_analysis.sh b/examples_deepspeed/universal_checkpointing/megatron_gpt/run_tb_analysis_gpt.sh similarity index 96% rename from examples_deepspeed/universal_checkpointing/run_tb_analysis.sh rename to 
examples_deepspeed/universal_checkpointing/megatron_gpt/run_tb_analysis_gpt.sh index 7aa988a0a0..3a17d66750 100755 --- a/examples_deepspeed/universal_checkpointing/run_tb_analysis.sh +++ b/examples_deepspeed/universal_checkpointing/megatron_gpt/run_tb_analysis_gpt.sh @@ -16,7 +16,6 @@ python3 examples_deepspeed/universal_checkpointing/tb_analysis/tb_analysis_scrip --tb_event_key "lm-loss-training/lm loss" \ --plot_name "uc_char_training_loss.png" \ --plot_title "Megatron-GPT Universal Checkpointing - Training Loss" \ - --use_sns # Validation Loss python3 examples_deepspeed/universal_checkpointing/tb_analysis/tb_analysis_script.py \ @@ -26,4 +25,3 @@ python3 examples_deepspeed/universal_checkpointing/tb_analysis/tb_analysis_scrip --plot_name "uc_char_validation_loss.png" \ --plot_title "Megatron-GPT Universal Checkpointing - Validation Loss" \ --plot_y_label "Validation LM Loss" \ - --use_sns diff --git a/examples_deepspeed/universal_checkpointing/megatron_gpt/run_tb_analysis_gpt_plot_only.sh b/examples_deepspeed/universal_checkpointing/megatron_gpt/run_tb_analysis_gpt_plot_only.sh new file mode 100755 index 0000000000..0c3ea5399c --- /dev/null +++ b/examples_deepspeed/universal_checkpointing/megatron_gpt/run_tb_analysis_gpt_plot_only.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Copyright (c) Microsoft Corporation. 
+# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +OUTPUT_PATH=$1 + +if [ "$OUTPUT_PATH" == "" ]; then + OUTPUT_PATH="z1_uni_ckpt" +fi + +# Training Loss +python3 examples_deepspeed/universal_checkpointing/tb_analysis/tb_analysis_script.py \ + --tb_dir $OUTPUT_PATH \ + --tb_event_key "lm-loss-training/lm loss" \ + --plot_name "uc_char_training_loss.png" \ + --plot_title "Megatron-GPT Universal Checkpointing - Training Loss" \ + --plot_only \ + --csv_dir "/workspace/uc/megatron/loss_csv" \ + +# Validation Loss +python3 examples_deepspeed/universal_checkpointing/tb_analysis/tb_analysis_script.py \ + --tb_dir $OUTPUT_PATH \ + --tb_event_key "lm-loss-validation/lm loss validation" \ + --csv_name "val_" \ + --plot_name "uc_char_validation_loss.png" \ + --plot_title "Megatron-GPT Universal Checkpointing - Validation Loss" \ + --plot_y_label "Validation LM Loss" \ + --plot_only \ + --csv_dir "/workspace/uc/megatron/val_csv" \ diff --git a/examples_deepspeed/universal_checkpointing/run_universal_bf16.sh b/examples_deepspeed/universal_checkpointing/megatron_gpt/run_universal_bf16.sh similarity index 99% rename from examples_deepspeed/universal_checkpointing/run_universal_bf16.sh rename to examples_deepspeed/universal_checkpointing/megatron_gpt/run_universal_bf16.sh index ef0e134cfc..4134b9df48 100755 --- a/examples_deepspeed/universal_checkpointing/run_universal_bf16.sh +++ b/examples_deepspeed/universal_checkpointing/megatron_gpt/run_universal_bf16.sh @@ -3,7 +3,7 @@ DIR=`pwd` DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'` -BASE_DATA_PATH=datasets +BASE_DATA_PATH=dataset DATASET=${BASE_DATA_PATH}/my-gpt2_text_document VOCAB_PATH=${BASE_DATA_PATH}/gpt2-vocab.json MERGE_PATH=${BASE_DATA_PATH}/gpt2-merges.txt diff --git a/examples_deepspeed/universal_checkpointing/run_universal_fp16.sh b/examples_deepspeed/universal_checkpointing/megatron_gpt/run_universal_fp16.sh similarity index 99% rename from examples_deepspeed/universal_checkpointing/run_universal_fp16.sh 
rename to examples_deepspeed/universal_checkpointing/megatron_gpt/run_universal_fp16.sh index 1e207e422b..bb3a538951 100755 --- a/examples_deepspeed/universal_checkpointing/run_universal_fp16.sh +++ b/examples_deepspeed/universal_checkpointing/megatron_gpt/run_universal_fp16.sh @@ -3,7 +3,7 @@ DIR=`pwd` DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'` -BASE_DATA_PATH=datasets +BASE_DATA_PATH=dataset DATASET=${BASE_DATA_PATH}/my-gpt2_text_document VOCAB_PATH=${BASE_DATA_PATH}/gpt2-vocab.json MERGE_PATH=${BASE_DATA_PATH}/gpt2-merges.txt diff --git a/examples_deepspeed/universal_checkpointing/tb_analysis/arguments.py b/examples_deepspeed/universal_checkpointing/tb_analysis/arguments.py index 3dacb45d4e..ca80872ca0 100644 --- a/examples_deepspeed/universal_checkpointing/tb_analysis/arguments.py +++ b/examples_deepspeed/universal_checkpointing/tb_analysis/arguments.py @@ -17,3 +17,5 @@ parser.add_argument("--skip_csv", action='store_true', help="Skip generation of csv files") parser.add_argument("--use_sns", action='store_true', help="Use the SNS library to format plot") parser.add_argument("--csv_name", required=False, default="", type=str, help="Unique name for CSV files") +parser.add_argument("--plot_only", action='store_true', help="Plot only using csv files") +parser.add_argument("--csv_dir", required=False, type=str, help="Directory for csv files") diff --git a/examples_deepspeed/universal_checkpointing/tb_analysis/tb_analysis_script.py b/examples_deepspeed/universal_checkpointing/tb_analysis/tb_analysis_script.py index 337f6540ab..fbf9b6dd28 100644 --- a/examples_deepspeed/universal_checkpointing/tb_analysis/tb_analysis_script.py +++ b/examples_deepspeed/universal_checkpointing/tb_analysis/tb_analysis_script.py @@ -6,9 +6,10 @@ import os import re import pandas as pd +import csv import matplotlib.pyplot as plt from tensorboard.backend.event_processing.event_accumulator import EventAccumulator -from utils import get_analyzer, find_files +from utils import 
get_analyzer, find_files_prefix, find_files_suffix from arguments import parser args = parser.parse_args() @@ -18,8 +19,8 @@ sns.set() def main(): - target_affix = 'events.out.tfevents' - tb_log_paths = find_files(args.tb_dir, target_affix) + target_prefix = 'events.out.tfevents' + tb_log_paths = find_files_prefix(args.tb_dir, target_prefix) analyzer = get_analyzer(args.analyzer) @@ -41,6 +42,8 @@ def main(): df = pd.DataFrame({"step": x, "value": y}) df.to_csv(f"{args.csv_name}{analyzer.get_csv_filename()}.csv") + plt.grid(True) + if not args.skip_plot: plt.legend() plt.title(args.plot_title) @@ -48,5 +51,35 @@ def main(): plt.ylabel(args.plot_y_label) plt.savefig(args.plot_name) +def plot_csv(): + target_suffix = 'csv' + csv_log_files = find_files_suffix(args.csv_dir, target_suffix) + + analyzer = get_analyzer(args.analyzer) + + for csv_file in csv_log_files: + analyzer.set_names(csv_file) + + x, y = [], [] + with open(csv_file, 'r') as file: + reader = csv.reader(file) + for row in reader: + if row[1] == 'step': + continue + x.append(int(row[1])) # Assuming the first column contains x values + y.append(float(row[2])) # Assuming the second column contains y values + + plt.plot(x, y, label=f'{analyzer.get_label_name()}') + + plt.grid(True) + plt.legend() + plt.title(args.plot_title) + plt.xlabel(args.plot_x_label) + plt.ylabel(args.plot_y_label) + plt.savefig(args.plot_name) + if __name__ == "__main__": - main() + if args.plot_only: + plot_csv() + else: + main() diff --git a/examples_deepspeed/universal_checkpointing/tb_analysis/uc_analysis.py b/examples_deepspeed/universal_checkpointing/tb_analysis/uc_analysis.py index f5809c3dc1..20d46ff6a8 100644 --- a/examples_deepspeed/universal_checkpointing/tb_analysis/uc_analysis.py +++ b/examples_deepspeed/universal_checkpointing/tb_analysis/uc_analysis.py @@ -19,7 +19,7 @@ def set_names(self, path_name): tp, pp, dp, sp = match.groups() self._label_name = f"Training Run: TP: {tp}, PP: {pp}, DP: {dp}" - self._csv_name = 
f"uc_out_tp_{tp}_pp_{pp}_dp_{dp}_sp_{sp}" + self._csv_name = f"uc_out_tp{tp}_pp{pp}_dp{dp}_sp{sp}" def get_label_name(self): return self._label_name diff --git a/examples_deepspeed/universal_checkpointing/tb_analysis/utils.py b/examples_deepspeed/universal_checkpointing/tb_analysis/utils.py index 4bbbb3f2f0..db6624bbc4 100644 --- a/examples_deepspeed/universal_checkpointing/tb_analysis/utils.py +++ b/examples_deepspeed/universal_checkpointing/tb_analysis/utils.py @@ -7,13 +7,13 @@ from uc_analysis import UniversalCheckpointingAnalysis -def find_files(directory, file_affix): +def find_files_prefix(directory, file_prefix): """ - Searches for files with a specific affix in a directory using os.walk(). + Searches for files with a specific prefix in a directory using os.walk(). Args: directory (str): The path to the directory to search. - file_affix (str): The desired file affix. + file_prefix (str): The desired file prefix. Returns: list: A list of paths to matching files. @@ -21,10 +21,28 @@ def find_files(directory, file_affix): matching_paths = [] for root, _, files in os.walk(directory): for filename in files: - if root not in matching_paths and filename.lower().startswith(file_affix.lower()): + if root not in matching_paths and filename.lower().startswith(file_prefix.lower()): matching_paths.append(os.path.join(root)) return matching_paths +def find_files_suffix(directory, file_suffix): + """ + Recursively searches a directory for files with a specific suffix (case-insensitive) using os.walk(). + + Args: + directory (str): The path to the directory to search. + file_suffix (str): The desired file suffix. + + Returns: + list: Full paths (containing directory joined with filename) of the matching files.
+ """ + matching_paths = [] + for root, _, files in os.walk(directory): + for filename in files: + if filename.lower().endswith(file_suffix.lower()): + matching_paths.append(os.path.join(root, filename)) + return matching_paths + def get_analyzer(analyzer_name): if analyzer_name == 'universal_checkpointing': return UniversalCheckpointingAnalysis() From 1bfc35c4e279b4edcd8291cb99dd3992e02bc8ba Mon Sep 17 00:00:00 2001 From: Jinghan Yao Date: Thu, 1 Aug 2024 15:35:11 -0700 Subject: [PATCH 28/92] fixing the bug of flash_attn import and the wrong gather index when using flash_attn_cuda in sequence parallel (#406) Co-authored-by: Jinghan Yao --- megatron/model/transformer.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/megatron/model/transformer.py b/megatron/model/transformer.py index e79abea3cf..7467190582 100644 --- a/megatron/model/transformer.py +++ b/megatron/model/transformer.py @@ -37,9 +37,12 @@ try: # FlashAttention (1.x) from flash_attn.flash_attn_interface import flash_attn_unpadded_func - from flash_attn.flash_attn_triton import flash_attn_func except ImportError: flash_attn_unpadded_func = None + +try: + from flash_attn.flash_attn_triton import flash_attn_func +except ImportError: flash_attn_func = None try: @@ -599,7 +602,11 @@ def __init__(self, config, layer_number, if self.enable_ds_sequence_parallel: assert dist_attn_supported, 'Distributed attention is not supported in this DeepSpeed version' assert args.num_attention_heads % parallel_state.get_sequence_parallel_world_size() == 0 - self.dist_attn = DistributedAttention(local_attn, parallel_state.get_sequence_parallel_group()) + self.dist_attn = DistributedAttention( + local_attn, + parallel_state.get_sequence_parallel_group(), + gather_idx=1 if args.use_flash_attn_v1 or args.use_flash_attn_v2 else 0) + # flash_attn_cuda assumes [b, s, nh, hd] layout, we need to make sure all2all gathers into the correct sequence dimension.
else: if self.use_flash_attn: self.core_attention_flash = local_attn From 53b241f992f9b3dd7917bc36472f60cb118f8303 Mon Sep 17 00:00:00 2001 From: YiSheng5 Date: Mon, 5 Aug 2024 00:20:18 +0800 Subject: [PATCH 29/92] add fused_rms_norm support on XPU device (#431) --- megatron/model/__init__.py | 7 ++++++- megatron/model/fused_rmsnorm.py | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 megatron/model/fused_rmsnorm.py diff --git a/megatron/model/__init__.py b/megatron/model/__init__.py index 2306749fcb..00c7322331 100644 --- a/megatron/model/__init__.py +++ b/megatron/model/__init__.py @@ -1,11 +1,16 @@ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +import torch +import intel_extension_for_pytorch from deepspeed.accelerator.real_accelerator import get_accelerator if get_accelerator().device_name() == 'cuda': from .fused_layer_norm import MixedFusedLayerNorm as LayerNorm from apex.normalization import MixedFusedRMSNorm as RMSNorm else: - from .rmsnorm import RMSNorm + if hasattr(torch.xpu, "IpexRmsNorm"): + from .fused_rmsnorm import RMSNorm + else: + from .rmsnorm import RMSNorm from torch.nn import LayerNorm from .distributed import DistributedDataParallel diff --git a/megatron/model/fused_rmsnorm.py b/megatron/model/fused_rmsnorm.py new file mode 100644 index 0000000000..a3b9927c87 --- /dev/null +++ b/megatron/model/fused_rmsnorm.py @@ -0,0 +1,14 @@ +import torch +from torch.nn.parameter import Parameter +import intel_extension_for_pytorch as ipex # noqa + +# Taken from facebookresearch/llama +class RMSNorm(torch.nn.Module): + def __init__(self, dim: int, eps: float = 1e-6): + super().__init__() + self.eps = eps + self.weight = Parameter(torch.ones(dim)) + + def forward(self, x): + output = torch.xpu.IpexRmsNorm(x, self.weight.shape, self.weight, self.eps) + return output From 61350c55478fba29ecf40940a629a3e7ce008a05 Mon Sep 17 00:00:00 2001 From: Jinghan Yao Date: Wed, 7 Aug 2024 16:11:57 -0700 Subject: [PATCH 
30/92] pass batch_dim_idx to deepspeed sequence parallel distributed attention for supporting batch size larger than 1 (#433) Co-authored-by: Jinghan Yao --- megatron/model/transformer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/megatron/model/transformer.py b/megatron/model/transformer.py index 7467190582..036c11566a 100644 --- a/megatron/model/transformer.py +++ b/megatron/model/transformer.py @@ -817,12 +817,14 @@ def forward(self, hidden_states, attention_mask, # value_layer = apply_rotary_pos_emb(value_layer, k_pos_emb) if self.enable_ds_sequence_parallel: + batch_dim_idx = 1 if self.use_flash_attn: if not self.use_flash_attn_triton: query_layer, key_layer, value_layer = [rearrange(x, 's b ... -> b s ...').contiguous() for x in (query_layer, key_layer, value_layer)] + batch_dim_idx = 0 - context_layer = self.dist_attn(query_layer, key_layer, value_layer) + context_layer = self.dist_attn(query_layer, key_layer, value_layer, batch_dim_idx) if not self.use_flash_attn_triton: context_layer = rearrange(context_layer, 'b s h d -> s b (h d)').contiguous() From f132876c41915483d07e6424addc9b5e4857ec4c Mon Sep 17 00:00:00 2001 From: billishyahao Date: Sat, 10 Aug 2024 23:45:27 +0800 Subject: [PATCH 31/92] [LLaMa] Adding support converting checkpoint from mds to hf (#432) * add support converting checkpoint from hf to mds * Fix PP issue * update --- .../finetune_hf_llama/ds_config.json | 8 +- .../finetune_hf_llama/finetune_llama.sh | 23 +- megatron/global_vars.py | 1 + tools/hf2megads_weight_converter.py | 306 +++++++++++++++--- 4 files changed, 280 insertions(+), 58 deletions(-) diff --git a/examples_deepspeed/finetune_hf_llama/ds_config.json b/examples_deepspeed/finetune_hf_llama/ds_config.json index 9c0b332473..85f439ce47 100755 --- a/examples_deepspeed/finetune_hf_llama/ds_config.json +++ b/examples_deepspeed/finetune_hf_llama/ds_config.json @@ -1,11 +1,5 @@ { "train_batch_size" : 256, "train_micro_batch_size_per_gpu": 16, - 
"steps_per_print": 100, - "zero_optimization": { - "stage": 0 - }, - "bf16": { - "enabled": true - } + "steps_per_print": 1 } diff --git a/examples_deepspeed/finetune_hf_llama/finetune_llama.sh b/examples_deepspeed/finetune_hf_llama/finetune_llama.sh index c48ea11b93..ab8bfdf419 100644 --- a/examples_deepspeed/finetune_hf_llama/finetune_llama.sh +++ b/examples_deepspeed/finetune_hf_llama/finetune_llama.sh @@ -1,8 +1,8 @@ DS_CONFIG=./examples_deepspeed/finetune_hf_llama/ds_config.json -DATASET_PATH=./alpaca_data.json +DATASET_PATH=./examples_deepspeed/finetune_hf_llama/alpaca_data.json # dataset link: https://github.com/tatsu-lab/stanford_alpaca/blob/main/alpaca_data.json -HF_LLAMA_PATH=/data/llama-7b/ +HF_LLAMA_PATH=/data/llama-2-7b-hf/ # weights link: https://huggingface.co/huggyllama/llama-7b MICRO_BATCH_SIZE=16 @@ -44,11 +44,20 @@ cat < $DS_CONFIG EOT -covert_args="deepspeed tools/hf2megads_weight_converter.py \ +covert_hf2mds_args="deepspeed tools/hf2megads_weight_converter.py \ --hf-ckpt-num-shards 2 \ ---origin-hf-ckpt-dir $HF_LLAMA_PATH \ +--hf-ckpt-dir $HF_LLAMA_PATH \ +--load-mode auto \ --save $MEGA_DS_LLAMA_PATH" +covert_mds2hf_args="deepspeed tools/hf2megads_weight_converter.py \ +--hf-ckpt-num-shards 2 \ +--hf-ckpt-dir $HF_LLAMA_PATH \ +--load-mode auto \ +--to-hf-ckpt \ +--load $MEGA_DS_LLAMA_PATH \ +--save $HF_LLAMA_PATH'-hf-out' " + finetune_args="deepspeed finetune_llama.py \ --load $MEGA_DS_LLAMA_PATH" @@ -98,8 +107,10 @@ comm_args="--tensor-model-parallel-size $TP \ --no-gradient-accumulation-fusion \ --repeated-dataloader" -if [ "$1" = "convert" ]; then - task_args="$covert_args" +if [ "$1" = "convert_hf2mds" ]; then + task_args="$covert_hf2mds_args" +elif [ "$1" = "convert_mds2hf" ]; then + task_args="$covert_mds2hf_args" else task_args="$finetune_args" fi diff --git a/megatron/global_vars.py b/megatron/global_vars.py index 3f9d6fd66b..ccd0a4e21e 100644 --- a/megatron/global_vars.py +++ b/megatron/global_vars.py @@ -175,6 +175,7 @@ def 
_set_wandb_writer(args): 'project or experiment name provided, ' 'therefore WANDB logs will be written ' 'according to random generated project or experiment name.', flush=True) + return try: import wandb diff --git a/tools/hf2megads_weight_converter.py b/tools/hf2megads_weight_converter.py index bfbde1fd05..12468963c5 100755 --- a/tools/hf2megads_weight_converter.py +++ b/tools/hf2megads_weight_converter.py @@ -3,9 +3,11 @@ import sys import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import torch.distributed from torch.nn.parallel.distributed import DistributedDataParallel as torchDDP from megatron import print_rank_0, get_tokenizer, get_args from megatron.core import mpu +from megatron.core import tensor_parallel from megatron.core.utils import divide from megatron.model import GPTModelPipe, Float16Module from megatron.utils import unwrap_model @@ -13,20 +15,30 @@ from megatron.arguments import core_transformer_config_from_args from megatron.initialize import initialize_megatron from megatron.optimizer import get_megatron_optimizer -from megatron.checkpointing import save_checkpoint +from megatron.checkpointing import save_checkpoint, load_checkpoint from megatron.training import get_optimizer_param_scheduler from deepspeed.runtime.utils import see_memory_usage import deepspeed +import copy +from pathlib import Path + def add_extra_args(parser): """Text generation arguments.""" group = parser.add_argument_group(title='hf2mega') - group.add_argument("--hf-ckpt-num-shards", type=int, help='num of llama ckpt.') - group.add_argument("--origin-hf-ckpt-dir", + group.add_argument("--hf-ckpt-dir", type=str, default="", - help="the original path of the llama-hf ckpt") + help="the llama-hf ckpt") + group.add_argument("--hf-ckpt-num-shards", type=int, default=-1, help='num of llama ckpt.') + group.add_argument("--load-mode", type=str, + default=None, + choices=['torchbin', 'safetensor', 'auto'], + help="load ckpt format: pytorch.bin or 
model.safetensor or auto.") + group.add_argument("--to-hf-ckpt", action="store_true", + help="by default convert from hf to megads" + "if set, convert reversely from megads to hf ckpt.") return parser @@ -55,6 +67,49 @@ def load_and_print_hf_weight(hf_ckpt_dir, hf_ckpt_num_of_shards): return loaded +def load_and_print_hf_weight_from_safetensor(hf_ckpt_dir, hf_ckpt_num_of_shards): + from safetensors import safe_open + # Optimization point: We can selectively load specific 'shared' data to reduce CPU memory usage. + hf_model = {} + print_rank_0( + f"----------------------------hf weight list----------------------------") + + for wid in range(1, hf_ckpt_num_of_shards + 1): + if hf_ckpt_num_of_shards == 1: + ckpt_path = f"{hf_ckpt_dir}/model.safetensors" + else: + ckpt_path = f"{hf_ckpt_dir}/model-{wid:05d}-of-{hf_ckpt_num_of_shards:05d}.safetensors" + + with safe_open(ckpt_path, framework="pt", device="cpu") as f: + for k in f.keys(): + print_rank_0(f"name: {k}, shape: {f.get_tensor(k).shape}") + assert k not in hf_model + hf_model[k] = f.get_tensor(k).clone() + + return hf_model + + +def load_and_print_hf_weight_auto(hf_ckpt_dir, no_init=True): + from transformers import AutoConfig, AutoModelForCausalLM + from transformers.modeling_utils import no_init_weights + + if no_init: + hf_config = AutoConfig.from_pretrained(hf_ckpt_dir, trust_remote_code=True) + with no_init_weights(): + hf_model = AutoModelForCausalLM.from_config(hf_config, trust_remote_code=True, torch_dtype=torch.bfloat16) + else: + hf_model = {} + hf_auto_model = AutoModelForCausalLM.from_pretrained(hf_ckpt_dir, trust_remote_code=True, torch_dtype=torch.bfloat16) + print_rank_0( + f"----------------------------hf weight list----------------------------") + + for name, param in hf_auto_model.named_parameters(): + hf_model[name] = param.clone() + print_rank_0(name) + + return hf_model + + def print_distinct_weights(model): print_rank_0( f"----------------------------mega-ds weight 
list----------------------------") @@ -70,16 +125,19 @@ def print_distinct_weights(model): class refactor: - def __init__(self, model, loaded, args, config): + def __init__(self, ds_model, hf_model, args, config): tokenizer = get_tokenizer() # align layer number - self.model = model - self.loaded = loaded + self.ds_model = ds_model + self.hf_model = hf_model + self.hf_dict = {} # for handling pp case when converting mds => hf self.config = config self.offset_num = 2 self.mega_emb_wnum = 1 self.mega_norm_wnum = args.num_layers + 2 + self.num_attention_heads = args.num_attention_heads + self.num_key_value_heads = args.num_key_value_heads self.mega_lm_head_wnum = self.mega_norm_wnum + 1 self.token_vocab = tokenizer.vocab_size self.padded_vocab_size = args.padded_vocab_size @@ -95,7 +153,7 @@ def _embedding_refactor(self, pname, p): hf_name = "lm_head.weight" elif pname == f"{self.mega_emb_wnum}.word_embeddings.weight": hf_name = "model.embed_tokens.weight" - hf_w = self.loaded[hf_name] + hf_w = self.hf_model[hf_name] assert hf_w.shape[0] == self.token_vocab per_partition_vocab_size, start_index, end_index = compute_partition_range( self.padded_vocab_size, self.tp_rank, self.tp_size) @@ -112,24 +170,28 @@ def _embedding_refactor(self, pname, p): ) return new_w + + + def _direct_refactor(self, pname, p, hf_layer=None, subname=None): if pname == f"{self.mega_norm_wnum}.weight": hf_name = "model.norm.weight" elif subname in ["input_layernorm.weight", "post_attention_layernorm.weight"]: hf_name = f"model.layers.{hf_layer}.{subname}" - new_w = hf_w = self.loaded[hf_name] + new_w = hf_w = self.hf_model[hf_name] self.record_mapping_info( f"mega-ds:{pname,p.data.shape}<--hf{hf_name,} {hf_w.shape}") return new_w + def _qkv_refactor(self, pname, p, hf_layer): hf_wq_name = f"model.layers.{hf_layer}.self_attn.q_proj.weight" hf_wk_name = f"model.layers.{hf_layer}.self_attn.k_proj.weight" hf_wv_name = f"model.layers.{hf_layer}.self_attn.v_proj.weight" - wq = self.loaded[hf_wq_name] 
- wk = self.loaded[hf_wk_name] - wv = self.loaded[hf_wv_name] + wq = self.hf_model[hf_wq_name] + wk = self.hf_model[hf_wk_name] + wv = self.hf_model[hf_wv_name] hidden_size = wq.shape[0] per_partition_size, start_index, end_index = compute_partition_range( @@ -159,8 +221,8 @@ def _qkv_refactor(self, pname, p, hf_layer): def _mlphto4h_dense_refactor(self, pname, p, hf_layer): hf_w_gate_name = f"model.layers.{hf_layer}.mlp.gate_proj.weight" hf_w_up_name = f"model.layers.{hf_layer}.mlp.up_proj.weight" - w_gate = self.loaded[hf_w_gate_name] - w_up = self.loaded[hf_w_up_name] + w_gate = self.hf_model[hf_w_gate_name] + w_up = self.hf_model[hf_w_up_name] hidden_size = w_gate.shape[0] per_partition_size, start_index, end_index = compute_partition_range( @@ -184,7 +246,7 @@ def _attn_dense_refactor(self, pname, p, hf_layer, subname): else: hf_name = f"model.layers.{hf_layer}.mlp.down_proj.weight" - hf_w = self.loaded[hf_name] + hf_w = self.hf_model[hf_name] hidden_size = hf_w.shape[1] per_partition_size, start_index, end_index = compute_partition_range( hidden_size, self.tp_rank, self.tp_size) @@ -200,7 +262,7 @@ def _mlphto4h1_refactor(self, pname, p, hf_layer, subname): hf_name = f"model.layers.{hf_layer}.mlp.gate_proj.weight" else: hf_name = f"model.layers.{hf_layer}.mlp.up_proj.weight" - hf_w = self.loaded[hf_name] + hf_w = self.hf_model[hf_name] hidden_size = hf_w.shape[0] per_partition_size, start_index, end_index = compute_partition_range( hidden_size, self.tp_rank, self.tp_size) @@ -212,10 +274,11 @@ def _mlphto4h1_refactor(self, pname, p, hf_layer, subname): ) return new_w - def refactor(self): + def transform_from_hf_to_megds(self): assert self.is_refactored == False new_w = None - for pname, p in self.model.named_parameters(): + for pname, p in self.ds_model.named_parameters(): + if pname in [ f"{self.mega_emb_wnum}.word_embeddings.weight", f"{self.mega_lm_head_wnum}.lm_head.weight" @@ -253,6 +316,123 @@ def refactor(self): new_w = None self.is_refactored = True 
+ + def _embedding_refactor_to_hf(self, pname, ds_w): + if pname == f"{self.mega_lm_head_wnum}.lm_head.weight": + hf_w = self.hf_model.lm_head.weight + hf_w_name = "lm_head.weight" + elif pname == f"{self.mega_emb_wnum}.word_embeddings.weight": + hf_w = self.hf_model.model.embed_tokens.weight + hf_w_name = "model.embed_tokens.weight" + + with torch.no_grad(): + ds_w_all_rank = tensor_parallel.mappings._gather_along_first_dim(ds_w) + + self.hf_dict[hf_w_name] = copy.deepcopy(ds_w_all_rank[:hf_w.shape[0], :]) + + def _direct_refactor_to_hf(self, pname, ds_w, hf_layer=None, subname=None): + if pname in [f"{self.mega_norm_wnum}.weight"]: + hf_w = self.hf_model.model.norm.weight + hf_w_name = "model.norm.weight" + elif subname in ["input_layernorm.weight"]: + hf_w = self.hf_model.model.layers[hf_layer].input_layernorm.weight + hf_w_name = f"model.layers.{hf_layer}.input_layernorm.weight" + elif subname in ["post_attention_layernorm.weight"]: + hf_w = self.hf_model.model.layers[hf_layer].post_attention_layernorm.weight + hf_w_name = f"model.layers.{hf_layer}.post_attention_layernorm.weight" + + self.hf_dict[hf_w_name] = copy.deepcopy(ds_w) + + def _attn_dense_refactor_to_hf(self, pname, ds_w, hf_layer, subname): + if subname == "self_attention.dense.weight": + hf_w = self.hf_model.model.layers[hf_layer].self_attn.o_proj.weight + hf_w_name = f"model.layers.{hf_layer}.self_attn.o_proj.weight" + elif subname == "mlp.dense_4h_to_h.weight": + hf_w = self.hf_model.model.layers[hf_layer].mlp.down_proj.weight + hf_w_name = f"model.layers.{hf_layer}.mlp.down_proj.weight" + + with torch.no_grad(): + ds_w_all_rank = tensor_parallel.mappings._gather_along_last_dim(ds_w) + + self.hf_dict[hf_w_name] = copy.deepcopy(ds_w_all_rank) + + def _mlphto4h_dense_refactor_to_hf(self, pname, ds_w, hf_layer): + hf_g_name = f"model.layers.{hf_layer}.mlp.gate_proj.weight" + hf_u_name = f"model.layers.{hf_layer}.mlp.up_proj.weight" + + with torch.no_grad(): + ds_w_all_rank = 
tensor_parallel.mappings._gather_along_first_dim(ds_w) + + ds_w_shape = ds_w_all_rank.shape + ds_w_all_rank = ds_w_all_rank.reshape(self.tp_size, 2, -1, ds_w_shape[-1]) + self.hf_dict[hf_g_name] = copy.deepcopy(ds_w_all_rank[:, 0, :, :].reshape(-1, ds_w_shape[-1])) + self.hf_dict[hf_u_name] = copy.deepcopy(ds_w_all_rank[:, 1, :, :].reshape(-1, ds_w_shape[-1])) + + + def _qkv_refactor_to_hf(self, pname, ds_w, hf_layer): + with torch.no_grad(): + ds_w_all_rank = tensor_parallel.mappings._gather_along_first_dim(ds_w) + + hf_q = self.hf_model.model.layers[hf_layer].self_attn.q_proj.weight + hf_k = self.hf_model.model.layers[hf_layer].self_attn.k_proj.weight + hf_v = self.hf_model.model.layers[hf_layer].self_attn.v_proj.weight + hf_q_name = f"model.layers.{hf_layer}.self_attn.q_proj.weight" + hf_k_name = f"model.layers.{hf_layer}.self_attn.k_proj.weight" + hf_v_name = f"model.layers.{hf_layer}.self_attn.v_proj.weight" + oldshape = hf_q.shape + hidden_size = oldshape[-1] + hidden_size_per_attention_head = divide(hidden_size, + self.config.num_attention_heads) + num_attention_heads_per_partition = divide(self.config.num_attention_heads, + self.tp_size) + newshape = (self.tp_size, num_attention_heads_per_partition, 3, hidden_size_per_attention_head, hidden_size) + ds_w_out = ds_w_all_rank.reshape(*newshape) + self.hf_dict[hf_q_name] = copy.deepcopy(ds_w_out[:, :, 0, :, :].reshape(-1, oldshape[-1])) + self.hf_dict[hf_k_name] = copy.deepcopy(ds_w_out[:, :, 1, :, :].reshape(-1, oldshape[-1])) + self.hf_dict[hf_v_name] = copy.deepcopy(ds_w_out[:, :, 2, :, :].reshape(-1, oldshape[-1])) + + + def transform_from_megads_to_hf(self): + use_gqa = True if self.num_attention_heads != self.num_key_value_heads else False + + for pname, p in self.ds_model.named_parameters(): + if pname in [ + f"{self.mega_emb_wnum}.word_embeddings.weight", + f"{self.mega_lm_head_wnum}.lm_head.weight", + ]: + self._embedding_refactor_to_hf(pname, p) + elif pname in [ + f"{self.mega_norm_wnum}.weight", + 
]: + self._direct_refactor_to_hf(pname, p) + else: + mobj = self.decoder_pat.match(pname) + layer_num = int(mobj.group(1)) + subname = mobj.group(2) + hf_layer = layer_num - self.offset_num + if subname in ["self_attention.query_key_value.weight"]: + if not use_gqa: + self._qkv_refactor_to_hf(pname, p, hf_layer) + else: + #TODO(billishyahao): Not impl yet ... + assert False + elif subname in ["mlp.dense_h_to_4h.weight"]: + self._mlphto4h_dense_refactor_to_hf(pname, p, hf_layer) + elif subname in [ + "self_attention.dense.weight", + "mlp.dense_4h_to_h.weight" + ]: + self._attn_dense_refactor_to_hf(pname, p, hf_layer, subname) + elif subname in [ + "input_layernorm.weight", + "post_attention_layernorm.weight", + ]: + self._direct_refactor_to_hf(pname, p, hf_layer, subname) + else: + print(f"Unrecognized weight type: {pname}") + raise ValueError(f"Unrecognized weight type: {pname}") + self.is_refactored = True + def record_mapping_info(self, record_msg): self.refactor_weight_list.append(record_msg) @@ -272,7 +452,18 @@ def inorder_show_record(self): torch.distributed.barrier() -def convert_hf_to_mega_ds(): +def load_hf_weights(args, no_init): + if args.load_mode == 'torchbin': + assert no_init == False, "only work with init" + return load_and_print_hf_weight(args.hf_ckpt_dir, args.hf_ckpt_num_shards) + elif args.load_mode == 'safetensor': + assert no_init == False, "only work with init" + return load_and_print_hf_weight_from_safetensor(args.hf_ckpt_dir, args.hf_ckpt_num_shards) + elif args.load_mode == 'auto': + return load_and_print_hf_weight_auto(args.hf_ckpt_dir, no_init) + + +def convert_ckpt(): """Build the model.""" args = get_args() print_rank_0(f'building model ...') @@ -286,49 +477,74 @@ def convert_hf_to_mega_ds(): enabled=args.zero_stage == 3, mpu=mpu): if args.deepspeed and not args.no_pipeline_parallel: - model = GPTModelPipe(config, num_tokentypes=0, parallel_output=True) + ds_model = GPTModelPipe(config, num_tokentypes=0, parallel_output=True) else: 
raise NotImplementedError("Not implemented") see_memory_usage(f"After Building Model", force=True) if torch.distributed.get_rank() < 2: - print(f"{torch.distributed.get_rank()} {model}") - - # load and initialize HF weight dict - # print hf weights list & mega-ds weights list - hf_ckpt_dir = args.origin_hf_ckpt_dir - hf_ckpt_num_of_shards = args.hf_ckpt_num_shards - loaded = load_and_print_hf_weight(hf_ckpt_dir, hf_ckpt_num_of_shards) - print_distinct_weights(model) - - # refactor weight from hf to mega-ds - - cur_refactor = refactor(model, loaded, args, config) - cur_refactor.refactor() - cur_refactor.inorder_show_record() + print(f"{torch.distributed.get_rank()} {ds_model}") - del loaded + # 'torchbin', 'safetensor', 'auto' + hf_model = load_hf_weights(args, no_init=args.to_hf_ckpt) - unwrapped_model = unwrap_model([model], (torchDDP, LocalDDP, Float16Module)) - optimizer = get_megatron_optimizer(unwrapped_model) - opt_param_scheduler = get_optimizer_param_scheduler(optimizer) + # print_distinct_weights(hf_model) #init model and save print_rank_0(f"before deepspeed init") ds_engine, _, _, _ = deepspeed.initialize( - model=model, - optimizer=optimizer, + model=ds_model, + optimizer=None, args=args, - lr_scheduler=opt_param_scheduler, + lr_scheduler=None, mpu=mpu if args.no_pipeline_parallel else None) print_rank_0(f"after deepspeed init") - print_rank_0(f"mega-ds checkpoint will be saved in {args.save}") - save_checkpoint(0, [ds_engine], optimizer, opt_param_scheduler) - print_rank_0(f"save checkpoint completed") + if args.to_hf_ckpt: + load_checkpoint([ds_engine], None, None, load_only_weights=True) + print_rank_0(f"completed to load deepspeed actual checkpoint") + + # refactor weight from hf to mega-ds and vice versa + + cur_refactor = refactor(ds_model, hf_model, args, config) + if args.to_hf_ckpt: + cur_refactor.transform_from_megads_to_hf() + else: + cur_refactor.transform_from_hf_to_megds() + # cur_refactor.inorder_show_record() + + if args.to_hf_ckpt: + 
save_path = args.save + if not os.path.exists(save_path): + Path(save_path).mkdir(parents=True, exist_ok=True) + ckpt_per_pp_path = os.path.join(save_path, f"model_pp{mpu.get_pipeline_model_parallel_rank()}.pt") + torch.save(cur_refactor.hf_dict, ckpt_per_pp_path) + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + print_rank_0(f"hf checkpoint will be saved in {save_path}/release ") + if mpu.is_pipeline_last_stage(): + ## doing checkpoint merging and saving... + # hf_model.tie_weights() + + all_wei = {} + for pprank in range(mpu.get_pipeline_model_parallel_world_size()): + ckpt_per_pp_path = os.path.join(save_path, f"model_pp{pprank}.pt") + partial_wei = torch.load(ckpt_per_pp_path) + all_wei = all_wei | partial_wei + + hf_model.load_state_dict(all_wei) + + # mega-ds checkpoint will be saved in args.save + hf_model.save_pretrained(os.path.join(save_path, "release"), safe_serialization=True) + else: + print_rank_0(f"mega-ds checkpoint will be saved in {args.save}") + save_checkpoint(0, [ds_engine], None, None) + + print_rank_0(f"save checkpoint completed") if __name__ == "__main__": initialize_megatron(extra_args_provider=add_extra_args) - convert_hf_to_mega_ds() + convert_ckpt() From cdf51944deecad75572ebfd7cddfb0abfd1b9987 Mon Sep 17 00:00:00 2001 From: YiSheng5 Date: Thu, 15 Aug 2024 04:04:01 +0800 Subject: [PATCH 32/92] add device check when import ipex (#436) --- megatron/model/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/megatron/model/__init__.py b/megatron/model/__init__.py index 00c7322331..64158ef99a 100644 --- a/megatron/model/__init__.py +++ b/megatron/model/__init__.py @@ -1,8 +1,10 @@ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 
import torch -import intel_extension_for_pytorch from deepspeed.accelerator.real_accelerator import get_accelerator + +if get_accelerator().device_name() == 'xpu': + import intel_extension_for_pytorch if get_accelerator().device_name() == 'cuda': from .fused_layer_norm import MixedFusedLayerNorm as LayerNorm from apex.normalization import MixedFusedRMSNorm as RMSNorm From b7b2d5ef330f43729b406630e6c5d38e873d7398 Mon Sep 17 00:00:00 2001 From: Polisetty V R K Jyothendra Varma Date: Mon, 19 Aug 2024 19:05:27 +0530 Subject: [PATCH 33/92] fix TFLOPs calculation (#371) * fix TFLOPs calculation when GQA used, we observe right TFLOPs after this fix. when GQA is not used, huge difference in TFLOPs is solved with selective recompute . some other minor difference will also be observed as logits macs also added. * add copyrights --- megatron/utils.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/megatron/utils.py b/megatron/utils.py index cbb7aa6426..15160b1644 100644 --- a/megatron/utils.py +++ b/megatron/utils.py @@ -275,22 +275,38 @@ def throughput_calculator(model, args, iteration_time, total_iterations): #flops calculator hidden_size = args.hidden_size + num_attention_heads = args.num_attention_heads + head_dim = hidden_size // num_attention_heads + ffn_hidden_size = args.ffn_hidden_size num_layers = args.num_layers vocab_size = args.padded_vocab_size + gqa = args.num_attention_heads // args.num_key_value_heads + ffn_multiplier = 3 if args.swiglu else 2 + macs_per_flops = 2 # General TFLOPs formula (borrowed from Equation 3 in Section 5.1 of # https://arxiv.org/pdf/2104.04473.pdf). - # The factor of 4 is when used with activation check-pointing, - # otherwise it will be 3. 
- checkpoint_activations_factor = 3 - if hasattr(args, 'checkpoint_activations') and args.checkpoint_activations: - checkpoint_activations_factor = 4 - if hasattr(args, 'recompute_granularity') and (args.recompute_granularity == 'selective' or args.recompute_granularity == 'full'): - checkpoint_activations_factor = 4 + # correction has been made to TFLOPs formula due to incorrect behavior + # observed with selective recompute when GQA not used and for all with GQA seq_len = args.seq_length if hasattr(args, 'actual_seq_length'): seq_len = args.actual_seq_length - flops_per_iteration = (24 * checkpoint_activations_factor * batch_size * seq_len * num_layers * (hidden_size**2)) * (1. + (seq_len / (6. * hidden_size)) + (vocab_size / (16. * num_layers * hidden_size))) + + pre_and_post_mha_gemm_macs = batch_size * num_layers * (1 + (2 // gqa) + 1) * (hidden_size**2) * seq_len + mha_bgemm_macs = batch_size * num_layers * 2 * head_dim * num_attention_heads * (seq_len**2) + ffn_gemm_macs = batch_size * num_layers * ffn_multiplier * ffn_hidden_size * hidden_size * seq_len + logit_lmhead_gemm_macs = batch_size * vocab_size * hidden_size * seq_len + + fwd_macs = pre_and_post_mha_gemm_macs + mha_bgemm_macs + ffn_gemm_macs + logit_lmhead_gemm_macs + bwd_macs = 2 * fwd_macs + fwd_bwd_macs = fwd_macs + bwd_macs + + if (hasattr(args, 'checkpoint_activations') and args.checkpoint_activations) or (hasattr(args, 'recompute_granularity') and args.recompute_granularity == 'full'): + fwd_bwd_macs += fwd_macs + if hasattr(args, 'recompute_granularity') and args.recompute_granularity == 'selective': + fwd_bwd_macs += mha_bgemm_macs + + flops_per_iteration = fwd_bwd_macs * macs_per_flops tflops = flops_per_iteration / (elapsed_time_per_iter * args.world_size * (10**12)) return samples_per_second, tflops, approx_parameters_in_billions From 4f9f1f63775daaa4c29de780a8319cd152ed0cea Mon Sep 17 00:00:00 2001 From: YiSheng5 Date: Sat, 24 Aug 2024 22:53:40 +0800 Subject: [PATCH 34/92] fix nan issue 
when running megatron-deepspeed (#434) --- megatron/training.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/megatron/training.py b/megatron/training.py index 6ba26f3944..e7d7aed809 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -672,7 +672,6 @@ def train_step(forward_step_func, data_iterator, timers = get_timers() if args.deepspeed and args.ds_pipeline_enabled: - skipped_iter = 0 num_zeros_in_grad = 0 assert isinstance(model[0], deepspeed.PipelineEngine) loss = model[0].train_batch(data_iter=data_iterator) @@ -682,6 +681,8 @@ def train_step(forward_step_func, data_iterator, if additional_losses is not None: loss_dict.update(additional_losses) grad_norm = model[0].get_global_grad_norm() + update_successful = model[0].was_step_applied() + skipped_iter = 0 if update_successful else 1 return loss_dict, skipped_iter, grad_norm, num_zeros_in_grad # Set grad to zero. @@ -760,7 +761,7 @@ def train_step(forward_step_func, data_iterator, # Update learning rate. if args.deepspeed: - skipped_iter = 0 + skipped_iter = 0 if update_successful else 1 grad_norm = None num_zeros_in_grad = None From 8e9d97337a8533aef7b2f3f72dbd4248fdd1d4d2 Mon Sep 17 00:00:00 2001 From: YiSheng5 Date: Mon, 26 Aug 2024 23:49:19 +0800 Subject: [PATCH 35/92] enable empty cache on XPU device (#438) --- megatron/training.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/megatron/training.py b/megatron/training.py index e7d7aed809..79f39ccc2e 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -725,7 +725,7 @@ def train_step(forward_step_func, data_iterator, # Empty unused memory. if args.empty_unused_memory_level >= 1: - torch.cuda.empty_cache() + get_accelerator().empty_cache() # Reduce gradients. if not args.deepspeed: @@ -782,7 +782,7 @@ def train_step(forward_step_func, data_iterator, # Empty unused memory. 
if args.empty_unused_memory_level >= 2: - torch.cuda.empty_cache() + get_accelerator().empty_cache() if mpu.is_pipeline_last_stage(ignore_virtual=True): # Average loss across microbatches. @@ -1438,7 +1438,7 @@ def evaluate(forward_step_func, # Empty unused memory if args.empty_unused_memory_level >= 1: - torch.cuda.empty_cache() + get_accelerator().empty_cache() if mpu.is_pipeline_last_stage(ignore_virtual=True): # Reduce across processes. From 543543acae44d15f4d3f83936a34c1dbd1e32a7b Mon Sep 17 00:00:00 2001 From: billishyahao Date: Wed, 28 Aug 2024 01:21:26 +0800 Subject: [PATCH 36/92] [wandb] disable wandb more gracefully (#422) Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com> --- megatron/global_vars.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/megatron/global_vars.py b/megatron/global_vars.py index ccd0a4e21e..ec6abdbda8 100644 --- a/megatron/global_vars.py +++ b/megatron/global_vars.py @@ -165,15 +165,13 @@ def _set_wandb_writer(args): global _GLOBAL_WANDB_WRITER _ensure_var_is_not_initialized(_GLOBAL_WANDB_WRITER, 'wandb writer') - getattr(args, 'wandb_project', '') - getattr(args, 'wandb_exp_name', '') if args.rank == (args.world_size - 1): - if args.wandb_project == '' or \ - args.wandb_exp_name == '': + if getattr(args, 'wandb_project', '') == '' and \ + getattr(args, 'wandb_exp_name', '') == '': print('WARNING: WANDB writing requested but no legit wandb ' 'project or experiment name provided, ' - 'therefore WANDB logs will be written ' + 'therefore no WANDB logs will be written ' 'according to random generated project or experiment name.', flush=True) return From 1280f59c1a65e50d4e174e4195e14f173301a497 Mon Sep 17 00:00:00 2001 From: billishyahao Date: Wed, 28 Aug 2024 01:22:20 +0800 Subject: [PATCH 37/92] [Bug] Fix crash when logging optimizer state to tb (#417) --- megatron/training.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/megatron/training.py b/megatron/training.py index 
79f39ccc2e..0aeaabeba5 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -1032,6 +1032,12 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration, if args.log_optimizer_states_to_tensorboard and optimizer is not None: opt_stats = [0.0] * 8 opt_stats_2 = [0.0] * 4 + + #TODO(billishyahao): Remove me after bf16_optimizer promotes its state. + if not hasattr(optimizer, "state"): + assert hasattr(optimizer, "optimizer"), f"Optimizer must have optimizer property." + optimizer.state = optimizer.optimizer.state + for _, group in enumerate(optimizer.param_groups): for _, param in enumerate(group['params']): opt_stats[0] += (torch.norm(optimizer.state[param]['exp_avg_sq']).item())**2 From ea0c3c762b2c1fef35898d8952a6129a31f1645d Mon Sep 17 00:00:00 2001 From: Huihuo Zheng Date: Fri, 30 Aug 2024 15:09:11 -0500 Subject: [PATCH 38/92] fixed dftracer compatibility --- megatron/utils.py | 73 ++++++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/megatron/utils.py b/megatron/utils.py index 7a1d60bab9..19eef70399 100644 --- a/megatron/utils.py +++ b/megatron/utils.py @@ -36,12 +36,22 @@ _DLIO_PROFILER_EXIST = True +_DFTRACER_EXIST=True + +try: + import dftracer +except: + _DFTRACER_EXIST=True + try: import dlio_profiler except: _DLIO_PROFILER_EXIST = False -if _DLIO_PROFILER_EXIST: + +if _DFTRACER_EXIST: + from dftracer.logger import dftracer as PerfTrace, dft_fn as Profile, DFTRACER_ENABLE as DFTRACER_ENABLE +elif _DLIO_PROFILER_EXIST: from dlio_profiler.logger import fn_interceptor as Profile from dlio_profiler.logger import dlio_logger as PerfTrace else: @@ -50,40 +60,45 @@ # Profile: ContextManager = nullcontext # # class Profile(nullable_schema) - - class Profile: - def __init__(self, type="PROFILER"): - self._start = time.perf_counter() - self.type = type - - def log(self, func): + class Profile(object): + def __init__(self, cat, name=None, epoch=None, step=None, image_idx=None, 
image_size=None): + return + def log(self, func): + return func + def log_init(self, func): + return func + def iter(self, func, iter_name="step"): return func - - def iter(self, a): - return a - def __enter__(self): - self._start = time.perf_counter() - - def __exit__(self, *args, **kwargs): - dt = time.perf_counter() - self._start - log.info(f"{self.type} took: {dt:.6f}s") - - - class dlio_logger: - def __init__( - self, - ): + return + def __exit__(self, type, value, traceback): + return + def update(self, epoch=None, step=None, image_idx=None, image_size=None, args={}): + return + def flush(self): + return + def reset(self): + return + def log_static(self, func): + return + class dftracer(object): + def __init__(self,): self.type = None - def initialize_log(self, logfile=None, data_dir=None, process_id=-1): return + def get_time(self): + return + def enter_event(self): + return + def exit_event(self): + return + def log_event(self, name, cat, start_time, duration, string_args=None): + return + def finalize(self): + return - def iter(self, a): - return a - - PerfTrace = dlio_logger() - + PerfTrace = dftracer() + DFTRACER_ENABLE = False def get_logger( name: str, From a0ac7503d99560ba2f968c816812fabf81b3608a Mon Sep 17 00:00:00 2001 From: Huihuo Zheng Date: Tue, 3 Sep 2024 14:55:24 -0500 Subject: [PATCH 39/92] added requirements.txt --- requirements.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000..4f758bed7d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +pydftracer +wandb +git+https://github.com/saforem2/ezpz@main From 0d6e3793a1fc06eded9764ef15ad12bcc0281101 Mon Sep 17 00:00:00 2001 From: Polisetty V R K Jyothendra Varma Date: Wed, 4 Sep 2024 11:12:52 +0530 Subject: [PATCH 40/92] Enable Sequence Parallelism (#429) --- megatron/arguments.py | 3 ++- megatron/core/tensor_parallel/layers.py | 4 +++- megatron/model/fused_layer_norm.py | 14 
++++++++++++-- megatron/model/gpt_model.py | 8 ++++++-- megatron/model/language_model.py | 5 +++-- megatron/model/rmsnorm.py | 17 +++++++++++++++-- megatron/model/transformer.py | 13 +++++++++---- 7 files changed, 50 insertions(+), 14 deletions(-) diff --git a/megatron/arguments.py b/megatron/arguments.py index 49b3d8e4c6..9228da6ee9 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -399,7 +399,8 @@ def validate_args(args, defaults={}): args.async_tensor_model_parallel_allreduce = False if not args.use_dataset_only: - if os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS') != "1": + if deepspeed.accelerator.get_accelerator().device_name() == "cuda" \ + and os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS') != "1": if args.sequence_parallel: raise RuntimeError( "Using sequence parallelism requires setting the environment variable " diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index 2245113c9c..67a78853aa 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # Parts of the code here are adapted from PyTorch @@ -450,7 +451,8 @@ def linear_with_grad_accumulation_and_async_allreduce( ] if not linear_with_grad_accumulation_and_async_allreduce.warned: - if os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS') != "1": + if get_accelerator().device_name() == "cuda" \ + and os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS') != "1": if sequence_parallel: warnings.warn( "When using sequence parallelism it is recommended to set the " diff --git a/megatron/model/fused_layer_norm.py b/megatron/model/fused_layer_norm.py index 2f3b89014b..d1ef034397 100644 --- a/megatron/model/fused_layer_norm.py +++ b/megatron/model/fused_layer_norm.py @@ -1,9 +1,11 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2022, NVIDIA CORPORATION. 
All rights reserved. """This code is copied fron NVIDIA apex: https://github.com/NVIDIA/apex with some changes. """ +from deepspeed.accelerator.real_accelerator import get_accelerator import numbers import torch from torch.nn.parameter import Parameter @@ -13,6 +15,7 @@ import inspect from megatron.core.utils import make_viewless_tensor +from megatron import get_args try: from apex.contrib.layer_norm.layer_norm import FastLayerNormFN @@ -56,8 +59,15 @@ def __init__(self, normalized_shape, eps=1e-5, normalized_shape = (normalized_shape,) self.normalized_shape = torch.Size(normalized_shape) self.eps = eps - self.weight = Parameter(torch.Tensor(*normalized_shape)) - self.bias = Parameter(torch.Tensor(*normalized_shape)) + init_device = None + if get_accelerator().device_name() == 'hpu': + init_device = get_accelerator().current_device_name() + self.weight = Parameter(torch.empty(*normalized_shape, + device=init_device, + dtype=get_args().params_dtype)) + self.bias = Parameter(torch.empty(*normalized_shape, + device=init_device, + dtype=get_args().params_dtype)) self.reset_parameters() self.no_persist_layer_norm = no_persist_layer_norm self.sequence_parallel = sequence_parallel diff --git a/megatron/model/gpt_model.py b/megatron/model/gpt_model.py index 8968c96655..e5e60c43ee 100644 --- a/megatron/model/gpt_model.py +++ b/megatron/model/gpt_model.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
"""GPT-2 model.""" @@ -393,9 +394,12 @@ def _to_float16(inputs): if args.normalization == 'layernorm': self.specs.append(LayerSpec(LayerNorm, args.hidden_size, - eps=args.layernorm_epsilon)) + eps=args.layernorm_epsilon, + sequence_parallel=args.sequence_parallel)) else: - self.specs.append(LayerSpec(RMSNorm, args.hidden_size, args.layernorm_epsilon)) + self.specs.append(LayerSpec(RMSNorm, args.hidden_size, + args.layernorm_epsilon, + sequence_parallel=args.sequence_parallel)) def _logits_helper(embedding, lm_output): """A wrapper to massage inputs/outputs from pipeline. """ diff --git a/megatron/model/language_model.py b/megatron/model/language_model.py index ec2ae1877a..3b8e4e0da1 100644 --- a/megatron/model/language_model.py +++ b/megatron/model/language_model.py @@ -1,3 +1,4 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. """Transformer based language model.""" @@ -256,8 +257,8 @@ def forward(self, input_ids, position_ids, tokentype_ids=None): # Dropout. if self.sequence_parallel: - # already partition sequence, do not need scatter_to_sequence_parallel_region - # embeddings = tensor_parallel.scatter_to_sequence_parallel_region(embeddings) + # already partition sequence, do not need scatter_to_sequence_parallel_region ? + embeddings = tensor_parallel.scatter_to_sequence_parallel_region(embeddings) with tensor_parallel.get_cuda_rng_tracker().fork(): embeddings = self.embedding_dropout(embeddings) else: diff --git a/megatron/model/rmsnorm.py b/megatron/model/rmsnorm.py index 60e8978171..4860d81716 100644 --- a/megatron/model/rmsnorm.py +++ b/megatron/model/rmsnorm.py @@ -1,4 +1,10 @@ +# Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. 
+ +from deepspeed.accelerator import get_accelerator +from megatron import get_args + import torch +from torch.nn import init from torch.nn.parameter import Parameter # Taken from facebookresearch/llama @@ -6,11 +12,18 @@ class RMSNorm(torch.nn.Module): def __init__(self, dim: int, eps: float = 1e-6): super().__init__() self.eps = eps - self.weight = Parameter(torch.ones(dim)) + init_device = None + if get_accelerator().device_name() == 'hpu': + init_device = get_accelerator().current_device_name() + self.weight = Parameter(torch.empty(dim, + device=init_device, + dtype=get_args().params_dtype)) + init.ones_(self.weight) + setattr(self.weight, 'sequence_parallel', sequence_parallel) def _norm(self, x): return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) def forward(self, x): output = self._norm(x.float()).type_as(x) - return output * self.weight \ No newline at end of file + return output * self.weight diff --git a/megatron/model/transformer.py b/megatron/model/transformer.py index 036c11566a..74e977103f 100644 --- a/megatron/model/transformer.py +++ b/megatron/model/transformer.py @@ -931,7 +931,8 @@ def __init__(self, config, config.hidden_size, eps=config.layernorm_epsilon) else: - self.input_layernorm = RMSNorm(config.hidden_size, config.layernorm_epsilon) + self.input_layernorm = RMSNorm(config.hidden_size, config.layernorm_epsilon, + sequence_parallel=config.sequence_parallel) # Self attention. self.self_attention = ParallelAttention( config, @@ -957,7 +958,8 @@ def __init__(self, config, config.hidden_size, eps=config.layernorm_epsilon) else: - self.post_attention_layernorm = RMSNorm(config.hidden_size, config.layernorm_epsilon) + self.post_attention_layernorm = RMSNorm(config.hidden_size, config.layernorm_epsilon, + sequence_parallel=config.sequence_parallel) # Cross attention. 
if self.layer_type in (LayerType.decoder, LayerType.retro_decoder, @@ -977,7 +979,9 @@ def __init__(self, config, apply_layernorm_1p=args.apply_layernorm_1p, mem_efficient_ln=args.mem_efficient_ln) else: - self.post_inter_attention_layernorm = RMSNorm(config.hidden_size, config.layernorm_epsilon) + self.post_inter_attention_layernorm = RMSNorm(config.hidden_size, + config.layernorm_epsilon, + sequence_parallel=config.sequence_parallel) # MLP self.num_experts = num_experts @@ -1780,7 +1784,8 @@ def build_layer(layer_number, n_e): config.hidden_size, eps=config.layernorm_epsilon) else: - self.final_layernorm = RMSNorm(config.hidden_size, config.layernorm_epsilon) + self.final_layernorm = RMSNorm(config.hidden_size, config.layernorm_epsilon, + sequence_parallel=config.sequence_parallel) def _get_layer(self, layer_number): return self.layers[layer_number] From de7f22f35361e0eff628cb1d61d26ae94234876b Mon Sep 17 00:00:00 2001 From: Huihuo Zheng Date: Wed, 4 Sep 2024 10:00:00 -0500 Subject: [PATCH 41/92] Update utils.py --- megatron/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megatron/utils.py b/megatron/utils.py index 19eef70399..67dc366d50 100644 --- a/megatron/utils.py +++ b/megatron/utils.py @@ -80,7 +80,7 @@ def flush(self): def reset(self): return def log_static(self, func): - return + return func class dftracer(object): def __init__(self,): self.type = None From a1ede68a5ee9a6ce11ad1adf90b57acef4859f69 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Wed, 11 Sep 2024 19:45:26 -0500 Subject: [PATCH 42/92] Remove duplicate `--profile` arg --- megatron/arguments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megatron/arguments.py b/megatron/arguments.py index 154f53e2bc..b3ed06353e 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -1012,7 +1012,7 @@ def _add_training_args(parser): dest='gradient_accumulation_fusion') group.add_argument('--use-dataset-only', type=bool, required=False, default=False, 
help='If set to True, only use the megatron dataset for external trainer ') - group.add_argument('--profile', action='store_true', help='Enable Torch Profiler') + # group.add_argument('--profile', action='store_true', help='Enable Torch Profiler') group.add_argument( "--train-range-to-skip", action="extend", From 6b32cff40aecd5cc13ad0d0e7414ea91560034d9 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Wed, 11 Sep 2024 19:46:11 -0500 Subject: [PATCH 43/92] debug: `sequence_parallel` issue in `RMSNorm` ?? --- megatron/model/fused_rmsnorm.py | 4 +- megatron/model/rmsnorm.py | 15 +- megatron/model/transformer.py | 1513 +++++++++++++++++++------------ 3 files changed, 958 insertions(+), 574 deletions(-) diff --git a/megatron/model/fused_rmsnorm.py b/megatron/model/fused_rmsnorm.py index a3b9927c87..0ee372a7eb 100644 --- a/megatron/model/fused_rmsnorm.py +++ b/megatron/model/fused_rmsnorm.py @@ -2,9 +2,11 @@ from torch.nn.parameter import Parameter import intel_extension_for_pytorch as ipex # noqa + # Taken from facebookresearch/llama class RMSNorm(torch.nn.Module): - def __init__(self, dim: int, eps: float = 1e-6): + # def __init__(self, dim: int, eps: float = 1e-6): + def __init__(self, dim: int, eps: float = 1e-6, sequence_parallel: bool = False): super().__init__() self.eps = eps self.weight = Parameter(torch.ones(dim)) diff --git a/megatron/model/rmsnorm.py b/megatron/model/rmsnorm.py index 4860d81716..6feb575f68 100644 --- a/megatron/model/rmsnorm.py +++ b/megatron/model/rmsnorm.py @@ -7,19 +7,20 @@ from torch.nn import init from torch.nn.parameter import Parameter + # Taken from facebookresearch/llama class RMSNorm(torch.nn.Module): - def __init__(self, dim: int, eps: float = 1e-6): + def __init__(self, dim: int, eps: float = 1e-6, sequence_parallel: bool = False): super().__init__() self.eps = eps init_device = None - if get_accelerator().device_name() == 'hpu': - init_device = get_accelerator().current_device_name() - self.weight = Parameter(torch.empty(dim, - 
device=init_device, - dtype=get_args().params_dtype)) + if get_accelerator().device_name() == "hpu": + init_device = get_accelerator().current_device_name() + self.weight = Parameter( + torch.empty(dim, device=init_device, dtype=get_args().params_dtype) + ) init.ones_(self.weight) - setattr(self.weight, 'sequence_parallel', sequence_parallel) + setattr(self.weight, "sequence_parallel", sequence_parallel) def _norm(self, x): return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) diff --git a/megatron/model/transformer.py b/megatron/model/transformer.py index 74e977103f..592ff2855b 100644 --- a/megatron/model/transformer.py +++ b/megatron/model/transformer.py @@ -25,6 +25,7 @@ try: from deepspeed.sequence.layer import DistributedAttention + dist_attn_supported = True except ImportError: dist_attn_supported = False @@ -70,28 +71,31 @@ hyperparameters: transformer hyperparameters """ + class DropPath(MegatronModule): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). """ - def __init__(self, drop_prob=0.): + def __init__(self, drop_prob=0.0): super(DropPath, self).__init__() self.drop_prob = drop_prob def forward(self, hidden_state): - if self.drop_prob == 0. or not self.training: + if self.drop_prob == 0.0 or not self.training: return hidden_state keep_prob = 1 - self.drop_prob # work with diff dim tensors, not just 2D ConvNets # hidden_state: [s, b, h] shape = (1,) + (hidden_state.shape[1],) + (1,) * (hidden_state.ndim - 2) - random_tensor = keep_prob + \ - torch.rand(shape, dtype=hidden_state.dtype, device=hidden_state.device) + random_tensor = keep_prob + torch.rand( + shape, dtype=hidden_state.dtype, device=hidden_state.device + ) random_tensor.floor_() # binarize output = hidden_state.div(keep_prob) * random_tensor return output + class ParallelMLP(MegatronModule): """MLP. 
@@ -120,7 +124,7 @@ def __init__(self, config, moe=False, enable_expert_tensor_parallelism=False): gather_output=False, skip_bias_add=True, moe=moe, - enable_expert_tensor_parallelism=enable_expert_tensor_parallelism + enable_expert_tensor_parallelism=enable_expert_tensor_parallelism, ) self.bias_gelu_fusion = False @@ -132,13 +136,17 @@ def __init__(self, config, moe=False, enable_expert_tensor_parallelism=False): elif args.onnx_safe: self.activation_func = erf_gelu elif args.swiglu: + def swiglu(x): x = torch.chunk(x, 2, dim=-1) return F.silu(x[0]) * x[1] + self.activation_func = swiglu elif args.squared_relu: + def squared_relu(x): return torch.pow(F.relu(x), 2) + self.activation_func = squared_relu else: self.bias_gelu_fusion = args.bias_gelu_fusion @@ -153,7 +161,7 @@ def squared_relu(x): bias=self.add_bias, input_is_parallel=True, moe=moe, - enable_expert_tensor_parallelism=enable_expert_tensor_parallelism + enable_expert_tensor_parallelism=enable_expert_tensor_parallelism, ) def forward(self, hidden_states): @@ -175,10 +183,12 @@ def forward(self, hidden_states): output, output_bias = self.dense_4h_to_h(intermediate_parallel) return output, output_bias + class SwitchMLP(MegatronModule): """ Routes input to one of N MLP "experts" """ + def __init__(self, config): super(SwitchMLP, self).__init__() args = get_args() @@ -195,29 +205,29 @@ def forward(self, hidden_states): route = self.router(hidden_states) route = torch.nn.functional.softmax(route, dim=2) max_prob, max_ind = torch.max(route, dim=2) - max_prob = torch.unsqueeze(max_prob, 2) # [s b 1] + max_prob = torch.unsqueeze(max_prob, 2) # [s b 1] # TODO (rprenger) TODO this could be made easier to read # Converting [s, b, h] to [s*b, h]. 
# Each vector could be routed differently - hidden_states = hidden_states.view(-1, hidden_states.size(2)) # [s*b h] - max_prob = max_prob.view(-1, max_prob.size(2)) # [s*b 1] - max_ind = max_ind.view(-1) # [s*b] + hidden_states = hidden_states.view(-1, hidden_states.size(2)) # [s*b h] + max_prob = max_prob.view(-1, max_prob.size(2)) # [s*b 1] + max_ind = max_ind.view(-1) # [s*b] output_total = torch.empty_like(hidden_states) output_bias_total = torch.empty_like(hidden_states) - #TODO (rprenger) This does each expert in serial, but it could be parallelized + # TODO (rprenger) This does each expert in serial, but it could be parallelized for expert_num, expert in enumerate(self.experts): local_indices = (max_ind == expert_num).nonzero() - hidden = hidden_states[local_indices,:] + hidden = hidden_states[local_indices, :] output, output_bias = expert(hidden) output_bias = output_bias.expand_as(output) - output_total[local_indices,:] = output - output_bias_total[local_indices,:] = output_bias + output_total[local_indices, :] = output + output_bias_total[local_indices, :] = output_bias - output_total = output_total*max_prob - output_bias_total = output_bias_total*max_prob + output_total = output_total * max_prob + output_bias_total = output_bias_total * max_prob output_total = output_total.view(s, b, h) output_bias_total = output_bias_total.view(s, b, h) @@ -226,8 +236,7 @@ def forward(self, hidden_states): class CoreAttention(MegatronModule): - def __init__(self, layer_number, config, - attn_mask_type=AttnMaskType.padding): + def __init__(self, layer_number, config, attn_mask_type=AttnMaskType.padding): super(CoreAttention, self).__init__() self.fp16 = config.fp16 self.bf16 = config.bf16 @@ -246,14 +255,19 @@ def __init__(self, layer_number, config, seq_parallel_world_size = 1 if parallel_state.sequence_parallel_is_initialized(): seq_parallel_world_size = parallel_state.get_sequence_parallel_world_size() - world_size = seq_parallel_world_size if seq_parallel_world_size 
> 1 else parallel_state.get_tensor_model_parallel_world_size() + world_size = ( + seq_parallel_world_size + if seq_parallel_world_size > 1 + else parallel_state.get_tensor_model_parallel_world_size() + ) - self.hidden_size_per_partition = core.utils.divide(projection_size, - world_size) + self.hidden_size_per_partition = core.utils.divide(projection_size, world_size) self.hidden_size_per_attention_head = core.utils.divide( - projection_size, config.num_attention_heads) + projection_size, config.num_attention_heads + ) self.num_attention_heads_per_partition = core.utils.divide( - config.num_attention_heads, world_size) + config.num_attention_heads, world_size + ) coeff = None self.norm_factor = math.sqrt(self.hidden_size_per_attention_head) @@ -262,49 +276,56 @@ def __init__(self, layer_number, config, self.norm_factor *= coeff self.scale_mask_softmax = FusedScaleMaskSoftmax( - self.fp16, self.bf16, + self.fp16, + self.bf16, self.attn_mask_type, config.masked_softmax_fusion, attention_mask_func, self.attention_softmax_in_fp32, - coeff) + coeff, + ) # Dropout. Note that for a single iteration, this layer will generate # different outputs on different number of parallel partitions but # on average it should not be partition dependent. self.attention_dropout = torch.nn.Dropout(config.attention_dropout) - def forward(self, query_layer, key_layer, - value_layer, attention_mask): + def forward(self, query_layer, key_layer, value_layer, attention_mask): # =================================== # Raw attention scores. 
[b, np, s, s] # =================================== # [b, np, sq, sk] - output_size = (query_layer.size(1), - query_layer.size(2), - query_layer.size(0), - key_layer.size(0)) + output_size = ( + query_layer.size(1), + query_layer.size(2), + query_layer.size(0), + key_layer.size(0), + ) # [sq, b, np, hn] -> [sq, b * np, hn] - query_layer = query_layer.view(output_size[2], - output_size[0] * output_size[1], -1) + query_layer = query_layer.view( + output_size[2], output_size[0] * output_size[1], -1 + ) # [sk, b, np, hn] -> [sk, b * np, hn] - key_layer = key_layer.view(output_size[3], - output_size[0] * output_size[1], -1) + key_layer = key_layer.view(output_size[3], output_size[0] * output_size[1], -1) # preallocting input tensor: [b * np, sq, sk] matmul_input_buffer = parallel_state.get_global_memory_buffer().get_tensor( - (output_size[0]*output_size[1], output_size[2], output_size[3]), - query_layer.dtype, "mpu") + (output_size[0] * output_size[1], output_size[2], output_size[3]), + query_layer.dtype, + "mpu", + ) # Raw attention scores. [b * np, sq, sk] matmul_result = torch.baddbmm( matmul_input_buffer, - query_layer.transpose(0, 1), # [b * np, sq, hn] + query_layer.transpose(0, 1), # [b * np, sq, hn] key_layer.transpose(0, 1).transpose(1, 2), # [b * np, hn, sk] - beta=0.0, alpha=(1.0/self.norm_factor)) + beta=0.0, + alpha=(1.0 / self.norm_factor), + ) # change view to [b, np, sq, sk] attention_scores = matmul_result.view(*output_size) @@ -314,8 +335,7 @@ def forward(self, query_layer, key_layer, # =========================== # attention scores and attention mask [b, np, sq, sk] - attention_probs = self.scale_mask_softmax(attention_scores, - attention_mask) + attention_probs = self.scale_mask_softmax(attention_scores, attention_mask) # This is actually dropping out entire tokens to attend to, which might # seem a bit unusual, but is taken from the original Transformer paper. 
@@ -333,18 +353,22 @@ def forward(self, query_layer, key_layer, # [sk, b, np, hn] --> [b, np, sq, hn] # context layer shape: [b, np, sq, hn] - output_size = (value_layer.size(1), - value_layer.size(2), - query_layer.size(0), - value_layer.size(3)) + output_size = ( + value_layer.size(1), + value_layer.size(2), + query_layer.size(0), + value_layer.size(3), + ) # change view [sk, b * np, hn] - value_layer = value_layer.view(value_layer.size(0), - output_size[0] * output_size[1], -1) + value_layer = value_layer.view( + value_layer.size(0), output_size[0] * output_size[1], -1 + ) # change view [b * np, sq, sk] - attention_probs = attention_probs.view(output_size[0] * output_size[1], - output_size[2], -1) + attention_probs = attention_probs.view( + output_size[0] * output_size[1], output_size[2], -1 + ) # matmul: [b * np, sq, hn] context_layer = torch.bmm(attention_probs, value_layer.transpose(0, 1)) @@ -356,8 +380,9 @@ def forward(self, query_layer, key_layer, context_layer = context_layer.permute(2, 0, 1, 3).contiguous() # [sq, b, np, hn] --> [sq, b, hp] - new_context_layer_shape = context_layer.size()[:-2] + \ - (self.hidden_size_per_partition,) + new_context_layer_shape = context_layer.size()[:-2] + ( + self.hidden_size_per_partition, + ) context_layer = context_layer.view(*new_context_layer_shape) return context_layer @@ -373,12 +398,24 @@ class FlashSelfAttention(torch.nn.Module): attention_dropout: The dropout rate to apply to the attention (default: 0.0) """ - def __init__(self, causal=False, softmax_scale=None, attention_dropout=0.0, - device=None, dtype=None): + + def __init__( + self, + causal=False, + softmax_scale=None, + attention_dropout=0.0, + device=None, + dtype=None, + ): super().__init__() - assert flash_attn_unpadded_func is not None or flash_attn_varlen_func is not None or flash_attn_builder is not None, \ - ('Please install FlashAttention first, e.g., with pip install flash-attn or implement your own flash attention') - assert rearrange is not 
None, 'Please install einops first, e.g., with pip install einops' + assert ( + flash_attn_unpadded_func is not None + or flash_attn_varlen_func is not None + or flash_attn_builder is not None + ), "Please install FlashAttention first, e.g., with pip install flash-attn or implement your own flash attention" + assert ( + rearrange is not None + ), "Please install einops first, e.g., with pip install einops" self.causal = causal self.softmax_scale = softmax_scale self.dropout_p = attention_dropout @@ -389,14 +426,18 @@ def __init__(self, causal=False, softmax_scale=None, attention_dropout=0.0, self.use_flash_attn_builder_v2 = False self.use_flash_attn = False if args.use_flash_attn_builder: - if hasattr(flash_attn_builder, 'flash_attn_func'): + if hasattr(flash_attn_builder, "flash_attn_func"): self.flash_attn_func = flash_attn_builder.flash_attn_func self.use_flash_attn_builder_v1 = True else: self.flash_attn_func = flash_attn_builder.flash_attn_func_v2 self.use_flash_attn_builder_v2 = True else: - self.flash_attn_func = flash_attn_varlen_func if args.use_flash_attn_v2 else flash_attn_unpadded_func + self.flash_attn_func = ( + flash_attn_varlen_func + if args.use_flash_attn_v2 + else flash_attn_unpadded_func + ) self.use_flash_attn = True def forward(self, q, k, v): @@ -406,42 +447,67 @@ def forward(self, q, k, v): q, k, v: The tensor containing the query, key, and value. (B, S, H, D) """ - assert all((i.dtype in [torch.float16, torch.bfloat16] for i in (q,k,v))) + assert all((i.dtype in [torch.float16, torch.bfloat16] for i in (q, k, v))) assert all((get_accelerator().on_accelerator(i) for i in (q, k, v))) batch_size, seqlen_q = q.shape[0], q.shape[1] seqlen_k = k.shape[1] if self.use_flash_attn: - q, k, v = [rearrange(x, 'b s ... -> (b s) ...') for x in [q, k, v]] - cu_seqlens_q = torch.arange(0, (batch_size + 1) * seqlen_q, step=seqlen_q, dtype=torch.int32, - device=q.device) + q, k, v = [rearrange(x, "b s ... 
-> (b s) ...") for x in [q, k, v]] + cu_seqlens_q = torch.arange( + 0, + (batch_size + 1) * seqlen_q, + step=seqlen_q, + dtype=torch.int32, + device=q.device, + ) elif self.use_flash_attn_builder_v1: - q, k, v = [rearrange(x, 'b s h d -> b h s d').contiguous() for x in [q, k, v]] + q, k, v = [ + rearrange(x, "b s h d -> b h s d").contiguous() for x in [q, k, v] + ] else: # use_flash_attn_builder_v2 - q, k, v = [rearrange(x, 'b s h d -> b h s d') for x in [q, k, v]] + q, k, v = [rearrange(x, "b s h d -> b h s d") for x in [q, k, v]] if self.training: # during training q,k,v always have same seqlen assert seqlen_k == seqlen_q is_causal = self.causal - cu_seqlens_k = cu_seqlens_q if get_accelerator().device_name() == 'cuda' else None + cu_seqlens_k = ( + cu_seqlens_q if get_accelerator().device_name() == "cuda" else None + ) dropout_p = self.dropout_p else: # turn off FA causal mask after first inference autoregressive iteration # only on first autoregressive step q,k,v have same seqlen is_causal = seqlen_q == seqlen_k - cu_seqlens_k = torch.arange(0, (batch_size + 1) * seqlen_k, step=seqlen_k, dtype=torch.int32, - device=q.device) if get_accelerator().device_name() == 'cuda' else None + cu_seqlens_k = ( + torch.arange( + 0, + (batch_size + 1) * seqlen_k, + step=seqlen_k, + dtype=torch.int32, + device=q.device, + ) + if get_accelerator().device_name() == "cuda" + else None + ) dropout_p = 0 if self.use_flash_attn: output = self.flash_attn_func( - q, k, v, cu_seqlens_q, cu_seqlens_k, seqlen_q, seqlen_k, + q, + k, + v, + cu_seqlens_q, + cu_seqlens_k, + seqlen_q, + seqlen_k, dropout_p, - softmax_scale=self.softmax_scale, causal=is_causal + softmax_scale=self.softmax_scale, + causal=is_causal, ) else: # use_flash_attn_builder @@ -450,15 +516,16 @@ def forward(self, q, k, v): ) if self.use_flash_attn: - output = rearrange(output, '(b s) ... -> b s ...', b=batch_size) + output = rearrange(output, "(b s) ... 
-> b s ...", b=batch_size) elif self.use_flash_attn_builder_v1: - output = rearrange(output, 'b h s d -> b s h d').contiguous() + output = rearrange(output, "b h s d -> b s h d").contiguous() else: # use_flash_attn_builder_v2: - output = rearrange(output, 'b h s d -> b s h d') + output = rearrange(output, "b h s d -> b s h d") return output + class FlashSelfAttentionTriton(torch.nn.Module): """Implement the scaled dot product attention with softmax. Arguments @@ -469,11 +536,22 @@ class FlashSelfAttentionTriton(torch.nn.Module): attention_dropout: The dropout rate to apply to the attention (default: 0.0) """ - def __init__(self, causal=False, softmax_scale=None, attention_dropout=0.0, - device=None, dtype=None): + + def __init__( + self, + causal=False, + softmax_scale=None, + attention_dropout=0.0, + device=None, + dtype=None, + ): super().__init__() - assert flash_attn_func is not None, ('Triton version of FlashAttention is not installed.') - assert rearrange is not None, 'Please install einops first, e.g., with pip install einops' + assert ( + flash_attn_func is not None + ), "Triton version of FlashAttention is not installed." + assert ( + rearrange is not None + ), "Please install einops first, e.g., with pip install einops" self.causal = causal self.softmax_scale = softmax_scale self.dropout_p = attention_dropout @@ -487,13 +565,13 @@ def forward(self, q, k, v): assert q.dtype in [torch.float16, torch.bfloat16] assert q.is_cuda - q, k, v = [rearrange(x, 's b ... -> b s ...').contiguous() - for x in (q, k, v)] - + q, k, v = [rearrange(x, "s b ... -> b s ...").contiguous() for x in (q, k, v)] + output = flash_attn_func(q, k, v, None, self.causal) - output = rearrange(output, 'b s h d -> s b (h d)').contiguous() + output = rearrange(output, "b s h d -> s b (h d)").contiguous() return output + class ParallelAttention(MegatronModule): """Parallel self-attention layer abstract class. 
@@ -501,9 +579,13 @@ class ParallelAttention(MegatronModule): and returns output of the same size. """ - def __init__(self, config, layer_number, - attention_type=AttnType.self_attn, - attn_mask_type=AttnMaskType.padding): + def __init__( + self, + config, + layer_number, + attention_type=AttnType.self_attn, + attn_mask_type=AttnMaskType.padding, + ): super(ParallelAttention, self).__init__() args = get_args() self.layer_number = max(1, layer_number) @@ -513,12 +595,18 @@ def __init__(self, config, layer_number, self.sequence_parallel = config.sequence_parallel self.num_attention_heads = config.num_attention_heads self.num_key_value_heads = config.num_key_value_heads - self.use_gqa = (self.num_attention_heads != self.num_key_value_heads) - - self.use_flash_attn = (args.use_flash_attn_v1 or args.use_flash_attn_triton or args.use_flash_attn_v2 or \ - args.use_flash_attn_builder) \ - and attention_type == AttnType.self_attn \ + self.use_gqa = self.num_attention_heads != self.num_key_value_heads + + self.use_flash_attn = ( + ( + args.use_flash_attn_v1 + or args.use_flash_attn_triton + or args.use_flash_attn_v2 + or args.use_flash_attn_builder + ) + and attention_type == AttnType.self_attn and self.attn_mask_type == AttnMaskType.causal + ) self.use_flash_attn_triton = args.use_flash_attn_triton if self.use_flash_attn: global flash_attn_builder @@ -528,38 +616,53 @@ def __init__(self, config, layer_number, flash_attn_builder = None if args.use_flash_attn_v1: - assert flash_attn_unpadded_func != None, "Cannot import FlashAttention v1 " + assert ( + flash_attn_unpadded_func != None + ), "Cannot import FlashAttention v1 " if args.use_flash_attn_v2: - assert flash_attn_varlen_func != None, "Cannot import FlashAttention v2 " + assert ( + flash_attn_varlen_func != None + ), "Cannot import FlashAttention v2 " if args.use_flash_attn_triton: assert flash_attn_func != None, "Cannot import FlashAttention triton " if args.use_flash_attn_builder: - assert flash_attn_builder != None, 
"Cannot find FlashAttention op builder " + assert ( + flash_attn_builder != None + ), "Cannot find FlashAttention op builder " - assert attention_type == AttnType.self_attn, ('FlashAttention code path only supports ' - 'self-attention for now') - assert self.attn_mask_type == AttnMaskType.causal, ('FlashAttention code path only ' - 'supports causal mask for now') + assert attention_type == AttnType.self_attn, ( + "FlashAttention code path only supports " "self-attention for now" + ) + assert self.attn_mask_type == AttnMaskType.causal, ( + "FlashAttention code path only " "supports causal mask for now" + ) if rearrange is None: - raise ImportError('einops is not installed, please install with pip install einops') + raise ImportError( + "einops is not installed, please install with pip install einops" + ) projection_size = config.kv_channels * config.num_attention_heads # Per attention head and per partition values. world_size = parallel_state.get_tensor_model_parallel_world_size() self.hidden_size_per_attention_head = core.utils.divide( - projection_size, config.num_attention_heads) + projection_size, config.num_attention_heads + ) self.num_attention_heads_per_partition = core.utils.divide( - config.num_attention_heads, world_size) + config.num_attention_heads, world_size + ) # Per GQA head and per partition values self.num_key_value_heads_per_partition = core.utils.divide( - config.num_key_value_heads, world_size) + config.num_key_value_heads, world_size + ) self.num_key_value_groups = core.utils.divide( - config.num_attention_heads, config.num_key_value_heads) + config.num_attention_heads, config.num_key_value_heads + ) kv_projection_size = config.kv_channels * config.num_key_value_heads assert self.hidden_size_per_attention_head == core.utils.divide( - kv_projection_size, config.num_key_value_heads) + kv_projection_size, config.num_key_value_heads + ) # Strided linear layer. 
if attention_type == AttnType.self_attn: @@ -569,7 +672,8 @@ def __init__(self, config, layer_number, config=config, init_method=config.init_method, bias=args.add_bias_linear, - gather_output=False) + gather_output=False, + ) else: assert attention_type == AttnType.cross_attn self.query = tensor_parallel.ColumnParallelLinear( @@ -578,8 +682,8 @@ def __init__(self, config, layer_number, config=config, init_method=config.init_method, bias=config.add_bias_linear, - gather_output=False) - + gather_output=False, + ) self.key_value = tensor_parallel.ColumnParallelLinear( config.hidden_size, @@ -587,32 +691,48 @@ def __init__(self, config, layer_number, config=config, init_method=config.init_method, bias=config.add_bias_linear, - gather_output=False) + gather_output=False, + ) # Currently FlashAttention only works with causal mask if self.use_flash_attn_triton: - local_attn = FlashSelfAttentionTriton(causal=True, attention_dropout=args.attention_dropout) + local_attn = FlashSelfAttentionTriton( + causal=True, attention_dropout=args.attention_dropout + ) elif self.use_flash_attn: - local_attn = FlashSelfAttention(causal=True, attention_dropout=config.attention_dropout) + local_attn = FlashSelfAttention( + causal=True, attention_dropout=config.attention_dropout + ) else: local_attn = CoreAttention(self.layer_number, config, self.attn_mask_type) - self.enable_ds_sequence_parallel = parallel_state.get_sequence_parallel_world_size() > 1 \ - or args.force_ds_sequence_parallel + self.enable_ds_sequence_parallel = ( + parallel_state.get_sequence_parallel_world_size() > 1 + or args.force_ds_sequence_parallel + ) if self.enable_ds_sequence_parallel: - assert dist_attn_supported, 'Distributed attention is not supported in this DeepSpeed version' - assert args.num_attention_heads % parallel_state.get_sequence_parallel_world_size() == 0 + assert ( + dist_attn_supported + ), "Distributed attention is not supported in this DeepSpeed version" + assert ( + args.num_attention_heads + % 
parallel_state.get_sequence_parallel_world_size() + == 0 + ) self.dist_attn = DistributedAttention( - local_attn, - parallel_state.get_sequence_parallel_group(), - gather_idx=1 if args.use_flash_attn_v1 or args.use_flash_attn_v2 else 0) + local_attn, + parallel_state.get_sequence_parallel_group(), + gather_idx=1 if args.use_flash_attn_v1 or args.use_flash_attn_v2 else 0, + ) # flash_attn_cuda assumes [b, s, nh, hd] layout, we need to make sure all2all gathers into the correct sequence dimension. else: if self.use_flash_attn: self.core_attention_flash = local_attn else: self.core_attention = local_attn - self.checkpoint_core_attention = config.recompute_granularity == 'selective' + self.checkpoint_core_attention = ( + config.recompute_granularity == "selective" + ) # Output. self.dense = tensor_parallel.RowParallelLinear( @@ -622,29 +742,38 @@ def __init__(self, config, layer_number, init_method=config.output_layer_init_method, bias=args.add_bias_linear, input_is_parallel=True, - skip_bias_add=True) - + skip_bias_add=True, + ) - def _checkpointed_attention_forward(self, query_layer, key_layer, - value_layer, attention_mask, - rotary_pos_emb=None): + def _checkpointed_attention_forward( + self, query_layer, key_layer, value_layer, attention_mask, rotary_pos_emb=None + ): """Forward method with activation checkpointing.""" + def custom_forward(*inputs): query_layer = inputs[0] key_layer = inputs[1] value_layer = inputs[2] attention_mask = inputs[3] - output_ = self.core_attention(query_layer, key_layer, - value_layer, attention_mask) + output_ = self.core_attention( + query_layer, key_layer, value_layer, attention_mask + ) return output_ - q_pos_emb, k_pos_emb = (None, None) if rotary_pos_emb is None \ - else rotary_pos_emb + q_pos_emb, k_pos_emb = ( + (None, None) if rotary_pos_emb is None else rotary_pos_emb + ) hidden_states = tensor_parallel.checkpoint( custom_forward, - False, query_layer, key_layer, value_layer, attention_mask, - q_pos_emb, k_pos_emb) + False, + 
query_layer, + key_layer, + value_layer, + attention_mask, + q_pos_emb, + k_pos_emb, + ) return hidden_states @@ -655,7 +784,8 @@ def _allocate_memory(self, inference_max_sequence_len, batch_size): self.num_attention_heads_per_partition, self.hidden_size_per_attention_head, dtype=self.params_dtype, - device=get_accelerator().current_device_name()) + device=get_accelerator().current_device_name(), + ) def repeat_kv(self, hidden_states, n_rep): slen, batch, num_key_value_heads_per_partition, head_dim = hidden_states.shape @@ -667,22 +797,36 @@ def repeat_kv(self, hidden_states, n_rep): return hidden_states.expand(slen, batch, n_rep, head_dim) else: hidden_states = hidden_states[:, :, :, None, :].expand( - slen, batch, num_key_value_heads_per_partition, n_rep, head_dim) - return hidden_states.reshape(slen, batch, - num_key_value_heads_per_partition * n_rep, - head_dim) - + slen, batch, num_key_value_heads_per_partition, n_rep, head_dim + ) + return hidden_states.reshape( + slen, batch, num_key_value_heads_per_partition * n_rep, head_dim + ) + def split_tensor(self, mixed_x_layer): - query_layer, key_layer, value_layer = torch.split(mixed_x_layer, [self.num_key_value_groups, 1, 1], dim=-2) - query_layer = query_layer.reshape(mixed_x_layer.shape[:2] + (self.num_attention_heads_per_partition, self.hidden_size_per_attention_head)) + query_layer, key_layer, value_layer = torch.split( + mixed_x_layer, [self.num_key_value_groups, 1, 1], dim=-2 + ) + query_layer = query_layer.reshape( + mixed_x_layer.shape[:2] + + ( + self.num_attention_heads_per_partition, + self.hidden_size_per_attention_head, + ) + ) key_layer = torch.squeeze(key_layer, -2) value_layer = torch.squeeze(value_layer, -2) return query_layer, key_layer, value_layer - def forward(self, hidden_states, attention_mask, - encoder_output=None, inference_params=None, - rotary_pos_emb=None): + def forward( + self, + hidden_states, + attention_mask, + encoder_output=None, + inference_params=None, + rotary_pos_emb=None, 
+ ): # hidden_states: [sq, b, h] # ================================================= @@ -694,15 +838,20 @@ def forward(self, hidden_states, attention_mask, inf_max_seq_len = inference_params.max_sequence_len inf_max_batch_size = inference_params.max_batch_size inference_key_memory = self._allocate_memory( - inf_max_seq_len, inf_max_batch_size) + inf_max_seq_len, inf_max_batch_size + ) inference_value_memory = self._allocate_memory( - inf_max_seq_len, inf_max_batch_size) + inf_max_seq_len, inf_max_batch_size + ) inference_params.key_value_memory_dict[self.layer_number] = ( - inference_key_memory, inference_value_memory) + inference_key_memory, + inference_value_memory, + ) is_first_step = True else: - inference_key_memory, inference_value_memory = \ + inference_key_memory, inference_value_memory = ( inference_params.key_value_memory_dict[self.layer_number] + ) # ===================== # Query, Key, and Value @@ -713,43 +862,45 @@ def forward(self, hidden_states, attention_mask, mixed_x_layer, _ = self.query_key_value(hidden_states) # [sq, b, ((nq + 2 * nkv) * hn)] --> [sq, b, nkv, (nq // nkv + 2), hn] - new_tensor_shape = mixed_x_layer.size()[:-1] + \ - (-1, (self.num_key_value_groups + 2), - self.hidden_size_per_attention_head) + new_tensor_shape = mixed_x_layer.size()[:-1] + ( + -1, + (self.num_key_value_groups + 2), + self.hidden_size_per_attention_head, + ) mixed_x_layer = mixed_x_layer.view(*new_tensor_shape) # [sq, b, nkv, (nq // nkv + 2), hn] --> 3 [sq, b, np, hn] - (query_layer, - key_layer, - value_layer) = self.split_tensor(mixed_x_layer) + (query_layer, key_layer, value_layer) = self.split_tensor(mixed_x_layer) # Repeat kv if self.use_gqa: key_layer = self.repeat_kv(key_layer, self.num_key_value_groups) - value_layer = self.repeat_kv(value_layer, - self.num_key_value_groups) + value_layer = self.repeat_kv(value_layer, self.num_key_value_groups) else: - assert not self.use_gqa, 'GQA + cross-attn not tested yet' + assert not self.use_gqa, "GQA + cross-attn 
not tested yet" # Attention heads [sk, b, h] --> [sk, b, (np * 2 * hn)] mixed_kv_layer, _ = self.key_value(encoder_output) # [sk, b, (np * 2 * hn)] --> [sk, b, np, 2 * hn] - new_tensor_shape = mixed_kv_layer.size()[:-1] + \ - (self.num_attention_heads_per_partition, - 2 * self.hidden_size_per_attention_head) + new_tensor_shape = mixed_kv_layer.size()[:-1] + ( + self.num_attention_heads_per_partition, + 2 * self.hidden_size_per_attention_head, + ) mixed_kv_layer = mixed_kv_layer.view(*new_tensor_shape) # [sk, b, np, 2 * hn] --> 2 [sk, b, np, hn] - (key_layer, - value_layer) = tensor_parallel.split_tensor_along_last_dim(mixed_kv_layer, 2) + (key_layer, value_layer) = tensor_parallel.split_tensor_along_last_dim( + mixed_kv_layer, 2 + ) # Attention head [sq, b, h] --> [sq, b, hp] query_layer, _ = self.query(hidden_states) # [sq, b, hp] --> [sq, b, np, hn] - new_tensor_shape = query_layer.size()[:-1] + \ - (self.num_attention_heads_per_partition, - self.hidden_size_per_attention_head) + new_tensor_shape = query_layer.size()[:-1] + ( + self.num_attention_heads_per_partition, + self.hidden_size_per_attention_head, + ) query_layer = query_layer.view(*new_tensor_shape) # ================================== @@ -761,7 +912,7 @@ def forward(self, hidden_states, attention_mask, if isinstance(rotary_pos_emb, tuple): rotary_pos_emb = rotary_pos_emb else: - rotary_pos_emb = ((rotary_pos_emb,) * 2) + rotary_pos_emb = (rotary_pos_emb,) * 2 if inference_params: batch_start = inference_params.batch_size_offset @@ -771,15 +922,16 @@ def forward(self, hidden_states, attention_mask, sequence_end = sequence_start + key_layer.size(0) assert sequence_end <= inference_key_memory.size(0) # Copy key and values. - inference_key_memory[sequence_start:sequence_end, - batch_start:batch_end, ...] = key_layer - inference_value_memory[sequence_start:sequence_end, - batch_start:batch_end, ...] = value_layer - key_layer = inference_key_memory[ - :sequence_end, batch_start:batch_end, ...] 
+ inference_key_memory[ + sequence_start:sequence_end, batch_start:batch_end, ... + ] = key_layer + inference_value_memory[ + sequence_start:sequence_end, batch_start:batch_end, ... + ] = value_layer + key_layer = inference_key_memory[:sequence_end, batch_start:batch_end, ...] value_layer = inference_value_memory[ - :sequence_end, batch_start:batch_end, ...] - + :sequence_end, batch_start:batch_end, ... + ] # adjust the key rotary positional embedding if rotary_pos_emb is not None: @@ -801,7 +953,6 @@ def forward(self, hidden_states, attention_mask, k_pos_emb = k_pos_emb[:sequence_end, :, :, :] rotary_pos_emb = (q_pos_emb, k_pos_emb) - # ================================== # core attention computation # ================================== @@ -820,37 +971,55 @@ def forward(self, hidden_states, attention_mask, batch_dim_idx = 1 if self.use_flash_attn: if not self.use_flash_attn_triton: - query_layer, key_layer, value_layer = [rearrange(x, 's b ... -> b s ...').contiguous() - for x in (query_layer, key_layer, value_layer)] + query_layer, key_layer, value_layer = [ + rearrange(x, "s b ... -> b s ...").contiguous() + for x in (query_layer, key_layer, value_layer) + ] batch_dim_idx = 0 - context_layer = self.dist_attn(query_layer, key_layer, value_layer, batch_dim_idx) + context_layer = self.dist_attn( + query_layer, key_layer, value_layer, batch_dim_idx + ) if not self.use_flash_attn_triton: - context_layer = rearrange(context_layer, 'b s h d -> s b (h d)').contiguous() + context_layer = rearrange( + context_layer, "b s h d -> s b (h d)" + ).contiguous() else: - context_layer = self.dist_attn(query_layer, key_layer, value_layer, attention_mask) + context_layer = self.dist_attn( + query_layer, key_layer, value_layer, attention_mask + ) else: if self.use_flash_attn: if not self.use_flash_attn_triton: - query_layer, key_layer, value_layer = [rearrange(x, 's b ... 
-> b s ...').contiguous() - for x in (query_layer, key_layer, value_layer)] + query_layer, key_layer, value_layer = [ + rearrange(x, "s b ... -> b s ...").contiguous() + for x in (query_layer, key_layer, value_layer) + ] if self.sequence_parallel: - context_layer = self.core_attention_flash(query_layer, key_layer, value_layer) + context_layer = self.core_attention_flash( + query_layer, key_layer, value_layer + ) else: with tensor_parallel.get_cuda_rng_tracker().fork(): - context_layer = self.core_attention_flash(query_layer, key_layer, value_layer) + context_layer = self.core_attention_flash( + query_layer, key_layer, value_layer + ) if not self.use_flash_attn_triton: - context_layer = rearrange(context_layer, 'b s h d -> s b (h d)').contiguous() + context_layer = rearrange( + context_layer, "b s h d -> s b (h d)" + ).contiguous() else: if self.checkpoint_core_attention: context_layer = self._checkpointed_attention_forward( - query_layer, key_layer, value_layer, attention_mask) + query_layer, key_layer, value_layer, attention_mask + ) else: context_layer = self.core_attention( - query_layer, key_layer, value_layer, attention_mask) + query_layer, key_layer, value_layer, attention_mask + ) # ================= # Output. 
[sq, b, h] @@ -873,22 +1042,21 @@ def bias_dropout_add(x, bias, residual, prob, training): def get_bias_dropout_add(training): def _bias_dropout_add(x, bias, residual, prob): return bias_dropout_add(x, bias, residual, prob, training) + return _bias_dropout_add @torch.jit.script -def bias_dropout_add_fused_train(x: torch.Tensor, - bias: Optional[torch.Tensor], - residual: torch.Tensor, - prob: float) -> torch.Tensor: +def bias_dropout_add_fused_train( + x: torch.Tensor, bias: Optional[torch.Tensor], residual: torch.Tensor, prob: float +) -> torch.Tensor: return bias_dropout_add(x, bias, residual, prob, True) @torch.jit.script -def bias_dropout_add_fused_inference(x: torch.Tensor, - bias: Optional[torch.Tensor], - residual: torch.Tensor, - prob: float) -> torch.Tensor: +def bias_dropout_add_fused_inference( + x: torch.Tensor, bias: Optional[torch.Tensor], residual: torch.Tensor, prob: float +) -> torch.Tensor: return bias_dropout_add(x, bias, residual, prob, False) @@ -899,10 +1067,15 @@ class ParallelTransformerLayer(MegatronModule): output of the same size. """ - def __init__(self, config, - layer_number, layer_type=LayerType.encoder, - self_attn_mask_type=AttnMaskType.padding, - drop_path_rate=0., num_experts=1): + def __init__( + self, + config, + layer_number, + layer_type=LayerType.encoder, + self_attn_mask_type=AttnMaskType.padding, + drop_path_rate=0.0, + num_experts=1, + ): # retriever=None): args = get_args() @@ -910,110 +1083,130 @@ def __init__(self, config, self.layer_number = layer_number self.layer_type = layer_type - self.apply_residual_connection_post_layernorm \ - = config.apply_residual_connection_post_layernorm + self.apply_residual_connection_post_layernorm = ( + config.apply_residual_connection_post_layernorm + ) self.bf16 = config.bf16 self.fp32_residual_connection = config.fp32_residual_connection # Layernorm on the input data. 
- if args.normalization == 'layernorm': - if get_accelerator().device_name() == 'cuda': + if args.normalization == "layernorm": + if get_accelerator().device_name() == "cuda": self.input_layernorm = LayerNorm( config.hidden_size, eps=config.layernorm_epsilon, no_persist_layer_norm=args.no_persist_layer_norm, sequence_parallel=config.sequence_parallel, apply_layernorm_1p=args.apply_layernorm_1p, - mem_efficient_ln=args.mem_efficient_ln) + mem_efficient_ln=args.mem_efficient_ln, + ) else: self.input_layernorm = LayerNorm( - config.hidden_size, - eps=config.layernorm_epsilon) + config.hidden_size, eps=config.layernorm_epsilon + ) else: - self.input_layernorm = RMSNorm(config.hidden_size, config.layernorm_epsilon, - sequence_parallel=config.sequence_parallel) + self.input_layernorm = RMSNorm( + config.hidden_size, + config.layernorm_epsilon, + sequence_parallel=config.sequence_parallel, + ) + # self.input_layernorm = RMSNorm(config.hidden_size, config.layernorm_epsilon_ # Self attention. self.self_attention = ParallelAttention( config, layer_number, attention_type=AttnType.self_attn, - attn_mask_type=self_attn_mask_type) + attn_mask_type=self_attn_mask_type, + ) self.hidden_dropout = config.hidden_dropout self.bias_dropout_fusion = config.bias_dropout_fusion self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0.0 else None # Layernorm on the attention output - if args.normalization == 'layernorm': - if get_accelerator().device_name() == 'cuda': + if args.normalization == "layernorm": + if get_accelerator().device_name() == "cuda": self.post_attention_layernorm = LayerNorm( config.hidden_size, eps=config.layernorm_epsilon, no_persist_layer_norm=not config.persist_layer_norm, sequence_parallel=config.sequence_parallel, apply_layernorm_1p=args.apply_layernorm_1p, - mem_efficient_ln=args.mem_efficient_ln) + mem_efficient_ln=args.mem_efficient_ln, + ) else: self.post_attention_layernorm = LayerNorm( - config.hidden_size, - eps=config.layernorm_epsilon) + 
config.hidden_size, eps=config.layernorm_epsilon + ) else: - self.post_attention_layernorm = RMSNorm(config.hidden_size, config.layernorm_epsilon, - sequence_parallel=config.sequence_parallel) + self.post_attention_layernorm = RMSNorm( + config.hidden_size, + config.layernorm_epsilon, + sequence_parallel=config.sequence_parallel, + ) # Cross attention. - if self.layer_type in (LayerType.decoder, - LayerType.retro_decoder, - LayerType.retro_decoder_with_retriever, - LayerType.retro_encoder): + if self.layer_type in ( + LayerType.decoder, + LayerType.retro_decoder, + LayerType.retro_decoder_with_retriever, + LayerType.retro_encoder, + ): self.inter_attention = ParallelAttention( - config, - layer_number, - attention_type=AttnType.cross_attn) + config, layer_number, attention_type=AttnType.cross_attn + ) # Layernorm on the attention output. - if args.normalization == 'layernorm': + if args.normalization == "layernorm": self.post_inter_attention_layernorm = LayerNorm( config.hidden_size, eps=config.layernorm_epsilon, no_persist_layer_norm=not config.persist_layer_norm, sequence_parallel=config.sequence_parallel, apply_layernorm_1p=args.apply_layernorm_1p, - mem_efficient_ln=args.mem_efficient_ln) + mem_efficient_ln=args.mem_efficient_ln, + ) else: - self.post_inter_attention_layernorm = RMSNorm(config.hidden_size, - config.layernorm_epsilon, - sequence_parallel=config.sequence_parallel) + self.post_inter_attention_layernorm = RMSNorm( + config.hidden_size, + config.layernorm_epsilon, + sequence_parallel=config.sequence_parallel, + ) # MLP self.num_experts = num_experts if args.num_experts_switch is not None: - self.mlp = SwitchMLP(config) # Megatron-LM's MoE + self.mlp = SwitchMLP(config) # Megatron-LM's MoE else: - if self.num_experts <= 1: # dense, not MoE + if self.num_experts <= 1: # dense, not MoE self.mlp = ParallelMLP(config) - else: # DeepSpeed's MoE + else: # DeepSpeed's MoE enable_expert_tensor_parallelism = args.enable_expert_tensor_parallelism - self.mlp = 
MoE(args.hidden_size, - ParallelMLP(config, - moe=True, - enable_expert_tensor_parallelism=enable_expert_tensor_parallelism), - num_experts=self.num_experts, - ep_size=args.moe_expert_parallel_size, - k=args.topk, - use_residual=(args.mlp_type == 'residual'), - capacity_factor=args.moe_train_capacity_factor, - eval_capacity_factor=args.moe_eval_capacity_factor, - min_capacity=args.moe_min_capacity, - drop_tokens=args.moe_token_dropping, - use_tutel=args.use_tutel, - enable_expert_tensor_parallelism=enable_expert_tensor_parallelism, - top2_2nd_expert_sampling=args.moe_top2_2nd_expert_sampling) + self.mlp = MoE( + args.hidden_size, + ParallelMLP( + config, + moe=True, + enable_expert_tensor_parallelism=enable_expert_tensor_parallelism, + ), + num_experts=self.num_experts, + ep_size=args.moe_expert_parallel_size, + k=args.topk, + use_residual=(args.mlp_type == "residual"), + capacity_factor=args.moe_train_capacity_factor, + eval_capacity_factor=args.moe_eval_capacity_factor, + min_capacity=args.moe_min_capacity, + drop_tokens=args.moe_token_dropping, + use_tutel=args.use_tutel, + enable_expert_tensor_parallelism=enable_expert_tensor_parallelism, + top2_2nd_expert_sampling=args.moe_top2_2nd_expert_sampling, + ) # Set bias+dropout+add fusion grad_enable execution handler. 
- TORCH_MAJOR = int(torch.__version__.split('.')[0]) - TORCH_MINOR = int(torch.__version__.split('.')[1]) + TORCH_MAJOR = int(torch.__version__.split(".")[0]) + TORCH_MINOR = int(torch.__version__.split(".")[1]) use_nvfuser = TORCH_MAJOR > 1 or (TORCH_MAJOR == 1 and TORCH_MINOR >= 10) - self.bias_dropout_add_exec_handler = \ - nullcontext if use_nvfuser else torch.enable_grad + self.bias_dropout_add_exec_handler = ( + nullcontext if use_nvfuser else torch.enable_grad + ) if args.retro_add_retriever: retro_args = get_retro_args() @@ -1031,23 +1224,24 @@ def __init__(self, config, pre_process=True, post_process=False, ) - self._retriever_key = 'retriever' + self._retriever_key = "retriever" else: self.retriever = None - def default_decoder_cross_attention(self, - encoder_output, - enc_dec_attn_mask, - layernorm_input, - layernorm_output, - bias_dropout_add_func): - '''Cross attention for a standard encoder-decoder model.''' + def default_decoder_cross_attention( + self, + encoder_output, + enc_dec_attn_mask, + layernorm_input, + layernorm_output, + bias_dropout_add_func, + ): + """Cross attention for a standard encoder-decoder model.""" # Attention. - attention_output, attention_bias = \ - self.inter_attention(layernorm_output, - enc_dec_attn_mask, - encoder_output=encoder_output) + attention_output, attention_bias = self.inter_attention( + layernorm_output, enc_dec_attn_mask, encoder_output=encoder_output + ) # Residual connection. if self.apply_residual_connection_post_layernorm: @@ -1061,21 +1255,17 @@ def default_decoder_cross_attention(self, # Bias-dropout-add. with self.bias_dropout_add_exec_handler(): layernorm_input = bias_dropout_add_func( - attention_output, - attention_bias, - residual, - self.hidden_dropout) + attention_output, attention_bias, residual, self.hidden_dropout + ) # Layer norm. 
layernorm_output = self.post_inter_attention_layernorm(layernorm_input) return layernorm_input, layernorm_output - def retro_encoder_cross_attention(self, - retriever_output, - layernorm_input, - layernorm_output, - bias_dropout_add_func): + def retro_encoder_cross_attention( + self, retriever_output, layernorm_input, layernorm_output, bias_dropout_add_func + ): """Cross attention for Retro encoder. Notation: @@ -1087,16 +1277,15 @@ def retro_encoder_cross_attention(self, r : Number of retrieved tokens (neighbors + continuation). """ - ns, bs, d = layernorm_output.shape # [r, bs * l * k, d] + ns, bs, d = layernorm_output.shape # [r, bs * l * k, d] # Divide sequence dimension into chunks. - chunked_outputs = layernorm_output.reshape(self.retro_retrieved_length, - -1, - self.retro_num_neighbors, - d) - chunked_outputs_before_layer_norm = \ - layernorm_input.reshape(self.retro_retrieved_length, -1, - self.retro_num_neighbors, d) # [r, bs*l, k, d] + chunked_outputs = layernorm_output.reshape( + self.retro_retrieved_length, -1, self.retro_num_neighbors, d + ) + chunked_outputs_before_layer_norm = layernorm_input.reshape( + self.retro_retrieved_length, -1, self.retro_num_neighbors, d + ) # [r, bs*l, k, d] # Per-chunk attention. layernorm_inputs = [] @@ -1104,51 +1293,55 @@ def retro_encoder_cross_attention(self, for k in range(self.retro_num_neighbors): # Attention. - chunked_output = chunked_outputs[:,:,k].contiguous() - attention_output, attention_bias = \ - self.inter_attention( - chunked_output, # Q (neighbor embedding) - None, - encoder_output=retriever_output) # K, V (hidden act) + chunked_output = chunked_outputs[:, :, k].contiguous() + attention_output, attention_bias = self.inter_attention( + chunked_output, # Q (neighbor embedding) + None, + encoder_output=retriever_output, + ) # K, V (hidden act) # Residual connection. 
if self.apply_residual_connection_post_layernorm: residual = chunked_output else: - residual = chunked_outputs_before_layer_norm[:,:,k] + residual = chunked_outputs_before_layer_norm[:, :, k] # Re-enable torch grad to enable fused optimization. with torch.enable_grad(): layernorm_input = bias_dropout_add_func( attention_output, - None if attention_bias is None else attention_bias.expand_as(residual), + ( + None + if attention_bias is None + else attention_bias.expand_as(residual) + ), residual, - self.hidden_dropout) + self.hidden_dropout, + ) layernorm_inputs.append(layernorm_input) # Layer norm. - layernorm_output = \ - self.post_inter_attention_layernorm(layernorm_input) + layernorm_output = self.post_inter_attention_layernorm(layernorm_input) layernorm_outputs.append(layernorm_output) # Concatenate layer norms. # layernorm_input : [r, k * bs * l, d] # layernorm_output : [r, k * bs * l, d] - layernorm_input = \ - torch.stack(layernorm_inputs, dim=1).reshape(ns, bs, d) - layernorm_output = \ - torch.stack(layernorm_outputs, dim=1).reshape(ns, bs, d) + layernorm_input = torch.stack(layernorm_inputs, dim=1).reshape(ns, bs, d) + layernorm_output = torch.stack(layernorm_outputs, dim=1).reshape(ns, bs, d) return layernorm_input, layernorm_output - def retro_decoder_cross_attention(self, - retriever_input, - retriever_output, - retriever_attn_mask, - layernorm_input, - layernorm_output, - inference_params, - bias_dropout_add_func): + def retro_decoder_cross_attention( + self, + retriever_input, + retriever_output, + retriever_attn_mask, + layernorm_input, + layernorm_output, + inference_params, + bias_dropout_add_func, + ): """Cross attention for Retro decoder. 
Notation: @@ -1169,22 +1362,27 @@ def retro_decoder_cross_attention(self, first_ns = ns % self.retro_chunk_length if first_ns > 0: raise Exception("test this case.") - first_chunk, rest_chunk = \ - layernorm_output[:first_ns], layernorm_output[first_ns:] + first_chunk, rest_chunk = ( + layernorm_output[:first_ns], + layernorm_output[first_ns:], + ) first_chunk = torch.nn.functional.pad( first_chunk, (0, 0, 0, 0, 0, self.retro_chunk_length - first_ns), - 'constant', - 0) - chunked_output = \ - torch.cat((first_chunk, rest_chunk), dim=0) # [l * m, bs, d] + "constant", + 0, + ) + chunked_output = torch.cat( + (first_chunk, rest_chunk), dim=0 + ) # [l * m, bs, d] else: - chunked_output = layernorm_output # [l * m, bs, d] - chunked_output = chunked_output \ - .reshape(l, self.retro_chunk_length, bs, d) \ - .permute(1, 2, 0, 3) \ - .reshape(self.retro_chunk_length, bs * l, d) \ + chunked_output = layernorm_output # [l * m, bs, d] + chunked_output = ( + chunked_output.reshape(l, self.retro_chunk_length, bs, d) + .permute(1, 2, 0, 3) + .reshape(self.retro_chunk_length, bs * l, d) .contiguous() + ) # Get Encoder Output retriever_output = self.retriever( @@ -1192,9 +1390,11 @@ def retro_decoder_cross_attention(self, attention_mask=retriever_attn_mask, retriever_output=chunked_output, retriever_attn_mask=retriever_attn_mask, - inference_params=inference_params) # [r, k * bs * l , d] + inference_params=inference_params, + ) # [r, k * bs * l , d] retriever_output = retriever_output.reshape( - self.retro_retrieved_length * self.retro_num_neighbors, bs * l, d) # [r * k, bs * l, d] + self.retro_retrieved_length * self.retro_num_neighbors, bs * l, d + ) # [r * k, bs * l, d] # Chunks. 
pad = (ns - 1) % self.retro_chunk_length @@ -1202,18 +1402,20 @@ def retro_decoder_cross_attention(self, padded_chunks = torch.nn.functional.pad( attending_chunks, (0, 0, 0, 0, 0, self.retro_chunk_length - 1), - 'constant', 0) - padded_chunked_output = padded_chunks \ - .reshape(l, self.retro_chunk_length, bs, d) \ - .permute(1, 2, 0, 3) + "constant", + 0, + ) + padded_chunked_output = padded_chunks.reshape( + l, self.retro_chunk_length, bs, d + ).permute(1, 2, 0, 3) padded_chunked_output = padded_chunked_output.reshape( - self.retro_chunk_length, bs * l, d).contiguous() + self.retro_chunk_length, bs * l, d + ).contiguous() # Encoder output. - attention_output, attention_bias = \ - self.inter_attention(padded_chunked_output, - None, - encoder_output=retriever_output) + attention_output, attention_bias = self.inter_attention( + padded_chunked_output, None, encoder_output=retriever_output + ) # Residual connection. if self.apply_residual_connection_post_layernorm: @@ -1225,17 +1427,27 @@ def retro_decoder_cross_attention(self, with torch.enable_grad(): layernorm_input = bias_dropout_add_func( attention_output, - None if attention_bias is None else attention_bias.expand_as(attention_output), + ( + None + if attention_bias is None + else attention_bias.expand_as(attention_output) + ), torch.zeros_like(attention_output), - self.hidden_dropout) - layernorm_input = layernorm_input \ - .reshape(self.retro_chunk_length, bs, l, d) \ - .permute(2, 0, 1, 3) # [l, m, bs, d] - layernorm_input = layernorm_input.reshape(self.retro_chunk_length * l, bs, d) + self.hidden_dropout, + ) + layernorm_input = layernorm_input.reshape( + self.retro_chunk_length, bs, l, d + ).permute( + 2, 0, 1, 3 + ) # [l, m, bs, d] + layernorm_input = layernorm_input.reshape( + self.retro_chunk_length * l, bs, d + ) layernorm_input = torch.nn.functional.pad( - layernorm_input, - (0, 0, 0, 0, pad, 0), - 'constant', 0)[:ns] # [ns, b, d] + layernorm_input, (0, 0, 0, 0, pad, 0), "constant", 0 + )[ + :ns + ] # 
[ns, b, d] layernorm_input = layernorm_input + residual # Layer norm post the decoder attention @@ -1243,26 +1455,31 @@ def retro_decoder_cross_attention(self, return retriever_output, layernorm_input, layernorm_output - def forward(self, hidden_states, attention_mask=None, - encoder_output=None, enc_dec_attn_mask=None, - retriever_input=None, - retriever_output=None, - retriever_attn_mask=None, - inference_params=None, - rotary_pos_emb=None, - aggregated_moe_loss=None): + def forward( + self, + hidden_states, + attention_mask=None, + encoder_output=None, + enc_dec_attn_mask=None, + retriever_input=None, + retriever_output=None, + retriever_attn_mask=None, + inference_params=None, + rotary_pos_emb=None, + aggregated_moe_loss=None, + ): # hidden_states: [s, b, h] # Layer norm at the beginning of the transformer layer. layernorm_output = self.input_layernorm(hidden_states) # Self attention. - attention_output, attention_bias = \ - self.self_attention( - layernorm_output, - attention_mask, - inference_params=inference_params, - rotary_pos_emb=rotary_pos_emb) + attention_output, attention_bias = self.self_attention( + layernorm_output, + attention_mask, + inference_params=inference_params, + rotary_pos_emb=rotary_pos_emb, + ) # Residual connection. 
if self.apply_residual_connection_post_layernorm: @@ -1287,14 +1504,14 @@ def forward(self, hidden_states, attention_mask=None, attention_bias = attention_bias.expand_as(residual) with self.bias_dropout_add_exec_handler(): layernorm_input = bias_dropout_add_func( - attention_output, - attention_bias, - residual, - self.hidden_dropout) + attention_output, attention_bias, residual, self.hidden_dropout + ) else: - out = torch.nn.functional.dropout(attention_output + attention_bias, - p=self.hidden_dropout, - training=self.training) + out = torch.nn.functional.dropout( + attention_output + attention_bias, + p=self.hidden_dropout, + training=self.training, + ) layernorm_input = residual + self.drop_path(out) # Layer norm post the self attention. @@ -1304,23 +1521,25 @@ def forward(self, hidden_states, attention_mask=None, if self.layer_type == LayerType.encoder: pass elif self.layer_type == LayerType.decoder: - layernorm_input, layernorm_output = \ - self.default_decoder_cross_attention( - encoder_output, - enc_dec_attn_mask, - layernorm_input, - layernorm_output, - bias_dropout_add_func) + layernorm_input, layernorm_output = self.default_decoder_cross_attention( + encoder_output, + enc_dec_attn_mask, + layernorm_input, + layernorm_output, + bias_dropout_add_func, + ) elif self.layer_type == LayerType.retro_encoder: - layernorm_input, layernorm_output = \ - self.retro_encoder_cross_attention( - retriever_output, - layernorm_input, - layernorm_output, - bias_dropout_add_func) - elif self.layer_type in (LayerType.retro_decoder, - LayerType.retro_decoder_with_retriever): - retriever_output, layernorm_input, layernorm_output = \ + layernorm_input, layernorm_output = self.retro_encoder_cross_attention( + retriever_output, + layernorm_input, + layernorm_output, + bias_dropout_add_func, + ) + elif self.layer_type in ( + LayerType.retro_decoder, + LayerType.retro_decoder_with_retriever, + ): + retriever_output, layernorm_input, layernorm_output = ( 
self.retro_decoder_cross_attention( retriever_input, retriever_output, @@ -1328,14 +1547,19 @@ def forward(self, hidden_states, attention_mask=None, layernorm_input, layernorm_output, inference_params, - bias_dropout_add_func) + bias_dropout_add_func, + ) + ) else: - raise Exception("Unsupported layer type, '%s'." % - self.layer_type.name) + raise Exception("Unsupported layer type, '%s'." % self.layer_type.name) # MLP. - moe_loss = torch.tensor(0.0, device=layernorm_output.device, dtype=layernorm_output.dtype) - mlp_bias = torch.tensor(0.0, device=layernorm_output.device, dtype=layernorm_output.dtype) + moe_loss = torch.tensor( + 0.0, device=layernorm_output.device, dtype=layernorm_output.dtype + ) + mlp_bias = torch.tensor( + 0.0, device=layernorm_output.device, dtype=layernorm_output.dtype + ) if self.num_experts == 1: mlp_output, mlp_bias = self.mlp(layernorm_output) @@ -1357,10 +1581,8 @@ def forward(self, hidden_states, attention_mask=None, mlp_bias = mlp_bias.expand_as(residual) with self.bias_dropout_add_exec_handler(): output = bias_dropout_add_func( - mlp_output, - mlp_bias, - residual, - self.hidden_dropout) + mlp_output, mlp_bias, residual, self.hidden_dropout + ) # Jit compiled function creates 'view' tensor. This tensor # potentially gets saved in the MPU checkpoint function context, @@ -1368,16 +1590,16 @@ def forward(self, hidden_states, attention_mask=None, # won't result in memory savings (like the data loader, or # p2p_communication), it serves to document the origin of this # 'view' tensor. 
- output = core.utils.make_viewless_tensor(inp = output, - requires_grad = output.requires_grad, - keep_graph = True) + output = core.utils.make_viewless_tensor( + inp=output, requires_grad=output.requires_grad, keep_graph=True + ) else: if mlp_bias is not None: mlp_output = mlp_output + mlp_bias - out = torch.nn.functional.dropout(mlp_output, - p=self.hidden_dropout, - training=self.training) + out = torch.nn.functional.dropout( + mlp_output, p=self.hidden_dropout, training=self.training + ) output = residual + self.drop_path(out) if self.layer_type == LayerType.retro_decoder_with_retriever: @@ -1406,25 +1628,47 @@ class ParallelTransformerLayerPipe(ParallelTransformerLayer): If no mask is provided, the module will query `self._args.attn_mask` for the mask and only return `super().forward(...)` """ - def __init__(self, config, - layer_number, layer_type=LayerType.encoder, - self_attn_mask_type=AttnMaskType.padding, - drop_path_rate=0., num_experts=1, - input_aggregated_moe_loss=False, return_aggregated_moe_loss=False): + + def __init__( + self, + config, + layer_number, + layer_type=LayerType.encoder, + self_attn_mask_type=AttnMaskType.padding, + drop_path_rate=0.0, + num_experts=1, + input_aggregated_moe_loss=False, + return_aggregated_moe_loss=False, + ): self.input_aggregated_moe_loss = input_aggregated_moe_loss self.return_aggregated_moe_loss = return_aggregated_moe_loss - super().__init__(config, layer_number, layer_type, self_attn_mask_type, drop_path_rate, num_experts) + super().__init__( + config, + layer_number, + layer_type, + self_attn_mask_type, + drop_path_rate, + num_experts, + ) def forward(self, inputs, **kwargs): assert torch.is_tensor(inputs) or isinstance(inputs, tuple) - if not hasattr(self, '_args'): + if not hasattr(self, "_args"): self._args = get_args() - rotary_pos_emb = self._args.rotary_pos_emb if self._args.use_rotary_position_embeddings else None + rotary_pos_emb = ( + self._args.rotary_pos_emb + if 
self._args.use_rotary_position_embeddings + else None + ) if torch.is_tensor(inputs) or len(inputs) == 1: - assert not self.input_aggregated_moe_loss, f'Expecting an input tuple of size >= 2' + assert ( + not self.input_aggregated_moe_loss + ), f"Expecting an input tuple of size >= 2" # No attention mask forwarded, search for args.attn_mask hidden_states, attention_mask = inputs, self._args.attn_mask - output, moe_loss = super().forward(hidden_states, attention_mask, **kwargs, rotary_pos_emb=rotary_pos_emb) + output, moe_loss = super().forward( + hidden_states, attention_mask, **kwargs, rotary_pos_emb=rotary_pos_emb + ) return (output, moe_loss) if self.return_aggregated_moe_loss else output elif len(inputs) in (2, 3): # Attention mask and aggregated_moe can both be activations. @@ -1437,22 +1681,28 @@ def forward(self, inputs, **kwargs): hidden_states, attention_mask = inputs[0], inputs[1] return_attention_mask = True else: - hidden_states, attention_mask, aggregated_moe_loss = inputs[0], inputs[1], inputs[2] + hidden_states, attention_mask, aggregated_moe_loss = ( + inputs[0], + inputs[1], + inputs[2], + ) # Forward aggregated_moe_loss to ParallelTransformerLayer for further accumulation if self.input_aggregated_moe_loss: - kwargs.update({'aggregated_moe_loss': aggregated_moe_loss}) + kwargs.update({"aggregated_moe_loss": aggregated_moe_loss}) - output, moe_loss = super().forward(hidden_states, attention_mask, **kwargs, rotary_pos_emb=rotary_pos_emb) + output, moe_loss = super().forward( + hidden_states, attention_mask, **kwargs, rotary_pos_emb=rotary_pos_emb + ) - ret = (output, ) + ret = (output,) if return_attention_mask: - ret += (attention_mask, ) + ret += (attention_mask,) if self.return_aggregated_moe_loss: - ret += (moe_loss, ) + ret += (moe_loss,) return ret else: - raise RuntimeError('Received more inputs than understood.') + raise RuntimeError("Received more inputs than understood.") class NoopTransformerLayer(MegatronModule): @@ -1475,15 +1725,20 @@ 
def __init__(self, layer_number): super().__init__() self.layer_number = layer_number - def forward(self, hidden_states, attention_mask, - encoder_output=None, enc_dec_attn_mask=None, - inference_params=None): + def forward( + self, + hidden_states, + attention_mask, + encoder_output=None, + enc_dec_attn_mask=None, + inference_params=None, + ): return hidden_states.clone() def _get_num_layers(args, model_type, is_decoder=False): """Compute the number of transformer layers resident on the current rank.""" - is_encoder_and_decoder_model = (model_type == ModelType.encoder_and_decoder) + is_encoder_and_decoder_model = model_type == ModelType.encoder_and_decoder if model_type == ModelType.retro_encoder: num_layers = args.retro_encoder_layers elif parallel_state.get_pipeline_model_parallel_world_size() > 1: @@ -1496,27 +1751,34 @@ def _get_num_layers(args, model_type, is_decoder=False): # the same whether or not a standalone embedding stage is used. num_ranks_in_encoder = ( args.pipeline_model_parallel_split_rank - 1 - if args.standalone_embedding_stage else - args.pipeline_model_parallel_split_rank + if args.standalone_embedding_stage + else args.pipeline_model_parallel_split_rank + ) + num_ranks_in_decoder = ( + args.transformer_pipeline_model_parallel_size - num_ranks_in_encoder + ) + assert args.encoder_num_layers % num_ranks_in_encoder == 0, ( + "encoder_num_layers (%d) must be divisible by number of ranks given to encoder (%d)" + % (args.encoder_num_layers, num_ranks_in_encoder) + ) + assert args.decoder_num_layers % num_ranks_in_decoder == 0, ( + "decoder_num_layers (%d) must be divisible by number of ranks given to decoder (%d)" + % (args.decoder_num_layers, num_ranks_in_decoder) ) - num_ranks_in_decoder = args.transformer_pipeline_model_parallel_size - num_ranks_in_encoder - assert args.encoder_num_layers % num_ranks_in_encoder == 0, \ - 'encoder_num_layers (%d) must be divisible by number of ranks given to encoder (%d)' % (args.encoder_num_layers, 
num_ranks_in_encoder) - assert args.decoder_num_layers % num_ranks_in_decoder == 0, \ - 'decoder_num_layers (%d) must be divisible by number of ranks given to decoder (%d)' % (args.decoder_num_layers, num_ranks_in_decoder) if parallel_state.is_pipeline_stage_before_split(): num_layers = ( 0 if args.standalone_embedding_stage - and parallel_state.get_pipeline_model_parallel_rank() == 0 else - args.encoder_num_layers // num_ranks_in_encoder + and parallel_state.get_pipeline_model_parallel_rank() == 0 + else args.encoder_num_layers // num_ranks_in_encoder ) else: num_layers = args.decoder_num_layers // num_ranks_in_decoder else: assert args.num_layers == args.encoder_num_layers - assert args.num_layers % args.transformer_pipeline_model_parallel_size == 0, \ - 'num_layers must be divisible by transformer_pipeline_model_parallel_size' + assert ( + args.num_layers % args.transformer_pipeline_model_parallel_size == 0 + ), "num_layers must be divisible by transformer_pipeline_model_parallel_size" # When a standalone embedding stage is used, all transformer layers # are divided among pipeline rank >= 1, while on pipeline rank 0, @@ -1525,8 +1787,8 @@ def _get_num_layers(args, model_type, is_decoder=False): num_layers = ( 0 if args.standalone_embedding_stage - and parallel_state.get_pipeline_model_parallel_rank() == 0 else - args.num_layers // args.transformer_pipeline_model_parallel_size + and parallel_state.get_pipeline_model_parallel_rank() == 0 + else args.num_layers // args.transformer_pipeline_model_parallel_size ) else: if not is_decoder: @@ -1536,14 +1798,15 @@ def _get_num_layers(args, model_type, is_decoder=False): return num_layers -def _get_layer_type(model_type, default_layer_type, retro_layer_numbers, - layer_number): +def _get_layer_type(model_type, default_layer_type, retro_layer_numbers, layer_number): args = get_args() if args.retro_add_retriever and layer_number in retro_layer_numbers: if model_type == ModelType.retro_decoder: - return 
LayerType.retro_decoder_with_retriever \ - if layer_number == retro_layer_numbers[0] \ - else LayerType.retro_decoder + return ( + LayerType.retro_decoder_with_retriever + if layer_number == retro_layer_numbers[0] + else LayerType.retro_decoder + ) elif model_type == ModelType.retro_encoder: return LayerType.retro_encoder else: @@ -1552,15 +1815,22 @@ def _get_layer_type(model_type, default_layer_type, retro_layer_numbers, return default_layer_type -def get_num_experts_per_layer(num_experts: list, num_layers: int, expert_interval: int, offset: int = 0) -> list: - assert len(num_experts) == 1 or len(num_experts) == num_layers // expert_interval, \ - 'num_experts must be either a single value or a list of the same length as the number of MoE layers' +def get_num_experts_per_layer( + num_experts: list, num_layers: int, expert_interval: int, offset: int = 0 +) -> list: + assert ( + len(num_experts) == 1 or len(num_experts) == num_layers // expert_interval + ), "num_experts must be either a single value or a list of the same length as the number of MoE layers" if len(num_experts) == 1: num_experts = num_experts * (num_layers // expert_interval) experts_per_layer = [] for i in range(num_layers): layer_num = i + 1 + offset - n_e = num_experts[(layer_num-1) // expert_interval] if layer_num % expert_interval == 0 else 1 + n_e = ( + num_experts[(layer_num - 1) // expert_interval] + if layer_num % expert_interval == 0 + else 1 + ) experts_per_layer.append(n_e) return experts_per_layer @@ -1568,14 +1838,18 @@ def get_num_experts_per_layer(num_experts: list, num_layers: int, expert_interva class ParallelTransformer(MegatronModule): """Transformer class.""" - def __init__(self, config, - model_type, layer_type=LayerType.encoder, - self_attn_mask_type=AttnMaskType.padding, - post_layer_norm=True, - pre_process=True, - post_process=True, - drop_path_rate=0.0, - num_experts=[1]): + def __init__( + self, + config, + model_type, + layer_type=LayerType.encoder, + 
self_attn_mask_type=AttnMaskType.padding, + post_layer_norm=True, + pre_process=True, + post_process=True, + drop_path_rate=0.0, + num_experts=[1], + ): super(ParallelTransformer, self).__init__() args = get_args() @@ -1598,14 +1872,15 @@ def __init__(self, config, self.recompute_granularity = config.recompute_granularity self.recompute_method = config.recompute_method self.recompute_num_layers = config.recompute_num_layers - self.distribute_saved_activations = \ + self.distribute_saved_activations = ( config.distribute_saved_activations and not config.sequence_parallel + ) self.sequence_parallel = config.sequence_parallel # Transformer Engine Init. self.transformer_engine_rope_available = False - if self.transformer_impl == 'transformer_engine': + if self.transformer_impl == "transformer_engine": global transformer_engine import transformer_engine from importlib.metadata import version @@ -1637,45 +1912,53 @@ def __init__(self, config, self.num_microbatches_in_previous_step = -1 self.microbatch_count = 0 - self.checkpoint_core_attention = config.recompute_granularity == 'selective' + self.checkpoint_core_attention = config.recompute_granularity == "selective" # Number of layers. - self.num_layers = _get_num_layers(args, model_type, - layer_type==LayerType.decoder) + self.num_layers = _get_num_layers( + args, model_type, layer_type == LayerType.decoder + ) self.drop_path_rates = [ - rate.item() for rate in - torch.linspace(0, self.drop_path_rate, config.num_layers)] + rate.item() + for rate in torch.linspace(0, self.drop_path_rate, config.num_layers) + ] self.retro_layer_numbers = None if model_type == ModelType.retro_decoder: retro_layer_start = 6 if config.num_layers <= 15 else 9 - self.retro_layer_numbers = \ - np.arange(retro_layer_start, args.num_layers + 1, 3).tolist() + self.retro_layer_numbers = np.arange( + retro_layer_start, args.num_layers + 1, 3 + ).tolist() if model_type == ModelType.retro_encoder: self.retro_layer_numbers = [1] # Transformer layers. 
if args.retro_add_retriever: - assert self.recompute_granularity != 'full', \ - "Full recompute not supported for Retro." - assert args.transformer_impl == 'local', \ - "Transformer engine does not support Retro layers." + assert ( + self.recompute_granularity != "full" + ), "Full recompute not supported for Retro." + assert ( + args.transformer_impl == "local" + ), "Transformer engine does not support Retro layers." + def build_layer(layer_number, n_e): - if args.transformer_impl == 'local': + if args.transformer_impl == "local": current_layer_type = _get_layer_type( - model_type, layer_type, self.retro_layer_numbers, - layer_number) + model_type, layer_type, self.retro_layer_numbers, layer_number + ) return ParallelTransformerLayer( config, layer_number, layer_type=current_layer_type, self_attn_mask_type=self_attn_mask_type, drop_path_rate=self.drop_path_rates[layer_number - 1], - num_experts=n_e) + num_experts=n_e, + ) else: - assert config.num_attention_heads == config.num_key_value_heads, \ - 'Transformer_engine does not support GQA' + assert ( + config.num_attention_heads == config.num_key_value_heads + ), "Transformer_engine does not support GQA" return transformer_engine.pytorch.TransformerLayer( config.hidden_size, config.ffn_hidden_size, @@ -1702,16 +1985,22 @@ def build_layer(layer_number, n_e): layer_type="encoder", drop_path_rate=self.drop_path_rates[layer_number - 1], set_parallel_mode=True, - fuse_qkv_params=True) + fuse_qkv_params=True, + ) if config.virtual_pipeline_model_parallel_size is not None: - assert config.num_layers % config.virtual_pipeline_model_parallel_size == 0, \ - 'num_layers_per_stage must be divisible by ' \ - 'virtual_pipeline_model_parallel_size' + assert ( + config.num_layers % config.virtual_pipeline_model_parallel_size == 0 + ), ( + "num_layers_per_stage must be divisible by " + "virtual_pipeline_model_parallel_size" + ) assert args.model_type != ModelType.encoder_and_decoder # Number of layers in each model chunk is the 
number of layers in the stage, # divided by the number of model chunks in a stage. - self.num_layers = self.num_layers // config.virtual_pipeline_model_parallel_size + self.num_layers = ( + self.num_layers // config.virtual_pipeline_model_parallel_size + ) # With 8 layers, 2 stages, and 4 model chunks, we want an assignment of # layers to stages like (each list is a model chunk): # Stage 0: [0] [2] [4] [6] @@ -1721,12 +2010,14 @@ def build_layer(layer_number, n_e): # Stage 0: [0, 1] [4, 5] # Stage 1: [2, 3] [6, 7] offset = parallel_state.get_virtual_pipeline_model_parallel_rank() * ( - config.num_layers // config.virtual_pipeline_model_parallel_size) + \ - (parallel_state.get_pipeline_model_parallel_rank() * self.num_layers) + config.num_layers // config.virtual_pipeline_model_parallel_size + ) + (parallel_state.get_pipeline_model_parallel_rank() * self.num_layers) else: # Each stage gets a contiguous set of layers. - if args.model_type == ModelType.encoder_and_decoder and \ - parallel_state.get_pipeline_model_parallel_world_size() > 1: + if ( + args.model_type == ModelType.encoder_and_decoder + and parallel_state.get_pipeline_model_parallel_world_size() > 1 + ): pipeline_rank = parallel_state.get_pipeline_model_parallel_rank() if layer_type == LayerType.encoder: offset = pipeline_rank * self.num_layers @@ -1734,7 +2025,9 @@ def build_layer(layer_number, n_e): num_ranks_in_enc = args.pipeline_model_parallel_split_rank offset = (pipeline_rank - num_ranks_in_enc) * self.num_layers else: - offset = parallel_state.get_pipeline_model_parallel_rank() * self.num_layers + offset = ( + parallel_state.get_pipeline_model_parallel_rank() * self.num_layers + ) if self.num_layers == 0: # When a standalone embedding stage is used (e.g., @@ -1746,11 +2039,13 @@ def build_layer(layer_number, n_e): # this, we assign a 'no-op' layer on these ranks, which will # disconnect the input tensor from the output tensor. 
self.num_layers = 1 - self.layers = torch.nn.ModuleList([ NoopTransformerLayer(1) ]) + self.layers = torch.nn.ModuleList([NoopTransformerLayer(1)]) else: # Build the layers self.layers = [] - experts_per_layer = get_num_experts_per_layer(num_experts, self.num_layers, args.expert_interval, offset) + experts_per_layer = get_num_experts_per_layer( + num_experts, self.num_layers, args.expert_interval, offset + ) for i in range(self.num_layers): layer_num = i + 1 + offset n_e = experts_per_layer[i] @@ -1761,41 +2056,54 @@ def build_layer(layer_number, n_e): if model_type == ModelType.retro_encoder: for layer in self.layers: if layer.self_attention.use_flash_attn: - layer.self_attention.core_attention_flash.dropout_p = \ + layer.self_attention.core_attention_flash.dropout_p = ( torch.nn.Dropout(args.retro_encoder_attention_dropout) + ) else: - layer.self_attention.core_attention.attention_dropout.p =\ + layer.self_attention.core_attention.attention_dropout.p = ( args.retro_encoder_attention_dropout + ) layer.hidden_dropout = args.retro_encoder_hidden_dropout if self.post_process and self.post_layer_norm: # Final layer norm before output. 
- if args.normalization == 'layernorm': - if get_accelerator().device_name() == 'cuda': + if args.normalization == "layernorm": + if get_accelerator().device_name() == "cuda": self.final_layernorm = LayerNorm( config.hidden_size, eps=config.layernorm_epsilon, no_persist_layer_norm=args.no_persist_layer_norm, sequence_parallel=config.sequence_parallel, apply_layernorm_1p=args.apply_layernorm_1p, - mem_efficient_ln=args.mem_efficient_ln) + mem_efficient_ln=args.mem_efficient_ln, + ) else: self.final_layernorm = LayerNorm( - config.hidden_size, - eps=config.layernorm_epsilon) + config.hidden_size, eps=config.layernorm_epsilon + ) else: - self.final_layernorm = RMSNorm(config.hidden_size, config.layernorm_epsilon, - sequence_parallel=config.sequence_parallel) + self.final_layernorm = RMSNorm( + config.hidden_size, + config.layernorm_epsilon, + sequence_parallel=config.sequence_parallel, + ) def _get_layer(self, layer_number): return self.layers[layer_number] - def _checkpointed_forward(self, hidden_states, attention_mask, - encoder_output, enc_dec_attn_mask, - rotary_pos_emb, is_first_microbatch): + def _checkpointed_forward( + self, + hidden_states, + attention_mask, + encoder_output, + enc_dec_attn_mask, + rotary_pos_emb, + is_first_microbatch, + ): args = get_args() """Forward method with activation checkpointing.""" + def custom(start, end): def custom_forward(*args, **kwargs): x_, *args = args @@ -1807,11 +2115,14 @@ def custom_forward(*args, **kwargs): x_, moe_loss = output else: x_ = output - moe_loss = torch.tensor(0.0, device=x_.device, dtype=x_.dtype, requires_grad=True) + moe_loss = torch.tensor( + 0.0, device=x_.device, dtype=x_.dtype, requires_grad=True + ) moe_losses.append(moe_loss) return (x_, *moe_losses) + return custom_forward - + if args.deepspeed and args.deepspeed_activation_checkpointing: moe_losses = [] # Make sure memory is freed. 
@@ -1819,9 +2130,18 @@ def custom_forward(*args, **kwargs): l = 0 while l < self.num_layers: hidden_states, *local_moe_losses = tensor_parallel.checkpoint( - custom(l, l + self.checkpoint_num_layers), False, - hidden_states, attention_mask, encoder_output, enc_dec_attn_mask, - None, None, None, None, rotary_pos_emb) + custom(l, l + self.checkpoint_num_layers), + False, + hidden_states, + attention_mask, + encoder_output, + enc_dec_attn_mask, + None, + None, + None, + None, + rotary_pos_emb, + ) moe_losses.extend(local_moe_losses) l += self.checkpoint_num_layers @@ -1829,66 +2149,105 @@ def custom_forward(*args, **kwargs): else: moe_losses = [] te_forward_kwargs = {} - if self.transformer_impl == 'transformer_engine': - te_forward_kwargs['is_first_microbatch'] = is_first_microbatch + if self.transformer_impl == "transformer_engine": + te_forward_kwargs["is_first_microbatch"] = is_first_microbatch if self.transformer_engine_rope_available: - te_forward_kwargs['rotary_pos_emb'] = rotary_pos_emb + te_forward_kwargs["rotary_pos_emb"] = rotary_pos_emb - if self.recompute_method == 'uniform': + if self.recompute_method == "uniform": # Uniformly divide the total number of Transformer layers and # checkpoint the input activation of each divided chunk. # A method to further reduce memory usage reducing checkpoints. 
l = 0 while l < self.num_layers: - if self.transformer_impl == 'transformer_engine': - hidden_states, *local_moe_losses = transformer_engine.pytorch.distributed.checkpoint( - custom(l, l + self.recompute_num_layers), - self.distribute_saved_activations, - tensor_parallel.get_cuda_rng_tracker, - mpu.get_tensor_model_parallel_group(), - hidden_states, attention_mask, encoder_output, - enc_dec_attn_mask, **te_forward_kwargs) + if self.transformer_impl == "transformer_engine": + hidden_states, *local_moe_losses = ( + transformer_engine.pytorch.distributed.checkpoint( + custom(l, l + self.recompute_num_layers), + self.distribute_saved_activations, + tensor_parallel.get_cuda_rng_tracker, + mpu.get_tensor_model_parallel_group(), + hidden_states, + attention_mask, + encoder_output, + enc_dec_attn_mask, + **te_forward_kwargs, + ) + ) else: hidden_states, *local_moe_losses = tensor_parallel.checkpoint( custom(l, l + self.recompute_num_layers), self.distribute_saved_activations, - hidden_states, attention_mask, - encoder_output, enc_dec_attn_mask, - None, None, None, None, rotary_pos_emb) + hidden_states, + attention_mask, + encoder_output, + enc_dec_attn_mask, + None, + None, + None, + None, + rotary_pos_emb, + ) moe_losses.extend(local_moe_losses) l += self.recompute_num_layers - elif self.recompute_method == 'block': + elif self.recompute_method == "block": # Checkpoint the input activation of only a set number of individual # Transformer layers and skip the rest. # A method fully use the device memory removing redundant re-computation. 
for l in range(self.num_layers): if l < self.recompute_num_layers: - if self.transformer_impl == 'transformer_engine': - hidden_states, *local_moe_losses = transformer_engine.pytorch.distributed.checkpoint( - custom(l, l + 1), - self.distribute_saved_activations, - tensor_parallel.get_cuda_rng_tracker, - mpu.get_tensor_model_parallel_group(), - hidden_states, attention_mask, encoder_output, - enc_dec_attn_mask, **te_forward_kwargs) + if self.transformer_impl == "transformer_engine": + hidden_states, *local_moe_losses = ( + transformer_engine.pytorch.distributed.checkpoint( + custom(l, l + 1), + self.distribute_saved_activations, + tensor_parallel.get_cuda_rng_tracker, + mpu.get_tensor_model_parallel_group(), + hidden_states, + attention_mask, + encoder_output, + enc_dec_attn_mask, + **te_forward_kwargs, + ) + ) else: - hidden_states, *local_moe_losses = tensor_parallel.checkpoint( - custom(l, l + 1), - self.distribute_saved_activations, - hidden_states, attention_mask, - encoder_output, enc_dec_attn_mask, - None, None, None, None, rotary_pos_emb) + hidden_states, *local_moe_losses = ( + tensor_parallel.checkpoint( + custom(l, l + 1), + self.distribute_saved_activations, + hidden_states, + attention_mask, + encoder_output, + enc_dec_attn_mask, + None, + None, + None, + None, + rotary_pos_emb, + ) + ) else: - if self.transformer_impl == 'transformer_engine': + if self.transformer_impl == "transformer_engine": hidden_states, *local_moe_losses = custom(l, l + 1)( - hidden_states, attention_mask, encoder_output, - enc_dec_attn_mask, **te_forward_kwargs) + hidden_states, + attention_mask, + encoder_output, + enc_dec_attn_mask, + **te_forward_kwargs, + ) else: hidden_states, *local_moe_losses = custom(l, l + 1)( - hidden_states, attention_mask, - encoder_output, enc_dec_attn_mask, - None, None, None, None, rotary_pos_emb) - + hidden_states, + attention_mask, + encoder_output, + enc_dec_attn_mask, + None, + None, + None, + None, + rotary_pos_emb, + ) + 
moe_losses.extend(local_moe_losses) else: raise ValueError("Invalid activation recompute method.") @@ -1904,19 +2263,25 @@ def set_input_tensor(self, input_tensor): forward_step_func""" self.input_tensor = input_tensor - def forward(self, hidden_states, attention_mask, - encoder_output=None, enc_dec_attn_mask=None, - retriever_input=None, - retriever_output=None, - retriever_attn_mask=None, - inference_params=None, - rotary_pos_emb=None): + def forward( + self, + hidden_states, + attention_mask, + encoder_output=None, + enc_dec_attn_mask=None, + retriever_input=None, + retriever_output=None, + retriever_attn_mask=None, + inference_params=None, + rotary_pos_emb=None, + ): # hidden_states: [s, b, h] # Checks. if inference_params: - assert self.recompute_granularity is None, \ - 'inference does not work with activation checkpointing' + assert ( + self.recompute_granularity is None + ), "inference does not work with activation checkpointing" # TODO: Below old DeepSpeed code are commented because it's unsure whether # it is still relevant. 
@@ -1971,64 +2336,77 @@ def forward(self, hidden_states, attention_mask, with rng_context: # The fp8_autocast context manager is a no-op when enabled=True # The if...else serves to short circuit name resolution for fp8_autocast - with transformer_engine.pytorch.fp8_autocast( - enabled=self.use_fp8, - fp8_recipe=self.fp8_recipe, - fp8_group=self.fp8_group - ) if self.use_fp8 else nullcontext(): + with ( + transformer_engine.pytorch.fp8_autocast( + enabled=self.use_fp8, + fp8_recipe=self.fp8_recipe, + fp8_group=self.fp8_group, + ) + if self.use_fp8 + else nullcontext() + ): # Determine if the current iteration is first microbatch if self.num_microbatches_in_previous_step != get_num_microbatches(): - self.microbatch_count = 0 # Reset count on new batch size rampup interval + self.microbatch_count = ( + 0 # Reset count on new batch size rampup interval + ) self.num_microbatches_in_previous_step = get_num_microbatches() - is_first_microbatch = self.microbatch_count % get_num_microbatches() == 0 + is_first_microbatch = ( + self.microbatch_count % get_num_microbatches() == 0 + ) # Forward pass. 
moe_losses = [] if self.checkpoint_activations: - hidden_states, moe_losses = self._checkpointed_forward(hidden_states, - attention_mask, - encoder_output, - enc_dec_attn_mask, - rotary_pos_emb, - is_first_microbatch) - elif self.recompute_granularity == 'full': - hidden_states, moe_losses = self._checkpointed_forward(hidden_states, - attention_mask, - encoder_output, - enc_dec_attn_mask, - rotary_pos_emb, - is_first_microbatch) + hidden_states, moe_losses = self._checkpointed_forward( + hidden_states, + attention_mask, + encoder_output, + enc_dec_attn_mask, + rotary_pos_emb, + is_first_microbatch, + ) + elif self.recompute_granularity == "full": + hidden_states, moe_losses = self._checkpointed_forward( + hidden_states, + attention_mask, + encoder_output, + enc_dec_attn_mask, + rotary_pos_emb, + is_first_microbatch, + ) else: forward_kwargs = { - 'encoder_output': encoder_output, - 'enc_dec_attn_mask': enc_dec_attn_mask, - 'inference_params': inference_params, + "encoder_output": encoder_output, + "enc_dec_attn_mask": enc_dec_attn_mask, + "inference_params": inference_params, } - if self.transformer_impl == 'transformer_engine': - forward_kwargs['is_first_microbatch'] = is_first_microbatch - forward_kwargs['checkpoint_core_attention'] = self.checkpoint_core_attention + if self.transformer_impl == "transformer_engine": + forward_kwargs["is_first_microbatch"] = is_first_microbatch + forward_kwargs["checkpoint_core_attention"] = ( + self.checkpoint_core_attention + ) if self.transformer_engine_rope_available: - forward_kwargs['rotary_pos_emb'] = rotary_pos_emb + forward_kwargs["rotary_pos_emb"] = rotary_pos_emb else: - forward_kwargs['rotary_pos_emb'] = rotary_pos_emb - forward_kwargs['retriever_input'] = retriever_input - forward_kwargs['retriever_output'] = retriever_output - forward_kwargs['retriever_attn_mask'] = retriever_attn_mask + forward_kwargs["rotary_pos_emb"] = rotary_pos_emb + forward_kwargs["retriever_input"] = retriever_input + 
forward_kwargs["retriever_output"] = retriever_output + forward_kwargs["retriever_attn_mask"] = retriever_attn_mask for index in range(self.num_layers): layer = self._get_layer(index) hidden_states = layer( - hidden_states, - attention_mask, - **forward_kwargs) + hidden_states, attention_mask, **forward_kwargs + ) # First Retro decoder layer returns both hidden_states # and retriever_output. Make retriever_output available # to subsequence Retro layers. if isinstance(hidden_states, tuple): - assert (len(hidden_states) == 2 or len(hidden_states) == 3) + assert len(hidden_states) == 2 or len(hidden_states) == 3 if len(hidden_states) == 2: if not self.ds_inference: hidden_states, moe_loss = hidden_states @@ -2054,6 +2432,7 @@ def forward(self, hidden_states, attention_mask, return (hidden_states, *moe_losses) + class LMHeadPipe(MegatronModule): """ Arguments: @@ -2067,11 +2446,13 @@ class LMHeadPipe(MegatronModule): def __init__(self, hidden_size, vocab_size, config): args = get_args() super(LMHeadPipe, self).__init__() - self.lm_head = tensor_parallel.ColumnParallelLinear(input_size=hidden_size, - output_size=vocab_size, - bias=False, - config=config, - init_method=config.init_method,) + self.lm_head = tensor_parallel.ColumnParallelLinear( + input_size=hidden_size, + output_size=vocab_size, + bias=False, + config=config, + init_method=config.init_method, + ) def forward(self, inputs, **kwargs): assert torch.is_tensor(inputs) or isinstance(inputs, tuple) @@ -2080,10 +2461,10 @@ def forward(self, inputs, **kwargs): else: hidden_states = inputs - if not hasattr(self, '_args'): + if not hasattr(self, "_args"): self._args = get_args() - if hasattr(self._args, 'attn_mask'): + if hasattr(self._args, "attn_mask"): attention_mask = None else: attention_mask = inputs[1] @@ -2091,7 +2472,7 @@ def forward(self, inputs, **kwargs): logits, _ = self.lm_head(hidden_states) # If cmd args has attn_mask, we don't forward it as an activation. 
- if hasattr(self._args, 'attn_mask'): + if hasattr(self._args, "attn_mask"): return logits else: return logits, attention_mask From 12f6f8e5563e5fa1d21838c2a273e8cd5e096c0c Mon Sep 17 00:00:00 2001 From: Huihuo Zheng Date: Thu, 12 Sep 2024 05:11:58 +0000 Subject: [PATCH 44/92] fix check --- megatron/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/megatron/utils.py b/megatron/utils.py index 67dc366d50..8a9f3e7858 100644 --- a/megatron/utils.py +++ b/megatron/utils.py @@ -41,7 +41,7 @@ try: import dftracer except: - _DFTRACER_EXIST=True + _DFTRACER_EXIST=False try: import dlio_profiler @@ -80,7 +80,7 @@ def flush(self): def reset(self): return def log_static(self, func): - return func + return class dftracer(object): def __init__(self,): self.type = None From 5ac877a880447a7b0028d4959dd290e56eafe512 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Thu, 12 Sep 2024 09:17:33 -0500 Subject: [PATCH 45/92] Update `megatron/training_log_alcf.py` --- megatron/training_log_alcf.py | 72 ++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/megatron/training_log_alcf.py b/megatron/training_log_alcf.py index 7229549e24..dcd872971d 100644 --- a/megatron/training_log_alcf.py +++ b/megatron/training_log_alcf.py @@ -39,7 +39,7 @@ # from megatron.optimizer_param_scheduler import OptimizerParamScheduler # from megatron.profiler import on_step_begin, on_step_end, setup_profiler, trigger # from megatron.utils import check_adlr_autoresume_termination -from megatron.utils import found_kill_switch, unwrap_model +# from megatron.utils import found_kill_switch, unwrap_model import ezpz as ez # from megatron.utils import calc_params_l2_norm @@ -78,6 +78,37 @@ class InteropLoggingTool(Enum): log.setLevel(LOG_LEVEL) if RANK == 0 else log.setLevel("CRITICAL") +def num_floating_point_operations(args, batch_size): + # Group Query Attention. 
+ # if not args.group_query_attention: + if not args.num_key_value_heads: + args.num_key_value_heads = args.num_attention_heads + # args.num_query_groups = args.num_attention_heads + # MoE. + # num_experts_routed_to = 1 if args.num_experts is None else args.moe_router_topk + num_experts_routed_to = 1 if args.num_experts is None else args.topk + gated_linear_multiplier = 3 / 2 if args.swiglu else 1 + return ( + 12 + * batch_size + * args.seq_length + * args.num_layers + * args.hidden_size + * args.hidden_size + * ( + 1 + + ( + (args.ffn_hidden_size / args.hidden_size) + * num_experts_routed_to + * gated_linear_multiplier + ) + + (args.num_key_value_heads / args.num_attention_heads) + + (args.seq_length / args.hidden_size) + + (args.padded_vocab_size / (2 * args.num_layers * args.hidden_size)) + ) + ) + + def training_log( loss_dict, total_loss_dict, @@ -116,12 +147,10 @@ def training_log( ) # Update losses and set nan iterations got_nan = False + _zero = torch.tensor([0.0]).to(DEVICE) for key in loss_dict: if not skipped_iter: - total_loss_dict[key] = ( - total_loss_dict.get(key, get_accelerator().FloatTensor([0.0])) - + loss_dict[key] - ) + total_loss_dict[key] = total_loss_dict.get(key, _zero) + loss_dict[key] else: value = loss_dict[key].float().sum().item() is_nan = value == float("inf") or value == -float("inf") or value != value @@ -170,7 +199,7 @@ def training_log( # Tensorboard values. # Timer requires all the ranks to call. 
if args.log_timers_to_tensorboard and ( - iteration % args.tensorboard_log_interval == 0 + iteration % args.tensorboard_log_interval == 0 and writer is not None ): timers.write(timers_to_log, writer, iteration, normalizer=total_iterations) if writer and (iteration % args.tensorboard_log_interval == 0): @@ -389,18 +418,15 @@ def training_log( abs(param.max().item()), abs(param.min().item()), ) - # print('step {} rank {} before sync opt_stats {}, {}'.format(iteration, torch.distributed.get_rank(), opt_stats_2, opt_stats)) if args.zero_stage > 0: # ZeRO partiions optimizer states - # opt_stats = opt_stats.clone().detach() - # opt_stats = get_accelerator().FloatTensor - opt_stats = get_accelerator().FloatTensor(opt_stats) + # opt_stats = get_accelerator().FloatTensor(opt_stats) + opt_stats = torch.tensor(opt_stats).to(DEVICE) torch.distributed.all_reduce( opt_stats, group=mpu.get_sequence_data_parallel_group() ) # opt_stats_2 = get_accelerator().FloatTensor(opt_stats_2) - # opt_stats_2 = opt_stats_2.clone().detach() - opt_stats_2 = get_accelerator().FloatTensor(opt_stats_2) + opt_stats_2 = torch.tensor(opt_stats_2).to(DEVICE) torch.distributed.all_reduce( opt_stats_2, op=torch.distributed.ReduceOp.MAX, @@ -408,13 +434,13 @@ def training_log( ) if args.tensor_model_parallel_size > 1: - # opt_stats = opt_stats.clone().detach() - opt_stats = get_accelerator().FloatTensor(opt_stats) + opt_stats = torch.tensor(opt_stats).to(DEVICE) + # opt_stats = get_accelerator().FloatTensor(opt_stats) torch.distributed.all_reduce( opt_stats, group=mpu.get_tensor_model_parallel_group() ) - # opt_stats_2 = opt_stats_2.clone().detach() - opt_stats_2 = get_accelerator().FloatTensor(opt_stats_2) + # opt_stats_2 = get_accelerator().FloatTensor(opt_stats_2) + opt_stats_2 = torch.tensor(opt_stats_2).to(DEVICE) torch.distributed.all_reduce( opt_stats_2, op=torch.distributed.ReduceOp.MAX, @@ -422,18 +448,19 @@ def training_log( ) if args.pipeline_model_parallel_size > 1: - # opt_stats = 
opt_stats.clone().detach() - opt_stats = get_accelerator().FloatTensor(opt_stats) + # opt_stats = get_accelerator().FloatTensor(opt_stats) + opt_stats = torch.tensor(opt_stats).to(DEVICE) torch.distributed.all_reduce( opt_stats, group=mpu.get_pipeline_model_parallel_group() ) - # opt_stats_2 = opt_stats_2.clone().detach() - opt_stats_2 = get_accelerator().FloatTensor(opt_stats_2) + # opt_stats_2 = get_accelerator().FloatTensor(opt_stats_2) + opt_stats_2 = torch.tensor(opt_stats_2).to(DEVICE) torch.distributed.all_reduce( opt_stats_2, op=torch.distributed.ReduceOp.MAX, group=mpu.get_pipeline_model_parallel_group(), ) + wandb_metrics |= { "optimizer/learning_rate": learning_rate, "optimizer/iteration": args.iteration, @@ -452,7 +479,8 @@ def training_log( "optimizer/weight_abs_max": opt_stats_2[3], } # print('step {} rank {} after sync opt_stats {}, {}'.format(iteration, torch.distributed.get_rank(), opt_stats_2, opt_stats)) - if writer and is_last_rank(): + # if writer and is_last_rank(): + if writer is not None and RANK == 0: writer.add_scalar( "optimizer/variance_l2 vs tokens", opt_stats[0] ** 0.5, @@ -638,7 +666,7 @@ def training_log( ) if avg > 0.0: log_string += " {}={:.6f} |".format(key, avg) - total_loss_dict[key] = get_accelerator().FloatTensor([0.0]) + total_loss_dict[key] = torch.tensor([0.0]).to(DEVICE) if loss_scale is not None: log_string += " loss_scale={:.1f} |".format(loss_scale) wandb_metrics |= {"loss/loss_scale": loss_scale} From b3e0f6f17bba765b9137204b6aac042f94d79fd6 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Thu, 12 Sep 2024 21:46:42 -0500 Subject: [PATCH 46/92] Update `megatron/training.py` --- megatron/training.py | 1106 +++--------------------------------------- 1 file changed, 75 insertions(+), 1031 deletions(-) diff --git a/megatron/training.py b/megatron/training.py index ad5b1b908c..9841386049 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -1,79 +1,77 @@ # Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. 
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. """Pretrain utilities.""" +import time + +# The earliest we can measure the start time. +_TRAIN_START_TIME = time.time() +from collections import OrderedDict from datetime import datetime +import json +import logging import math -import sys import os +import sys import time -import json -# noqa: E402 -# The earliest we can measure the start time. -_TRAIN_START_TIME = time.time() +import deepspeed +from deepspeed.accelerator import get_accelerator +from deepspeed.compression.compress import init_compression, redundancy_clean +from deepspeed.runtime.data_pipeline.data_routing.helper import ( + convert_to_random_ltd, +) +import ezpz as ez import torch import torch.distributed as tdist -from collections import OrderedDict from torch.nn.parallel.distributed import DistributedDataParallel as torchDDP -from enum import Enum - -from megatron import get_args -from megatron import get_signal_handler -from megatron import get_timers -from megatron import get_tensorboard_writer -from megatron import get_wandb_writer -from megatron import get_current_global_batch_size -from megatron import get_num_microbatches -from megatron import is_last_rank -from megatron import update_num_microbatches -from megatron.core import mpu, tensor_parallel -# from megatron import print_rank_0, is_rank_0 -# from megatron import print_rank_last -from megatron.checkpointing import load_checkpoint -from megatron.checkpointing import save_checkpoint -from megatron.model import Float16Module -from megatron.model import GPTModel +from megatron import ( + get_args, + get_current_global_batch_size, + get_num_microbatches, + get_signal_handler, + get_tensorboard_writer, + get_timers, + is_last_rank, + update_num_microbatches, +) +from megatron.arguments import core_transformer_config_from_args +from megatron.checkpointing import load_checkpoint, save_checkpoint +from megatron.core import mpu, tensor_parallel from megatron.core.enums import 
ModelType +from megatron.core.pipeline_parallel import get_forward_backward_func +from megatron.data.data_samplers import build_pretraining_data_loader +from megatron.initialize import ( + initialize_megatron, + set_jit_fusion_options, + write_args_to_tensorboard, +) +from megatron.model import Float16Module, GPTModel +from megatron.model import DistributedDataParallel as LocalDDP +from megatron.model.transformer import ParallelTransformerLayer +from megatron.model.vision.knn_monitor import compute_feature_bank from megatron.optimizer import get_megatron_optimizer -from megatron.initialize import initialize_megatron -from megatron.initialize import write_args_to_tensorboard -from megatron.initialize import set_jit_fusion_options from megatron.optimizer_param_scheduler import OptimizerParamScheduler -from megatron.model import DistributedDataParallel as LocalDDP -from megatron.utils import check_adlr_autoresume_termination -from megatron.utils import unwrap_model, found_kill_switch -from megatron.data.data_samplers import build_pretraining_data_loader -from megatron.utils import calc_params_l2_norm -from megatron.core.pipeline_parallel import get_forward_backward_func +from megatron.training_log import training_log from megatron.utils import ( - report_memory, - throughput_calculator, + PerfTrace, + Profile, + calc_params_l2_norm, + check_adlr_autoresume_termination, checkpoint_throughput_calculator, + found_kill_switch, + # num_floating_point_operations, + # report_memory, + # throughput_calculator, + unwrap_model, update_rotary_pos_emb, ) -from megatron.model.vision.knn_monitor import compute_feature_bank -from megatron.arguments import core_transformer_config_from_args -from megatron.profiler import setup_profiler, trigger, on_step_begin, on_step_end -from megatron.training_log import training_log as training_log_microsoft -from megatron.training_log_alcf import training_log as training_log - -from megatron.utils import PerfTrace, Profile - -try: - import wandb 
-except (ImportError, ModuleNotFoundError): - wandb = None - -import deepspeed -from deepspeed.accelerator import get_accelerator -from deepspeed.compression.compress import init_compression, redundancy_clean -from deepspeed.runtime.data_pipeline.data_routing.helper import convert_to_random_ltd -from megatron.model.transformer import ParallelTransformerLayer -import ezpz as ez -import logging +# noqa: E402 +# The earliest we can measure the start time. +_TRAIN_START_TIME = time.time() +# noqa dlp = Profile("TRAINING") @@ -89,6 +87,11 @@ LOG_LEVEL: str = str(os.environ.get("LOG_LEVEL", "INFO")).upper() log.setLevel(LOG_LEVEL) if RANK == 0 else log.setLevel("CRITICAL") +# try: +# import wandb +# except (ImportError, ModuleNotFoundError): +# wandb = None + def print_datetime(string): """Note that this call will sync across all ranks.""" @@ -97,37 +100,6 @@ def print_datetime(string): log.info("[" + string + "] datetime={} ".format(time_str)) -def num_floating_point_operations(args, batch_size): - # Group Query Attention. - # if not args.group_query_attention: - if not args.num_key_value_heads: - args.num_key_value_heads = args.num_attention_heads - # args.num_query_groups = args.num_attention_heads - # MoE. - # num_experts_routed_to = 1 if args.num_experts is None else args.moe_router_topk - num_experts_routed_to = 1 if args.num_experts is None else args.topk - gated_linear_multiplier = 3 / 2 if args.swiglu else 1 - return ( - 12 - * batch_size - * args.seq_length - * args.num_layers - * args.hidden_size - * args.hidden_size - * ( - 1 - + ( - (args.ffn_hidden_size / args.hidden_size) - * num_experts_routed_to - * gated_linear_multiplier - ) - + (args.num_key_value_heads / args.num_attention_heads) - + (args.seq_length / args.hidden_size) - + (args.padded_vocab_size / (2 * args.num_layers * args.hidden_size)) - ) - ) - - """ Since v0.9.0, deepspeed.initialize() has forbidden simultaneous setting of args.deepspeed_config (Path) and ds_config dict. 
So, we use ds_config dict which is the more flexible option @@ -198,9 +170,8 @@ def pretrain( args_defaults=args_defaults, external_args=external_args, ) - args = get_args() - + assert args is not None if found_kill_switch(): print_datetime(f"Detected kill switch at {args.kill_switch_file}. Exiting") sys.exit() @@ -231,8 +202,6 @@ def pretrain( ) ) print_datetime("after megatron is initialized") - args = get_args() - assert args is not None if os.getenv("DLIO_PROFILER_DATASET_DIR") is not None: extra_trace_path = os.environ["DLIO_PROFILER_DATASET_DIR"] else: @@ -660,7 +629,7 @@ def load_model_weights_only(model_provider_func): model = [model] print_datetime("before load checkpoint") if args.load is not None: - iteration = load_checkpoint( + _ = load_checkpoint( model, optimizer, lr_scheduler, strict=True, load_only_weights=True ) print_datetime("after load checkpoint weights") @@ -736,7 +705,7 @@ def setup_model_and_optimizer( # Only need to build dataset on tp rank 0 since Megatron has the # broadcast_data() function that broadcast data from tp rank 0. if mpu.get_tensor_model_parallel_rank() == 0: - log.info(f"Caught 'mpu.get_tensor_model_parallel_rank() == 0'") + log.info("Caught 'mpu.get_tensor_model_parallel_rank() == 0'") # Number of train/valid/test samples. if args.train_samples: train_samples = args.train_samples @@ -911,7 +880,7 @@ def train_step( # Empty unused memory. if args.empty_unused_memory_level >= 1: - get_accelerator().empty_cache() + torch.cuda.empty_cache() # Reduce gradients. if not args.deepspeed: @@ -948,7 +917,7 @@ def train_step( # Update learning rate. if args.deepspeed: - skipped_iter = 0 if update_successful else 1 + skipped_iter = 0 grad_norm = model[0].get_global_grad_norm() num_zeros_in_grad = None loss_reduced = {} @@ -970,7 +939,7 @@ def train_step( # Empty unused memory. 
if args.empty_unused_memory_level >= 2: - get_accelerator().empty_cache() + torch.cuda.empty_cache() if mpu.is_pipeline_last_stage(ignore_virtual=True): # Average loss across microbatches. @@ -984,854 +953,6 @@ def train_step( return {}, skipped_iter, grad_norm, num_zeros_in_grad -# <<<<<<< HEAD -# @dlp.log -# def training_log( -# loss_dict, -# total_loss_dict, -# learning_rate, -# iteration, -# loss_scale, -# report_memory_flag, -# skipped_iter, -# grad_norm, -# params_norm, -# num_zeros_in_grad, -# model=None, -# optimizer=None, -# ): -# """Log training information such as losses, timing, ....""" -# args = get_args() -# timers = get_timers() -# writer = get_tensorboard_writer() -# assert args is not None and timers is not None -# wandb_metrics = {} -# ======= -# class InteropLoggingTool(Enum): -# TENSORBOARD = 1 -# WANDB = 2 -# -# -# class interop_tool_logger: -# def __init__(self, tb_writer=None, wandb_writer=None): -# self.tb_writer = tb_writer -# self.wandb_writer = wandb_writer -# self.custom_x_axis = [] -# self.custom_y_axis = {} -# self.args = get_args() -# if not hasattr(self.args, "logger_iteration"): -# self.args.logger_iteration = 1 -# -# def is_enabled(self): -# return self.tb_writer or self.wandb_writer -# -# def add_scalar(self, key, scalar_value, step, custom_step_name=None, \ -# tool_list=[InteropLoggingTool.TENSORBOARD, InteropLoggingTool.WANDB]): -# if self.tb_writer and \ -# InteropLoggingTool.TENSORBOARD in tool_list: -# self.tb_writer.add_scalar(key, scalar_value, step) -# -# if self.wandb_writer and \ -# InteropLoggingTool.WANDB in tool_list: -# if not custom_step_name: -# self.wandb_writer.log({key: scalar_value}, step=step) -# if self.args.logger_iteration < step: -# # Updating iteration -# self.args.logger_iteration = step -# -# else: -# if custom_step_name not in self.custom_x_axis: -# self.custom_x_axis.append(custom_step_name) -# wandb.define_metric(custom_step_name) -# -# if key not in self.custom_y_axis: -# self.custom_y_axis[key] = 
custom_step_name -# wandb.define_metric(key, step_metric=custom_step_name) -# -# self.wandb_writer.log({key: scalar_value, custom_step_name: step}, \ -# step=self.args.logger_iteration) -# -# -# def add_scalar_to_tb(self, key, scalar_value, step): -# return self.add_scalar(key, scalar_value, step, None, [InteropLoggingTool.TENSORBOARD]) -# -# def add_scalar_to_wandb(self, key, scalar_value, step, custom_step_name=None): -# return self.add_scalar(key, scalar_value, step, custom_step_name, [InteropLoggingTool.WANDB]) -# -# def add_images(self, key, img_tensor, step=None): -# if self.tb_writer: -# self.tb_writer.add_images(key, img_tensor, step) -# -# if self.wandb_writer: -# self.wandb_writer.log({key: wandb.Image(img_tensor)}, step) - - -# def training_log(loss_dict, total_loss_dict, learning_rate, iteration, -# loss_scale, report_memory_flag, skipped_iter, -# grad_norm, params_norm, num_zeros_in_grad, -# model=None, optimizer=None): -# """Log training information such as losses, timing, ....""" -# args = get_args() -# timers = get_timers() -# writer = interop_tool_logger(tb_writer=get_tensorboard_writer(), \ -# wandb_writer=get_wandb_writer()) -# x_axis_samples = 'Samples' -# x_axis_tokens = 'Tokens' -# >>>>>>> 0d6e3793a1fc06eded9764ef15ad12bcc0281101 -# # Advanced, skipped, and Nan iterations. -# advanced_iters_key = "advanced iterations" -# skipped_iters_key = "skipped iterations" -# nan_iters_key = "nan iterations" -# # Advanced iterations. -# if not skipped_iter: -# total_loss_dict[advanced_iters_key] = ( -# total_loss_dict.get(advanced_iters_key, 0) + 1 -# ) -# else: -# if advanced_iters_key not in total_loss_dict: -# total_loss_dict[advanced_iters_key] = 0 -# # Skipped iterations. 
-# total_loss_dict[skipped_iters_key] = ( -# total_loss_dict.get(skipped_iters_key, 0) + skipped_iter -# ) -# # Update losses and set nan iterations -# got_nan = False -# for key in loss_dict: -# if not skipped_iter: -# total_loss_dict[key] = ( -# total_loss_dict.get(key, get_accelerator().FloatTensor([0.0])) -# + loss_dict[key] -# ) -# else: -# value = loss_dict[key].float().sum().item() -# is_nan = value == float("inf") or value == -float("inf") or value != value -# got_nan = got_nan or is_nan -# total_loss_dict[nan_iters_key] = total_loss_dict.get(nan_iters_key, 0) + int( -# got_nan -# ) -# -# # Logging. -# timers_to_log = [ -# "forward-backward", -# "forward-compute", -# "backward-compute", -# "batch-generator", -# "forward-recv", -# "forward-send", -# "backward-recv", -# "backward-send", -# "forward-send-forward-recv", -# "forward-send-backward-recv", -# "backward-send-forward-recv", -# "backward-send-backward-recv", -# "forward-backward-send-forward-backward-recv", -# "layernorm-grads-all-reduce", -# "embedding-grads-all-reduce", -# "grads-all-reduce", -# "grads-reduce-scatter", -# "params-all-gather", -# "optimizer-copy-to-main-grad", -# "optimizer-unscale-and-check-inf", -# "optimizer-clip-main-grad", -# "optimizer-count-zeros", -# "optimizer-inner-step", -# "optimizer-copy-main-to-model-params", -# "optimizer", -# ] -# -# # Calculate batch size. -# batch_size = ( -# args.micro_batch_size * args.data_parallel_size * get_num_microbatches() -# ) -# total_iterations = ( -# total_loss_dict[advanced_iters_key] + total_loss_dict[skipped_iters_key] -# ) -# -# # Tensorboard values. -# # Timer requires all the ranks to call. 
-# <<<<<<< HEAD -# if args.log_timers_to_tensorboard and ( -# iteration % args.tensorboard_log_interval == 0 -# ): -# timers.write(timers_to_log, writer, iteration, normalizer=total_iterations) -# if writer and (iteration % args.tensorboard_log_interval == 0): -# writer.add_scalar( -# "steps-vs-samples/y=steps,x=samples", iteration, args.consumed_train_samples -# ) -# writer.add_scalar( -# "steps-vs-samples/y=samples,x=steps", args.consumed_train_samples, iteration -# ) -# writer.add_scalar( -# "steps-vs-tokens/y=steps,x=tokens", iteration, args.consumed_train_tokens -# ) -# writer.add_scalar( -# "steps-vs-tokens/y=tokens,x=steps", args.consumed_train_tokens, iteration -# ) -# if args.log_learning_rate_to_tensorboard: -# wandb_metrics |= { -# "learning-rate/iteration": iteration, -# "learning-rate/learning-rate": learning_rate, -# } -# writer.add_scalar("learning-rate/learning-rate", learning_rate, iteration) -# writer.add_scalar( -# "learning-rate/learning-rate vs samples", -# learning_rate, -# args.consumed_train_samples, -# ) -# writer.add_scalar( -# "learning-rate/learning-rate vs tokens", -# learning_rate, -# args.consumed_train_tokens, -# ) -# if args.log_batch_size_to_tensorboard: -# writer.add_scalar("batch-size/batch-size", batch_size, iteration) -# writer.add_scalar( -# "batch-size/batch-size vs samples", -# batch_size, -# args.consumed_train_samples, -# ) -# writer.add_scalar( -# "batch-size/batch-size vs tokens", -# batch_size, -# args.consumed_train_tokens, -# ) -# wandb_metrics |= { -# "lm-loss-training/iteration": iteration, -# "lm-loss-training/consumed_train_tokens": args.consumed_train_tokens, -# } -# ======= -# if args.log_timers_to_tensorboard and \ -# (iteration % args.tensorboard_log_interval == 0): -# timers.write(timers_to_log, writer, iteration, -# normalizer=total_iterations) -# if writer.is_enabled() and (iteration % args.tensorboard_log_interval == 0): -# writer.add_scalar('steps-vs-samples/y=steps,x=samples', iteration, 
args.consumed_train_samples, x_axis_samples) -# writer.add_scalar('steps-vs-samples/y=samples,x=steps', args.consumed_train_samples, iteration) -# writer.add_scalar('steps-vs-tokens/y=steps,x=tokens', iteration, args.consumed_train_tokens, x_axis_tokens) -# writer.add_scalar('steps-vs-tokens/y=tokens,x=steps', args.consumed_train_tokens, iteration) -# if args.log_learning_rate_to_tensorboard: -# writer.add_scalar('learning-rate/learning-rate', learning_rate, iteration) -# writer.add_scalar('learning-rate/learning-rate vs samples', learning_rate, -# args.consumed_train_samples, x_axis_samples) -# writer.add_scalar('learning-rate/learning-rate vs tokens', learning_rate, -# args.consumed_train_tokens, x_axis_tokens) -# if args.log_batch_size_to_tensorboard: -# writer.add_scalar('batch-size/batch-size', batch_size, iteration) -# writer.add_scalar('batch-size/batch-size vs samples', batch_size, -# args.consumed_train_samples, x_axis_samples) -# writer.add_scalar('batch-size/batch-size vs tokens', batch_size, -# args.consumed_train_tokens, x_axis_tokens) -# >>>>>>> 0d6e3793a1fc06eded9764ef15ad12bcc0281101 -# for key in loss_dict: -# wandb_metrics |= {f"lm-loss-training/{key}": loss_dict[key]} -# writer.add_scalar(f"lm-loss-training/{key}", loss_dict[key], iteration) -# <<<<<<< HEAD -# writer.add_scalar( -# f"lm-loss-training/{key}" + " vs samples", -# loss_dict[key], -# args.consumed_train_samples, -# ) -# writer.add_scalar( -# f"lm-loss-training/{key}" + " vs tokens", -# loss_dict[key], -# args.consumed_train_tokens, -# ) -# if args.fp16 and loss_scale and args.log_loss_scale_to_tensorboard: -# writer.add_scalar("loss-scale/loss-scale", loss_scale, iteration) -# writer.add_scalar( -# "loss-scale/loss-scale vs samples", -# loss_scale, -# args.consumed_train_samples, -# ) -# writer.add_scalar( -# "loss-scale/loss-scale vs tokens", -# loss_scale, -# args.consumed_train_tokens, -# ) -# if args.log_world_size_to_tensorboard: -# writer.add_scalar("world-size/world-size", 
args.world_size, iteration) -# writer.add_scalar( -# "world-size/world-size vs samples", -# args.world_size, -# args.consumed_train_samples, -# ) -# writer.add_scalar( -# "world-size/world-size vs tokens", -# args.world_size, -# args.consumed_train_tokens, -# ) -# if grad_norm is not None: -# wandb_metrics |= {"training/grad-norm": grad_norm} -# writer.add_scalar("grad-norm/grad-norm", grad_norm, iteration) -# writer.add_scalar( -# "grad-norm/grad-norm vs samples", grad_norm, args.consumed_train_samples -# ) -# writer.add_scalar( -# "grad-norm/grad-norm vs tokens", grad_norm, args.consumed_train_tokens -# ) -# if num_zeros_in_grad is not None: -# wandb_metrics |= {"training/num-zeros": num_zeros_in_grad} -# writer.add_scalar("num-zeros/num-zeros", num_zeros_in_grad, iteration) -# writer.add_scalar( -# "num-zeros/num-zeros vs samples", -# num_zeros_in_grad, -# args.consumed_train_samples, -# ) -# writer.add_scalar( -# "num-zeros/num-zeros vs tokens", -# num_zeros_in_grad, -# args.consumed_train_tokens, -# ) -# if params_norm is not None: -# wandb_metrics |= {"training/params-norm": params_norm} -# writer.add_scalar("params-norm/params-norm", params_norm, iteration) -# writer.add_scalar( -# "params-norm/params-norm vs samples", -# params_norm, -# args.consumed_train_samples, -# ) -# writer.add_scalar( -# "params-norm/params-norm vs tokens", -# params_norm, -# args.consumed_train_tokens, -# ) -# if hasattr(args, "actual_seq_length"): -# writer.add_scalar( -# "seqlen/actual_seq_length", args.actual_seq_length, iteration -# ) -# writer.add_scalar( -# "seqlen/actual_seq_length vs samples", -# args.actual_seq_length, -# args.consumed_train_samples, -# ) -# writer.add_scalar( -# "seqlen/actual_seq_length vs tokens", -# args.actual_seq_length, -# args.consumed_train_tokens, -# ) -# if args.curriculum_learning_legacy or args.data_efficiency_curriculum_learning: -# writer.add_scalar( -# "seqlen/curriculum_seqlen", args.curriculum_seqlen, iteration -# ) -# writer.add_scalar( 
-# "seqlen/curriculum_seqlen vs samples", -# args.curriculum_seqlen, -# args.consumed_train_samples, -# ) -# writer.add_scalar( -# "seqlen/curriculum_seqlen vs tokens", -# args.curriculum_seqlen, -# args.consumed_train_tokens, -# ) -# if args.random_ltd: -# writer.add_scalar( -# "seqlen/random_ltd_reserved_length", -# args.random_ltd_reserved_length, -# iteration, -# ) -# writer.add_scalar( -# "seqlen/random_ltd_reserved_length vs samples", -# args.random_ltd_reserved_length, -# args.consumed_train_samples, -# ) -# writer.add_scalar( -# "seqlen/random_ltd_reserved_length vs tokens", -# args.random_ltd_reserved_length, -# args.consumed_train_tokens, -# ) -# ======= -# writer.add_scalar(f"lm-loss-training/{key}" + ' vs samples', loss_dict[key], -# args.consumed_train_samples, x_axis_samples) -# writer.add_scalar(f"lm-loss-training/{key}" + ' vs tokens', loss_dict[key], -# args.consumed_train_tokens, x_axis_tokens) -# if args.fp16 and loss_scale and args.log_loss_scale_to_tensorboard: -# writer.add_scalar('loss-scale/loss-scale', loss_scale, iteration) -# writer.add_scalar('loss-scale/loss-scale vs samples', loss_scale, -# args.consumed_train_samples, x_axis_samples) -# writer.add_scalar('loss-scale/loss-scale vs tokens', loss_scale, -# args.consumed_train_tokens, x_axis_tokens) -# if args.log_world_size_to_tensorboard: -# writer.add_scalar('world-size/world-size', args.world_size, iteration) -# writer.add_scalar('world-size/world-size vs samples', args.world_size, -# args.consumed_train_samples, x_axis_samples) -# writer.add_scalar('world-size/world-size vs tokens', args.world_size, -# args.consumed_train_tokens, x_axis_tokens) -# if grad_norm is not None: -# writer.add_scalar('grad-norm/grad-norm', grad_norm, iteration) -# writer.add_scalar('grad-norm/grad-norm vs samples', grad_norm, -# args.consumed_train_samples, x_axis_samples) -# writer.add_scalar('grad-norm/grad-norm vs tokens', grad_norm, -# args.consumed_train_tokens, x_axis_tokens) -# if num_zeros_in_grad 
is not None: -# writer.add_scalar('num-zeros/num-zeros', num_zeros_in_grad, iteration) -# writer.add_scalar('num-zeros/num-zeros vs samples', num_zeros_in_grad, -# args.consumed_train_samples, x_axis_samples) -# writer.add_scalar('num-zeros/num-zeros vs tokens', num_zeros_in_grad, -# args.consumed_train_tokens, x_axis_tokens) -# if params_norm is not None: -# writer.add_scalar('params-norm/params-norm', params_norm, iteration) -# writer.add_scalar('params-norm/params-norm vs samples', params_norm, -# args.consumed_train_samples, x_axis_samples) -# writer.add_scalar('params-norm/params-norm vs tokens', params_norm, -# args.consumed_train_tokens, x_axis_tokens) -# if hasattr(args, 'actual_seq_length'): -# writer.add_scalar('seqlen/actual_seq_length', args.actual_seq_length, -# iteration) -# writer.add_scalar('seqlen/actual_seq_length vs samples', args.actual_seq_length, -# args.consumed_train_samples, x_axis_samples) -# writer.add_scalar('seqlen/actual_seq_length vs tokens', args.actual_seq_length, -# args.consumed_train_tokens, x_axis_tokens) -# if args.curriculum_learning_legacy or args.data_efficiency_curriculum_learning: -# writer.add_scalar('seqlen/curriculum_seqlen', args.curriculum_seqlen, -# iteration) -# writer.add_scalar('seqlen/curriculum_seqlen vs samples', args.curriculum_seqlen, -# args.consumed_train_samples, x_axis_samples) -# writer.add_scalar('seqlen/curriculum_seqlen vs tokens', args.curriculum_seqlen, -# args.consumed_train_tokens, x_axis_tokens) -# if args.random_ltd: -# writer.add_scalar('seqlen/random_ltd_reserved_length', args.random_ltd_reserved_length, -# iteration) -# writer.add_scalar('seqlen/random_ltd_reserved_length vs samples', args.random_ltd_reserved_length, -# args.consumed_train_samples, x_axis_samples) -# writer.add_scalar('seqlen/random_ltd_reserved_length vs tokens', args.random_ltd_reserved_length, -# args.consumed_train_tokens, x_axis_tokens) -# >>>>>>> 0d6e3793a1fc06eded9764ef15ad12bcc0281101 -# if 
args.log_memory_to_tensorboard: -# mem_stats = torch.cuda.memory_stats() -# writer.add_scalar( -# "mem-reserved-bytes", -# mem_stats["reserved_bytes.all.current"], -# iteration, -# ) -# writer.add_scalar( -# "mem-allocated-bytes", -# mem_stats["allocated_bytes.all.current"], -# iteration, -# ) -# writer.add_scalar( -# "mem-allocated-count", -# mem_stats["allocation.all.current"], -# iteration, -# ) -# if iteration % args.tensorboard_log_interval == 0: -# # This logging write various optimizer states to tensorboard. This -# # feature may consume extra GPU memory thus is set at false by default. -# if args.log_optimizer_states_to_tensorboard and optimizer is not None: -# opt_stats = [0.0] * 8 -# opt_stats_2 = [0.0] * 4 -# -# #TODO(billishyahao): Remove me after bf16_optimizer promotes its state. -# if not hasattr(optimizer, "state"): -# assert hasattr(optimizer, "optimizer"), f"Optimizer must have optimizer property." -# optimizer.state = optimizer.optimizer.state -# -# for _, group in enumerate(optimizer.param_groups): -# for _, param in enumerate(group["params"]): -# state_param = getattr(optimizer, "state", None) -# if state_param is not None: -# exp_avg_sq = state_param.get("exp_avg_sq", torch.tensor(0.0)) -# exp_avg = state_param.get("exp_avg", torch.tensor(0.0)) -# opt_stats[0] += (torch.norm(exp_avg_sq).item()) ** 2 -# opt_stats[1] += (torch.norm(exp_avg_sq.sqrt()).item()) ** 2 -# opt_stats[2] += (torch.norm(exp_avg).item()) ** 2 -# opt_stats[3] += (torch.norm(param).item()) ** 2 -# opt_stats[4] += torch.norm(exp_avg_sq, p=1).item() -# opt_stats[5] += torch.norm(exp_avg_sq.sqrt(), p=1).item() -# opt_stats[6] += torch.norm(exp_avg, p=1).item() -# opt_stats[7] += torch.norm(param, p=1).item() -# opt_stats_2[0] = max( -# opt_stats_2[0], -# abs(exp_avg_sq.max().item()), -# abs(exp_avg_sq.min().item()), -# ) -# opt_stats_2[1] = max( -# opt_stats_2[1], exp_avg_sq.sqrt().abs_().max().item() -# ) -# opt_stats_2[2] = max( -# opt_stats_2[2], -# 
abs(exp_avg.max().item()), -# abs(exp_avg.min().item()), -# ) -# opt_stats_2[3] = max( -# opt_stats_2[3], -# abs(param.max().item()), -# abs(param.min().item()), -# ) -# # print('step {} rank {} before sync opt_stats {}, {}'.format(iteration, torch.distributed.get_rank(), opt_stats_2, opt_stats)) -# if args.zero_stage > 0: -# # ZeRO partiions optimizer states -# # opt_stats = opt_stats.clone().detach() -# # opt_stats = get_accelerator().FloatTensor -# opt_stats = get_accelerator().FloatTensor(opt_stats) -# torch.distributed.all_reduce( -# opt_stats, group=mpu.get_sequence_data_parallel_group() -# ) -# # opt_stats_2 = get_accelerator().FloatTensor(opt_stats_2) -# # opt_stats_2 = opt_stats_2.clone().detach() -# opt_stats_2 = get_accelerator().FloatTensor(opt_stats_2) -# torch.distributed.all_reduce( -# opt_stats_2, -# op=torch.distributed.ReduceOp.MAX, -# group=mpu.get_sequence_data_parallel_group(), -# ) -# -# if args.tensor_model_parallel_size > 1: -# # opt_stats = opt_stats.clone().detach() -# opt_stats = get_accelerator().FloatTensor(opt_stats) -# torch.distributed.all_reduce( -# opt_stats, group=mpu.get_tensor_model_parallel_group() -# ) -# # opt_stats_2 = opt_stats_2.clone().detach() -# opt_stats_2 = get_accelerator().FloatTensor(opt_stats_2) -# torch.distributed.all_reduce( -# opt_stats_2, -# op=torch.distributed.ReduceOp.MAX, -# group=mpu.get_tensor_model_parallel_group(), -# ) -# -# if args.pipeline_model_parallel_size > 1: -# # opt_stats = opt_stats.clone().detach() -# opt_stats = get_accelerator().FloatTensor(opt_stats) -# torch.distributed.all_reduce( -# opt_stats, group=mpu.get_pipeline_model_parallel_group() -# ) -# # opt_stats_2 = opt_stats_2.clone().detach() -# opt_stats_2 = get_accelerator().FloatTensor(opt_stats_2) -# torch.distributed.all_reduce( -# opt_stats_2, -# op=torch.distributed.ReduceOp.MAX, -# group=mpu.get_pipeline_model_parallel_group(), -# ) -# wandb_metrics |= { -# "optimizer/learning_rate": learning_rate, -# "optimizer/iteration": 
args.iteration, -# "optimizer/consumed_train_tokens": args.consumed_train_tokens, -# "optimizer/variance_l2": opt_stats[0] ** 0.5, -# "optimizer/variance_sqrt_l2": opt_stats[1] ** 0.5, -# "optimizer/momentum_l2": opt_stats[2] ** 0.5, -# "optimizer/weight_l2": opt_stats[3] ** 0.5, -# "optimizer/variance_l1": opt_stats[4], -# "optimizer/variance_sqrt_l1": opt_stats[5], -# "optimizer/momentum_l1": opt_stats[6], -# "optimizer/weight_l1": opt_stats[7], -# "optimizer/variance_abs_max": opt_stats_2[0], -# "optimizer/variance_sqrt_abs_max": opt_stats_2[1], -# "optimizer/momentum_abs_max": opt_stats_2[2], -# "optimizer/weight_abs_max": opt_stats_2[3], -# } -# # print('step {} rank {} after sync opt_stats {}, {}'.format(iteration, torch.distributed.get_rank(), opt_stats_2, opt_stats)) -# <<<<<<< HEAD -# if writer and is_last_rank(): -# writer.add_scalar( -# "optimizer/variance_l2 vs tokens", -# opt_stats[0] ** 0.5, -# args.consumed_train_tokens, -# ) -# writer.add_scalar( -# "optimizer/variance_sqrt_l2 vs tokens", -# opt_stats[1] ** 0.5, -# args.consumed_train_tokens, -# ) -# writer.add_scalar( -# "optimizer/momentum_l2 vs tokens", -# opt_stats[2] ** 0.5, -# args.consumed_train_tokens, -# ) -# writer.add_scalar( -# "optimizer/weight_l2 vs tokens", -# opt_stats[3] ** 0.5, -# args.consumed_train_tokens, -# ) -# writer.add_scalar( -# "optimizer/variance_l1 vs tokens", -# opt_stats[4], -# args.consumed_train_tokens, -# ) -# writer.add_scalar( -# "optimizer/variance_sqrt_l1 vs tokens", -# opt_stats[5], -# args.consumed_train_tokens, -# ) -# writer.add_scalar( -# "optimizer/momentum_l1 vs tokens", -# opt_stats[6], -# args.consumed_train_tokens, -# ) -# writer.add_scalar( -# "optimizer/weight_l1 vs tokens", -# opt_stats[7], -# args.consumed_train_tokens, -# ) -# writer.add_scalar( -# "optimizer/variance_abs_max vs tokens", -# opt_stats_2[0], -# args.consumed_train_tokens, -# ) -# writer.add_scalar( -# "optimizer/variance_sqrt_abs_max vs tokens", -# opt_stats_2[1], -# 
args.consumed_train_tokens, -# ) -# writer.add_scalar( -# "optimizer/momentum_abs_max vs tokens", -# opt_stats_2[2], -# args.consumed_train_tokens, -# ) -# writer.add_scalar( -# "optimizer/weight_abs_max vs tokens", -# opt_stats_2[3], -# args.consumed_train_tokens, -# ) -# writer.add_scalar( -# "optimizer/variance_l2", opt_stats[0] ** 0.5, iteration -# ) -# writer.add_scalar( -# "optimizer/variance_sqrt_l2", opt_stats[1] ** 0.5, iteration -# ) -# writer.add_scalar( -# "optimizer/momentum_l2", opt_stats[2] ** 0.5, iteration -# ) -# writer.add_scalar("optimizer/weight_l2", opt_stats[3] ** 0.5, iteration) -# writer.add_scalar("optimizer/variance_l1", opt_stats[4], iteration) -# writer.add_scalar("optimizer/variance_sqrt_l1", opt_stats[5], iteration) -# writer.add_scalar("optimizer/momentum_l1", opt_stats[6], iteration) -# writer.add_scalar("optimizer/weight_l1", opt_stats[7], iteration) -# writer.add_scalar( -# "optimizer/variance_abs_max", opt_stats_2[0], iteration -# ) -# writer.add_scalar( -# "optimizer/variance_sqrt_abs_max", opt_stats_2[1], iteration -# ) -# writer.add_scalar( -# "optimizer/momentum_abs_max", opt_stats_2[2], iteration -# ) -# writer.add_scalar("optimizer/weight_abs_max", opt_stats_2[3], iteration) -# ======= -# if writer.is_enabled() and is_last_rank(): -# writer.add_scalar('optimizer/variance_l2 vs tokens', opt_stats[0]**0.5, args.consumed_train_tokens, x_axis_tokens) -# writer.add_scalar('optimizer/variance_sqrt_l2 vs tokens', opt_stats[1]**0.5, args.consumed_train_tokens, x_axis_tokens) -# writer.add_scalar('optimizer/momentum_l2 vs tokens', opt_stats[2]**0.5, args.consumed_train_tokens, x_axis_tokens) -# writer.add_scalar('optimizer/weight_l2 vs tokens', opt_stats[3]**0.5, args.consumed_train_tokens, x_axis_tokens) -# writer.add_scalar('optimizer/variance_l1 vs tokens', opt_stats[4], args.consumed_train_tokens, x_axis_tokens) -# writer.add_scalar('optimizer/variance_sqrt_l1 vs tokens', opt_stats[5], args.consumed_train_tokens, x_axis_tokens) 
-# writer.add_scalar('optimizer/momentum_l1 vs tokens', opt_stats[6], args.consumed_train_tokens, x_axis_tokens) -# writer.add_scalar('optimizer/weight_l1 vs tokens', opt_stats[7], args.consumed_train_tokens, x_axis_tokens) -# writer.add_scalar('optimizer/variance_abs_max vs tokens', opt_stats_2[0], args.consumed_train_tokens, x_axis_tokens) -# writer.add_scalar('optimizer/variance_sqrt_abs_max vs tokens', opt_stats_2[1], args.consumed_train_tokens, x_axis_tokens) -# writer.add_scalar('optimizer/momentum_abs_max vs tokens', opt_stats_2[2], args.consumed_train_tokens, x_axis_tokens) -# writer.add_scalar('optimizer/weight_abs_max vs tokens', opt_stats_2[3], args.consumed_train_tokens, x_axis_tokens) -# -# writer.add_scalar('optimizer/variance_l2', opt_stats[0]**0.5, iteration) -# writer.add_scalar('optimizer/variance_sqrt_l2', opt_stats[1]**0.5, iteration) -# writer.add_scalar('optimizer/momentum_l2', opt_stats[2]**0.5, iteration) -# writer.add_scalar('optimizer/weight_l2', opt_stats[3]**0.5, iteration) -# writer.add_scalar('optimizer/variance_l1', opt_stats[4], iteration) -# writer.add_scalar('optimizer/variance_sqrt_l1', opt_stats[5], iteration) -# writer.add_scalar('optimizer/momentum_l1', opt_stats[6], iteration) -# writer.add_scalar('optimizer/weight_l1', opt_stats[7], iteration) -# writer.add_scalar('optimizer/variance_abs_max', opt_stats_2[0], iteration) -# writer.add_scalar('optimizer/variance_sqrt_abs_max', opt_stats_2[1], iteration) -# writer.add_scalar('optimizer/momentum_abs_max', opt_stats_2[2], iteration) -# writer.add_scalar('optimizer/weight_abs_max', opt_stats_2[3], iteration) -# >>>>>>> 0d6e3793a1fc06eded9764ef15ad12bcc0281101 -# -# assert args is not None -# assert timers is not None -# if iteration % args.log_interval == 0: -# elapsed_time = timers("interval-time").elapsed(barrier=True) -# elapsed_time_per_iteration = elapsed_time / total_iterations -# seq_len = args.seq_length -# if hasattr(args, "actual_seq_length"): -# seq_len = 
args.actual_seq_length -# samples_per_sec, tflops, approx_parameters_in_billions = throughput_calculator( -# model, args, elapsed_time, total_iterations -# ) -# samples_per_sec_per_replica = samples_per_sec / args.data_parallel_size -# tokens_per_sec = samples_per_sec * seq_len -# tokens_per_sec_per_replica = tokens_per_sec / args.data_parallel_size -# tokens_per_gpu_per_second = tokens_per_sec / args.world_size -# <<<<<<< HEAD -# tokens_per_gpu_per_second_per_replica = ( -# tokens_per_gpu_per_second / args.data_parallel_size -# ) -# # NOTE: [2024-06-19] -# # Updated to use (more accurate) calculation according to -# # `num_floating_point_operations` from NVIDIA/Megatron-LM -# num_flop_lm = num_floating_point_operations(args, batch_size) -# num_flop_per_sec_lm = (num_flop_lm / elapsed_time_per_iteration) -# tflops_lm = (num_flop_per_sec_lm / (10 ** 12)) -# tflops_lm_per_gpu = (tflops_lm / args.world_size) -# wandb_metrics |= { -# "throughput/iteration-time": elapsed_time_per_iteration, # 1000 ms / s -# "throughput/samples_per_sec": samples_per_sec, -# "throughput/samples_per_sec_per_replica": samples_per_sec_per_replica, -# "throughput/tokens_per_sec": tokens_per_sec, -# "throughput/tokens_per_sec_per_replica": tokens_per_sec_per_replica, -# "throughput/tokens_per_gpu_per_sec": tokens_per_gpu_per_second, -# "throughput/tokens_per_gpu_per_sec_per_replica": tokens_per_gpu_per_second_per_replica, -# "throughput/tflops": tflops, -# "throughput/tflops-new": num_flop_lm / elapsed_time_per_iteration, -# "throughput/tflops-lm": tflops_lm_per_gpu, -# "throughput/approx_params_in_billions": approx_parameters_in_billions, -# "throughput/elapsed_ms_per_iteration": elapsed_time_per_iteration, -# "throughput/iteration": iteration, -# } -# if loss_dict is not None: -# wandb_metrics |= { -# "loss/iteration": iteration, -# **{f"loss/{k}": v for k, v in loss_dict.items()}, -# } -# if writer and args.log_timers_to_tensorboard: -# writer.add_scalar( -# "iteration-time/iteration-time", 
elapsed_time_per_iteration, iteration -# ) -# writer.add_scalar( -# "iteration-time/iteration-time vs samples", -# elapsed_time_per_iteration, -# args.consumed_train_samples, -# ) -# writer.add_scalar( -# "iteration-time/iteration-time vs tokens", -# elapsed_time_per_iteration, -# args.consumed_train_tokens, -# ) -# # metrics_to_log = { -# # 'iteration': iteration, -# # 'train_iters': args.train_iters, -# # 'consumed_samples': args.consumed_train_samples, -# # 'consumed_tokens': args.consumed_tokens, -# # } -# log_string = f" iteration={iteration:8d}/{args.train_iters:8d} |" -# # .format( iteration, args.train_iters) -# log_string += ( -# f" consumed_samples={args.consumed_train_samples:12d} |" -# # .format(args.consumed_train_samples) -# ) -# log_string += f" consumed_tokens={args.consumed_train_tokens:12d} |" -# # .format( args.consumed_train_tokens) -# log_string += ( -# " elapsed_time_per_iteration_ms=" -# f"{elapsed_time_per_iteration * 1000.0:.1f} |" -# # .format( elapsed_time_per_iteration * 1000.0) -# ) -# log_string += f" learning_rate={learning_rate:.6g} |" -# log_string += f" global_batch_size={batch_size:5d} |" -# # if wandb is not None and getattr(wandb, 'run', None) is not None: -# wandb_metrics |= { -# "training/iteration": iteration, -# "training/iteration_time": elapsed_time_per_iteration, -# "training/iteration_time_vs_tokens": ( -# elapsed_time_per_iteration / args.consumed_train_tokens -# ), -# "training/iteration_time_vs_samples": ( -# (elapsed_time_per_iteration / args.consumed_train_samples), -# ), -# "training/consumed_samples": args.consumed_train_samples, -# "training/consumed_tokens": args.consumed_train_tokens, -# } -# ======= -# tokens_per_gpu_per_second_per_replica = tokens_per_gpu_per_second / args.data_parallel_size -# -# if writer.is_enabled(): -# writer.add_scalar_to_wandb('throughput/iteration-time', elapsed_time_per_iteration, iteration) # 1000 ms / s -# writer.add_scalar_to_wandb('throughput/samples_per_sec', samples_per_sec, 
iteration) -# writer.add_scalar_to_wandb('throughput/samples_per_sec_per_replica', samples_per_sec_per_replica, iteration) -# writer.add_scalar_to_wandb('throughput/tokens_per_sec', tokens_per_sec, iteration) -# writer.add_scalar_to_wandb('throughput/tokens_per_sec_per_replica', tokens_per_sec_per_replica, iteration) -# writer.add_scalar_to_wandb('throughput/tokens_per_gpu_per_sec', tokens_per_gpu_per_second, iteration) -# writer.add_scalar_to_wandb('throughput/tokens_per_gpu_per_sec_per_replica', tokens_per_gpu_per_second_per_replica, iteration) -# writer.add_scalar_to_wandb('throughput/tflops', tflops, iteration) -# writer.add_scalar_to_wandb('throughput/approx_params_in_billions', approx_parameters_in_billions, iteration) -# writer.add_scalar_to_wandb('throughput/elapsed_ms_per_iteration', elapsed_time_per_iteration, iteration) -# if loss_dict is not None: -# for k, v in loss_dict.items(): -# writer.add_scalar_to_wandb(f'loss/{k}', v, iteration) -# -# if args.log_timers_to_tensorboard: -# writer.add_scalar('iteration-time/iteration-time', -# elapsed_time_per_iteration, iteration) -# writer.add_scalar('iteration-time/iteration-time vs samples', -# elapsed_time_per_iteration, args.consumed_train_samples, x_axis_samples) -# writer.add_scalar('iteration-time/iteration-time vs tokens', -# elapsed_time_per_iteration, args.consumed_train_tokens, x_axis_tokens) -# log_string = ' iteration {:8d}/{:8d} |'.format( -# iteration, args.train_iters) -# log_string += ' consumed samples: {:12d} |'.format( -# args.consumed_train_samples) -# log_string += ' consumed tokens: {:12d} |'.format( -# args.consumed_train_tokens) -# log_string += ' elapsed time per iteration (ms): {:.1f} |'.format( -# elapsed_time_per_iteration * 1000.0) -# log_string += ' learning rate: {:.3E} |'.format(learning_rate) -# log_string += ' global batch size: {:5d} |'.format(batch_size) -# -# >>>>>>> 0d6e3793a1fc06eded9764ef15ad12bcc0281101 -# for key in total_loss_dict: -# if key not in [advanced_iters_key, 
skipped_iters_key, nan_iters_key]: -# avg = total_loss_dict[key].item() / float( -# max(1, total_loss_dict[advanced_iters_key]) -# ) -# if avg > 0.0: -# log_string += " {}={:.6f} |".format(key, avg) -# total_loss_dict[key] = get_accelerator().FloatTensor([0.0]) -# if loss_scale is not None: -# log_string += " loss_scale={:.1f} |".format(loss_scale) -# wandb_metrics |= {"loss/loss_scale": loss_scale} -# if grad_norm is not None: -# log_string += " grad_norm={:.3f} |".format(grad_norm) -# wandb_metrics |= {"loss/grad_norm": grad_norm} -# if num_zeros_in_grad is not None: -# log_string += " num_zeros={:.1f} |".format(num_zeros_in_grad) -# wandb_metrics |= {"loss/num_zeros_in_grad": num_zeros_in_grad} -# if params_norm is not None: -# log_string += " params_norm={:.3f} |".format(params_norm) -# wandb_metrics |= {"loss/params_norm": params_norm} -# if args.curriculum_learning_legacy or args.data_efficiency_curriculum_learning: -# log_string += " curriculum_seqlen={:5d} |".format(args.curriculum_seqlen) -# if args.random_ltd: -# log_string += " random_ltd reserved_length={:5d} |".format( -# args.random_ltd_reserved_length -# ) -# # log_string += " | ".join([ -# # f"{seq_len=:5d} ", -# # f"{}" -# # f"number_of_skipped_iterations={:3d}", -# # -# # ]) -# log_string += " actual_seqlen={:5d} |".format(seq_len) -# log_string += " number_of_skipped_iterations={:3d} |".format( -# total_loss_dict[skipped_iters_key] -# ) -# log_string += " number_of_nan_iterations={:3d} |".format( -# total_loss_dict[nan_iters_key] -# ) -# log_string += " samples_per_second={:.3f} |".format(samples_per_sec) -# log_string += " tokens_per_gpu_per_second_tgs={:.3f} |".format( -# tokens_per_gpu_per_second -# ) -# log_string += " [LM]TFLOPs={:.2f} |".format(tflops_lm_per_gpu) -# log_string += " [DS]TFLOPs={:.2f} |".format(tflops) -# total_loss_dict[advanced_iters_key] = 0 -# total_loss_dict[skipped_iters_key] = 0 -# total_loss_dict[nan_iters_key] = 0 -# # print_rank_last(log_string) -# 
log.info(log_string) -# if report_memory_flag and learning_rate > 0.0: -# # Report memory after optimizer state has been initialized. -# report_memory("(after {} iterations)".format(iteration)) -# report_memory_flag = False -# if wandb is not None and getattr(wandb, "run", None) is not None: -# wandb_metrics |= { -# "training/skiped_iterations": total_loss_dict[skipped_iters_key] -# } -# wandb_metrics |= {"training/nan_iterations": total_loss_dict[nan_iters_key]} -# wandb.log(wandb_metrics) -# if timers is not None: -# timers.log(timers_to_log, normalizer=args.log_interval) -# -# return report_memory_flag - - @dlp.log @ez.dist.timeitlogit(rank=RANK) def save_checkpoint_and_time(iteration, model, optimizer, opt_param_scheduler): @@ -1866,31 +987,24 @@ def train( assert timers is not None # Write args to tensorboard write_args_to_tensorboard() - - setup_profiler(args, get_accelerator().device_name()) if args.random_ltd: # random-ltd requires different randomness on each rank import random random.seed(args.seed + torch.distributed.get_rank()) - # Turn on training mode which enables dropout. for model_module in model: model_module.train() - # Tracking loss. total_loss_dict = {} loss_dict = {} - # Iterations. 
iteration = args.iteration - # Translate args to core configuration config = core_transformer_config_from_args(args) if not args.deepspeed: config.grad_scale_func = optimizer.scale_loss config.timers = timers - timers("interval-time", log_level=0).start(barrier=True) print_datetime("before the start of training step") report_memory_flag = True @@ -1915,7 +1029,6 @@ def train( while iteration < args.train_iters and ( args.train_tokens is None or args.consumed_train_tokens < args.train_tokens ): - trigger(on_step_begin) update_num_microbatches(args.consumed_train_samples) if args.deepspeed: # inform deepspeed of any batch size changes @@ -1925,7 +1038,6 @@ def train( * get_num_microbatches() ) model[0].set_train_batch_size(global_batch_size) - if args.curriculum_learning_legacy and not args.no_pipeline_parallel: curriculum_seqlen = args.curriculum_scheduler.update_difficulty( args.iteration + 1 @@ -1945,7 +1057,7 @@ def train( num_zeros_in_grad = None else: if os.getenv("TORCH_PROFILER_ENABLE") == "2": - from torch.profiler import profile, record_function, ProfilerActivity + from torch.profiler import profile, ProfilerActivity try: activities = [ @@ -2015,7 +1127,6 @@ def train( args.consumed_train_tokens += new_samples * args.actual_seq_length else: args.consumed_train_tokens += new_samples * args.actual_seq_length - # Logging. 
if args.deepspeed: if hasattr(model[0].optimizer, "cur_scale"): @@ -2046,7 +1157,6 @@ def train( check_adlr_autoresume_termination( iteration, model, optimizer, opt_param_scheduler ) - # # Evaluation if args.eval_interval and iteration % args.eval_interval == 0 and args.do_valid: prefix = "iteration {}".format(iteration) @@ -2060,7 +1170,6 @@ def train( config, False, ) - # Checkpointing saved_checkpoint = False if args.exit_signal_handler: @@ -2072,11 +1181,9 @@ def train( ) print_datetime("exiting program after receiving SIGTERM.") sys.exit() - if args.save and args.save_interval and iteration % args.save_interval == 0: save_checkpoint_and_time(iteration, model, optimizer, opt_param_scheduler) saved_checkpoint = True - # Exiting based on duration if args.exit_duration_in_mins: train_time = (time.time() - _TRAIN_START_TIME) / 60.0 @@ -2092,7 +1199,6 @@ def train( ) print_datetime("exiting program after {} minutes".format(train_time)) sys.exit() - # Exiting based on iterations if args.exit_interval and iteration % args.exit_interval == 0: if args.save and not saved_checkpoint: @@ -2102,22 +1208,6 @@ def train( torch.distributed.barrier() print_datetime("exiting program at iteration {}".format(iteration)) sys.exit() - - trigger(on_step_end) - - # Exiting based on kill switch file - if found_kill_switch(): - if args.save and not saved_checkpoint: - save_checkpoint_and_time( - iteration, model, optimizer, opt_param_scheduler - ) - torch.distributed.barrier() - print_datetime( - f"Detected kill switch at {args.kill_switch_file}, " - f"iteration={iteration}. Exiting" - ) - sys.exit() - return iteration @@ -2184,7 +1274,7 @@ def evaluate( # Empty unused memory if args.empty_unused_memory_level >= 1: - get_accelerator().empty_cache() + torch.cuda.empty_cache() if mpu.is_pipeline_last_stage(ignore_virtual=True): # Reduce across processes. 
@@ -2251,17 +1341,12 @@ def evaluate_and_print_results( test=False, ): """Helper function to evaluate and dump results on screen.""" - from megatron.training_log import interop_tool_logger - args = get_args() + assert args is not None if write_to_tensorboard: - writer = interop_tool_logger( - tb_writer=get_tensorboard_writer(), wandb_writer=get_wandb_writer() - ) + writer = get_tensorboard_writer() else: - writer = interop_tool_logger() - x_axis_samples = "Samples" - x_axis_tokens = "Tokens" + writer = None total_loss_dict, collected_non_loss_data = evaluate( forward_step_func, @@ -2275,43 +1360,9 @@ def evaluate_and_print_results( for key in total_loss_dict: string += f"{key} value={total_loss_dict[key].item():.6f}" ppl = math.exp(min(20, total_loss_dict[key].item())) - # <<<<<<< HEAD - # string += f"{key} PPL={ppl:.6f}" - # # string += '{} PPL={:.6f} | '.format(key, ppl) - # if writer and is_last_rank(): - # data_type = "test" if test else "validation" - # writer.add_scalar( - # f"lm-loss-validation/{key} {data_type}", - # total_loss_dict[key].item(), - # iteration, - # ) - # writer.add_scalar( - # f"lm-loss-validation/{key} {data_type} vs samples", - # total_loss_dict[key].item(), - # args.consumed_train_samples, - # ) - # writer.add_scalar( - # f"lm-loss-validation/{key} {data_type} vs tokens", - # total_loss_dict[key].item(), - # args.consumed_train_tokens, - # ) - # if args.log_validation_ppl_to_tensorboard: - # writer.add_scalar( - # f"lm-loss-validation/{key} {data_type} ppl", ppl, iteration - # ) - # writer.add_scalar( - # f"lm-loss-validation/{key} {data_type} ppl vs samples", - # ppl, - # args.consumed_train_samples, - # ) - # writer.add_scalar( - # f"lm-loss-validation/{key} {data_type} ppl vs tokens", - # ppl, - # args.consumed_train_tokens, - # ) - # ======= - string += "{} PPL: {:.6E} | ".format(key, ppl) - if writer.is_enabled() and is_last_rank(): + string += f"{key} PPL={ppl:.6f}" + # string += '{} PPL={:.6f} | '.format(key, ppl) + if writer is 
not None and is_last_rank(): data_type = "test" if test else "validation" writer.add_scalar( f"lm-loss-validation/{key} {data_type}", @@ -2322,13 +1373,11 @@ def evaluate_and_print_results( f"lm-loss-validation/{key} {data_type} vs samples", total_loss_dict[key].item(), args.consumed_train_samples, - x_axis_samples, ) writer.add_scalar( f"lm-loss-validation/{key} {data_type} vs tokens", total_loss_dict[key].item(), args.consumed_train_tokens, - x_axis_tokens, ) if args.log_validation_ppl_to_tensorboard: writer.add_scalar( @@ -2338,21 +1387,14 @@ def evaluate_and_print_results( f"lm-loss-validation/{key} {data_type} ppl vs samples", ppl, args.consumed_train_samples, - x_axis_samples, ) writer.add_scalar( f"lm-loss-validation/{key} {data_type} ppl vs tokens", ppl, args.consumed_train_tokens, - x_axis_tokens, ) - # >>>>>>> 0d6e3793a1fc06eded9764ef15ad12bcc0281101 - if ( - process_non_loss_data_func is not None - and writer.is_enabled() - and is_last_rank() - ): + if process_non_loss_data_func is not None and writer and is_last_rank(): process_non_loss_data_func(collected_non_loss_data, iteration, writer) length = len(string) + 1 @@ -2375,6 +1417,7 @@ def build_train_valid_test_datasets(build_train_valid_test_datasets_provider): args = get_args() # Number of train/valid/test samples. + assert args is not None if args.train_samples: train_samples = args.train_samples else: @@ -2473,6 +1516,7 @@ def build_train_valid_test_data_iterators(build_train_valid_test_datasets_provid """Build pretraining data iterators.""" args = get_args() + assert args is not None # Build loaders. 
train_dataloader, valid_dataloader, test_dataloader = ( From 2113dbc9fc8b38aea09786380ce8846894c0eb25 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Fri, 13 Sep 2024 09:16:20 -0500 Subject: [PATCH 47/92] Update `megatron/utils.py` --- megatron/training.py | 147 ++++++----- megatron/training_log.py | 543 +++++++++++++++------------------------ megatron/utils.py | 247 +++++++----------- 3 files changed, 399 insertions(+), 538 deletions(-) diff --git a/megatron/training.py b/megatron/training.py index 9841386049..90a1250648 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -61,17 +61,17 @@ check_adlr_autoresume_termination, checkpoint_throughput_calculator, found_kill_switch, - # num_floating_point_operations, - # report_memory, - # throughput_calculator, unwrap_model, update_rotary_pos_emb, ) -# noqa: E402 -# The earliest we can measure the start time. -_TRAIN_START_TIME = time.time() -# noqa +from megatron.profiler import ( + setup_profiler, + trigger, + on_step_begin, + on_step_end, +) + dlp = Profile("TRAINING") @@ -87,11 +87,6 @@ LOG_LEVEL: str = str(os.environ.get("LOG_LEVEL", "INFO")).upper() log.setLevel(LOG_LEVEL) if RANK == 0 else log.setLevel("CRITICAL") -# try: -# import wandb -# except (ImportError, ModuleNotFoundError): -# wandb = None - def print_datetime(string): """Note that this call will sync across all ranks.""" @@ -415,6 +410,8 @@ def get_model( ): """Build the model.""" args = get_args() + accelerator = get_accelerator() + assert accelerator is not None assert args is not None args.model_type = model_type @@ -521,7 +518,7 @@ def get_model( if wrap_with_ddp: if args.DDP_impl == "torch": - i = get_accelerator().current_device() + i = accelerator.current_device() model = [ torchDDP( model_module, @@ -808,6 +805,7 @@ def setup_model_and_optimizer( log.info("Initializing ICT from pretrained BERT model") unwrapped_model[0].init_state_dict_from_bert() if args.fp16: + assert optimizer is not None optimizer.reload_model_params() # 
random-LTD requires converting transformer layers if args.random_ltd: @@ -822,8 +820,8 @@ def train_step( """Single training step.""" args = get_args() timers = get_timers() - - assert args is not None and timers is not None + accelerator = get_accelerator() + assert args is not None and timers is not None and accelerator is not None if args.deepspeed and args.ds_pipeline_enabled: num_zeros_in_grad = 0 assert isinstance(model[0], deepspeed.PipelineEngine) @@ -860,11 +858,13 @@ def train_step( if args.timing_log_level < 2: config.timers = None + num_microbatches = get_num_microbatches() + assert num_microbatches is not None losses_reduced = forward_backward_func( forward_step_func=forward_step_func, data_iterator=data_iterator, model=model, - num_microbatches=get_num_microbatches(), + num_microbatches=num_microbatches, seq_length=args.seq_length, micro_batch_size=args.micro_batch_size, decoder_seq_length=args.decoder_seq_length, @@ -879,8 +879,8 @@ def train_step( args.teacher_forward = False # Empty unused memory. - if args.empty_unused_memory_level >= 1: - torch.cuda.empty_cache() + if args.empty_unused_memory_level >= 1 and accelerator is not None: + accelerator.empty_cache() # Reduce gradients. if not args.deepspeed: @@ -917,8 +917,14 @@ def train_step( # Update learning rate. if args.deepspeed: - skipped_iter = 0 + skipped_iter = 0 if update_successful else 1 grad_norm = model[0].get_global_grad_norm() + # XXX: [saforem2]: ---------------------------------------------------- + # Is `num_zeros_in_grad` worth calculating (/ implementing) ?? 
+ # the `Megatron`-specific implementation is at: + # [megatron.optimizer.clip_grads.count_zeros_fp32](./optimizer/clip_grads.py) + # For now, explicitly set to None + # --------------------------------------------------------------------- num_zeros_in_grad = None loss_reduced = {} for key in losses_reduced[0]: @@ -927,29 +933,28 @@ def train_step( losses_reduced_for_key ) return loss_reduced, skipped_iter, grad_norm, num_zeros_in_grad + if update_successful: + increment = ( + get_num_microbatches() * args.micro_batch_size * args.data_parallel_size + ) + opt_param_scheduler.step(increment=increment) + skipped_iter = 0 else: - if update_successful: - increment = ( - get_num_microbatches() * args.micro_batch_size * args.data_parallel_size - ) - opt_param_scheduler.step(increment=increment) - skipped_iter = 0 - else: - skipped_iter = 1 + skipped_iter = 1 - # Empty unused memory. - if args.empty_unused_memory_level >= 2: - torch.cuda.empty_cache() - - if mpu.is_pipeline_last_stage(ignore_virtual=True): - # Average loss across microbatches. - loss_reduced = {} - for key in losses_reduced[0]: - losses_reduced_for_key = [x[key] for x in losses_reduced] - loss_reduced[key] = sum(losses_reduced_for_key) / len( - losses_reduced_for_key - ) - return loss_reduced, skipped_iter, grad_norm, num_zeros_in_grad + # Empty unused memory. + if args.empty_unused_memory_level >= 2 and accelerator is not None: + accelerator.empty_cache() + + if mpu.is_pipeline_last_stage(ignore_virtual=True): + # Average loss across microbatches. 
+ loss_reduced = {} + for key in losses_reduced[0]: + losses_reduced_for_key = [x[key] for x in losses_reduced] + loss_reduced[key] = sum(losses_reduced_for_key) / len( + losses_reduced_for_key + ) + return loss_reduced, skipped_iter, grad_norm, num_zeros_in_grad return {}, skipped_iter, grad_norm, num_zeros_in_grad @@ -983,10 +988,12 @@ def train( """Train the model function.""" args = get_args() timers = get_timers() - assert args is not None - assert timers is not None + accelerator = get_accelerator() + assert args is not None and timers is not None and accelerator is not None # Write args to tensorboard write_args_to_tensorboard() + assert accelerator is not None + setup_profiler(args, accelerator.device_name()) if args.random_ltd: # random-ltd requires different randomness on each rank import random @@ -1002,6 +1009,7 @@ def train( iteration = args.iteration # Translate args to core configuration config = core_transformer_config_from_args(args) + num_skipped_iters = 0 if not args.deepspeed: config.grad_scale_func = optimizer.scale_loss config.timers = timers @@ -1029,6 +1037,7 @@ def train( while iteration < args.train_iters and ( args.train_tokens is None or args.consumed_train_tokens < args.train_tokens ): + trigger(on_step_begin) update_num_microbatches(args.consumed_train_samples) if args.deepspeed: # inform deepspeed of any batch size changes @@ -1051,10 +1060,12 @@ def train( [i <= (iteration + 1) <= j for (i, j) in ranges_to_skip] ): log.info(f"Caught {iteration + 1} in 'ranges_to_skip', skipping!") - loss_dict = {} + # total_loss_dict = {"skipped iterations": } skipped_iter = 1 + total_loss_dict["skipped iterations"] += skipped_iter grad_norm = None num_zeros_in_grad = None + num_skipped_iters += 1 else: if os.getenv("TORCH_PROFILER_ENABLE") == "2": from torch.profiler import profile, ProfilerActivity @@ -1187,9 +1198,7 @@ def train( # Exiting based on duration if args.exit_duration_in_mins: train_time = (time.time() - _TRAIN_START_TIME) / 60.0 - 
done_cuda = get_accelerator().IntTensor( - [train_time > args.exit_duration_in_mins] - ) + done_cuda = accelerator.IntTensor([train_time > args.exit_duration_in_mins]) torch.distributed.all_reduce(done_cuda, op=torch.distributed.ReduceOp.MAX) done = done_cuda.item() if done: @@ -1208,6 +1217,19 @@ def train( torch.distributed.barrier() print_datetime("exiting program at iteration {}".format(iteration)) sys.exit() + trigger(on_step_end) + # Exiting based on kill switch file + if found_kill_switch(): + if args.save and not saved_checkpoint: + save_checkpoint_and_time( + iteration, model, optimizer, opt_param_scheduler + ) + torch.distributed.barrier() + print_datetime( + f"Detected kill switch at {args.kill_switch_file}, " + f"iteration={iteration}. Exiting" + ) + sys.exit() return iteration @@ -1222,7 +1244,8 @@ def evaluate( ): """Evaluation.""" args = get_args() - assert args is not None + accelerator = get_accelerator() + assert args is not None and accelerator is not None if args.vision_pretraining and args.vision_pretraining_type == "dino": compute_feature_bank(model) @@ -1244,6 +1267,10 @@ def evaluate( total_loss_dict = {} + num_microbatches = get_num_microbatches() + assert num_microbatches is not None + forward_backward_func = get_forward_backward_func() + with torch.no_grad(): iteration = 0 while iteration < args.eval_iters: @@ -1251,20 +1278,19 @@ def evaluate( if verbose and iteration % args.log_interval == 0: log.info("Evaluating iter {}/{}".format(iteration, args.eval_iters)) - forward_backward_func = get_forward_backward_func() # Don't care about timing during evaluation config.timers = None if args.deepspeed and args.ds_pipeline_enabled: # DeepSpeed uses eval_batch() and already aggregates losses. 
assert isinstance(model, list) and len(model) == 1 loss = model[0].eval_batch(data_iterator) - loss_dicts = [{"lm loss": loss}] * get_num_microbatches() + loss_dicts = [{"lm loss": loss}] * num_microbatches else: loss_dicts = forward_backward_func( forward_step_func=forward_step_func, data_iterator=data_iterator, model=model, - num_microbatches=get_num_microbatches(), + num_microbatches=num_microbatches, seq_length=args.seq_length, micro_batch_size=args.micro_batch_size, decoder_seq_length=args.decoder_seq_length, @@ -1274,7 +1300,7 @@ def evaluate( # Empty unused memory if args.empty_unused_memory_level >= 1: - torch.cuda.empty_cache() + accelerator.empty_cache() if mpu.is_pipeline_last_stage(ignore_virtual=True): # Reduce across processes. @@ -1282,16 +1308,14 @@ def evaluate( for key in loss_dict: if "moe" not in key: total_loss_dict[key] = ( - total_loss_dict.get( - key, get_accelerator().FloatTensor([0.0]) - ) + total_loss_dict.get(key, accelerator.FloatTensor([0.0])) + loss_dict[key] ) args.consumed_valid_samples += ( mpu.get_data_parallel_world_size() * args.micro_batch_size - * get_num_microbatches() + * num_microbatches ) collected_non_loss_data = None if process_non_loss_data_func is not None and is_last_rank(): @@ -1299,7 +1323,7 @@ def evaluate( forward_step_func=forward_step_func, data_iterator=data_iterator, model=model, - num_microbatches=get_num_microbatches(), + num_microbatches=num_microbatches, seq_length=args.seq_length, micro_batch_size=args.micro_batch_size, decoder_seq_length=args.decoder_seq_length, @@ -1312,7 +1336,7 @@ def evaluate( model_module.train() for key in total_loss_dict: - total_loss_dict[key] /= args.eval_iters * get_num_microbatches() + total_loss_dict[key] /= args.eval_iters * num_microbatches if args.curriculum_learning_legacy and not args.no_pipeline_parallel: # roll back to actual curriculum seqlen at the end of eval. 
@@ -1443,7 +1467,8 @@ def build_train_valid_test_datasets(build_train_valid_test_datasets_provider): def build_train_valid_test_data_loaders(build_train_valid_test_datasets_provider): """Build pretraining data loaders.""" args = get_args() - assert args is not None + accelerator = get_accelerator() + assert args is not None and accelerator is not None (train_dataloader, valid_dataloader, test_dataloader) = (None, None, None) log.info("> building train, validation, and test datasets ...") # Backward compatibility, assume fixed batch size. @@ -1486,11 +1511,9 @@ def build_train_valid_test_data_loaders(build_train_valid_test_datasets_provider do_valid = valid_dataloader is not None and args.eval_iters > 0 do_test = test_dataloader is not None and args.eval_iters > 0 # Need to broadcast num_tokens and num_type_tokens. - flags = get_accelerator().LongTensor( - [int(do_train), int(do_valid), int(do_test)] - ) + flags = accelerator.LongTensor([int(do_train), int(do_valid), int(do_test)]) else: - flags = get_accelerator().LongTensor([0, 0, 0]) + flags = accelerator.LongTensor([0, 0, 0]) # Broadcast num tokens. if ds_sequence_parallel: torch.distributed.broadcast( diff --git a/megatron/training_log.py b/megatron/training_log.py index e4f74b24c3..be2eacd059 100644 --- a/megatron/training_log.py +++ b/megatron/training_log.py @@ -1,154 +1,51 @@ # Copyright (C) 2024 Habana Labs, Ltd. an Intel Company. # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
+""" +training_log.py +""" -"""Pretrain utilities.""" - -from enum import Enum - -# from deepspeed.accelerator import get_accelerator -# from deepspeed.compression.compress import redundancy_clean -import torch -import os import logging +import os -from megatron import get_args -from megatron import get_timers -from megatron import get_tensorboard_writer -from megatron import get_wandb_writer -from megatron import get_num_microbatches -from megatron.core import mpu - -# from megatron import is_rank_0, print_rank_0 -# from megatron import print_rank_last -# from megatron.arguments import core_transformer_config_from_args -# from megatron.checkpointing import load_checkpoint -# from megatron.checkpointing import save_checkpoint -# from megatron.core import mpu, tensor_parallel -# from megatron.core.enums import ModelType -# from megatron.core.pipeline_parallel import get_forward_backward_func -# from megatron.data.data_samplers import build_pretraining_data_loader -# from megatron.initialize import initialize_megatron -# from megatron.initialize import write_args_to_tensorboard -# from megatron.initialize import set_jit_fusion_options -# from megatron.model import Float16Module -# from megatron.model import GPTModel -# from megatron.model import DistributedDataParallel as LocalDDP -# from megatron.model.transformer import ParallelTransformerLayer -# from megatron.model.vision.knn_monitor import compute_feature_bank -# from megatron.optimizer import get_megatron_optimizer -# from megatron.optimizer_param_scheduler import OptimizerParamScheduler -# from megatron.profiler import on_step_begin, on_step_end, setup_profiler, trigger -# from megatron.utils import check_adlr_autoresume_termination -from megatron.utils import found_kill_switch +from deepspeed import get_accelerator import ezpz as ez +import torch -# from megatron.utils import calc_params_l2_norm +from megatron.core import mpu +from megatron.global_vars import ( + get_args, + get_num_microbatches, + 
get_tensorboard_writer, + get_timers, +) from megatron.utils import ( - # checkpoint_throughput_calculator, + Profile, + is_last_rank, report_memory, throughput_calculator, - # update_rotary_pos_emb, + num_floating_point_operations, ) -try: - import wandb -except (ImportError, ModuleNotFoundError): - wandb = None -# The earliest we can measure the start time. -# _TRAIN_START_TIME = time.time() - - -log = logging.getLogger(__name__) - - -class InteropLoggingTool(Enum): - TENSORBOARD = 1 - WANDB = 2 - RANK: int = ez.get_rank() -LOCAL_RANK: int = ez.get_local_rank() WORLD_SIZE: int = ez.get_world_size() DEVICE_TYPE: str = ez.dist.get_torch_device_type() -DEVICE_ID: str = f"{DEVICE_TYPE}:{LOCAL_RANK}" DEVICE: torch.device = torch.device(DEVICE_TYPE) log: logging.Logger = logging.getLogger(__name__) LOG_LEVEL: str = str(os.environ.get("LOG_LEVEL", "INFO")).upper() log.setLevel(LOG_LEVEL) if RANK == 0 else log.setLevel("CRITICAL") +try: + import wandb +except (ImportError, ModuleNotFoundError): + wandb = None -class interop_tool_logger: - def __init__(self, tb_writer=None, wandb_writer=None): - self.tb_writer = tb_writer - self.wandb_writer = wandb_writer - self.custom_x_axis = [] - self.custom_y_axis = {} - self.args = get_args() - assert self.args is not None - if not hasattr(self.args, "logger_iteration"): - self.args.logger_iteration = 1 - assert self.args.logger_iteration is not None - - def is_enabled(self): - return self.tb_writer or self.wandb_writer - - def add_scalar( - self, - key, - scalar_value, - step, - custom_step_name=None, - tool_list=[InteropLoggingTool.TENSORBOARD, InteropLoggingTool.WANDB], - ): - if self.tb_writer and InteropLoggingTool.TENSORBOARD in tool_list: - self.tb_writer.add_scalar(key, scalar_value, step) - - if ( - wandb is not None - and self.wandb_writer - and InteropLoggingTool.WANDB in tool_list - ): - assert self.args is not None - assert self.args.logger_iteration is not None - if not custom_step_name: - self.wandb_writer.log({key: 
scalar_value}, step=step) - if self.args.logger_iteration < step: - # Updating iteration - self.args.logger_iteration = step - - else: - if custom_step_name not in self.custom_x_axis: - self.custom_x_axis.append(custom_step_name) - wandb.define_metric(custom_step_name) - - if key not in self.custom_y_axis: - self.custom_y_axis[key] = custom_step_name - wandb.define_metric(key, step_metric=custom_step_name) - - self.wandb_writer.log( - {key: scalar_value, custom_step_name: step}, - step=self.args.logger_iteration, - ) - - def add_scalar_to_tb(self, key, scalar_value, step): - return self.add_scalar( - key, scalar_value, step, None, [InteropLoggingTool.TENSORBOARD] - ) - - def add_scalar_to_wandb(self, key, scalar_value, step, custom_step_name=None): - return self.add_scalar( - key, scalar_value, step, custom_step_name, [InteropLoggingTool.WANDB] - ) - - def add_images(self, key, img_tensor, step=None): - if self.tb_writer: - self.tb_writer.add_images(key, img_tensor, step) - if wandb is not None and self.wandb_writer: - self.wandb_writer.log({key: wandb.Image(img_tensor)}, step) +dlp = Profile("TRAINING_LOG") +@dlp.log def training_log( loss_dict, total_loss_dict, @@ -165,12 +62,11 @@ def training_log( ): """Log training information such as losses, timing, ....""" args = get_args() + accelerator = get_accelerator() timers = get_timers() - writer = interop_tool_logger( - tb_writer=get_tensorboard_writer(), wandb_writer=get_wandb_writer() - ) - x_axis_samples = "Samples" - x_axis_tokens = "Tokens" + writer = get_tensorboard_writer() + assert args is not None and timers is not None and accelerator is not None + wandb_metrics = {} # Advanced, skipped, and Nan iterations. 
advanced_iters_key = "advanced iterations" skipped_iters_key = "skipped iterations" @@ -189,10 +85,12 @@ def training_log( ) # Update losses and set nan iterations got_nan = False - _zero = torch.tensor([0.0]).to(DEVICE) for key in loss_dict: if not skipped_iter: - total_loss_dict[key] = total_loss_dict.get(key, _zero) + loss_dict[key] + total_loss_dict[key] = ( + total_loss_dict.get(key, accelerator.FloatTensor([0.0])) + + loss_dict[key] + ) else: value = loss_dict[key].float().sum().item() is_nan = value == float("inf") or value == -float("inf") or value != value @@ -230,12 +128,10 @@ def training_log( "optimizer", ] - assert args is not None and timers is not None # Calculate batch size. batch_size = ( args.micro_batch_size * args.data_parallel_size * get_num_microbatches() ) - total_iterations = ( total_loss_dict[advanced_iters_key] + total_loss_dict[skipped_iters_key] ) @@ -245,39 +141,35 @@ def training_log( if args.log_timers_to_tensorboard and ( iteration % args.tensorboard_log_interval == 0 ): - timers.write(timers_to_log, writer, iteration, normalizer=total_iterations) # type: ignore - if writer.is_enabled() and (iteration % args.tensorboard_log_interval == 0): + timers.write(timers_to_log, writer, iteration, normalizer=total_iterations) + if writer and (iteration % args.tensorboard_log_interval == 0): writer.add_scalar( - "steps-vs-samples/y=steps,x=samples", - iteration, - args.consumed_train_samples, - x_axis_samples, + "steps-vs-samples/y=steps,x=samples", iteration, args.consumed_train_samples ) writer.add_scalar( "steps-vs-samples/y=samples,x=steps", args.consumed_train_samples, iteration ) writer.add_scalar( - "steps-vs-tokens/y=steps,x=tokens", - iteration, - args.consumed_train_tokens, - x_axis_tokens, + "steps-vs-tokens/y=steps,x=tokens", iteration, args.consumed_train_tokens ) writer.add_scalar( "steps-vs-tokens/y=tokens,x=steps", args.consumed_train_tokens, iteration ) if args.log_learning_rate_to_tensorboard: + wandb_metrics |= { + 
"learning-rate/iteration": iteration, + "learning-rate/learning-rate": learning_rate, + } writer.add_scalar("learning-rate/learning-rate", learning_rate, iteration) writer.add_scalar( "learning-rate/learning-rate vs samples", learning_rate, args.consumed_train_samples, - x_axis_samples, ) writer.add_scalar( "learning-rate/learning-rate vs tokens", learning_rate, args.consumed_train_tokens, - x_axis_tokens, ) if args.log_batch_size_to_tensorboard: writer.add_scalar("batch-size/batch-size", batch_size, iteration) @@ -285,27 +177,28 @@ def training_log( "batch-size/batch-size vs samples", batch_size, args.consumed_train_samples, - x_axis_samples, ) writer.add_scalar( "batch-size/batch-size vs tokens", batch_size, args.consumed_train_tokens, - x_axis_tokens, ) + wandb_metrics |= { + "lm-loss-training/iteration": iteration, + "lm-loss-training/consumed_train_tokens": args.consumed_train_tokens, + } for key in loss_dict: + wandb_metrics |= {f"lm-loss-training/{key}": loss_dict[key]} writer.add_scalar(f"lm-loss-training/{key}", loss_dict[key], iteration) writer.add_scalar( f"lm-loss-training/{key}" + " vs samples", loss_dict[key], args.consumed_train_samples, - x_axis_samples, ) writer.add_scalar( f"lm-loss-training/{key}" + " vs tokens", loss_dict[key], args.consumed_train_tokens, - x_axis_tokens, ) if args.fp16 and loss_scale and args.log_loss_scale_to_tensorboard: writer.add_scalar("loss-scale/loss-scale", loss_scale, iteration) @@ -313,13 +206,11 @@ def training_log( "loss-scale/loss-scale vs samples", loss_scale, args.consumed_train_samples, - x_axis_samples, ) writer.add_scalar( "loss-scale/loss-scale vs tokens", loss_scale, args.consumed_train_tokens, - x_axis_tokens, ) if args.log_world_size_to_tensorboard: writer.add_scalar("world-size/world-size", args.world_size, iteration) @@ -327,55 +218,46 @@ def training_log( "world-size/world-size vs samples", args.world_size, args.consumed_train_samples, - x_axis_samples, ) writer.add_scalar( "world-size/world-size vs 
tokens", args.world_size, args.consumed_train_tokens, - x_axis_tokens, ) if grad_norm is not None: + wandb_metrics |= {"training/grad-norm": grad_norm} writer.add_scalar("grad-norm/grad-norm", grad_norm, iteration) writer.add_scalar( - "grad-norm/grad-norm vs samples", - grad_norm, - args.consumed_train_samples, - x_axis_samples, + "grad-norm/grad-norm vs samples", grad_norm, args.consumed_train_samples ) writer.add_scalar( - "grad-norm/grad-norm vs tokens", - grad_norm, - args.consumed_train_tokens, - x_axis_tokens, + "grad-norm/grad-norm vs tokens", grad_norm, args.consumed_train_tokens ) if num_zeros_in_grad is not None: + wandb_metrics |= {"training/num-zeros": num_zeros_in_grad} writer.add_scalar("num-zeros/num-zeros", num_zeros_in_grad, iteration) writer.add_scalar( "num-zeros/num-zeros vs samples", num_zeros_in_grad, args.consumed_train_samples, - x_axis_samples, ) writer.add_scalar( "num-zeros/num-zeros vs tokens", num_zeros_in_grad, args.consumed_train_tokens, - x_axis_tokens, ) if params_norm is not None: + wandb_metrics |= {"training/params-norm": params_norm} writer.add_scalar("params-norm/params-norm", params_norm, iteration) writer.add_scalar( "params-norm/params-norm vs samples", params_norm, args.consumed_train_samples, - x_axis_samples, ) writer.add_scalar( "params-norm/params-norm vs tokens", params_norm, args.consumed_train_tokens, - x_axis_tokens, ) if hasattr(args, "actual_seq_length"): writer.add_scalar( @@ -385,13 +267,11 @@ def training_log( "seqlen/actual_seq_length vs samples", args.actual_seq_length, args.consumed_train_samples, - x_axis_samples, ) writer.add_scalar( "seqlen/actual_seq_length vs tokens", args.actual_seq_length, args.consumed_train_tokens, - x_axis_tokens, ) if args.curriculum_learning_legacy or args.data_efficiency_curriculum_learning: writer.add_scalar( @@ -401,13 +281,11 @@ def training_log( "seqlen/curriculum_seqlen vs samples", args.curriculum_seqlen, args.consumed_train_samples, - x_axis_samples, ) writer.add_scalar( 
"seqlen/curriculum_seqlen vs tokens", args.curriculum_seqlen, args.consumed_train_tokens, - x_axis_tokens, ) if args.random_ltd: writer.add_scalar( @@ -419,13 +297,11 @@ def training_log( "seqlen/random_ltd_reserved_length vs samples", args.random_ltd_reserved_length, args.consumed_train_samples, - x_axis_samples, ) writer.add_scalar( "seqlen/random_ltd_reserved_length vs tokens", args.random_ltd_reserved_length, args.consumed_train_tokens, - x_axis_tokens, ) if args.log_memory_to_tensorboard: mem_stats = torch.cuda.memory_stats() @@ -444,70 +320,56 @@ def training_log( mem_stats["allocation.all.current"], iteration, ) - if iteration % args.tensorboard_log_interval == 0: # This logging write various optimizer states to tensorboard. This # feature may consume extra GPU memory thus is set at false by default. if args.log_optimizer_states_to_tensorboard and optimizer is not None: opt_stats = [0.0] * 8 opt_stats_2 = [0.0] * 4 - - # TODO(billishyahao): Remove me after bf16_optimizer promotes its state. - if not hasattr(optimizer, "state"): - assert hasattr( - optimizer, "optimizer" - ), "Optimizer must have optimizer property." 
- optimizer.state = optimizer.optimizer.state - for _, group in enumerate(optimizer.param_groups): for _, param in enumerate(group["params"]): - opt_stats[0] += ( - torch.norm(optimizer.state[param]["exp_avg_sq"]).item() - ) ** 2 - opt_stats[1] += ( - torch.norm(optimizer.state[param]["exp_avg_sq"].sqrt()).item() - ) ** 2 - opt_stats[2] += ( - torch.norm(optimizer.state[param]["exp_avg"]).item() - ) ** 2 - opt_stats[3] += (torch.norm(param).item()) ** 2 - opt_stats[4] += torch.norm( - optimizer.state[param]["exp_avg_sq"], p=1 - ).item() - opt_stats[5] += torch.norm( - optimizer.state[param]["exp_avg_sq"].sqrt(), p=1 - ).item() - opt_stats[6] += torch.norm( - optimizer.state[param]["exp_avg"], p=1 - ).item() - opt_stats[7] += torch.norm(param, p=1).item() - opt_stats_2[0] = max( - opt_stats_2[0], - abs(optimizer.state[param]["exp_avg_sq"].max().item()), - abs(optimizer.state[param]["exp_avg_sq"].min().item()), - ) - opt_stats_2[1] = max( - opt_stats_2[1], - optimizer.state[param]["exp_avg_sq"].sqrt().abs_().max().item(), - ) - opt_stats_2[2] = max( - opt_stats_2[2], - abs(optimizer.state[param]["exp_avg"].max().item()), - abs(optimizer.state[param]["exp_avg"].min().item()), - ) - opt_stats_2[3] = max( - opt_stats_2[3], abs(param.max().item()), abs(param.min().item()) - ) + state_param = getattr(optimizer, "state", None) + if state_param is not None: + exp_avg_sq = state_param.get("exp_avg_sq", torch.tensor(0.0)) + exp_avg = state_param.get("exp_avg", torch.tensor(0.0)) + opt_stats[0] += (torch.norm(exp_avg_sq).item()) ** 2 + opt_stats[1] += (torch.norm(exp_avg_sq.sqrt()).item()) ** 2 + opt_stats[2] += (torch.norm(exp_avg).item()) ** 2 + opt_stats[3] += (torch.norm(param).item()) ** 2 + opt_stats[4] += torch.norm(exp_avg_sq, p=1).item() + opt_stats[5] += torch.norm(exp_avg_sq.sqrt(), p=1).item() + opt_stats[6] += torch.norm(exp_avg, p=1).item() + opt_stats[7] += torch.norm(param, p=1).item() + opt_stats_2[0] = max( + opt_stats_2[0], + abs(exp_avg_sq.max().item()), + 
abs(exp_avg_sq.min().item()), + ) + opt_stats_2[1] = max( + opt_stats_2[1], exp_avg_sq.sqrt().abs_().max().item() + ) + opt_stats_2[2] = max( + opt_stats_2[2], + abs(exp_avg.max().item()), + abs(exp_avg.min().item()), + ) + opt_stats_2[3] = max( + opt_stats_2[3], + abs(param.max().item()), + abs(param.min().item()), + ) # print('step {} rank {} before sync opt_stats {}, {}'.format(iteration, torch.distributed.get_rank(), opt_stats_2, opt_stats)) if args.zero_stage > 0: # ZeRO partiions optimizer states - # opt_stats = get_accelerator().FloatTensor(opt_stats) - opt_stats = torch.tensor(opt_stats).to(DEVICE) + # opt_stats = opt_stats.clone().detach() + # opt_stats = get_accelerator().FloatTensor + opt_stats = accelerator.FloatTensor(opt_stats) torch.distributed.all_reduce( opt_stats, group=mpu.get_sequence_data_parallel_group() ) # opt_stats_2 = get_accelerator().FloatTensor(opt_stats_2) - opt_stats_2 = torch.tensor(opt_stats_2).to(DEVICE) + # opt_stats_2 = opt_stats_2.clone().detach() + opt_stats_2 = accelerator.FloatTensor(opt_stats_2) torch.distributed.all_reduce( opt_stats_2, op=torch.distributed.ReduceOp.MAX, @@ -515,13 +377,13 @@ def training_log( ) if args.tensor_model_parallel_size > 1: - opt_stats = torch.tensor(opt_stats).to(DEVICE) - # opt_stats = get_accelerator().FloatTensor(opt_stats) + # opt_stats = opt_stats.clone().detach() + opt_stats = accelerator.FloatTensor(opt_stats) torch.distributed.all_reduce( opt_stats, group=mpu.get_tensor_model_parallel_group() ) - # opt_stats_2 = get_accelerator().FloatTensor(opt_stats_2) - opt_stats_2 = torch.tensor(opt_stats_2).to(DEVICE) + # opt_stats_2 = opt_stats_2.clone().detach() + opt_stats_2 = accelerator.FloatTensor(opt_stats_2) torch.distributed.all_reduce( opt_stats_2, op=torch.distributed.ReduceOp.MAX, @@ -529,95 +391,97 @@ def training_log( ) if args.pipeline_model_parallel_size > 1: - # opt_stats = get_accelerator().FloatTensor(opt_stats) - opt_stats = torch.tensor(opt_stats).to(DEVICE) + # opt_stats = 
opt_stats.clone().detach() + opt_stats = accelerator.FloatTensor(opt_stats) torch.distributed.all_reduce( opt_stats, group=mpu.get_pipeline_model_parallel_group() ) - # opt_stats_2 = get_accelerator().FloatTensor(opt_stats_2) - opt_stats_2 = torch.tensor(opt_stats_2).to(DEVICE) + # opt_stats_2 = opt_stats_2.clone().detach() + opt_stats_2 = accelerator.get_accelerator().FloatTensor(opt_stats_2) torch.distributed.all_reduce( opt_stats_2, op=torch.distributed.ReduceOp.MAX, group=mpu.get_pipeline_model_parallel_group(), ) - + wandb_metrics |= { + "optimizer/learning_rate": learning_rate, + "optimizer/iteration": args.iteration, + "optimizer/consumed_train_tokens": args.consumed_train_tokens, + "optimizer/variance_l2": opt_stats[0] ** 0.5, + "optimizer/variance_sqrt_l2": opt_stats[1] ** 0.5, + "optimizer/momentum_l2": opt_stats[2] ** 0.5, + "optimizer/weight_l2": opt_stats[3] ** 0.5, + "optimizer/variance_l1": opt_stats[4], + "optimizer/variance_sqrt_l1": opt_stats[5], + "optimizer/momentum_l1": opt_stats[6], + "optimizer/weight_l1": opt_stats[7], + "optimizer/variance_abs_max": opt_stats_2[0], + "optimizer/variance_sqrt_abs_max": opt_stats_2[1], + "optimizer/momentum_abs_max": opt_stats_2[2], + "optimizer/weight_abs_max": opt_stats_2[3], + } # print('step {} rank {} after sync opt_stats {}, {}'.format(iteration, torch.distributed.get_rank(), opt_stats_2, opt_stats)) - # if writer.is_enabled() and is_last_rank(): - if writer.is_enabled() and RANK == 0: + if writer and is_last_rank(): writer.add_scalar( "optimizer/variance_l2 vs tokens", opt_stats[0] ** 0.5, args.consumed_train_tokens, - x_axis_tokens, ) writer.add_scalar( "optimizer/variance_sqrt_l2 vs tokens", opt_stats[1] ** 0.5, args.consumed_train_tokens, - x_axis_tokens, ) writer.add_scalar( "optimizer/momentum_l2 vs tokens", opt_stats[2] ** 0.5, args.consumed_train_tokens, - x_axis_tokens, ) writer.add_scalar( "optimizer/weight_l2 vs tokens", opt_stats[3] ** 0.5, args.consumed_train_tokens, - x_axis_tokens, ) 
writer.add_scalar( "optimizer/variance_l1 vs tokens", opt_stats[4], args.consumed_train_tokens, - x_axis_tokens, ) writer.add_scalar( "optimizer/variance_sqrt_l1 vs tokens", opt_stats[5], args.consumed_train_tokens, - x_axis_tokens, ) writer.add_scalar( "optimizer/momentum_l1 vs tokens", opt_stats[6], args.consumed_train_tokens, - x_axis_tokens, ) writer.add_scalar( "optimizer/weight_l1 vs tokens", opt_stats[7], args.consumed_train_tokens, - x_axis_tokens, ) writer.add_scalar( "optimizer/variance_abs_max vs tokens", opt_stats_2[0], args.consumed_train_tokens, - x_axis_tokens, ) writer.add_scalar( "optimizer/variance_sqrt_abs_max vs tokens", opt_stats_2[1], args.consumed_train_tokens, - x_axis_tokens, ) writer.add_scalar( "optimizer/momentum_abs_max vs tokens", opt_stats_2[2], args.consumed_train_tokens, - x_axis_tokens, ) writer.add_scalar( "optimizer/weight_abs_max vs tokens", opt_stats_2[3], args.consumed_train_tokens, - x_axis_tokens, ) - writer.add_scalar( "optimizer/variance_l2", opt_stats[0] ** 0.5, iteration ) @@ -643,6 +507,8 @@ def training_log( ) writer.add_scalar("optimizer/weight_abs_max", opt_stats_2[3], iteration) + assert args is not None + assert timers is not None if iteration % args.log_interval == 0: elapsed_time = timers("interval-time").elapsed(barrier=True) elapsed_time_per_iteration = elapsed_time / total_iterations @@ -659,114 +525,126 @@ def training_log( tokens_per_gpu_per_second_per_replica = ( tokens_per_gpu_per_second / args.data_parallel_size ) - - if writer.is_enabled(): - writer.add_scalar_to_wandb( - "throughput/iteration-time", elapsed_time_per_iteration, iteration - ) # 1000 ms / s - writer.add_scalar_to_wandb( - "throughput/samples_per_sec", samples_per_sec, iteration - ) - writer.add_scalar_to_wandb( - "throughput/samples_per_sec_per_replica", - samples_per_sec_per_replica, - iteration, - ) - writer.add_scalar_to_wandb( - "throughput/tokens_per_sec", tokens_per_sec, iteration - ) - writer.add_scalar_to_wandb( - 
"throughput/tokens_per_sec_per_replica", - tokens_per_sec_per_replica, - iteration, - ) - writer.add_scalar_to_wandb( - "throughput/tokens_per_gpu_per_sec", - tokens_per_gpu_per_second, - iteration, - ) - writer.add_scalar_to_wandb( - "throughput/tokens_per_gpu_per_sec_per_replica", - tokens_per_gpu_per_second_per_replica, - iteration, + # NOTE: [2024-06-19] + # Updated to use (more accurate) calculation according to + # `num_floating_point_operations` from NVIDIA/Megatron-LM + num_flop_lm = num_floating_point_operations(args, batch_size) + num_flop_per_sec_lm = num_flop_lm / elapsed_time_per_iteration + tflops_lm = num_flop_per_sec_lm / (10**12) + tflops_lm_per_gpu = tflops_lm / args.world_size + wandb_metrics |= { + "throughput/iteration-time": elapsed_time_per_iteration, # 1000 ms / s + "throughput/samples_per_sec": samples_per_sec, + "throughput/samples_per_sec_per_replica": samples_per_sec_per_replica, + "throughput/tokens_per_sec": tokens_per_sec, + "throughput/tokens_per_sec_per_replica": tokens_per_sec_per_replica, + "throughput/tokens_per_gpu_per_sec": tokens_per_gpu_per_second, + "throughput/tokens_per_gpu_per_sec_per_replica": tokens_per_gpu_per_second_per_replica, + "throughput/tflops": tflops, + "throughput/tflops-new": num_flop_lm / elapsed_time_per_iteration, + "throughput/tflops-lm": tflops_lm_per_gpu, + "throughput/approx_params_in_billions": approx_parameters_in_billions, + "throughput/elapsed_ms_per_iteration": elapsed_time_per_iteration, + "throughput/iteration": iteration, + } + if loss_dict is not None: + wandb_metrics |= { + "loss/iteration": iteration, + **{f"loss/{k}": v for k, v in loss_dict.items()}, + } + if writer and args.log_timers_to_tensorboard: + writer.add_scalar( + "iteration-time/iteration-time", elapsed_time_per_iteration, iteration ) - writer.add_scalar_to_wandb("throughput/tflops", tflops, iteration) - writer.add_scalar_to_wandb( - "throughput/approx_params_in_billions", - approx_parameters_in_billions, - iteration, + 
writer.add_scalar( + "iteration-time/iteration-time vs samples", + elapsed_time_per_iteration, + args.consumed_train_samples, ) - writer.add_scalar_to_wandb( - "throughput/elapsed_ms_per_iteration", + writer.add_scalar( + "iteration-time/iteration-time vs tokens", elapsed_time_per_iteration, - iteration, + args.consumed_train_tokens, ) - if loss_dict is not None: - for k, v in loss_dict.items(): - writer.add_scalar_to_wandb(f"loss/{k}", v, iteration) - - if args.log_timers_to_tensorboard: - writer.add_scalar( - "iteration-time/iteration-time", - elapsed_time_per_iteration, - iteration, - ) - writer.add_scalar( - "iteration-time/iteration-time vs samples", - elapsed_time_per_iteration, - args.consumed_train_samples, - x_axis_samples, - ) - writer.add_scalar( - "iteration-time/iteration-time vs tokens", - elapsed_time_per_iteration, - args.consumed_train_tokens, - x_axis_tokens, - ) - log_string = " iteration {:8d}/{:8d} |".format(iteration, args.train_iters) - log_string += " consumed samples: {:12d} |".format(args.consumed_train_samples) - log_string += " consumed tokens: {:12d} |".format(args.consumed_train_tokens) - log_string += " elapsed time per iteration (ms): {:.1f} |".format( - elapsed_time_per_iteration * 1000.0 + # metrics_to_log = { + # 'iteration': iteration, + # 'train_iters': args.train_iters, + # 'consumed_samples': args.consumed_train_samples, + # 'consumed_tokens': args.consumed_tokens, + # } + log_string = f" iteration={iteration:8d}/{args.train_iters:8d} |" + # .format( iteration, args.train_iters) + log_string += ( + f" consumed_samples={args.consumed_train_samples:12d} |" + # .format(args.consumed_train_samples) ) - log_string += " learning rate: {:.3E} |".format(learning_rate) - log_string += " global batch size: {:5d} |".format(batch_size) - + log_string += f" consumed_tokens={args.consumed_train_tokens:12d} |" + # .format( args.consumed_train_tokens) + log_string += ( + " elapsed_time_per_iteration_ms=" + f"{elapsed_time_per_iteration * 
1000.0:.1f} |" + # .format( elapsed_time_per_iteration * 1000.0) + ) + log_string += f" learning_rate={learning_rate:.6g} |" + log_string += f" global_batch_size={batch_size:5d} |" + # if wandb is not None and getattr(wandb, 'run', None) is not None: + wandb_metrics |= { + "training/iteration": iteration, + "training/iteration_time": elapsed_time_per_iteration, + "training/iteration_time_vs_tokens": ( + elapsed_time_per_iteration / args.consumed_train_tokens + ), + "training/iteration_time_vs_samples": ( + (elapsed_time_per_iteration / args.consumed_train_samples), + ), + "training/consumed_samples": args.consumed_train_samples, + "training/consumed_tokens": args.consumed_train_tokens, + } for key in total_loss_dict: if key not in [advanced_iters_key, skipped_iters_key, nan_iters_key]: avg = total_loss_dict[key].item() / float( max(1, total_loss_dict[advanced_iters_key]) ) if avg > 0.0: - log_string += " {}: {:.6E} |".format(key, avg) - # total_loss_dict[key] = get_accelerator().FloatTensor([0.0]) - total_loss_dict[key] = torch.tensor([0.0]).to(DEVICE) + log_string += " {}={:.6f} |".format(key, avg) + total_loss_dict[key] = accelerator.FloatTensor([0.0]) if loss_scale is not None: - log_string += " loss scale: {:.1f} |".format(loss_scale) + log_string += " loss_scale={:.1f} |".format(loss_scale) + wandb_metrics |= {"loss/loss_scale": loss_scale} if grad_norm is not None: - log_string += " grad norm: {:.3f} |".format(grad_norm) + log_string += " grad_norm={:.3f} |".format(grad_norm) + wandb_metrics |= {"loss/grad_norm": grad_norm} if num_zeros_in_grad is not None: - log_string += " num zeros: {:.1f} |".format(num_zeros_in_grad) + log_string += " num_zeros={:.1f} |".format(num_zeros_in_grad) + wandb_metrics |= {"loss/num_zeros_in_grad": num_zeros_in_grad} if params_norm is not None: - log_string += " params norm: {:.3f} |".format(params_norm) + log_string += " params_norm={:.3f} |".format(params_norm) + wandb_metrics |= {"loss/params_norm": params_norm} if 
args.curriculum_learning_legacy or args.data_efficiency_curriculum_learning: - log_string += " curriculum seqlen: {:5d} |".format(args.curriculum_seqlen) + log_string += " curriculum_seqlen={:5d} |".format(args.curriculum_seqlen) if args.random_ltd: - log_string += " random ltd reserved length: {:5d} |".format( + log_string += " random_ltd reserved_length={:5d} |".format( args.random_ltd_reserved_length ) - log_string += " actual seqlen: {:5d} |".format(seq_len) - log_string += " number of skipped iterations: {:3d} |".format( + # log_string += " | ".join([ + # f"{seq_len=:5d} ", + # f"{}" + # f"number_of_skipped_iterations={:3d}", + # + # ]) + log_string += " actual_seqlen={:5d} |".format(seq_len) + log_string += " number_of_skipped_iterations={:3d} |".format( total_loss_dict[skipped_iters_key] ) - log_string += " number of nan iterations: {:3d} |".format( + log_string += " number_of_nan_iterations={:3d} |".format( total_loss_dict[nan_iters_key] ) - log_string += " samples per second: {:.3f} |".format(samples_per_sec) - log_string += " tokens per gpu per second (tgs): {:.3f} |".format( + log_string += " samples_per_second={:.3f} |".format(samples_per_sec) + log_string += " tokens_per_gpu_per_second_tgs={:.3f} |".format( tokens_per_gpu_per_second ) - log_string += " TFLOPs: {:.2f} |".format(tflops) + log_string += " [LM]TFLOPs={:.2f} |".format(tflops_lm_per_gpu) + log_string += " [DS]TFLOPs={:.2f} |".format(tflops) total_loss_dict[advanced_iters_key] = 0 total_loss_dict[skipped_iters_key] = 0 total_loss_dict[nan_iters_key] = 0 @@ -776,6 +654,13 @@ def training_log( # Report memory after optimizer state has been initialized. 
report_memory("(after {} iterations)".format(iteration)) report_memory_flag = False - timers.log(timers_to_log, normalizer=args.log_interval) + if wandb is not None and getattr(wandb, "run", None) is not None: + wandb_metrics |= { + "training/skiped_iterations": total_loss_dict[skipped_iters_key] + } + wandb_metrics |= {"training/nan_iterations": total_loss_dict[nan_iters_key]} + wandb.log(wandb_metrics) + if timers is not None: + timers.log(timers_to_log, normalizer=args.log_interval) return report_memory_flag diff --git a/megatron/utils.py b/megatron/utils.py index c1b43624d3..31c8e20508 100644 --- a/megatron/utils.py +++ b/megatron/utils.py @@ -7,17 +7,21 @@ import os import time import logging -from typing import ContextManager, Optional +from typing import Optional +# from ezpz.dist import get_rank import torch from torch.nn.parallel import DistributedDataParallel as torchDDP from deepspeed.accelerator import get_accelerator -if get_accelerator().device_name() == "cuda": +ACCELERATOR = get_accelerator() +assert ACCELERATOR is not None + +if ACCELERATOR.device_name() == "cuda": try: - from apex.multi_tensor_apply import multi_tensor_applier - import amp_C + from apex.multi_tensor_apply import multi_tensor_applier # type: ignore + import amp_C # type:ignore HAS_APEX = True except Exception: @@ -38,19 +42,15 @@ _DLIO_PROFILER_EXIST = True try: - import dlio_profiler -except: + import dlio_profiler # type: ignore +except Exception: _DLIO_PROFILER_EXIST = False if _DLIO_PROFILER_EXIST: - from dlio_profiler.logger import fn_interceptor as Profile - from dlio_profiler.logger import dlio_logger as PerfTrace + from dlio_profiler.logger import fn_interceptor as Profile # type:ignore + from dlio_profiler.logger import dlio_logger as PerfTrace # type:ignore else: from functools import wraps - # from contextlib import nullcontext - # Profile: ContextManager = nullcontext - # - # class Profile(nullable_schema) class Profile: def __init__(self, type="PROFILER"): @@ -70,7 
+70,6 @@ def __exit__(self, *args, **kwargs): dt = time.perf_counter() - self._start log.info(f"{self.type} took: {dt:.6f}s") - class dlio_logger: def __init__( self, @@ -87,9 +86,9 @@ def iter(self, a): def get_logger( - name: str, - level: str = "INFO", - rank_zero_only: Optional[bool] = None, + name: str, + level: str = "INFO", + rank_zero_only: Optional[bool] = None, ) -> logging.Logger: """Returns a `logging.Logger` object. @@ -105,7 +104,8 @@ def get_logger( def update_rotary_pos_emb(seq_length): args = get_args() - assert args is not None + accelerator = get_accelerator() + assert args is not None and accelerator is not None rotary_dim = ( args.hidden_size // args.num_attention_heads if args.kv_channels is None @@ -119,7 +119,7 @@ def update_rotary_pos_emb(seq_length): # Wang and Komatsuzaki et al # https://github.com/kingoflolz/mesh-transformer-jax/ rotary_pos_emb = RotaryEmbedding(rotary_dim, theta=args.rope_theta)(seq_length).to( - get_accelerator().current_device_name() + accelerator.current_device_name() ) args.rotary_pos_emb = rotary_pos_emb @@ -189,21 +189,22 @@ def average_losses_across_data_parallel_group(losses): def report_memory(name): """Simple GPU memory report.""" + accelerator = get_accelerator() + assert accelerator is not None mega_bytes = 1024.0 * 1024.0 string = name + " memory (MB)" - string += " | allocated: {}".format( - get_accelerator().memory_allocated() / mega_bytes - ) + string += " | allocated: {}".format(accelerator.memory_allocated() / mega_bytes) string += " | max allocated: {}".format( - get_accelerator().max_memory_allocated() / mega_bytes - ) - string += " | reserved: {}".format(get_accelerator().memory_reserved() / mega_bytes) - string += " | max reserved: {}".format( - get_accelerator().max_memory_reserved() / mega_bytes + accelerator.max_memory_allocated() / mega_bytes ) + reserved = accelerator.memory_reserved() + max_reserved = accelerator.max_memory_reserved() + if reserved is not None: + string += " | reserved: 
{}".format(reserved / mega_bytes) + if max_reserved is not None: + string += " | max reserved: {}".format(max_reserved / mega_bytes) if mpu.get_data_parallel_rank() == 0: log.info(f"[Rank {RANK}] {string}") - # log.info("[Rank {}] {}".format(torch.distributed.get_rank(), string)) # , flush=True) def print_params_min_max_norm(optimizer, iteration): @@ -222,19 +223,19 @@ def print_params_min_max_norm(optimizer, iteration): iteration, rank, index, int(param.tensor_model_parallel) ) string += "{:.6E}, {:.6E}, {:.6E}\n".format(min_, max_, norm) - # print(string, flush=True) log.info(string) def check_adlr_autoresume_termination(iteration, model, optimizer, opt_param_scheduler): """Check for autoresume signal and exit if it is received.""" from megatron.checkpointing import save_checkpoint + args = get_args() assert args is not None autoresume = get_adlr_autoresume() # Add barrier to ensure consistnecy. torch.distributed.barrier() - if autoresume.termination_requested(): + if autoresume is not None and autoresume.termination_requested(): if args.save: save_checkpoint(iteration, model, optimizer, opt_param_scheduler) print_rank_0(">>> autoresume termination request found!") @@ -265,18 +266,9 @@ def get_ltor_masks_and_position_ids( attention_mask = None if not skip_mask: -# <<<<<<< HEAD -# attention_mask = torch.tril( -# torch.ones((att_mask_batch, seq_length, seq_length)) -# ).view(att_mask_batch, 1, seq_length, seq_length) -# ======= attention_mask = torch.tril( - torch.ones( - (att_mask_batch, seq_length, seq_length), - device=data.device - ) + torch.ones((att_mask_batch, seq_length, seq_length), device=data.device) ).view(att_mask_batch, 1, seq_length, seq_length) -# >>>>>>> 0d6e3793a1fc06eded9764ef15ad12bcc0281101 # Loss mask. loss_mask = torch.ones(data.size(), dtype=torch.float, device=data.device) @@ -304,27 +296,21 @@ def get_ltor_masks_and_position_ids( for j in range(eod_index.size()[0]): i = eod_index[j] # Mask attention loss. 
- if reset_attention_mask and not skip_mask: + if ( + reset_attention_mask + and not skip_mask + and attention_mask is not None + ): attention_mask[b, 0, (i + 1) :, : (i + 1)] = 0 # Reset positions. if reset_position_ids: position_ids[b, (i + 1) :] -= i + 1 - prev_index prev_index = i + 1 - # # Convert attention mask to binary: - # if not skip_mask: - # <<<<<<< HEAD - # assert attention_mask is not None - # attention_mask = attention_mask < 0.5 - # attention_mask = attention_mask.to(data.device) - # ======= - # attention_mask = (attention_mask < 0.5) - # >>>>>>> 0d6e3793a1fc06eded9764ef15ad12bcc0281101 - # Convert attention mask to binary: if not skip_mask: assert attention_mask is not None - attention_mask = (attention_mask < 0.5) + attention_mask = attention_mask < 0.5 return attention_mask, loss_mask, position_ids @@ -364,10 +350,7 @@ def is_rank_0(): if torch.distributed.is_initialized(): if torch.distributed.get_rank() == 0 or ( is_aml() - and ( - torch.distributed.get_rank() - % get_accelerator().device_count() - ) == 0 + and (torch.distributed.get_rank() % get_accelerator().device_count()) == 0 ): return True else: @@ -396,6 +379,37 @@ def get_parameters_in_billions(model): return approx_parameters_in_billions * gpus_per_model / (1e9) +def num_floating_point_operations(args, batch_size): + # Group Query Attention. + # if not args.group_query_attention: + if not args.num_key_value_heads: + args.num_key_value_heads = args.num_attention_heads + # args.num_query_groups = args.num_attention_heads + # MoE. 
+ # num_experts_routed_to = 1 if args.num_experts is None else args.moe_router_topk + num_experts_routed_to = 1 if args.num_experts is None else args.topk + gated_linear_multiplier = 3 / 2 if args.swiglu else 1 + return ( + 12 + * batch_size + * args.seq_length + * args.num_layers + * args.hidden_size + * args.hidden_size + * ( + 1 + + ( + (args.ffn_hidden_size / args.hidden_size) + * num_experts_routed_to + * gated_linear_multiplier + ) + + (args.num_key_value_heads / args.num_attention_heads) + + (args.seq_length / args.hidden_size) + + (args.padded_vocab_size / (2 * args.num_layers * args.hidden_size)) + ) + ) + + def throughput_calculator(model, args, iteration_time, total_iterations): batch_size = ( args.micro_batch_size * get_num_microbatches() * args.data_parallel_size @@ -419,55 +433,47 @@ def throughput_calculator(model, args, iteration_time, total_iterations): # General TFLOPs formula (borrowed from Equation 3 in Section 5.1 of # https://arxiv.org/pdf/2104.04473.pdf). -# <<<<<<< HEAD -# # The factor of 4 is when used with activation check-pointing, -# # otherwise it will be 3. 
-# checkpoint_activations_factor = 3 -# if hasattr(args, "checkpoint_activations") and args.checkpoint_activations: -# checkpoint_activations_factor = 4 -# if hasattr(args, "recompute_granularity") and ( -# args.recompute_granularity == "selective" -# or args.recompute_granularity == "full" -# ): -# checkpoint_activations_factor = 4 -# ======= # correction has been made to TFLOPs formula due to incorrect behavior # observed with selective recompute when GQA not used and for all with GQA -# >>>>>>> 0d6e3793a1fc06eded9764ef15ad12bcc0281101 seq_len = args.seq_length if hasattr(args, "actual_seq_length"): seq_len = args.actual_seq_length -# <<<<<<< HEAD -# flops_per_iteration = ( -# 24 -# * checkpoint_activations_factor -# * batch_size -# * seq_len -# * num_layers -# * (hidden_size**2) -# ) * ( -# 1.0 -# + (seq_len / (6.0 * hidden_size)) -# + (vocab_size / (16.0 * num_layers * hidden_size)) -# ) -# ======= - - pre_and_post_mha_gemm_macs = batch_size * num_layers * (1 + (2 // gqa) + 1) * (hidden_size**2) * seq_len - mha_bgemm_macs = batch_size * num_layers * 2 * head_dim * num_attention_heads * (seq_len**2) - ffn_gemm_macs = batch_size * num_layers * ffn_multiplier * ffn_hidden_size * hidden_size * seq_len + pre_and_post_mha_gemm_macs = ( + batch_size * num_layers * (1 + (2 // gqa) + 1) * (hidden_size**2) * seq_len + ) + mha_bgemm_macs = ( + batch_size * num_layers * 2 * head_dim * num_attention_heads * (seq_len**2) + ) + ffn_gemm_macs = ( + batch_size + * num_layers + * ffn_multiplier + * ffn_hidden_size + * hidden_size + * seq_len + ) logit_lmhead_gemm_macs = batch_size * vocab_size * hidden_size * seq_len - fwd_macs = pre_and_post_mha_gemm_macs + mha_bgemm_macs + ffn_gemm_macs + logit_lmhead_gemm_macs + fwd_macs = ( + pre_and_post_mha_gemm_macs + + mha_bgemm_macs + + ffn_gemm_macs + + logit_lmhead_gemm_macs + ) bwd_macs = 2 * fwd_macs fwd_bwd_macs = fwd_macs + bwd_macs - if (hasattr(args, 'checkpoint_activations') and args.checkpoint_activations) or (hasattr(args, 
'recompute_granularity') and args.recompute_granularity == 'full'): + if (hasattr(args, "checkpoint_activations") and args.checkpoint_activations) or ( + hasattr(args, "recompute_granularity") and args.recompute_granularity == "full" + ): fwd_bwd_macs += fwd_macs - if hasattr(args, 'recompute_granularity') and args.recompute_granularity == 'selective': + if ( + hasattr(args, "recompute_granularity") + and args.recompute_granularity == "selective" + ): fwd_bwd_macs += mha_bgemm_macs flops_per_iteration = fwd_bwd_macs * macs_per_flops -# >>>>>>> 0d6e3793a1fc06eded9764ef15ad12bcc0281101 tflops = flops_per_iteration / (elapsed_time_per_iter * args.world_size * (10**12)) return samples_per_second, tflops, approx_parameters_in_billions @@ -517,58 +523,6 @@ def dump_position_embed_weights(preamble, iteration, model): ) -# def dump_weights(preamble, iteration, model, optimizer, tensor=None): -# # return -# tp_rank = mpu.get_tensor_model_parallel_rank() -# pp_rank = mpu.get_pipeline_model_parallel_rank() -# dp_rank = mpu.get_data_parallel_rank() -# dp_size = mpu.get_data_parallel_world_size() -# fn = f"debug-bf16-{iteration}-pp{pp_rank}-tp{tp_rank}-dp{dp_rank}-{preamble}.txt" -# # only care for first and last pp stages and dp0 tp0 -# # if not (mpu.is_pipeline_first_stage() or mpu.is_pipeline_last_stage()): -# # return -# # if not (tp_rank == 0 and dp_rank == 0): -# # return -# if tensor is not None: -# orig_tensor = tensor -# if hasattr(tensor, "_hp_param"): -# numel = tensor._hp_param.numel() # // dp_size -# tensor = tensor.flatten().narrow(0, 0, numel) -# # print(fn) -# with open(fn, "w") as fh: -# fh.write(f"{get_fingerprint_header()}\n") -# if tensor is not None: -# fh.write(f"{get_fingerprint(tensor)} tensor {tensor.shape}\n") -# else: -# for n, p in model[0].named_parameters(): -# fh.write(f"{get_fingerprint(p)} {n} {p.shape}\n") -# return -# # until we figure out how to dump the actual fp32 values don't do this -# fn = 
f"debug-fp32-{iteration}-pp{pp_rank}-tp{tp_rank}-dp{dp_rank}-{preamble}.txt" -# with open(fn, "w") as fh: -# fh.write(f"{get_fingerprint_header()}\n") -# if tensor is not None: -# tensor = orig_tensor -# if hasattr(tensor, "_hp_param"): -# fh.write( -# f"{get_fingerprint(tensor._hp_param)} tensor {tensor._hp_param.shape}\n" -# ) -# # fh.write(f"{get_fingerprint(tensor._hp_grad)} tensor grad\n") -# else: -# fh.write(f"{get_fingerprint(tensor)} tensor {tensor.shape}\n") -# # fh.write(f"{get_fingerprint(tensor.grad)} tensor grad\n") -# else: -# if hasattr(model[0].module.tied_modules, "embed"): -# p = model[0].module.tied_modules.embed.word_embeddings.weight._hp_param -# <<<<<<< HEAD -# fh.write( -# f"{get_fingerprint(p)} module.tied_modules.embed.word_embeddings.weight._hp_param {p.shape}\n" -# ) -# ======= -# fh.write(f"{get_fingerprint(p)} module.tied_modules.embed.word_embeddings.weight._hp_param {p.shape}\n") -# - - def dump_weights(preamble, iteration, model, optimizer, tensor=None): # return tp_rank = mpu.get_tensor_model_parallel_rank() @@ -578,19 +532,19 @@ def dump_weights(preamble, iteration, model, optimizer, tensor=None): fn = f"debug-bf16-{iteration}-pp{pp_rank}-tp{tp_rank}-dp{dp_rank}-{preamble}.txt" # only care for first and last pp stages and dp0 tp0 - #if not (mpu.is_pipeline_first_stage() or mpu.is_pipeline_last_stage()): + # if not (mpu.is_pipeline_first_stage() or mpu.is_pipeline_last_stage()): # return - #if not (tp_rank == 0 and dp_rank == 0): + # if not (tp_rank == 0 and dp_rank == 0): # return if tensor is not None: orig_tensor = tensor if hasattr(tensor, "_hp_param"): - numel = tensor._hp_param.numel() # // dp_size + numel = tensor._hp_param.numel() # // dp_size tensor = tensor.flatten().narrow(0, 0, numel) - #print(fn) + # print(fn) with open(fn, "w") as fh: fh.write(f"{get_fingerprint_header()}\n") @@ -600,7 +554,6 @@ def dump_weights(preamble, iteration, model, optimizer, tensor=None): for n, p in model[0].named_parameters(): 
fh.write(f"{get_fingerprint(p)} {n} {p.shape}\n") - # # # until we figure out how to dump the actual fp32 values don't do this # fn = f"debug-fp32-{iteration}-pp{pp_rank}-tp{tp_rank}-dp{dp_rank}-{preamble}.txt" # with open(fn, "w") as fh: From 7f71572ab0d916dcd0989e9fad014391c04912e1 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Fri, 13 Sep 2024 12:24:06 -0500 Subject: [PATCH 48/92] Update `megatron/training_log.py` --- megatron/training_log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megatron/training_log.py b/megatron/training_log.py index be2eacd059..cd6638e17d 100644 --- a/megatron/training_log.py +++ b/megatron/training_log.py @@ -397,7 +397,7 @@ def training_log( opt_stats, group=mpu.get_pipeline_model_parallel_group() ) # opt_stats_2 = opt_stats_2.clone().detach() - opt_stats_2 = accelerator.get_accelerator().FloatTensor(opt_stats_2) + opt_stats_2 = accelerator.FloatTensor(opt_stats_2) torch.distributed.all_reduce( opt_stats_2, op=torch.distributed.ReduceOp.MAX, From 7cb9c1137f3bee4e84071c2b58fffdd301b44c54 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Sat, 14 Sep 2024 21:30:23 -0500 Subject: [PATCH 49/92] Update `pretrain_gpt_alcf.py` --- pretrain_gpt_alcf.py | 336 ++++++++++++++++++++++--------------------- 1 file changed, 171 insertions(+), 165 deletions(-) diff --git a/pretrain_gpt_alcf.py b/pretrain_gpt_alcf.py index 04018d7918..4a6c3453da 100644 --- a/pretrain_gpt_alcf.py +++ b/pretrain_gpt_alcf.py @@ -2,7 +2,9 @@ """Pretrain GPT""" import time +from typing import Callable, Type from mpi4py import MPI + comm = MPI.COMM_WORLD comm.Barrier() python_start_time = time.time() @@ -14,6 +16,7 @@ import math from functools import partial from megatron import get_args + # from megatron import print_rank_0 from megatron import get_timers from megatron import get_tokenizer @@ -23,14 +26,19 @@ from megatron.model import GPTModel, GPTModelPipe from megatron.training import pretrain from megatron.utils import 
get_ltor_masks_and_position_ids -from megatron.utils import average_losses_across_data_parallel_group, update_rotary_pos_emb +from megatron.utils import ( + average_losses_across_data_parallel_group, + update_rotary_pos_emb, +) from megatron.arguments import core_transformer_config_from_args + # from megatron.utils import Profile, PerfTrace import logging import deepspeed from deepspeed.runtime.utils import see_memory_usage + # from deepspeed.accelerator.real_accelerator import get_accelerator import subprocess import wandb @@ -38,7 +46,8 @@ from torch import nn import torch.nn.functional as F import ezpz as ez -dt_imports = time.time() - python_start_time + +dt_imports = time.time() - python_start_time t0_setup = time.time() # ---- [SETUP COMMS] ------------------------ @@ -62,19 +71,12 @@ log.info(f"ez.setup_torch time: {dt_setup} seconds") # ---- [SETUP WANDB FROM RANK 0] -------------- -WANDB_MODE = os.environ.get('WANDB_MODE', None) -DISABLE_WANDB = ( - WANDB_MODE is not None and str(WANDB_MODE).lower() == 'disabled' -) +WANDB_MODE = os.environ.get("WANDB_MODE", None) +DISABLE_WANDB = WANDB_MODE is not None and str(WANDB_MODE).lower() == "disabled" if RANK == 0 and not DISABLE_WANDB: - project_name = ( - os.environ.get( - 'WB_PROJECT', # look for WB_PROJECT in env - os.environ.get( - 'WANDB_PROJECT', # look for WANDB_PROJECT in env - 'AuroraGPT' - ), - ) + project_name = os.environ.get( + "WB_PROJECT", # look for WB_PROJECT in env + os.environ.get("WANDB_PROJECT", "AuroraGPT"), # look for WANDB_PROJECT in env ) log.info(f"Setting up W&B from: {RANK} with {project_name}") _ = ez.setup_wandb(project_name=project_name) @@ -83,16 +85,16 @@ @ez.dist.timeitlogit(rank=RANK) def model_provider(pre_process=True, post_process=True): """Build the model.""" - log.info('building GPT model ...') + log.info("building GPT model ...") see_memory_usage("Before Building Model", force=True) args = get_args() assert args is not None config = core_transformer_config_from_args(args) 
# if RANK == 0: # git_ds_info() - if hasattr(mpu, 'get_sequence_data_parallel_group'): + if hasattr(mpu, "get_sequence_data_parallel_group"): dpg = mpu.get_sequence_data_parallel_group() - elif hasattr(mpu, 'get_data_parallel_group'): + elif hasattr(mpu, "get_data_parallel_group"): dpg = mpu.get_data_parallel_group() else: dpg = None @@ -100,20 +102,14 @@ def model_provider(pre_process=True, post_process=True): if args.use_mics: deepspeed_zero_init = deepspeed.zero.MiCS_Init with deepspeed_zero_init( - data_parallel_group=dpg, - remote_device=( - None if args.remote_device == 'none' else args.remote_device - ), - config_dict_or_path=args.deepspeed_config_dict, - enabled=args.zero_stage == 3, - mpu=mpu + data_parallel_group=dpg, + remote_device=(None if args.remote_device == "none" else args.remote_device), + config_dict_or_path=args.deepspeed_config_dict, + enabled=args.zero_stage == 3, + mpu=mpu, ): if args.deepspeed and not args.no_pipeline_parallel: - model = GPTModelPipe( - config=config, - num_tokentypes=0, - parallel_output=True - ) + model = GPTModelPipe(config=config, num_tokentypes=0, parallel_output=True) # This is a hack to give us a reference to # get_batch_pipe from within training.py # We need to call model.set_batch_fn after deepspeed.initialize @@ -129,7 +125,7 @@ def model_provider(pre_process=True, post_process=True): ) ).view(1, 1, args.seq_length, args.seq_length) # Convert attention mask to binary: - attention_mask = (attention_mask < 0.5) + attention_mask = attention_mask < 0.5 if args.fp16: attention_mask = attention_mask.half() elif args.bf16: @@ -146,37 +142,33 @@ def model_provider(pre_process=True, post_process=True): num_tokentypes=0, parallel_output=True, pre_process=pre_process, - post_process=post_process + post_process=post_process, ) num_params = sum(p.numel() for p in model.parameters() if p.requires_grad) - log.info(80 * '-') + log.info(80 * "-") log.info(f"Number of parameters in model: {num_params}") - log.info(80 * '-') + 
log.info(80 * "-") see_memory_usage("After Building Model", force=True) - if wandb is not None and getattr(wandb, 'run', None) is not None: + if wandb is not None and getattr(wandb, "run", None) is not None: assert wandb.run is not None tbdir = args.tensorboard_dir # tbdir = args.getattr('tensorboard_dir', None) if tbdir is not None: try: - log.info(f'Patching tensorboard from {tbdir}') + log.info(f"Patching tensorboard from {tbdir}") wandb.tensorboard.patch(root_logdir=tbdir) except ValueError as exc: log.exception(exc) - log.warning('Continuing without patching tensorboard!') - wandb.run.config.update({'num_params': num_params}) + log.warning("Continuing without patching tensorboard!") + wandb.run.config.update({"num_params": num_params}) if "args" not in wandb.run.config: log.info( f"Updating WandB run.config: [{wandb.run.name}]({wandb.run.get_url()})" ) try: - wandb.run.config.update( - {"args": dict(sorted(vars(args).items()))} - ) + wandb.run.config.update({"args": dict(sorted(vars(args).items()))}) except Exception: - log.error( - 'Unable to `wandb.run.config.update({"args": vars(args)})`' - ) + log.error('Unable to `wandb.run.config.update({"args": vars(args)})`') # try: # wandb.run.watch( # model, @@ -194,7 +186,7 @@ def get_batch(data_iterator): tokenizer = get_tokenizer() assert args is not None and tokenizer is not None # Items and their type. - keys = ['text'] + keys = ["text"] datatype = torch.int64 data = next(data_iterator) if data_iterator is not None else None # # Broadcast data. @@ -204,7 +196,7 @@ def get_batch(data_iterator): # data = None data_b = tensor_parallel.broadcast_data(keys, data, datatype) # Unpack. - tokens_ = data_b['text'].long() + tokens_ = data_b["text"].long() labels = tokens_[:, 1:].contiguous() tokens = tokens_[:, :-1].contiguous() # Get the masks and postition ids. 
@@ -215,7 +207,8 @@ def get_batch(data_iterator): args.reset_position_ids, args.reset_attention_mask, args.eod_mask_loss, - skip_mask) + skip_mask, + ) # For DS's sequence parallel seq_parallel_world_size = mpu.get_sequence_parallel_world_size() seq_parallel_world_rank = mpu.get_sequence_parallel_rank() @@ -240,24 +233,37 @@ def data_post_process(data, data_sampler_state_dict): args = get_args() assert args is not None if args.data_efficiency_curriculum_learning: - if 'seqlen_truncate' in data_sampler_state_dict['current_difficulties']: - args.data_efficiency_curriculum_learning_seqlen_type = 'seqlen_truncate' - current_seqlen = data_sampler_state_dict['current_difficulties']['seqlen_truncate'] + if "seqlen_truncate" in data_sampler_state_dict["current_difficulties"]: + args.data_efficiency_curriculum_learning_seqlen_type = "seqlen_truncate" + current_seqlen = data_sampler_state_dict["current_difficulties"][ + "seqlen_truncate" + ] if current_seqlen < args.seq_length: - data['text'] = data['text'][:, :(current_seqlen+1)].contiguous() - elif 'seqlen_reshape' in data_sampler_state_dict['current_difficulties']: - args.data_efficiency_curriculum_learning_seqlen_type = 'seqlen_reshape' - current_seqlen = data_sampler_state_dict['current_difficulties']['seqlen_reshape'] + data["text"] = data["text"][:, : (current_seqlen + 1)].contiguous() + elif "seqlen_reshape" in data_sampler_state_dict["current_difficulties"]: + args.data_efficiency_curriculum_learning_seqlen_type = "seqlen_reshape" + current_seqlen = data_sampler_state_dict["current_difficulties"][ + "seqlen_reshape" + ] if current_seqlen < args.seq_length: - orig_num_token = torch.numel(data['text']) - reshape_len = (data['text'].size()[1] // (current_seqlen+1)) * (current_seqlen+1) - data['text'] = torch.cat((data['text'][:, :reshape_len].contiguous().view(-1, current_seqlen+1), - data['text'][:, -(current_seqlen+1):]), 0).contiguous() - num_row = math.ceil(orig_num_token / (current_seqlen+1)) - num_row = 
min(num_row, data['text'].size()[0]) + orig_num_token = torch.numel(data["text"]) + reshape_len = (data["text"].size()[1] // (current_seqlen + 1)) * ( + current_seqlen + 1 + ) + data["text"] = torch.cat( + ( + data["text"][:, :reshape_len] + .contiguous() + .view(-1, current_seqlen + 1), + data["text"][:, -(current_seqlen + 1) :], + ), + 0, + ).contiguous() + num_row = math.ceil(orig_num_token / (current_seqlen + 1)) + num_row = min(num_row, data["text"].size()[0]) if num_row > 1 and num_row % 2 != 0: num_row -= 1 - data['text'] = data['text'][:num_row, :].contiguous() + data["text"] = data["text"][:num_row, :].contiguous() else: args.data_efficiency_curriculum_learning_seqlen_type = None return data @@ -272,12 +278,12 @@ def get_batch_pipe(data): tokenizer = get_tokenizer() assert args is not None # Items and their type. - keys = ['text'] + keys = ["text"] datatype = torch.int64 # Broadcast data. data_b = tensor_parallel.broadcast_data(keys, data, datatype) # Unpack. - tokens_ = data_b['text'].long() + tokens_ = data_b["text"].long() labels = tokens_[:, 1:].contiguous() tokens = tokens_[:, :-1].contiguous() # Get the masks and postition ids. 
@@ -286,19 +292,17 @@ def get_batch_pipe(data): tokenizer.eod, args.reset_position_ids, args.reset_attention_mask, - args.eod_mask_loss) - if ( - args.curriculum_learning_legacy - and args.curriculum_seqlen < tokens.size()[1] - ): + args.eod_mask_loss, + ) + if args.curriculum_learning_legacy and args.curriculum_seqlen < tokens.size()[1]: # seqlen-based curriculum learning # tokens, position_ids, labels, loss_mask # have size [batch size, seqlen] - tokens = tokens[:, :args.curriculum_seqlen].contiguous() - position_ids = position_ids[:, :args.curriculum_seqlen].contiguous() + tokens = tokens[:, : args.curriculum_seqlen].contiguous() + position_ids = position_ids[:, : args.curriculum_seqlen].contiguous() if labels is not None: - labels = labels[:, :args.curriculum_seqlen].contiguous() - loss_mask = loss_mask[:, :args.curriculum_seqlen].contiguous() + labels = labels[:, : args.curriculum_seqlen].contiguous() + loss_mask = loss_mask[:, : args.curriculum_seqlen].contiguous() return (tokens, position_ids, attention_mask), (labels, loss_mask) @@ -315,37 +319,32 @@ def loss_func(loss_mask, moe_loss, mos_loss, output_tensor): loss = loss + moe_loss + mos_loss if args.mos: return loss, { - 'total loss': loss, - 'lm loss': averaged_loss[0], - 'moe loss': moe_loss, - 'mos loss': mos_loss + "total loss": loss, + "lm loss": averaged_loss[0], + "moe loss": moe_loss, + "mos loss": mos_loss, } elif args.kd: return loss, { - 'total loss': loss, - 'lm loss': averaged_loss[0], - 'moe loss': moe_loss, - 'kd loss': mos_loss + "total loss": loss, + "lm loss": averaged_loss[0], + "moe loss": moe_loss, + "kd loss": mos_loss, } log.info( - f'>>> total loss: {loss}, ' - f'lm loss {averaged_loss[0]}, ' - f'kd loss {mos_loss}' + f">>> total loss: {loss}, " + f"lm loss {averaged_loss[0]}, " + f"kd loss {mos_loss}" ) else: if max(args.num_experts) <= 1: - return loss, {'lm loss': averaged_loss[0]} + return loss, {"lm loss": averaged_loss[0]} loss = loss + moe_loss - return loss, {'lm loss': 
averaged_loss[0], 'moe loss': moe_loss} + return loss, {"lm loss": averaged_loss[0], "moe loss": moe_loss} def calculate_mos_loss( - args, - stu_output, - teacher_model, - tokens, - position_ids, - attention_mask + args, stu_output, teacher_model, tokens, position_ids, attention_mask ): mos_loss = 0 alpha = args.kd_alpha_ce @@ -354,29 +353,25 @@ def calculate_mos_loss( if teacher_model: with torch.no_grad(): if ( - args.curriculum_learning_legacy and - args.curriculum_seqlen < args.seq_length + args.curriculum_learning_legacy + and args.curriculum_seqlen < args.seq_length ): assert args.curriculum_seqlen is not None curriculum_seqlen = args.curriculum_seqlen tokens = tokens[:, :curriculum_seqlen].contiguous() position_ids = position_ids[:, :curriculum_seqlen].contiguous() csl = curriculum_seqlen - attention_mask = ( - attention_mask[:, :, :csl, :csl].contiguous() - ) + attention_mask = attention_mask[:, :, :csl, :csl].contiguous() # No need to truncate labels # as we do not need it for the teacher logits tea_output, tea_other_losses = teacher_model( - tokens, - position_ids, - attention_mask + tokens, position_ids, attention_mask ) assert stu_output.size() == tea_output.size(), ( - 'teacher and student output should match in size. ' - f'Student: {stu_output.size()}, ' - f'Teacher: {tea_output.size()}, ' - f'CL seq length {args.curriculum_seqlen}' + "teacher and student output should match in size. " + f"Student: {stu_output.size()}, " + f"Teacher: {tea_output.size()}, " + f"CL seq length {args.curriculum_seqlen}" ) student_logits = F.log_softmax(stu_output / kd_temp, dim=2) # The target logits is expected to be probabilities. @@ -384,67 +379,81 @@ def calculate_mos_loss( # then we need to set target_log to true # when initializing the KLDivLoss. 
tea_logits = F.softmax(tea_output / kd_temp, dim=2) - mos_loss = kd_temp * kd_temp * nn.KLDivLoss(reduction='batchmean')( - student_logits, - tea_logits + mos_loss = ( + kd_temp + * kd_temp + * nn.KLDivLoss(reduction="batchmean")(student_logits, tea_logits) ) mos_loss = mos_loss.div(args.seq_length) * beta return mos_loss -def forward_step(data_iterator, model): +# ForwardStepOutput = Type[tuple[torch.Tensor | None, Callable[[torch.Tensor], torch.Tensor | None]]] + + +def _return_none(_: torch.Tensor) -> torch.Tensor | None: + return None + + +def forward_step(data_iterator, model) -> tuple[torch.Tensor | None, Callable]: """Forward step.""" args = get_args() timers = get_timers() assert args is not None assert timers is not None # Get the batch. - timers('batch-generator', log_level=2).start() - tokens, labels, loss_mask, attention_mask, position_ids = get_batch( - data_iterator - ) - timers('batch-generator').stop() + timers("batch-generator", log_level=2).start() + tokens, labels, loss_mask, attention_mask, position_ids = get_batch(data_iterator) + timers("batch-generator").stop() + ranges_to_skip = None + if args.train_range_to_skip is not None: + assert ( + len(args.train_range_to_skip) % 2 == 0 + ), f"""Expected --train-range-to-skip to have an even number of values. + Received: {len(args.train_range_to_skip)} + """ + ranges_to_skip = list( + zip( + args.train_range_to_skip[::2], + args.train_range_to_skip[1::2], + ) + ) + if ranges_to_skip is not None and any( + [i <= (args.iteration + 1) <= j for (i, j) in ranges_to_skip] + ): + log.info( + f"Caught {args.iteration} in 'forward_step', {tokens.shape()=}, {args.consumed_train_tokens=}'" + ) + # log.info(f"Caught {args.iteration + 1} in 'ranges_to_skip', skipping!" 
+ # return (None, _return_none) + return ( + torch.tensor([0.0], device=tokens.device), + lambda _: torch.Tensor([0.0], device=tokens.device), + # lambda _: return torch.Tensor([0.0], deviec=tokens.device), + ) if args.data_efficiency_curriculum_learning: args.curriculum_seqlen = tokens.size()[1] - if ( - hasattr( - args, - 'data_efficiency_curriculum_learning_seqlen_type') - and ( - args.data_efficiency_curriculum_learning_seqlen_type - == 'seqlen_reshape' - ) + if hasattr(args, "data_efficiency_curriculum_learning_seqlen_type") and ( + args.data_efficiency_curriculum_learning_seqlen_type == "seqlen_reshape" ): - args.data_efficiency_curriculum_learning_numel = ( - torch.numel(tokens) - ) + args.data_efficiency_curriculum_learning_numel = torch.numel(tokens) stu_output = None if args.mos or args.kd: # The forward func can return either the loss or the logits, # depending on whether passing in the labels or not. stu_output, other_losses = model(tokens, position_ids, attention_mask) - if ( - args.curriculum_learning_legacy - and args.curriculum_seqlen < args.seq_length - ): + if args.curriculum_learning_legacy and args.curriculum_seqlen < args.seq_length: assert args.curriculum_seqlen is not None - labels = labels[:, :args.curriculum_seqlen].contiguous() + labels = labels[:, : args.curriculum_seqlen].contiguous() output_tensor = tensor_parallel.vocab_parallel_cross_entropy( - stu_output.contiguous().float(), - labels + stu_output.contiguous().float(), labels ) else: output_tensor, other_losses = model( - tokens, - position_ids, - attention_mask, - labels=labels + tokens, position_ids, attention_mask, labels=labels ) - if ( - args.curriculum_learning_legacy and - args.curriculum_seqlen < args.seq_length - ): - loss_mask = loss_mask[:, :args.curriculum_seqlen].contiguous() + if args.curriculum_learning_legacy and args.curriculum_seqlen < args.seq_length: + loss_mask = loss_mask[:, : args.curriculum_seqlen].contiguous() moe_losses = [] for moe_loss in other_losses: @@ 
-462,7 +471,7 @@ def forward_step(data_iterator, model): args.teacher_model[0], tokens, position_ids, - attention_mask + attention_mask, ) # Output_tensor stores the standard loss, @@ -479,7 +488,7 @@ def train_valid_test_datasets_provider(train_val_test_num_samples): # from ezpz.profile import get_context_manager # cm = get_context_manager(rank=RANK, outdir=args.save) # with cm: - log.info('> building train, validation, and test datasets for GPT ...') + log.info("> building train, validation, and test datasets for GPT ...") files = [] if args.data_file_list is not None: log.info(f"Reading datasets from {args.data_file_list}") @@ -492,7 +501,7 @@ def train_valid_test_datasets_provider(train_val_test_num_samples): # - `/path/to/data_text_document` is the path to the text document # - `corpus` is the corpus (~ source, can be made up) where that # document came from (i.e. `books`, `arxiv`, etc.) - with open(args.data_file_list, 'r') as flist: + with open(args.data_file_list, "r") as flist: for f in flist.readlines(): if len(f.strip()) != 0: try: @@ -505,17 +514,11 @@ def train_valid_test_datasets_provider(train_val_test_num_samples): ) if fname.find(".bin") != -1: fname = fname.split(".bin")[0] - files.extend( - [ - float(w), # weight - fname, # filename - c # corpus - ] - ) + files.extend([float(w), fname, c]) # weight # filename # corpus elif len(args.data_path) == 1 and os.path.isdir(args.data_path[0]): path = args.data_path[0] + "/" for f in os.listdir(path): - if (os.path.isfile(path + f) and f.find(".bin") != -1): + if os.path.isfile(path + f) and f.find(".bin") != -1: files.append(1) files.append(path + f.split(".bin")[0]) else: @@ -540,11 +543,7 @@ def train_valid_test_datasets_provider(train_val_test_num_samples): def command_exists(cmd): - result = subprocess.Popen( - f'type {cmd}', - stdout=subprocess.PIPE, - shell=True - ) + result = subprocess.Popen(f"type {cmd}", stdout=subprocess.PIPE, shell=True) return result.wait() == 0 @@ -552,17 +551,18 @@ def 
git_ds_info(): if RANK != 0: return from deepspeed.env_report import main as ds_report + ds_report() # Write out version/git info git_hash_cmd = "git rev-parse --short HEAD" git_branch_cmd = "git rev-parse --abbrev-ref HEAD" - if command_exists('git'): + if command_exists("git"): try: result = subprocess.check_output(git_hash_cmd, shell=True) - git_hash = result.decode('utf-8').strip() + git_hash = result.decode("utf-8").strip() result = subprocess.check_output(git_branch_cmd, shell=True) - git_branch = result.decode('utf-8').strip() + git_branch = result.decode("utf-8").strip() except subprocess.CalledProcessError: git_hash = "unknown" git_branch = "unknown" @@ -570,21 +570,26 @@ def git_ds_info(): git_hash = "unknown" git_branch = "unknown" print( - f'**** Git info for Megatron: ' - f'git_hash={git_hash} git_branch={git_branch} ****' + f"**** Git info for Megatron: " + f"git_hash={git_hash} git_branch={git_branch} ****" ) def main(): - if os.getenv('TORCH_PROFILER_ENABLE') == '1': + if os.getenv("TORCH_PROFILER_ENABLE") == "1": # record_function from torch.profiler import profile, ProfilerActivity + try: - activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA, ProfilerActivity.XPU] + activities = [ + ProfilerActivity.CPU, + ProfilerActivity.CUDA, + ProfilerActivity.XPU, + ] except Exception as exc: log.exception(exc) - log.warning("TORCH PROFILER WARNING: XPU is not supported") - activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA] + log.warning("TORCH PROFILER WARNING: XPU is not supported") + activities = [ProfilerActivity.CPU, ProfilerActivity.CUDA] with profile(activities=activities) as prof: model = pretrain( train_valid_test_datasets_provider, @@ -592,7 +597,7 @@ def main(): ModelType.encoder_or_decoder, forward_step, # args_defaults={'tokenizer_type': 'GPT2BPETokenizer'}, - data_post_process=data_post_process + data_post_process=data_post_process, ) args = get_args() assert args is not None @@ -606,7 +611,7 @@ def main(): 
ModelType.encoder_or_decoder, forward_step, # args_defaults={'tokenizer_type': 'GPT2BPETokenizer'}, - data_post_process=data_post_process + data_post_process=data_post_process, ) # try: # from megatron.text_generation import generate_and_post_process @@ -641,6 +646,7 @@ def main(): # data_post_process=data_post_process) import sys import deepspeed.comm as dist + model = main() dist.log_summary() if wandb.run is not None: From e83de19527830ade3a49f3fcfb698739a84c4ac9 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Sat, 14 Sep 2024 21:33:05 -0500 Subject: [PATCH 50/92] Update `megatron/training_log.py` --- megatron/training_log.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/megatron/training_log.py b/megatron/training_log.py index cd6638e17d..24b8015264 100644 --- a/megatron/training_log.py +++ b/megatron/training_log.py @@ -645,6 +645,12 @@ def training_log( ) log_string += " [LM]TFLOPs={:.2f} |".format(tflops_lm_per_gpu) log_string += " [DS]TFLOPs={:.2f} |".format(tflops) + if wandb is not None and getattr(wandb, "run", None) is not None: + wandb_metrics |= { + "training/skiped_iterations": total_loss_dict[skipped_iters_key] + } + wandb_metrics |= {"training/nan_iterations": total_loss_dict[nan_iters_key]} + wandb.log(wandb_metrics) total_loss_dict[advanced_iters_key] = 0 total_loss_dict[skipped_iters_key] = 0 total_loss_dict[nan_iters_key] = 0 @@ -654,12 +660,6 @@ def training_log( # Report memory after optimizer state has been initialized. 
report_memory("(after {} iterations)".format(iteration)) report_memory_flag = False - if wandb is not None and getattr(wandb, "run", None) is not None: - wandb_metrics |= { - "training/skiped_iterations": total_loss_dict[skipped_iters_key] - } - wandb_metrics |= {"training/nan_iterations": total_loss_dict[nan_iters_key]} - wandb.log(wandb_metrics) if timers is not None: timers.log(timers_to_log, normalizer=args.log_interval) From 29756d6e1e97464e8a7fbe1eca4cf832fa2a916a Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Sat, 14 Sep 2024 21:34:33 -0500 Subject: [PATCH 51/92] Warn if mismatch b/w iters in `megatron/checkpointing.py` --- megatron/checkpointing.py | 4 +++- megatron/training.py | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/megatron/checkpointing.py b/megatron/checkpointing.py index b7f4b30bde..a4f82ec9d3 100644 --- a/megatron/checkpointing.py +++ b/megatron/checkpointing.py @@ -239,7 +239,9 @@ def save_checkpoint(iteration, model, optimizer, opt_param_scheduler): """Save a model checkpoint.""" args = get_args() assert args is not None - iteration = args.iteration + args_iter = args.iteration + if args_iter != iteration: + log.warning(f"{args.iteration=} != {iteration} passed to 'save_checkpoint'") save_lr_state_dict() diff --git a/megatron/training.py b/megatron/training.py index 90a1250648..668aea930c 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -1004,7 +1004,7 @@ def train( model_module.train() # Tracking loss. total_loss_dict = {} - loss_dict = {} + loss_dict = {"skipped_iter": 0} # Iterations. 
iteration = args.iteration # Translate args to core configuration @@ -1061,11 +1061,22 @@ def train( ): log.info(f"Caught {iteration + 1} in 'ranges_to_skip', skipping!") # total_loss_dict = {"skipped iterations": } + # loss_dict skipped_iter = 1 - total_loss_dict["skipped iterations"] += skipped_iter - grad_norm = None + # grad_norm = None num_zeros_in_grad = None num_skipped_iters += 1 + increment = ( + get_num_microbatches() * args.micro_batch_size * args.data_parallel_size + ) + model[0].skipped_steps += 1 + model[0].global_steps += 1 + model[0].micro_steps += 1 + model[0].global_samples += model[0].train_batch_size() + # model[0].step(lr_kwargs={"increment": increment}) + # grad_norm = model[0].get_global_grad_norm() + # update_successful = model[0].was_step_applied() + opt_param_scheduler.step(increment=increment) else: if os.getenv("TORCH_PROFILER_ENABLE") == "2": from torch.profiler import profile, ProfilerActivity From 1a7f03b67a260e2e7326fe3a0f12709a8908d616 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Sun, 15 Sep 2024 20:39:24 -0500 Subject: [PATCH 52/92] fix: `try/except` for non tensors in `megatron/training_log.py` --- megatron/training_log.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/megatron/training_log.py b/megatron/training_log.py index 24b8015264..3eb96c392d 100644 --- a/megatron/training_log.py +++ b/megatron/training_log.py @@ -92,7 +92,10 @@ def training_log( + loss_dict[key] ) else: - value = loss_dict[key].float().sum().item() + try: + value = loss_dict[key].float().sum().item() + except AttributeError: + value = loss_dict[key] is_nan = value == float("inf") or value == -float("inf") or value != value got_nan = got_nan or is_nan total_loss_dict[nan_iters_key] = total_loss_dict.get(nan_iters_key, 0) + int( From 828f6a944627335336f7f6d2e348dd3989ffb021 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Tue, 17 Sep 2024 08:17:17 -0500 Subject: [PATCH 53/92] fix: Correctly draw `grad_acc_steps` batches of data when 
skipping step --- megatron/training.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/megatron/training.py b/megatron/training.py index 668aea930c..8ffac6cb9c 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -822,6 +822,8 @@ def train_step( timers = get_timers() accelerator = get_accelerator() assert args is not None and timers is not None and accelerator is not None + grad_norm = None + num_zeros_in_grad = None if args.deepspeed and args.ds_pipeline_enabled: num_zeros_in_grad = 0 assert isinstance(model[0], deepspeed.PipelineEngine) @@ -919,6 +921,10 @@ def train_step( if args.deepspeed: skipped_iter = 0 if update_successful else 1 grad_norm = model[0].get_global_grad_norm() + # Empty unused memory. + if args.empty_unused_memory_level >= 2 and accelerator is not None: + accelerator.empty_cache() + # XXX: [saforem2]: ---------------------------------------------------- # Is `num_zeros_in_grad` worth calculating (/ implementing) ?? # the `Megatron`-specific implementation is at: @@ -1002,6 +1008,7 @@ def train( # Turn on training mode which enables dropout. for model_module in model: model_module.train() + grad_norm = None # Tracking loss. 
total_loss_dict = {} loss_dict = {"skipped_iter": 0} @@ -1060,12 +1067,23 @@ def train( [i <= (iteration + 1) <= j for (i, j) in ranges_to_skip] ): log.info(f"Caught {iteration + 1} in 'ranges_to_skip', skipping!") - # total_loss_dict = {"skipped iterations": } - # loss_dict skipped_iter = 1 - # grad_norm = None - num_zeros_in_grad = None num_skipped_iters += 1 + num_zeros_in_grad = None + gas = args.deepspeed_config_dict["gradient_accumulation_steps"] + for microstep in range(gas): + _batch = next(train_data_iterator) + _tokens = _batch["text"] + if ( + iteration < 10 + and os.environ.get("DUMP_SKIPPED_ITERS", None) + and RANK == 0 + ): + log.info(f"{_tokens.shape}, {len(train_data_iterator)=}") + log.info( + f"{iteration=} [{microstep}/{gas}]: ({_tokens.shape})\n{_tokens[:10]=}" + ) + increment = ( get_num_microbatches() * args.micro_batch_size * args.data_parallel_size ) @@ -1073,9 +1091,6 @@ def train( model[0].global_steps += 1 model[0].micro_steps += 1 model[0].global_samples += model[0].train_batch_size() - # model[0].step(lr_kwargs={"increment": increment}) - # grad_norm = model[0].get_global_grad_norm() - # update_successful = model[0].was_step_applied() opt_param_scheduler.step(increment=increment) else: if os.getenv("TORCH_PROFILER_ENABLE") == "2": @@ -1085,7 +1100,7 @@ def train( activities = [ ProfilerActivity.CPU, ProfilerActivity.CUDA, - ProfilerActivity.XPU, + ProfilerActivity.XPU, # type:ignore ] except Exception: log.warning("TORCH PROFILER WARNING: XPU is not supported") From 295fcb3d57a40ec513a521aa8814d99a5c8827b8 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Tue, 17 Sep 2024 08:18:32 -0500 Subject: [PATCH 54/92] Update `pretrain_gpt_alcf.py` Remve `--train-range-to-skip` logic from `pretrain_gpt_alcf.py` and remove redundant code. 
--- pretrain_gpt_alcf.py | 44 +++++++++----------------------------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/pretrain_gpt_alcf.py b/pretrain_gpt_alcf.py index 4a6c3453da..12a05c5299 100644 --- a/pretrain_gpt_alcf.py +++ b/pretrain_gpt_alcf.py @@ -2,13 +2,12 @@ """Pretrain GPT""" import time -from typing import Callable, Type +from typing import Callable from mpi4py import MPI comm = MPI.COMM_WORLD comm.Barrier() python_start_time = time.time() -from pathlib import Path import os from rich import print @@ -189,6 +188,14 @@ def get_batch(data_iterator): keys = ["text"] datatype = torch.int64 data = next(data_iterator) if data_iterator is not None else None + + if ( + args.iteration < 10 + and RANK == 0 + and os.environ.get("DUMP_TOKENS", None) + and data is not None + ): + log.info(f"{args.iteration=}: {data['text'][:10]=}") # # Broadcast data. # if data_iterator is not None: # data = next(data_iterator) @@ -388,13 +395,6 @@ def calculate_mos_loss( return mos_loss -# ForwardStepOutput = Type[tuple[torch.Tensor | None, Callable[[torch.Tensor], torch.Tensor | None]]] - - -def _return_none(_: torch.Tensor) -> torch.Tensor | None: - return None - - def forward_step(data_iterator, model) -> tuple[torch.Tensor | None, Callable]: """Forward step.""" args = get_args() @@ -405,32 +405,6 @@ def forward_step(data_iterator, model) -> tuple[torch.Tensor | None, Callable]: timers("batch-generator", log_level=2).start() tokens, labels, loss_mask, attention_mask, position_ids = get_batch(data_iterator) timers("batch-generator").stop() - ranges_to_skip = None - if args.train_range_to_skip is not None: - assert ( - len(args.train_range_to_skip) % 2 == 0 - ), f"""Expected --train-range-to-skip to have an even number of values. 
- Received: {len(args.train_range_to_skip)} - """ - ranges_to_skip = list( - zip( - args.train_range_to_skip[::2], - args.train_range_to_skip[1::2], - ) - ) - if ranges_to_skip is not None and any( - [i <= (args.iteration + 1) <= j for (i, j) in ranges_to_skip] - ): - log.info( - f"Caught {args.iteration} in 'forward_step', {tokens.shape()=}, {args.consumed_train_tokens=}'" - ) - # log.info(f"Caught {args.iteration + 1} in 'ranges_to_skip', skipping!" - # return (None, _return_none) - return ( - torch.tensor([0.0], device=tokens.device), - lambda _: torch.Tensor([0.0], device=tokens.device), - # lambda _: return torch.Tensor([0.0], deviec=tokens.device), - ) if args.data_efficiency_curriculum_learning: args.curriculum_seqlen = tokens.size()[1] if hasattr(args, "data_efficiency_curriculum_learning_seqlen_type") and ( From 598c0920b70a138ef808b0ca3637131ad7579588 Mon Sep 17 00:00:00 2001 From: YiSheng5 Date: Sat, 21 Sep 2024 06:42:59 +0800 Subject: [PATCH 55/92] grad_wei can't be NoneType when running with DeepSpeed, for zero3 will divided the gradient (#428) --- megatron/core/tensor_parallel/layers.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/megatron/core/tensor_parallel/layers.py b/megatron/core/tensor_parallel/layers.py index 67a78853aa..3dd3299ae0 100644 --- a/megatron/core/tensor_parallel/layers.py +++ b/megatron/core/tensor_parallel/layers.py @@ -286,6 +286,7 @@ def forward(ctx, input, weight, bias, gradient_accumulation_fusion, @staticmethod @custom_bwd def backward(ctx, grad_output): + args = get_args() input, weight = ctx.saved_tensors use_bias = ctx.use_bias @@ -367,9 +368,13 @@ def backward(ctx, grad_output): # grad_weight = None # else: # grad_weight = grad_output.t().matmul(total_input) - from megatron.core.tensor_parallel.weight_grad_store import WeightGradStore - WeightGradStore.put(total_input, grad_output, weight, gradientUpdateFunction) - grad_weight = None + if args.enable_zbh1_pipeline: + from 
megatron.core.tensor_parallel.weight_grad_store import WeightGradStore + WeightGradStore.put(total_input, grad_output, weight, gradientUpdateFunction) + grad_weight = None + else: + grad_weight = grad_output.t().matmul(total_input) + grad_bias = grad_output.sum(dim=0) if use_bias else None if ctx.sequence_parallel: From cf80e6bb75c56cd8a26627e5364e4290da736bd8 Mon Sep 17 00:00:00 2001 From: Marieme Ngom Date: Mon, 23 Sep 2024 16:21:00 +0000 Subject: [PATCH 56/92] added sophia --- megatron/arguments.py | 10 ++ megatron/optimizer/__init__.py | 9 ++ megatron/optimizer/sophia.py | 202 +++++++++++++++++++++++++++++++++ 3 files changed, 221 insertions(+) create mode 100644 megatron/optimizer/sophia.py diff --git a/megatron/arguments.py b/megatron/arguments.py index b3ed06353e..2a0ac606ce 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -780,6 +780,15 @@ def _add_regularization_args(parser): help='Weight decay increment function.') group.add_argument('--clip-grad', type=float, default=1.0, help='Gradient clipping based on global L2 norm.') + group.add_argument('--sophiag-beta1', type=float, default=0.9, + help='First coefficient for computing running averages ' + 'of gradient and its hessian') + group.add_argument('--sophiag-beta2', type=float, default=0.95, + help='Second coefficient for computing running averages ' + 'of gradient and its hessian') + group.add_argument('--sophiag-rho', type=float, default=0.01, + help='SophiaG clipping threshhold') + group.add_argument('--adam-beta1', type=float, default=0.9, help='First coefficient for computing running averages ' 'of gradient and its square') @@ -946,6 +955,7 @@ def _add_training_args(parser): choices=[ 'adam', 'adamw', + 'sophiag', 'sgd', 'ds.fusedlamb', 'ipex.lamb', diff --git a/megatron/optimizer/__init__.py b/megatron/optimizer/__init__.py index 48f2737a06..99145ff4f4 100644 --- a/megatron/optimizer/__init__.py +++ b/megatron/optimizer/__init__.py @@ -315,6 +315,15 @@ def optimizer_hook(p): 
weight_decay=args.weight_decay, momentum=args.sgd_momentum ) + elif str(args.optimizer).lower() == 'sophiag': + from .sophia import SophiaG + optimizer = SophiaG( + param_groups, + lr=args.lr, + betas=(args.sophiag_beta1, args.sophiag_beta2), + rho = args.sophiag_rho, + weight_decay=args.weight_decay + ) else: raise TypeError(f'{args.optimizer} optimizer is not supported.') if args.deepspeed: diff --git a/megatron/optimizer/sophia.py b/megatron/optimizer/sophia.py new file mode 100644 index 0000000000..4c4e074790 --- /dev/null +++ b/megatron/optimizer/sophia.py @@ -0,0 +1,202 @@ +import math +import torch +from torch import Tensor +from torch.optim.optimizer import Optimizer +from typing import List, Optional + + +#SOphiaG implementation from https://github.com/Liuhong99/Sophia/blob/main/sophia.py, copy pasted here because no pip and not sure about submodules + +class SophiaG(Optimizer): + def __init__(self, params, lr=1e-4, betas=(0.965, 0.99), rho = 0.04, + weight_decay=1e-1, *, maximize: bool = False, + capturable: bool = False): + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + if not 0.0 <= betas[1] < 1.0: + raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + if not 0.0 <= rho: + raise ValueError("Invalid rho parameter at index 1: {}".format(rho)) + if not 0.0 <= weight_decay: + raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) + defaults = dict(lr=lr, betas=betas, rho=rho, + weight_decay=weight_decay, + maximize=maximize, capturable=capturable) + super(SophiaG, self).__init__(params, defaults) + + def __setstate__(self, state): + super().__setstate__(state) + for group in self.param_groups: + group.setdefault('maximize', False) + group.setdefault('capturable', False) + state_values = list(self.state.values()) + step_is_tensor = (len(state_values) != 0) and 
torch.is_tensor(state_values[0]['step']) + if not step_is_tensor: + for s in state_values: + s['step'] = torch.tensor(float(s['step'])) + + @torch.no_grad() + def update_hessian(self): + for group in self.param_groups: + beta1, beta2 = group['betas'] + for p in group['params']: + if p.grad is None: + continue + state = self.state[p] + + if len(state) == 0: + state['step'] = torch.zeros((1,), dtype=torch.float, device=p.device) \ + if self.defaults['capturable'] else torch.tensor(0.) + state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) + state['hessian'] = torch.zeros_like(p, memory_format=torch.preserve_format) + + if 'hessian' not in state.keys(): + state['hessian'] = torch.zeros_like(p, memory_format=torch.preserve_format) + + state['hessian'].mul_(beta2).addcmul_(p.grad, p.grad, value=1 - beta2) + + + @torch.no_grad() + def step(self, closure=None, bs=5120): + loss = None + if closure is not None: + with torch.enable_grad(): + loss = closure() + + for group in self.param_groups: + params_with_grad = [] + grads = [] + exp_avgs = [] + state_steps = [] + hessian = [] + beta1, beta2 = group['betas'] + + for p in group['params']: + if p.grad is None: + continue + params_with_grad.append(p) + + if p.grad.is_sparse: + raise RuntimeError('Hero does not support sparse gradients') + grads.append(p.grad) + state = self.state[p] + # State initialization + if len(state) == 0: + state['step'] = torch.zeros((1,), dtype=torch.float, device=p.device) \ + if self.defaults['capturable'] else torch.tensor(0.) 
+ state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) + state['hessian'] = torch.zeros_like(p, memory_format=torch.preserve_format) + + if 'hessian' not in state.keys(): + state['hessian'] = torch.zeros_like(p, memory_format=torch.preserve_format) + + exp_avgs.append(state['exp_avg']) + state_steps.append(state['step']) + hessian.append(state['hessian']) + + if self.defaults['capturable']: + bs = torch.ones((1,), dtype=torch.float, device=p.device) * bs + + sophiag(params_with_grad, + grads, + exp_avgs, + hessian, + state_steps, + bs=bs, + beta1=beta1, + beta2=beta2, + rho=group['rho'], + lr=group['lr'], + weight_decay=group['weight_decay'], + maximize=group['maximize'], + capturable=group['capturable']) + + return loss + +def sophiag(params: List[Tensor], + grads: List[Tensor], + exp_avgs: List[Tensor], + hessian: List[Tensor], + state_steps: List[Tensor], + capturable: bool = False, + *, + bs: int, + beta1: float, + beta2: float, + rho: float, + lr: float, + weight_decay: float, + maximize: bool): + + if not all(isinstance(t, torch.Tensor) for t in state_steps): + raise RuntimeError("API has changed, `state_steps` argument must contain a list of singleton tensors") + + + func = _single_tensor_sophiag + + func(params, + grads, + exp_avgs, + hessian, + state_steps, + bs=bs, + beta1=beta1, + beta2=beta2, + rho=rho, + lr=lr, + weight_decay=weight_decay, + maximize=maximize, + capturable=capturable) + +def _single_tensor_sophiag(params: List[Tensor], + grads: List[Tensor], + exp_avgs: List[Tensor], + hessian: List[Tensor], + state_steps: List[Tensor], + *, + bs: int, + beta1: float, + beta2: float, + rho: float, + lr: float, + weight_decay: float, + maximize: bool, + capturable: bool): + + for i, param in enumerate(params): + grad = grads[i] if not maximize else -grads[i] + exp_avg = exp_avgs[i] + hess = hessian[i] + step_t = state_steps[i] + + if capturable: + assert param.is_cuda and step_t.is_cuda and bs.is_cuda + + if 
torch.is_complex(param): + grad = torch.view_as_real(grad) + exp_avg = torch.view_as_real(exp_avg) + hess = torch.view_as_real(hess) + param = torch.view_as_real(param) + + # update step + step_t += 1 + + # Perform stepweight decay + param.mul_(1 - lr * weight_decay) + + # Decay the first and second moment running average coefficient + exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1) + + if capturable: + step_size = lr + step_size_neg = step_size.neg() + + ratio = (exp_avg.abs() / (rho * bs * hess + 1e-15)).clamp(None,1) + param.addcmul_(exp_avg.sign(), ratio, value=step_size_neg) + else: + step_size_neg = - lr + + ratio = (exp_avg.abs() / (rho * bs * hess + 1e-15)).clamp(None,1) + param.addcmul_(exp_avg.sign(), ratio, value=step_size_neg) From 8be7f4895968574d44ae06785fdbe115baa1c13c Mon Sep 17 00:00:00 2001 From: YiSheng5 Date: Fri, 4 Oct 2024 18:01:49 +0800 Subject: [PATCH 57/92] fix init issue for rms_norm in squence_parallel (#448) --- megatron/model/fused_rmsnorm.py | 10 ++++++++-- megatron/model/rmsnorm.py | 5 +++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/megatron/model/fused_rmsnorm.py b/megatron/model/fused_rmsnorm.py index a3b9927c87..ab1c42e24e 100644 --- a/megatron/model/fused_rmsnorm.py +++ b/megatron/model/fused_rmsnorm.py @@ -1,13 +1,19 @@ +from megatron import get_args + import torch from torch.nn.parameter import Parameter +from torch.nn import init import intel_extension_for_pytorch as ipex # noqa # Taken from facebookresearch/llama class RMSNorm(torch.nn.Module): - def __init__(self, dim: int, eps: float = 1e-6): + def __init__(self, dim: int, eps: float = 1e-6, sequence_parallel=False): super().__init__() self.eps = eps - self.weight = Parameter(torch.ones(dim)) + self.weight = Parameter(torch.ones(dim, + dtype=get_args().params_dtype)) + self.sequence_parallel = sequence_parallel + setattr(self.weight, 'sequence_parallel', self.sequence_parallel) def forward(self, x): output = torch.xpu.IpexRmsNorm(x, self.weight.shape, 
self.weight, self.eps) diff --git a/megatron/model/rmsnorm.py b/megatron/model/rmsnorm.py index 4860d81716..7bcaec37ef 100644 --- a/megatron/model/rmsnorm.py +++ b/megatron/model/rmsnorm.py @@ -9,7 +9,7 @@ # Taken from facebookresearch/llama class RMSNorm(torch.nn.Module): - def __init__(self, dim: int, eps: float = 1e-6): + def __init__(self, dim: int, eps: float = 1e-6, sequence_parallel=False): super().__init__() self.eps = eps init_device = None @@ -19,7 +19,8 @@ def __init__(self, dim: int, eps: float = 1e-6): device=init_device, dtype=get_args().params_dtype)) init.ones_(self.weight) - setattr(self.weight, 'sequence_parallel', sequence_parallel) + self.sequence_parallel = sequence_parallel + setattr(self.weight, 'sequence_parallel', self.sequence_parallel) def _norm(self, x): return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) From 444849272a9a39048baee8e6baf98d41d0b0d76e Mon Sep 17 00:00:00 2001 From: ranzhejiang Date: Tue, 8 Oct 2024 22:02:46 +0800 Subject: [PATCH 58/92] enable profiler for specific ranks (#451) --- megatron/arguments.py | 6 ++++++ megatron/profiler.py | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/megatron/arguments.py b/megatron/arguments.py index 9228da6ee9..6580dba80a 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -1570,5 +1570,11 @@ def _add_profiler_args(parser): type=str, default='2,3', help="Which steps to profile. Format: ,") + + group.add_argument("--profile-ranks", + type=int, + nargs='+', + default=None, + help="Which ranks to profile. 
Format: 0 1 2 3") return parser diff --git a/megatron/profiler.py b/megatron/profiler.py index c98096482a..aeab144846 100644 --- a/megatron/profiler.py +++ b/megatron/profiler.py @@ -36,7 +36,9 @@ def is_end_step(): def is_capture_step(): return cur_step >= start_step and cur_step <= end_step - if args.profile.startswith('pt'): + if args.profile.startswith('pt') and ( + args.profile_ranks is None or torch.distributed.get_rank() in args.profile_ranks + ): schedule = torch.profiler.schedule(wait=0, warmup=0, active=active_steps, repeat=1) activities = [torch.profiler.ProfilerActivity.CPU] activities.extend([torch.profiler.ProfilerActivity.HPU] if device.startswith("hpu") else []) From 53941565b264a8eb6e8dde5ca13aaad8d426b2f8 Mon Sep 17 00:00:00 2001 From: Huihuo Zheng Date: Sat, 12 Oct 2024 12:41:17 +0000 Subject: [PATCH 59/92] shuffle concate dataset index --- ALCF/test_blendable_dataset.py | 40 ++++++++++++++++++- .../pipeline_parallel/p2p_communication.py | 20 +++++----- megatron/data/gpt_dataset.py | 14 +++---- 3 files changed, 56 insertions(+), 18 deletions(-) diff --git a/ALCF/test_blendable_dataset.py b/ALCF/test_blendable_dataset.py index a3cabddd29..c119862142 100644 --- a/ALCF/test_blendable_dataset.py +++ b/ALCF/test_blendable_dataset.py @@ -1,5 +1,6 @@ #!/usr/bin/env python import time +import json start_time = time.time() from mpi4py import MPI import os @@ -37,7 +38,7 @@ def print_rank_0(msg): os.makedirs(args.trace_dir, exist_ok=True) - +corpus_all = [] data_file_list = args.data_file_list print_rank_0(f"Reading data from {args.data_file_list}") files = [] @@ -51,6 +52,9 @@ def print_rank_0(msg): files.append(float(w)) files.append(fname) files.append(c) + if c not in corpus_all: + corpus_all.append(c) + splits_string="100,0,0" weights = np.array(weights) @@ -82,6 +86,40 @@ def print_rank_0(msg): print_rank_0(f"Total number of samples: {len(train_ds)}") print_rank_0(f"Weights set: {weights[:min(8, num_datasets)]}") + +def 
get_sample_info(blendable_dataset, idx): + # corpus dataset + cd = blendable_dataset.dataset_index[idx] + # index within the corpus dataset + cds = blendable_dataset.dataset_sample_index[idx] + # dataset index within each corpus + fcd = blendable_dataset.datasets[cd].dataset_index[cds] + # sample index within the dataset + fcds = blendable_dataset.datasets[cd].dataset_sample_index[cds] + # corresponding data file + prefix = blendable_dataset.datasets[cd].dataset_builders[fcd].prefix + corpus = blendable_dataset.datasets[cd].dataset_builders[fcd].corpus + #v = blendable_dataset[idx]['text'] + #norm = np.linalg.norm(v) + return prefix, corpus, fcds + +num_batches = args.train_iters +print(f"global_batch_size: {args.global_batch_size}") +print(f"number of batches: {num_batches}") + +fout = open("samples_list.jsonl", "w") +if comm.rank == 0: + for i in range(num_batches): + ns_corpus = {} + for c in corpus_all: + ns_corpus[c] = 0 + for j in range(args.global_batch_size): + prefix, corpus, idx = get_sample_info(train_ds, i*args.global_batch_size+j) + ns_corpus[corpus] +=1 + fout.write(f"\u007b 'batch': {i}, 'sample': {j}, 'corpus': '{corpus}', 'prefix': '{prefix}', 'dataset_sample_index': {idx} \u007d\n") + fout.write(f"\u007b 'batch': {i}, 'histogram': {ns_corpus} \u007d \n") +comm.Barrier() +exit() start_build_dataloader = time.time() print_rank_0(f"Starting to build the data loader") rank_in_parallel_group = mpu.get_sequence_parallel_rank() diff --git a/megatron/core/pipeline_parallel/p2p_communication.py b/megatron/core/pipeline_parallel/p2p_communication.py index b23f6c84b3..64f75ad005 100644 --- a/megatron/core/pipeline_parallel/p2p_communication.py +++ b/megatron/core/pipeline_parallel/p2p_communication.py @@ -16,7 +16,8 @@ from megatron.core import ModelParallelConfig from deepspeed.accelerator import get_accelerator - +from megatron.utils import Profile +Profile("PIPELINE") # Types Shape = Union[List[int], torch.Size] @@ -329,6 +330,7 @@ def 
_ring_exchange_wrapper(**kwargs): return tensor_recv_prev, tensor_recv_next, reqs +@dlp.log def recv_forward(tensor_shape: Shape, config: ModelParallelConfig) -> torch.Tensor: """ Receive tensor from previous rank in pipeline (forward receive). @@ -353,7 +355,7 @@ def recv_forward(tensor_shape: Shape, config.timers('forward-recv').stop() return input_tensor - +@dlp.log def recv_backward(tensor_shape: Shape, config: ModelParallelConfig) -> torch.Tensor: """Receive tensor from next rank in pipeline (backward receive). @@ -376,7 +378,7 @@ def recv_backward(tensor_shape: Shape, config.timers('backward-recv').stop() return output_tensor_grad - +@dlp.log def send_forward(output_tensor: torch.Tensor, config: ModelParallelConfig) -> None: """Send tensor to next rank in pipeline (forward send). @@ -397,7 +399,7 @@ def send_forward(output_tensor: torch.Tensor, if config.timers is not None: config.timers('forward-send').stop() - +@dlp.log def send_backward(input_tensor_grad: torch.Tensor, config: ModelParallelConfig) -> None: """Send tensor to previous rank in pipeline (backward send). 
@@ -417,7 +419,7 @@ def send_backward(input_tensor_grad: torch.Tensor, if config.timers is not None: config.timers('backward-send').stop() - +@dlp.log def send_forward_recv_backward(output_tensor: torch.Tensor, tensor_shape: Shape, config: ModelParallelConfig) -> torch.Tensor: @@ -441,7 +443,7 @@ def send_forward_recv_backward(output_tensor: torch.Tensor, config.timers('forward-send-backward-recv').stop() return output_tensor_grad - +@dlp.log def send_backward_recv_forward(input_tensor_grad: torch.Tensor, tensor_shape: Shape, config: ModelParallelConfig) -> torch.Tensor: @@ -465,7 +467,7 @@ def send_backward_recv_forward(input_tensor_grad: torch.Tensor, config.timers('backward-send-forward-recv').stop() return input_tensor - +@dlp.log def send_forward_recv_forward(output_tensor: torch.Tensor, recv_prev: bool, tensor_shape: Shape, @@ -491,7 +493,7 @@ def send_forward_recv_forward(output_tensor: torch.Tensor, return input_tensor, wait_handles return input_tensor - +@dlp.log def send_backward_recv_backward(input_tensor_grad: torch.Tensor, recv_next: bool, tensor_shape: Shape, @@ -517,7 +519,7 @@ def send_backward_recv_backward(input_tensor_grad: torch.Tensor, return output_tensor_grad, wait_handles return output_tensor_grad - +@dlp.log def send_forward_backward_recv_forward_backward( output_tensor: torch.Tensor, input_tensor_grad: torch.Tensor, diff --git a/megatron/data/gpt_dataset.py b/megatron/data/gpt_dataset.py index 0cf97356a4..c801a6a5ae 100755 --- a/megatron/data/gpt_dataset.py +++ b/megatron/data/gpt_dataset.py @@ -114,8 +114,10 @@ def _build_indices(): print_rank_0('> elapsed time for building concat dataset indices: ' '{:.2f} (sec)'.format(time.time() - start_time)) return dataset_index, dataset_sample_index - + self.dataset_index, self.dataset_sample_index = _build_indices() + np_rng = np.random.RandomState(seed=dataset_builders[0].seed) + self.shuffle_index=np_rng.shuffle(range(self.num_samples)) for i in range(self.num_datasets): self.desc += 
dataset_builders[i].prefix + "," @@ -125,13 +127,9 @@ def __len__(self): @dlp.log def __getitem__(self, idx): - if idx >= self.num_samples: - print_rank_0(f"WARNING: index overflow encountered {idx} > {self.num_samples} for {self.dataset_builders[0].corpus}; will randomly pick one sample") - id = np.random.randint(self.num_samples) - else: - id = idx - i = self.dataset_index[idx] - j = self.dataset_sample_index[idx] + id_shuffle = self.shuffle_index[idx] + i = self.dataset_index[id_shuffle] + j = self.dataset_sample_index[id_shuffle] if self.dataset_builders[i].build: return self.dataset_builders[i].dataset[j] else: From 573b668ef20f4f5060937511169dfcd2c877a74c Mon Sep 17 00:00:00 2001 From: Huihuo Zheng Date: Sat, 12 Oct 2024 09:27:04 -0500 Subject: [PATCH 60/92] fixed bugs --- megatron/core/pipeline_parallel/p2p_communication.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megatron/core/pipeline_parallel/p2p_communication.py b/megatron/core/pipeline_parallel/p2p_communication.py index 64f75ad005..78e43e7fed 100644 --- a/megatron/core/pipeline_parallel/p2p_communication.py +++ b/megatron/core/pipeline_parallel/p2p_communication.py @@ -17,7 +17,7 @@ from megatron.core import ModelParallelConfig from deepspeed.accelerator import get_accelerator from megatron.utils import Profile -Profile("PIPELINE") +dlp = Profile("PIPELINE") # Types Shape = Union[List[int], torch.Size] From 41ff0598b1a1c150a155404de408ef0575f6d6ee Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Sat, 12 Oct 2024 13:25:19 -0500 Subject: [PATCH 61/92] Update `ALCF/helpers.sh`, `train_aGPT_7B.sh` --- ALCF/helpers.sh | 382 ++++++++++++++++++++++++++++++----------------- train_aGPT_7B.sh | 13 +- 2 files changed, 252 insertions(+), 143 deletions(-) diff --git a/ALCF/helpers.sh b/ALCF/helpers.sh index bc31c322ce..109916dc37 100644 --- a/ALCF/helpers.sh +++ b/ALCF/helpers.sh @@ -125,9 +125,9 @@ setup() { set_args || exit # Ensure executable exists in expected path check_executable 
"${EXEC:-${WORKING_DIR}/pretrain_gpt_alcf.py}" - dfl="${DATA_FILE_LIST:-}" + dfl="${DATA_FILE_LIST:-"${PBS_O_WORKDIR}/ALCF/data-lists/$(get_machine_name)/dolma.txt"}" # Setup data + tokenizer via `DATA_FILE_LIST` and `TOKENIZER_TYPE` - tok="${TOKENIZER_TYPE:-Llama2}" + tok="${TOKENIZER_TYPE:-Llama2Tokenizer}" setup_tokenizer_and_data "${tok}" "${dfl}" || exit make_data || exit # Print job info @@ -140,6 +140,7 @@ setup() { setup_run_cmd "$@" || exit } + ##################################################### # setup_run_cmd # @@ -150,7 +151,8 @@ setup_run_cmd() { # take in additional arguments # and append them directly to # the end of the `run_cmd` - custom_args="$@" + # custom_args="$@" + custom_args=("$@") ############################## #### Make it easy to track experiments by date ################### year="$(date "+%Y")" @@ -168,78 +170,122 @@ setup_run_cmd() { # `export LAUNCH_WITH=deepspeeed && bash train_llama_alcf.sh` ################################################################## setupLauncher "${LAUNCH_WITH:-MPICH}" || exit - TBDIR="${CKPT_DIR}/tensorboard" - mkdir -p "${TBDIR}" export data_cache_path="${CKPT_DIR}/${DATA_CACHE_PATH}" && mkdir -p "${data_cache_path}" printf "\n" echo "Using data_cache_path: ${data_cache_path}" - export DEFAULTS="\ - --split 100,0,0 \ - --log-interval 1 \ - --no-bias-gelu-fusion \ - --no-bias-dropout-fusion \ - --no-masked-softmax-fusion \ - --no-gradient-accumulation-fusion \ - --accumulate-allreduce-grads-in-fp32 \ - --log-timers-to-tensorboard \ - --log-optimizer-states-to-tensorboard" - OVERRIDE_CKPT_OPT_PARAM="${OVERRIDE_CKPT_OPT_PARAM:-}" + TRAIN_SPLIT="${TRAIN_SPLIT:-100}" + VAL_SPLIT="${VAL_SPLIT:-0}" + TEST_SPLIT="${TEST_SPLIT:-0}" + LOG_INTERVAL="${LOG_INTERVAL:-1}" + DEFAULTS=( + "--split ${TRAIN_SPLIT},${VAL_SPLIT},${TEST_SPLIT}" + "--log-interval ${LOG_INTERVAL}" + "--no-bias-gelu-fusion" + "--no-bias-dropout-fusion" + "--no-masked-softmax-fusion" + "--no-gradient-accumulation-fusion" + 
"--accumulate-allreduce-grads-in-fp32" + ) + # export DEFAULTS="\ + # --split ${TRAIN_SPLIT},${VAL_SPLIT},${TEST_SPLIT} \ + # --log-interval ${LOG_INTERVAL} \ + # --no-bias-gelu-fusion \ + # --no-bias-dropout-fusion \ + # --no-masked-softmax-fusion \ + # --no-gradient-accumulation-fusion \ + # --accumulate-allreduce-grads-in-fp32" + # OVERRIDE_CKPT_OPT_PARAM="${OVERRIDE_CKPT_OPT_PARAM:-}" if [[ -z "${OVERRIDE_CKPT_OPT_PARAM:-}" ]]; then - DEFAULTS="${DEFAULTS} --use-checkpoint-opt_param-scheduler" + DEFAULTS+=("--use-checkpoint-opt_param-scheduler") fi - if [[ "${SP}" -ge 2 ]]; then - export DEFAULTS="${DEFAULTS} --ds-sequence-parallel-size ${SP} --force-ds-sequence-parallel" + if [[ "${SP}" -gt 1 ]]; then + DEFAULTS+=( + "--ds-sequence-parallel-size ${SP}" + "--force-ds-sequence-parallel" + ) fi ################################################################## # WARN: to disable Llama-type architectures, toggle via: # `NO_LLAMA=1 bash train_llama_alcf.sh` ################################################################## - if [[ -z "${NO_LLAMA:-}" ]]; then - llama_flags="${LLAMA_ARGS}\ - --num-key-value-heads ${NUM_KV_HEAD} \ - --ffn-hidden-size ${FFN_HIDDEN_SIZE} \ - " + LLAMA_ARGS="" + if [[ "${SP}" == 1 ]]; then + export LLAMA_ARGS="${LLAMA_ARGS} " else - echo "!! Running in NO_LLAMA MODE !!" - llama_flags="" + export LLAMA_ARGS="" + echo "NOT USING ROTARY EMBEDDINGS! 
LLAMA_ARGS=${LLAMA_ARGS}" + fi + if [[ -z "${NO_LLAMA:-}" ]]; then + llama_flags=( + "--swiglu" + "--hidden-dropout 0" + "--attention-dropout 0" + "--normalization rmsnorm" + "--disable-bias-linear" + "--no-query-key-layer-scaling" + "--use-rotary-position-embeddings" + "--untie-embeddings-and-output-weights" + "--num-key-value-heads ${NUM_KV_HEAD}" + "--ffn-hidden-size ${FFN_HIDDEN_SIZE}" + ) + fi + + TENSORBARD_ARGS=() + if [[ -z "${USE_TENSORBARD:-}" ]]; then + TBDIR="${CKPT_DIR}/tensorboard" + mkdir -p "${TBDIR}" + # --log-timers-to-tensorboard \ + # --log-optimizer-states-to-tensorboard" + # --tensorboard-dir ${TBDIR} \ + TENSORBARD_ARGS+=( + "--log-timers-to-tensorboard" + "--log-optimizer-states-to-tensorboard" + "--tensorboard-dir ${TBDIR}" + ) fi - export run_cmd=" - ${LAUNCHER} \ - --${DTYPE} \ - ${DEFAULTS} \ - --optimizer ${OPT} \ - --adam-beta1=${ADAM_BETA1} \ - --adam-beta2=${ADAM_BETA2} \ - --adam-eps=${ADAM_EPS} \ - --weight-decay=${WEIGHT_DECAY} \ - --save ${CKPT_DIR} \ - --load ${CKPT_DIR} \ - --seq-length ${SEQ} \ - --num-layers ${NLAYERS} \ - --hidden-size ${HIDDEN} \ - --tensorboard-dir ${TBDIR} \ - --train-iters ${TRAIN_ITERS} \ - --eval-iters ${EVAL_ITERS} \ - --distributed-backend ${BE} \ - --num-attention-heads ${HEADS} \ - --save-interval ${SAVE_INTERVAL} \ - --eval-interval ${EVAL_INTERVAL} \ - --max-position-embeddings ${SEQ} \ - --micro-batch-size ${MICRO_BATCH} \ - --tensor-model-parallel-size ${TP} \ - --global-batch-size ${GLOBAL_BATCH} \ - --pipeline-model-parallel-size ${PP} \ - --data-cache-path ${data_cache_path} \ - ${DATA_FLAGS} \ - ${LR_ARGS} \ - ${llama_flags} \ - ${FLASH_ARG} \ - ${TIMING_STR} \ - ${TOKENIZER_FLAGS} \ - ${ds_args} \ - ${gpt_args[*]} \ - ${custom_args} - " + dfl_fallback="${DATA_FILE_LIST:-${PBS_O_WORKDIR}/ALCF/data-lists/$(get_machine_name)/dolma.txt}" + export ADAM_BETA1="${ADAM_BETA1:-0.9}" + export ADAM_BETA2="${ADAM_BETA2:-0.95}" + export ADAM_EPS="${ADAM_EPS:-0.00001}" # 1 * 10^{-5} + export run_cmd=( + 
"${LAUNCHER}" + "--${DTYPE}" + "${DEFAULTS[@]}" + "--optimizer ${OPT}" + "--save ${CKPT_DIR}" + "--load ${CKPT_DIR}" + "--seq-length ${SEQ}" + "--num-layers ${NLAYERS}" + "--hidden-size ${HIDDEN}" + "--train-iters ${TRAIN_ITERS}" + "--eval-iters ${EVAL_ITERS}" + "--distributed-backend ${BE}" + "--adam-beta1 ${ADAM_BETA1:-0.9}" + "--adam-beta2 ${ADAM_BETA2:-0.95}" + "--adam-eps ${ADAM_EPS:-0.00001}" + "--clip-grad ${CLIP_GRAD:-1.0}" + "--weight-decay ${WEIGHT_DECAY:-0.1}" + "--num-attention-heads ${HEADS}" + "--save-interval ${SAVE_INTERVAL}" + "--eval-interval ${EVAL_INTERVAL}" + "--max-position-embeddings ${SEQ}" + "--micro-batch-size ${MICRO_BATCH}" + "--tensor-model-parallel-size ${TP}" + "--global-batch-size ${GLOBAL_BATCH}" + "--pipeline-model-parallel-size ${PP}" + "--data-cache-path ${data_cache_path}" + "--data-file-list ${DATA_FILE_LIST:-${dfl_fallback}}" + "${TENSORBARD_ARGS[@]}" + "${DATA_FLAGS}" + "${LR_ARGS}" + "${llama_flags[@]}" + "${FLASH_ARG}" + "${TIMING_STR}" + "${TOKENIZER_FLAGS}" + "${ds_args[@]}" + "${gpt_args[@]}" + "${custom_args[@]}" + ) } save_dotenv() { @@ -518,7 +564,6 @@ set_ccl_vars_on_aurora() { ############################################################################## setParams() { FLASH_ARG="" - LLAMA_ARGS="--attention-dropout 0 --hidden-dropout 0" # ---- [Parallelism Settings] -------------------------------------------+ # ------ [Aurora] -------||------ [SunSpot] ------------- # if [[ $(hostname) == x4* || $(hostname) == x1* ]]; then @@ -600,9 +645,9 @@ setParams() { export FLASH_ARG="${FLASH_ARG}" export DTYPE="${DTYPE:-bf16}" export OPT="${OPT:-adamw}" - export ADAM_BETA1="${ADAM_BETA1:-0.9}" - export ADAM_BETA2="${ADAM_BETA2:-0.95}" - export ADAM_EPS="${ADAM_EPS:-0.00001}" # 1 * 10^{-5} + # export ADAM_BETA1="${ADAM_BETA1:-0.9}" + # export ADAM_BETA2="${ADAM_BETA2:-0.95}" + # export ADAM_EPS="${ADAM_EPS:-0.00001}" # 1 * 10^{-5} export WEIGHT_DECAY="${WEIGHT_DECAY:-0.1}" export HOSTFILE="${HOSTFILE:-${PBS_NODEFILE}}" 
NHOSTS=$(wc -l <"${HOSTFILE}") @@ -648,24 +693,19 @@ setParams() { # # For this reason, we only use the default LLAMA_ARGS when SP=0. ########################################################################## - if [[ "${SP}" == 1 ]]; then - export LLAMA_ARGS="${LLAMA_ARGS} --no-query-key-layer-scaling --use-rotary-position-embeddings --untie-embeddings-and-output-weights --swiglu --normalization rmsnorm --disable-bias-linear" - else - export LLAMA_ARGS="" - echo "NOT USING ROTARY EMBEDDINGS! LLAMA_ARGS=${LLAMA_ARGS}" - fi # -----[Learning Rate Settings]-------------------------------------------- export LR=${LR:-0.0003} # LEARNING_RATE export LR_WARMUP_FRAC=${LR_WARMUP_FRAC:-0.05} # LEARNING RATE WARMUP export LR_DECAY_ITERS=${LR_DECAY_ITERS:-} # LR DECAY ITERS set_lr_args # -----[Learning Rate Settings]-------------------------------------------- - if [[ "${TIMING_LOG_LEVEL}" -ge 1 ]]; then + # if [[ "${TIMING_LOG_LEVEL:-1}" -gt 1 ]]; then + if [[ "${TIMING_LOG_LEVEL:-1}" -gt 1 ]]; then TIMING_STR="\ - --timing-log-level ${TIMING_LOG_LEVEL} \ - --log-timers-to-tensorboard \ - --log-optimizer-states-to-tensorboard \ - " + --timing-log-level ${TIMING_LOG_LEVEL}" + # --log-timers-to-tensorboard \ + # --log-optimizer-states-to-tensorboard \ + # " else TIMING_STR="" fi @@ -679,19 +719,31 @@ setParams() { ############################################## set_args() { # ---- Set DeepSpeed arguments -------------------------------- - ds_args=" " - ds_args=" --deepspeed ${ds_args}" - if [[ $PP == 1 ]]; then - ds_args=" --no-pipeline-parallel ${ds_args}" + ds_args=( + "--deepspeed" + ) + if [[ "${PP:-1}" == 1 ]]; then + ds_args+=("--no-pipeline-parallel") fi - ds_args=" --deepspeed_config=$DS_CONFIG ${ds_args}" - ds_args=" --zero-stage=$ZERO_STAGE ${ds_args}" + ds_args+=("--deepspeed_config=${DS_CONFIG}") + ds_args+=("--zero-stage=$ZERO_STAGE") if [[ "${ZERO_STAGE}" == 3 ]]; then - ds_args="--use-mics ${ds_args}" + ds_args+=("--use-mics") fi + # ds_args=" " + # ds_args=" 
--deepspeed ${ds_args}" + # if [[ $PP == 1 ]]; then + # ds_args=" --no-pipeline-parallel ${ds_args}" + # fi + # ds_args=" --deepspeed_config=$DS_CONFIG ${ds_args}" + # ds_args="--zero-stage=$ZERO_STAGE ${ds_args}" + # if [[ "${ZERO_STAGE}" == 3 ]]; then + # ds_args="--use-mics ${ds_args}" + # fi if [[ "$USE_ACTIVATION_CHECKPOINTING" == 1 ]]; then echo "!! Caught USE_ACTIVATION_CHECKPOINTING=${USE_ACTIVATION_CHECKPOINTING} !!" - ds_args=" --deepspeed-activation-checkpointing ${ds_args}" + ds_args+=("--deepspeed-activation-checkpointing") + # ds_args=" --deepspeed-activation-checkpointing ${ds_args}" # --checkpoint-activations \ # --deepspeed-activation-checkpointing fi @@ -804,7 +856,8 @@ get_output_prefix() { pre="${pre}_sp${SP}_pp${PP}_tp${TP}_${DTYPE}_opt${OPT}" pre="${pre}_lr${LR}_lwf${LR_WARMUP_FRAC}" if [[ -n "${TOKENIZER_TYPE:-}" ]]; then - pre="${pre}_tok${TOKENIZER_TYPE}" + _tok=$(echo "${TOKENIZER_TYPE}" | sed 's/Tokenizer//g') # noqa + pre="${pre}_tok${_tok}" fi if [[ -n "${LR_DECAY_ITERS}" ]]; then pre="${pre}_ldi${LR_DECAY_ITERS}" @@ -832,7 +885,7 @@ setOutput() { # Build DeepSpeed config and write to .json ############################################# buildDSconfig() { - export CPU_OPTIMIZER="${CPU_OPTIMIZER:-0}" + # export CPU_OPTIMIZER="${CPU_OPTIMIZER:-0}" export DS_CONFIG="${WORKING_DIR}/ds-configs/ds_stage${ZERO_STAGE}_mb${MICRO_BATCH}_gb${GLOBAL_BATCH}_pp${PP}_${DTYPE}.json" mkdir -p "$(dirname "${DS_CONFIG}")" echo "DS_CONFIG: ${DS_CONFIG}" @@ -893,31 +946,6 @@ install_dependencies() { fi } -###################################################################### -# install_deepspeed_for_xpu -# -# Install microsoft/DeepSpeed on PVC -# -# This will: -# 1. Clone rep -# 2. Checkout appropriate branch -# 3. 
Install into virtual environment -###################################################################### -install_deepspeed_for_xpu() { - # python3 -m pip install "torch==2.1.0.post2" torchvision==0.16.0.post2 torchaudio==2.1.0.post2 intel-extension-for-pytorch==2.1.30.post0 oneccl_bind_pt==2.1.300+xpu --extra-index-url "https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" - echo "Building + Installing DeepSpeed on $(hostname)" - outdir="${WORKING_DIR}/deps/DeepSpeed" - mkdir -p "${outdir}" - git clone https://github.com/microsoft/DeepSpeed.git "${outdir}" - cd "${outdir}" || exit - echo "[install_deepspeed_for_xpu] !! pwd: $(pwd)" - python3 -m pip install --require-virtualenv -r requirements/requirements.txt 1>/dev/null - python3 -m pip install xgboost "numpy<2" --force-reinstall --upgrade --require-virtualenv 1>/dev/null - python setup.py develop 1>/dev/null - cd "${WORKING_DIR}" - echo "[install_deepspeed_for_xpu] !! pwd: $(pwd)" -} - ################################################# # Fix for distributed key value store on Aurora ################################################# @@ -1003,9 +1031,11 @@ setup_tokenizer_and_data() { fi echo "Setting up tokenizer with ${tok}" echo "Using data_file_list: ${dfl}" + _data_flags=() + _tokenizer_flags=() if [[ ${tok} == gpt* || ${tok} == GPT* ]]; then export TOKENIZER_TYPE="GPT2" - export TOKENIZER_FLAGS="--tokenizer-type GPT2BPETokenizer" + _tokenizer_flags+=("--tokenizer-type GPT2BPETokenizer") machine=$(get_machine_name) if [[ ${machine} == "polaris" ]]; then export DATA_PARENT="${DATA_PARENT:-/eagle/argonne_tpc/foremans/projects/argonne-lcf/Megatron-DeepSpeed/dataset}" @@ -1019,18 +1049,25 @@ setup_tokenizer_and_data() { export VOCAB_FILE="${DATA_PARENT}/gpt2-vocab.json" export MERGE_FILE="${DATA_PARENT}/gpt2-merges.txt" export DATA_PATH="${DATA_PARENT}/BookCorpusDataset_text_document" - export DATA_FLAGS="--data-path ${DATA_PATH} --vocab-file ${VOCAB_FILE} --merge-file ${MERGE_FILE}" + _data_flags+=( + 
"--data-path ${DATA_PATH}" + "--vocab-file ${VOCAB_FILE}" + "--merge-file ${MERGE_FILE}" + ) else - export DATA_FLAGS="" - export TOKENIZER_TYPE="Llama2" + export TOKENIZER_TYPE="${TOKENIZER_TYPE:-Llama2Tokenizer}" tm="${WORKING_DIR}/ALCF/tokenizer.model" # fallback: Megatron-DeepSpeed/ALCF/tokenizer.model export TOKENIZER_MODEL="${TOKENIZER_MODEL:-${tm}}" # USE TOKENIZER_MODEL from env, else fallback from ^ - export TOKENIZER_FLAGS="--tokenizer-type Llama2Tokenizer --tokenizer-model ${TOKENIZER_MODEL}" - if [[ "${TOKENIZER_TYPE}" != "GPT2" ]]; then - echo "Using tokenizer: ${TOKENIZER_TYPE}. Setting up data with ${DATA_FILE_LIST-}" - setData "${dfl}" || exit - fi + _tokenizer_flags+=( + "--tokenizer-type ${TOKENIZER_TYPE}" + "--tokenizer-model ${TOKENIZER_MODEL}" + ) + # if [[ "${TOKENIZER_TYPE}" != "GPT2" ]]; then + echo "Using tokenizer: ${TOKENIZER_TYPE}. Setting up data with ${DATA_FILE_LIST:-}" + setData "${dfl}" || exit fi + export DATA_FLAGS="${_data_flags[*]}" + export TOKENIZER_FLAGS="${_tokenizer_flags[*]}" printf "[setData] DATA_FLAGS: %s\n" "$(printGreen "${DATA_FLAGS}")" printf "[setData] TOKENIZER_FLAGS: %s\n" "$(printMagenta "${TOKENIZER_FLAGS}")" } @@ -1059,7 +1096,7 @@ setData() { # ------------------------[dfl: abbrv. 
for DATA_FILE_LIST] export WEIGHT_SUM="${ws}" export DFL_STEM="${dfl_stem}" export DATA_CACHE_PATH="${dcp}" - export DATA_FLAGS="${DATA_FLAGS} --data-file-list ${DATA_FILE_LIST}" # --data-cache-path ${DATA_CACHE_PATH}" + # export DATA_FLAGS="${DATA_FLAGS} --data-file-list ${DATA_FILE_LIST}" # --data-cache-path ${DATA_CACHE_PATH}" echo "--------------------" echo "Updated environment:" printf "DATA_FILE_LIST: %s\n" "${DATA_FILE_LIST}" @@ -1089,16 +1126,6 @@ generateDSconfig() { exit 1 fi done - # \"optimizer\": { - # \"type\": \"AdamW\", - # \"params\": { - # \"lr\": ${LR}, - # \"beta1\": 0.9, - # \"beta2\": 0.95, - # \"eps\": 1e-5, - # \"weight_decay\": 1e-1 - # } - # }, # \"scheduler\": { # \"type\": \"WarmupLR\", # \"params\": { @@ -1113,6 +1140,7 @@ generateDSconfig() { \"train_micro_batch_size_per_gpu\": $MICRO_BATCH, \"steps_per_print\": 1, \"gradient_accumulation_steps\": $GRAD_ACC_STEPS, + \"zero_force_ds_cpu_optimizer\": false, \"zero_allow_untested_optimizer\": true, \"gradient_clipping\": 1.0, \"activation_checkpointing\": { @@ -1160,6 +1188,20 @@ generateDSconfig() { else dtype="\"communication_data_type\": \"fp32\"," fi + if [[ "${OPT:-adamw}" == "ds.adamw" ]]; then + optimizer="\ + \"optimizer\": { + \"type\": \"AdamW\", + \"params\": { + \"lr\": ${LR}, + \"beta1\": 0.9, + \"beta2\": 0.95, + \"eps\": 1e-5, + \"weight_decay\": 1e-1 + }," + else + optimizer="" + fi if [[ "${ZERO_STAGE}" == 3 ]]; then # \"mics_shard_size\": 2, zero="\ @@ -1185,8 +1227,7 @@ generateDSconfig() { }," # elif [[ $ZERO_STAGE == 2 ]]; then elif [[ "${ZERO_STAGE}" == 2 || "${ZERO_STAGE}" == 1 ]]; then - # if [[ -n "${CPU_OPTIMIZER}" ]]; then - if [[ "${CPU_OPTIMIZER:-0}" != 0 ]]; then + if [[ -z "${CPU_OPTIMIZER:-}" ]]; then echo "!!!! CAUGHT CPU_OPTIMIZER !!!!" 
zero="\ \"zero_optimization\": { @@ -1215,9 +1256,8 @@ generateDSconfig() { else extra="\ \"comms_logger\": { - \"enabled\": true, + \"enabled\": ${COMMS_LOGGER:-false}, \"verbose\": false, - \"prof_all\": true, \"debug\": false }," fi @@ -1227,6 +1267,7 @@ generateDSconfig() { cat <"$1" { $common +$optimizer $zero $dtype $extra @@ -1304,6 +1345,73 @@ printWhite() { printf "\e[1;37m%s\e[0m\n" "$@" } +reset_env() { + custom_vars=( + NO_FLASH_ATTN + TP + PP + SP + FLASH_ARG + OPT + ADAM_BETA1 + ADAM_BETA2 + ADAM_EPS + WEIGHT_DECAY + HEADS + NLAYERS + HIDDEN + NUM_KV_HEAD + FFN_HIDDEN_SIZE + SEQ + ZERO_STAGE + MICRO_BATCH + EVAL_ITERS + EVAL_INTERVAL + TIMING_LOG_LEVEL + ACT_CKPT_NUM_LAYERS + USE_ACTIVATION_CHECKPOINTING + GLOBAL_BATCH_MAX + GLOBAL_BATCH + TRAIN_TOKENS + TRAIN_ITERS + MODEL_TYPE + LLAMA_ARGS + LR + LR_WARMUP_FRAC + LR_DECAY_ITERS + LR_ARGS + CPU_OPTIMIZER + DS_CONFIG + OUTPUT_DIR + OUTPUT_LOG + CKPT_DIR + ds_args + EXEC + EXEC_STEM + DATA_FLAGS + TOKENIZER_TYPE + TOKENIZER_MODEL + TOKENIZER_FLAGS + DATA_FILE_LIST + NUM_DOCS + WEIGHT_SUM + DFL_STEM + DATA_CACHE_PATH + DOTENV_FILE + YEAR + MONTH + DAY + TODAY + STARTED_AT + LAUNCHER + data_cache_path + DEFAULTS + ) + printf "Unsetting custom vars: %s\n" "${custom_vars[*]}" + unset "${custom_vars[@]}" +} + + ########################### # call helpers_main() ########################### diff --git a/train_aGPT_7B.sh b/train_aGPT_7B.sh index a6a2db72ab..286740fc89 100644 --- a/train_aGPT_7B.sh +++ b/train_aGPT_7B.sh @@ -16,19 +16,20 @@ source "${HERE}/ALCF/helpers.sh" || exit # 3. call `setup` from `./ALCF/helpers.sh` setup "$@" || exit -export run_cmd="${run_cmd}" -echo "${run_cmd}" | tee -a "${OUTPUT_LOG}" +# export run_cmd="${run_cmd}" +echo "${run_cmd[@]}" | tee -a "${OUTPUT_LOG}" # 4. Tell user where to find output printf "[!! %s] View output at:\n %s\n" "$(printBlue "NOTE")" "$(printYellow "${OUTPUT_LOG}")" | tee -a "${OUTPUT_LOG}" -# 5. 
Ignore the following strings on Intel XPU devices -# (otherwise they'll clutter up logs) -XPU_IGNORE_STRING="CCL_WARN|\ -\ INFO\ \-\ |real_accelerator\.py|numexpr\.utils|async_io|libaio" +# # 5. Ignore the following strings on Intel XPU devices +# # (otherwise they'll clutter up logs) +# XPU_IGNORE_STRING="CCL_WARN|\ -\ INFO\ \-\ |real_accelerator\.py|numexpr\.utils|async_io|libaio" # if [[ $(ezpz_get_machine_name) == "aurora" ]]; then # module unload mpich && module load mpich # fi # # 6. Evaluate ${run_cmd} and append outputs to ${OUTPUT_LOG} -eval "${run_cmd}" |& grep -E -v "${XPU_IGNORE_STRING}" |& tee -a "${OUTPUT_LOG}" +# eval "${run_cmd[@]}" |& tee -a "${OUTPUT_LOG}" +eval "${run_cmd[*]}" |& tee -a "${OUTPUT_LOG}" From 9de83a9684742081e8dabd82f59ede7c7fdfffb3 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Sat, 12 Oct 2024 15:56:18 -0500 Subject: [PATCH 62/92] Fix `shuffle_idx` in `megatron/data/gpt_dataset.py` --- megatron/data/gpt_dataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/megatron/data/gpt_dataset.py b/megatron/data/gpt_dataset.py index c801a6a5ae..a8457609db 100755 --- a/megatron/data/gpt_dataset.py +++ b/megatron/data/gpt_dataset.py @@ -117,6 +117,8 @@ def _build_indices(): self.dataset_index, self.dataset_sample_index = _build_indices() np_rng = np.random.RandomState(seed=dataset_builders[0].seed) + self.shuffle_index = np.arange(self.num_samples) + np_rng.shuffle(self.shuffle_index) self.shuffle_index=np_rng.shuffle(range(self.num_samples)) for i in range(self.num_datasets): self.desc += dataset_builders[i].prefix + "," From d7a2594a87fd0ffcd8eb3a6f102af809cf397ab2 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Sat, 12 Oct 2024 16:24:02 -0500 Subject: [PATCH 63/92] Fix `shuffle_idx` in `megatron/data/gpt_dataset.py` --- megatron/data/gpt_dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/megatron/data/gpt_dataset.py b/megatron/data/gpt_dataset.py index a8457609db..8501324752 100755 --- a/megatron/data/gpt_dataset.py +++ 
b/megatron/data/gpt_dataset.py @@ -119,7 +119,6 @@ def _build_indices(): np_rng = np.random.RandomState(seed=dataset_builders[0].seed) self.shuffle_index = np.arange(self.num_samples) np_rng.shuffle(self.shuffle_index) - self.shuffle_index=np_rng.shuffle(range(self.num_samples)) for i in range(self.num_datasets): self.desc += dataset_builders[i].prefix + "," From 3e33a6a66d0dae2328e02cad1c760b862c0064dc Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Sun, 13 Oct 2024 10:15:30 -0500 Subject: [PATCH 64/92] Update `ALCF/helpers.sh`, `train_aGPT_7B.sh` --- ALCF/helpers.sh | 432 +++++++++++++++++++++++++++++++---------------- train_aGPT_7B.sh | 13 +- 2 files changed, 297 insertions(+), 148 deletions(-) diff --git a/ALCF/helpers.sh b/ALCF/helpers.sh index bc31c322ce..5df9a2c7a5 100644 --- a/ALCF/helpers.sh +++ b/ALCF/helpers.sh @@ -120,14 +120,15 @@ setup() { # Create `deepspeed_config.json` from runtime params from ^ buildDSconfig || exit # Specify output directory for {logs, checkpoints, etc.} + setup_checkpoint || exit setOutput || exit # Specify additional `deepspeed` arguments (dependent on _newly created_ variables) set_args || exit # Ensure executable exists in expected path check_executable "${EXEC:-${WORKING_DIR}/pretrain_gpt_alcf.py}" - dfl="${DATA_FILE_LIST:-}" + dfl="${DATA_FILE_LIST:-"${PBS_O_WORKDIR}/ALCF/data-lists/$(get_machine_name)/dolma.txt"}" # Setup data + tokenizer via `DATA_FILE_LIST` and `TOKENIZER_TYPE` - tok="${TOKENIZER_TYPE:-Llama2}" + tok="${TOKENIZER_TYPE:-Llama2Tokenizer}" setup_tokenizer_and_data "${tok}" "${dfl}" || exit make_data || exit # Print job info @@ -140,6 +141,7 @@ setup() { setup_run_cmd "$@" || exit } + ##################################################### # setup_run_cmd # @@ -150,7 +152,8 @@ setup_run_cmd() { # take in additional arguments # and append them directly to # the end of the `run_cmd` - custom_args="$@" + # custom_args="$@" + custom_args=("$@") ############################## #### Make it easy to track experiments 
by date ################### year="$(date "+%Y")" @@ -168,78 +171,122 @@ setup_run_cmd() { # `export LAUNCH_WITH=deepspeeed && bash train_llama_alcf.sh` ################################################################## setupLauncher "${LAUNCH_WITH:-MPICH}" || exit - TBDIR="${CKPT_DIR}/tensorboard" - mkdir -p "${TBDIR}" export data_cache_path="${CKPT_DIR}/${DATA_CACHE_PATH}" && mkdir -p "${data_cache_path}" printf "\n" echo "Using data_cache_path: ${data_cache_path}" - export DEFAULTS="\ - --split 100,0,0 \ - --log-interval 1 \ - --no-bias-gelu-fusion \ - --no-bias-dropout-fusion \ - --no-masked-softmax-fusion \ - --no-gradient-accumulation-fusion \ - --accumulate-allreduce-grads-in-fp32 \ - --log-timers-to-tensorboard \ - --log-optimizer-states-to-tensorboard" - OVERRIDE_CKPT_OPT_PARAM="${OVERRIDE_CKPT_OPT_PARAM:-}" + TRAIN_SPLIT="${TRAIN_SPLIT:-100}" + VAL_SPLIT="${VAL_SPLIT:-0}" + TEST_SPLIT="${TEST_SPLIT:-0}" + LOG_INTERVAL="${LOG_INTERVAL:-1}" + DEFAULTS=( + "--split ${TRAIN_SPLIT},${VAL_SPLIT},${TEST_SPLIT}" + "--log-interval ${LOG_INTERVAL}" + "--no-bias-gelu-fusion" + "--no-bias-dropout-fusion" + "--no-masked-softmax-fusion" + "--no-gradient-accumulation-fusion" + "--accumulate-allreduce-grads-in-fp32" + ) + # export DEFAULTS="\ + # --split ${TRAIN_SPLIT},${VAL_SPLIT},${TEST_SPLIT} \ + # --log-interval ${LOG_INTERVAL} \ + # --no-bias-gelu-fusion \ + # --no-bias-dropout-fusion \ + # --no-masked-softmax-fusion \ + # --no-gradient-accumulation-fusion \ + # --accumulate-allreduce-grads-in-fp32" + # OVERRIDE_CKPT_OPT_PARAM="${OVERRIDE_CKPT_OPT_PARAM:-}" if [[ -z "${OVERRIDE_CKPT_OPT_PARAM:-}" ]]; then - DEFAULTS="${DEFAULTS} --use-checkpoint-opt_param-scheduler" + DEFAULTS+=("--use-checkpoint-opt_param-scheduler") fi - if [[ "${SP}" -ge 2 ]]; then - export DEFAULTS="${DEFAULTS} --ds-sequence-parallel-size ${SP} --force-ds-sequence-parallel" + if [[ "${SP}" -gt 1 ]]; then + DEFAULTS+=( + "--ds-sequence-parallel-size ${SP}" + "--force-ds-sequence-parallel" + ) fi 
################################################################## # WARN: to disable Llama-type architectures, toggle via: # `NO_LLAMA=1 bash train_llama_alcf.sh` ################################################################## - if [[ -z "${NO_LLAMA:-}" ]]; then - llama_flags="${LLAMA_ARGS}\ - --num-key-value-heads ${NUM_KV_HEAD} \ - --ffn-hidden-size ${FFN_HIDDEN_SIZE} \ - " + LLAMA_ARGS="" + if [[ "${SP}" == 1 ]]; then + export LLAMA_ARGS="${LLAMA_ARGS} " else - echo "!! Running in NO_LLAMA MODE !!" - llama_flags="" + export LLAMA_ARGS="" + echo "NOT USING ROTARY EMBEDDINGS! LLAMA_ARGS=${LLAMA_ARGS}" fi - export run_cmd=" - ${LAUNCHER} \ - --${DTYPE} \ - ${DEFAULTS} \ - --optimizer ${OPT} \ - --adam-beta1=${ADAM_BETA1} \ - --adam-beta2=${ADAM_BETA2} \ - --adam-eps=${ADAM_EPS} \ - --weight-decay=${WEIGHT_DECAY} \ - --save ${CKPT_DIR} \ - --load ${CKPT_DIR} \ - --seq-length ${SEQ} \ - --num-layers ${NLAYERS} \ - --hidden-size ${HIDDEN} \ - --tensorboard-dir ${TBDIR} \ - --train-iters ${TRAIN_ITERS} \ - --eval-iters ${EVAL_ITERS} \ - --distributed-backend ${BE} \ - --num-attention-heads ${HEADS} \ - --save-interval ${SAVE_INTERVAL} \ - --eval-interval ${EVAL_INTERVAL} \ - --max-position-embeddings ${SEQ} \ - --micro-batch-size ${MICRO_BATCH} \ - --tensor-model-parallel-size ${TP} \ - --global-batch-size ${GLOBAL_BATCH} \ - --pipeline-model-parallel-size ${PP} \ - --data-cache-path ${data_cache_path} \ - ${DATA_FLAGS} \ - ${LR_ARGS} \ - ${llama_flags} \ - ${FLASH_ARG} \ - ${TIMING_STR} \ - ${TOKENIZER_FLAGS} \ - ${ds_args} \ - ${gpt_args[*]} \ - ${custom_args} - " + if [[ -z "${NO_LLAMA:-}" ]]; then + llama_flags=( + "--swiglu" + "--hidden-dropout 0" + "--attention-dropout 0" + "--normalization rmsnorm" + "--disable-bias-linear" + "--no-query-key-layer-scaling" + "--use-rotary-position-embeddings" + "--untie-embeddings-and-output-weights" + "--num-key-value-heads ${NUM_KV_HEAD}" + "--ffn-hidden-size ${FFN_HIDDEN_SIZE}" + ) + fi + + TENSORBARD_ARGS=() + if [[ -z 
"${USE_TENSORBARD:-}" ]]; then + TBDIR="${CKPT_DIR}/tensorboard" + mkdir -p "${TBDIR}" + # --log-timers-to-tensorboard \ + # --log-optimizer-states-to-tensorboard" + # --tensorboard-dir ${TBDIR} \ + TENSORBARD_ARGS+=( + "--log-timers-to-tensorboard" + "--log-optimizer-states-to-tensorboard" + "--tensorboard-dir ${TBDIR}" + ) + fi + dfl_fallback="${DATA_FILE_LIST:-${PBS_O_WORKDIR}/ALCF/data-lists/$(get_machine_name)/dolma.txt}" + export ADAM_BETA1="${ADAM_BETA1:-0.9}" + export ADAM_BETA2="${ADAM_BETA2:-0.95}" + export ADAM_EPS="${ADAM_EPS:-0.00001}" # 1 * 10^{-5} + export run_cmd=( + "${LAUNCHER}" + "--${DTYPE}" + "${DEFAULTS[@]}" + "--optimizer ${OPT}" + "--save ${CKPT_DIR}" + "--load ${CKPT_DIR}" + "--seq-length ${SEQ}" + "--num-layers ${NLAYERS}" + "--hidden-size ${HIDDEN}" + "--train-iters ${TRAIN_ITERS}" + "--eval-iters ${EVAL_ITERS}" + "--distributed-backend ${BE}" + "--adam-beta1 ${ADAM_BETA1:-0.9}" + "--adam-beta2 ${ADAM_BETA2:-0.95}" + "--adam-eps ${ADAM_EPS:-0.00001}" + "--clip-grad ${CLIP_GRAD:-1.0}" + "--weight-decay ${WEIGHT_DECAY:-0.1}" + "--num-attention-heads ${HEADS}" + "--save-interval ${SAVE_INTERVAL}" + "--eval-interval ${EVAL_INTERVAL}" + "--max-position-embeddings ${SEQ}" + "--micro-batch-size ${MICRO_BATCH}" + "--tensor-model-parallel-size ${TP}" + "--global-batch-size ${GLOBAL_BATCH}" + "--pipeline-model-parallel-size ${PP}" + "--data-cache-path ${data_cache_path}" + "--data-file-list ${DATA_FILE_LIST:-${dfl_fallback}}" + "${TENSORBARD_ARGS[@]}" + "${DATA_FLAGS}" + "${LR_ARGS}" + "${llama_flags[@]}" + "${FLASH_ARG}" + "${TIMING_STR}" + "${TOKENIZER_FLAGS}" + "${ds_args[@]}" + "${gpt_args[@]}" + "${custom_args[@]}" + ) } save_dotenv() { @@ -518,7 +565,6 @@ set_ccl_vars_on_aurora() { ############################################################################## setParams() { FLASH_ARG="" - LLAMA_ARGS="--attention-dropout 0 --hidden-dropout 0" # ---- [Parallelism Settings] -------------------------------------------+ # ------ [Aurora] 
-------||------ [SunSpot] ------------- # if [[ $(hostname) == x4* || $(hostname) == x1* ]]; then @@ -600,9 +646,9 @@ setParams() { export FLASH_ARG="${FLASH_ARG}" export DTYPE="${DTYPE:-bf16}" export OPT="${OPT:-adamw}" - export ADAM_BETA1="${ADAM_BETA1:-0.9}" - export ADAM_BETA2="${ADAM_BETA2:-0.95}" - export ADAM_EPS="${ADAM_EPS:-0.00001}" # 1 * 10^{-5} + # export ADAM_BETA1="${ADAM_BETA1:-0.9}" + # export ADAM_BETA2="${ADAM_BETA2:-0.95}" + # export ADAM_EPS="${ADAM_EPS:-0.00001}" # 1 * 10^{-5} export WEIGHT_DECAY="${WEIGHT_DECAY:-0.1}" export HOSTFILE="${HOSTFILE:-${PBS_NODEFILE}}" NHOSTS=$(wc -l <"${HOSTFILE}") @@ -648,24 +694,19 @@ setParams() { # # For this reason, we only use the default LLAMA_ARGS when SP=0. ########################################################################## - if [[ "${SP}" == 1 ]]; then - export LLAMA_ARGS="${LLAMA_ARGS} --no-query-key-layer-scaling --use-rotary-position-embeddings --untie-embeddings-and-output-weights --swiglu --normalization rmsnorm --disable-bias-linear" - else - export LLAMA_ARGS="" - echo "NOT USING ROTARY EMBEDDINGS! 
LLAMA_ARGS=${LLAMA_ARGS}" - fi # -----[Learning Rate Settings]-------------------------------------------- export LR=${LR:-0.0003} # LEARNING_RATE export LR_WARMUP_FRAC=${LR_WARMUP_FRAC:-0.05} # LEARNING RATE WARMUP export LR_DECAY_ITERS=${LR_DECAY_ITERS:-} # LR DECAY ITERS set_lr_args # -----[Learning Rate Settings]-------------------------------------------- - if [[ "${TIMING_LOG_LEVEL}" -ge 1 ]]; then + # if [[ "${TIMING_LOG_LEVEL:-1}" -gt 1 ]]; then + if [[ "${TIMING_LOG_LEVEL:-1}" -gt 1 ]]; then TIMING_STR="\ - --timing-log-level ${TIMING_LOG_LEVEL} \ - --log-timers-to-tensorboard \ - --log-optimizer-states-to-tensorboard \ - " + --timing-log-level ${TIMING_LOG_LEVEL}" + # --log-timers-to-tensorboard \ + # --log-optimizer-states-to-tensorboard \ + # " else TIMING_STR="" fi @@ -679,19 +720,31 @@ setParams() { ############################################## set_args() { # ---- Set DeepSpeed arguments -------------------------------- - ds_args=" " - ds_args=" --deepspeed ${ds_args}" - if [[ $PP == 1 ]]; then - ds_args=" --no-pipeline-parallel ${ds_args}" + ds_args=( + "--deepspeed" + ) + if [[ "${PP:-1}" == 1 ]]; then + ds_args+=("--no-pipeline-parallel") fi - ds_args=" --deepspeed_config=$DS_CONFIG ${ds_args}" - ds_args=" --zero-stage=$ZERO_STAGE ${ds_args}" + ds_args+=("--deepspeed_config=${DS_CONFIG}") + ds_args+=("--zero-stage=$ZERO_STAGE") if [[ "${ZERO_STAGE}" == 3 ]]; then - ds_args="--use-mics ${ds_args}" + ds_args+=("--use-mics") fi + # ds_args=" " + # ds_args=" --deepspeed ${ds_args}" + # if [[ $PP == 1 ]]; then + # ds_args=" --no-pipeline-parallel ${ds_args}" + # fi + # ds_args=" --deepspeed_config=$DS_CONFIG ${ds_args}" + # ds_args="--zero-stage=$ZERO_STAGE ${ds_args}" + # if [[ "${ZERO_STAGE}" == 3 ]]; then + # ds_args="--use-mics ${ds_args}" + # fi if [[ "$USE_ACTIVATION_CHECKPOINTING" == 1 ]]; then echo "!! Caught USE_ACTIVATION_CHECKPOINTING=${USE_ACTIVATION_CHECKPOINTING} !!" 
- ds_args=" --deepspeed-activation-checkpointing ${ds_args}" + ds_args+=("--deepspeed-activation-checkpointing") + # ds_args=" --deepspeed-activation-checkpointing ${ds_args}" # --checkpoint-activations \ # --deepspeed-activation-checkpointing fi @@ -804,7 +857,8 @@ get_output_prefix() { pre="${pre}_sp${SP}_pp${PP}_tp${TP}_${DTYPE}_opt${OPT}" pre="${pre}_lr${LR}_lwf${LR_WARMUP_FRAC}" if [[ -n "${TOKENIZER_TYPE:-}" ]]; then - pre="${pre}_tok${TOKENIZER_TYPE}" + _tok=$(echo "${TOKENIZER_TYPE}" | sed 's/Tokenizer//g') # noqa + pre="${pre}_tok${_tok}" fi if [[ -n "${LR_DECAY_ITERS}" ]]; then pre="${pre}_ldi${LR_DECAY_ITERS}" @@ -822,9 +876,21 @@ setOutput() { OUTPUT_DIR="logs/${OUTPUT_PREFIX}/$(date +%Y%m%d-%H%M%S)_${WORLD_SIZE}_${HOSTNAME}" export OUTPUT_DIR="${OUTPUT_DIR}" && mkdir -p "${OUTPUT_DIR}" export OUTPUT_LOG="${OUTPUT_DIR}/output.log" - export CKPT_DIR="checkpoints/${OUTPUT_PREFIX}" echo "${OUTPUT_LOG}" >>"logs/latest" printf "\n Please see logs at: %s\n" "$(printGreen "${OUTPUT_DIR}")" +} + +get_checkpoint_dir() { + if [[ -n "${CKPT_DIR:-}" ]]; then + echo "${CKPT_DIR}" + else + echo "checkpoints/$(get_output_prefix)" + fi +} + +setup_checkpoint() { + ckpt_dir=$(get_checkpoint_dir) + export CKPT_DIR="${ckpt_dir}" printf "Checkpoints will be saved to: %s\n" "$(printYellow "${CKPT_DIR}")" } @@ -832,7 +898,7 @@ setOutput() { # Build DeepSpeed config and write to .json ############################################# buildDSconfig() { - export CPU_OPTIMIZER="${CPU_OPTIMIZER:-0}" + # export CPU_OPTIMIZER="${CPU_OPTIMIZER:-0}" export DS_CONFIG="${WORKING_DIR}/ds-configs/ds_stage${ZERO_STAGE}_mb${MICRO_BATCH}_gb${GLOBAL_BATCH}_pp${PP}_${DTYPE}.json" mkdir -p "$(dirname "${DS_CONFIG}")" echo "DS_CONFIG: ${DS_CONFIG}" @@ -893,31 +959,6 @@ install_dependencies() { fi } -###################################################################### -# install_deepspeed_for_xpu -# -# Install microsoft/DeepSpeed on PVC -# -# This will: -# 1. Clone rep -# 2. 
Checkout appropriate branch -# 3. Install into virtual environment -###################################################################### -install_deepspeed_for_xpu() { - # python3 -m pip install "torch==2.1.0.post2" torchvision==0.16.0.post2 torchaudio==2.1.0.post2 intel-extension-for-pytorch==2.1.30.post0 oneccl_bind_pt==2.1.300+xpu --extra-index-url "https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" - echo "Building + Installing DeepSpeed on $(hostname)" - outdir="${WORKING_DIR}/deps/DeepSpeed" - mkdir -p "${outdir}" - git clone https://github.com/microsoft/DeepSpeed.git "${outdir}" - cd "${outdir}" || exit - echo "[install_deepspeed_for_xpu] !! pwd: $(pwd)" - python3 -m pip install --require-virtualenv -r requirements/requirements.txt 1>/dev/null - python3 -m pip install xgboost "numpy<2" --force-reinstall --upgrade --require-virtualenv 1>/dev/null - python setup.py develop 1>/dev/null - cd "${WORKING_DIR}" - echo "[install_deepspeed_for_xpu] !! pwd: $(pwd)" -} - ################################################# # Fix for distributed key value store on Aurora ################################################# @@ -1003,9 +1044,11 @@ setup_tokenizer_and_data() { fi echo "Setting up tokenizer with ${tok}" echo "Using data_file_list: ${dfl}" + _data_flags=() + _tokenizer_flags=() if [[ ${tok} == gpt* || ${tok} == GPT* ]]; then export TOKENIZER_TYPE="GPT2" - export TOKENIZER_FLAGS="--tokenizer-type GPT2BPETokenizer" + _tokenizer_flags+=("--tokenizer-type GPT2BPETokenizer") machine=$(get_machine_name) if [[ ${machine} == "polaris" ]]; then export DATA_PARENT="${DATA_PARENT:-/eagle/argonne_tpc/foremans/projects/argonne-lcf/Megatron-DeepSpeed/dataset}" @@ -1019,18 +1062,25 @@ setup_tokenizer_and_data() { export VOCAB_FILE="${DATA_PARENT}/gpt2-vocab.json" export MERGE_FILE="${DATA_PARENT}/gpt2-merges.txt" export DATA_PATH="${DATA_PARENT}/BookCorpusDataset_text_document" - export DATA_FLAGS="--data-path ${DATA_PATH} --vocab-file ${VOCAB_FILE} --merge-file 
${MERGE_FILE}" + _data_flags+=( + "--data-path ${DATA_PATH}" + "--vocab-file ${VOCAB_FILE}" + "--merge-file ${MERGE_FILE}" + ) else - export DATA_FLAGS="" - export TOKENIZER_TYPE="Llama2" + export TOKENIZER_TYPE="${TOKENIZER_TYPE:-Llama2Tokenizer}" tm="${WORKING_DIR}/ALCF/tokenizer.model" # fallback: Megatron-DeepSpeed/ALCF/tokenizer.model export TOKENIZER_MODEL="${TOKENIZER_MODEL:-${tm}}" # USE TOKENIZER_MODEL from env, else fallback from ^ - export TOKENIZER_FLAGS="--tokenizer-type Llama2Tokenizer --tokenizer-model ${TOKENIZER_MODEL}" - if [[ "${TOKENIZER_TYPE}" != "GPT2" ]]; then - echo "Using tokenizer: ${TOKENIZER_TYPE}. Setting up data with ${DATA_FILE_LIST-}" - setData "${dfl}" || exit - fi + _tokenizer_flags+=( + "--tokenizer-type ${TOKENIZER_TYPE}" + "--tokenizer-model ${TOKENIZER_MODEL}" + ) + # if [[ "${TOKENIZER_TYPE}" != "GPT2" ]]; then + echo "Using tokenizer: ${TOKENIZER_TYPE}. Setting up data with ${DATA_FILE_LIST:-}" + setData "${dfl}" || exit fi + export DATA_FLAGS="${_data_flags[*]}" + export TOKENIZER_FLAGS="${_tokenizer_flags[*]}" printf "[setData] DATA_FLAGS: %s\n" "$(printGreen "${DATA_FLAGS}")" printf "[setData] TOKENIZER_FLAGS: %s\n" "$(printMagenta "${TOKENIZER_FLAGS}")" } @@ -1059,7 +1109,7 @@ setData() { # ------------------------[dfl: abbrv. for DATA_FILE_LIST] export WEIGHT_SUM="${ws}" export DFL_STEM="${dfl_stem}" export DATA_CACHE_PATH="${dcp}" - export DATA_FLAGS="${DATA_FLAGS} --data-file-list ${DATA_FILE_LIST}" # --data-cache-path ${DATA_CACHE_PATH}" + # export DATA_FLAGS="${DATA_FLAGS} --data-file-list ${DATA_FILE_LIST}" # --data-cache-path ${DATA_CACHE_PATH}" echo "--------------------" echo "Updated environment:" printf "DATA_FILE_LIST: %s\n" "${DATA_FILE_LIST}" @@ -1071,6 +1121,30 @@ setData() { # ------------------------[dfl: abbrv. 
for DATA_FILE_LIST] echo "--------------------" } +generateDSconfig_new() { + cat < "${CONFIG_JSON}" + { + "train_batch_size" : $GLOBAL_BATCH, + "train_micro_batch_size_per_gpu": $MICRO_BATCH, + "steps_per_print": 1, + + "zero_optimization": { + "stage": $ZERO_STAGE + }, + + "bf16": { + "enabled": true + }, + + "data_types": { + "grad_accum_dtype": "fp32" + }, + + "wall_clock_breakdown" : false + } +EOT +} + ################################################################################ # generateDSconfig # @@ -1089,16 +1163,6 @@ generateDSconfig() { exit 1 fi done - # \"optimizer\": { - # \"type\": \"AdamW\", - # \"params\": { - # \"lr\": ${LR}, - # \"beta1\": 0.9, - # \"beta2\": 0.95, - # \"eps\": 1e-5, - # \"weight_decay\": 1e-1 - # } - # }, # \"scheduler\": { # \"type\": \"WarmupLR\", # \"params\": { @@ -1113,13 +1177,17 @@ generateDSconfig() { \"train_micro_batch_size_per_gpu\": $MICRO_BATCH, \"steps_per_print\": 1, \"gradient_accumulation_steps\": $GRAD_ACC_STEPS, + \"zero_force_ds_cpu_optimizer\": false, \"zero_allow_untested_optimizer\": true, \"gradient_clipping\": 1.0, - \"activation_checkpointing\": { - \"partition_activations\": true, - \"contiguous_memory_optimization\": true - }, \"wall_clock_breakdown\": false," + if [[ "${USE_ACTIVATION_CHECKPOINTING}" == 1 ]]; then + activation_checkpointing="\ + \"activation_checkpointing\": { + \"partition_activations\": true, + \"contiguous_memory_optimization\": true + }," + fi flops_profiler="\ \"flops_profiler\": { \"enabled\": true, @@ -1160,6 +1228,20 @@ generateDSconfig() { else dtype="\"communication_data_type\": \"fp32\"," fi + if [[ "${OPT:-adamw}" == "ds.adamw" ]]; then + optimizer="\ + \"optimizer\": { + \"type\": \"AdamW\", + \"params\": { + \"lr\": ${LR}, + \"beta1\": 0.9, + \"beta2\": 0.95, + \"eps\": 1e-5, + \"weight_decay\": 1e-1 + }," + else + optimizer="" + fi if [[ "${ZERO_STAGE}" == 3 ]]; then # \"mics_shard_size\": 2, zero="\ @@ -1185,8 +1267,7 @@ generateDSconfig() { }," # elif [[ 
$ZERO_STAGE == 2 ]]; then elif [[ "${ZERO_STAGE}" == 2 || "${ZERO_STAGE}" == 1 ]]; then - # if [[ -n "${CPU_OPTIMIZER}" ]]; then - if [[ "${CPU_OPTIMIZER:-0}" != 0 ]]; then + if [[ -z "${CPU_OPTIMIZER:-}" ]]; then echo "!!!! CAUGHT CPU_OPTIMIZER !!!!" zero="\ \"zero_optimization\": { @@ -1215,9 +1296,8 @@ generateDSconfig() { else extra="\ \"comms_logger\": { - \"enabled\": true, + \"enabled\": ${COMMS_LOGGER:-false}, \"verbose\": false, - \"prof_all\": true, \"debug\": false }," fi @@ -1227,6 +1307,7 @@ generateDSconfig() { cat <"$1" { $common +$optimizer $zero $dtype $extra @@ -1304,6 +1385,73 @@ printWhite() { printf "\e[1;37m%s\e[0m\n" "$@" } +reset_env() { + custom_vars=( + NO_FLASH_ATTN + TP + PP + SP + FLASH_ARG + OPT + ADAM_BETA1 + ADAM_BETA2 + ADAM_EPS + WEIGHT_DECAY + HEADS + NLAYERS + HIDDEN + NUM_KV_HEAD + FFN_HIDDEN_SIZE + SEQ + ZERO_STAGE + MICRO_BATCH + EVAL_ITERS + EVAL_INTERVAL + TIMING_LOG_LEVEL + ACT_CKPT_NUM_LAYERS + USE_ACTIVATION_CHECKPOINTING + GLOBAL_BATCH_MAX + GLOBAL_BATCH + TRAIN_TOKENS + TRAIN_ITERS + MODEL_TYPE + LLAMA_ARGS + LR + LR_WARMUP_FRAC + LR_DECAY_ITERS + LR_ARGS + CPU_OPTIMIZER + DS_CONFIG + OUTPUT_DIR + OUTPUT_LOG + CKPT_DIR + ds_args + EXEC + EXEC_STEM + DATA_FLAGS + TOKENIZER_TYPE + TOKENIZER_MODEL + TOKENIZER_FLAGS + DATA_FILE_LIST + NUM_DOCS + WEIGHT_SUM + DFL_STEM + DATA_CACHE_PATH + DOTENV_FILE + YEAR + MONTH + DAY + TODAY + STARTED_AT + LAUNCHER + data_cache_path + DEFAULTS + ) + printf "Unsetting custom vars: %s\n" "${custom_vars[*]}" + unset "${custom_vars[@]}" +} + + ########################### # call helpers_main() ########################### diff --git a/train_aGPT_7B.sh b/train_aGPT_7B.sh index a6a2db72ab..286740fc89 100644 --- a/train_aGPT_7B.sh +++ b/train_aGPT_7B.sh @@ -16,19 +16,20 @@ source "${HERE}/ALCF/helpers.sh" || exit # 3. 
call `setup` from `./ALCF/helpers.sh` setup "$@" || exit -export run_cmd="${run_cmd}" -echo "${run_cmd}" | tee -a "${OUTPUT_LOG}" +# export run_cmd="${run_cmd}" +echo "${run_cmd[@]}" | tee -a "${OUTPUT_LOG}" # 4. Tell user where to find output printf "[!! %s] View output at:\n %s\n" "$(printBlue "NOTE")" "$(printYellow "${OUTPUT_LOG}")" | tee -a "${OUTPUT_LOG}" -# 5. Ignore the following strings on Intel XPU devices -# (otherwise they'll clutter up logs) -XPU_IGNORE_STRING="CCL_WARN|\ -\ INFO\ \-\ |real_accelerator\.py|numexpr\.utils|async_io|libaio" +# # 5. Ignore the following strings on Intel XPU devices +# # (otherwise they'll clutter up logs) +# XPU_IGNORE_STRING="CCL_WARN|\ -\ INFO\ \-\ |real_accelerator\.py|numexpr\.utils|async_io|libaio" # if [[ $(ezpz_get_machine_name) == "aurora" ]]; then # module unload mpich && module load mpich # fi # # 6. Evaluate ${run_cmd} and append outputs to ${OUTPUT_LOG} -eval "${run_cmd}" |& grep -E -v "${XPU_IGNORE_STRING}" |& tee -a "${OUTPUT_LOG}" +# eval "${run_cmd[@]}" |& tee -a "${OUTPUT_LOG}" +eval "${run_cmd[*]}" |& tee -a "${OUTPUT_LOG}" From 43cde2b8f10171735dce68b899bd4a43f3158bfc Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Sun, 13 Oct 2024 10:15:49 -0500 Subject: [PATCH 65/92] Update `pretrain_gpt_alcf.py` --- pretrain_gpt_alcf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pretrain_gpt_alcf.py b/pretrain_gpt_alcf.py index 12a05c5299..3686c6ceeb 100644 --- a/pretrain_gpt_alcf.py +++ b/pretrain_gpt_alcf.py @@ -1,6 +1,7 @@ # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
"""Pretrain GPT""" + import time from typing import Callable from mpi4py import MPI @@ -103,7 +104,7 @@ def model_provider(pre_process=True, post_process=True): with deepspeed_zero_init( data_parallel_group=dpg, remote_device=(None if args.remote_device == "none" else args.remote_device), - config_dict_or_path=args.deepspeed_config_dict, + config_dict_or_path=args.deepspeed_config, # _dict, enabled=args.zero_stage == 3, mpu=mpu, ): From 9f097339de3f9dacd856b4649fb696c5d0591010 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Sun, 13 Oct 2024 10:16:22 -0500 Subject: [PATCH 66/92] Update `megatron/data/{blendable,gpt,indexed}_dataset.py` --- megatron/data/blendable_dataset.py | 24 +- megatron/data/gpt_dataset.py | 885 +++++++++++++++++++---------- megatron/data/indexed_dataset.py | 217 ++++--- 3 files changed, 713 insertions(+), 413 deletions(-) diff --git a/megatron/data/blendable_dataset.py b/megatron/data/blendable_dataset.py index ba2e00b1ef..590a379971 100755 --- a/megatron/data/blendable_dataset.py +++ b/megatron/data/blendable_dataset.py @@ -6,14 +6,20 @@ import os import time +import logging import numpy as np import torch from deepspeed.accelerator import get_accelerator -from megatron import print_rank_0 +# from megatron import print_rank_0 from megatron.core import mpu from megatron.utils import Profile, PerfTrace from mpi4py import MPI + +from megatron.utils import get_logger + +log = get_logger(__name__, rank_zero_only=True) + dlp = Profile("DATASET") class BlendableDataset(torch.utils.data.Dataset): @dlp.log @@ -43,7 +49,7 @@ def _build_indices(): helpers.build_blending_indices(dataset_index, dataset_sample_index, weights, num_datasets, self.size, torch.distributed.get_rank() == 0) - print_rank_0('> elapsed time for building blendable dataset indices: ' + log.info('> elapsed time for building blendable dataset indices: ' '{:.2f} (sec)'.format(time.time() - start_time)) return dataset_index, dataset_sample_index @@ -68,7 +74,7 @@ def _build_indices(): ' 
dataset, building indices on rank 0 ...', flush=True) dataset_index, dataset_sample_index = _build_indices() try: - print_rank_0(" > saving index map files") + log.info(" > saving index map files") start_time = time.time() os.makedirs(os.path.dirname(index_path), exist_ok=True) with open(desc_path, 'wt') as fd: @@ -76,7 +82,7 @@ def _build_indices(): np.save(index_path, dataset_index, allow_pickle=True) np.save(sample_index_path, dataset_sample_index, allow_pickle=True) - print_rank_0(f" > finished saving index map files in {time.time() - start_time} seconds") + log.info(f" > finished saving index map files in {time.time() - start_time} seconds") except OSError: print(f'There was an error trying to create the data cache directory ({data_cache_path})') print('or a file in it. This is set with the --data-cache-path argument. Please') @@ -93,7 +99,7 @@ def _build_indices(): torch.distributed.get_world_size() // torch.distributed.get_world_size(group=mpu.get_tensor_model_parallel_group()) // torch.distributed.get_world_size(group=mpu.get_sequence_parallel_group())): - print_rank_0("Data index creation unsuccessful, exiting.") + log.info("Data index creation unsuccessful, exiting.") exit() ''' torch.distributed.barrier(group=mpu.get_data_parallel_group()) @@ -101,13 +107,13 @@ def _build_indices(): torch.distributed.barrier(group=mpu.get_data_parallel_group()) start_time = time.time() - print_rank_0(f'> loading blendable dataset index: {index_path}') + log.info(f'> loading blendable dataset index: {index_path}') self.dataset_index = np.load(index_path, allow_pickle=True, mmap_mode='r') assert self.dataset_index.size == self.size - print_rank_0(f'> loading blendable dataset sample index: {sample_index_path}') + log.info(f'> loading blendable dataset sample index: {sample_index_path}') self.dataset_sample_index = np.load(sample_index_path, allow_pickle=True, mmap_mode='r') assert self.dataset_sample_index.size == self.size - print_rank_0(f'> finished loading in 
{time.time() - start_time} seconds') + log.info(f'> finished loading in {time.time() - start_time} seconds') else: self.dataset_index, self.dataset_sample_index = _build_indices() @@ -119,7 +125,7 @@ def _build_indices(): raise RuntimeError('BlendedDataset size is improperly bounded') except IndexError: pass - print_rank_0('> size of blendable dataset: ' + log.info('> size of blendable dataset: ' '{} samples'.format(self.size)) diff --git a/megatron/data/gpt_dataset.py b/megatron/data/gpt_dataset.py index 8501324752..0a01ea31ca 100755 --- a/megatron/data/gpt_dataset.py +++ b/megatron/data/gpt_dataset.py @@ -9,67 +9,96 @@ import numpy as np import torch from deepspeed.accelerator import get_accelerator -from megatron import print_rank_0, is_rank_0, get_args +from megatron import is_rank_0, get_args from megatron.core import mpu -from megatron.data import helpers +from megatron.data import helpers # type:ignore from megatron.data.blendable_dataset import BlendableDataset -from megatron.data.dataset_utils import get_datasets_weights_and_num_samples, get_datasets_corpuses_weights_and_num_samples +from megatron.data.dataset_utils import ( + get_datasets_weights_and_num_samples, + get_datasets_corpuses_weights_and_num_samples, +) from megatron.data.dataset_utils import get_train_valid_test_split_ from megatron.data.indexed_dataset import make_dataset as make_indexed_dataset -from megatron.utils import PerfTrace, Profile +from megatron.utils import PerfTrace, Profile, get_logger from mpi4py import MPI dlp = Profile("DATASET") +log = get_logger(__name__, rank_zero_only=True) + + @dlp.log -def build_train_valid_test_datasets(data_prefix, data_impl, splits_string, - train_valid_test_num_samples, - seq_length, seed, skip_warmup, - train_data_prefix=None, - valid_data_prefix=None, - test_data_prefix=None, - return_doc_ids=False, *, - data_cache_path=None): +def build_train_valid_test_datasets( + data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples, + 
seq_length, + seed, + skip_warmup, + train_data_prefix=None, + valid_data_prefix=None, + test_data_prefix=None, + return_doc_ids=False, + *, + data_cache_path=None, +): """Build train, valid, and test datasets.""" if data_prefix: - print_rank_0("Single data path provided for train, valid & test") + log.debug("Single data path provided for train, valid & test") # Single dataset. if len(data_prefix) == 1: - return _build_train_valid_test_datasets(data_prefix[0], - data_impl, splits_string, - train_valid_test_num_samples, - seq_length, seed, skip_warmup, - data_cache_path=data_cache_path) + return _build_train_valid_test_datasets( + data_prefix[0], + data_impl, + splits_string, + train_valid_test_num_samples, + seq_length, + seed, + skip_warmup, + data_cache_path=data_cache_path, + ) # Blending dataset. # Parse the values. - output = get_datasets_corpuses_weights_and_num_samples(data_prefix, - train_valid_test_num_samples) + output = get_datasets_corpuses_weights_and_num_samples( + data_prefix, train_valid_test_num_samples + ) prefixes, corpuses, weights, datasets_train_valid_test_num_samples = output corpus_list = sorted(set(corpuses)) train_num_samples, valid_num_samples, test_num_samples = map( - sum, - zip(*datasets_train_valid_test_num_samples) + sum, zip(*datasets_train_valid_test_num_samples) ) class DatasetBuilder: - ''' + """ This is for building individual dataset from each dataset file - ''' + """ + @dlp.log - def __init__(self, prefix, corpus, data_impl, splits_string, - num_samples, seq_length, seed, skip_warmup, - return_doc_ids, - data_cache_path=data_cache_path, name='train'): + def __init__( + self, + prefix, + corpus, + data_impl, + splits_string, + num_samples, + seq_length, + seed, + skip_warmup, + return_doc_ids, + data_cache_path=data_cache_path, + name="train", + ): self.prefix = prefix self.data_impl = data_impl self.splits_string = splits_string - if name == 'train': + if name == "train": self.num_samples = num_samples[0] - elif name == 
'valid': + elif name == "valid": self.num_samples = num_samples[1] else: self.num_samples = num_samples[2] @@ -84,11 +113,21 @@ def __init__(self, prefix, corpus, data_impl, splits_string, self.desc = prefix + f"{self.num_samples}" + f"{seq_length}" + f"{seed}" self.build = False self.corpus = corpus + @dlp.log def Build(self): - self.dataset = _build_train_valid_test_datasets_single(self.prefix, self.data_impl, self.splits_string, - self.num_samples_train_valid_test, self.seq_length, self.seed, self.skip_warmup, self.name, self.return_doc_ids, - data_cache_path=self.data_cache_path) + self.dataset = _build_train_valid_test_datasets_single( + self.prefix, + self.data_impl, + self.splits_string, + self.num_samples_train_valid_test, + self.seq_length, + self.seed, + self.skip_warmup, + self.name, + self.return_doc_ids, + data_cache_path=self.data_cache_path, + ) self.build = True return self.dataset @@ -98,21 +137,27 @@ def __init__(self, dataset_builders): self.dataset_builders = dataset_builders self.num_datasets = len(dataset_builders) self.num_samples = np.sum([d.num_samples for d in dataset_builders]) - self.indices=np.zeros((self.num_samples, 2), dtype=np.uint64) - self.desc="ConcatDataset:" - m = 0 + self.indices = np.zeros((self.num_samples, 2), dtype=np.uint64) + self.desc = "ConcatDataset:" + # m = 0 num_samples_list = np.array([d.num_samples for d in dataset_builders]) self.num_samples = np.sum(num_samples_list) + def _build_indices(): start_time = time.time() dataset_index = np.zeros(self.num_samples, dtype=np.int64) dataset_sample_index = np.zeros(self.num_samples, dtype=np.int64) - helpers.build_concat_indices(dataset_index, dataset_sample_index, - num_samples_list, - self.num_datasets, - torch.distributed.get_rank()==0) - print_rank_0('> elapsed time for building concat dataset indices: ' - '{:.2f} (sec)'.format(time.time() - start_time)) + helpers.build_concat_indices( + dataset_index, + dataset_sample_index, + num_samples_list, + self.num_datasets, + 
torch.distributed.get_rank() == 0, + ) + log.debug( + "> elapsed time for building concat dataset indices: " + "{:.2f} (sec)".format(time.time() - start_time) + ) return dataset_index, dataset_sample_index self.dataset_index, self.dataset_sample_index = _build_indices() @@ -122,7 +167,12 @@ def _build_indices(): for i in range(self.num_datasets): self.desc += dataset_builders[i].prefix + "," - self.desc += f"-{self.num_samples}" + f"-{dataset_builders[0].seq_length}" + f"{dataset_builders[0].seed}" + self.desc += ( + f"-{self.num_samples}" + + f"-{dataset_builders[0].seq_length}" + + f"{dataset_builders[0].seed}" + ) + def __len__(self): return self.num_samples @@ -135,227 +185,340 @@ def __getitem__(self, idx): return self.dataset_builders[i].dataset[j] else: return self.dataset_builders[i].Build()[j] - - # Predetermine whether need to build the specific dataset or not. + # Predetermine whether need to build the specific dataset or not. start_time = time.time() - print_rank_0(" >>> Started building datasets in distributed way ... ") + log.debug(" >>> Started building datasets in distributed way ... ") a, b, c = [int(d) for d in splits_string.split(",")] - + train_datasets = [] valid_datasets = [] test_datasets = [] # Build individual datasets. 
@dlp.log - def build_corpus_datasets(dataset_type='train'): + def build_corpus_datasets(dataset_type="train"): start_time = time.time() - print_rank_0(f" >>> Building {dataset_type} corpus datasets ...") + log.debug(f" >>> Building {dataset_type} corpus datasets ...") datasets = [] corpus_builders = {} corpus_weights = {} for c in corpus_list: corpus_builders[c] = [] corpus_weights[c] = 0.0 - dataset_builders = [DatasetBuilder(prefixes[i], corpuses[i], data_impl, splits_string, - datasets_train_valid_test_num_samples[i], - seq_length, seed, skip_warmup, - return_doc_ids,data_cache_path, dataset_type) for i in range(len(weights))] - for i in range(torch.distributed.get_rank()//mpu.get_tensor_model_parallel_world_size(), len(weights), torch.distributed.get_world_size()//mpu.get_tensor_model_parallel_world_size()): + dataset_builders = [ + DatasetBuilder( + prefixes[i], + corpuses[i], + data_impl, + splits_string, + datasets_train_valid_test_num_samples[i], + seq_length, + seed, + skip_warmup, + return_doc_ids, + data_cache_path, + dataset_type, + ) + for i in range(len(weights)) + ] + for i in range( + torch.distributed.get_rank() + // mpu.get_tensor_model_parallel_world_size(), + len(weights), + torch.distributed.get_world_size() + // mpu.get_tensor_model_parallel_world_size(), + ): dataset_builders[i].Build() - print_rank_0(f" >>> Finished building individual datasets in {time.time() - start_time} seconds") + log.debug( + f" >>> Finished building individual datasets in {time.time() - start_time} seconds" + ) start_concating_time = time.time() for i, d in zip(range(len(weights)), dataset_builders): corpus_builders[d.corpus].append(d) corpus_weights[d.corpus] += weights[i] total = 0 - print_rank_0(" > number of samples for each corpus ") - corpus_weights_achieved={} + log.debug(" > number of samples for each corpus ") + corpus_weights_achieved = {} for c in corpus_list: datasets.append(BuildConcatDataset(corpus_builders[c])) total += datasets[-1].num_samples - 
corpus_weights_achieved[c] = float(datasets[-1].num_samples)/train_num_samples - print_rank_0(f" {c}: {datasets[-1].num_samples} w={corpus_weights_achieved[c]} (expected: {corpus_weights[c]})") - - print_rank_0(f" > total number of samples: {total}") - print_rank_0(f" >>> Finished concatenating datasets in {time.time() - start_concating_time} seconds") - print_rank_0(f" >>> Finished building {dataset_type} corpus datasets in {time.time() - start_time} seconds") + corpus_weights_achieved[c] = ( + float(datasets[-1].num_samples) / train_num_samples + ) + log.debug( + f" {c}: {datasets[-1].num_samples} w={corpus_weights_achieved[c]} (expected: {corpus_weights[c]})" + ) + + log.debug(f" > total number of samples: {total}") + log.debug( + f" >>> Finished concatenating datasets in {time.time() - start_concating_time} seconds" + ) + log.debug( + f" >>> Finished building {dataset_type} corpus datasets in {time.time() - start_time} seconds" + ) return datasets, [corpus_weights_achieved[c] for c in corpus_list] + train_weights = None if a > 0: - train_datasets, train_weights = build_corpus_datasets('train') - + train_datasets, train_weights = build_corpus_datasets("train") + valid_weights = None if b > 0: - valid_datasets, valid_weights = build_corpus_datasets('valid') - - if c > 0: - test_datasets, test_weights = build_corpus_datasets('test') + valid_datasets, valid_weights = build_corpus_datasets("valid") + test_weights = None + if c > 0: + test_datasets, test_weights = build_corpus_datasets("test") # This barrier is critical to make sure that all the datasets are built once # and the metadata were written to the cache folder before other ranks touch them - print_rank_0(f" >>> Rank 0 - finished building datasets in {time.time() - start_time} seconds") + log.debug( + f" >>> Rank 0 - finished building datasets in {time.time() - start_time} seconds" + ) torch.distributed.barrier(group=mpu.get_data_parallel_group()) 
torch.distributed.barrier(group=mpu.get_pipeline_model_parallel_group()) torch.distributed.barrier(group=mpu.get_data_parallel_group()) - print_rank_0(f" >>> Finished building datasets (all ranks) in distributed way in {time.time() - start_time} seconds") - print_rank_0(f" >>> Starting to build BlendableDataset") + log.debug( + f" >>> Finished building datasets (all ranks) in distributed way in {time.time() - start_time} seconds" + ) + log.debug(" >>> Starting to build BlendableDataset") # Blend. start_time = time.time() blending_train_dataset = None - if train_datasets: - blending_train_dataset = BlendableDataset(train_datasets, train_weights, train_num_samples, - data_cache_path=data_cache_path) + if train_datasets and train_weights: + blending_train_dataset = BlendableDataset( + train_datasets, + train_weights, + train_num_samples, + data_cache_path=data_cache_path, + ) blending_valid_dataset = None - if valid_datasets: - blending_valid_dataset = BlendableDataset(valid_datasets, valid_weights, valid_num_samples, - data_cache_path=data_cache_path) + if valid_datasets and valid_weights: + blending_valid_dataset = BlendableDataset( + valid_datasets, + valid_weights, + valid_num_samples, + data_cache_path=data_cache_path, + ) blending_test_dataset = None - if test_datasets: - blending_test_dataset = BlendableDataset(test_datasets, test_weights, test_num_samples, - data_cache_path=data_cache_path) + if test_datasets and test_weights: + blending_test_dataset = BlendableDataset( + test_datasets, + test_weights, + test_num_samples, + data_cache_path=data_cache_path, + ) end_time = time.time() - print_rank_0(f" >>> Finished building BlendableDataset in {end_time - start_time} seconds") - return (blending_train_dataset, blending_valid_dataset, - blending_test_dataset) + log.debug( + f" >>> Finished building BlendableDataset in {end_time - start_time} seconds" + ) + return (blending_train_dataset, blending_valid_dataset, blending_test_dataset) else: - 
print_rank_0("Separate data paths provided for train, valid & test. Split string will be ignored.") + log.debug( + "Separate data paths provided for train, valid & test. Split string will be ignored." + ) train_dataset, valid_dataset, test_dataset = None, None, None # Single dataset. if train_data_prefix is not None: - train_dataset = build_dataset("train", train_data_prefix, data_impl, - splits_string, - train_valid_test_num_samples[0], - seq_length, seed, skip_warmup, - data_cache_path=data_cache_path) + train_dataset = build_dataset( + "train", + train_data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples[0], + seq_length, + seed, + skip_warmup, + data_cache_path=data_cache_path, + ) if valid_data_prefix is not None: - valid_dataset = build_dataset("valid", valid_data_prefix, data_impl, - splits_string, - train_valid_test_num_samples[1], - seq_length, seed, False, - data_cache_path=data_cache_path) - + valid_dataset = build_dataset( + "valid", + valid_data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples[1], + seq_length, + seed, + False, + data_cache_path=data_cache_path, + ) if test_data_prefix is not None: - test_dataset = build_dataset("test", test_data_prefix, data_impl, - splits_string, - train_valid_test_num_samples[2], - seq_length, seed, False, - data_cache_path=data_cache_path) + test_dataset = build_dataset( + "test", + test_data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples[2], + seq_length, + seed, + False, + data_cache_path=data_cache_path, + ) return (train_dataset, valid_dataset, test_dataset) + @dlp.log -def _build_train_valid_test_datasets(data_prefix, data_impl, splits_string, - train_valid_test_num_samples, - seq_length, seed, skip_warmup, - return_doc_ids=False, *, - data_cache_path=None): +def _build_train_valid_test_datasets( + data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples, + seq_length, + seed, + skip_warmup, + return_doc_ids=False, + *, + 
data_cache_path=None, +): """Build train, valid, and test datasets.""" # Indexed dataset. - indexed_dataset = get_indexed_dataset_(data_prefix, - data_impl, - skip_warmup) + indexed_dataset = get_indexed_dataset_(data_prefix, data_impl, skip_warmup) total_num_of_documents = indexed_dataset.sizes.shape[0] splits = get_train_valid_test_split_(splits_string, total_num_of_documents) # Print stats about the splits. - print_rank_0(' > dataset split:') + log.debug(" > dataset split:") def print_split_stats(name, index): - print_rank_0(' {}:'.format(name)) - print_rank_0(' document indices in [{}, {}) total of {} ' - 'documents'.format(splits[index], splits[index + 1], - splits[index + 1] - splits[index])) - print_split_stats('train', 0) - print_split_stats('validation', 1) - print_split_stats('test', 2) + log.debug(" {}:".format(name)) + log.debug( + " document indices in [{}, {}) total of {} " "documents".format( + splits[index], splits[index + 1], splits[index + 1] - splits[index] + ) + ) + + print_split_stats("train", 0) + print_split_stats("validation", 1) + print_split_stats("test", 2) def build_dataset(index, name): dataset = None if splits[index + 1] > splits[index]: - documents = np.arange(start=splits[index], stop=splits[index + 1], - step=1, dtype=np.int32) - dataset = GPTDataset(name, data_prefix, documents, indexed_dataset, - splits_string, - train_valid_test_num_samples[index], - seq_length, seed, - return_doc_ids, - data_cache_path=data_cache_path) + documents = np.arange( + start=splits[index], stop=splits[index + 1], step=1, dtype=np.int32 + ) + dataset = GPTDataset( + name, + data_prefix, + documents, + indexed_dataset, + splits_string, + train_valid_test_num_samples[index], + seq_length, + seed, + return_doc_ids, + data_cache_path=data_cache_path, + ) return dataset - train_dataset = build_dataset(0, 'train') - valid_dataset = build_dataset(1, 'valid') - test_dataset = build_dataset(2, 'test') + train_dataset = build_dataset(0, "train") + valid_dataset = 
build_dataset(1, "valid") + test_dataset = build_dataset(2, "test") return (train_dataset, valid_dataset, test_dataset) + @dlp.log -def _build_train_valid_test_datasets_single(data_prefix, data_impl, splits_string, - train_valid_test_num_samples, - seq_length, seed, skip_warmup, name, - return_doc_ids=False, *, - data_cache_path=None): +def _build_train_valid_test_datasets_single( + data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples, + seq_length, + seed, + skip_warmup, + name, + return_doc_ids=False, + *, + data_cache_path=None, +): """Build train, valid, and test datasets.""" # Each rank print out information - print_rank_0(f" >> building dataset for {data_prefix}") + log.debug(f" >> building dataset for {data_prefix}") # Indexed dataset. - indexed_dataset = get_indexed_dataset_(data_prefix, - data_impl, - skip_warmup) + indexed_dataset = get_indexed_dataset_(data_prefix, data_impl, skip_warmup) total_num_of_documents = indexed_dataset.sizes.shape[0] splits = get_train_valid_test_split_(splits_string, total_num_of_documents) # Print stats about the splits. 
- print_rank_0(' > dataset split:') + log.debug(" > dataset split:") def print_split_stats(name, index): - print_rank_0(' {}:'.format(name)) - print_rank_0(' document indices in [{}, {}) total of {} ' - 'documents'.format(splits[index], splits[index + 1], - splits[index + 1] - splits[index])) - print_split_stats('train', 0) - print_split_stats('validation', 1) - print_split_stats('test', 2) + log.debug(" {}:".format(name)) + log.debug( + " document indices in [{}, {}) total of {} " "documents".format( + splits[index], splits[index + 1], splits[index + 1] - splits[index] + ) + ) + + print_split_stats("train", 0) + print_split_stats("validation", 1) + print_split_stats("test", 2) def build_dataset(index, name): dataset = None if splits[index + 1] > splits[index]: - documents = np.arange(start=splits[index], stop=splits[index + 1], - step=1, dtype=np.int32) - dataset = GPTDataset(name, data_prefix, documents, indexed_dataset, - splits_string, - train_valid_test_num_samples[index], - seq_length, seed, - return_doc_ids, - data_cache_path=data_cache_path) + documents = np.arange( + start=splits[index], stop=splits[index + 1], step=1, dtype=np.int32 + ) + dataset = GPTDataset( + name, + data_prefix, + documents, + indexed_dataset, + splits_string, + train_valid_test_num_samples[index], + seq_length, + seed, + return_doc_ids, + data_cache_path=data_cache_path, + ) return dataset - if name.find("train")!=-1: - return build_dataset(0, 'train') - if name.find("valid")!=-1: - return build_dataset(1, 'valid') - if name.find("test")!=-1: - return build_dataset(2, 'test') + + if name.find("train") != -1: + return build_dataset(0, "train") + if name.find("valid") != -1: + return build_dataset(1, "valid") + if name.find("test") != -1: + return build_dataset(2, "test") + @dlp.log -def build_dataset(dataset_name, data_prefix, data_impl, - splits_string, num_samples, - seq_length, seed, skip_warmup, - *, - data_cache_path=None): +def build_dataset( + dataset_name, + data_prefix, + 
data_impl, + splits_string, + num_samples, + seq_length, + seed, + skip_warmup, + *, + data_cache_path=None, +): dataset = None if len(data_prefix) == 1: - dataset = _build_dataset(dataset_name, data_prefix[0], data_impl, - splits_string, num_samples, seq_length, - seed, skip_warmup, - data_cache_path=data_cache_path) + dataset = _build_dataset( + dataset_name, + data_prefix[0], + data_impl, + splits_string, + num_samples, + seq_length, + seed, + skip_warmup, + data_cache_path=data_cache_path, + ) else: # Blending dataset. # Parse the values. @@ -366,73 +529,108 @@ def build_dataset(dataset_name, data_prefix, data_impl, # Build individual datasets. datasets = [] for i in range(len(prefixes)): - ds = _build_dataset(dataset_name, prefixes[i], data_impl, - splits_string, dataset_num_samples[i], - seq_length, seed, skip_warmup, - data_cache_path=data_cache_path) + ds = _build_dataset( + dataset_name, + prefixes[i], + data_impl, + splits_string, + dataset_num_samples[i], + seq_length, + seed, + skip_warmup, + data_cache_path=data_cache_path, + ) if ds: datasets.append(ds) if datasets: - dataset = BlendableDataset(datasets, weights, num_samples, - data_cache_path=data_cache_path) + dataset = BlendableDataset( + datasets, weights, num_samples, data_cache_path=data_cache_path + ) return dataset + @dlp.log -def _build_dataset(dataset_name, data_prefix, data_impl, splits_string, - num_samples, seq_length, seed, skip_warmup, - *, - data_cache_path=None): +def _build_dataset( + dataset_name, + data_prefix, + data_impl, + splits_string, + num_samples, + seq_length, + seed, + skip_warmup, + *, + data_cache_path=None, +): """ Build dataset. This method is called when individual train, valid, test datasets are provided """ # Indexed dataset. 
- indexed_dataset = get_indexed_dataset_(data_prefix, - data_impl, - skip_warmup) + indexed_dataset = get_indexed_dataset_(data_prefix, data_impl, skip_warmup) total_num_of_documents = indexed_dataset.sizes.shape[0] - print_rank_0(' {}:'.format(dataset_name)) - print_rank_0(' document indices in [0, {}) total of {} ' - 'documents'.format(total_num_of_documents, total_num_of_documents)) - - documents = np.arange(start=0, stop=total_num_of_documents, - step=1, dtype=np.int32) - - dataset = GPTDataset(dataset_name, data_prefix, documents, indexed_dataset, - splits_string, num_samples, seq_length, seed, - data_cache_path=data_cache_path) + log.debug(" {}:".format(dataset_name)) + log.debug( + " document indices in [0, {}) total of {} " "documents".format( + total_num_of_documents, total_num_of_documents + ) + ) + + documents = np.arange(start=0, stop=total_num_of_documents, step=1, dtype=np.int32) + + dataset = GPTDataset( + dataset_name, + data_prefix, + documents, + indexed_dataset, + splits_string, + num_samples, + seq_length, + seed, + data_cache_path=data_cache_path, + ) return dataset + @dlp.log def get_indexed_dataset_(data_prefix, data_impl, skip_warmup): """Build indexed dataset.""" - print_rank_0(' > building dataset index ...') + log.debug(" > building dataset index ...") start_time = time.time() - indexed_dataset = make_indexed_dataset(data_prefix, - data_impl, - skip_warmup) - print_rank_0(' > finished creating indexed dataset in {:4f} ' - 'seconds'.format(time.time() - start_time)) - print_rank_0(' number of documents: {}'.format( - indexed_dataset.sizes.shape[0])) + indexed_dataset = make_indexed_dataset(data_prefix, data_impl, skip_warmup) + log.debug( + " > finished creating indexed dataset in {:4f} " "seconds".format( + time.time() - start_time + ) + ) + log.debug(" number of documents: {}".format(indexed_dataset.sizes.shape[0])) return indexed_dataset class GPTDataset(torch.utils.data.Dataset): @dlp.log - def __init__(self, name, data_prefix, 
documents, indexed_dataset, - splits_string, num_samples, seq_length, seed, - return_doc_ids=False, *, - data_cache_path=None): - + def __init__( + self, + name, + data_prefix, + documents, + indexed_dataset, + splits_string, + num_samples, + seq_length, + seed, + return_doc_ids=False, + *, + data_cache_path=None, + ): self.name = name self.indexed_dataset = indexed_dataset self.return_doc_ids = return_doc_ids @@ -442,20 +640,29 @@ def __init__(self, name, data_prefix, documents, indexed_dataset, assert np.max(documents) < indexed_dataset.sizes.shape[0] # Build index mappings. - self.doc_idx, self.sample_idx, self.shuffle_idx, self.desc, self.desc_hash = \ - _build_index_mappings(self.name, data_prefix, - documents, self.indexed_dataset.sizes, - splits_string, num_samples, seq_length, seed, - data_cache_path=data_cache_path) - + self.doc_idx, self.sample_idx, self.shuffle_idx, self.desc, self.desc_hash = ( + _build_index_mappings( + self.name, + data_prefix, + documents, + self.indexed_dataset.sizes, + splits_string, + num_samples, + seq_length, + seed, + data_cache_path=data_cache_path, + ) + ) def __len__(self): # -1 is due to data structure used to retieve the index: # sample i --> [sample_idx[i], sample_idx[i+1]) return self.sample_idx.shape[0] - 1 + @dlp.log def __getitem__(self, idx): args = get_args() + assert args is not None orig_idx = idx # Get the shuffled index. 
try: @@ -464,21 +671,24 @@ def __getitem__(self, idx): if is_rank_0(): import json from rich import print_json + print(exc) print( - '\n'.join( - ['-------------------------------------------------', - f'Trying to access {idx=} from self.shuffle_idx,', - f'but {len(self.shuffle_idx)=}', - '-------------------------------------------------'] + "\n".join( + [ + "-------------------------------------------------", + f"Trying to access {idx=} from self.shuffle_idx,", + f"but {len(self.shuffle_idx)=}", + "-------------------------------------------------", + ] ) ) print_json( json.dumps( { - 'doc_idx': len(self.doc_idx), - 'sample_idx': len(self.sample_idx), - 'shuffle_idx': len(self.shuffle_idx), + "doc_idx": len(self.doc_idx), + "sample_idx": len(self.sample_idx), + "shuffle_idx": len(self.shuffle_idx), }, indent=4, ) @@ -492,45 +702,57 @@ def __getitem__(self, idx): doc_ids = [] if doc_index_f == doc_index_l: doc_ids.append(self.doc_idx[doc_index_f]) - sample = self.indexed_dataset.get(self.doc_idx[doc_index_f], - offset=offset_f, - length=offset_l - offset_f + 1) + sample = self.indexed_dataset.get( + self.doc_idx[doc_index_f], + offset=offset_f, + length=offset_l - offset_f + 1, + ) else: # Otherwise, get the rest of the initial document. doc_ids.append(self.doc_idx[doc_index_f]) - sample_list = [self.indexed_dataset.get(self.doc_idx[doc_index_f], - offset=offset_f)] + sample_list = [ + self.indexed_dataset.get(self.doc_idx[doc_index_f], offset=offset_f) + ] # Loop over all in between documents and add the entire document. for i in range(doc_index_f + 1, doc_index_l): doc_ids.append(self.doc_idx[i]) sample_list.append(self.indexed_dataset.get(self.doc_idx[i])) # And finally add the relevant portion of last document. 
doc_ids.append(self.doc_idx[doc_index_l]) - sample_list.append(self.indexed_dataset.get( - self.doc_idx[doc_index_l], - length=offset_l + 1)) + sample_list.append( + self.indexed_dataset.get(self.doc_idx[doc_index_l], length=offset_l + 1) + ) sample = np.concatenate(sample_list) - text_name = 'text' + text_name = "text" if args.use_dataset_only: - text_name = 'input_ids' + text_name = "input_ids" sample_dict = {text_name: np.array(sample, dtype=np.int64)} if args.return_data_index: - sample_dict.update({'index': np.array([orig_idx], dtype=np.int64)}) + sample_dict.update({"index": np.array([orig_idx], dtype=np.int64)}) - if self.return_doc_ids: # for retro preprocessing - sample_dict.update({'doc_ids': np.array(doc_ids, dtype=np.int64)}) + if self.return_doc_ids: # for retro preprocessing + sample_dict.update({"doc_ids": np.array(doc_ids, dtype=np.int64)}) if args.use_dataset_only: - sample_dict.update({'labels': np.array(sample, dtype=np.int64)}) + sample_dict.update({"labels": np.array(sample, dtype=np.int64)}) return sample_dict + @dlp.log -def _build_index_mappings(name, data_prefix, documents, sizes, - splits_string, num_samples, seq_length, seed, - *, - data_cache_path): +def _build_index_mappings( + name, + data_prefix, + documents, + sizes, + splits_string, + num_samples, + seq_length, + seed, + *, + data_cache_path, +): """Build doc-idx, sample-idx, and shuffle-idx. doc-idx: is an array (ordered) of documents to be used in training. sample-idx: is the start document index and document offset for each @@ -538,10 +760,11 @@ def _build_index_mappings(name, data_prefix, documents, sizes, shuffle-idx: maps the sample index into a random index into sample-idx. """ args = get_args() + assert args is not None # Number of tokens in each epoch and number of required epochs. 
tokens_per_epoch = _num_tokens(documents, sizes) num_epochs = _num_epochs(tokens_per_epoch, seq_length, num_samples) - if args.train_data_exact_num_epochs is not None and name == 'train': + if args.train_data_exact_num_epochs is not None and name == "train": num_epochs = args.train_data_exact_num_epochs # rng state @@ -556,13 +779,13 @@ def _build_index_mappings(name, data_prefix, documents, sizes, desc += f"Sequence length {seq_length}\n" desc += f"Random seed {seed}\n" desc += f"Split {splits_string}\n" - desc_hash = hashlib.md5(desc.encode('utf-8')).hexdigest() + desc_hash = hashlib.md5(desc.encode("utf-8")).hexdigest() desc_filename = desc_hash + ".dsc" - doc_idx_filename = desc_hash + '_doc_idx.npy' - sample_idx_filename = desc_hash + '_sample_idx.npy' - shuffle_idx_filename = desc_hash + '_shuffle_idx.npy' + doc_idx_filename = desc_hash + "_doc_idx.npy" + sample_idx_filename = desc_hash + "_sample_idx.npy" + shuffle_idx_filename = desc_hash + "_shuffle_idx.npy" - if name == 'train': + if name == "train": # force to use certain index files if args.train_desc_path is not None: desc_filename = args.train_desc_path @@ -577,15 +800,15 @@ def _build_index_mappings(name, data_prefix, documents, sizes, # duplication, then look in data-cache-path if specified, # If nothing is found, use the last path looked in build_indices = True - prefixes = [os.path.join(os.path.dirname(data_prefix), 'index-cache')] + prefixes = [os.path.join(os.path.dirname(data_prefix), "index-cache")] if data_cache_path is not None: prefixes.append(data_cache_path) for prefix in prefixes: idx_path = { - 'desc': os.path.join(prefix, desc_filename), - 'doc': os.path.join(prefix, doc_idx_filename), - 'sample': os.path.join(prefix, sample_idx_filename), - 'shuffle': os.path.join(prefix, shuffle_idx_filename) + "desc": os.path.join(prefix, desc_filename), + "doc": os.path.join(prefix, doc_idx_filename), + "sample": os.path.join(prefix, sample_idx_filename), + "shuffle": os.path.join(prefix, 
shuffle_idx_filename), } for f in idx_path.values(): if not os.path.isfile(f): @@ -594,15 +817,17 @@ def _build_index_mappings(name, data_prefix, documents, sizes, # Found our files! build_indices = False break - data_cache_dir = os.path.dirname(idx_path['desc']) + data_cache_dir = os.path.dirname(idx_path["desc"]) data_cache_success = True # Build the indexed mapping if not exist. if build_indices: - # Since this function will be called by all the rank in the very beginning. Therefore, we assume that all the - # ranks will first create the document files, and then read it. + # Since this function will be called by all the rank in the very beginning. Therefore, we assume that all the + # ranks will first create the document files, and then read it. # There will not be contension effects going on either - print_rank_0(f" > WARNING: could not find index map files, building on rank {torch.distributed.get_rank()}") + log.warning( + f" > WARNING: could not find index map files, building on rank {torch.distributed.get_rank()}" + ) # For the last epoch, decide whether include the entire epoch # in the global shuffle or not. @@ -611,64 +836,80 @@ def _build_index_mappings(name, data_prefix, documents, sizes, # not mean anything. if num_epochs == 1: separate_last_epoch = False - print_rank_0(' > only one epoch required, setting ' - 'separate_last_epoch to False') + log.debug( + " > only one epoch required, setting " "separate_last_epoch to False" + ) else: # Get the number of samples for the last epoch num_samples_from_epochs_minus_one = ( - (num_epochs - 1) * tokens_per_epoch - 1) // seq_length - last_epoch_num_samples = num_samples - \ - num_samples_from_epochs_minus_one - assert last_epoch_num_samples >= 0, \ - 'last epoch number of samples should be non-negative.' 
+ (num_epochs - 1) * tokens_per_epoch - 1 + ) // seq_length + last_epoch_num_samples = num_samples - num_samples_from_epochs_minus_one + assert ( + last_epoch_num_samples >= 0 + ), "last epoch number of samples should be non-negative." num_samples_per_epoch = (tokens_per_epoch - 1) // seq_length - assert last_epoch_num_samples <= (num_samples_per_epoch + 1), \ - 'last epoch number of samples exceeded max value.' + assert last_epoch_num_samples <= ( + num_samples_per_epoch + 1 + ), "last epoch number of samples exceeded max value." # If we have less than 80% of the samples for the last epoch, # seperate out the epoch and treat it differently. # Note: the 80% number is just based on common sense and can # be adjusted if needed. - separate_last_epoch = (last_epoch_num_samples < - int(0.80 * num_samples_per_epoch)) + separate_last_epoch = last_epoch_num_samples < int( + 0.80 * num_samples_per_epoch + ) if separate_last_epoch: - string = ' > last epoch number of samples ({}) is smaller '\ - 'than 80% of number of samples per epoch ({}), '\ - 'setting separate_last_epoch to True' + string = ( + " > last epoch number of samples ({}) is smaller " + "than 80% of number of samples per epoch ({}), " + "setting separate_last_epoch to True" + ) else: - string = ' > last epoch number of samples ({}) is larger '\ - 'than 80% of number of samples per epoch ({}), '\ - 'setting separate_last_epoch to False' - print_rank_0(string.format(last_epoch_num_samples, - num_samples_per_epoch)) - + string = ( + " > last epoch number of samples ({}) is larger " + "than 80% of number of samples per epoch ({}), " + "setting separate_last_epoch to False" + ) + log.debug(string.format(last_epoch_num_samples, num_samples_per_epoch)) try: os.makedirs(data_cache_dir, exist_ok=True) # description - with open(idx_path['desc'], 'wt') as fd: + with open(idx_path["desc"], "wt") as fd: fd.write(desc) # doc-idx. 
start_time = time.time() - doc_idx = _build_doc_idx(documents, num_epochs, np_rng, - separate_last_epoch) - np.save(idx_path['doc'], doc_idx, allow_pickle=True) - print_rank_0(' > elasped time to build and save doc-idx mapping ' - '(seconds): {:4f}'.format(time.time() - start_time)) + doc_idx = _build_doc_idx(documents, num_epochs, np_rng, separate_last_epoch) + np.save(idx_path["doc"], doc_idx, allow_pickle=True) + log.debug( + " > elasped time to build and save doc-idx mapping " + "(seconds): {:4f}".format(time.time() - start_time) + ) # sample-idx. start_time = time.time() # Use C++ implementation for speed. # First compile and then import. from megatron.data import helpers + assert doc_idx.dtype == np.int32 assert sizes.dtype == np.int32 - sample_idx = helpers.build_sample_idx(sizes, doc_idx, seq_length, - num_epochs, tokens_per_epoch, torch.distributed.get_rank()==0) - np.save(idx_path['sample'], sample_idx, allow_pickle=True) - print_rank_0(' > elasped time to build and save sample-idx mapping ' - '(seconds): {:4f}'.format(time.time() - start_time)) + sample_idx = helpers.build_sample_idx( + sizes, + doc_idx, + seq_length, + num_epochs, + tokens_per_epoch, + torch.distributed.get_rank() == 0, + ) + np.save(idx_path["sample"], sample_idx, allow_pickle=True) + log.debug( + " > elasped time to build and save sample-idx mapping " + "(seconds): {:4f}".format(time.time() - start_time) + ) # shuffle-idx. 
start_time = time.time() # -1 is due to data structure used to retieve the index: @@ -677,35 +918,46 @@ def _build_index_mappings(name, data_prefix, documents, sizes, num_samples_ = num_samples_from_epochs_minus_one else: num_samples_ = sample_idx.shape[0] - 1 - shuffle_idx = _build_shuffle_idx(num_samples_, - sample_idx.shape[0] - 1, np_rng) - np.save(idx_path['shuffle'], shuffle_idx, allow_pickle=True) - print_rank_0(' > elasped time to build and save shuffle-idx mapping' - ' (seconds): {:4f}'.format(time.time() - start_time)) + shuffle_idx = _build_shuffle_idx( + num_samples_, sample_idx.shape[0] - 1, np_rng + ) + np.save(idx_path["shuffle"], shuffle_idx, allow_pickle=True) + log.debug( + " > elasped time to build and save shuffle-idx mapping" + " (seconds): {:4f}".format(time.time() - start_time) + ) except OSError: - print(f'There was an error trying to create the data cache directory ({data_cache_dir})') - print('or a file in it. This defaults to a directory "index-cache" within the directory') - print('the data files are in and can be set with the --data-cache-path argument. Please') - print('ensure you have write access to this directory or specify one that you do have') - print('write access to.') + print( + f"There was an error trying to create the data cache directory ({data_cache_dir})" + ) + print( + 'or a file in it. This defaults to a directory "index-cache" within the directory' + ) + print( + "the data files are in and can be set with the --data-cache-path argument. Please" + ) + print( + "ensure you have write access to this directory or specify one that you do have" + ) + print("write access to.") data_cache_success = False # Load mappings. 
start_time = time.time() - print_rank_0(f" > loading doc-idx mapping from {idx_path['doc']}") - doc_idx = np.load(idx_path['doc'], allow_pickle=True, mmap_mode='r') + log.debug(f" > loading doc-idx mapping from {idx_path['doc']}") + doc_idx = np.load(idx_path["doc"], allow_pickle=True, mmap_mode="r") - print_rank_0(f" > loading sample-idx mapping from {idx_path['sample']}") - sample_idx = np.load(idx_path['sample'], allow_pickle=True, mmap_mode='r') + log.debug(f" > loading sample-idx mapping from {idx_path['sample']}") + sample_idx = np.load(idx_path["sample"], allow_pickle=True, mmap_mode="r") - print_rank_0(f" > loading shuffle-idx mapping from {idx_path['shuffle']}") - shuffle_idx = np.load(idx_path['shuffle'], allow_pickle=True, mmap_mode='r') + log.debug(f" > loading shuffle-idx mapping from {idx_path['shuffle']}") + shuffle_idx = np.load(idx_path["shuffle"], allow_pickle=True, mmap_mode="r") - print_rank_0(' loaded indexed file in {:3.3f} seconds'.format( - time.time() - start_time)) - print_rank_0(' total number of samples: {}'.format( - sample_idx.shape[0])) - print_rank_0(' total number of epochs: {}'.format(num_epochs)) + log.debug( + " loaded indexed file in {:3.3f} seconds".format(time.time() - start_time) + ) + log.debug(" total number of samples: {}".format(sample_idx.shape[0])) + log.debug(" total number of epochs: {}".format(num_epochs)) return doc_idx, sample_idx, shuffle_idx, desc, desc_hash @@ -729,25 +981,26 @@ def _num_epochs(tokens_per_epoch, seq_length, num_samples): if ((total_tokens - 1) // seq_length) >= num_samples: return num_epochs + @dlp.log def _build_doc_idx(documents, num_epochs, np_rng, separate_last_epoch): """Build an array with length = number-of-epochs * number-of-dcuments. 
Each index is mapped to a corresponding document.""" if not separate_last_epoch or num_epochs == 1: - doc_idx = np.mgrid[0:num_epochs, 0:len(documents)][1] + doc_idx = np.mgrid[0:num_epochs, 0 : len(documents)][1] doc_idx[:] = documents doc_idx = doc_idx.reshape(-1) doc_idx = doc_idx.astype(np.int32) np_rng.shuffle(doc_idx) return doc_idx - doc_idx_first = _build_doc_idx(documents, num_epochs-1, np_rng, False) + doc_idx_first = _build_doc_idx(documents, num_epochs - 1, np_rng, False) doc_idx_last = _build_doc_idx(documents, 1, np_rng, False) return np.concatenate((doc_idx_first, doc_idx_last)) + @dlp.log -def _build_sample_idx(sizes, doc_idx, seq_length, - num_epochs, tokens_per_epoch): +def _build_sample_idx(sizes, doc_idx, seq_length, num_epochs, tokens_per_epoch): """Sample index mapping is a 2D array with sizes [number-of-samples + 1, 2] where [..., 0] contains the index into `doc_idx` and [..., 1] is the @@ -781,7 +1034,7 @@ def _build_sample_idx(sizes, doc_idx, seq_length, # Note that -1 here is for the same reason we have -1 in # `_num_epochs` calculations. if remaining_seq_length <= 0: - doc_offset += (remaining_seq_length + doc_length - 1) + doc_offset += remaining_seq_length + doc_length - 1 remaining_seq_length = 0 else: # Otherwise, start from the begining of the next document. 
@@ -794,24 +1047,28 @@ def _build_sample_idx(sizes, doc_idx, seq_length, return sample_idx + @dlp.log def _build_shuffle_idx(num_samples, total_size, np_rng): """Build the range [0, size) and shuffle.""" - print_rank_0(' > building shuffle index with split [0, {}) and [{}, {}) ' - '...'.format(num_samples, num_samples, total_size)) + log.debug( + " > building shuffle index with split [0, {}) and [{}, {}) " "...".format( + num_samples, num_samples, total_size + ) + ) dtype_ = np.uint32 if total_size >= (np.iinfo(np.uint32).max - 1): dtype_ = np.int64 - shuffle_idx_first = np.arange(start=0, stop=num_samples, - step=1, dtype=dtype_) + shuffle_idx_first = np.arange(start=0, stop=num_samples, step=1, dtype=dtype_) np_rng.shuffle(shuffle_idx_first) if num_samples == total_size: return shuffle_idx_first - shuffle_idx_last = np.arange(start=num_samples, stop=total_size, - step=1, dtype=dtype_) + shuffle_idx_last = np.arange( + start=num_samples, stop=total_size, step=1, dtype=dtype_ + ) np_rng.shuffle(shuffle_idx_last) return np.concatenate((shuffle_idx_first, shuffle_idx_last)) diff --git a/megatron/data/indexed_dataset.py b/megatron/data/indexed_dataset.py index 1eb9b7842b..8479f33fab 100644 --- a/megatron/data/indexed_dataset.py +++ b/megatron/data/indexed_dataset.py @@ -15,17 +15,24 @@ from functools import lru_cache import os + +# import logging import shutil import struct from itertools import accumulate import numpy as np import torch -from megatron import print_rank_0 -from megatron.utils import Profile + +# from megatron import print_rank_0 +from megatron.utils import Profile, get_logger + +log = get_logger(__name__) + dlp = Profile("DATASET") + def __best_fitting_dtype(vocab_size=None): if vocab_size is not None and vocab_size < 65500: return np.uint16 @@ -34,28 +41,32 @@ def __best_fitting_dtype(vocab_size=None): def get_available_dataset_impl(): - return ['lazy', 'cached', 'mmap'] + return ["lazy", "cached", "mmap"] def infer_dataset_impl(path): if 
IndexedDataset.exists(path): - with open(index_file_path(path), 'rb') as f: + with open(index_file_path(path), "rb") as f: magic = f.read(8) if magic == IndexedDataset._HDR_MAGIC: - return 'cached' + return "cached" elif magic == MMapIndexedDataset.Index._HDR_MAGIC[:8]: - return 'mmap' + return "mmap" else: return None else: print(f"Dataset does not exist: {path}") - print("Path should be a basename that both .idx and .bin can be appended to get full filenames.") + print( + "Path should be a basename that both .idx and .bin can be appended to get full filenames." + ) return None def make_builder(out_file, impl, vocab_size=None): - if impl == 'mmap': - return MMapIndexedDatasetBuilder(out_file, dtype=__best_fitting_dtype(vocab_size)) + if impl == "mmap": + return MMapIndexedDatasetBuilder( + out_file, dtype=__best_fitting_dtype(vocab_size) + ) else: return IndexedDatasetBuilder(out_file) @@ -63,22 +74,24 @@ def make_builder(out_file, impl, vocab_size=None): def make_dataset(path, impl, skip_warmup=False): if not IndexedDataset.exists(path): print(f"Dataset does not exist: {path}") - print("Path should be a basename that both .idx and .bin can be appended to get full filenames.") + print( + "Path should be a basename that both .idx and .bin can be appended to get full filenames." 
+ ) return None - if impl == 'infer': + if impl == "infer": impl = infer_dataset_impl(path) - if impl == 'lazy' and IndexedDataset.exists(path): + if impl == "lazy" and IndexedDataset.exists(path): return IndexedDataset(path) - elif impl == 'cached' and IndexedDataset.exists(path): + elif impl == "cached" and IndexedDataset.exists(path): return IndexedCachedDataset(path) - elif impl == 'mmap' and MMapIndexedDataset.exists(path): + elif impl == "mmap" and MMapIndexedDataset.exists(path): return MMapIndexedDataset(path, skip_warmup) print(f"Unknown dataset implementation: {impl}") return None def dataset_exists(path, impl): - if impl == 'mmap': + if impl == "mmap": return MMapIndexedDataset.exists(path) else: return IndexedDataset.exists(path) @@ -114,11 +127,11 @@ def code(dtype): def index_file_path(prefix_path): - return prefix_path + '.idx' + return prefix_path + ".idx" def data_file_path(prefix_path): - return prefix_path + '.bin' + return prefix_path + ".bin" def create_doc_idx(sizes): @@ -131,38 +144,41 @@ def create_doc_idx(sizes): class IndexedDataset(torch.utils.data.Dataset): """Loader for IndexedDataset""" - _HDR_MAGIC = b'TNTIDX\x00\x00' + + _HDR_MAGIC = b"TNTIDX\x00\x00" def __init__(self, path): super().__init__() self.path = path self.data_file = None self.read_index(path) + @dlp.log def read_index(self, path): - with open(index_file_path(path), 'rb') as f: + with open(index_file_path(path), "rb") as f: magic = f.read(8) assert magic == self._HDR_MAGIC, ( - 'Index file doesn\'t match expected format. ' - 'Make sure that --dataset-impl is configured properly.' + "Index file doesn't match expected format. " + "Make sure that --dataset-impl is configured properly." 
) version = f.read(8) - assert struct.unpack('= self._len: - raise IndexError('index out of range') + raise IndexError("index out of range") def __del__(self): if self.data_file: @@ -176,7 +192,7 @@ def __getitem__(self, idx): if isinstance(idx, int): i = idx self.check_index(i) - tensor_size = self.sizes[self.dim_offsets[i]:self.dim_offsets[i + 1]] + tensor_size = self.sizes[self.dim_offsets[i] : self.dim_offsets[i + 1]] a = np.empty(tensor_size, dtype=self.dtype) self.data_file.seek(self.data_offsets[i] * self.element_size) self.data_file.readinto(a) @@ -185,7 +201,7 @@ def __getitem__(self, idx): start, stop, step = idx.indices(len(self)) if step != 1: raise ValueError("Slices into indexed_dataset must be contiguous") - sizes = self.sizes[self.dim_offsets[start]:self.dim_offsets[stop]] + sizes = self.sizes[self.dim_offsets[start] : self.dim_offsets[stop]] size = sum(sizes) a = np.empty(size, dtype=self.dtype) self.data_file.seek(self.data_offsets[start] * self.element_size) @@ -205,8 +221,8 @@ def size(self, index): @staticmethod def exists(path): - return ( - os.path.exists(index_file_path(path)) and os.path.exists(data_file_path(path)) + return os.path.exists(index_file_path(path)) and os.path.exists( + data_file_path(path) ) @property @@ -215,7 +231,6 @@ def supports_prefetch(self): class IndexedCachedDataset(IndexedDataset): - def __init__(self, path): super().__init__(path) self.cache = None @@ -224,6 +239,7 @@ def __init__(self, path): @property def supports_prefetch(self): return True + @dlp.log def prefetch(self, indices): if all(i in self.cache_index for i in indices): @@ -240,7 +256,7 @@ def prefetch(self, indices): for i in indices: self.cache_index[i] = ptx size = self.data_offsets[i + 1] - self.data_offsets[i] - a = self.cache[ptx: ptx + size] + a = self.cache[ptx : ptx + size] self.data_file.seek(self.data_offsets[i] * self.element_size) self.data_file.readinto(a) ptx += size @@ -255,10 +271,10 @@ def __getitem__(self, idx): if isinstance(idx, 
int): i = idx self.check_index(i) - tensor_size = self.sizes[self.dim_offsets[i]:self.dim_offsets[i + 1]] + tensor_size = self.sizes[self.dim_offsets[i] : self.dim_offsets[i + 1]] a = np.empty(tensor_size, dtype=self.dtype) ptx = self.cache_index[i] - np.copyto(a, self.cache[ptx: ptx + a.size]) + np.copyto(a, self.cache[ptx : ptx + a.size]) return a elif isinstance(idx, slice): # Hack just to make this work, can optimizer later if necessary @@ -278,15 +294,17 @@ class IndexedDatasetBuilder(object): np.float32: 4, np.float64: 8, } + @dlp.log def __init__(self, out_file, dtype=np.int32): - self.out_file = open(out_file, 'wb') + self.out_file = open(out_file, "wb") self.dtype = dtype self.data_offsets = [0] self.dim_offsets = [0] self.sizes = [] self.element_size = self.element_sizes[self.dtype] self.doc_idx = [0] + @dlp.log def add_item(self, tensor): bytes = self.out_file.write(np.array(tensor.numpy(), dtype=self.dtype)) @@ -297,6 +315,7 @@ def add_item(self, tensor): def end_document(self): self.doc_idx.append(len(self.sizes)) + @dlp.log def merge_file_(self, another_file): index = IndexedDataset(another_file) @@ -315,7 +334,7 @@ def merge_file_(self, another_file): self.doc_idx.extend((doc_offset + index.doc_idx)[1:]) - with open(data_file_path(another_file), 'rb') as f: + with open(data_file_path(another_file), "rb") as f: while True: data = f.read(1024) if data: @@ -325,21 +344,22 @@ def merge_file_(self, another_file): def finalize(self, index_file): self.out_file.close() - index = open(index_file, 'wb') - index.write(b'TNTIDX\x00\x00') - index.write(struct.pack(' Date: Sun, 13 Oct 2024 10:16:44 -0500 Subject: [PATCH 67/92] Update `ALCF/requirements/requirements.txt` --- ALCF/requirements/requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/ALCF/requirements/requirements.txt b/ALCF/requirements/requirements.txt index 78d1789d2c..03541ba514 100644 --- a/ALCF/requirements/requirements.txt +++ b/ALCF/requirements/requirements.txt @@ -15,6 +15,4 @@ 
six numpy<2 schedulefree packaging>=20.0 -pydftracer wandb -# git+https://github.com/saforem2/ezpz@main From 5e9eed0443e5b448e924cd28d5696e82e475a9da Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Sun, 13 Oct 2024 10:16:54 -0500 Subject: [PATCH 68/92] Update `megatron/utils.py` --- megatron/utils.py | 94 ++++++++++++++++++++++++++++++----------------- 1 file changed, 60 insertions(+), 34 deletions(-) diff --git a/megatron/utils.py b/megatron/utils.py index d00f4cd0ef..3d5eef4672 100644 --- a/megatron/utils.py +++ b/megatron/utils.py @@ -4,24 +4,14 @@ import sys import os -import time import logging -from typing import ContextManager, Optional +from typing import Optional import torch from torch.nn.parallel import DistributedDataParallel as torchDDP from deepspeed.accelerator import get_accelerator -if get_accelerator().device_name() == "cuda": - try: - from apex.multi_tensor_apply import multi_tensor_applier - import amp_C - - HAS_APEX = True - except Exception: - HAS_APEX = False - from megatron import get_args, get_adlr_autoresume, get_num_microbatches from megatron.core import mpu from megatron.core.tensor_parallel import param_is_not_tensor_parallel_duplicate @@ -30,70 +20,104 @@ import ezpz as ez +ACCELERATOR = get_accelerator() +assert ACCELERATOR is not None + +if ACCELERATOR.device_name() == "cuda": + try: + from apex.multi_tensor_apply import multi_tensor_applier # type:ignore + import amp_C # type:ignore + + HAS_APEX = True + except Exception: + HAS_APEX = False + RANK = ez.get_rank() log = logging.getLogger(__name__) -# log.setLevel("INFO") if RANK == 0 else log.setLevel("CRITICAL") - +log.setLevel(os.environ.get("LOG_LEVEL", ("INFO" if RANK == 0 else "CRITICAL"))) _DLIO_PROFILER_EXIST = True -_DFTRACER_EXIST=True +_DFTRACER_EXIST = True try: - import dftracer + import dftracer # type:ignore except Exception: - _DFTRACER_EXIST=False + _DFTRACER_EXIST = False try: - import dlio_profiler + import dlio_profiler # type:ignore except Exception: 
_DLIO_PROFILER_EXIST = False if _DFTRACER_EXIST: - from dftracer.logger import dftracer as PerfTrace, dft_fn as Profile, DFTRACER_ENABLE as DFTRACER_ENABLE + from dftracer.logger import ( # type:ignore + dftracer as PerfTrace, + dft_fn as Profile, + DFTRACER_ENABLE as DFTRACER_ENABLE, + ) elif _DLIO_PROFILER_EXIST: - from dlio_profiler.logger import fn_interceptor as Profile - from dlio_profiler.logger import dlio_logger as PerfTrace + from dlio_profiler.logger import fn_interceptor as Profile # type:ignore + from dlio_profiler.logger import dlio_logger as PerfTrace # type:ignore else: from functools import wraps - # from contextlib import nullcontext - # Profile: ContextManager = nullcontext - # - # class Profile(nullable_schema) + class Profile(object): - def __init__(self, cat, name=None, epoch=None, step=None, image_idx=None, image_size=None): - return - def log(self, func): + def __init__( + self, cat, name=None, epoch=None, step=None, image_idx=None, image_size=None + ): + return + + def log(self, func): return func - def log_init(self, func): + + def log_init(self, func): return func - def iter(self, func, iter_name="step"): + + def iter(self, func, iter_name="step"): return func + def __enter__(self): return + def __exit__(self, type, value, traceback): return - def update(self, epoch=None, step=None, image_idx=None, image_size=None, args={}): + + def update( + self, epoch=None, step=None, image_idx=None, image_size=None, args={} + ): return + def flush(self): return + def reset(self): return + def log_static(self, func): return + class dftracer(object): - def __init__(self,): + def __init__( + self, + ): self.type = None + def initialize_log(self, logfile=None, data_dir=None, process_id=-1): return + def get_time(self): return + def enter_event(self): return + def exit_event(self): return + def log_event(self, name, cat, start_time, duration, string_args=None): return + def finalize(self): return @@ -103,8 +127,8 @@ def finalize(self): def get_logger( 
name: str, - level: str = "INFO", - rank_zero_only: Optional[bool] = None, + level: Optional[str] = None, + rank_zero_only: Optional[bool] = True, ) -> logging.Logger: """Returns a `logging.Logger` object. @@ -112,7 +136,9 @@ def get_logger( non-zero ranks (and will be set to `level` on RANK==0). """ logger = logging.getLogger(name) - logger.setLevel(level) + logger.setLevel( + str(level if level is not None else os.environ.get("LOG_LEVEL", "INFO")).upper() + ) if rank_zero_only and ez.get_rank() != 0: logger.setLevel("CRITICAL") return logger From 3dcb2974da465b3d3a061215694464787737108b Mon Sep 17 00:00:00 2001 From: Huihuo Zheng Date: Sun, 13 Oct 2024 22:36:10 -0500 Subject: [PATCH 69/92] fixed bugs and added commandline option --- megatron/arguments.py | 2 ++ megatron/data/gpt_dataset.py | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/megatron/arguments.py b/megatron/arguments.py index 9a5e4b8da7..82e000923f 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -1252,6 +1252,8 @@ def _add_data_args(parser): group.add_argument('--data-file-list', type=str, default=None, help='The file with the list of dataset and weights') + group.add_argument('--shuffle-sample', action='stored_true', help="Whether to shuffle the samples within in the dataset files") + group.add_argument('--split', type=str, default='969, 30, 1', help='Comma-separated list of proportions for training,' ' validation, and test split. 
For example the split ' diff --git a/megatron/data/gpt_dataset.py b/megatron/data/gpt_dataset.py index c801a6a5ae..8c32be7d8e 100755 --- a/megatron/data/gpt_dataset.py +++ b/megatron/data/gpt_dataset.py @@ -94,7 +94,7 @@ def Build(self): class BuildConcatDataset(torch.utils.data.Dataset): @dlp.log - def __init__(self, dataset_builders): + def __init__(self, dataset_builders, shuffle=False): self.dataset_builders = dataset_builders self.num_datasets = len(dataset_builders) self.num_samples = np.sum([d.num_samples for d in dataset_builders]) @@ -117,7 +117,9 @@ def _build_indices(): self.dataset_index, self.dataset_sample_index = _build_indices() np_rng = np.random.RandomState(seed=dataset_builders[0].seed) - self.shuffle_index=np_rng.shuffle(range(self.num_samples)) + self.shuffle_index = np.arange(self.num_samples) + if shuffle: + np_rng.shuffle(self.shuffle_index) for i in range(self.num_datasets): self.desc += dataset_builders[i].prefix + "," @@ -146,7 +148,7 @@ def __getitem__(self, idx): valid_datasets = [] test_datasets = [] # Build individual datasets. 
- + args = get_args() @dlp.log def build_corpus_datasets(dataset_type='train'): start_time = time.time() @@ -172,7 +174,7 @@ def build_corpus_datasets(dataset_type='train'): print_rank_0(" > number of samples for each corpus ") corpus_weights_achieved={} for c in corpus_list: - datasets.append(BuildConcatDataset(corpus_builders[c])) + datasets.append(BuildConcatDataset(corpus_builders[c], args.shuffle_sample)) total += datasets[-1].num_samples corpus_weights_achieved[c] = float(datasets[-1].num_samples)/train_num_samples print_rank_0(f" {c}: {datasets[-1].num_samples} w={corpus_weights_achieved[c]} (expected: {corpus_weights[c]})") From 43fc2feaace3781bb399e32b2f6e827920622c61 Mon Sep 17 00:00:00 2001 From: Huihuo Zheng Date: Sun, 13 Oct 2024 23:00:21 -0500 Subject: [PATCH 70/92] fixed typo --- megatron/arguments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megatron/arguments.py b/megatron/arguments.py index 82e000923f..307e725e51 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -1252,7 +1252,7 @@ def _add_data_args(parser): group.add_argument('--data-file-list', type=str, default=None, help='The file with the list of dataset and weights') - group.add_argument('--shuffle-sample', action='stored_true', help="Whether to shuffle the samples within in the dataset files") + group.add_argument('--shuffle-sample', action='store_true', help="Whether to shuffle the samples within in the dataset files") group.add_argument('--split', type=str, default='969, 30, 1', help='Comma-separated list of proportions for training,' From d50239f1c5470fd2b2495affc01773e063942e37 Mon Sep 17 00:00:00 2001 From: Huihuo Zheng Date: Mon, 14 Oct 2024 14:20:56 -0500 Subject: [PATCH 71/92] added support for blending samples across different files in the same corpus --- megatron/arguments.py | 4 +++- megatron/data/gpt_dataset.py | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/megatron/arguments.py 
b/megatron/arguments.py index 9ab3e40953..9b0e6ccb1a 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -1291,7 +1291,9 @@ def _add_data_args(parser): group.add_argument('--data-file-list', type=str, default=None, help='The file with the list of dataset and weights') - group.add_argument('--shuffle-sample', action='store_true', help="Whether to shuffle the samples within in the dataset files") + group.add_argument('--shuffle-sample-in-corpus', action='store_true', help="Whether to shuffle the samples within in the dataset files") + + group.add_argument('--blend-sample-in-corpus', action='store_true', help="Whether to blend different files in the same corpus") group.add_argument('--split', type=str, default='969, 30, 1', help='Comma-separated list of proportions for training,' diff --git a/megatron/data/gpt_dataset.py b/megatron/data/gpt_dataset.py index c412d02b31..38df556267 100755 --- a/megatron/data/gpt_dataset.py +++ b/megatron/data/gpt_dataset.py @@ -131,19 +131,35 @@ def Build(self): self.build = True return self.dataset - class BuildConcatDataset(torch.utils.data.Dataset): + class BuildCorpusDataset(torch.utils.data.Dataset): @dlp.log - def __init__(self, dataset_builders, shuffle=False): + def __init__(self, dataset_builders): self.dataset_builders = dataset_builders self.num_datasets = len(dataset_builders) self.num_samples = np.sum([d.num_samples for d in dataset_builders]) self.indices = np.zeros((self.num_samples, 2), dtype=np.uint64) - self.desc = "ConcatDataset:" + self.desc = "CorpusDataset:" # m = 0 num_samples_list = np.array([d.num_samples for d in dataset_builders]) self.num_samples = np.sum(num_samples_list) + args = get_args() - def _build_indices(): + @dlp.log + def _build_indices_blended(): + start_time = time.time() + dataset_index = np.zeros(self.num_samples, dtype=np.int64) + dataset_sample_index = np.zeros(self.num_samples, dtype=np.int64) + weights = num_samples_list / self.num_samples + helpers.build_blending_indices( + 
dataset_index, dataset_sample_index, + weights, self.num_datasets, self.num_samples, + torch.distributed.get_rank() == 0) + log.debug('> elapsed time for building blendable dataset indices for corpus {self.dataset_builders[0].corpus}: ' + '{:.2f} (sec)'.format(time.time() - start_time)) + return dataset_index, dataset_sample_index + + + def _build_indices_concat(): start_time = time.time() dataset_index = np.zeros(self.num_samples, dtype=np.int64) dataset_sample_index = np.zeros(self.num_samples, dtype=np.int64) @@ -159,11 +175,15 @@ def _build_indices(): "{:.2f} (sec)".format(time.time() - start_time) ) return dataset_index, dataset_sample_index - - self.dataset_index, self.dataset_sample_index = _build_indices() + + if args.blend_sample_in_corpus: + self.dataset_index, self.dataset_sample_index = _build_indices_blended() + else: + self.dataset_index, self.dataset_sample_index = _build_indices_concat() + np_rng = np.random.RandomState(seed=dataset_builders[0].seed) self.shuffle_index = np.arange(self.num_samples) - if shuffle: + if args.shuffle_sample_in_corpus: np_rng.shuffle(self.shuffle_index) for i in range(self.num_datasets): self.desc += dataset_builders[i].prefix + "," @@ -243,7 +263,7 @@ def build_corpus_datasets(dataset_type="train"): log.debug(" > number of samples for each corpus ") corpus_weights_achieved = {} for c in corpus_list: - datasets.append(BuildConcatDataset(corpus_builders[c], args.shuffle_sample)) + datasets.append(BuildCorpusDataset(corpus_builders[c])) total += datasets[-1].num_samples corpus_weights_achieved[c] = ( float(datasets[-1].num_samples) / train_num_samples From 45ff652dc473fa07ec674543199b2a7785e7f9a6 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Mon, 14 Oct 2024 23:28:00 -0500 Subject: [PATCH 72/92] Discard changes to megatron/data/gpt_dataset.py --- megatron/data/gpt_dataset.py | 921 ++++++++++++----------------------- 1 file changed, 325 insertions(+), 596 deletions(-) diff --git a/megatron/data/gpt_dataset.py 
b/megatron/data/gpt_dataset.py index f62b567ddc..0cf97356a4 100755 --- a/megatron/data/gpt_dataset.py +++ b/megatron/data/gpt_dataset.py @@ -5,114 +5,71 @@ import hashlib import os import time -import logging import numpy as np import torch from deepspeed.accelerator import get_accelerator -from megatron import is_rank_0, get_args +from megatron import print_rank_0, is_rank_0, get_args from megatron.core import mpu -from megatron.data import helpers # type:ignore +from megatron.data import helpers from megatron.data.blendable_dataset import BlendableDataset -from megatron.data.dataset_utils import ( - get_datasets_weights_and_num_samples, - get_datasets_corpuses_weights_and_num_samples, -) +from megatron.data.dataset_utils import get_datasets_weights_and_num_samples, get_datasets_corpuses_weights_and_num_samples from megatron.data.dataset_utils import get_train_valid_test_split_ from megatron.data.indexed_dataset import make_dataset as make_indexed_dataset -from megatron.utils import PerfTrace, Profile, get_logger +from megatron.utils import PerfTrace, Profile from mpi4py import MPI -try: - import ezpz as ez - RANK = ez.get_rank() -except Exception: - RANK = torch.distributed.get_rank() - -# NOTE: [logging]----------------------------------------------------------- -# - Set logging level to "INFO" on RANK == 0, "CRITICAL" on all other ranks -log = logging.getLogger(__name__) -LOG_LEVEL = str(os.environ.get("LOG_LEVEL", "INFO")).upper() -log.setLevel(LOG_LEVEL) if RANK == 0 else log.setLevel("CRITICAL") -# -------------------------------------------------------------------------- - dlp = Profile("DATASET") -log = get_logger(__name__, rank_zero_only=True) - - @dlp.log -def build_train_valid_test_datasets( - data_prefix, - data_impl, - splits_string, - train_valid_test_num_samples, - seq_length, - seed, - skip_warmup, - train_data_prefix=None, - valid_data_prefix=None, - test_data_prefix=None, - return_doc_ids=False, - *, - data_cache_path=None, -): +def 
build_train_valid_test_datasets(data_prefix, data_impl, splits_string, + train_valid_test_num_samples, + seq_length, seed, skip_warmup, + train_data_prefix=None, + valid_data_prefix=None, + test_data_prefix=None, + return_doc_ids=False, *, + data_cache_path=None): """Build train, valid, and test datasets.""" if data_prefix: - log.debug("Single data path provided for train, valid & test") + print_rank_0("Single data path provided for train, valid & test") # Single dataset. if len(data_prefix) == 1: - return _build_train_valid_test_datasets( - data_prefix[0], - data_impl, - splits_string, - train_valid_test_num_samples, - seq_length, - seed, - skip_warmup, - data_cache_path=data_cache_path, - ) + return _build_train_valid_test_datasets(data_prefix[0], + data_impl, splits_string, + train_valid_test_num_samples, + seq_length, seed, skip_warmup, + data_cache_path=data_cache_path) # Blending dataset. # Parse the values. - output = get_datasets_corpuses_weights_and_num_samples( - data_prefix, train_valid_test_num_samples - ) + output = get_datasets_corpuses_weights_and_num_samples(data_prefix, + train_valid_test_num_samples) prefixes, corpuses, weights, datasets_train_valid_test_num_samples = output corpus_list = sorted(set(corpuses)) train_num_samples, valid_num_samples, test_num_samples = map( - sum, zip(*datasets_train_valid_test_num_samples) + sum, + zip(*datasets_train_valid_test_num_samples) ) class DatasetBuilder: - """ + ''' This is for building individual dataset from each dataset file - """ - + ''' @dlp.log - def __init__( - self, - prefix, - corpus, - data_impl, - splits_string, - num_samples, - seq_length, - seed, - skip_warmup, - return_doc_ids, - data_cache_path=data_cache_path, - name="train", - ): + def __init__(self, prefix, corpus, data_impl, splits_string, + num_samples, seq_length, seed, skip_warmup, + return_doc_ids, + data_cache_path=data_cache_path, name='train'): self.prefix = prefix self.data_impl = data_impl self.splits_string = splits_string - 
if name == "train": + if name == 'train': self.num_samples = num_samples[0] - elif name == "valid": + elif name == 'valid': self.num_samples = num_samples[1] else: self.num_samples = num_samples[2] @@ -127,413 +84,279 @@ def __init__( self.desc = prefix + f"{self.num_samples}" + f"{seq_length}" + f"{seed}" self.build = False self.corpus = corpus - @dlp.log def Build(self): - self.dataset = _build_train_valid_test_datasets_single( - self.prefix, - self.data_impl, - self.splits_string, - self.num_samples_train_valid_test, - self.seq_length, - self.seed, - self.skip_warmup, - self.name, - self.return_doc_ids, - data_cache_path=self.data_cache_path, - ) + self.dataset = _build_train_valid_test_datasets_single(self.prefix, self.data_impl, self.splits_string, + self.num_samples_train_valid_test, self.seq_length, self.seed, self.skip_warmup, self.name, self.return_doc_ids, + data_cache_path=self.data_cache_path) self.build = True return self.dataset class BuildConcatDataset(torch.utils.data.Dataset): @dlp.log - def __init__(self, dataset_builders, shuffle=False): + def __init__(self, dataset_builders): self.dataset_builders = dataset_builders self.num_datasets = len(dataset_builders) self.num_samples = np.sum([d.num_samples for d in dataset_builders]) - self.indices = np.zeros((self.num_samples, 2), dtype=np.uint64) - self.desc = "ConcatDataset:" - # m = 0 + self.indices=np.zeros((self.num_samples, 2), dtype=np.uint64) + self.desc="ConcatDataset:" + m = 0 num_samples_list = np.array([d.num_samples for d in dataset_builders]) self.num_samples = np.sum(num_samples_list) - def _build_indices(): start_time = time.time() dataset_index = np.zeros(self.num_samples, dtype=np.int64) dataset_sample_index = np.zeros(self.num_samples, dtype=np.int64) - helpers.build_concat_indices( - dataset_index, - dataset_sample_index, - num_samples_list, - self.num_datasets, - torch.distributed.get_rank() == 0, - ) - log.debug( - "> elapsed time for building concat dataset indices: " - "{:.2f} 
(sec)".format(time.time() - start_time) - ) + helpers.build_concat_indices(dataset_index, dataset_sample_index, + num_samples_list, + self.num_datasets, + torch.distributed.get_rank()==0) + print_rank_0('> elapsed time for building concat dataset indices: ' + '{:.2f} (sec)'.format(time.time() - start_time)) return dataset_index, dataset_sample_index - + self.dataset_index, self.dataset_sample_index = _build_indices() - np_rng = np.random.RandomState(seed=dataset_builders[0].seed) - self.shuffle_index = np.arange(self.num_samples) - if shuffle: - np_rng.shuffle(self.shuffle_index) for i in range(self.num_datasets): self.desc += dataset_builders[i].prefix + "," - self.desc += ( - f"-{self.num_samples}" - + f"-{dataset_builders[0].seq_length}" - + f"{dataset_builders[0].seed}" - ) - + self.desc += f"-{self.num_samples}" + f"-{dataset_builders[0].seq_length}" + f"{dataset_builders[0].seed}" def __len__(self): return self.num_samples @dlp.log def __getitem__(self, idx): - id_shuffle = self.shuffle_index[idx] - i = self.dataset_index[id_shuffle] - j = self.dataset_sample_index[id_shuffle] + if idx >= self.num_samples: + print_rank_0(f"WARNING: index overflow encountered {idx} > {self.num_samples} for {self.dataset_builders[0].corpus}; will randomly pick one sample") + id = np.random.randint(self.num_samples) + else: + id = idx + i = self.dataset_index[idx] + j = self.dataset_sample_index[idx] if self.dataset_builders[i].build: return self.dataset_builders[i].dataset[j] else: return self.dataset_builders[i].Build()[j] + - # Predetermine whether need to build the specific dataset or not. + # Predetermine whether need to build the specific dataset or not. start_time = time.time() - log.debug(" >>> Started building datasets in distributed way ... ") + print_rank_0(" >>> Started building datasets in distributed way ... ") a, b, c = [int(d) for d in splits_string.split(",")] - + train_datasets = [] valid_datasets = [] test_datasets = [] # Build individual datasets. 
- args = get_args() + @dlp.log - def build_corpus_datasets(dataset_type="train"): + def build_corpus_datasets(dataset_type='train'): start_time = time.time() - log.debug(f" >>> Building {dataset_type} corpus datasets ...") + print_rank_0(f" >>> Building {dataset_type} corpus datasets ...") datasets = [] corpus_builders = {} corpus_weights = {} for c in corpus_list: corpus_builders[c] = [] corpus_weights[c] = 0.0 - dataset_builders = [ - DatasetBuilder( - prefixes[i], - corpuses[i], - data_impl, - splits_string, - datasets_train_valid_test_num_samples[i], - seq_length, - seed, - skip_warmup, - return_doc_ids, - data_cache_path, - dataset_type, - ) - for i in range(len(weights)) - ] - for i in range( - torch.distributed.get_rank() - // mpu.get_tensor_model_parallel_world_size(), - len(weights), - torch.distributed.get_world_size() - // mpu.get_tensor_model_parallel_world_size(), - ): + dataset_builders = [DatasetBuilder(prefixes[i], corpuses[i], data_impl, splits_string, + datasets_train_valid_test_num_samples[i], + seq_length, seed, skip_warmup, + return_doc_ids,data_cache_path, dataset_type) for i in range(len(weights))] + for i in range(torch.distributed.get_rank()//mpu.get_tensor_model_parallel_world_size(), len(weights), torch.distributed.get_world_size()//mpu.get_tensor_model_parallel_world_size()): dataset_builders[i].Build() - log.debug( - f" >>> Finished building individual datasets in {time.time() - start_time} seconds" - ) + print_rank_0(f" >>> Finished building individual datasets in {time.time() - start_time} seconds") start_concating_time = time.time() for i, d in zip(range(len(weights)), dataset_builders): corpus_builders[d.corpus].append(d) corpus_weights[d.corpus] += weights[i] total = 0 - log.debug(" > number of samples for each corpus ") - corpus_weights_achieved = {} + print_rank_0(" > number of samples for each corpus ") + corpus_weights_achieved={} for c in corpus_list: - datasets.append(BuildConcatDataset(corpus_builders[c], 
args.shuffle_sample)) + datasets.append(BuildConcatDataset(corpus_builders[c])) total += datasets[-1].num_samples - corpus_weights_achieved[c] = ( - float(datasets[-1].num_samples) / train_num_samples - ) - log.debug( - f" {c}: {datasets[-1].num_samples} w={corpus_weights_achieved[c]} (expected: {corpus_weights[c]})" - ) - - log.debug(f" > total number of samples: {total}") - log.debug( - f" >>> Finished concatenating datasets in {time.time() - start_concating_time} seconds" - ) - log.debug( - f" >>> Finished building {dataset_type} corpus datasets in {time.time() - start_time} seconds" - ) + corpus_weights_achieved[c] = float(datasets[-1].num_samples)/train_num_samples + print_rank_0(f" {c}: {datasets[-1].num_samples} w={corpus_weights_achieved[c]} (expected: {corpus_weights[c]})") + + print_rank_0(f" > total number of samples: {total}") + print_rank_0(f" >>> Finished concatenating datasets in {time.time() - start_concating_time} seconds") + print_rank_0(f" >>> Finished building {dataset_type} corpus datasets in {time.time() - start_time} seconds") return datasets, [corpus_weights_achieved[c] for c in corpus_list] - train_weights = None if a > 0: - train_datasets, train_weights = build_corpus_datasets("train") - valid_weights = None + train_datasets, train_weights = build_corpus_datasets('train') + if b > 0: - valid_datasets, valid_weights = build_corpus_datasets("valid") - test_weights = None - if c > 0: - test_datasets, test_weights = build_corpus_datasets("test") + valid_datasets, valid_weights = build_corpus_datasets('valid') + + if c > 0: + test_datasets, test_weights = build_corpus_datasets('test') # This barrier is critical to make sure that all the datasets are built once # and the metadata were written to the cache folder before other ranks touch them - log.debug( - f" >>> Rank 0 - finished building datasets in {time.time() - start_time} seconds" - ) + print_rank_0(f" >>> Rank 0 - finished building datasets in {time.time() - start_time} seconds") 
torch.distributed.barrier(group=mpu.get_data_parallel_group()) torch.distributed.barrier(group=mpu.get_pipeline_model_parallel_group()) torch.distributed.barrier(group=mpu.get_data_parallel_group()) - log.debug( - f" >>> Finished building datasets (all ranks) in distributed way in {time.time() - start_time} seconds" - ) - log.debug(" >>> Starting to build BlendableDataset") + print_rank_0(f" >>> Finished building datasets (all ranks) in distributed way in {time.time() - start_time} seconds") + print_rank_0(f" >>> Starting to build BlendableDataset") # Blend. start_time = time.time() blending_train_dataset = None - if train_datasets and train_weights: - blending_train_dataset = BlendableDataset( - train_datasets, - train_weights, - train_num_samples, - data_cache_path=data_cache_path, - ) + if train_datasets: + blending_train_dataset = BlendableDataset(train_datasets, train_weights, train_num_samples, + data_cache_path=data_cache_path) blending_valid_dataset = None - if valid_datasets and valid_weights: - blending_valid_dataset = BlendableDataset( - valid_datasets, - valid_weights, - valid_num_samples, - data_cache_path=data_cache_path, - ) + if valid_datasets: + blending_valid_dataset = BlendableDataset(valid_datasets, valid_weights, valid_num_samples, + data_cache_path=data_cache_path) blending_test_dataset = None - if test_datasets and test_weights: - blending_test_dataset = BlendableDataset( - test_datasets, - test_weights, - test_num_samples, - data_cache_path=data_cache_path, - ) + if test_datasets: + blending_test_dataset = BlendableDataset(test_datasets, test_weights, test_num_samples, + data_cache_path=data_cache_path) end_time = time.time() - log.debug( - f" >>> Finished building BlendableDataset in {end_time - start_time} seconds" - ) - return (blending_train_dataset, blending_valid_dataset, blending_test_dataset) + print_rank_0(f" >>> Finished building BlendableDataset in {end_time - start_time} seconds") + return (blending_train_dataset, 
blending_valid_dataset, + blending_test_dataset) else: - log.debug( - "Separate data paths provided for train, valid & test. Split string will be ignored." - ) + print_rank_0("Separate data paths provided for train, valid & test. Split string will be ignored.") train_dataset, valid_dataset, test_dataset = None, None, None # Single dataset. if train_data_prefix is not None: - train_dataset = build_dataset( - "train", - train_data_prefix, - data_impl, - splits_string, - train_valid_test_num_samples[0], - seq_length, - seed, - skip_warmup, - data_cache_path=data_cache_path, - ) + train_dataset = build_dataset("train", train_data_prefix, data_impl, + splits_string, + train_valid_test_num_samples[0], + seq_length, seed, skip_warmup, + data_cache_path=data_cache_path) if valid_data_prefix is not None: - valid_dataset = build_dataset( - "valid", - valid_data_prefix, - data_impl, - splits_string, - train_valid_test_num_samples[1], - seq_length, - seed, - False, - data_cache_path=data_cache_path, - ) + valid_dataset = build_dataset("valid", valid_data_prefix, data_impl, + splits_string, + train_valid_test_num_samples[1], + seq_length, seed, False, + data_cache_path=data_cache_path) + if test_data_prefix is not None: - test_dataset = build_dataset( - "test", - test_data_prefix, - data_impl, - splits_string, - train_valid_test_num_samples[2], - seq_length, - seed, - False, - data_cache_path=data_cache_path, - ) + test_dataset = build_dataset("test", test_data_prefix, data_impl, + splits_string, + train_valid_test_num_samples[2], + seq_length, seed, False, + data_cache_path=data_cache_path) return (train_dataset, valid_dataset, test_dataset) - @dlp.log -def _build_train_valid_test_datasets( - data_prefix, - data_impl, - splits_string, - train_valid_test_num_samples, - seq_length, - seed, - skip_warmup, - return_doc_ids=False, - *, - data_cache_path=None, -): +def _build_train_valid_test_datasets(data_prefix, data_impl, splits_string, + train_valid_test_num_samples, + 
seq_length, seed, skip_warmup, + return_doc_ids=False, *, + data_cache_path=None): """Build train, valid, and test datasets.""" # Indexed dataset. - indexed_dataset = get_indexed_dataset_(data_prefix, data_impl, skip_warmup) + indexed_dataset = get_indexed_dataset_(data_prefix, + data_impl, + skip_warmup) total_num_of_documents = indexed_dataset.sizes.shape[0] splits = get_train_valid_test_split_(splits_string, total_num_of_documents) # Print stats about the splits. - log.debug(" > dataset split:") + print_rank_0(' > dataset split:') def print_split_stats(name, index): - log.debug(" {}:".format(name)) - log.debug( - " document indices in [{}, {}) total of {} " "documents".format( - splits[index], splits[index + 1], splits[index + 1] - splits[index] - ) - ) - - print_split_stats("train", 0) - print_split_stats("validation", 1) - print_split_stats("test", 2) + print_rank_0(' {}:'.format(name)) + print_rank_0(' document indices in [{}, {}) total of {} ' + 'documents'.format(splits[index], splits[index + 1], + splits[index + 1] - splits[index])) + print_split_stats('train', 0) + print_split_stats('validation', 1) + print_split_stats('test', 2) def build_dataset(index, name): dataset = None if splits[index + 1] > splits[index]: - documents = np.arange( - start=splits[index], stop=splits[index + 1], step=1, dtype=np.int32 - ) - dataset = GPTDataset( - name, - data_prefix, - documents, - indexed_dataset, - splits_string, - train_valid_test_num_samples[index], - seq_length, - seed, - return_doc_ids, - data_cache_path=data_cache_path, - ) + documents = np.arange(start=splits[index], stop=splits[index + 1], + step=1, dtype=np.int32) + dataset = GPTDataset(name, data_prefix, documents, indexed_dataset, + splits_string, + train_valid_test_num_samples[index], + seq_length, seed, + return_doc_ids, + data_cache_path=data_cache_path) return dataset - train_dataset = build_dataset(0, "train") - valid_dataset = build_dataset(1, "valid") - test_dataset = build_dataset(2, "test") + 
train_dataset = build_dataset(0, 'train') + valid_dataset = build_dataset(1, 'valid') + test_dataset = build_dataset(2, 'test') return (train_dataset, valid_dataset, test_dataset) - @dlp.log -def _build_train_valid_test_datasets_single( - data_prefix, - data_impl, - splits_string, - train_valid_test_num_samples, - seq_length, - seed, - skip_warmup, - name, - return_doc_ids=False, - *, - data_cache_path=None, -): +def _build_train_valid_test_datasets_single(data_prefix, data_impl, splits_string, + train_valid_test_num_samples, + seq_length, seed, skip_warmup, name, + return_doc_ids=False, *, + data_cache_path=None): """Build train, valid, and test datasets.""" # Each rank print out information - log.debug(f" >> building dataset for {data_prefix}") + print_rank_0(f" >> building dataset for {data_prefix}") # Indexed dataset. - indexed_dataset = get_indexed_dataset_(data_prefix, data_impl, skip_warmup) + indexed_dataset = get_indexed_dataset_(data_prefix, + data_impl, + skip_warmup) total_num_of_documents = indexed_dataset.sizes.shape[0] splits = get_train_valid_test_split_(splits_string, total_num_of_documents) # Print stats about the splits. 
- log.debug(" > dataset split:") + print_rank_0(' > dataset split:') def print_split_stats(name, index): - log.debug(" {}:".format(name)) - log.debug( - " document indices in [{}, {}) total of {} " "documents".format( - splits[index], splits[index + 1], splits[index + 1] - splits[index] - ) - ) - - print_split_stats("train", 0) - print_split_stats("validation", 1) - print_split_stats("test", 2) + print_rank_0(' {}:'.format(name)) + print_rank_0(' document indices in [{}, {}) total of {} ' + 'documents'.format(splits[index], splits[index + 1], + splits[index + 1] - splits[index])) + print_split_stats('train', 0) + print_split_stats('validation', 1) + print_split_stats('test', 2) def build_dataset(index, name): dataset = None if splits[index + 1] > splits[index]: - documents = np.arange( - start=splits[index], stop=splits[index + 1], step=1, dtype=np.int32 - ) - dataset = GPTDataset( - name, - data_prefix, - documents, - indexed_dataset, - splits_string, - train_valid_test_num_samples[index], - seq_length, - seed, - return_doc_ids, - data_cache_path=data_cache_path, - ) + documents = np.arange(start=splits[index], stop=splits[index + 1], + step=1, dtype=np.int32) + dataset = GPTDataset(name, data_prefix, documents, indexed_dataset, + splits_string, + train_valid_test_num_samples[index], + seq_length, seed, + return_doc_ids, + data_cache_path=data_cache_path) return dataset - - if name.find("train") != -1: - return build_dataset(0, "train") - if name.find("valid") != -1: - return build_dataset(1, "valid") - if name.find("test") != -1: - return build_dataset(2, "test") - + if name.find("train")!=-1: + return build_dataset(0, 'train') + if name.find("valid")!=-1: + return build_dataset(1, 'valid') + if name.find("test")!=-1: + return build_dataset(2, 'test') @dlp.log -def build_dataset( - dataset_name, - data_prefix, - data_impl, - splits_string, - num_samples, - seq_length, - seed, - skip_warmup, - *, - data_cache_path=None, -): +def build_dataset(dataset_name, 
data_prefix, data_impl, + splits_string, num_samples, + seq_length, seed, skip_warmup, + *, + data_cache_path=None): dataset = None if len(data_prefix) == 1: - dataset = _build_dataset( - dataset_name, - data_prefix[0], - data_impl, - splits_string, - num_samples, - seq_length, - seed, - skip_warmup, - data_cache_path=data_cache_path, - ) + dataset = _build_dataset(dataset_name, data_prefix[0], data_impl, + splits_string, num_samples, seq_length, + seed, skip_warmup, + data_cache_path=data_cache_path) else: # Blending dataset. # Parse the values. @@ -544,108 +367,73 @@ def build_dataset( # Build individual datasets. datasets = [] for i in range(len(prefixes)): - ds = _build_dataset( - dataset_name, - prefixes[i], - data_impl, - splits_string, - dataset_num_samples[i], - seq_length, - seed, - skip_warmup, - data_cache_path=data_cache_path, - ) + ds = _build_dataset(dataset_name, prefixes[i], data_impl, + splits_string, dataset_num_samples[i], + seq_length, seed, skip_warmup, + data_cache_path=data_cache_path) if ds: datasets.append(ds) if datasets: - dataset = BlendableDataset( - datasets, weights, num_samples, data_cache_path=data_cache_path - ) + dataset = BlendableDataset(datasets, weights, num_samples, + data_cache_path=data_cache_path) return dataset - @dlp.log -def _build_dataset( - dataset_name, - data_prefix, - data_impl, - splits_string, - num_samples, - seq_length, - seed, - skip_warmup, - *, - data_cache_path=None, -): +def _build_dataset(dataset_name, data_prefix, data_impl, splits_string, + num_samples, seq_length, seed, skip_warmup, + *, + data_cache_path=None): """ Build dataset. This method is called when individual train, valid, test datasets are provided """ # Indexed dataset. 
- indexed_dataset = get_indexed_dataset_(data_prefix, data_impl, skip_warmup) + indexed_dataset = get_indexed_dataset_(data_prefix, + data_impl, + skip_warmup) total_num_of_documents = indexed_dataset.sizes.shape[0] - log.debug(" {}:".format(dataset_name)) - log.debug( - " document indices in [0, {}) total of {} " "documents".format( - total_num_of_documents, total_num_of_documents - ) - ) - - documents = np.arange(start=0, stop=total_num_of_documents, step=1, dtype=np.int32) - - dataset = GPTDataset( - dataset_name, - data_prefix, - documents, - indexed_dataset, - splits_string, - num_samples, - seq_length, - seed, - data_cache_path=data_cache_path, - ) + print_rank_0(' {}:'.format(dataset_name)) + print_rank_0(' document indices in [0, {}) total of {} ' + 'documents'.format(total_num_of_documents, total_num_of_documents)) - return dataset + documents = np.arange(start=0, stop=total_num_of_documents, + step=1, dtype=np.int32) + dataset = GPTDataset(dataset_name, data_prefix, documents, indexed_dataset, + splits_string, num_samples, seq_length, seed, + data_cache_path=data_cache_path) + + return dataset @dlp.log def get_indexed_dataset_(data_prefix, data_impl, skip_warmup): """Build indexed dataset.""" - log.debug(" > building dataset index ...") + print_rank_0(' > building dataset index ...') start_time = time.time() - indexed_dataset = make_indexed_dataset(data_prefix, data_impl, skip_warmup) - log.debug( - " > finished creating indexed dataset in {:4f} " "seconds".format( - time.time() - start_time - ) - ) - log.debug(" number of documents: {}".format(indexed_dataset.sizes.shape[0])) + indexed_dataset = make_indexed_dataset(data_prefix, + data_impl, + skip_warmup) + print_rank_0(' > finished creating indexed dataset in {:4f} ' + 'seconds'.format(time.time() - start_time)) + print_rank_0(' number of documents: {}'.format( + indexed_dataset.sizes.shape[0])) return indexed_dataset class GPTDataset(torch.utils.data.Dataset): @dlp.log - def __init__( - self, - name, 
- data_prefix, - documents, - indexed_dataset, - splits_string, - num_samples, - seq_length, - seed, - return_doc_ids=False, - *, - data_cache_path=None, - ): + def __init__(self, name, data_prefix, documents, indexed_dataset, + splits_string, num_samples, seq_length, seed, + return_doc_ids=False, *, + data_cache_path=None): + self.name = name self.indexed_dataset = indexed_dataset self.return_doc_ids = return_doc_ids @@ -655,29 +443,20 @@ def __init__( assert np.max(documents) < indexed_dataset.sizes.shape[0] # Build index mappings. - self.doc_idx, self.sample_idx, self.shuffle_idx, self.desc, self.desc_hash = ( - _build_index_mappings( - self.name, - data_prefix, - documents, - self.indexed_dataset.sizes, - splits_string, - num_samples, - seq_length, - seed, - data_cache_path=data_cache_path, - ) - ) + self.doc_idx, self.sample_idx, self.shuffle_idx, self.desc, self.desc_hash = \ + _build_index_mappings(self.name, data_prefix, + documents, self.indexed_dataset.sizes, + splits_string, num_samples, seq_length, seed, + data_cache_path=data_cache_path) + def __len__(self): # -1 is due to data structure used to retieve the index: # sample i --> [sample_idx[i], sample_idx[i+1]) return self.sample_idx.shape[0] - 1 - @dlp.log def __getitem__(self, idx): args = get_args() - assert args is not None orig_idx = idx # Get the shuffled index. 
try: @@ -686,24 +465,21 @@ def __getitem__(self, idx): if is_rank_0(): import json from rich import print_json - print(exc) print( - "\n".join( - [ - "-------------------------------------------------", - f"Trying to access {idx=} from self.shuffle_idx,", - f"but {len(self.shuffle_idx)=}", - "-------------------------------------------------", - ] + '\n'.join( + ['-------------------------------------------------', + f'Trying to access {idx=} from self.shuffle_idx,', + f'but {len(self.shuffle_idx)=}', + '-------------------------------------------------'] ) ) print_json( json.dumps( { - "doc_idx": len(self.doc_idx), - "sample_idx": len(self.sample_idx), - "shuffle_idx": len(self.shuffle_idx), + 'doc_idx': len(self.doc_idx), + 'sample_idx': len(self.sample_idx), + 'shuffle_idx': len(self.shuffle_idx), }, indent=4, ) @@ -717,57 +493,45 @@ def __getitem__(self, idx): doc_ids = [] if doc_index_f == doc_index_l: doc_ids.append(self.doc_idx[doc_index_f]) - sample = self.indexed_dataset.get( - self.doc_idx[doc_index_f], - offset=offset_f, - length=offset_l - offset_f + 1, - ) + sample = self.indexed_dataset.get(self.doc_idx[doc_index_f], + offset=offset_f, + length=offset_l - offset_f + 1) else: # Otherwise, get the rest of the initial document. doc_ids.append(self.doc_idx[doc_index_f]) - sample_list = [ - self.indexed_dataset.get(self.doc_idx[doc_index_f], offset=offset_f) - ] + sample_list = [self.indexed_dataset.get(self.doc_idx[doc_index_f], + offset=offset_f)] # Loop over all in between documents and add the entire document. for i in range(doc_index_f + 1, doc_index_l): doc_ids.append(self.doc_idx[i]) sample_list.append(self.indexed_dataset.get(self.doc_idx[i])) # And finally add the relevant portion of last document. 
doc_ids.append(self.doc_idx[doc_index_l]) - sample_list.append( - self.indexed_dataset.get(self.doc_idx[doc_index_l], length=offset_l + 1) - ) + sample_list.append(self.indexed_dataset.get( + self.doc_idx[doc_index_l], + length=offset_l + 1)) sample = np.concatenate(sample_list) - text_name = "text" + text_name = 'text' if args.use_dataset_only: - text_name = "input_ids" + text_name = 'input_ids' sample_dict = {text_name: np.array(sample, dtype=np.int64)} if args.return_data_index: - sample_dict.update({"index": np.array([orig_idx], dtype=np.int64)}) + sample_dict.update({'index': np.array([orig_idx], dtype=np.int64)}) - if self.return_doc_ids: # for retro preprocessing - sample_dict.update({"doc_ids": np.array(doc_ids, dtype=np.int64)}) + if self.return_doc_ids: # for retro preprocessing + sample_dict.update({'doc_ids': np.array(doc_ids, dtype=np.int64)}) if args.use_dataset_only: - sample_dict.update({"labels": np.array(sample, dtype=np.int64)}) + sample_dict.update({'labels': np.array(sample, dtype=np.int64)}) return sample_dict - @dlp.log -def _build_index_mappings( - name, - data_prefix, - documents, - sizes, - splits_string, - num_samples, - seq_length, - seed, - *, - data_cache_path, -): +def _build_index_mappings(name, data_prefix, documents, sizes, + splits_string, num_samples, seq_length, seed, + *, + data_cache_path): """Build doc-idx, sample-idx, and shuffle-idx. doc-idx: is an array (ordered) of documents to be used in training. sample-idx: is the start document index and document offset for each @@ -775,11 +539,10 @@ def _build_index_mappings( shuffle-idx: maps the sample index into a random index into sample-idx. """ args = get_args() - assert args is not None # Number of tokens in each epoch and number of required epochs. 
tokens_per_epoch = _num_tokens(documents, sizes) num_epochs = _num_epochs(tokens_per_epoch, seq_length, num_samples) - if args.train_data_exact_num_epochs is not None and name == "train": + if args.train_data_exact_num_epochs is not None and name == 'train': num_epochs = args.train_data_exact_num_epochs # rng state @@ -794,13 +557,13 @@ def _build_index_mappings( desc += f"Sequence length {seq_length}\n" desc += f"Random seed {seed}\n" desc += f"Split {splits_string}\n" - desc_hash = hashlib.md5(desc.encode("utf-8")).hexdigest() + desc_hash = hashlib.md5(desc.encode('utf-8')).hexdigest() desc_filename = desc_hash + ".dsc" - doc_idx_filename = desc_hash + "_doc_idx.npy" - sample_idx_filename = desc_hash + "_sample_idx.npy" - shuffle_idx_filename = desc_hash + "_shuffle_idx.npy" + doc_idx_filename = desc_hash + '_doc_idx.npy' + sample_idx_filename = desc_hash + '_sample_idx.npy' + shuffle_idx_filename = desc_hash + '_shuffle_idx.npy' - if name == "train": + if name == 'train': # force to use certain index files if args.train_desc_path is not None: desc_filename = args.train_desc_path @@ -815,15 +578,15 @@ def _build_index_mappings( # duplication, then look in data-cache-path if specified, # If nothing is found, use the last path looked in build_indices = True - prefixes = [os.path.join(os.path.dirname(data_prefix), "index-cache")] + prefixes = [os.path.join(os.path.dirname(data_prefix), 'index-cache')] if data_cache_path is not None: prefixes.append(data_cache_path) for prefix in prefixes: idx_path = { - "desc": os.path.join(prefix, desc_filename), - "doc": os.path.join(prefix, doc_idx_filename), - "sample": os.path.join(prefix, sample_idx_filename), - "shuffle": os.path.join(prefix, shuffle_idx_filename), + 'desc': os.path.join(prefix, desc_filename), + 'doc': os.path.join(prefix, doc_idx_filename), + 'sample': os.path.join(prefix, sample_idx_filename), + 'shuffle': os.path.join(prefix, shuffle_idx_filename) } for f in idx_path.values(): if not os.path.isfile(f): @@ 
-832,17 +595,15 @@ def _build_index_mappings( # Found our files! build_indices = False break - data_cache_dir = os.path.dirname(idx_path["desc"]) + data_cache_dir = os.path.dirname(idx_path['desc']) data_cache_success = True # Build the indexed mapping if not exist. if build_indices: - # Since this function will be called by all the rank in the very beginning. Therefore, we assume that all the - # ranks will first create the document files, and then read it. + # Since this function will be called by all the rank in the very beginning. Therefore, we assume that all the + # ranks will first create the document files, and then read it. # There will not be contension effects going on either - log.warning( - f" > WARNING: could not find index map files, building on rank {torch.distributed.get_rank()}" - ) + print_rank_0(f" > WARNING: could not find index map files, building on rank {torch.distributed.get_rank()}") # For the last epoch, decide whether include the entire epoch # in the global shuffle or not. @@ -851,80 +612,64 @@ def _build_index_mappings( # not mean anything. if num_epochs == 1: separate_last_epoch = False - log.debug( - " > only one epoch required, setting " "separate_last_epoch to False" - ) + print_rank_0(' > only one epoch required, setting ' + 'separate_last_epoch to False') else: # Get the number of samples for the last epoch num_samples_from_epochs_minus_one = ( - (num_epochs - 1) * tokens_per_epoch - 1 - ) // seq_length - last_epoch_num_samples = num_samples - num_samples_from_epochs_minus_one - assert ( - last_epoch_num_samples >= 0 - ), "last epoch number of samples should be non-negative." + (num_epochs - 1) * tokens_per_epoch - 1) // seq_length + last_epoch_num_samples = num_samples - \ + num_samples_from_epochs_minus_one + assert last_epoch_num_samples >= 0, \ + 'last epoch number of samples should be non-negative.' 
num_samples_per_epoch = (tokens_per_epoch - 1) // seq_length - assert last_epoch_num_samples <= ( - num_samples_per_epoch + 1 - ), "last epoch number of samples exceeded max value." + assert last_epoch_num_samples <= (num_samples_per_epoch + 1), \ + 'last epoch number of samples exceeded max value.' # If we have less than 80% of the samples for the last epoch, # seperate out the epoch and treat it differently. # Note: the 80% number is just based on common sense and can # be adjusted if needed. - separate_last_epoch = last_epoch_num_samples < int( - 0.80 * num_samples_per_epoch - ) + separate_last_epoch = (last_epoch_num_samples < + int(0.80 * num_samples_per_epoch)) if separate_last_epoch: - string = ( - " > last epoch number of samples ({}) is smaller " - "than 80% of number of samples per epoch ({}), " - "setting separate_last_epoch to True" - ) + string = ' > last epoch number of samples ({}) is smaller '\ + 'than 80% of number of samples per epoch ({}), '\ + 'setting separate_last_epoch to True' else: - string = ( - " > last epoch number of samples ({}) is larger " - "than 80% of number of samples per epoch ({}), " - "setting separate_last_epoch to False" - ) - log.debug(string.format(last_epoch_num_samples, num_samples_per_epoch)) + string = ' > last epoch number of samples ({}) is larger '\ + 'than 80% of number of samples per epoch ({}), '\ + 'setting separate_last_epoch to False' + print_rank_0(string.format(last_epoch_num_samples, + num_samples_per_epoch)) + try: os.makedirs(data_cache_dir, exist_ok=True) # description - with open(idx_path["desc"], "wt") as fd: + with open(idx_path['desc'], 'wt') as fd: fd.write(desc) # doc-idx. 
start_time = time.time() - doc_idx = _build_doc_idx(documents, num_epochs, np_rng, separate_last_epoch) - np.save(idx_path["doc"], doc_idx, allow_pickle=True) - log.debug( - " > elasped time to build and save doc-idx mapping " - "(seconds): {:4f}".format(time.time() - start_time) - ) + doc_idx = _build_doc_idx(documents, num_epochs, np_rng, + separate_last_epoch) + np.save(idx_path['doc'], doc_idx, allow_pickle=True) + print_rank_0(' > elasped time to build and save doc-idx mapping ' + '(seconds): {:4f}'.format(time.time() - start_time)) # sample-idx. start_time = time.time() # Use C++ implementation for speed. # First compile and then import. from megatron.data import helpers - assert doc_idx.dtype == np.int32 assert sizes.dtype == np.int32 - sample_idx = helpers.build_sample_idx( - sizes, - doc_idx, - seq_length, - num_epochs, - tokens_per_epoch, - torch.distributed.get_rank() == 0, - ) - np.save(idx_path["sample"], sample_idx, allow_pickle=True) - log.debug( - " > elasped time to build and save sample-idx mapping " - "(seconds): {:4f}".format(time.time() - start_time) - ) + sample_idx = helpers.build_sample_idx(sizes, doc_idx, seq_length, + num_epochs, tokens_per_epoch, torch.distributed.get_rank()==0) + np.save(idx_path['sample'], sample_idx, allow_pickle=True) + print_rank_0(' > elasped time to build and save sample-idx mapping ' + '(seconds): {:4f}'.format(time.time() - start_time)) # shuffle-idx. 
start_time = time.time() # -1 is due to data structure used to retieve the index: @@ -933,46 +678,35 @@ def _build_index_mappings( num_samples_ = num_samples_from_epochs_minus_one else: num_samples_ = sample_idx.shape[0] - 1 - shuffle_idx = _build_shuffle_idx( - num_samples_, sample_idx.shape[0] - 1, np_rng - ) - np.save(idx_path["shuffle"], shuffle_idx, allow_pickle=True) - log.debug( - " > elasped time to build and save shuffle-idx mapping" - " (seconds): {:4f}".format(time.time() - start_time) - ) + shuffle_idx = _build_shuffle_idx(num_samples_, + sample_idx.shape[0] - 1, np_rng) + np.save(idx_path['shuffle'], shuffle_idx, allow_pickle=True) + print_rank_0(' > elasped time to build and save shuffle-idx mapping' + ' (seconds): {:4f}'.format(time.time() - start_time)) except OSError: - print( - f"There was an error trying to create the data cache directory ({data_cache_dir})" - ) - print( - 'or a file in it. This defaults to a directory "index-cache" within the directory' - ) - print( - "the data files are in and can be set with the --data-cache-path argument. Please" - ) - print( - "ensure you have write access to this directory or specify one that you do have" - ) - print("write access to.") + print(f'There was an error trying to create the data cache directory ({data_cache_dir})') + print('or a file in it. This defaults to a directory "index-cache" within the directory') + print('the data files are in and can be set with the --data-cache-path argument. Please') + print('ensure you have write access to this directory or specify one that you do have') + print('write access to.') data_cache_success = False # Load mappings. 
start_time = time.time() - log.debug(f" > loading doc-idx mapping from {idx_path['doc']}") - doc_idx = np.load(idx_path["doc"], allow_pickle=True, mmap_mode="r") + print_rank_0(f" > loading doc-idx mapping from {idx_path['doc']}") + doc_idx = np.load(idx_path['doc'], allow_pickle=True, mmap_mode='r') - log.debug(f" > loading sample-idx mapping from {idx_path['sample']}") - sample_idx = np.load(idx_path["sample"], allow_pickle=True, mmap_mode="r") + print_rank_0(f" > loading sample-idx mapping from {idx_path['sample']}") + sample_idx = np.load(idx_path['sample'], allow_pickle=True, mmap_mode='r') - log.debug(f" > loading shuffle-idx mapping from {idx_path['shuffle']}") - shuffle_idx = np.load(idx_path["shuffle"], allow_pickle=True, mmap_mode="r") + print_rank_0(f" > loading shuffle-idx mapping from {idx_path['shuffle']}") + shuffle_idx = np.load(idx_path['shuffle'], allow_pickle=True, mmap_mode='r') - log.debug( - " loaded indexed file in {:3.3f} seconds".format(time.time() - start_time) - ) - log.debug(" total number of samples: {}".format(sample_idx.shape[0])) - log.debug(" total number of epochs: {}".format(num_epochs)) + print_rank_0(' loaded indexed file in {:3.3f} seconds'.format( + time.time() - start_time)) + print_rank_0(' total number of samples: {}'.format( + sample_idx.shape[0])) + print_rank_0(' total number of epochs: {}'.format(num_epochs)) return doc_idx, sample_idx, shuffle_idx, desc, desc_hash @@ -996,26 +730,25 @@ def _num_epochs(tokens_per_epoch, seq_length, num_samples): if ((total_tokens - 1) // seq_length) >= num_samples: return num_epochs - @dlp.log def _build_doc_idx(documents, num_epochs, np_rng, separate_last_epoch): """Build an array with length = number-of-epochs * number-of-dcuments. 
Each index is mapped to a corresponding document.""" if not separate_last_epoch or num_epochs == 1: - doc_idx = np.mgrid[0:num_epochs, 0 : len(documents)][1] + doc_idx = np.mgrid[0:num_epochs, 0:len(documents)][1] doc_idx[:] = documents doc_idx = doc_idx.reshape(-1) doc_idx = doc_idx.astype(np.int32) np_rng.shuffle(doc_idx) return doc_idx - doc_idx_first = _build_doc_idx(documents, num_epochs - 1, np_rng, False) + doc_idx_first = _build_doc_idx(documents, num_epochs-1, np_rng, False) doc_idx_last = _build_doc_idx(documents, 1, np_rng, False) return np.concatenate((doc_idx_first, doc_idx_last)) - @dlp.log -def _build_sample_idx(sizes, doc_idx, seq_length, num_epochs, tokens_per_epoch): +def _build_sample_idx(sizes, doc_idx, seq_length, + num_epochs, tokens_per_epoch): """Sample index mapping is a 2D array with sizes [number-of-samples + 1, 2] where [..., 0] contains the index into `doc_idx` and [..., 1] is the @@ -1049,7 +782,7 @@ def _build_sample_idx(sizes, doc_idx, seq_length, num_epochs, tokens_per_epoch): # Note that -1 here is for the same reason we have -1 in # `_num_epochs` calculations. if remaining_seq_length <= 0: - doc_offset += remaining_seq_length + doc_length - 1 + doc_offset += (remaining_seq_length + doc_length - 1) remaining_seq_length = 0 else: # Otherwise, start from the begining of the next document. 
@@ -1062,28 +795,24 @@ def _build_sample_idx(sizes, doc_idx, seq_length, num_epochs, tokens_per_epoch): return sample_idx - @dlp.log def _build_shuffle_idx(num_samples, total_size, np_rng): """Build the range [0, size) and shuffle.""" - log.debug( - " > building shuffle index with split [0, {}) and [{}, {}) " "...".format( - num_samples, num_samples, total_size - ) - ) + print_rank_0(' > building shuffle index with split [0, {}) and [{}, {}) ' + '...'.format(num_samples, num_samples, total_size)) dtype_ = np.uint32 if total_size >= (np.iinfo(np.uint32).max - 1): dtype_ = np.int64 - shuffle_idx_first = np.arange(start=0, stop=num_samples, step=1, dtype=dtype_) + shuffle_idx_first = np.arange(start=0, stop=num_samples, + step=1, dtype=dtype_) np_rng.shuffle(shuffle_idx_first) if num_samples == total_size: return shuffle_idx_first - shuffle_idx_last = np.arange( - start=num_samples, stop=total_size, step=1, dtype=dtype_ - ) + shuffle_idx_last = np.arange(start=num_samples, stop=total_size, + step=1, dtype=dtype_) np_rng.shuffle(shuffle_idx_last) return np.concatenate((shuffle_idx_first, shuffle_idx_last)) From 52a406ce7005f7be260462688020f325190a48b8 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Mon, 14 Oct 2024 23:51:07 -0500 Subject: [PATCH 73/92] Consistent logging in `megatron/data/*.py` --- megatron/data/blendable_dataset.py | 14 +- megatron/data/gpt_dataset.py | 907 ++++++++++++++++++----------- megatron/data/indexed_dataset.py | 16 - 3 files changed, 590 insertions(+), 347 deletions(-) mode change 100755 => 100644 megatron/data/gpt_dataset.py diff --git a/megatron/data/blendable_dataset.py b/megatron/data/blendable_dataset.py index 590a379971..979e9a174e 100755 --- a/megatron/data/blendable_dataset.py +++ b/megatron/data/blendable_dataset.py @@ -49,8 +49,10 @@ def _build_indices(): helpers.build_blending_indices(dataset_index, dataset_sample_index, weights, num_datasets, self.size, torch.distributed.get_rank() == 0) - log.info('> elapsed time for building 
blendable dataset indices: ' - '{:.2f} (sec)'.format(time.time() - start_time)) + log.info( + "> elapsed time for building blendable dataset indices: " + f"{time.perf_counter() - start_time:.2f} (sec)" + ) return dataset_index, dataset_sample_index desc = "Blendable dataset\n\n" @@ -74,7 +76,7 @@ def _build_indices(): ' dataset, building indices on rank 0 ...', flush=True) dataset_index, dataset_sample_index = _build_indices() try: - log.info(" > saving index map files") + log.debug(" > saving index map files") start_time = time.time() os.makedirs(os.path.dirname(index_path), exist_ok=True) with open(desc_path, 'wt') as fd: @@ -105,7 +107,7 @@ def _build_indices(): torch.distributed.barrier(group=mpu.get_data_parallel_group()) torch.distributed.barrier(group=mpu.get_pipeline_model_parallel_group()) torch.distributed.barrier(group=mpu.get_data_parallel_group()) - + start_time = time.time() log.info(f'> loading blendable dataset index: {index_path}') self.dataset_index = np.load(index_path, allow_pickle=True, mmap_mode='r') @@ -113,7 +115,7 @@ def _build_indices(): log.info(f'> loading blendable dataset sample index: {sample_index_path}') self.dataset_sample_index = np.load(sample_index_path, allow_pickle=True, mmap_mode='r') assert self.dataset_sample_index.size == self.size - log.info(f'> finished loading in {time.time() - start_time} seconds') + log.info(f'> finished loading in {time.time() - start_time} seconds') else: self.dataset_index, self.dataset_sample_index = _build_indices() @@ -139,4 +141,4 @@ def __getitem__(self, idx): return { "dataset_idx" : dataset_idx, **self.datasets[dataset_idx][sample_idx], - } + } diff --git a/megatron/data/gpt_dataset.py b/megatron/data/gpt_dataset.py old mode 100755 new mode 100644 index 0cf97356a4..c412d02b31 --- a/megatron/data/gpt_dataset.py +++ b/megatron/data/gpt_dataset.py @@ -9,67 +9,96 @@ import numpy as np import torch from deepspeed.accelerator import get_accelerator -from megatron import print_rank_0, is_rank_0, 
get_args +from megatron import is_rank_0, get_args from megatron.core import mpu -from megatron.data import helpers +from megatron.data import helpers # type:ignore from megatron.data.blendable_dataset import BlendableDataset -from megatron.data.dataset_utils import get_datasets_weights_and_num_samples, get_datasets_corpuses_weights_and_num_samples +from megatron.data.dataset_utils import ( + get_datasets_weights_and_num_samples, + get_datasets_corpuses_weights_and_num_samples, +) from megatron.data.dataset_utils import get_train_valid_test_split_ from megatron.data.indexed_dataset import make_dataset as make_indexed_dataset -from megatron.utils import PerfTrace, Profile +from megatron.utils import PerfTrace, Profile, get_logger from mpi4py import MPI dlp = Profile("DATASET") +log = get_logger(__name__, rank_zero_only=True) + + @dlp.log -def build_train_valid_test_datasets(data_prefix, data_impl, splits_string, - train_valid_test_num_samples, - seq_length, seed, skip_warmup, - train_data_prefix=None, - valid_data_prefix=None, - test_data_prefix=None, - return_doc_ids=False, *, - data_cache_path=None): +def build_train_valid_test_datasets( + data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples, + seq_length, + seed, + skip_warmup, + train_data_prefix=None, + valid_data_prefix=None, + test_data_prefix=None, + return_doc_ids=False, + *, + data_cache_path=None, +): """Build train, valid, and test datasets.""" if data_prefix: - print_rank_0("Single data path provided for train, valid & test") + log.debug("Single data path provided for train, valid & test") # Single dataset. 
if len(data_prefix) == 1: - return _build_train_valid_test_datasets(data_prefix[0], - data_impl, splits_string, - train_valid_test_num_samples, - seq_length, seed, skip_warmup, - data_cache_path=data_cache_path) + return _build_train_valid_test_datasets( + data_prefix[0], + data_impl, + splits_string, + train_valid_test_num_samples, + seq_length, + seed, + skip_warmup, + data_cache_path=data_cache_path, + ) # Blending dataset. # Parse the values. - output = get_datasets_corpuses_weights_and_num_samples(data_prefix, - train_valid_test_num_samples) + output = get_datasets_corpuses_weights_and_num_samples( + data_prefix, train_valid_test_num_samples + ) prefixes, corpuses, weights, datasets_train_valid_test_num_samples = output corpus_list = sorted(set(corpuses)) train_num_samples, valid_num_samples, test_num_samples = map( - sum, - zip(*datasets_train_valid_test_num_samples) + sum, zip(*datasets_train_valid_test_num_samples) ) class DatasetBuilder: - ''' + """ This is for building individual dataset from each dataset file - ''' + """ + @dlp.log - def __init__(self, prefix, corpus, data_impl, splits_string, - num_samples, seq_length, seed, skip_warmup, - return_doc_ids, - data_cache_path=data_cache_path, name='train'): + def __init__( + self, + prefix, + corpus, + data_impl, + splits_string, + num_samples, + seq_length, + seed, + skip_warmup, + return_doc_ids, + data_cache_path=data_cache_path, + name="train", + ): self.prefix = prefix self.data_impl = data_impl self.splits_string = splits_string - if name == 'train': + if name == "train": self.num_samples = num_samples[0] - elif name == 'valid': + elif name == "valid": self.num_samples = num_samples[1] else: self.num_samples = num_samples[2] @@ -84,279 +113,413 @@ def __init__(self, prefix, corpus, data_impl, splits_string, self.desc = prefix + f"{self.num_samples}" + f"{seq_length}" + f"{seed}" self.build = False self.corpus = corpus + @dlp.log def Build(self): - self.dataset = 
_build_train_valid_test_datasets_single(self.prefix, self.data_impl, self.splits_string, - self.num_samples_train_valid_test, self.seq_length, self.seed, self.skip_warmup, self.name, self.return_doc_ids, - data_cache_path=self.data_cache_path) + self.dataset = _build_train_valid_test_datasets_single( + self.prefix, + self.data_impl, + self.splits_string, + self.num_samples_train_valid_test, + self.seq_length, + self.seed, + self.skip_warmup, + self.name, + self.return_doc_ids, + data_cache_path=self.data_cache_path, + ) self.build = True return self.dataset class BuildConcatDataset(torch.utils.data.Dataset): @dlp.log - def __init__(self, dataset_builders): + def __init__(self, dataset_builders, shuffle=False): self.dataset_builders = dataset_builders self.num_datasets = len(dataset_builders) self.num_samples = np.sum([d.num_samples for d in dataset_builders]) - self.indices=np.zeros((self.num_samples, 2), dtype=np.uint64) - self.desc="ConcatDataset:" - m = 0 + self.indices = np.zeros((self.num_samples, 2), dtype=np.uint64) + self.desc = "ConcatDataset:" + # m = 0 num_samples_list = np.array([d.num_samples for d in dataset_builders]) self.num_samples = np.sum(num_samples_list) + def _build_indices(): start_time = time.time() dataset_index = np.zeros(self.num_samples, dtype=np.int64) dataset_sample_index = np.zeros(self.num_samples, dtype=np.int64) - helpers.build_concat_indices(dataset_index, dataset_sample_index, - num_samples_list, - self.num_datasets, - torch.distributed.get_rank()==0) - print_rank_0('> elapsed time for building concat dataset indices: ' - '{:.2f} (sec)'.format(time.time() - start_time)) + helpers.build_concat_indices( + dataset_index, + dataset_sample_index, + num_samples_list, + self.num_datasets, + torch.distributed.get_rank() == 0, + ) + log.debug( + "> elapsed time for building concat dataset indices: " + "{:.2f} (sec)".format(time.time() - start_time) + ) return dataset_index, dataset_sample_index - + self.dataset_index, 
self.dataset_sample_index = _build_indices() + np_rng = np.random.RandomState(seed=dataset_builders[0].seed) + self.shuffle_index = np.arange(self.num_samples) + if shuffle: + np_rng.shuffle(self.shuffle_index) for i in range(self.num_datasets): self.desc += dataset_builders[i].prefix + "," - self.desc += f"-{self.num_samples}" + f"-{dataset_builders[0].seq_length}" + f"{dataset_builders[0].seed}" + self.desc += ( + f"-{self.num_samples}" + + f"-{dataset_builders[0].seq_length}" + + f"{dataset_builders[0].seed}" + ) + def __len__(self): return self.num_samples @dlp.log def __getitem__(self, idx): - if idx >= self.num_samples: - print_rank_0(f"WARNING: index overflow encountered {idx} > {self.num_samples} for {self.dataset_builders[0].corpus}; will randomly pick one sample") - id = np.random.randint(self.num_samples) - else: - id = idx - i = self.dataset_index[idx] - j = self.dataset_sample_index[idx] + id_shuffle = self.shuffle_index[idx] + i = self.dataset_index[id_shuffle] + j = self.dataset_sample_index[id_shuffle] if self.dataset_builders[i].build: return self.dataset_builders[i].dataset[j] else: return self.dataset_builders[i].Build()[j] - - # Predetermine whether need to build the specific dataset or not. + # Predetermine whether need to build the specific dataset or not. start_time = time.time() - print_rank_0(" >>> Started building datasets in distributed way ... ") + log.debug(" >>> Started building datasets in distributed way ... ") a, b, c = [int(d) for d in splits_string.split(",")] - + train_datasets = [] valid_datasets = [] test_datasets = [] # Build individual datasets. 
- + args = get_args() @dlp.log - def build_corpus_datasets(dataset_type='train'): + def build_corpus_datasets(dataset_type="train"): start_time = time.time() - print_rank_0(f" >>> Building {dataset_type} corpus datasets ...") + log.debug(f" >>> Building {dataset_type} corpus datasets ...") datasets = [] corpus_builders = {} corpus_weights = {} for c in corpus_list: corpus_builders[c] = [] corpus_weights[c] = 0.0 - dataset_builders = [DatasetBuilder(prefixes[i], corpuses[i], data_impl, splits_string, - datasets_train_valid_test_num_samples[i], - seq_length, seed, skip_warmup, - return_doc_ids,data_cache_path, dataset_type) for i in range(len(weights))] - for i in range(torch.distributed.get_rank()//mpu.get_tensor_model_parallel_world_size(), len(weights), torch.distributed.get_world_size()//mpu.get_tensor_model_parallel_world_size()): + dataset_builders = [ + DatasetBuilder( + prefixes[i], + corpuses[i], + data_impl, + splits_string, + datasets_train_valid_test_num_samples[i], + seq_length, + seed, + skip_warmup, + return_doc_ids, + data_cache_path, + dataset_type, + ) + for i in range(len(weights)) + ] + for i in range( + torch.distributed.get_rank() + // mpu.get_tensor_model_parallel_world_size(), + len(weights), + torch.distributed.get_world_size() + // mpu.get_tensor_model_parallel_world_size(), + ): dataset_builders[i].Build() - print_rank_0(f" >>> Finished building individual datasets in {time.time() - start_time} seconds") + log.debug( + f" >>> Finished building individual datasets in {time.time() - start_time} seconds" + ) start_concating_time = time.time() for i, d in zip(range(len(weights)), dataset_builders): corpus_builders[d.corpus].append(d) corpus_weights[d.corpus] += weights[i] total = 0 - print_rank_0(" > number of samples for each corpus ") - corpus_weights_achieved={} + log.debug(" > number of samples for each corpus ") + corpus_weights_achieved = {} for c in corpus_list: - datasets.append(BuildConcatDataset(corpus_builders[c])) + 
datasets.append(BuildConcatDataset(corpus_builders[c], args.shuffle_sample)) total += datasets[-1].num_samples - corpus_weights_achieved[c] = float(datasets[-1].num_samples)/train_num_samples - print_rank_0(f" {c}: {datasets[-1].num_samples} w={corpus_weights_achieved[c]} (expected: {corpus_weights[c]})") - - print_rank_0(f" > total number of samples: {total}") - print_rank_0(f" >>> Finished concatenating datasets in {time.time() - start_concating_time} seconds") - print_rank_0(f" >>> Finished building {dataset_type} corpus datasets in {time.time() - start_time} seconds") + corpus_weights_achieved[c] = ( + float(datasets[-1].num_samples) / train_num_samples + ) + log.debug( + f" {c}: {datasets[-1].num_samples} w={corpus_weights_achieved[c]} (expected: {corpus_weights[c]})" + ) + + log.debug(f" > total number of samples: {total}") + log.debug( + f" >>> Finished concatenating datasets in {time.time() - start_concating_time} seconds" + ) + log.debug( + f" >>> Finished building {dataset_type} corpus datasets in {time.time() - start_time} seconds" + ) return datasets, [corpus_weights_achieved[c] for c in corpus_list] + train_weights = None if a > 0: - train_datasets, train_weights = build_corpus_datasets('train') - + train_datasets, train_weights = build_corpus_datasets("train") + valid_weights = None if b > 0: - valid_datasets, valid_weights = build_corpus_datasets('valid') - - if c > 0: - test_datasets, test_weights = build_corpus_datasets('test') + valid_datasets, valid_weights = build_corpus_datasets("valid") + test_weights = None + if c > 0: + test_datasets, test_weights = build_corpus_datasets("test") # This barrier is critical to make sure that all the datasets are built once # and the metadata were written to the cache folder before other ranks touch them - print_rank_0(f" >>> Rank 0 - finished building datasets in {time.time() - start_time} seconds") + log.debug( + f" >>> Rank 0 - finished building datasets in {time.time() - start_time} seconds" + ) 
torch.distributed.barrier(group=mpu.get_data_parallel_group()) torch.distributed.barrier(group=mpu.get_pipeline_model_parallel_group()) torch.distributed.barrier(group=mpu.get_data_parallel_group()) - print_rank_0(f" >>> Finished building datasets (all ranks) in distributed way in {time.time() - start_time} seconds") - print_rank_0(f" >>> Starting to build BlendableDataset") + log.debug( + f" >>> Finished building datasets (all ranks) in distributed way in {time.time() - start_time} seconds" + ) + log.debug(" >>> Starting to build BlendableDataset") # Blend. start_time = time.time() blending_train_dataset = None - if train_datasets: - blending_train_dataset = BlendableDataset(train_datasets, train_weights, train_num_samples, - data_cache_path=data_cache_path) + if train_datasets and train_weights: + blending_train_dataset = BlendableDataset( + train_datasets, + train_weights, + train_num_samples, + data_cache_path=data_cache_path, + ) blending_valid_dataset = None - if valid_datasets: - blending_valid_dataset = BlendableDataset(valid_datasets, valid_weights, valid_num_samples, - data_cache_path=data_cache_path) + if valid_datasets and valid_weights: + blending_valid_dataset = BlendableDataset( + valid_datasets, + valid_weights, + valid_num_samples, + data_cache_path=data_cache_path, + ) blending_test_dataset = None - if test_datasets: - blending_test_dataset = BlendableDataset(test_datasets, test_weights, test_num_samples, - data_cache_path=data_cache_path) + if test_datasets and test_weights: + blending_test_dataset = BlendableDataset( + test_datasets, + test_weights, + test_num_samples, + data_cache_path=data_cache_path, + ) end_time = time.time() - print_rank_0(f" >>> Finished building BlendableDataset in {end_time - start_time} seconds") - return (blending_train_dataset, blending_valid_dataset, - blending_test_dataset) + log.debug( + f" >>> Finished building BlendableDataset in {end_time - start_time} seconds" + ) + return (blending_train_dataset, 
blending_valid_dataset, blending_test_dataset) else: - print_rank_0("Separate data paths provided for train, valid & test. Split string will be ignored.") + log.debug( + "Separate data paths provided for train, valid & test. Split string will be ignored." + ) train_dataset, valid_dataset, test_dataset = None, None, None # Single dataset. if train_data_prefix is not None: - train_dataset = build_dataset("train", train_data_prefix, data_impl, - splits_string, - train_valid_test_num_samples[0], - seq_length, seed, skip_warmup, - data_cache_path=data_cache_path) + train_dataset = build_dataset( + "train", + train_data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples[0], + seq_length, + seed, + skip_warmup, + data_cache_path=data_cache_path, + ) if valid_data_prefix is not None: - valid_dataset = build_dataset("valid", valid_data_prefix, data_impl, - splits_string, - train_valid_test_num_samples[1], - seq_length, seed, False, - data_cache_path=data_cache_path) - + valid_dataset = build_dataset( + "valid", + valid_data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples[1], + seq_length, + seed, + False, + data_cache_path=data_cache_path, + ) if test_data_prefix is not None: - test_dataset = build_dataset("test", test_data_prefix, data_impl, - splits_string, - train_valid_test_num_samples[2], - seq_length, seed, False, - data_cache_path=data_cache_path) + test_dataset = build_dataset( + "test", + test_data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples[2], + seq_length, + seed, + False, + data_cache_path=data_cache_path, + ) return (train_dataset, valid_dataset, test_dataset) + @dlp.log -def _build_train_valid_test_datasets(data_prefix, data_impl, splits_string, - train_valid_test_num_samples, - seq_length, seed, skip_warmup, - return_doc_ids=False, *, - data_cache_path=None): +def _build_train_valid_test_datasets( + data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples, + seq_length, + seed, 
+ skip_warmup, + return_doc_ids=False, + *, + data_cache_path=None, +): """Build train, valid, and test datasets.""" # Indexed dataset. - indexed_dataset = get_indexed_dataset_(data_prefix, - data_impl, - skip_warmup) + indexed_dataset = get_indexed_dataset_(data_prefix, data_impl, skip_warmup) total_num_of_documents = indexed_dataset.sizes.shape[0] splits = get_train_valid_test_split_(splits_string, total_num_of_documents) # Print stats about the splits. - print_rank_0(' > dataset split:') + log.debug(" > dataset split:") def print_split_stats(name, index): - print_rank_0(' {}:'.format(name)) - print_rank_0(' document indices in [{}, {}) total of {} ' - 'documents'.format(splits[index], splits[index + 1], - splits[index + 1] - splits[index])) - print_split_stats('train', 0) - print_split_stats('validation', 1) - print_split_stats('test', 2) + log.debug(" {}:".format(name)) + log.debug( + " document indices in [{}, {}) total of {} " "documents".format( + splits[index], splits[index + 1], splits[index + 1] - splits[index] + ) + ) + + print_split_stats("train", 0) + print_split_stats("validation", 1) + print_split_stats("test", 2) def build_dataset(index, name): dataset = None if splits[index + 1] > splits[index]: - documents = np.arange(start=splits[index], stop=splits[index + 1], - step=1, dtype=np.int32) - dataset = GPTDataset(name, data_prefix, documents, indexed_dataset, - splits_string, - train_valid_test_num_samples[index], - seq_length, seed, - return_doc_ids, - data_cache_path=data_cache_path) + documents = np.arange( + start=splits[index], stop=splits[index + 1], step=1, dtype=np.int32 + ) + dataset = GPTDataset( + name, + data_prefix, + documents, + indexed_dataset, + splits_string, + train_valid_test_num_samples[index], + seq_length, + seed, + return_doc_ids, + data_cache_path=data_cache_path, + ) return dataset - train_dataset = build_dataset(0, 'train') - valid_dataset = build_dataset(1, 'valid') - test_dataset = build_dataset(2, 'test') + train_dataset 
= build_dataset(0, "train") + valid_dataset = build_dataset(1, "valid") + test_dataset = build_dataset(2, "test") return (train_dataset, valid_dataset, test_dataset) + @dlp.log -def _build_train_valid_test_datasets_single(data_prefix, data_impl, splits_string, - train_valid_test_num_samples, - seq_length, seed, skip_warmup, name, - return_doc_ids=False, *, - data_cache_path=None): +def _build_train_valid_test_datasets_single( + data_prefix, + data_impl, + splits_string, + train_valid_test_num_samples, + seq_length, + seed, + skip_warmup, + name, + return_doc_ids=False, + *, + data_cache_path=None, +): """Build train, valid, and test datasets.""" # Each rank print out information - print_rank_0(f" >> building dataset for {data_prefix}") + log.debug(f" >> building dataset for {data_prefix}") # Indexed dataset. - indexed_dataset = get_indexed_dataset_(data_prefix, - data_impl, - skip_warmup) + indexed_dataset = get_indexed_dataset_(data_prefix, data_impl, skip_warmup) total_num_of_documents = indexed_dataset.sizes.shape[0] splits = get_train_valid_test_split_(splits_string, total_num_of_documents) # Print stats about the splits. 
- print_rank_0(' > dataset split:') + log.debug(" > dataset split:") def print_split_stats(name, index): - print_rank_0(' {}:'.format(name)) - print_rank_0(' document indices in [{}, {}) total of {} ' - 'documents'.format(splits[index], splits[index + 1], - splits[index + 1] - splits[index])) - print_split_stats('train', 0) - print_split_stats('validation', 1) - print_split_stats('test', 2) + log.debug(" {}:".format(name)) + log.debug( + " document indices in [{}, {}) total of {} " "documents".format( + splits[index], splits[index + 1], splits[index + 1] - splits[index] + ) + ) + + print_split_stats("train", 0) + print_split_stats("validation", 1) + print_split_stats("test", 2) def build_dataset(index, name): dataset = None if splits[index + 1] > splits[index]: - documents = np.arange(start=splits[index], stop=splits[index + 1], - step=1, dtype=np.int32) - dataset = GPTDataset(name, data_prefix, documents, indexed_dataset, - splits_string, - train_valid_test_num_samples[index], - seq_length, seed, - return_doc_ids, - data_cache_path=data_cache_path) + documents = np.arange( + start=splits[index], stop=splits[index + 1], step=1, dtype=np.int32 + ) + dataset = GPTDataset( + name, + data_prefix, + documents, + indexed_dataset, + splits_string, + train_valid_test_num_samples[index], + seq_length, + seed, + return_doc_ids, + data_cache_path=data_cache_path, + ) return dataset - if name.find("train")!=-1: - return build_dataset(0, 'train') - if name.find("valid")!=-1: - return build_dataset(1, 'valid') - if name.find("test")!=-1: - return build_dataset(2, 'test') + + if name.find("train") != -1: + return build_dataset(0, "train") + if name.find("valid") != -1: + return build_dataset(1, "valid") + if name.find("test") != -1: + return build_dataset(2, "test") + @dlp.log -def build_dataset(dataset_name, data_prefix, data_impl, - splits_string, num_samples, - seq_length, seed, skip_warmup, - *, - data_cache_path=None): +def build_dataset( + dataset_name, + data_prefix, + 
data_impl, + splits_string, + num_samples, + seq_length, + seed, + skip_warmup, + *, + data_cache_path=None, +): dataset = None if len(data_prefix) == 1: - dataset = _build_dataset(dataset_name, data_prefix[0], data_impl, - splits_string, num_samples, seq_length, - seed, skip_warmup, - data_cache_path=data_cache_path) + dataset = _build_dataset( + dataset_name, + data_prefix[0], + data_impl, + splits_string, + num_samples, + seq_length, + seed, + skip_warmup, + data_cache_path=data_cache_path, + ) else: # Blending dataset. # Parse the values. @@ -367,73 +530,108 @@ def build_dataset(dataset_name, data_prefix, data_impl, # Build individual datasets. datasets = [] for i in range(len(prefixes)): - ds = _build_dataset(dataset_name, prefixes[i], data_impl, - splits_string, dataset_num_samples[i], - seq_length, seed, skip_warmup, - data_cache_path=data_cache_path) + ds = _build_dataset( + dataset_name, + prefixes[i], + data_impl, + splits_string, + dataset_num_samples[i], + seq_length, + seed, + skip_warmup, + data_cache_path=data_cache_path, + ) if ds: datasets.append(ds) if datasets: - dataset = BlendableDataset(datasets, weights, num_samples, - data_cache_path=data_cache_path) + dataset = BlendableDataset( + datasets, weights, num_samples, data_cache_path=data_cache_path + ) return dataset + @dlp.log -def _build_dataset(dataset_name, data_prefix, data_impl, splits_string, - num_samples, seq_length, seed, skip_warmup, - *, - data_cache_path=None): +def _build_dataset( + dataset_name, + data_prefix, + data_impl, + splits_string, + num_samples, + seq_length, + seed, + skip_warmup, + *, + data_cache_path=None, +): """ Build dataset. This method is called when individual train, valid, test datasets are provided """ # Indexed dataset. 
- indexed_dataset = get_indexed_dataset_(data_prefix, - data_impl, - skip_warmup) + indexed_dataset = get_indexed_dataset_(data_prefix, data_impl, skip_warmup) total_num_of_documents = indexed_dataset.sizes.shape[0] - print_rank_0(' {}:'.format(dataset_name)) - print_rank_0(' document indices in [0, {}) total of {} ' - 'documents'.format(total_num_of_documents, total_num_of_documents)) - - documents = np.arange(start=0, stop=total_num_of_documents, - step=1, dtype=np.int32) - - dataset = GPTDataset(dataset_name, data_prefix, documents, indexed_dataset, - splits_string, num_samples, seq_length, seed, - data_cache_path=data_cache_path) + log.debug(" {}:".format(dataset_name)) + log.debug( + " document indices in [0, {}) total of {} " "documents".format( + total_num_of_documents, total_num_of_documents + ) + ) + + documents = np.arange(start=0, stop=total_num_of_documents, step=1, dtype=np.int32) + + dataset = GPTDataset( + dataset_name, + data_prefix, + documents, + indexed_dataset, + splits_string, + num_samples, + seq_length, + seed, + data_cache_path=data_cache_path, + ) return dataset + @dlp.log def get_indexed_dataset_(data_prefix, data_impl, skip_warmup): """Build indexed dataset.""" - print_rank_0(' > building dataset index ...') + log.debug(" > building dataset index ...") start_time = time.time() - indexed_dataset = make_indexed_dataset(data_prefix, - data_impl, - skip_warmup) - print_rank_0(' > finished creating indexed dataset in {:4f} ' - 'seconds'.format(time.time() - start_time)) - print_rank_0(' number of documents: {}'.format( - indexed_dataset.sizes.shape[0])) + indexed_dataset = make_indexed_dataset(data_prefix, data_impl, skip_warmup) + log.debug( + " > finished creating indexed dataset in {:4f} " "seconds".format( + time.time() - start_time + ) + ) + log.debug(" number of documents: {}".format(indexed_dataset.sizes.shape[0])) return indexed_dataset class GPTDataset(torch.utils.data.Dataset): @dlp.log - def __init__(self, name, data_prefix, 
documents, indexed_dataset, - splits_string, num_samples, seq_length, seed, - return_doc_ids=False, *, - data_cache_path=None): - + def __init__( + self, + name, + data_prefix, + documents, + indexed_dataset, + splits_string, + num_samples, + seq_length, + seed, + return_doc_ids=False, + *, + data_cache_path=None, + ): self.name = name self.indexed_dataset = indexed_dataset self.return_doc_ids = return_doc_ids @@ -443,20 +641,29 @@ def __init__(self, name, data_prefix, documents, indexed_dataset, assert np.max(documents) < indexed_dataset.sizes.shape[0] # Build index mappings. - self.doc_idx, self.sample_idx, self.shuffle_idx, self.desc, self.desc_hash = \ - _build_index_mappings(self.name, data_prefix, - documents, self.indexed_dataset.sizes, - splits_string, num_samples, seq_length, seed, - data_cache_path=data_cache_path) - + self.doc_idx, self.sample_idx, self.shuffle_idx, self.desc, self.desc_hash = ( + _build_index_mappings( + self.name, + data_prefix, + documents, + self.indexed_dataset.sizes, + splits_string, + num_samples, + seq_length, + seed, + data_cache_path=data_cache_path, + ) + ) def __len__(self): # -1 is due to data structure used to retieve the index: # sample i --> [sample_idx[i], sample_idx[i+1]) return self.sample_idx.shape[0] - 1 + @dlp.log def __getitem__(self, idx): args = get_args() + assert args is not None orig_idx = idx # Get the shuffled index. 
try: @@ -465,21 +672,24 @@ def __getitem__(self, idx): if is_rank_0(): import json from rich import print_json + print(exc) print( - '\n'.join( - ['-------------------------------------------------', - f'Trying to access {idx=} from self.shuffle_idx,', - f'but {len(self.shuffle_idx)=}', - '-------------------------------------------------'] + "\n".join( + [ + "-------------------------------------------------", + f"Trying to access {idx=} from self.shuffle_idx,", + f"but {len(self.shuffle_idx)=}", + "-------------------------------------------------", + ] ) ) print_json( json.dumps( { - 'doc_idx': len(self.doc_idx), - 'sample_idx': len(self.sample_idx), - 'shuffle_idx': len(self.shuffle_idx), + "doc_idx": len(self.doc_idx), + "sample_idx": len(self.sample_idx), + "shuffle_idx": len(self.shuffle_idx), }, indent=4, ) @@ -493,45 +703,57 @@ def __getitem__(self, idx): doc_ids = [] if doc_index_f == doc_index_l: doc_ids.append(self.doc_idx[doc_index_f]) - sample = self.indexed_dataset.get(self.doc_idx[doc_index_f], - offset=offset_f, - length=offset_l - offset_f + 1) + sample = self.indexed_dataset.get( + self.doc_idx[doc_index_f], + offset=offset_f, + length=offset_l - offset_f + 1, + ) else: # Otherwise, get the rest of the initial document. doc_ids.append(self.doc_idx[doc_index_f]) - sample_list = [self.indexed_dataset.get(self.doc_idx[doc_index_f], - offset=offset_f)] + sample_list = [ + self.indexed_dataset.get(self.doc_idx[doc_index_f], offset=offset_f) + ] # Loop over all in between documents and add the entire document. for i in range(doc_index_f + 1, doc_index_l): doc_ids.append(self.doc_idx[i]) sample_list.append(self.indexed_dataset.get(self.doc_idx[i])) # And finally add the relevant portion of last document. 
doc_ids.append(self.doc_idx[doc_index_l]) - sample_list.append(self.indexed_dataset.get( - self.doc_idx[doc_index_l], - length=offset_l + 1)) + sample_list.append( + self.indexed_dataset.get(self.doc_idx[doc_index_l], length=offset_l + 1) + ) sample = np.concatenate(sample_list) - text_name = 'text' + text_name = "text" if args.use_dataset_only: - text_name = 'input_ids' + text_name = "input_ids" sample_dict = {text_name: np.array(sample, dtype=np.int64)} if args.return_data_index: - sample_dict.update({'index': np.array([orig_idx], dtype=np.int64)}) + sample_dict.update({"index": np.array([orig_idx], dtype=np.int64)}) - if self.return_doc_ids: # for retro preprocessing - sample_dict.update({'doc_ids': np.array(doc_ids, dtype=np.int64)}) + if self.return_doc_ids: # for retro preprocessing + sample_dict.update({"doc_ids": np.array(doc_ids, dtype=np.int64)}) if args.use_dataset_only: - sample_dict.update({'labels': np.array(sample, dtype=np.int64)}) + sample_dict.update({"labels": np.array(sample, dtype=np.int64)}) return sample_dict + @dlp.log -def _build_index_mappings(name, data_prefix, documents, sizes, - splits_string, num_samples, seq_length, seed, - *, - data_cache_path): +def _build_index_mappings( + name, + data_prefix, + documents, + sizes, + splits_string, + num_samples, + seq_length, + seed, + *, + data_cache_path, +): """Build doc-idx, sample-idx, and shuffle-idx. doc-idx: is an array (ordered) of documents to be used in training. sample-idx: is the start document index and document offset for each @@ -539,10 +761,11 @@ def _build_index_mappings(name, data_prefix, documents, sizes, shuffle-idx: maps the sample index into a random index into sample-idx. """ args = get_args() + assert args is not None # Number of tokens in each epoch and number of required epochs. 
tokens_per_epoch = _num_tokens(documents, sizes) num_epochs = _num_epochs(tokens_per_epoch, seq_length, num_samples) - if args.train_data_exact_num_epochs is not None and name == 'train': + if args.train_data_exact_num_epochs is not None and name == "train": num_epochs = args.train_data_exact_num_epochs # rng state @@ -557,13 +780,13 @@ def _build_index_mappings(name, data_prefix, documents, sizes, desc += f"Sequence length {seq_length}\n" desc += f"Random seed {seed}\n" desc += f"Split {splits_string}\n" - desc_hash = hashlib.md5(desc.encode('utf-8')).hexdigest() + desc_hash = hashlib.md5(desc.encode("utf-8")).hexdigest() desc_filename = desc_hash + ".dsc" - doc_idx_filename = desc_hash + '_doc_idx.npy' - sample_idx_filename = desc_hash + '_sample_idx.npy' - shuffle_idx_filename = desc_hash + '_shuffle_idx.npy' + doc_idx_filename = desc_hash + "_doc_idx.npy" + sample_idx_filename = desc_hash + "_sample_idx.npy" + shuffle_idx_filename = desc_hash + "_shuffle_idx.npy" - if name == 'train': + if name == "train": # force to use certain index files if args.train_desc_path is not None: desc_filename = args.train_desc_path @@ -578,15 +801,15 @@ def _build_index_mappings(name, data_prefix, documents, sizes, # duplication, then look in data-cache-path if specified, # If nothing is found, use the last path looked in build_indices = True - prefixes = [os.path.join(os.path.dirname(data_prefix), 'index-cache')] + prefixes = [os.path.join(os.path.dirname(data_prefix), "index-cache")] if data_cache_path is not None: prefixes.append(data_cache_path) for prefix in prefixes: idx_path = { - 'desc': os.path.join(prefix, desc_filename), - 'doc': os.path.join(prefix, doc_idx_filename), - 'sample': os.path.join(prefix, sample_idx_filename), - 'shuffle': os.path.join(prefix, shuffle_idx_filename) + "desc": os.path.join(prefix, desc_filename), + "doc": os.path.join(prefix, doc_idx_filename), + "sample": os.path.join(prefix, sample_idx_filename), + "shuffle": os.path.join(prefix, 
shuffle_idx_filename), } for f in idx_path.values(): if not os.path.isfile(f): @@ -595,15 +818,17 @@ def _build_index_mappings(name, data_prefix, documents, sizes, # Found our files! build_indices = False break - data_cache_dir = os.path.dirname(idx_path['desc']) + data_cache_dir = os.path.dirname(idx_path["desc"]) data_cache_success = True # Build the indexed mapping if not exist. if build_indices: - # Since this function will be called by all the rank in the very beginning. Therefore, we assume that all the - # ranks will first create the document files, and then read it. + # Since this function will be called by all the rank in the very beginning. Therefore, we assume that all the + # ranks will first create the document files, and then read it. # There will not be contension effects going on either - print_rank_0(f" > WARNING: could not find index map files, building on rank {torch.distributed.get_rank()}") + log.warning( + f" > WARNING: could not find index map files, building on rank {torch.distributed.get_rank()}" + ) # For the last epoch, decide whether include the entire epoch # in the global shuffle or not. @@ -612,64 +837,80 @@ def _build_index_mappings(name, data_prefix, documents, sizes, # not mean anything. if num_epochs == 1: separate_last_epoch = False - print_rank_0(' > only one epoch required, setting ' - 'separate_last_epoch to False') + log.debug( + " > only one epoch required, setting " "separate_last_epoch to False" + ) else: # Get the number of samples for the last epoch num_samples_from_epochs_minus_one = ( - (num_epochs - 1) * tokens_per_epoch - 1) // seq_length - last_epoch_num_samples = num_samples - \ - num_samples_from_epochs_minus_one - assert last_epoch_num_samples >= 0, \ - 'last epoch number of samples should be non-negative.' 
+ (num_epochs - 1) * tokens_per_epoch - 1 + ) // seq_length + last_epoch_num_samples = num_samples - num_samples_from_epochs_minus_one + assert ( + last_epoch_num_samples >= 0 + ), "last epoch number of samples should be non-negative." num_samples_per_epoch = (tokens_per_epoch - 1) // seq_length - assert last_epoch_num_samples <= (num_samples_per_epoch + 1), \ - 'last epoch number of samples exceeded max value.' + assert last_epoch_num_samples <= ( + num_samples_per_epoch + 1 + ), "last epoch number of samples exceeded max value." # If we have less than 80% of the samples for the last epoch, # seperate out the epoch and treat it differently. # Note: the 80% number is just based on common sense and can # be adjusted if needed. - separate_last_epoch = (last_epoch_num_samples < - int(0.80 * num_samples_per_epoch)) + separate_last_epoch = last_epoch_num_samples < int( + 0.80 * num_samples_per_epoch + ) if separate_last_epoch: - string = ' > last epoch number of samples ({}) is smaller '\ - 'than 80% of number of samples per epoch ({}), '\ - 'setting separate_last_epoch to True' + string = ( + " > last epoch number of samples ({}) is smaller " + "than 80% of number of samples per epoch ({}), " + "setting separate_last_epoch to True" + ) else: - string = ' > last epoch number of samples ({}) is larger '\ - 'than 80% of number of samples per epoch ({}), '\ - 'setting separate_last_epoch to False' - print_rank_0(string.format(last_epoch_num_samples, - num_samples_per_epoch)) - + string = ( + " > last epoch number of samples ({}) is larger " + "than 80% of number of samples per epoch ({}), " + "setting separate_last_epoch to False" + ) + log.debug(string.format(last_epoch_num_samples, num_samples_per_epoch)) try: os.makedirs(data_cache_dir, exist_ok=True) # description - with open(idx_path['desc'], 'wt') as fd: + with open(idx_path["desc"], "wt") as fd: fd.write(desc) # doc-idx. 
start_time = time.time() - doc_idx = _build_doc_idx(documents, num_epochs, np_rng, - separate_last_epoch) - np.save(idx_path['doc'], doc_idx, allow_pickle=True) - print_rank_0(' > elasped time to build and save doc-idx mapping ' - '(seconds): {:4f}'.format(time.time() - start_time)) + doc_idx = _build_doc_idx(documents, num_epochs, np_rng, separate_last_epoch) + np.save(idx_path["doc"], doc_idx, allow_pickle=True) + log.debug( + " > elasped time to build and save doc-idx mapping " + "(seconds): {:4f}".format(time.time() - start_time) + ) # sample-idx. start_time = time.time() # Use C++ implementation for speed. # First compile and then import. from megatron.data import helpers + assert doc_idx.dtype == np.int32 assert sizes.dtype == np.int32 - sample_idx = helpers.build_sample_idx(sizes, doc_idx, seq_length, - num_epochs, tokens_per_epoch, torch.distributed.get_rank()==0) - np.save(idx_path['sample'], sample_idx, allow_pickle=True) - print_rank_0(' > elasped time to build and save sample-idx mapping ' - '(seconds): {:4f}'.format(time.time() - start_time)) + sample_idx = helpers.build_sample_idx( + sizes, + doc_idx, + seq_length, + num_epochs, + tokens_per_epoch, + torch.distributed.get_rank() == 0, + ) + np.save(idx_path["sample"], sample_idx, allow_pickle=True) + log.debug( + " > elasped time to build and save sample-idx mapping " + "(seconds): {:4f}".format(time.time() - start_time) + ) # shuffle-idx. 
start_time = time.time() # -1 is due to data structure used to retieve the index: @@ -678,35 +919,46 @@ def _build_index_mappings(name, data_prefix, documents, sizes, num_samples_ = num_samples_from_epochs_minus_one else: num_samples_ = sample_idx.shape[0] - 1 - shuffle_idx = _build_shuffle_idx(num_samples_, - sample_idx.shape[0] - 1, np_rng) - np.save(idx_path['shuffle'], shuffle_idx, allow_pickle=True) - print_rank_0(' > elasped time to build and save shuffle-idx mapping' - ' (seconds): {:4f}'.format(time.time() - start_time)) + shuffle_idx = _build_shuffle_idx( + num_samples_, sample_idx.shape[0] - 1, np_rng + ) + np.save(idx_path["shuffle"], shuffle_idx, allow_pickle=True) + log.debug( + " > elasped time to build and save shuffle-idx mapping" + " (seconds): {:4f}".format(time.time() - start_time) + ) except OSError: - print(f'There was an error trying to create the data cache directory ({data_cache_dir})') - print('or a file in it. This defaults to a directory "index-cache" within the directory') - print('the data files are in and can be set with the --data-cache-path argument. Please') - print('ensure you have write access to this directory or specify one that you do have') - print('write access to.') + print( + f"There was an error trying to create the data cache directory ({data_cache_dir})" + ) + print( + 'or a file in it. This defaults to a directory "index-cache" within the directory' + ) + print( + "the data files are in and can be set with the --data-cache-path argument. Please" + ) + print( + "ensure you have write access to this directory or specify one that you do have" + ) + print("write access to.") data_cache_success = False # Load mappings. 
start_time = time.time() - print_rank_0(f" > loading doc-idx mapping from {idx_path['doc']}") - doc_idx = np.load(idx_path['doc'], allow_pickle=True, mmap_mode='r') + log.debug(f" > loading doc-idx mapping from {idx_path['doc']}") + doc_idx = np.load(idx_path["doc"], allow_pickle=True, mmap_mode="r") - print_rank_0(f" > loading sample-idx mapping from {idx_path['sample']}") - sample_idx = np.load(idx_path['sample'], allow_pickle=True, mmap_mode='r') + log.debug(f" > loading sample-idx mapping from {idx_path['sample']}") + sample_idx = np.load(idx_path["sample"], allow_pickle=True, mmap_mode="r") - print_rank_0(f" > loading shuffle-idx mapping from {idx_path['shuffle']}") - shuffle_idx = np.load(idx_path['shuffle'], allow_pickle=True, mmap_mode='r') + log.debug(f" > loading shuffle-idx mapping from {idx_path['shuffle']}") + shuffle_idx = np.load(idx_path["shuffle"], allow_pickle=True, mmap_mode="r") - print_rank_0(' loaded indexed file in {:3.3f} seconds'.format( - time.time() - start_time)) - print_rank_0(' total number of samples: {}'.format( - sample_idx.shape[0])) - print_rank_0(' total number of epochs: {}'.format(num_epochs)) + log.debug( + " loaded indexed file in {:3.3f} seconds".format(time.time() - start_time) + ) + log.debug(" total number of samples: {}".format(sample_idx.shape[0])) + log.debug(" total number of epochs: {}".format(num_epochs)) return doc_idx, sample_idx, shuffle_idx, desc, desc_hash @@ -730,25 +982,26 @@ def _num_epochs(tokens_per_epoch, seq_length, num_samples): if ((total_tokens - 1) // seq_length) >= num_samples: return num_epochs + @dlp.log def _build_doc_idx(documents, num_epochs, np_rng, separate_last_epoch): """Build an array with length = number-of-epochs * number-of-dcuments. 
Each index is mapped to a corresponding document.""" if not separate_last_epoch or num_epochs == 1: - doc_idx = np.mgrid[0:num_epochs, 0:len(documents)][1] + doc_idx = np.mgrid[0:num_epochs, 0 : len(documents)][1] doc_idx[:] = documents doc_idx = doc_idx.reshape(-1) doc_idx = doc_idx.astype(np.int32) np_rng.shuffle(doc_idx) return doc_idx - doc_idx_first = _build_doc_idx(documents, num_epochs-1, np_rng, False) + doc_idx_first = _build_doc_idx(documents, num_epochs - 1, np_rng, False) doc_idx_last = _build_doc_idx(documents, 1, np_rng, False) return np.concatenate((doc_idx_first, doc_idx_last)) + @dlp.log -def _build_sample_idx(sizes, doc_idx, seq_length, - num_epochs, tokens_per_epoch): +def _build_sample_idx(sizes, doc_idx, seq_length, num_epochs, tokens_per_epoch): """Sample index mapping is a 2D array with sizes [number-of-samples + 1, 2] where [..., 0] contains the index into `doc_idx` and [..., 1] is the @@ -782,7 +1035,7 @@ def _build_sample_idx(sizes, doc_idx, seq_length, # Note that -1 here is for the same reason we have -1 in # `_num_epochs` calculations. if remaining_seq_length <= 0: - doc_offset += (remaining_seq_length + doc_length - 1) + doc_offset += remaining_seq_length + doc_length - 1 remaining_seq_length = 0 else: # Otherwise, start from the begining of the next document. 
@@ -795,24 +1048,28 @@ def _build_sample_idx(sizes, doc_idx, seq_length, return sample_idx + @dlp.log def _build_shuffle_idx(num_samples, total_size, np_rng): """Build the range [0, size) and shuffle.""" - print_rank_0(' > building shuffle index with split [0, {}) and [{}, {}) ' - '...'.format(num_samples, num_samples, total_size)) + log.debug( + " > building shuffle index with split [0, {}) and [{}, {}) " "...".format( + num_samples, num_samples, total_size + ) + ) dtype_ = np.uint32 if total_size >= (np.iinfo(np.uint32).max - 1): dtype_ = np.int64 - shuffle_idx_first = np.arange(start=0, stop=num_samples, - step=1, dtype=dtype_) + shuffle_idx_first = np.arange(start=0, stop=num_samples, step=1, dtype=dtype_) np_rng.shuffle(shuffle_idx_first) if num_samples == total_size: return shuffle_idx_first - shuffle_idx_last = np.arange(start=num_samples, stop=total_size, - step=1, dtype=dtype_) + shuffle_idx_last = np.arange( + start=num_samples, stop=total_size, step=1, dtype=dtype_ + ) np_rng.shuffle(shuffle_idx_last) return np.concatenate((shuffle_idx_first, shuffle_idx_last)) diff --git a/megatron/data/indexed_dataset.py b/megatron/data/indexed_dataset.py index ec2997f7b8..e2a0c4751f 100644 --- a/megatron/data/indexed_dataset.py +++ b/megatron/data/indexed_dataset.py @@ -16,7 +16,6 @@ from functools import lru_cache import os -# import logging import shutil import struct from itertools import accumulate @@ -29,21 +28,6 @@ log = get_logger(__name__) - -try: - import ezpz as ez - RANK = ez.get_rank() -except Exception: - RANK = torch.distributed.get_rank() - -# NOTE: [logging]----------------------------------------------------------- -# - Set logging level to "INFO" on RANK == 0, "CRITICAL" on all other ranks -log = logging.getLogger(__name__) -LOG_LEVEL = str(os.environ.get("LOG_LEVEL", "INFO")).upper() -log.setLevel(LOG_LEVEL) if RANK == 0 else log.setLevel("CRITICAL") -# -------------------------------------------------------------------------- - - dlp = 
Profile("DATASET") From 63b1901b6127cd71f5b54877fad211714499120f Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Wed, 16 Oct 2024 11:24:09 -0500 Subject: [PATCH 74/92] Update `megatron/data/gpt_dataset.py` --- megatron/data/gpt_dataset.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/megatron/data/gpt_dataset.py b/megatron/data/gpt_dataset.py index c412d02b31..0a3d898d63 100644 --- a/megatron/data/gpt_dataset.py +++ b/megatron/data/gpt_dataset.py @@ -168,6 +168,10 @@ def _build_indices(): for i in range(self.num_datasets): self.desc += dataset_builders[i].prefix + "," + log.info( + f"[BuildConcatDataset] Caught {shuffle=} across" + f" {self.num_samples} samples" + ) self.desc += ( f"-{self.num_samples}" + f"-{dataset_builders[0].seq_length}" From 7ef26bf922262eb80ff58fa481dbc6b2ff84d5ad Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Wed, 16 Oct 2024 11:24:37 -0500 Subject: [PATCH 75/92] Use `time.perf_counter` in `megatron/data/blendable_dataset.py` --- megatron/data/blendable_dataset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/megatron/data/blendable_dataset.py b/megatron/data/blendable_dataset.py index 979e9a174e..ab164fdc48 100755 --- a/megatron/data/blendable_dataset.py +++ b/megatron/data/blendable_dataset.py @@ -41,7 +41,7 @@ def __init__(self, datasets, weights, size, *, # Build indicies. 
@dlp.log def _build_indices(): - start_time = time.time() + start_time = time.perf_counter() dataset_index = np.zeros(self.size, dtype=np.int64) dataset_sample_index = np.zeros(self.size, dtype=np.int64) @@ -77,14 +77,14 @@ def _build_indices(): dataset_index, dataset_sample_index = _build_indices() try: log.debug(" > saving index map files") - start_time = time.time() + start_time = time.perf_counter() os.makedirs(os.path.dirname(index_path), exist_ok=True) with open(desc_path, 'wt') as fd: fd.write(desc) np.save(index_path, dataset_index, allow_pickle=True) np.save(sample_index_path, dataset_sample_index, allow_pickle=True) - log.info(f" > finished saving index map files in {time.time() - start_time} seconds") + log.info(f" > finished saving index map files in {time.perf_counter() - start_time} seconds") except OSError: print(f'There was an error trying to create the data cache directory ({data_cache_path})') print('or a file in it. This is set with the --data-cache-path argument. Please') @@ -108,14 +108,14 @@ def _build_indices(): torch.distributed.barrier(group=mpu.get_pipeline_model_parallel_group()) torch.distributed.barrier(group=mpu.get_data_parallel_group()) - start_time = time.time() + start_time = time.perf_counter() log.info(f'> loading blendable dataset index: {index_path}') self.dataset_index = np.load(index_path, allow_pickle=True, mmap_mode='r') assert self.dataset_index.size == self.size log.info(f'> loading blendable dataset sample index: {sample_index_path}') self.dataset_sample_index = np.load(sample_index_path, allow_pickle=True, mmap_mode='r') assert self.dataset_sample_index.size == self.size - log.info(f'> finished loading in {time.time() - start_time} seconds') + log.info(f'> finished loading in {time.perf_counter() - start_time} seconds') else: self.dataset_index, self.dataset_sample_index = _build_indices() From deb95cd7aa5f677c13b7bee4c0491c04d59d81dd Mon Sep 17 00:00:00 2001 From: Xinyu Lian Date: Thu, 17 Oct 2024 15:02:26 -0500 
Subject: [PATCH 76/92] fix init issue for silently ignoring the deepspeed config (#452) --- megatron/initialize.py | 1 + 1 file changed, 1 insertion(+) diff --git a/megatron/initialize.py b/megatron/initialize.py index 538f7fc456..90acf496ee 100644 --- a/megatron/initialize.py +++ b/megatron/initialize.py @@ -185,6 +185,7 @@ def setup_deepspeed_random_and_activation_checkpointing(args): deepspeed.checkpointing.configure( mpu, + deepspeed_config=args.deepspeed_config, partition_activations=args.partition_activations, contiguous_checkpointing=args.contigious_checkpointing, num_checkpoints=num_layers, From 68da2dbd0a1a1ab9500cd389a0cfd82d2032ebd6 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Thu, 17 Oct 2024 17:25:03 -0500 Subject: [PATCH 77/92] Update `ALCF/helpers.sh` --- ALCF/helpers.sh | 312 ++++++++++++++++++++++++++---------------------- 1 file changed, 167 insertions(+), 145 deletions(-) diff --git a/ALCF/helpers.sh b/ALCF/helpers.sh index 5df9a2c7a5..bc7753aa71 100644 --- a/ALCF/helpers.sh +++ b/ALCF/helpers.sh @@ -9,7 +9,7 @@ # ```bash # $ git clone https://github.com/argonne-lcf/Megatron-DeepSpeed # $ cd Megatron-DeepSpeed -# $ export PBS_O_WORKDIR=$(pwd) && source ALCF/helpers.sh && ezpz_setup +# $ export PBS_O_WORKDIR=$(pwd) && source ALCF/helpers.sh && setup # ``` # # and this will, automatically: @@ -174,48 +174,10 @@ setup_run_cmd() { export data_cache_path="${CKPT_DIR}/${DATA_CACHE_PATH}" && mkdir -p "${data_cache_path}" printf "\n" echo "Using data_cache_path: ${data_cache_path}" - TRAIN_SPLIT="${TRAIN_SPLIT:-100}" - VAL_SPLIT="${VAL_SPLIT:-0}" - TEST_SPLIT="${TEST_SPLIT:-0}" - LOG_INTERVAL="${LOG_INTERVAL:-1}" - DEFAULTS=( - "--split ${TRAIN_SPLIT},${VAL_SPLIT},${TEST_SPLIT}" - "--log-interval ${LOG_INTERVAL}" - "--no-bias-gelu-fusion" - "--no-bias-dropout-fusion" - "--no-masked-softmax-fusion" - "--no-gradient-accumulation-fusion" - "--accumulate-allreduce-grads-in-fp32" - ) - # export DEFAULTS="\ - # --split 
${TRAIN_SPLIT},${VAL_SPLIT},${TEST_SPLIT} \ - # --log-interval ${LOG_INTERVAL} \ - # --no-bias-gelu-fusion \ - # --no-bias-dropout-fusion \ - # --no-masked-softmax-fusion \ - # --no-gradient-accumulation-fusion \ - # --accumulate-allreduce-grads-in-fp32" - # OVERRIDE_CKPT_OPT_PARAM="${OVERRIDE_CKPT_OPT_PARAM:-}" - if [[ -z "${OVERRIDE_CKPT_OPT_PARAM:-}" ]]; then - DEFAULTS+=("--use-checkpoint-opt_param-scheduler") - fi - if [[ "${SP}" -gt 1 ]]; then - DEFAULTS+=( - "--ds-sequence-parallel-size ${SP}" - "--force-ds-sequence-parallel" - ) - fi ################################################################## # WARN: to disable Llama-type architectures, toggle via: # `NO_LLAMA=1 bash train_llama_alcf.sh` ################################################################## - LLAMA_ARGS="" - if [[ "${SP}" == 1 ]]; then - export LLAMA_ARGS="${LLAMA_ARGS} " - else - export LLAMA_ARGS="" - echo "NOT USING ROTARY EMBEDDINGS! LLAMA_ARGS=${LLAMA_ARGS}" - fi if [[ -z "${NO_LLAMA:-}" ]]; then llama_flags=( "--swiglu" @@ -230,63 +192,89 @@ setup_run_cmd() { "--ffn-hidden-size ${FFN_HIDDEN_SIZE}" ) fi + # min_lr=$(python3 -c 'print(f"{2 / (10 ** 5):.8f}")') + # "--min-lr ${LR:-${min_lr}}" # 2e-5 + lr_flags=( + "--lr ${LR:-0.0002}" + "--lr-decay-style ${LR_DECAY_STYLE:-cosine}" + "--min-lr ${MIN_LR:-"2e-6"}" # 2e-5 + "--lr-warmup-fraction ${LR_WARMUP_FRAC:-0.05}" + ) + if [[ -n "${LR_DECAY_ITERS:-}" ]]; then + lr_flags+=("--lr-decay-iters ${LR_DECAY_ITERS:-}") + fi - TENSORBARD_ARGS=() - if [[ -z "${USE_TENSORBARD:-}" ]]; then + tb_flags=() + if [[ -z "${NO_TENSORBOARD:-}" ]]; then TBDIR="${CKPT_DIR}/tensorboard" mkdir -p "${TBDIR}" - # --log-timers-to-tensorboard \ - # --log-optimizer-states-to-tensorboard" - # --tensorboard-dir ${TBDIR} \ - TENSORBARD_ARGS+=( + tb_flags+=( "--log-timers-to-tensorboard" "--log-optimizer-states-to-tensorboard" "--tensorboard-dir ${TBDIR}" ) fi dfl_fallback="${DATA_FILE_LIST:-${PBS_O_WORKDIR}/ALCF/data-lists/$(get_machine_name)/dolma.txt}" - export 
ADAM_BETA1="${ADAM_BETA1:-0.9}" - export ADAM_BETA2="${ADAM_BETA2:-0.95}" - export ADAM_EPS="${ADAM_EPS:-0.00001}" # 1 * 10^{-5} - export run_cmd=( - "${LAUNCHER}" + + train_args=() + if [[ -z "${OVERRIDE_CKPT_OPT_PARAM:-}" ]]; then + train_args+=("--use-checkpoint-opt_param-scheduler") + fi + # "--init-method-std ${INIT_METHOD_STD:-0.0006}" + # "--weight-decay ${WEIGHT_DECAY:-0.1}" + # --accumulate-allreduce-grads-in-fp32" + train_args+=( + "${lr_flags[@]}" + "${custom_args[@]}" + "${llama_flags[@]}" + "${DATA_FLAGS}" + "${FLASH_ARG}" + "${TIMING_STR}" + "${TOKENIZER_FLAGS}" + "${tb_flags[@]}" + "${ds_args[@]}" + "${gpt_args[@]}" "--${DTYPE}" - "${DEFAULTS[@]}" + "--shuffle-sample" + "--no-bias-gelu-fusion" + "--no-bias-dropout-fusion" + "--no-masked-softmax-fusion" + "--no-gradient-accumulation-fusion" "--optimizer ${OPT}" - "--save ${CKPT_DIR}" - "--load ${CKPT_DIR}" + "--tensor-model-parallel-size ${TP}" + "--pipeline-model-parallel-size ${PP}" + "--max-position-embeddings ${SEQ}" + "--micro-batch-size ${MICRO_BATCH}" + "--ds-sequence-parallel-size ${SP}" + "--global-batch-size ${GLOBAL_BATCH}" + "--split ${TRAIN_SPLIT:-950},${VAL_SPLIT:-50},${TEST_SPLIT:-0}" + "--timing-log-level ${TIMING_LOG_LEVEL:-1}" + "--eval-interval ${EVAL_INTERVAL:-50}" + "--eval-iters ${EVAL_ITERS:-40}" + "--save-interval ${SAVE_INTERVAL:-50}" + "--log-interval ${LOG_INTERVAL:-1}" + "--save ${SAVE:-${CKPT_DIR}}" + "--load ${LOAD:-${CKPT_DIR}}" "--seq-length ${SEQ}" "--num-layers ${NLAYERS}" "--hidden-size ${HIDDEN}" "--train-iters ${TRAIN_ITERS}" - "--eval-iters ${EVAL_ITERS}" "--distributed-backend ${BE}" "--adam-beta1 ${ADAM_BETA1:-0.9}" "--adam-beta2 ${ADAM_BETA2:-0.95}" "--adam-eps ${ADAM_EPS:-0.00001}" "--clip-grad ${CLIP_GRAD:-1.0}" - "--weight-decay ${WEIGHT_DECAY:-0.1}" "--num-attention-heads ${HEADS}" - "--save-interval ${SAVE_INTERVAL}" - "--eval-interval ${EVAL_INTERVAL}" - "--max-position-embeddings ${SEQ}" - "--micro-batch-size ${MICRO_BATCH}" - 
"--tensor-model-parallel-size ${TP}" - "--global-batch-size ${GLOBAL_BATCH}" - "--pipeline-model-parallel-size ${PP}" "--data-cache-path ${data_cache_path}" "--data-file-list ${DATA_FILE_LIST:-${dfl_fallback}}" - "${TENSORBARD_ARGS[@]}" - "${DATA_FLAGS}" - "${LR_ARGS}" - "${llama_flags[@]}" - "${FLASH_ARG}" - "${TIMING_STR}" - "${TOKENIZER_FLAGS}" - "${ds_args[@]}" - "${gpt_args[@]}" - "${custom_args[@]}" ) + cache_dir="${PBS_O_WORKDIR}/.cache/" + mkdir -p "${cache_dir}" + targs_cache="${cache_dir}/train_args.txt" + for arg in "${train_args[@]}"; do echo "${arg}" >> "${targs_cache}" ; done + export TRAIN_ARGS=("$(printf '%s\n' "${train_args[@]}"|sort)") + printf "Training Arguments: %s\n" "${TRAIN_ARGS[@]}" + export run_cmd=("${LAUNCHER}" "${train_args[@]}") } save_dotenv() { @@ -430,17 +418,20 @@ setupLauncher() { printf " %s" "$(printMagenta "${LAUNCHER}")" } -set_lr_args() { - LR_ARGS="--lr ${LR} --lr-decay-style cosine" - if [[ -n "${LR_DECAY_ITERS:-}" ]]; then - LR_ARGS="${LR_ARGS} --lr-decay-iters ${LR_DECAY_ITERS}" - fi - if [[ -n "${LR_WARMUP_FRAC}" ]]; then - LR_ARGS="${LR_ARGS} --lr-warmup-fraction ${LR_WARMUP_FRAC}" - fi - echo "LR_ARGS: ${LR_ARGS}" - export LR_ARGS="${LR_ARGS}" -} +# set_lr_args() { +# export LR=${LR:-0.0002} # LEARNING_RATE +# export LR_WARMUP_FRAC=${LR_WARMUP_FRAC:-0.05} # LEARNING RATE WARMUP +# export LR_DECAY_ITERS=${LR_DECAY_ITERS:-} # LR DECAY ITERS +# LR_ARGS="--lr ${LR} --lr-decay-style cosine" +# if [[ -n "${LR_DECAY_ITERS:-}" ]]; then +# LR_ARGS="${LR_ARGS} --lr-decay-iters ${LR_DECAY_ITERS}" +# fi +# if [[ -n "${LR_WARMUP_FRAC}" ]]; then +# LR_ARGS="${LR_ARGS} --lr-warmup-fraction ${LR_WARMUP_FRAC}" +# fi +# echo "LR_ARGS: ${LR_ARGS}" +# export LR_ARGS="${LR_ARGS}" +# } ######################################################################### # `get_batch_size_on_polaris`: Identify MICRO_BATCH to use on Polaris. 
@@ -495,12 +486,14 @@ _get_num_hosts_from_hostfile() { # # [2 tiles] x [6 xpus / tile] = 12 xpus # -# | nnhosts | nhosts | GAS | -# |:-------------:|:---------:|:-----:| -# | 64 <= n < inf | [64, inf) | 1 | -# | 32 <= n < 64 | [32, 64) | 2 | -# | 16 <= n < 32 | [16, 32) | 4 | -# | 0 <= n < 16 | [0, 16) | 8 | +# | nnhosts | nhosts | GAS | +# |:---------------:|:----------:|:-----:| +# | 256 <= n < inf | [256, inf) | 1 | +# | 128 <= n < 256 | [128, 256) | 2 | +# | 32 <= n < 128 | [32, 128) | 4 | +# | 16 <= n < 32 | [16, 32) | 8 | +# | 0 <= n < 16 | [0, 16) | 16 | +# ########################################### get_grad_acc_steps_on_aurora() { if [[ "$#" == 0 ]]; then @@ -508,18 +501,21 @@ get_grad_acc_steps_on_aurora() { elif [[ "$#" == 1 ]]; then hf="$1" else + echo "Usage: get_grad_acc_steps_on_aurora" echo "Expected exactly 0 or 1 arguments, received: $#" exit 1 fi nhosts=$(wc -l <"${hf}") - if [[ 64 -le "${nhosts}" ]]; then + if [[ "${nhosts}" -gt 256 ]]; then gas=1 - elif [[ 32 -le "${nhosts}" && "${nhosts}" -lt 64 ]]; then + elif [[ 128 -le "${nhosts}" && "${nhosts}" -lt 256 ]]; then gas=2 - elif [[ 16 -le "${nhosts}" && "${nhosts}" -lt 32 ]]; then + elif [[ 32 -le "${nhosts}" && "${nhosts}" -lt 128 ]]; then gas=4 - else + elif [[ 16 -le "${nhosts}" && "${nhosts}" -lt 32 ]]; then gas=8 + else + gas=16 fi echo "${gas}" } @@ -580,7 +576,7 @@ setParams() { export GRAD_ACC_STEPS="${GRAD_ACC_STEPS:-${gas}}" # export GRAD_ACC_STEPS="${GRAD_ACC_STEPS:-$(get_grad_acc_steps_on_aurora "$@)}" echo "[setParams] Using GRAD_ACC_STEPS: ${GRAD_ACC_STEPS}" - MICRO_BATCH=${MICRO_BATCH:-4} # MICRO_BATCH = 4 + MICRO_BATCH=${MICRO_BATCH:-1} # MICRO_BATCH = 4 #### [sam: 08/17/2024] ########################################## # Use best set of CCL env vars from Gordon Bell runs on Aurora set_ccl_vars_on_aurora @@ -604,9 +600,7 @@ setParams() { echo "Using flash-attn !!" 
FLASH_ARG="--use-flash-attn-builder" fi - ###################################################################### - # +--------[Polaris]-----------------------------------+ - # elif [[ $(hostname) == x3* ]]; then + # [Polaris] elif [[ "${mn}" == "polaris" || "${mn}" == "sirius" ]]; then # export LAUNCH_CMD="${LAUNCH_CMD:-deepspeed}" TP=${TP:-1} # TP = 2 @@ -625,30 +619,25 @@ setParams() { fi echo "Setting up AWS NCCL OFI Plugin on Polaris..." source "${WORKING_DIR}/ALCF/aws_ofi_nccl_plugin.sh" || exit - # +--------[Perlmutter]---------------------------------+ - # elif [[ $(hostname) == login* || $(hostname) == nid* ]]; then + # [Perlmutter] elif [[ "${mn}" == login* || "${mn}" == nid* ]]; then TP="${TP:-2}" export NCCL="${NCCL:-nccl}" export BE="${NCCL}" export DTYPE="${DTYPE:-bf16}" - MICRO_BATCH="${MICRO_BATCH:-8}" + MICRO_BATCH="${MICRO_BATCH:-1}" if [[ -n "${NO_FLASH_ATTN-}" ]]; then echo "Not using flash-attn!!" else FLASH_ARG="--use-flash-attn-v2" fi fi - # +----------------------------------------------------------------------+ export TP="${TP}" export PP="${PP:-1}" export SP="${SP:-1}" export FLASH_ARG="${FLASH_ARG}" export DTYPE="${DTYPE:-bf16}" export OPT="${OPT:-adamw}" - # export ADAM_BETA1="${ADAM_BETA1:-0.9}" - # export ADAM_BETA2="${ADAM_BETA2:-0.95}" - # export ADAM_EPS="${ADAM_EPS:-0.00001}" # 1 * 10^{-5} export WEIGHT_DECAY="${WEIGHT_DECAY:-0.1}" export HOSTFILE="${HOSTFILE:-${PBS_NODEFILE}}" NHOSTS=$(wc -l <"${HOSTFILE}") @@ -667,18 +656,19 @@ setParams() { # +---[Run Settings]------------------------------------------------------+ export SEQ=${SEQ:-4096} # SEQ_LEN: 4096 export ZERO_STAGE=${ZERO_STAGE:-1} # ZERO OFFLOADING STAGE - export MICRO_BATCH=${MICRO_BATCH:-8} # MICRO BATCH SIZE + export MICRO_BATCH=${MICRO_BATCH:-1} # MICRO BATCH SIZE export GRAD_ACC_STEPS=${GRAD_ACC_STEPS:-1} # GRADIENT ACCUMULATION STEPS - export EVAL_ITERS="${EVAL_ITERS:-10}" # NUMBER OF EVAL ITERS TO RUN - export EVAL_INTERVAL="${EVAL_INTERVAL:-50000}" # HOW 
FREQUENTLY TO RUN EVAL - export SAVE_INTERVAL=${SAVE_INTERVAL:-50} # HOW FREQUENTLY TO SAVE CKPTS export TIMING_LOG_LEVEL="${TIMING_LOG_LEVEL:-1}" # TIMING VERBOSITY IN LOGS export ACT_CKPT_NUM_LAYERS="${ACT_CKPT_NUM_LAYERS:-1}" # NUM LAYERS TO CHECKPOINT ACTIVATIONS - export USE_ACTIVATION_CHECKPOINTING=${USE_ACTIVATION_CHECKPOINTING:-1} # USE ACTIVATION CHECKPOINTING ? + export USE_ACTIVATION_CHECKPOINTING=${USE_ACTIVATION_CHECKPOINTING:-} # USE ACTIVATION CHECKPOINTING ? export GLOBAL_BATCH_MAX=$((WORLD_SIZE * MICRO_BATCH * GRAD_ACC_STEPS / TP / PP / SP)) # MAX GLOBAL BATCH SIZE export GLOBAL_BATCH="${GLOBAL_BATCH:-${GLOBAL_BATCH_MAX}}" # WILL USE MAX IF NOT SET IN ENVIRONMENT - # export TRAIN_ITER=${TRAIN_ITER:-317892} # NUMBER OF TRAIN ITERS - if [[ -z "${TRAIN_ITERS:-${TRAIN_ITER:-}}" ]]; then + if [[ -n "${TRAIN_TOKENS:-}" ]]; then + export TRAIN_TOKENS="${TRAIN_TOKENS}" + export TRAIN_ITERS=$((TRAIN_TOKENS / SEQ / GLOBAL_BATCH)) + printf "TRAIN_TOKENS=%s (=%sB tokens)\n" "${TRAIN_TOKENS}" "$((TRAIN_TOKENS / 10 ** 9))" + printf "TRAIN_ITERS=%s\n" "${TRAIN_ITERS}" + elif [[ -z "${TRAIN_ITERS:-${TRAIN_ITER:-}}" ]]; then export TRAIN_TOKENS=${TRAIN_TOKENS:-2000000000000} export TRAIN_ITERS=$((TRAIN_TOKENS / SEQ / GLOBAL_BATCH)) printf "TRAIN_TOKENS=%s (=%sB tokens)\n" "${TRAIN_TOKENS}" "$((TRAIN_TOKENS / 10 ** 9))" @@ -694,22 +684,20 @@ setParams() { # # For this reason, we only use the default LLAMA_ARGS when SP=0. 
########################################################################## + # # -----[Learning Rate Settings]-------------------------------------------- + # export LR=${LR:-0.0002} # LEARNING_RATE + # export LR_WARMUP_FRAC=${LR_WARMUP_FRAC:-0.05} # LEARNING RATE WARMUP + # export LR_DECAY_ITERS=${LR_DECAY_ITERS:-} # LR DECAY ITERS + # set_lr_args # -----[Learning Rate Settings]-------------------------------------------- - export LR=${LR:-0.0003} # LEARNING_RATE - export LR_WARMUP_FRAC=${LR_WARMUP_FRAC:-0.05} # LEARNING RATE WARMUP - export LR_DECAY_ITERS=${LR_DECAY_ITERS:-} # LR DECAY ITERS - set_lr_args - # -----[Learning Rate Settings]-------------------------------------------- + # # if [[ "${TIMING_LOG_LEVEL:-1}" -gt 1 ]]; then # if [[ "${TIMING_LOG_LEVEL:-1}" -gt 1 ]]; then - if [[ "${TIMING_LOG_LEVEL:-1}" -gt 1 ]]; then - TIMING_STR="\ - --timing-log-level ${TIMING_LOG_LEVEL}" - # --log-timers-to-tensorboard \ - # --log-optimizer-states-to-tensorboard \ - # " - else - TIMING_STR="" - fi + # TIMING_STR="\ + # --timing-log-level ${TIMING_LOG_LEVEL}" + # # " + # else + # TIMING_STR="" + # fi } ############################################## @@ -741,7 +729,7 @@ set_args() { # if [[ "${ZERO_STAGE}" == 3 ]]; then # ds_args="--use-mics ${ds_args}" # fi - if [[ "$USE_ACTIVATION_CHECKPOINTING" == 1 ]]; then + if [[ -n "${USE_ACTIVATION_CHECKPOINTING:-}" ]]; then echo "!! Caught USE_ACTIVATION_CHECKPOINTING=${USE_ACTIVATION_CHECKPOINTING} !!" ds_args+=("--deepspeed-activation-checkpointing") # ds_args=" --deepspeed-activation-checkpointing ${ds_args}" @@ -904,6 +892,7 @@ buildDSconfig() { echo "DS_CONFIG: ${DS_CONFIG}" printf "ZS: %s, MB: %s, GB: %s, PP: %s, DTYPE: %s" "${ZERO_STAGE}" "${MICRO_BATCH}" "${GLOBAL_BATCH}" "${PP}" "${DTYPE}" generateDSconfig "${DS_CONFIG}" + cat "${DS_CONFIG}" | jq . 
} ############################################################################### @@ -1176,27 +1165,19 @@ generateDSconfig() { \"train_batch_size\": $GLOBAL_BATCH, \"train_micro_batch_size_per_gpu\": $MICRO_BATCH, \"steps_per_print\": 1, + \"gradient_clipping\": 1.0, \"gradient_accumulation_steps\": $GRAD_ACC_STEPS, \"zero_force_ds_cpu_optimizer\": false, \"zero_allow_untested_optimizer\": true, \"gradient_clipping\": 1.0, \"wall_clock_breakdown\": false," - if [[ "${USE_ACTIVATION_CHECKPOINTING}" == 1 ]]; then - activation_checkpointing="\ - \"activation_checkpointing\": { - \"partition_activations\": true, - \"contiguous_memory_optimization\": true - }," - fi - flops_profiler="\ - \"flops_profiler\": { - \"enabled\": true, - \"profile_step\": 2, - \"module_depth\": -1, - \"top_modules\": 1, - \"detailed\": true, - \"output_file\": null - }" + # if [[ "${USE_ACTIVATION_CHECKPOINTING}" == 1 ]]; then + # activation_checkpointing="\ + # \"activation_checkpointing\": { + # \"partition_activations\": true, + # \"contiguous_memory_optimization\": true + # }," + # fi if [[ $DTYPE == "bf16" ]]; then dtype="\ \"communication_data_type\": \"bf16\", @@ -1228,7 +1209,7 @@ generateDSconfig() { else dtype="\"communication_data_type\": \"fp32\"," fi - if [[ "${OPT:-adamw}" == "ds.adamw" ]]; then + if [[ "${OPT:-}" == "ds.adamw" ]]; then optimizer="\ \"optimizer\": { \"type\": \"AdamW\", @@ -1238,6 +1219,24 @@ generateDSconfig() { \"beta2\": 0.95, \"eps\": 1e-5, \"weight_decay\": 1e-1 + }, + }," + elif [[ "${OPT:-}" == "ds.onebitlamb" ]]; then + optimizer="\ + \"optimizer\": { + \"type\": \"OneBitLamb\", + \"params\": { + \"lr\": 11e-3, + \"max_coeff\": 0.3, + \"min_coeff\": 0.01, + \"freeze_step\": 1000, + \"cuda_aware\": false, + \"comm_backend_name\": \"${BE}\", + \"coeff_beta\": 0.9, + \"factor_max\": 4.0, + \"factor_min\": 0.5, + \"factor_threshold\": 0.1 + } }," else optimizer="" @@ -1267,7 +1266,7 @@ generateDSconfig() { }," # elif [[ $ZERO_STAGE == 2 ]]; then elif [[ 
"${ZERO_STAGE}" == 2 || "${ZERO_STAGE}" == 1 ]]; then - if [[ -z "${CPU_OPTIMIZER:-}" ]]; then + if [[ -n "${CPU_OPTIMIZER:-}" ]]; then echo "!!!! CAUGHT CPU_OPTIMIZER !!!!" zero="\ \"zero_optimization\": { @@ -1304,6 +1303,15 @@ generateDSconfig() { else echo 'Please add the correct config set!!!' fi + flops_profiler="\ + \"flops_profiler\": { + \"enabled\": true, + \"profile_step\": 2, + \"module_depth\": -1, + \"top_modules\": 1, + \"detailed\": true, + \"output_file\": null + }" cat <"$1" { $common @@ -1388,6 +1396,7 @@ printWhite() { reset_env() { custom_vars=( NO_FLASH_ATTN + USE_FLASH_ATTN TP PP SP @@ -1415,7 +1424,6 @@ reset_env() { TRAIN_TOKENS TRAIN_ITERS MODEL_TYPE - LLAMA_ARGS LR LR_WARMUP_FRAC LR_DECAY_ITERS @@ -1447,10 +1455,24 @@ reset_env() { data_cache_path DEFAULTS ) + # LLAMA_ARGS printf "Unsetting custom vars: %s\n" "${custom_vars[*]}" unset "${custom_vars[@]}" } +convert_ckpt_to_universal() { + if [[ "$#" -ne 1 ]]; then + echo "Usage: convert_ckpt_to_universal ckpt_dir" + echo "Expected one argument (ckpt_dir), received: $#" + exit 1 + fi + ckptdir=$1 + gs=$(cat "${ckptdir}/latest_checkpointed_iteration.txt") + src="${ckptdir}/global_step${gs}" + dst="${ckptdir}/global_step${gs}_universal" + convert_script="${PBS_O_WORKDIR}/deps/DeepSpeed/checkpoint/ds_to_universal.py" + python3 "${convert_script}" --input_folder "${src}" --output_folder "${dst}" +} ########################### # call helpers_main() From 6acc370a41440098031227e881bd9f1f23aa369e Mon Sep 17 00:00:00 2001 From: ranzhejiang Date: Fri, 18 Oct 2024 18:31:05 +0800 Subject: [PATCH 78/92] fix moe tflops (#445) --- megatron/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/megatron/utils.py b/megatron/utils.py index 15160b1644..68d06f0dae 100644 --- a/megatron/utils.py +++ b/megatron/utils.py @@ -281,6 +281,7 @@ def throughput_calculator(model, args, iteration_time, total_iterations): num_layers = args.num_layers vocab_size = args.padded_vocab_size gqa = 
args.num_attention_heads // args.num_key_value_heads + num_experts_routed_to = args.topk ffn_multiplier = 3 if args.swiglu else 2 macs_per_flops = 2 @@ -294,7 +295,7 @@ def throughput_calculator(model, args, iteration_time, total_iterations): pre_and_post_mha_gemm_macs = batch_size * num_layers * (1 + (2 // gqa) + 1) * (hidden_size**2) * seq_len mha_bgemm_macs = batch_size * num_layers * 2 * head_dim * num_attention_heads * (seq_len**2) - ffn_gemm_macs = batch_size * num_layers * ffn_multiplier * ffn_hidden_size * hidden_size * seq_len + ffn_gemm_macs = batch_size * num_layers * ffn_multiplier * ffn_hidden_size * hidden_size * seq_len * num_experts_routed_to logit_lmhead_gemm_macs = batch_size * vocab_size * hidden_size * seq_len fwd_macs = pre_and_post_mha_gemm_macs + mha_bgemm_macs + ffn_gemm_macs + logit_lmhead_gemm_macs From 9e015cc164512e463559a2a6846679676aa3b17f Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Fri, 18 Oct 2024 10:00:26 -0500 Subject: [PATCH 79/92] Remove duplicate `gradient_accumulation_steps` in DS config --- ALCF/helpers.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ALCF/helpers.sh b/ALCF/helpers.sh index bc7753aa71..ade65a8d09 100644 --- a/ALCF/helpers.sh +++ b/ALCF/helpers.sh @@ -1165,7 +1165,6 @@ generateDSconfig() { \"train_batch_size\": $GLOBAL_BATCH, \"train_micro_batch_size_per_gpu\": $MICRO_BATCH, \"steps_per_print\": 1, - \"gradient_clipping\": 1.0, \"gradient_accumulation_steps\": $GRAD_ACC_STEPS, \"zero_force_ds_cpu_optimizer\": false, \"zero_allow_untested_optimizer\": true, From 58dc2d7c86d8c5b562871450ab7e2377d9db022f Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Mon, 21 Oct 2024 09:04:19 -0500 Subject: [PATCH 80/92] Update default EVAL args --- ALCF/helpers.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ALCF/helpers.sh b/ALCF/helpers.sh index ade65a8d09..cedac99f96 100644 --- a/ALCF/helpers.sh +++ b/ALCF/helpers.sh @@ -247,10 +247,10 @@ setup_run_cmd() { "--micro-batch-size 
${MICRO_BATCH}" "--ds-sequence-parallel-size ${SP}" "--global-batch-size ${GLOBAL_BATCH}" - "--split ${TRAIN_SPLIT:-950},${VAL_SPLIT:-50},${TEST_SPLIT:-0}" + "--split ${TRAIN_SPLIT:-990},${VAL_SPLIT:-10},${TEST_SPLIT:-0}" "--timing-log-level ${TIMING_LOG_LEVEL:-1}" - "--eval-interval ${EVAL_INTERVAL:-50}" - "--eval-iters ${EVAL_ITERS:-40}" + "--eval-interval ${EVAL_INTERVAL:-100}" + "--eval-iters ${EVAL_ITERS:-20}" "--save-interval ${SAVE_INTERVAL:-50}" "--log-interval ${LOG_INTERVAL:-1}" "--save ${SAVE:-${CKPT_DIR}}" From 277d308db336a9c5000c098f265d8d193f4b8bae Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Mon, 21 Oct 2024 09:04:37 -0500 Subject: [PATCH 81/92] Catch eval metrics in `megatron/training.py` --- megatron/training.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/megatron/training.py b/megatron/training.py index 8ffac6cb9c..d39f21c128 100644 --- a/megatron/training.py +++ b/megatron/training.py @@ -26,6 +26,7 @@ import torch.distributed as tdist from torch.nn.parallel.distributed import DistributedDataParallel as torchDDP +import wandb from megatron import ( get_args, get_current_global_batch_size, @@ -316,7 +317,7 @@ def pretrain( config = core_transformer_config_from_args(args) if args.do_valid: prefix = f"iteration {iteration} on {args.eval_iters * args.global_batch_size}-sample draw from validation set" - evaluate_and_print_results( + _ = evaluate_and_print_results( prefix, forward_step_func, valid_data_iterator, @@ -329,7 +330,7 @@ def pretrain( ) if args.do_test: prefix = f"iteration {iteration} on {args.eval_iters * args.global_batch_size}-sample draw from test set" - evaluate_and_print_results( + _ = evaluate_and_print_results( prefix, forward_step_func, test_data_iterator, @@ -924,7 +925,6 @@ def train_step( # Empty unused memory. 
if args.empty_unused_memory_level >= 2 and accelerator is not None: accelerator.empty_cache() - # XXX: [saforem2]: ---------------------------------------------------- # Is `num_zeros_in_grad` worth calculating (/ implementing) ?? # the `Megatron`-specific implementation is at: @@ -1406,6 +1406,16 @@ def evaluate_and_print_results( config, verbose, ) + key = "test" if test else "val" + if wandb is not None and wandb.run is not None: + wandb.log({ + f"{key}/iteration": iteration, + **{f"{key}/{k}": v for k, v in total_loss_dict.items()}, + **{ + f"{key}/ppl_{k}": math.exp(min(20, v.item())) + for k, v in total_loss_dict.items() + }, + }) string = " validation loss at {} | ".format(prefix) for key in total_loss_dict: string += f"{key} value={total_loss_dict[key].item():.6f}" @@ -1451,6 +1461,7 @@ def evaluate_and_print_results( log.info("-" * length) log.info(string) log.info("-" * length) + return total_loss_dict def cyclic_iter(iter): From af4cba12b810e420b8c0039e384d0b06c8842861 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Mon, 21 Oct 2024 09:05:06 -0500 Subject: [PATCH 82/92] Save git branch to env in `train_aGPT_7B.sh` --- train_aGPT_7B.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/train_aGPT_7B.sh b/train_aGPT_7B.sh index 286740fc89..1350ea0f2a 100644 --- a/train_aGPT_7B.sh +++ b/train_aGPT_7B.sh @@ -1,4 +1,7 @@ #!/bin/bash --login +#PBS -q lustre_scaling +#PBS -A Aurora_Deployment +#PBS -j oe ##################################### # AuroraGPT-7B @@ -10,6 +13,8 @@ # 1. Navigate into `$PBS_O_WORKDIR` cd "${PBS_O_WORKDIR}" || exit HERE=$(python3 -c 'import os; print(os.getcwd())') && export HERE +GIT_BRANCH=$(git branch --show-current) && export GIT_BRANCH + # 2. 
source `ALCF/helpers.sh` source "${HERE}/ALCF/helpers.sh" || exit From 8a8472c7bd83a3c9bc13f27b54c099ab1cde98b6 Mon Sep 17 00:00:00 2001 From: Huihuo Zheng Date: Mon, 21 Oct 2024 19:35:57 +0000 Subject: [PATCH 83/92] fixed print out bug --- megatron/data/gpt_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/megatron/data/gpt_dataset.py b/megatron/data/gpt_dataset.py index 38df556267..9ff2703277 100755 --- a/megatron/data/gpt_dataset.py +++ b/megatron/data/gpt_dataset.py @@ -154,8 +154,8 @@ def _build_indices_blended(): dataset_index, dataset_sample_index, weights, self.num_datasets, self.num_samples, torch.distributed.get_rank() == 0) - log.debug('> elapsed time for building blendable dataset indices for corpus {self.dataset_builders[0].corpus}: ' - '{:.2f} (sec)'.format(time.time() - start_time)) + log.debug(f"> elapsed time for building blendable dataset indices for corpus {self.dataset_builders[0].corpus}: " + "{:.2f} (sec)".format(time.time() - start_time)) return dataset_index, dataset_sample_index From 6cb727dde1ef92eedfafc24882cdf51a73a7203b Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Mon, 21 Oct 2024 15:43:58 -0500 Subject: [PATCH 84/92] Fix `args.shuffle` in `megatron/data/gpt_dataset.py` --- megatron/data/gpt_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megatron/data/gpt_dataset.py b/megatron/data/gpt_dataset.py index 8872d709c7..d09f08d63a 100644 --- a/megatron/data/gpt_dataset.py +++ b/megatron/data/gpt_dataset.py @@ -189,7 +189,7 @@ def _build_indices_concat(): self.desc += dataset_builders[i].prefix + "," log.info( - f"[BuildConcatDataset] Caught {shuffle=} across" + f"[BuildConcatDataset] Caught {args.shuffle_sample_in_corpus=} across" f" {self.num_samples} samples" ) self.desc += ( From 5d10179125c884516a70e76d0a1b36f59b4e1e4c Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Wed, 23 Oct 2024 22:36:50 -0500 Subject: [PATCH 85/92] Update `--{shuffle,blend}-sample-in-corpus` arg in 
`ALCF/helpers.sh` --- ALCF/helpers.sh | 98 +++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 48 deletions(-) diff --git a/ALCF/helpers.sh b/ALCF/helpers.sh index cedac99f96..603bc88fdb 100644 --- a/ALCF/helpers.sh +++ b/ALCF/helpers.sh @@ -141,7 +141,6 @@ setup() { setup_run_cmd "$@" || exit } - ##################################################### # setup_run_cmd # @@ -180,24 +179,24 @@ setup_run_cmd() { ################################################################## if [[ -z "${NO_LLAMA:-}" ]]; then llama_flags=( - "--swiglu" + "--swiglu" "--hidden-dropout 0" "--attention-dropout 0" - "--normalization rmsnorm" - "--disable-bias-linear" + "--normalization rmsnorm" + "--disable-bias-linear" "--no-query-key-layer-scaling" - "--use-rotary-position-embeddings" - "--untie-embeddings-and-output-weights" + "--use-rotary-position-embeddings" + "--untie-embeddings-and-output-weights" "--num-key-value-heads ${NUM_KV_HEAD}" "--ffn-hidden-size ${FFN_HIDDEN_SIZE}" ) fi # min_lr=$(python3 -c 'print(f"{2 / (10 ** 5):.8f}")') # "--min-lr ${LR:-${min_lr}}" # 2e-5 + # "--min-lr ${MIN_LR:-"2e-6"}" # 2e-5 lr_flags=( "--lr ${LR:-0.0002}" "--lr-decay-style ${LR_DECAY_STYLE:-cosine}" - "--min-lr ${MIN_LR:-"2e-6"}" # 2e-5 "--lr-warmup-fraction ${LR_WARMUP_FRAC:-0.05}" ) if [[ -n "${LR_DECAY_ITERS:-}" ]]; then @@ -221,8 +220,7 @@ setup_run_cmd() { train_args+=("--use-checkpoint-opt_param-scheduler") fi # "--init-method-std ${INIT_METHOD_STD:-0.0006}" - # "--weight-decay ${WEIGHT_DECAY:-0.1}" - # --accumulate-allreduce-grads-in-fp32" + # "--shuffle-sample" train_args+=( "${lr_flags[@]}" "${custom_args[@]}" @@ -235,44 +233,48 @@ setup_run_cmd() { "${ds_args[@]}" "${gpt_args[@]}" "--${DTYPE}" - "--shuffle-sample" + "--shuffle-sample-in-corpus" + "--blend-sample-in-corpus" + "--accumulate-allreduce-grads-in-fp32" "--no-bias-gelu-fusion" "--no-bias-dropout-fusion" "--no-masked-softmax-fusion" "--no-gradient-accumulation-fusion" - "--optimizer ${OPT}" - 
"--tensor-model-parallel-size ${TP}" - "--pipeline-model-parallel-size ${PP}" - "--max-position-embeddings ${SEQ}" - "--micro-batch-size ${MICRO_BATCH}" - "--ds-sequence-parallel-size ${SP}" - "--global-batch-size ${GLOBAL_BATCH}" - "--split ${TRAIN_SPLIT:-990},${VAL_SPLIT:-10},${TEST_SPLIT:-0}" - "--timing-log-level ${TIMING_LOG_LEVEL:-1}" - "--eval-interval ${EVAL_INTERVAL:-100}" - "--eval-iters ${EVAL_ITERS:-20}" - "--save-interval ${SAVE_INTERVAL:-50}" - "--log-interval ${LOG_INTERVAL:-1}" - "--save ${SAVE:-${CKPT_DIR}}" - "--load ${LOAD:-${CKPT_DIR}}" - "--seq-length ${SEQ}" - "--num-layers ${NLAYERS}" - "--hidden-size ${HIDDEN}" - "--train-iters ${TRAIN_ITERS}" - "--distributed-backend ${BE}" - "--adam-beta1 ${ADAM_BETA1:-0.9}" - "--adam-beta2 ${ADAM_BETA2:-0.95}" - "--adam-eps ${ADAM_EPS:-0.00001}" - "--clip-grad ${CLIP_GRAD:-1.0}" - "--num-attention-heads ${HEADS}" - "--data-cache-path ${data_cache_path}" - "--data-file-list ${DATA_FILE_LIST:-${dfl_fallback}}" + "--optimizer=${OPT}" + "--tensor-model-parallel-size=${TP}" + "--pipeline-model-parallel-size=${PP}" + "--max-position-embeddings=${SEQ}" + "--micro-batch-size=${MICRO_BATCH}" + "--ds-sequence-parallel-size=${SP}" + "--global-batch-size=${GLOBAL_BATCH}" + "--split=${TRAIN_SPLIT:-990},${VAL_SPLIT:-10},${TEST_SPLIT:-0}" + "--timing-log-level=${TIMING_LOG_LEVEL:-1}" + "--eval-interval=${EVAL_INTERVAL:-100}" + "--eval-iters=${EVAL_ITERS:-20}" + "--save-interval=${SAVE_INTERVAL:-50}" + "--log-interval=${LOG_INTERVAL:-1}" + "--save=${SAVE:-${CKPT_DIR}}" + "--load=${LOAD:-${CKPT_DIR}}" + "--seq-length=${SEQ}" + "--num-layers=${NLAYERS}" + "--hidden-size=${HIDDEN}" + "--train-iters=${TRAIN_ITERS}" + "--distributed-backend=${BE}" + "--weight-decay=${WEIGHT_DECAY:-0.1}" + "--adam-beta1=${ADAM_BETA1:-0.9}" + "--adam-beta2=${ADAM_BETA2:-0.95}" + "--adam-eps=${ADAM_EPS:-0.00001}" + "--clip-grad=${CLIP_GRAD:-1.0}" + "--num-attention-heads=${HEADS}" + "--data-cache-path=${data_cache_path}" + 
"--data-file-list=${DATA_FILE_LIST:-${dfl_fallback}}" ) + # "--adam-eps ${ADAM_EPS:-0.00001}" cache_dir="${PBS_O_WORKDIR}/.cache/" mkdir -p "${cache_dir}" targs_cache="${cache_dir}/train_args.txt" - for arg in "${train_args[@]}"; do echo "${arg}" >> "${targs_cache}" ; done - export TRAIN_ARGS=("$(printf '%s\n' "${train_args[@]}"|sort)") + for arg in "${train_args[@]}"; do echo "${arg}" >>"${targs_cache}"; done + export TRAIN_ARGS=("$(printf '%s\n' "${train_args[@]}" | sort)") printf "Training Arguments: %s\n" "${TRAIN_ARGS[@]}" export run_cmd=("${LAUNCHER}" "${train_args[@]}") } @@ -506,7 +508,7 @@ get_grad_acc_steps_on_aurora() { exit 1 fi nhosts=$(wc -l <"${hf}") - if [[ "${nhosts}" -gt 256 ]]; then + if [[ "${nhosts}" -ge 256 ]]; then gas=1 elif [[ 128 -le "${nhosts}" && "${nhosts}" -lt 256 ]]; then gas=2 @@ -567,7 +569,7 @@ setParams() { mn=$(get_machine_name) if [[ "${mn}" == "aurora" || "${mn}" == "sunspot" ]]; then TP=${TP:-1} # TP = 1 - export SAVE_INTERVAL="${SAVE_INTERVAL:-20}" + export SAVE_INTERVAL="${SAVE_INTERVAL:-50}" export CCL=${CCL:-ccl} # CCL export BE="${CCL}" # COMMUNICATION BACKEND = CCL export DTYPE=${DTYPE:-bf16} # DTYPE: bf16 @@ -845,7 +847,7 @@ get_output_prefix() { pre="${pre}_sp${SP}_pp${PP}_tp${TP}_${DTYPE}_opt${OPT}" pre="${pre}_lr${LR}_lwf${LR_WARMUP_FRAC}" if [[ -n "${TOKENIZER_TYPE:-}" ]]; then - _tok=$(echo "${TOKENIZER_TYPE}" | sed 's/Tokenizer//g') # noqa + _tok=$(echo "${TOKENIZER_TYPE}" | sed 's/Tokenizer//g') # noqa pre="${pre}_tok${_tok}" fi if [[ -n "${LR_DECAY_ITERS}" ]]; then @@ -1111,7 +1113,7 @@ setData() { # ------------------------[dfl: abbrv. 
for DATA_FILE_LIST] } generateDSconfig_new() { - cat < "${CONFIG_JSON}" + cat <"${CONFIG_JSON}" { "train_batch_size" : $GLOBAL_BATCH, "train_micro_batch_size_per_gpu": $MICRO_BATCH, @@ -1164,11 +1166,11 @@ generateDSconfig() { common="\ \"train_batch_size\": $GLOBAL_BATCH, \"train_micro_batch_size_per_gpu\": $MICRO_BATCH, + \"gradient_clipping\": 1.0, \"steps_per_print\": 1, \"gradient_accumulation_steps\": $GRAD_ACC_STEPS, \"zero_force_ds_cpu_optimizer\": false, \"zero_allow_untested_optimizer\": true, - \"gradient_clipping\": 1.0, \"wall_clock_breakdown\": false," # if [[ "${USE_ACTIVATION_CHECKPOINTING}" == 1 ]]; then # activation_checkpointing="\ @@ -1178,8 +1180,8 @@ generateDSconfig() { # }," # fi if [[ $DTYPE == "bf16" ]]; then + # \"communication_data_type\": \"bf16\", dtype="\ - \"communication_data_type\": \"bf16\", \"fp16\": { \"enabled\": false, \"loss_scale\": 0, @@ -1214,9 +1216,9 @@ generateDSconfig() { \"type\": \"AdamW\", \"params\": { \"lr\": ${LR}, - \"beta1\": 0.9, - \"beta2\": 0.95, - \"eps\": 1e-5, + \"beta1\": ${ADAM_BETA1}, + \"beta2\": ${ADAM_BETA2}, + \"eps\": ${ADAM_EPS}, \"weight_decay\": 1e-1 }, }," From 160d6a69717689226798ae8e378ea4c7b2866ad4 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Wed, 30 Oct 2024 21:25:53 -0500 Subject: [PATCH 86/92] fix: `GRAD_ACC_STEPS` when `NHOSTS == 256` --- ALCF/helpers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ALCF/helpers.sh b/ALCF/helpers.sh index 603bc88fdb..fac2f4d8ec 100644 --- a/ALCF/helpers.sh +++ b/ALCF/helpers.sh @@ -508,7 +508,7 @@ get_grad_acc_steps_on_aurora() { exit 1 fi nhosts=$(wc -l <"${hf}") - if [[ "${nhosts}" -ge 256 ]]; then + if [[ "${nhosts}" -gt 256 ]]; then gas=1 elif [[ 128 -le "${nhosts}" && "${nhosts}" -lt 256 ]]; then gas=2 From ce7d55356ff3bd7f02f864845dda13ecb8c02ff0 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Wed, 6 Nov 2024 21:57:02 -0600 Subject: [PATCH 87/92] =?UTF-8?q?=F0=9F=9A=A7=20`ALCF/ds=5Fto=5Funiversal.?= =?UTF-8?q?py`?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ALCF/ds_to_universal.py | 693 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 693 insertions(+) create mode 100755 ALCF/ds_to_universal.py diff --git a/ALCF/ds_to_universal.py b/ALCF/ds_to_universal.py new file mode 100755 index 0000000000..da7fcb708e --- /dev/null +++ b/ALCF/ds_to_universal.py @@ -0,0 +1,693 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +from functools import partial +from itertools import chain +import argparse +import glob +import itertools +import math +from concurrent.futures import ProcessPoolExecutor +import os +import re +import shutil +import torch +import tqdm +# from pprint import pprint + +from deepspeed.checkpoint import DeepSpeedCheckpoint +from deepspeed.checkpoint import ( + OPTIMIZER_STATE_DICT, + ZERO_STAGE, + BASE_OPTIMIZER_STATE, + SINGLE_PARTITION_OF_FP32_GROUPS, + PARAM_GROUPS, + PARAM_SLICE_MAPPINGS, + PARAM_SHAPES, + PARAM, + CAT_DIM, + PARAM_N_SUB_PARAMS, + SUB_PARAM_SHAPE, + VOCAB_TENSOR, + UNIVERSAL_CHECKPOINT_INFO, + UNIVERSAL_CHECKPOINT_VERSION_KEY, + UNIVERSAL_CHECKPOINT_VERSION_VALUE, + VOCABULARY_PARAMETER_PATTERNS, + PIPELINE_REPLICATED_PARAMETER_PATTERNS, + TP_REPLICATED_PARAMETER_PATTERNS, + PARAMETER_TO_AVERAGE_PATTERNS, + PARAMETER_WITH_ROW_PARALLELISM_PATTERNS, + PARAMETER_WITH_2_SUB_PARAMS_CAT_DIM_0, + PARAMETER_WITH_SUB_PARAMS, + SubparamShape, +) + + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--input_folder", + type=str, + required=True, + help="Input DeepSpeed Checkpoint folder", + ) + parser.add_argument( + "--output_folder", + type=str, + required=False, + default=None, + help="Output DeepSpeed checkpoint folder", + ) + parser.add_argument( + "--num_extract_workers", + default=4, + type=int, + help="How many parallel processes to extract zero shards", + ) + parser.add_argument( + 
"--num_merge_workers", + default=2, + type=int, + help="How many parallel processes to merge tp slices (more memory intensive, use much fewer than --num_extract_workers))", + ) + parser.add_argument( + "--keep_temp_folder", + action="store_true", + help="Preserve temporary folder of intermediate checkpoint slice files. Useful for debugging.", + ) + parser.add_argument( + "--no_strict", + dest="strict", + action="store_false", + help="Do not perform validity checks on converted checkpoint.", + ) + parser.add_argument( + "--inject_missing_state", + action="store_true", + help="Inject missing checkpoint state into the checkpoint if it is absent.", + ) + args = parser.parse_args() + print(f"args = {args}") + return args + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + """ + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + """ + return [atoi(c) for c in re.split(r"(\d+)", text)] + + +def _create_checkpoint_paths(base_folder, iteration, tp_degree, pp_degree): + path_list = [] + iter_folder = f"iter_{iteration:07d}" + for i in range(0, tp_degree): + path_list.append([]) + for j in range(0, pp_degree): + rank_folder = ( + f"mp_rank_{i:02d}" if pp_degree == 1 else f"mp_rank_{i:02d}_{j:03d}" + ) + ckpt_path = os.path.join(rank_folder, "model_optim_rng.pt") + path_list[i].append(os.path.join(base_folder, iter_folder, ckpt_path)) + + return path_list + + +def _save_checkpoint(file_path, chkpt_sd): + dir, _ = os.path.split(file_path) + os.makedirs(dir, exist_ok=True) + torch.save(chkpt_sd, file_path) + + +def extract_zero_shards(dir, ds_checkpoint, indices_3D): + pp_index, tp_index, dp_index = indices_3D + sd = ds_checkpoint.get_zero_checkpoint_state( + pp_index=pp_index, + tp_index=tp_index, + dp_index=dp_index, + strip_tensor_paddings=False, + ) + + # pprint(f"Processing {dp_index=} {pp_index=}, {tp_index=}") + + 
optim_sd = sd[OPTIMIZER_STATE_DICT] + param_slice_mappings = optim_sd[PARAM_SLICE_MAPPINGS] + universal_checkpoint_info = ds_checkpoint.get_checkpoint_info( + UNIVERSAL_CHECKPOINT_INFO + ) + pipeline_replicated_params = universal_checkpoint_info.get( + PIPELINE_REPLICATED_PARAMETER_PATTERNS, [] + ) + # print(f'{pipeline_replicated_params=}') + + # dict + state_groups = optim_sd[BASE_OPTIMIZER_STATE]["state"] + # list + fp32_groups = optim_sd[SINGLE_PARTITION_OF_FP32_GROUPS] + param_groups_cnt = len(state_groups) + + for param_group_id in range(param_groups_cnt): + flat_state = dict( + exp_avg=state_groups[param_group_id]["exp_avg"], + exp_avg_sq=state_groups[param_group_id]["exp_avg_sq"], + fp32=fp32_groups[param_group_id], + ) + + if "step" in state_groups[param_group_id]: + flat_state["step"] = state_groups[param_group_id]["step"] + + for name, fragment_mapping in param_slice_mappings[param_group_id].items(): + if pp_index > 0 and any( + re.match(pattern, name) for pattern in pipeline_replicated_params + ): + # Skip tied weights that are replicated in first and last pp stages + continue + + # pprint(f"dpt{dp_index}{pp_index}{tp_index} {param_group_id} {name} => {fragment_mapping.start}:{fragment_mapping.numel}") + for state_key in flat_state.keys(): + dump_param_fragment( + dir, + tp_index, + dp_index, + state_key, + flat_state[state_key], + name, + fragment_mapping.start, + fragment_mapping.numel, + ) + + +def extract_zero_shards_stage3( + optim_files, param_shapes, dp_degree, temp_dir, dp_index +): + state_dict = torch.load(optim_files[dp_index], map_location="cpu") + + flat_state = dict( + exp_avg=state_dict[OPTIMIZER_STATE_DICT]["optimizer_state_dict"]["state"][0][ + "exp_avg" + ], + exp_avg_sq=state_dict[OPTIMIZER_STATE_DICT]["optimizer_state_dict"]["state"][0][ + "exp_avg_sq" + ], + fp32=state_dict[OPTIMIZER_STATE_DICT]["fp32_flat_groups"][0], + ) + + offset = 0 + for name, shape in param_shapes.items(): + unpartitioned_numel = shape.numel() + 
partitioned_numel, _ = _zero_partitioned_param_info( + unpartitioned_numel, dp_degree + ) + padding_free_numel = min( + partitioned_numel, abs(unpartitioned_numel - dp_index * partitioned_numel) + ) + for state_key in flat_state.keys(): + dump_param_fragment( + temp_dir, + 0, + dp_index, + state_key, + flat_state[state_key], + name, + offset, + padding_free_numel, + ) + offset += partitioned_numel + + +cnt = 0 + + +def dp_index_to_str(dp_index): + return f"{dp_index:0>2d}" + + +def dump_param_fragment( + dir, tp_index, dp_index, state_name, state_flat_tensor, param_name, offset, numel +): + global cnt # temp hack + + param_base_path = os.path.join(dir, param_name, str(tp_index)) + os.makedirs(param_base_path, exist_ok=True) + + cnt += 1 + + path = os.path.join(param_base_path, f"{state_name}.{dp_index_to_str(dp_index)}") + + # print(f"{param_name}: {offset}: {numel} => {path}") + + # State might be a python int or a tensor + if state_name != "step" and torch.is_tensor(state_flat_tensor): + state_flat_tensor = state_flat_tensor.narrow(0, offset, numel).clone() + _save_checkpoint(path, state_flat_tensor) + + +def _merge_zero_shards(param_base_path, state, tp_degree, slice_shape=None): + slices = [] + for tp_index in range(tp_degree): + prefix_path = os.path.join(param_base_path, str(tp_index), f"{state}") + paths = glob.glob(f"{prefix_path}.*") + + if len(paths) == 0: + continue + + pattern = re.compile(f"{prefix_path}\\.([0-9]+)") + dp_indices = set() + for p in paths: + m = pattern.match(p) + if m: + dp_indices.add(int(m.group(1))) + else: + raise ValueError(f"Cannot parse dp_rank from {p}") + + paths = [ + f"{prefix_path}.{dp_index_to_str(dp_index)}" + for dp_index in sorted(list(dp_indices)) + ] + shards = [torch.load(p) for p in paths] + + if state == "step": + assert all( + v == shards[0] for v in shards + ), "All shards must have the same step value" + slice = shards[0] + else: + if slice_shape is None: + slice = torch.cat(shards, dim=0) + else: + slice = 
torch.cat(shards, dim=0).reshape(slice_shape) + + slices.append(slice) + return slices + + +def merge_tp_slices(ds_checkpoint, dir, slice_dir, tp_degree, name_and_shape): + name, shape = name_and_shape + slice_base_path = os.path.join(slice_dir, name) + param_base_path = os.path.join(dir, name) + + universal_checkpoint_info = ds_checkpoint.get_checkpoint_info( + UNIVERSAL_CHECKPOINT_INFO + ) + replicated_parameters = universal_checkpoint_info.get( + TP_REPLICATED_PARAMETER_PATTERNS, [] + ) + parameters_to_average = universal_checkpoint_info.get( + PARAMETER_TO_AVERAGE_PATTERNS, [] + ) + parameters_with_row_parallelism = universal_checkpoint_info.get( + PARAMETER_WITH_ROW_PARALLELISM_PATTERNS, [] + ) + vocabulary_parameters = universal_checkpoint_info.get( + VOCABULARY_PARAMETER_PATTERNS, [] + ) + parameters_with_2_sub_params_cat_dim_0 = universal_checkpoint_info.get( + PARAMETER_WITH_2_SUB_PARAMS_CAT_DIM_0, [] + ) + parameter_with_sub_params = universal_checkpoint_info.get( + PARAMETER_WITH_SUB_PARAMS, [] + ) + + unmatched_patterns = set( + replicated_parameters + + parameters_to_average + + parameters_with_row_parallelism + + vocabulary_parameters + + parameters_with_2_sub_params_cat_dim_0 + ) + unmatched_patterns.update( + chain.from_iterable( + SubparamShape(**s).patterns for s in parameter_with_sub_params + ) + ) + + def get_matched_pattern(patterns_, name_): + matched_ = [pattern_ for pattern_ in patterns_ if re.match(pattern_, name_)] + assert ( + len(matched_) <= 1 + ), f"Got more than one matching patterns={matched_} for {name_}" + if matched_: + pattern_ = matched_[0] + unmatched_patterns.discard(pattern_) + return pattern_ + return None + + def get_matched_sub_params_pattern(name_): + for subparam_shape_dict in parameter_with_sub_params: + subparam_shape = SubparamShape(**subparam_shape_dict) + for pattern_ in subparam_shape.patterns: + if re.match(pattern_, name_): + unmatched_patterns.discard(pattern_) + return subparam_shape + return None + + 
matched_sub_params_shape = get_matched_sub_params_pattern(name) + + step_merged = _merge_zero_shards(slice_base_path, "step", tp_degree, shape) + if step_merged: + _save_checkpoint(os.path.join(param_base_path, f"step.pt"), step_merged[0]) + + for state in ("fp32", "exp_avg", "exp_avg_sq"): + slices = _merge_zero_shards(slice_base_path, state, tp_degree, shape) + final_path = os.path.join(param_base_path, f"{state}.pt") + + # print(f"Expected shape: {shape}") + # print(f"Fragment sizes:", list(frag.shape for frag in slices)) + ckpt_dict = {} + if get_matched_pattern(replicated_parameters, name): + if len(slices) > 1: + assert all([slices[0].equal(other_slice) for other_slice in slices[1:]]) + param = slices[0] + # print(f'replicate {name} using first slice') + elif get_matched_pattern(parameters_to_average, name): + param = sum(slices) / len(slices) + # print(f'merge {name} using average') + elif get_matched_pattern(parameters_with_2_sub_params_cat_dim_0, name): + cat_dim = 0 + chunked_slices = [torch.chunk(s, 2, dim=cat_dim) for s in slices] + merged_chunks_0 = torch.cat([s[0] for s in chunked_slices], dim=cat_dim) + merged_chunks_1 = torch.cat([s[1] for s in chunked_slices], dim=cat_dim) + param = torch.cat([merged_chunks_0, merged_chunks_1], dim=cat_dim) + ckpt_dict[CAT_DIM] = cat_dim + ckpt_dict[PARAM_N_SUB_PARAMS] = 2 + elif matched_sub_params_shape: + merged_chunks = [] + partition_dim = matched_sub_params_shape.partition_dim + + sub_dim_sizes = matched_sub_params_shape.shape[partition_dim] + if not isinstance(sub_dim_sizes, tuple): + sub_dim_sizes = (sub_dim_sizes,) + + partition_shape = [ + sum(d) if isinstance(d, tuple) else d + for d in matched_sub_params_shape.shape + ] + partition_shape = [ + d // tp_degree if i == partition_dim else d + for i, d in enumerate(partition_shape) + ] + slices = [s.view(partition_shape) for s in slices] + + offset = 0 + for sub_dim_size in sub_dim_sizes: + part_sub_dim_size = sub_dim_size // tp_degree + merged_chunks.append( 
+ torch.cat( + [ + s.narrow(partition_dim, offset, part_sub_dim_size) + for s in slices + ], + dim=partition_dim, + ) + ) + offset += part_sub_dim_size + param = torch.cat(merged_chunks, dim=partition_dim) + ckpt_dict[SUB_PARAM_SHAPE] = matched_sub_params_shape + else: + cat_dim = ( + 1 if get_matched_pattern(parameters_with_row_parallelism, name) else 0 + ) + # print(f"merge {name} with CAT DIM: {cat_dim}") + param = torch.cat(slices, dim=cat_dim) + ckpt_dict[CAT_DIM] = cat_dim + + if get_matched_pattern(vocabulary_parameters, name): + # print(f"Before {param.shape=}") + # strip padding + original_vocab_size = universal_checkpoint_info["original_vocab_size"] + param = param[:original_vocab_size, :] + ckpt_dict[VOCAB_TENSOR] = True + # print(f"After {param.shape=}") + + # print(f"Final shape: {param.shape}") + ckpt_dict[PARAM] = param + _save_checkpoint(final_path, ckpt_dict) + + return unmatched_patterns + + +def merge_zero3_slices(dp_degree, dir, slice_dir, name): + slice_base_path = os.path.join(slice_dir, name) + param_base_path = os.path.join(dir, name) + + for state in ("fp32", "exp_avg", "exp_avg_sq"): + slices = _merge_zero_shards(slice_base_path, state, 1) + final_path = os.path.join(param_base_path, f"{state}.pt") + _save_checkpoint(final_path, slices[0]) + + +def _do_parallel_work(do_work, work_chunks, num_workers): + results = [] + if num_workers > 1: + with ProcessPoolExecutor(max_workers=num_workers) as executor: + future_list = [executor.submit(do_work, work) for work in work_chunks] + for f in tqdm.tqdm(future_list): + results.append(f.result()) + else: + # No parallel pass for unit testing + # We can't create child processes in tests + for work in tqdm.tqdm(work_chunks): + results.append(do_work(work)) + return results + + +def _extract_zero_shard_files(args, ds_checkpoint, temp_dir): + _3d_range_list = list( + itertools.product( + range(ds_checkpoint.pp_degree), + range(ds_checkpoint.tp_degree), + range(ds_checkpoint.dp_degree), + ) + ) + # 
pprint(f'{_3d_range_list=}') + + do_work = partial(extract_zero_shards, temp_dir, ds_checkpoint) + _do_parallel_work(do_work, _3d_range_list, args.num_extract_workers) + + +def _extract_zero_shard_files_stage3( + args, optim_files, param_shapes, dp_degree, temp_dir +): + do_work = partial( + extract_zero_shards_stage3, optim_files, param_shapes, dp_degree, temp_dir + ) + _do_parallel_work(do_work, list(range(dp_degree)), args.num_extract_workers) + + +def _merge_tp_slice_files(args, ds_checkpoint, slice_shapes, temp_dir): + zero_output_folder = os.path.join(args.output_folder, "zero") + do_work = partial( + merge_tp_slices, + ds_checkpoint, + zero_output_folder, + temp_dir, + ds_checkpoint.tp_degree, + ) + unmatched_patterns_lists = _do_parallel_work( + do_work, list(slice_shapes.items()), args.num_merge_workers + ) + + # verify that all patterns were used + # if a pattern was not used by any of the workers, then it was not used at all -> assert/alert + sets = [set(lst) for lst in unmatched_patterns_lists] + unmatched_patterns = list(set.intersection(*sets)) + if args.strict: + assert ( + not unmatched_patterns + ), f"Unused patterns={unmatched_patterns} while merging tp slices" + elif unmatched_patterns: + print(f"Warning: Unused patterns={unmatched_patterns} while merging tp slices") + + +def _merge_zero3_slice_files(args, param_shapes, dp_degree, temp_dir): + zero_output_folder = os.path.join(args.output_folder, "zero") + do_work = partial(merge_zero3_slices, dp_degree, zero_output_folder, temp_dir) + _do_parallel_work(do_work, param_shapes.keys(), args.num_merge_workers) + + +def _zero_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _parse_model_states_stage3(files): + return torch.load(files[0], 
map_location=torch.device("cpu"))[PARAM_SHAPES] + + +def _save_optimizer_state(args, ds_checkpoint): + sharded_states = [ + BASE_OPTIMIZER_STATE, + PARAM_SLICE_MAPPINGS, + SINGLE_PARTITION_OF_FP32_GROUPS, + ] + sd = ds_checkpoint.get_zero_checkpoint_state( + pp_index=0, tp_index=0, dp_index=0, strip_tensor_paddings=False + ) + + optim_sd = sd[OPTIMIZER_STATE_DICT] + output_sd = {k: v for k, v in optim_sd.items() if k not in sharded_states} + output_sd[PARAM_GROUPS] = optim_sd[BASE_OPTIMIZER_STATE][PARAM_GROUPS] + zero_output_folder = os.path.join(args.output_folder, "zero") + output_file_path = os.path.join(zero_output_folder, f"optimizer_state.pt") + _save_checkpoint(output_file_path, output_sd) + + +def _save_optimizer_state_stage3(args, optim_files): + sd = torch.load(optim_files[0], map_location=torch.device("cpu")) + output_sd = sd[OPTIMIZER_STATE_DICT] + output_sd[PARAM_GROUPS] = output_sd[OPTIMIZER_STATE_DICT][PARAM_GROUPS] + zero_output_folder = os.path.join(args.output_folder, "zero") + output_file_path = os.path.join(zero_output_folder, f"optimizer_state.pt") + _save_checkpoint(output_file_path, output_sd) + + +def _get_optim_files(checkpoint_dir): + return _get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def _get_model_state_files(checkpoint_dir): + return _get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def _get_checkpoint_files(checkpoint_dir, glob_pattern): + ckpt_files = sorted( + glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys + ) + + if len(ckpt_files) == 0: + raise FileNotFoundError( + f"can't find {glob_pattern} files in directory '{checkpoint_dir}'" + ) + + return ckpt_files + + +def _get_zero_stage(optim_files): + state_dict = torch.load(optim_files[0], map_location=torch.device("cpu")) + optimizer_state = state_dict[OPTIMIZER_STATE_DICT] + zero_stage = optimizer_state.get(ZERO_STAGE, 1) + return zero_stage + + +def _inject_missing_state(ds_checkpoint): + if UNIVERSAL_CHECKPOINT_INFO not in 
ds_checkpoint.global_state: + sd = torch.load( + ds_checkpoint.mp_rank_files[0], map_location=torch.device("cpu") + ) + if UNIVERSAL_CHECKPOINT_INFO not in sd: + ds_checkpoint.global_state[UNIVERSAL_CHECKPOINT_INFO] = {} + ds_checkpoint.global_state[UNIVERSAL_CHECKPOINT_INFO][ + UNIVERSAL_CHECKPOINT_VERSION_KEY + ] = UNIVERSAL_CHECKPOINT_VERSION_VALUE + + +def _check_for_required_state(ds_checkpoint): + universal_checkpoint_info = ds_checkpoint.get_checkpoint_info( + UNIVERSAL_CHECKPOINT_INFO + ) + assert ( + universal_checkpoint_info is not None + ), f"Required {UNIVERSAL_CHECKPOINT_INFO} state is missing in checkpoint. Verify that client creates this state." + + +def main(args): + print(f"Convert DeepSpeed Checkpoint to Universal Checkpoint") + + print( + f"Converting DeepSpeed checkpoint in {args.input_folder} to Universal checkpoint in {args.output_folder}" + ) + + optim_files = _get_optim_files(args.input_folder) + zero_stage = _get_zero_stage(optim_files) + + if zero_stage <= 2: + ds_checkpoint = DeepSpeedCheckpoint(args.input_folder) + if args.inject_missing_state: + _inject_missing_state(ds_checkpoint) + else: + _check_for_required_state(ds_checkpoint) + + iteration = ds_checkpoint.get_iteration() + # _create_latest_file(args.output_folder, iteration) + checkpoint_paths = _create_checkpoint_paths( + args.output_folder, + iteration, + ds_checkpoint.tp_degree, + ds_checkpoint.pp_degree, + ) + + slice_shapes = [] + for mp_rank_file in ds_checkpoint.mp_rank_files: + mp_sd = torch.load(mp_rank_file, map_location=torch.device("cpu")) + slice_shapes += mp_sd[PARAM_SHAPES] + + # fix back to normal flat dict, merge duplicates for tp>1 + slice_shapes = dict((k, v) for d in slice_shapes for k, v in d.items()) + temp_dir = os.path.join(args.output_folder, "tmp") + + print("1. Extracting ZeRO fragments") + _extract_zero_shard_files(args, ds_checkpoint, temp_dir) + + print("2. 
Merging slices .....") + _merge_tp_slice_files(args, ds_checkpoint, slice_shapes, temp_dir) + + print("3. Saving common optimizer states") + _save_optimizer_state(args, ds_checkpoint) + + if not args.keep_temp_folder: + shutil.rmtree(temp_dir, ignore_errors=True) + + # Copy mp* files into output folder + for f in glob.glob(os.path.join(args.input_folder, "mp*")): + shutil.copy2(f, args.output_folder) + + else: + model_files = _get_model_state_files(args.input_folder) + param_shapes = _parse_model_states_stage3(model_files) + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + dp_degree = len(model_files) + + temp_dir = os.path.join(args.output_folder, "tmp") + + print("*** 1. Extracting ZeRO fragments") + _extract_zero_shard_files_stage3( + args, optim_files, param_shapes, dp_degree, temp_dir + ) + + print("*** 2. Merging slices .....") + _merge_zero3_slice_files(args, param_shapes, dp_degree, temp_dir) + + print("*** 3. Saving common optimizer states") + _save_optimizer_state_stage3(args, optim_files) + + if not args.keep_temp_folder: + shutil.rmtree(temp_dir, ignore_errors=True) + + # Copy *model_states files into output folder + for f in glob.glob(os.path.join(args.input_folder, "*model_states.pt")): + shutil.copy2(f, args.output_folder) + + # Update latest to output folder + checkpoint_root_folder, step_folder = os.path.split(args.output_folder) + latest_file = os.path.join(checkpoint_root_folder, "latest_universal") + with open(latest_file, "w") as f: + f.write(step_folder) + + print("*** Done!") + + +if __name__ == "__main__": + args = parse_arguments() + main(args) From 8e0bff815700b6e7c9b2ff1d19a87141150d136b Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Thu, 7 Nov 2024 12:19:53 -0600 Subject: [PATCH 88/92] docs: Add `ALCF/notes/checkpoints.md` --- ALCF/notes/checkpoints.md | 207 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 ALCF/notes/checkpoints.md diff --git a/ALCF/notes/checkpoints.md 
b/ALCF/notes/checkpoints.md new file mode 100644 index 0000000000..f5acd425a7 --- /dev/null +++ b/ALCF/notes/checkpoints.md @@ -0,0 +1,207 @@ +# Converting Checkpoints + +## Megatron $\rightarrow$ 🤗 HuggingFace + +On Aurora, + +- Setup: + + ```bash + CKPT_ROOT="/flare/Aurora_deployment/AuroraGPT-Testing/foremans/rollback-41k8/Megatron-DeepSpeed-41800/checkpoints/ws768_ds_stage1_nl32_hs4096_mb4_seq4096_gb3072_sp1_pp1_tp1_bf16_optadamw_lr0.00020_lwf0.05"; + + LAST_STEP=$(cat "${CKPT_ROOT}/latest_checkpointed_iteration.txt") + GLOBAL_STEP="${GLOBAL_STEP:-${LAST_STEP}}" + + SRC="${CKPT_ROOT}/global_step${GLOBAL_STEP}" + + OUTPUT_PARENT="/flare/Aurora_deployment/AuroraGPT-Checkpoints/production-checkpoints/aGPT-7B/HF" + DST="${OUTPUT_PARENT}/global_step${GLOBAL_STEP}_hf" + + printf "SRC: %s\n DST: %s\n" "${SRC}" "${DST}" + ``` + +- Convert: + + ```bash + python3 Megatron-DeepSpeed/mds_to_hf.py \ + --mds_checkpoint "${SRC}/mp_rank_00_model_states.pt" \ + --output_dir "${DST}" \ + --cache_dir "./.cache" + ``` + + + + + + + + + + [DST] --> + + + + + + + + + + + +## Use in 🤗 `transformers` + +```python +from pathlib import Path +import time +from rich import print +from typing import Optional +from transformers import LlamaForCausalLM, AutoTokenizer + +def load_model(ckpt_dir: str, step: Optional[int] = None): + if step is None: + fp = Path(ckpt_dir) + else: + fp = Path(ckpt_dir).joinpath(f"global_step{step}_hf") + print(f"Loading ckpt from: {fp}") + if fp.exists(): + model = LlamaForCausalLM.from_pretrained(fp.as_posix()) + print(f"{model=}") + return model + + raise FileNotFoundError(f"Unable to locate checkpoint at: {fp}") + + +def eval_model( + model: torch.nn.Module, + max_length: int = 64, + prompt: Optional[str] = None, + tokenizer: Optional[AutoTokenizer] = None, +) -> str: + prompt = "What is it like in there?" 
if prompt is None else prompt + tokenizer = ( + AutoTokenizer.from_pretrained("meta-llama/Llama-2-7B-hf") + if tokenizer is None else tokenizer + ) + output = ( + tokenizer.batch_decode( + model.generate( + **tokenizer(prompt, return_tensors="pt"), + max_length=max_length, + ), + clean_up_tokenization_spaces=True, + skip_special_tokens=True, + )[0] + ) + return output + + +def loop_over_checkpoints( + steps_list: list[int], + ckpt_dir: str, + max_length: int = 128, + prompt: Optional[str] = None, +): + for step in steps_list: + t0 = time.perf_counter() + prompt = "What is it like in there?" if prompt is None else prompt + print(f"\n Loading model from checkpoint at global step: {step}") + outputs = eval_model( + load_model(ckpt_dir, step), + max_length=max_length, + prompt=prompt, + ) + print(f"{outputs}") + print(f"\ntook: {time.perf_counter() - t0:.6f}s\n") +``` + +```python +>>> ckpt_dir = "/flare/Aurora_deployment/AuroraGPT-Checkpoints/production-checkpoints/aGPT-7B/HF/" +>>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7B-hf") +>>> model = load_model(ckpt_dir, 76300) +Loading ckpt from: +/flare/Aurora_deployment/AuroraGPT-Checkpoints/production-checkpoints/aGPT-7B/HF/global_step76300_hf +model=LlamaForCausalLM( + (model): LlamaModel( + (embed_tokens): Embedding(32000, 4096) + (layers): ModuleList( + (0-31): 32 x LlamaDecoderLayer( + (self_attn): LlamaSdpaAttention( + (q_proj): Linear(in_features=4096, out_features=4096, bias=False) + (k_proj): Linear(in_features=4096, out_features=1024, bias=False) + (v_proj): Linear(in_features=4096, out_features=1024, bias=False) + (o_proj): Linear(in_features=4096, out_features=4096, bias=False) + (rotary_emb): LlamaRotaryEmbedding() + ) + (mlp): LlamaMLP( + (gate_proj): Linear(in_features=4096, out_features=11008, bias=False) + (up_proj): Linear(in_features=4096, out_features=11008, bias=False) + (down_proj): Linear(in_features=11008, out_features=4096, bias=False) + (act_fn): SiLU() + ) + 
(input_layernorm): LlamaRMSNorm((4096,), eps=1e-05) + (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05) + ) + ) + (norm): LlamaRMSNorm((4096,), eps=1e-05) + (rotary_emb): LlamaRotaryEmbedding() + ) + (lm_head): Linear(in_features=4096, out_features=32000, bias=False) +) + +>>> print( +... eval_model( +... model, +... max_length=128, +... prompt="What is it like in there?", +... tokenizer=tokenizer +... ) +... ) +Setting `pad_token_id` to `eos_token_id`:None for open-end generation. +Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32) +What is it like in there? +I've been in there a few times. It's a pretty cool place. +I've been in there a few times. It's a pretty cool place. +I've been in there a few times. It's a pretty cool place. +I've been in there a few times. It's a pretty cool place. +I've been in there a few times. It's a pretty cool place. +I've been in there a few times. It's a pretty cool place. 
+I've been in +``` + +## Helper Script + +```bash +convert_mds_to_hf() { + if [[ "$#" -eq 3 ]]; then + GLOBAL_STEP=$1 + CKPT_ROOT=$2 + OUTPUT_PARENT=$3 + elif [[ "$#" -eq 2 ]]; then + GLOBAL_STEP=$1 + CKPT_ROOT=$2 + OUTPUT_PARENT=$(pwd) + elif [[ "$#" -eq 1 ]]; then + GLOBAL_STEP=$1 + CKPT_ROOT="/flare/Aurora_deployment/AuroraGPT-Testing/foremans/rollback-41k8/Megatron-DeepSpeed-41800/checkpoints/ws768_ds_stage1_nl32_hs4096_mb4_seq4096_gb3072_sp1_pp1_tp1_bf16_optadamw_lr0.00020_lwf0.05/"; + OUTPUT_PARENT=$(pwd) + else + echo "Expected exactly 1, 2, or 3 arguments (global_step, src, dst, respectively)" + exit + fi + SRC="${CKPT_ROOT}/global_step${GLOBAL_STEP}" + DST="${OUTPUT_PARENT}/global_step${GLOBAL_STEP}_hf" + if [[ -d "${SRC}" ]]; then + echo "Converting checkpoint @ global step ${GLOBAL_STEP}" + echo "\tsrc = ${SRC}\n" + echo "\tdst = ${DST}\n" + python3 mds_to_hf.py \ + --mds_checkpoint "${SRC}/mp_rank_00_model_states.pt" \ + --output_dir "${DST}" \ + --cache_dir "./.cache" + else + echo "Unable to locate directory ${SRC}. Exiting" + exit 1 + fi +} +``` From bd8c246bcfbdca4833e57c598e37e889218a1c8a Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Thu, 7 Nov 2024 12:21:59 -0600 Subject: [PATCH 89/92] feat: Enable `--use-flash-attn-builder` by default on Aurora --- ALCF/helpers.sh | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/ALCF/helpers.sh b/ALCF/helpers.sh index fac2f4d8ec..252e641fe1 100644 --- a/ALCF/helpers.sh +++ b/ALCF/helpers.sh @@ -578,7 +578,12 @@ setParams() { export GRAD_ACC_STEPS="${GRAD_ACC_STEPS:-${gas}}" # export GRAD_ACC_STEPS="${GRAD_ACC_STEPS:-$(get_grad_acc_steps_on_aurora "$@)}" echo "[setParams] Using GRAD_ACC_STEPS: ${GRAD_ACC_STEPS}" - MICRO_BATCH=${MICRO_BATCH:-1} # MICRO_BATCH = 4 + MICRO_BATCH=${MICRO_BATCH:-1} + if [[ -n "${NO_FLASH_ATTN-}" ]]; then + echo "Not using flash-attn!!" 
+ else + FLASH_ARG="--use-flash-attn-builder" + fi #### [sam: 08/17/2024] ########################################## # Use best set of CCL env vars from Gordon Bell runs on Aurora set_ccl_vars_on_aurora @@ -590,18 +595,18 @@ setParams() { # use_kvs_fix_on_aurora # <-- why are these different from those in update_ccl_env_vars_aurora ?? # update_ccl_env_vars_aurora ###################################################################### - if [[ -z "${USE_FLASH_ATTN:-}" ]]; then - # NOTE: if NO_FLASH_ATTN is NON-empty; then NO FLASH ATTN !! - export NO_FLASH_ATTN=1 # disabled on [2024-06-20] waiting on fix... - if [[ -n "${NO_FLASH_ATTN-}" ]]; then - echo "Not using flash-attn!!" - else - FLASH_ARG="--use-flash-attn-builder" - fi - else - echo "Using flash-attn !!" - FLASH_ARG="--use-flash-attn-builder" - fi + # if [[ -z "${USE_FLASH_ATTN:-}" ]]; then + # # NOTE: if NO_FLASH_ATTN is NON-empty; then NO FLASH ATTN !! + # export NO_FLASH_ATTN=1 # disabled on [2024-06-20] waiting on fix... + # if [[ -n "${NO_FLASH_ATTN-}" ]]; then + # echo "Not using flash-attn!!" + # else + # FLASH_ARG="--use-flash-attn-builder" + # fi + # else + # echo "Using flash-attn !!" 
+ # FLASH_ARG="--use-flash-attn-builder" + # fi # [Polaris] elif [[ "${mn}" == "polaris" || "${mn}" == "sirius" ]]; then # export LAUNCH_CMD="${LAUNCH_CMD:-deepspeed}" From 26f2e71f1aa81222bce6d6643b3e57c8fc1ea6dc Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Thu, 7 Nov 2024 13:33:17 -0600 Subject: [PATCH 90/92] Update python.yml --- .github/workflows/python.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 2f9765af24..8e1dd8f771 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -16,7 +16,7 @@ jobs: unit-tests: strategy: matrix: - pyVersion: ["3.7", "3.8", "3.9", "3.10"] + pyVersion: ["3.9", "3.10", "3.11", "3.12"] fail-fast: false runs-on: ubuntu-22.04 From 48b3c81fc54a3d0bd45b9a7c52cdf432f2bbbfc6 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Thu, 7 Nov 2024 13:49:05 -0600 Subject: [PATCH 91/92] Update python.yml --- .github/workflows/python.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 8e1dd8f771..7f5ffafc24 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -16,7 +16,7 @@ jobs: unit-tests: strategy: matrix: - pyVersion: ["3.9", "3.10", "3.11", "3.12"] + pyVersion: ["3.10", "3.12"] fail-fast: false runs-on: ubuntu-22.04 From 0a997bb0e5b810b6a74df7278c7a2e9cbd261af4 Mon Sep 17 00:00:00 2001 From: Sam Foreman Date: Thu, 7 Nov 2024 14:08:21 -0600 Subject: [PATCH 92/92] Update python.yml --- .github/workflows/python.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 7f5ffafc24..6746ff98dc 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -16,7 +16,7 @@ jobs: unit-tests: strategy: matrix: - pyVersion: ["3.10", "3.12"] + pyVersion: ["3.10"] fail-fast: false runs-on: ubuntu-22.04