From f07d85cd3c1ba76a8e4e61b5ed84840ea9b7f51e Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Wed, 6 Nov 2024 15:10:02 -0500 Subject: [PATCH 01/13] Add ncov_metadata property to Tree class Since it's possible to mix and match sequence_as_of and tree_as_of dates in cladetime, sequences and reference trees may have different ncov_metadata attributes (dataset version, Nextclade CLI version, for example). Add an ncov_metadata property to Tree that reflects metadata for the tree_as_of date (as opposed to CladeTime's ncov_metadata property, which reflects sequence_as_of). We'll use this new property to make sure we're using the correct nextclade dataset when assigning clades. --- src/cladetime/tree.py | 55 +++++++++++++++++++++------------- tests/integration/test_tree.py | 13 ++++++++ 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/src/cladetime/tree.py b/src/cladetime/tree.py index 59b8e03..35ec93f 100644 --- a/src/cladetime/tree.py +++ b/src/cladetime/tree.py @@ -8,7 +8,7 @@ import structlog -from cladetime import CladeTime +from cladetime import CladeTime, sequence from cladetime.exceptions import NextcladeNotAvailableError, TreeNotAvailableError from cladetime.util.reference import _docker_installed, _get_nextclade_dataset, _get_s3_object_url from cladetime.util.sequence import _get_ncov_metadata @@ -35,9 +35,19 @@ def __init__(self, clade_time: CladeTime): """Tree constructor.""" self._clade_time = clade_time self.as_of = self._clade_time.tree_as_of - self._nextclade_data_url = self._clade_time._config.nextclade_data_url - self._nextclade_data_url_version = self._clade_time._config.nextclade_data_url_version - self._tree_name = self._clade_time._config.nextclade_input_tree_name + self._config = self._clade_time._config + self._nextclade_data_url = self._config.nextclade_data_url + self._nextclade_data_url_version = self._config.nextclade_data_url_version + self._tree_name = self._config.nextclade_input_tree_name + + # Nextstrain began publishing ncov 
pipeline metadata starting on 2024-08-01 + if self.as_of >= self._config.nextstrain_min_ncov_metadata_date: + self.url_ncov_metadata = _get_s3_object_url( + self._config.nextstrain_ncov_bucket, self._config.nextstrain_ncov_metadata_key, self.as_of + )[1] + else: + self.url_ncov_metadata = None + self._ncov_metadata = self.ncov_metadata self._url = self.url def __repr__(self): @@ -47,6 +57,19 @@ def __repr__(self): def __str__(self): return f"Represents Nexclade reference tree data as of {self.as_of.strftime('%Y-%m-%d')}" + @property + def ncov_metadata(self) -> dict: + """ + dict : Metadata from the Nextstrain pipeline run that corresponds + to as_of. + """ + if self.url_ncov_metadata: + metadata = sequence._get_ncov_metadata(self.url_ncov_metadata) + return metadata + else: + metadata = {} + return metadata + @property def url(self) -> str: """ @@ -100,7 +123,7 @@ def _get_tree_url(self): # we can only reliably retrieve the a past reference tree if we # have access to the ncov metadata for that date - min_tree_as_of = self._clade_time._config.nextstrain_min_ncov_metadata_date + min_tree_as_of = self._config.nextstrain_min_ncov_metadata_date if min_tree_as_of > self.as_of: logger.error("Reference tree not available", tree_as_of=self.as_of) raise TreeNotAvailableError( @@ -108,7 +131,7 @@ def _get_tree_url(self): ) # get the ncov metadata as of the CladeTime's tree_as_of date - url_ncov_metadata = self._get_url_ncov_metadata() + url_ncov_metadata = self.url_ncov_metadata if url_ncov_metadata is None: logger.error("Reference tree not available", tree_as_of=self.clade_time.tree_as_of) @@ -125,14 +148,6 @@ def _get_tree_url(self): ) return tree_url - def _get_url_ncov_metadata(self): - """Get the URL to the ncov metadata file for the tree_as_of date.""" - return _get_s3_object_url( - self._clade_time._config.nextstrain_ncov_bucket, - self._clade_time._config.nextstrain_ncov_metadata_key, - self.as_of, - )[1] - def _get_reference_tree(self) -> dict: """Return a 
reference tree used for SARS-CoV-2 clade assignments @@ -147,18 +162,16 @@ def _get_reference_tree(self) -> dict: A Python dictionary that represents the reference tree. """ # get the ncov metadata as of the CladeTime's tree_as_of date - url_ncov_metadata = self._get_url_ncov_metadata() - if url_ncov_metadata is None: + if self.url_ncov_metadata is None: logger.error("Reference tree not available", tree_as_of=self.as_of) raise TreeNotAvailableError(f"Reference tree not available for {self.as_of}") - ncov_metadata = _get_ncov_metadata(url_ncov_metadata) - nextclade_version_num = ncov_metadata.get("nextclade_version_num", "") - nextclade_dataset_name = ncov_metadata.get("nextclade_dataset_name", "") - nextclade_dataset_version = ncov_metadata.get("nextclade_dataset_version", "") + nextclade_version_num = self.ncov_metadata.get("nextclade_version_num", "") + nextclade_dataset_name = self.ncov_metadata.get("nextclade_dataset_name", "") + nextclade_dataset_version = self.ncov_metadata.get("nextclade_dataset_version", "") if not all([nextclade_version_num, nextclade_dataset_name, nextclade_dataset_version]): logger.error("Incomplete ncov metadata", tree_as_of=self._clade_time.tree_as_of) - raise TreeNotAvailableError(f"Incomplete ncov metadata {ncov_metadata}") + raise TreeNotAvailableError(f"Incomplete ncov metadata {self.ncov_metadata}") with tempfile.TemporaryDirectory() as tmpdir: nextclade_dataset = _get_nextclade_dataset( diff --git a/tests/integration/test_tree.py b/tests/integration/test_tree.py index 96cd3c8..460f027 100644 --- a/tests/integration/test_tree.py +++ b/tests/integration/test_tree.py @@ -1,3 +1,4 @@ +from datetime import datetime from urllib.parse import urlparse import pytest @@ -24,6 +25,18 @@ def test__get_tree_url_bad_date(): Tree(CladeTime(tree_as_of="2024-07-13")) +def test_tree_ncov_metadata(): + with freeze_time("2024-11-05 16:21:34"): + # when tree_as_of <> sequence_as_of, the respective ncov_metadata + # properties of CladeTime and Tree 
may differ + ct = CladeTime(sequence_as_of=datetime.now(), tree_as_of="2024-08-02") + tree = Tree(ct) + assert tree.ncov_metadata.get("nextclade_version_num") == "3.8.2" + assert tree.ncov_metadata.get("nextclade_dataset_version") == "2024-07-17--12-57-03Z" + assert ct.ncov_metadata.get("nextclade_version_num") == "3.9.1" + assert ct.ncov_metadata.get("nextclade_dataset_version") == "2024-10-17--16-48-48Z" + + @pytest.mark.skipif(not docker_enabled, reason="Docker is not installed") def test__get_reference_tree(): with freeze_time("2024-08-13 16:21:34"): From d516677248900e02583d1ae0b9765168ca04d260 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Wed, 6 Nov 2024 16:03:36 -0500 Subject: [PATCH 02/13] Use "strain" as the id for filtering sequences Still in the NCBI mindset, earlier versions of sequence.filter used accession numbers to compare .fasta records to a set of sequence "ids". However, for the processed Nextstrain sequences, we need to use the "strain" column --- src/cladetime/assign_clades.py | 3 +- src/cladetime/sequence.py | 59 +++++++++++++++++---------------- tests/data/test_metadata.tsv | 60 +++++++++++++++++----------------- tests/unit/test_sequence.py | 17 ++++------ 4 files changed, 68 insertions(+), 71 deletions(-) diff --git a/src/cladetime/assign_clades.py b/src/cladetime/assign_clades.py index 6ac5171..b74b383 100644 --- a/src/cladetime/assign_clades.py +++ b/src/cladetime/assign_clades.py @@ -53,8 +53,7 @@ def get_sequence_metadata(metadata: pl.DataFrame, sequence_collection_date: date "country", "date", "division", - "genbank_accession", - "genbank_accession_rev", + "strain", "host", ] diff --git a/src/cladetime/sequence.py b/src/cladetime/sequence.py index 1fa79b4..07d19fc 100644 --- a/src/cladetime/sequence.py +++ b/src/cladetime/sequence.py @@ -26,6 +26,7 @@ def _download_from_url(session: Session, url: str, data_path: Path) -> Path: parsed_url = urlparse(url) url_filename = os.path.basename(parsed_url.path) + data_path.mkdir(parents=True, 
exist_ok=True) filename = data_path / url_filename with session.get(url, stream=True) as result: @@ -131,8 +132,7 @@ def filter_metadata( cols : list Optional. A list of columns to include in the filtered metadata. The default columns included in the filtered metadata are: - clade_nextstrain, country, date, division, genbank_accession, - genbank_accession_rev, host + clade_nextstrain, country, date, division, strain, host state_format : :class:`cladetime.types.StateFormat` Optional. The state name format returned in the filtered metadata's location column. Defaults to `StateFormat.ABBR` @@ -167,19 +167,19 @@ def filter_metadata( >>> filtered_metadata = filter_covid_genome_metadata(ct.sequence_metadata) >>> filtered_metadata.collect().head(5) shape: (5, 7) - ┌───────┬─────────┬────────────┬────────────┬────────────┬──────────────┬──────┬ - │ clade ┆ country ┆ date ┆ genbank_ ┆ genbank_ac ┆ host ┆ loca │ - │ ┆ ┆ ┆ accession ┆ cession_rev┆ ┆ tion │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ str ┆ str ┆ date ┆ str ┆ str ┆ str ┆ str │ - │ ┆ ┆ ┆ ┆ ┆ ┆ │ - ╞═══════╪═════════╪════════════╪════════════╪════════════╪══════════════╪══════╡ - │ 22A ┆ USA ┆ 2022-07-07 ┆ PP223234 ┆ PP223234.1 ┆ Homo sapiens ┆ AL │ - │ 22B ┆ USA ┆ 2022-07-02 ┆ PP223435 ┆ PP223435.1 ┆ Homo sapiens ┆ AZ │ - │ 22B ┆ USA ┆ 2022-07-19 ┆ PP223235 ┆ PP223235.1 ┆ Homo sapiens ┆ AZ │ - │ 22B ┆ USA ┆ 2022-07-15 ┆ PP223236 ┆ PP223236.1 ┆ Homo sapiens ┆ AZ │ - │ 22B ┆ USA ┆ 2022-07-20 ┆ PP223237 ┆ PP223237.1 ┆ Homo sapiens ┆ AZ │ - └───────┴─────────┴────────────┴────────────┴────────────┴─────────────────────┴ + ┌───────┬─────────┬────────────┬────────────────────────────┬──────────────┬──────┬ + │ clade ┆ country ┆ date ┆ strain ┆ host ┆ loca │ + │ ┆ ┆ ┆ ┆ ┆ tion │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ str ┆ str ┆ date ┆ str ┆ str ┆ str │ + │ ┆ ┆ ┆ ┆ ┆ │ + ╞═══════╪═════════╪════════════╪════════════════════════════╪══════════════╪══════╡ + │ 22A ┆ USA ┆ 2022-07-07 ┆ Alabama/SEARCH-202312/2022 
┆ Homo sapiens ┆ AL │ + │ 22B ┆ USA ┆ 2022-07-02 ┆ Arizona/SEARCH-201153/2022 ┆ Homo sapiens ┆ AZ │ + │ 22B ┆ USA ┆ 2022-07-19 ┆ Arizona/SEARCH-203528/2022 ┆ Homo sapiens ┆ AZ │ + │ 22B ┆ USA ┆ 2022-07-15 ┆ Arizona/SEARCH-203621/2022 ┆ Homo sapiens ┆ AZ │ + │ 22B ┆ USA ┆ 2022-07-20 ┆ Arizona/SEARCH-203625/2022 ┆ Homo sapiens ┆ AZ │ + └───────┴─────────┴────────────┴────────────────────────────┴─────────────────────┴ """ if state_format not in StateFormat: raise ValueError(f"Invalid state_format. Must be one of: {list(StateFormat.__members__.items())}") @@ -191,8 +191,7 @@ def filter_metadata( "country", "date", "division", - "genbank_accession", - "genbank_accession_rev", + "strain", "host", ] @@ -256,7 +255,8 @@ def get_metadata_ids(sequence_metadata: pl.DataFrame | pl.LazyFrame) -> set: """Return sequence IDs for a specified set of Nextstrain sequence metadata. For a given input of GenBank-based SARS-Cov-2 sequence metadata (as - published by Nextstrain), return a set of GenBank accession numbers. + published by Nextstrain), return a set of strains. This function is + mostly used to filter a sequence file. 
Parameters ---------- @@ -265,21 +265,23 @@ def get_metadata_ids(sequence_metadata: pl.DataFrame | pl.LazyFrame) -> set: Returns ------- set - A set of GenBank accession numbers + A set of + :external:doc:`strains` Raises ------ ValueError - If the sequence metadata does not contain a genbank_accession column + If the sequence metadata does not contain a strain column """ + logger.info("Collecting sequence IDs from metadata") metadata_columns = sequence_metadata.collect_schema().names() - if "genbank_accession" not in metadata_columns: - logger.error("Missing column from sequence_metadata", column="genbank_accession") - raise ValueError("Sequence metadata does not contain a genbank_accession column.") - sequences = sequence_metadata.select("genbank_accession").unique() + if "strain" not in metadata_columns: + logger.error("Missing column from sequence_metadata", column="strain") + raise ValueError("Sequence metadata does not contain a strain column.") + sequences = sequence_metadata.select("strain").unique() if isinstance(sequence_metadata, pl.LazyFrame): sequences = sequences.collect() # type: ignore - seq_set = set(sequences["genbank_accession"].to_list()) # type: ignore + seq_set = set(sequences["strain"].to_list()) # type: ignore return seq_set @@ -302,17 +304,18 @@ def parse_sequence_assignments(df_assignments: pl.DataFrame) -> pl.DataFrame: return df_assignments +@time_function def filter(sequence_ids: set, url_sequence: str, output_path: Path) -> Path: """Filter a fasta file against a specific set of sequences. Download a sequence file (in FASTA format) from Nexstrain, filter - it against a set of specific sequence ids (GenBank accession numbers), - and write the filtered sequences to a new file. + it against a set of specific strains, and write the filtered + sequences to a new file. 
Parameters ---------- sequence_ids : set - GenBank accession numbers used to filter the sequence file + Strains used to filter the sequence file url_sequence : str The URL to a file of SARS-CoV-2 GenBank sequences published by Nexstrain. The file is should be in .fasta format using the lzma compression diff --git a/tests/data/test_metadata.tsv b/tests/data/test_metadata.tsv index 1021de5..5b15600 100644 --- a/tests/data/test_metadata.tsv +++ b/tests/data/test_metadata.tsv @@ -1,30 +1,30 @@ -genbank_accession genbank_accession_rev unwanted_column date host country division clade_nextstrain location another unwanted column -abc abc.1 i ❤️ wombats 2024-09-01 Homo sapiens USA Massachusetts AA.ZZ Vulcan hummus a tune -abc abc.1 i ❤️ wombats 2024-09-01 Homo sapiens USA Massachusetts AA.ZZ Vulcan hummus a tune -def def.1 i ❤️ wombats 2024-09-01 Homo sapiens USA Massachusetts AA.ZZ Earth hummus a tune -ghi ghi.4 i ❤️ wombats 2024-09-01 Homo sapiens USA Utah BB Cardassia hummus a tune -jkl jkl.1 i ❤️ wombats 2024-09-01 Homo sapiens USA Utah CC Bajor hummus a tune -mno mno.1 i ❤️ wombats 2024-09-01 Homo sapiens Canada Alberta DD Vulcan hummus a tune -mno mno.1 i ❤️ wombats 2024-09-01 marmots USA Massachusetts DD Vulcan hummus a tune -mno mno.1 i ❤️ wombats 2024-09-01 Homo sapiens USA Puerto Rico DD Reisa hummus a tune -abc abc.1 i ❤️ wombats 2024-09-08 Homo sapiens USA Massachusetts EE Vulcan hummus a tune -abc abc.1 i ❤️ wombats 2024-09-08 Homo sapiens USA Massachusetts EE Vulcan hummus a tune -def def.1 i ❤️ wombats 2024-09-08 Homo sapiens USA Massachusetts DD Earth hummus a tune -ghi ghi.4 i ❤️ wombats 2024-09-08 Homo sapiens USA Utah AA Cardassia hummus a tune -jkl jkl.1 i ❤️ wombats 2024-09-08 Homo sapiens USA Utah AA.ZZ Bajor hummus a tune -abc abc.1 i ❤️ wombats 2024-09-15 Homo sapiens USA Massachusetts AA Vulcan hummus a tune -abc abc.1 i ❤️ wombats 2024-09-15 Homo sapiens USA Massachusetts AA Vulcan hummus a tune -def def.1 i ❤️ wombats 2024-09-15 Homo sapiens USA 
Massachusetts AA Earth hummus a tune -ghi ghi.4 i ❤️ wombats 2024-09-15 Homo sapiens USA Utah BB Cardassia hummus a tune -jkl jkl.1 i ❤️ wombats 2024-09-15 Homo sapiens USA Utah CC Bajor hummus a tune -mno mno.1 i ❤️ wombats 2024-09-15 Homo sapiens Canada Mississippi DD Earth hummus a tune -mno mno.1 i ❤️ wombats 2024-09-15 marmots USA Massachusetts DD Cardassia hummus a tune -mno mno.1 i ❤️ wombats 2024-09-15 Homo sapiens USA Puerto Rico DD Bajor hummus a tune -abcd abcd.1 i ❤️ wombats 2024-09-22 Homo sapiens USA Massachusetts FF Vulcan hummus a tune -abc abc.1 i ❤️ wombats 2024-09-22 Homo sapiens USA Massachusetts AA Vulcan hummus a tune -def def.1 i ❤️ wombats 2024-09-22 Homo sapiens USA Massachusetts AA Earth hummus a tune -ghi ghi.4 i ❤️ wombats 2024-09-22 Homo sapiens USA Utah BB Cardassia hummus a tune -jkl jkl.1 i ❤️ wombats 2024-09-22 Homo sapiens USA Utah CC Bajor hummus a tune -mno mno.1 i ❤️ wombats 2024-09-22 Homo sapiens Canada Mississippi FF Earth hummus a tune -mno mno.1 i ❤️ wombats 2024-09-22 marmots USA Massachusetts FF Cardassia hummus a tune -mno mno.1 i ❤️ wombats 2024-09-22 Homo sapiens USA Guam FF Bajor hummus a tune +strain unwanted_column date host country division clade_nextstrain location another unwanted column +Abc/SEARCH-123/2022 i ❤️ wombats 2024-09-01 Homo sapiens USA Massachusetts AA.ZZ Vulcan hummus a tune +Abc/VULCAN-123/2022 i ❤️ wombats 2024-09-01 Homo sapiens USA Massachusetts AA.ZZ Vulcan hummus a tune +Def/VULCAN-XXX/3024 i ❤️ wombats 2024-09-01 Homo sapiens USA Massachusetts AA.ZZ Earth hummus a tune +Cardassia/SEARCH-123/2000 i ❤️ wombats 2024-09-01 Homo sapiens USA Utah BB Cardassia hummus a tune +Bajor/STRAIN-789/2450 i ❤️ wombats 2024-09-01 Homo sapiens USA Utah CC Bajor hummus a tune +Canada/STRAIN-WWW/2022 i ❤️ wombats 2024-09-01 Homo sapiens Canada Alberta DD Vulcan hummus a tune +Massachusetts/SEARCH-123/2022 i ❤️ wombats 2024-09-01 marmots USA Massachusetts DD Vulcan hummus a tune +PR/STRAIN-QQQ/2022 i ❤️ wombats 
2024-09-01 Homo sapiens USA Puerto Rico DD Reisa hummus a tune +Massachusetts/SEARCH-123/2024 i ❤️ wombats 2024-09-08 Homo sapiens USA Massachusetts EE Vulcan hummus a tune +Massachusetts/SEARCH-123/2025 i ❤️ wombats 2024-09-08 Homo sapiens USA Massachusetts EE Vulcan hummus a tune +Massachusetts/SEARCH-123/2026 i ❤️ wombats 2024-09-08 Homo sapiens USA Massachusetts DD Earth hummus a tune +Cardassia/STRAIN-EEE/3001 i ❤️ wombats 2024-09-08 Homo sapiens USA Utah AA Cardassia hummus a tune +Utah/STRAIN-123/2022 i ❤️ wombats 2024-09-08 Homo sapiens USA Utah AA.ZZ Bajor hummus a tune +Massachusetts/SEARCH-123/2027 i ❤️ wombats 2024-09-15 Homo sapiens USA Massachusetts AA Vulcan hummus a tune +Massachusetts/SEARCH-123/2028 i ❤️ wombats 2024-09-15 Homo sapiens USA Massachusetts AA Vulcan hummus a tune +Massachusetts/SEARCH-123/2029 i ❤️ wombats 2024-09-15 Homo sapiens USA Massachusetts AA Earth hummus a tune +Utah/STRAIN-456/2022 i ❤️ wombats 2024-09-15 Homo sapiens USA Utah BB Cardassia hummus a tune +Utah/STRAIN-456/2023 i ❤️ wombats 2024-09-15 Homo sapiens USA Utah CC Bajor hummus a tune +Canada!/SEARCH-123/2022 i ❤️ wombats 2024-09-15 Homo sapiens Canada Mississippi DD Earth hummus a tune +Massachusetts/SEARCH-456/2025 i ❤️ wombats 2024-09-15 marmots USA Massachusetts DD Cardassia hummus a tune +PR/SEARCH-123/2030 i ❤️ wombats 2024-09-15 Homo sapiens USA Puerto Rico DD Bajor hummus a tune +Massachusetts/SEARCH-456/2026 i ❤️ wombats 2024-09-22 Homo sapiens USA Massachusetts FF Vulcan hummus a tune +Massachusetts/SEARCH-456/2027 i ❤️ wombats 2024-09-22 Homo sapiens USA Massachusetts AA Vulcan hummus a tune +Massachusetts/SEARCH-456/2028 i ❤️ wombats 2024-09-22 Homo sapiens USA Massachusetts AA Earth hummus a tune +Utah/STRAIN-456/2031 i ❤️ wombats 2024-09-22 Homo sapiens USA Utah BB Cardassia hummus a tune +Utah/STRAIN-456/2032 i ❤️ wombats 2024-09-22 Homo sapiens USA Utah CC Bajor hummus a tune +CanadaAgain/STRAIN-JJJ/XXXX i ❤️ wombats 2024-09-22 Homo sapiens Canada 
Mississippi FF Earth hummus a tune +Massachusetts/SEARCH-456/2029 i ❤️ wombats 2024-09-22 marmots USA Massachusetts FF Cardassia hummus a tune +Guam/SEARCH-123/2022 i ❤️ wombats 2024-09-22 Homo sapiens USA Guam FF Bajor hummus a tune diff --git a/tests/unit/test_sequence.py b/tests/unit/test_sequence.py index f6b8e49..035a2da 100644 --- a/tests/unit/test_sequence.py +++ b/tests/unit/test_sequence.py @@ -46,8 +46,7 @@ def test_get_metadata(test_file_path, metadata_file): "country", "division", "clade_nextstrain", - "genbank_accession", - "genbank_accession_rev", + "strain", } assert expected_cols.issubset(metadata_cols) @@ -81,8 +80,7 @@ def test_filter_metadata(): "division": ["Alaska", "Maine", "Guam", "Puerto Rico", "Utah", "Washington DC", "Pennsylvania"], "clade_nextstrain": ["AAA", "BBB", "CCC", "DDD", "EEE", "FFF", "FFF"], "location": ["Vulcan", "Reisa", "Bajor", "Deep Space 9", "Earth", "Cardassia", "Cardassia"], - "genbank_accession": ["A1", "A2", "B1", "B2", "C1", "C2", "C2"], - "genbank_accession_rev": ["A1.1", "A2.4", "B1.1", "B2.5", "C1.1", "C2.1", "C2.1"], + "strain": ["A1", "A2", "B1", "B2", "C1", "C2", "C2"], "unwanted_column": [1, 2, 3, 4, 5, 6, 7], } @@ -101,8 +99,7 @@ def test_filter_metadata(): "clade": pl.String, "country": pl.String, "date": pl.Date, - "genbank_accession": pl.String, - "genbank_accession_rev": pl.String, + "strain": pl.String, "host": pl.String, "location": pl.String, } @@ -118,8 +115,7 @@ def test_filter_metadata_state_name(): "country": ["USA"] * num_test_rows, "clade_nextstrain": ["AAA"] * num_test_rows, "location": ["Earth"] * num_test_rows, - "genbank_accession": ["A1"] * num_test_rows, - "genbank_accession_rev": ["A1.1"] * num_test_rows, + "strain": ["A1"] * num_test_rows, "division": ["Alaska", "Puerto Rico", "Washington DC", "Fake State"], } @@ -142,8 +138,7 @@ def test_filter_metadata_state_fips(): "country": ["USA"] * num_test_rows, "clade_nextstrain": ["AAA"] * num_test_rows, "location": ["Earth"] * num_test_rows, - 
"genbank_accession": ["A1"] * num_test_rows, - "genbank_accession_rev": ["A1.1"] * num_test_rows, + "strain": ["A1"] * num_test_rows, "division": ["Massachusetts", "Puerto Rico", "Washington DC", "Fake State"], } @@ -160,7 +155,7 @@ def test_filter_metadata_state_fips(): def test_get_metadata_ids(): metadata = { - "genbank_accession": ["A1", "A2", "A2", "A4"], + "strain": ["A1", "A2", "A2", "A4"], "country": ["USA", "Canada", "Mexico", "Brazil"], "location": ["Earth", "Earth", "Earth", "Earth"], } From 477e01efd421021de433b592dfd13a193c2fd2e3 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Thu, 7 Nov 2024 11:38:32 -0500 Subject: [PATCH 03/13] Make the integration test_file_path fixture shareable --- tests/integration/conftest.py | 12 ++++++++++++ tests/integration/test_nextclade_integration.py | 10 ---------- 2 files changed, 12 insertions(+), 10 deletions(-) create mode 100644 tests/integration/conftest.py diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py new file mode 100644 index 0000000..a45f063 --- /dev/null +++ b/tests/integration/conftest.py @@ -0,0 +1,12 @@ +from pathlib import Path + +import pytest + + +@pytest.fixture +def test_file_path() -> Path: + """ + Return path to the unit test files. + """ + test_file_path = Path(__file__).parents[1].joinpath("data") + return test_file_path diff --git a/tests/integration/test_nextclade_integration.py b/tests/integration/test_nextclade_integration.py index cdcf7bf..c67e095 100644 --- a/tests/integration/test_nextclade_integration.py +++ b/tests/integration/test_nextclade_integration.py @@ -1,5 +1,4 @@ import zipfile -from pathlib import Path import polars as pl import pytest @@ -9,15 +8,6 @@ docker_enabled = _docker_installed() -@pytest.fixture -def test_file_path() -> Path: - """ - Return path to the unit test files. 
- """ - test_file_path = Path(__file__).parents[1].joinpath("data") - return test_file_path - - @pytest.mark.skipif(not docker_enabled, reason="Docker is not installed") def test_get_nextclade_dataset(tmp_path): dataset_path = _get_nextclade_dataset("latest", "sars-cov-2", "2024-07-17--12-57-03Z", tmp_path) From b03f01e3183c0e905213ecfea3718d05d0183763 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Thu, 7 Nov 2024 14:13:42 -0500 Subject: [PATCH 04/13] Simplify path handling when interacting with docker --- src/cladetime/util/reference.py | 29 ++++++++++--------- .../integration/test_nextclade_integration.py | 8 ++--- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/cladetime/util/reference.py b/src/cladetime/util/reference.py index 6c6984c..052eaba 100644 --- a/src/cladetime/util/reference.py +++ b/src/cladetime/util/reference.py @@ -71,7 +71,7 @@ def _get_s3_object_url(bucket_name: str, object_key: str, date: datetime) -> Tup def _run_nextclade_cli( - nextclade_cli_version: str, nextclade_command: list[str], output_file: Path, input_files: list[Path] | None = None + nextclade_cli_version: str, nextclade_command: list[str], output_path: Path, input_files: list[Path] | None = None ) -> Path: """Invoke Nextclade CLI commands via Docker.""" @@ -83,7 +83,6 @@ def _run_nextclade_cli( "Unable to create client for Nextstrain CLI. Is Docker installed and running?" 
) from err - output_path = output_file.parent volumes = {str(output_path): {"bind": "/data/", "mode": "rw"}} # if the nextclade command requires input files, add those to the volumes @@ -92,9 +91,10 @@ def _run_nextclade_cli( for file in input_files: volumes[str(file)] = {"bind": f"/data/{file.name}", "mode": "rw"} + image = f"nextstrain/nextclade:{nextclade_cli_version}" try: client.containers.run( - image=f"nextstrain/nextclade:{nextclade_cli_version}", + image=image, command=nextclade_command, volumes=volumes, remove=True, @@ -104,14 +104,13 @@ def _run_nextclade_cli( msg = "Error running Nextclade CLI via Docker" logger.error( msg, - cli_version=nextclade_cli_version, + image=image, command=nextclade_command, + volumes=volumes, error=err, ) raise NextcladeNotAvailableError(msg) from err - return output_file - def _get_nextclade_dataset( nextclade_cli_version: str, dataset_name: str, dataset_version: str, output_path: Path @@ -159,13 +158,13 @@ def _get_nextclade_dataset( f"/data/{zip_filename}", ] - _run_nextclade_cli(nextclade_cli_version, command, output_file) + _run_nextclade_cli(nextclade_cli_version, command, output_path) return output_file def _get_clade_assignments( - nextclade_cli_version: str, sequence_file: Path, nextclade_dataset: Path, output_path: Path + nextclade_cli_version: str, sequence_file: Path, nextclade_dataset: Path, output_file: Path ) -> Path: """Assign clades to sequences using the Nextclade CLI. @@ -186,8 +185,8 @@ def _get_clade_assignments( that contains the reference tree and root sequence to use for clade assignment. Use :func:`get_nextclade_dataset` to get a dataset that corresponds to a specific point in time. - output_path : pathlib.Path - Where to save the clade assignment file + output_file : pathlib.Path + The full filename to use for saving the clade assignment output. 
Returns ------- @@ -202,9 +201,11 @@ def _get_clade_assignments( If there is an error creating a Docker client or running Nextclade CLI commands using the Docker image. """ - assignment_filename = "nextclade_assignment.csv" - output_file = output_path / assignment_filename - output_path.parent.mkdir(parents=True, exist_ok=True) + if not output_file.suffix: + raise ValueError("output_file should be a full path to the output file, including filename") + output_path = output_file.parent + output_path.mkdir(parents=True, exist_ok=True) + assignment_filename = output_file.name # all files in the input_files list will be mounted to # the docker image's "/data/" directory when running @@ -222,6 +223,6 @@ def _get_clade_assignments( f"/data/{sequence_file.name}", ] - _run_nextclade_cli(nextclade_cli_version, command, output_file, input_files=input_files) + _run_nextclade_cli(nextclade_cli_version, command, output_path, input_files=input_files) return output_file diff --git a/tests/integration/test_nextclade_integration.py b/tests/integration/test_nextclade_integration.py index c67e095..b667781 100644 --- a/tests/integration/test_nextclade_integration.py +++ b/tests/integration/test_nextclade_integration.py @@ -31,9 +31,9 @@ def test_get_clade_assignments(test_file_path, tmp_path): sequence_file = test_file_path / "test_sequences.fasta" nextclade_dataset = test_file_path / "test_nextclade_dataset.zip" # _get_clade_assignments should create the output directory if it doesn't exist - output_path = tmp_path / "clade_assignments" + output_file = tmp_path / "clade_assignments" / "nextclade_assignments.csv" - assignment_file = _get_clade_assignments("latest", sequence_file, nextclade_dataset, output_path) + assignment_file = _get_clade_assignments("latest", sequence_file, nextclade_dataset, output_file) assignment_df = pl.read_csv(assignment_file, separator=";").select( ["seqName", "clade", "clade_nextstrain", "Nextclade_pango"] ) @@ -49,9 +49,9 @@ def 
test_get_clade_assignments_no_matches(test_file_path, tmp_path): sequence_file = test_file_path / "test_sequences_fake.fasta" nextclade_dataset = test_file_path / "test_nextclade_dataset.zip" # _get_clade_assignments should create the output directory if it doesn't exist - output_path = tmp_path / "clade_assignments" + output_file = tmp_path / "clade_assignments" / "nextclade_assignments.csv" - assignment_file = _get_clade_assignments("latest", sequence_file, nextclade_dataset, output_path) + assignment_file = _get_clade_assignments("latest", sequence_file, nextclade_dataset, output_file) assignment_df = pl.read_csv(assignment_file, separator=";").select( ["seqName", "clade", "clade_nextstrain", "Nextclade_pango"] ) From 049ec1bdd7757463df2c0cdc4987d842af71f5ea Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Thu, 7 Nov 2024 17:10:34 -0500 Subject: [PATCH 05/13] Fix circular import / change the signature of Tree We will need to instantiate a Tree object from CladeTime when assigning clade sequences. 
Thus, we shouldn't use CladeTime objects to do this because of circular dependencies --- pyproject.toml | 4 ++++ src/cladetime/__init__.py | 3 ++- src/cladetime/tree.py | 23 +++++++++++++++-------- tests/integration/test_tree.py | 13 ++++++++----- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2545c82..dd60a8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,6 +86,10 @@ testpaths = [ line-length = 120 lint.extend-select = ["I"] +[tool.ruff.lint.per-file-ignores] +# Ignore import formatting rules in `__init__.py` +"__init__.py" = ["I001"] + [tools.setuptools] packages = ["cladetime"] diff --git a/src/cladetime/__init__.py b/src/cladetime/__init__.py index 6885143..73e36d0 100644 --- a/src/cladetime/__init__.py +++ b/src/cladetime/__init__.py @@ -3,8 +3,9 @@ import structlog -from cladetime.cladetime import CladeTime +# must import Tree before CladeTime from cladetime.tree import Tree +from cladetime.cladetime import CladeTime from cladetime.util.reference import _docker_installed __all__ = ["CladeTime", "Tree"] diff --git a/src/cladetime/tree.py b/src/cladetime/tree.py index 35ec93f..2785d3a 100644 --- a/src/cladetime/tree.py +++ b/src/cladetime/tree.py @@ -3,14 +3,20 @@ import json import tempfile import zipfile +from datetime import datetime from pathlib import Path from urllib.parse import urljoin import structlog -from cladetime import CladeTime, sequence +from cladetime import sequence from cladetime.exceptions import NextcladeNotAvailableError, TreeNotAvailableError -from cladetime.util.reference import _docker_installed, _get_nextclade_dataset, _get_s3_object_url +from cladetime.util.config import Config +from cladetime.util.reference import ( + _docker_installed, + _get_nextclade_dataset, + _get_s3_object_url, +) from cladetime.util.sequence import _get_ncov_metadata logger = structlog.get_logger() @@ -31,22 +37,23 @@ class Tree: Nextstrain reference tree represented by this Tree instance. 
""" - def __init__(self, clade_time: CladeTime): + def __init__(self, tree_as_of: datetime, url_sequence: str): """Tree constructor.""" - self._clade_time = clade_time - self.as_of = self._clade_time.tree_as_of - self._config = self._clade_time._config + self._config = Config() + self.as_of = tree_as_of + self.url_sequence = url_sequence self._nextclade_data_url = self._config.nextclade_data_url self._nextclade_data_url_version = self._config.nextclade_data_url_version self._tree_name = self._config.nextclade_input_tree_name # Nextstrain began publishing ncov pipeline metadata starting on 2024-08-01 - if self.as_of >= self._config.nextstrain_min_ncov_metadata_date: + min_tree_date = self._config.nextstrain_min_ncov_metadata_date + if self.as_of >= min_tree_date: self.url_ncov_metadata = _get_s3_object_url( self._config.nextstrain_ncov_bucket, self._config.nextstrain_ncov_metadata_key, self.as_of )[1] else: - self.url_ncov_metadata = None + raise TreeNotAvailableError(f"References tree not available for dates prior to {min_tree_date}") self._ncov_metadata = self.ncov_metadata self._url = self.url diff --git a/tests/integration/test_tree.py b/tests/integration/test_tree.py index 460f027..b118007 100644 --- a/tests/integration/test_tree.py +++ b/tests/integration/test_tree.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone from urllib.parse import urlparse import pytest @@ -13,7 +13,8 @@ def test__get_tree_url(): with freeze_time("2024-08-13 16:21:34"): - tree = Tree(CladeTime()) + ct = CladeTime() + tree = Tree(ct.tree_as_of, ct.url_sequence) tree_url_parts = urlparse(tree.url) assert "2024-07-17--12-57-03Z" in tree_url_parts.path assert "tree.json" in tree_url_parts.path @@ -21,8 +22,9 @@ def test__get_tree_url(): def test__get_tree_url_bad_date(): # we cannot get reference trees prior to 2024-08-01 + ct = CladeTime() with pytest.raises(TreeNotAvailableError): - Tree(CladeTime(tree_as_of="2024-07-13")) + Tree(datetime(2024, 7, 
13, tzinfo=timezone.utc), ct.url_sequence) def test_tree_ncov_metadata(): @@ -30,7 +32,7 @@ def test_tree_ncov_metadata(): # when tree_as_of <> sequence_as_of, the respective ncov_metadata # properties of CladeTime and Tree may differ ct = CladeTime(sequence_as_of=datetime.now(), tree_as_of="2024-08-02") - tree = Tree(ct) + tree = Tree(ct.tree_as_of, ct.url_sequence) assert tree.ncov_metadata.get("nextclade_version_num") == "3.8.2" assert tree.ncov_metadata.get("nextclade_dataset_version") == "2024-07-17--12-57-03Z" assert ct.ncov_metadata.get("nextclade_version_num") == "3.9.1" @@ -40,5 +42,6 @@ def test_tree_ncov_metadata(): @pytest.mark.skipif(not docker_enabled, reason="Docker is not installed") def test__get_reference_tree(): with freeze_time("2024-08-13 16:21:34"): - tree = Tree(CladeTime()) + ct = CladeTime() + tree = Tree(ct.tree_as_of, ct.url_sequence) assert tree.tree.get("meta", "").get("title", "").lower() == "sars-cov-2 phylogeny" From ba7ff4a6b8bcc4edaf90eb8da067e3ffa271eded Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Fri, 8 Nov 2024 10:32:30 -0500 Subject: [PATCH 06/13] Add collection date parameters to sequence.filter_metadata Adding these parameters allows additional filtering on sequence metadata for min and max collection dates. This is in support of clade assignments, where we'll only want to assign clades to a small subset of sequences based on their collection date. Behavior is unchanged if these new parameters aren't specified. 
--- src/cladetime/sequence.py | 19 ++++++++++++++++++- tests/unit/test_sequence.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/cladetime/sequence.py b/src/cladetime/sequence.py index 07d19fc..9f849f5 100644 --- a/src/cladetime/sequence.py +++ b/src/cladetime/sequence.py @@ -3,6 +3,7 @@ import lzma import os import re +from datetime import datetime from pathlib import Path from urllib.parse import urlparse @@ -108,7 +109,11 @@ def _get_ncov_metadata( def filter_metadata( - metadata: pl.DataFrame | pl.LazyFrame, cols: list | None = None, state_format: StateFormat = StateFormat.ABBR + metadata: pl.DataFrame | pl.LazyFrame, + cols: list | None = None, + state_format: StateFormat = StateFormat.ABBR, + collection_min_date: datetime | None = None, + collection_max_date: datetime | None = None, ) -> pl.DataFrame | pl.LazyFrame: """Apply standard filters to Nextstrain's SARS-CoV-2 sequence metadata. @@ -136,6 +141,12 @@ def filter_metadata( state_format : :class:`cladetime.types.StateFormat` Optional. The state name format returned in the filtered metadata's location column. Defaults to `StateFormat.ABBR` + collection_min_date : datetime.datetime | None + Optional. Return sequences collected on or after this date. + Defaults to None (no minimum date filter). + collection_max_date : datetime.datetime | None + Optional. Return sequences collected on or before this date. + Defaults to None (no maximum date filter). 
Returns ------- @@ -216,6 +227,12 @@ def filter_metadata( ) ) + # Apply filters for min and max sequence collection date, if applicable + if collection_min_date is not None: + filtered_metadata = filtered_metadata.filter(pl.col("date") >= collection_min_date) + if collection_max_date is not None: + filtered_metadata = filtered_metadata.filter(pl.col("date") <= collection_max_date) + # Create state mappings based on state_format parameter, including a DC alias, since # Nextrain's metadata uses a different name than the us package if state_format == StateFormat.FIPS: diff --git a/tests/unit/test_sequence.py b/tests/unit/test_sequence.py index 035a2da..4030463 100644 --- a/tests/unit/test_sequence.py +++ b/tests/unit/test_sequence.py @@ -1,4 +1,5 @@ import lzma +from datetime import datetime from pathlib import Path from unittest.mock import MagicMock, patch @@ -107,6 +108,34 @@ def test_filter_metadata(): assert actual_schema == expected_schema +@pytest.mark.parametrize( + "min_date, max_date, expected_rows", + [ + (datetime(2023, 1, 1), None, 2), + (None, datetime(2023, 1, 1), 2), + (datetime(2022, 1, 3), datetime(2023, 12, 25), 2), + ], +) +def test_filter_metadata_dates(min_date, max_date, expected_rows): + num_test_rows = 7 + test_genome_metadata = { + "date": ["2022-01-01", "2022-01-02", "2022-01-03", "2023-12-25", None, "2023-12-27", "2023-05"], + "host": ["Homo sapiens"] * num_test_rows, + "country": ["USA", "Argentina", "USA", "USA", "USA", "USA", "USA"], + "division": ["Massachusetts"] * num_test_rows, + "clade_nextstrain": ["AAA"] * num_test_rows, + "location": ["Earth"] * num_test_rows, + "strain": ["A1"] * num_test_rows, + } + + lf_metadata = pl.LazyFrame(test_genome_metadata) + lf_filtered = sequence.filter_metadata( + lf_metadata, collection_min_date=min_date, collection_max_date=max_date + ).collect() + + assert len(lf_filtered) == expected_rows + + def test_filter_metadata_state_name(): num_test_rows = 4 test_genome_metadata = { From 
38c384de59a19c66c46e1cacd51fec3397500473 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Fri, 8 Nov 2024 12:08:28 -0500 Subject: [PATCH 07/13] Move date validation function out of cladetime.py This will allow re-use of that function when working with collection begin/end dates in sequence assignment Additional test cases for date commit --- pyproject.toml | 2 +- src/cladetime/cladetime.py | 93 ++++++++++++++++++++------------- src/cladetime/exceptions.py | 2 +- src/cladetime/util/reference.py | 22 +++++++- tests/unit/test_cladetime.py | 49 ++++++++++++----- 5 files changed, 116 insertions(+), 52 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index dd60a8a..63408f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,7 @@ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] tmp_path_retention_policy = "none" filterwarnings = [ - "ignore::cladetime.exceptions.CladeTimeFutureDateWarning", + "ignore::cladetime.exceptions.CladeTimeDateWarning", "ignore::DeprecationWarning", 'ignore:polars found a filename', ] diff --git a/src/cladetime/cladetime.py b/src/cladetime/cladetime.py index f9df23d..187d160 100644 --- a/src/cladetime/cladetime.py +++ b/src/cladetime/cladetime.py @@ -7,9 +7,9 @@ import structlog from cladetime import sequence -from cladetime.exceptions import CladeTimeFutureDateWarning, CladeTimeInvalidDateError, CladeTimeInvalidURLError +from cladetime.exceptions import CladeTimeDateWarning, CladeTimeInvalidURLError from cladetime.util.config import Config -from cladetime.util.reference import _get_s3_object_url +from cladetime.util.reference import _get_date, _get_s3_object_url logger = structlog.get_logger() @@ -34,7 +34,10 @@ class CladeTime: Sets the version of the Nextstrain reference tree that will be used by CladeTime. Can be a datetime object or a string in YYYY-MM-DD format, both of which will be treated as UTC. 
- The default value is :any:`sequence_as_of` + The default value is :any:`sequence_as_of`, + unless sequence_as_of is before reference tree availability + (2024-08-01), in which case tree_as_of will default to the + current time. Attributes ---------- @@ -87,14 +90,30 @@ def sequence_as_of(self) -> datetime: @sequence_as_of.setter def sequence_as_of(self, date) -> None: - sequence_as_of = self._validate_as_of_date(date) + min_sequence_date = self._config.nextstrain_min_seq_date + date_warning = False utc_now = datetime.now(timezone.utc) - if sequence_as_of > utc_now: - warnings.warn( - f"specified sequence_as_of is in the future, defaulting to current time: {utc_now}", - category=CladeTimeFutureDateWarning, - ) + + try: + sequence_as_of = _get_date(date) + except ValueError: + sequence_as_of = utc_now + date_warning = True + + if sequence_as_of < min_sequence_date: + sequence_as_of = utc_now + date_warning = True + elif sequence_as_of > utc_now: sequence_as_of = utc_now + date_warning = True + + if date_warning: + msg = ( + "\nSequence as_of cannot in the future and cannot be earlier than " + f"{min_sequence_date.strftime('%Y-%m-%d')}, defaulting to " + f"current date: {sequence_as_of.strftime('%Y-%m-%d')}" + ) + warnings.warn(msg, category=CladeTimeDateWarning) self._sequence_as_of = sequence_as_of @@ -109,17 +128,39 @@ def tree_as_of(self) -> datetime: @tree_as_of.setter def tree_as_of(self, date) -> None: + min_tree_date = self._config.nextstrain_min_ncov_metadata_date + date_warning = False + if date is None: tree_as_of = self.sequence_as_of else: - tree_as_of = self._validate_as_of_date(date) + try: + tree_as_of = _get_date(date) + except ValueError: + date_warning = True + default_field = "sequence_as_of" + tree_as_of = self.sequence_as_of + utc_now = datetime.now(timezone.utc) - if tree_as_of > utc_now: - warnings.warn( - f"specified tree_as_of is in the future, defaulting to sequence_as_of: {self.sequence_as_of}", - category=CladeTimeFutureDateWarning, - ) 
+ if tree_as_of < min_tree_date and self.sequence_as_of < min_tree_date: + default_field = "current date" + date_warning = True + tree_as_of = utc_now + elif tree_as_of < min_tree_date: + default_field = "sequence_as_of" + date_warning = True tree_as_of = self.sequence_as_of + elif tree_as_of > utc_now: + default_field = "current date" + date_warning = True + tree_as_of = utc_now + if date_warning: + msg = ( + "\nTree as_of cannot in the future and cannot be earlier than " + f"{min_tree_date.strftime('%Y-%m-%d')}, defaulting to " + f"{default_field}: {tree_as_of.strftime('%Y-%m-%d')}" + ) + warnings.warn(msg, category=CladeTimeDateWarning) self._tree_as_of = tree_as_of @@ -169,25 +210,3 @@ def _get_config(self) -> Config: config = Config() return config - - def _validate_as_of_date(self, as_of: str) -> datetime: - """Validate an as_of date used to instantiate CladeTime. - - All dates used to instantiate CladeTime are assigned - a datetime tzinfo of UTC. - """ - if as_of is None: - as_of_date = datetime.now(timezone.utc) - elif isinstance(as_of, datetime): - as_of_date = as_of.replace(tzinfo=timezone.utc) - elif isinstance(as_of, str): - try: - as_of_date = datetime.strptime(as_of, "%Y-%m-%d").replace(tzinfo=timezone.utc) - except ValueError as e: - raise CladeTimeInvalidDateError(f"Invalid date string: {as_of} (should be in YYYY-MM-DD format)") from e - - as_of_date = as_of_date.replace(microsecond=0) - if as_of_date < self._config.nextstrain_min_seq_date: - raise CladeTimeInvalidDateError(f"Date must be after May 1, 2023: {as_of_date}") - - return as_of_date diff --git a/src/cladetime/exceptions.py b/src/cladetime/exceptions.py index dcd5a32..aba91d7 100644 --- a/src/cladetime/exceptions.py +++ b/src/cladetime/exceptions.py @@ -13,7 +13,7 @@ class CladeTimeInvalidURLError(Error): """Raised when CladeTime encounters an invalid URL.""" -class CladeTimeFutureDateWarning(Warning): +class CladeTimeDateWarning(Warning): """Raised when CladeTime as_of date is in the 
future.""" diff --git a/src/cladetime/util/reference.py b/src/cladetime/util/reference.py index 052eaba..1a55f1b 100644 --- a/src/cladetime/util/reference.py +++ b/src/cladetime/util/reference.py @@ -1,7 +1,7 @@ """Functions for retrieving and parsing SARS-CoV-2 phylogenic tree data.""" import subprocess -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from typing import Tuple @@ -17,6 +17,26 @@ logger = structlog.get_logger() +def _get_date(original_date: datetime | str | None) -> datetime: + """Validate an as_of date used to instantiate CladeTime. + + All CladeTime dates are assigned a datetime tzinfo of UTC. + """ + if original_date is None: + new_date = datetime.now(timezone.utc) + elif isinstance(original_date, datetime): + new_date = original_date.replace(tzinfo=timezone.utc) + elif isinstance(original_date, str): + try: + new_date = datetime.strptime(original_date, "%Y-%m-%d").replace(tzinfo=timezone.utc) + except ValueError as e: + raise ValueError(f"Invalid date format: {original_date}") from e + + new_date = new_date.replace(microsecond=0) + + return new_date + + def _docker_installed(): """Check if Docker is installed and running.""" try: diff --git a/tests/unit/test_cladetime.py b/tests/unit/test_cladetime.py index b534809..fd17484 100644 --- a/tests/unit/test_cladetime.py +++ b/tests/unit/test_cladetime.py @@ -7,7 +7,7 @@ from freezegun import freeze_time from cladetime.cladetime import CladeTime -from cladetime.exceptions import CladeTimeFutureDateWarning, CladeTimeInvalidDateError, CladeTimeInvalidURLError +from cladetime.exceptions import CladeTimeDateWarning, CladeTimeInvalidURLError, CladeTimeSequenceWarning def test_cladetime_no_args(): @@ -22,47 +22,72 @@ def test_cladetime_no_args(): "sequence_as_of, tree_as_of, expected_sequence_as_of, expected_tree_as_of", [ ( + # tree_as_of is prior to 2024-08-01, so should default to sequence_as_of + # (metadata for reference trees started publishing in Aug, 
2024) "2024-09-01", "2024-01-01", datetime(2024, 9, 1, tzinfo=timezone.utc), - datetime(2024, 1, 1, tzinfo=timezone.utc), + datetime(2024, 9, 1, tzinfo=timezone.utc), ), ( + # sequence_as_of set to current date, tree_as_of defaults to sequence_as_of None, "2023-12-21", datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), - datetime(2023, 12, 21, tzinfo=timezone.utc), + datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), + ), + ( + # sequence_as_of set to current date, tree_as_of retains specified date + None, + "2024-09-01", + datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), + datetime(2024, 9, 1, tzinfo=timezone.utc), ), ( + # tree_as_of set to sequence_as_of datetime(2024, 9, 30, 18, 24, 59, 655398), None, datetime(2024, 9, 30, 18, 24, 59, tzinfo=timezone.utc), datetime(2024, 9, 30, 18, 24, 59, tzinfo=timezone.utc), ), ( - datetime(2024, 2, 22, 22, 22, 22, 222222, tzinfo=dateutil.tz.gettz("US/Eastern")), - datetime(2024, 2, 22, tzinfo=dateutil.tz.gettz("US/Eastern")), - datetime(2024, 2, 22, 22, 22, 22, tzinfo=timezone.utc), - datetime(2024, 2, 22, tzinfo=timezone.utc), + # cladetime ignores incoming timezone, converts everything to UTC + datetime(2024, 8, 22, 22, 22, 22, 222222, tzinfo=dateutil.tz.gettz("US/Eastern")), + datetime(2024, 8, 20, tzinfo=dateutil.tz.gettz("US/Eastern")), + datetime(2024, 8, 22, 22, 22, 22, tzinfo=timezone.utc), + datetime(2024, 8, 20, tzinfo=timezone.utc), ), ( + # sequence_as_of is prior to 2024-08-01, so tree_as_of + # defaults to current date "2023-12-21", None, datetime(2023, 12, 21, tzinfo=timezone.utc), - datetime(2023, 12, 21, tzinfo=timezone.utc), + datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), ), ( + # future dates revert to current date "2063-12-21", None, datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), ), ( + # sequence and tree both have future dates, both + # revert to current date "2063-12-21", "2074-07-13", datetime(2025, 7, 13, 16, 
21, 34, tzinfo=timezone.utc), datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), ), + ( + # tree_as_of is a bad date, but sequence_as_of is before + # 2024-08-01, so it should revert to current date + "2023-07-13", + "2074-07", + datetime(2023, 7, 13, tzinfo=timezone.utc), + datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc), + ), ], ) def test_cladetime_as_of_dates(sequence_as_of, tree_as_of, expected_sequence_as_of, expected_tree_as_of): @@ -75,16 +100,16 @@ def test_cladetime_as_of_dates(sequence_as_of, tree_as_of, expected_sequence_as_ @pytest.mark.parametrize("bad_date", ["2020-07-13", "2022-12-32"]) def test_cladetime_invalid_date(bad_date): - with pytest.raises(CladeTimeInvalidDateError): + with pytest.warns(CladeTimeDateWarning): CladeTime(sequence_as_of=bad_date, tree_as_of=bad_date) def test_cladetime_future_date(): - with pytest.warns(CladeTimeFutureDateWarning): + with pytest.warns(CladeTimeDateWarning): CladeTime(sequence_as_of="2063-07-13") - with pytest.warns(CladeTimeFutureDateWarning): + with pytest.warns(CladeTimeDateWarning): CladeTime(tree_as_of="2063-07-13") - with pytest.warns(CladeTimeFutureDateWarning): + with pytest.warns(CladeTimeDateWarning): CladeTime(sequence_as_of="2023-12-31", tree_as_of="2063-07-13") From ed4dc436bd80e6147688986d9328c8ac6cff7882 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Fri, 8 Nov 2024 14:59:30 -0500 Subject: [PATCH 08/13] clean up unused config fields --- src/cladetime/util/config.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/cladetime/util/config.py b/src/cladetime/util/config.py index 46dd437..2239074 100644 --- a/src/cladetime/util/config.py +++ b/src/cladetime/util/config.py @@ -1,4 +1,4 @@ -from dataclasses import InitVar, asdict, dataclass, field +from dataclasses import InitVar, asdict, dataclass from datetime import datetime, timezone from pathlib import Path from pprint import pprint @@ -22,11 +22,6 @@ class Config: nextclade_data_url_version = "v3" 
nextclade_base_url: str = "https://nextstrain.org/nextclade/sars-cov-2" nextclade_input_tree_name: str = "tree.json" - reference_tree_file: Path | None = None - root_sequence_file: Path | None = None - assignment_no_metadata_file: Path | None = None - assignment_file: Path | None = None - assignment_file_columns: list[str] = field(default_factory=list) def __post_init__( self, From fd6da4c5b940b628b9d3f0c4232c97947ddaecd7 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Sat, 9 Nov 2024 00:13:58 -0500 Subject: [PATCH 09/13] Add assign_clades method to CladeTime This new method is how clade time users (including people using the upcoming CLI) will do custom clade assignments. After validating dates, assign_clades calls out to existing functions, performing a kind of "mini pipeline" to return a LazyFrame with the results from Nextclade merged with metadata from the sequences being assigned. --- docs/conf.py | 1 + src/cladetime/cladetime.py | 107 ++++++++++++++++++++++++++++++++++- src/cladetime/exceptions.py | 4 ++ src/cladetime/sequence.py | 9 ++- src/cladetime/util/config.py | 28 +++++++++ 5 files changed, 144 insertions(+), 5 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 5427264..f026308 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -39,6 +39,7 @@ github_repository = "cladetime" intersphinx_mapping = { + "ncov": ("https://docs.nextstrain.org/projects/ncov/en/latest/", None), "nextstrain": ("https://docs.nextstrain.org/en/latest", None), "nextclade": ("https://docs.nextstrain.org/projects/nextclade/en/stable/", None), "polars": ("https://docs.pola.rs/api/python/stable", None), diff --git a/src/cladetime/cladetime.py b/src/cladetime/cladetime.py index 187d160..2e87b1d 100644 --- a/src/cladetime/cladetime.py +++ b/src/cladetime/cladetime.py @@ -1,15 +1,17 @@ """Class for clade time traveling.""" +import tempfile import warnings from datetime import datetime, timezone +from pathlib import Path import polars as pl import structlog -from cladetime import 
sequence -from cladetime.exceptions import CladeTimeDateWarning, CladeTimeInvalidURLError +from cladetime import Tree, sequence +from cladetime.exceptions import CladeTimeDateWarning, CladeTimeInvalidURLError, CladeTimeSequenceWarning from cladetime.util.config import Config -from cladetime.util.reference import _get_date, _get_s3_object_url +from cladetime.util.reference import _get_clade_assignments, _get_date, _get_nextclade_dataset, _get_s3_object_url logger = structlog.get_logger() @@ -210,3 +212,102 @@ def _get_config(self) -> Config: config = Config() return config + + def assign_clades(self, sequence_metadata: pl.LazyFrame, output_file: str | None = None) -> pl.DataFrame: + """Assign clades to a specified set of sequences. + + For each sequence in a sequence file (.fasta), assign a Nextstrain + clade using the Nextclade reference tree that corresponds to the + tree_as_of date. + + Parameters + ---------- + sequence_metadata : polars.LazyFrame + A Polars LazyFrame of the Nextstrain + :external+ncov:doc:`sequence metadata` + to use for clade assignment. + output_file : str | None + The full path (including filename) to where the clade assignment + output file will be saved. The default value is + ~/cladetime/clade_assignments.csv. + + Returns + ------- + metadata_clades : polars.LazyFrame + Nextstrain sequence_metadata with an additional column for clade assignments + """ + if output_file is not None: + output_file = Path(output_file) + else: + output_file = Path.home() / "cladetime" / "clade_assignments.csv" + + # if there are no sequences in the filtered metadata, stop the clade assignment + sequence_count = sequence_metadata.select(pl.len()).collect().item() + if sequence_count == 0: + msg = "sequence_metadata is empty \n" "Stopping clade assignment...." 
+ warnings.warn( + msg, + category=CladeTimeSequenceWarning, + ) + return pl.LazyFrame() + + # if there are many sequences in the filtered metadata, warn that clade assignment will + # take a long time and require a lot of resources + if sequence_count > self._config.clade_assignment_warning_threshold: + msg = ( + f"Sequence count is {sequence_count}: clade assignment will run longer than usual. " + "You may want to run clade assignments on smaller subsets of sequences." + ) + warnings.warn( + msg, + category=CladeTimeSequenceWarning, + ) + + logger.info( + "Starting clade assignment pipeline", + sequence_as_of=self.sequence_as_of, + tree_as_of=self.tree_as_of, + num_sequence=sequence_count, + ) + + # drop any clade-related columns from sequence_metadata (if any exists, it will be replaced + # by the results of the clade assignment) + sequence_metadata = sequence_metadata.drop( + [ + col + for col in sequence_metadata.collect_schema().names() + if col not in self._config.nextstrain_standard_metadata_fields + ] + ) + + ids = sequence.get_metadata_ids(sequence_metadata) + tree = Tree(self.tree_as_of, self.url_sequence) + + with tempfile.TemporaryDirectory() as tmpdir: + filtered_sequences = sequence.filter(ids, self.url_sequence, Path(tmpdir)) + nextclade_dataset = _get_nextclade_dataset( + tree.ncov_metadata.get("nextclade_version_num"), + tree.ncov_metadata.get("nextclade_dataset_name").lower(), + tree.ncov_metadata.get("nextclade_dataset_version"), + Path(tmpdir), + ) + logger.info( + "Assigning clades", + sequences_to_assign=len(ids), + nextclade_dataset_version=tree.ncov_metadata.get("nextclade_dataset_version"), + ) + assignments = _get_clade_assignments( + tree.ncov_metadata.get("nextclade_version_num"), filtered_sequences, nextclade_dataset, output_file + ) + logger.info( + "Clade assignments done", + assignment_file=assignments, + nextclade_dataset=tree.ncov_metadata.get("nextclade_dataset_version"), + ) + + assigned_clades = pl.read_csv(assignments, 
separator=";", infer_schema_length=100000) + + assigned_clades = sequence_metadata.join( + assigned_clades.lazy(), left_on="strain", right_on="seqName", how="left" + ) + return assigned_clades diff --git a/src/cladetime/exceptions.py b/src/cladetime/exceptions.py index aba91d7..624e1b3 100644 --- a/src/cladetime/exceptions.py +++ b/src/cladetime/exceptions.py @@ -17,6 +17,10 @@ class CladeTimeDateWarning(Warning): """Raised when CladeTime as_of date is in the future.""" +class CladeTimeSequenceWarning(Warning): + """Raised when filtered sequence metadata is empty.""" + + class NextcladeNotAvailableError(Error): """Raised when Nextclade CLI is not available.""" diff --git a/src/cladetime/sequence.py b/src/cladetime/sequence.py index 9f849f5..3916ce4 100644 --- a/src/cladetime/sequence.py +++ b/src/cladetime/sequence.py @@ -15,6 +15,7 @@ from requests import Session from cladetime.types import StateFormat +from cladetime.util.reference import _get_date from cladetime.util.session import _get_session from cladetime.util.timing import time_function @@ -64,6 +65,8 @@ def get_metadata( assert path_flag + url_flag == 1, "Specify metadata_path or metadata_url, but not both." 
if metadata_url: + # Python's lzma module doesn't support opening from S3, so metadata_url + # must point to a .tsv or a ZSTD-encoded .tsv metadata = pl.scan_csv(metadata_url, separator="\t", n_rows=num_rows) return metadata @@ -229,8 +232,10 @@ def filter_metadata( # Apply filters for min and max sequence collection date, if applicable if collection_min_date is not None: + collection_min_date = _get_date(collection_min_date) filtered_metadata = filtered_metadata.filter(pl.col("date") >= collection_min_date) if collection_max_date is not None: + collection_max_date = _get_date(collection_max_date) filtered_metadata = filtered_metadata.filter(pl.col("date") <= collection_max_date) # Create state mappings based on state_format parameter, including a DC alias, since @@ -283,7 +288,7 @@ def get_metadata_ids(sequence_metadata: pl.DataFrame | pl.LazyFrame) -> set: ------- set A set of - :external:doc:`strains` + :external+ncov:doc:`strains` Raises ------ @@ -351,9 +356,9 @@ def filter(sequence_ids: set, url_sequence: str, output_path: Path) -> Path: # alternately, we could expand this function to handle other types # of compression schemas (ZSTD) or none at all - # download the original sequence file logger.info("Starting sequence file download", url=url_sequence) sequence_file = _download_from_url(session, url_sequence, output_path) + logger.info("Sequence file saved", path=sequence_file) filtered_sequence_file = output_path / "sequences_filtered.fasta" diff --git a/src/cladetime/util/config.py b/src/cladetime/util/config.py index 2239074..28b1019 100644 --- a/src/cladetime/util/config.py +++ b/src/cladetime/util/config.py @@ -22,6 +22,34 @@ class Config: nextclade_data_url_version = "v3" nextclade_base_url: str = "https://nextstrain.org/nextclade/sars-cov-2" nextclade_input_tree_name: str = "tree.json" + clade_assignment_warning_threshold: int = 10000 + # standard metadata fields for Nextstrain ncov pipeline (i.e., excludes clade assignments) + # 
https://docs.nextstrain.org/projects/ncov/en/latest/reference/metadata-fields.html + nextstrain_standard_metadata_fields = [ + "strain", + "virus", + "gisaid_epi_isl", + "genbank_accession", + "date", + "region", + "country", + "division", + "location", + "region_exposure", + "country_exposure", + "division_exposure", + "segment", + "length", + "host", + "age", + "sex", + "originating_lab", + "submitting_lab", + "authors", + "url", + "title", + "date_submitted", + ] def __post_init__( self, From c148db3b28b40abbccead6e12dc1669beb252a75 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Sat, 9 Nov 2024 00:17:35 -0500 Subject: [PATCH 10/13] Add tests for the new CladeTime assign_clades method This changeset represents new tests for the assign_clades method, as well as updates that reflect some refactoring that occurred along the way. --- tests/conftest.py | 18 + tests/data/README.md | 14 + tests/data/test_sequences.fasta | 496 ++++++ tests/data/test_sequences_updated.fasta | 1485 +++++++++++++++++ .../integration/test_cladetime_integration.py | 155 ++ .../integration/test_nextclade_integration.py | 3 +- tests/unit/conftest.py | 12 + tests/unit/test_cladetime.py | 2 +- 8 files changed, 2183 insertions(+), 2 deletions(-) create mode 100644 tests/data/README.md create mode 100644 tests/data/test_sequences_updated.fasta create mode 100644 tests/integration/test_cladetime_integration.py create mode 100644 tests/unit/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py index 8330d99..ffdcc04 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,6 +10,24 @@ from cladetime.util.config import Config +@pytest.fixture +def test_sequences(): + """Return a set of sequences for testing. + + These sequences have clade assignments that changed between + 2024-08-02 and 2024-11-07, so this is a good set for testing clade + assignments over time. 
+ """ + file_name = "test_sequences_updated.fasta" + sequences = [ + "USA/VA-CDC-LC1109961/2024", + "USA/MD-CDC-LC1110088/2024", + "USA/FL-CDC-LC1109983/2024", + ] + + return (file_name, set(sequences)) + + @pytest.fixture def ncov_metadata(): return { diff --git a/tests/data/README.md b/tests/data/README.md new file mode 100644 index 0000000..41467e8 --- /dev/null +++ b/tests/data/README.md @@ -0,0 +1,14 @@ +# Cladetime Test Data + +This directory contains test files used by CladeTime's test suite. + +* `test_metadata.tsv` was used to test `get_clade_list` before that functionality moved to variant-nowcast-hub +* `metadata.tsv.xz` and `metadata.tsv.xz` are used to test setting CladeTime's sequence_metadata property. +* `test_sequence.xz` is used to test the sequence filter function +* `test_sequences.fasta`, `test_sequences.fasta`, and `test_nexclade_dataset.zip` are used in Nextclade integration tests +* `test_sequences_updated.fasta` is used to test clade assignments with prior reference trees + * it contains 3 sequence strains with clade assignments that changed between 2024-08-02 and 2024-11-07 + * differing clade assignments were determined by comparing the 2024-08-02 and 2024-11-07 versions of Nextstrain's sequence metadata + * `USA/VA-CDC-LC1109961/2024` is assigned to `24C` as of 2024-08-02 and `24E` as of 2024-11-07 + * `USA/FL-CDC-LC1109983/2024` is assigned to `24B` as of 2024-08-02 and `24G` as of 2024-11-07 + * `USA/MD-CDC-LC1110088/2024` is assigned to `24B` as of 2024-08-02 and `24G` as of 2024-11-07 diff --git a/tests/data/test_sequences.fasta b/tests/data/test_sequences.fasta index c2d7bdd..fb441b1 100644 --- a/tests/data/test_sequences.fasta +++ b/tests/data/test_sequences.fasta @@ -1988,3 +1988,499 @@ GCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACAT TAGGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTACGATCGAGT GTACAATGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTA ATTTTAGTAGTGCTATCCCCATGTGATTTTAATAGC +>USA/NJ-CDC-LC1124615/2024 
+AACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAA +TTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTG +CTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTGTG +ACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACT +CAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGA +GGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGT +TGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGC +TCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCA +GTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGT +GGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGGTA +CGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGA +TTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGA +GCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTA +CCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGATTCATGCACTTTGTC +CGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCA +TGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGA +AATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCC +CTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTT +TATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTG +CCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTT +TGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTAC +TTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTC +AGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCAT +TCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTG +CCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATAC +AGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAA +AGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTAT +TTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTA +TAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGC +TAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATT +TGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCA +AAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTA 
+TTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGT +TGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACAT +CTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAA +GGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGC +TTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCA +GACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGG +TGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATT +GTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCC +AAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGT +TGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGC +TCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACAC +AGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAA +AGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTA +CAAGAGTGTGAATATCATTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAA +GTGCTCTGCCTACACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGC +AGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGA +TTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAATT +GGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTG +TGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCA +AGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGA +AGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAGGA +CAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAGAGATGGAACT +TACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTTAAAACTTACTGA +CAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACATT +GGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAA +TAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACC +ACTTAAAGTGGGCGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCA +TGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAAGAGTGCTTATGA +AAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGGTATTTTTGGTGC +TGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGC +TGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGA +AAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTATAAC 
+TGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGT +TGAAGAAGTTACAACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTA +TATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACAT +CACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTT +AACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTGAAATGCTAGCGAAAGC +TTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAATGG +TTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCTTTTTACATTCT +ACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCG +AGAAATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAA +AGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAGAGGGTGTGGT +TGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGCGTCACTTATCAA +CACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTATGTAACACATGG +CTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTC +TGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAAC +ACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTCCTA +TTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATA +TTACACTAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCACCTTTGACAATCT +TAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAA +CATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCC +AACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAGGTAA +AACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCA +CACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTG +GAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCT +TGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTACA +AGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCACTTATCTTAGC +CTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAATGAGTTACTTGTT +TCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGTGG +ACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTA +TGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATA +TCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGAACT +TAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCAGTGTGGTCACTA +TAAACATATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTTACTTACAAAGTC 
+CTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAAC +CATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAGTT +GGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGATCTTGTACC +AAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAA +ATTTGCTGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAA +AGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTATAGACACTACAC +ACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAA +TGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCTTTGGAGCAC +AAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGAAT +GGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTAC +CATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTAT +ACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAAT +GGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATTATCTAGAGT +ATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATAGTGTCCCTTGGGA +TACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTACAACTACTAACAT +AGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATT +GCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGAC +TACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAA +TTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATT +AAGTGTTTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGTTTTAATGTCTAA +TTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGT +CACTATTGCAACCTACTGTATTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGA +TTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAATG +GGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAG +GTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGT +ACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCC +GATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAG +TTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTACAAACGTAA +TAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAGGTCCTTTTATGT +CTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGTGA +TACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACA +GTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGT 
+GAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACA +TTCTCTCTCTCATTTTGTTAACTTAGACAGCCTGAGAGCTAATAACACTAAAGGTTCATT +GCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATCATCTGCAAAATC +AGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATT +AGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAA +TACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGC +AGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGC +AGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAA +ATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCAC +TTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGC +GCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTACTTTGATATGGAACGTTAA +AGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAGAA +TAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAAC +AAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGT +TACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCAT +GTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGATGGTGGTGT +CACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAACATGCTGATTTTGACAC +ATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCCATTGATTGCTGC +AGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCAC +AACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTG +TTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGTTTTGGC +TGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATAC +CAATGTACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACACACGTTATGTGCT +CATGGATGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGT +AACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGT +TTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTACCAGG +AGTTTTCTGTGGTGTAGATGCTGTAAATTTATTTACTAATATGTTTACACCACTAATTCA +ACCTATTGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTGGCTATCGT +AGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCA +TGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCATTGTACTCTGTTTAACACC +AGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGACATTTTATCT +TACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTTCACACCTTTAGT 
+ACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATTGGTT +CTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGA +AGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGA +TGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAAGTACAAGTA +TTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTTGTCATCTCGCAAA +GGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTCTTTACCAACCACCACAAATCTC +TATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGT +TGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGA +TGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGATATGCTTAACCCTAA +TTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTAATGT +TCAACTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTGATAC +AGCCAATCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTC +AGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGACACAA +TTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTTTTAACATAGA +TTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTCATGC +TGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCACAAGC +AGCTGGTACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTAT +AAATGGAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGT +GGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATACTAGGACCTCT +TTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTCATTAAAAGAATTACTGCA +AAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGATGAATTTACACC +TTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAAT +CAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTATTTTTAGTCCA +GAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTGCTAT +GGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCATTTCT +CTGTTTGTTTTTGTTACCTTCTCTTGCCACTGTAGCTTATTTTAATATGGTCTATATGCC +TGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATACTAGTTTGAAGCT +AAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGAAC +TGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTA +TAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTC +TGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGT +TTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTAT 
+AATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTT +ACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGTTTCTACACAGGA +GTTTAAATATATGAATTCACAGGGACTACTCCCACCCAAGAATAGCATAGATGCCTTCAA +ACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACA +GTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACT +CAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCT +CTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTTCTGTTTTGCT +TTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGC +AACCTTACAAGCTATAGCCTCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTAC +TGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAA +GTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAACG +TAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGCTAGATCTGA +GGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAA +GTTGGATAATGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTT +GAACATAATACCTCTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACAC +ATATAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATCCA +ACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAGTATGGATAATTC +ACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCAAATT +ACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTAC +ACAAACTGCTTGCACTGGTGACAATGCGTTAGCTTACTACAACACAATAAAGGGAGGTAG +GTTTGTACTTGCATTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAG +TGATGGAACTGGTACTATTTATACAGAACTGGAACCACCTTGTAGGTTTGTTACAGACAC +ACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAAAGGATTAAACAACCTAAATAG +AGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAACAGA +AGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGC +TTACAAAGATTATCTAGCTTGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGTG +TACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATATGGATCAAGA +ATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCC +TAAAGGATTTTGTGACTTAAAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAACGA +CCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTA +TGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCGTT +TTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCACAGGCACTAGT 
+ACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAAA +TTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGAT +TCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTAT +AATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGAC +GGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACAATGGCAGACCTC +GTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGACACATTAAAAGAAATACTTGTC +ACATACAATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATGATTTTGTAGAA +AACCCAGATATATTACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTA +AAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTA +GATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAAACCACGCCA +GGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTG +ACCAGGGCTTTAACTGCAGAGTCACATGTTGACACTGACTTAACAAAGCCTTACATTAAG +TGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTT +AAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCATT +CTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTTACAAGTTTTGGA +CCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCAC +TTCAGAGAGCTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGT +TTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTA +TTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTTTT +CAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCTGTGTCTAAGGGT +TTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTTCTTTGCTCAGGATGGTAAT +GCTGCTATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATATCAGA +CAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGT +ATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAAT +AAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGATCAAGATGCACTT +TTCGCATATACAAAACGTAATGTCATCCCTACTATAACTCAAATGAATCTTAAGTATGCC +ATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACC +AATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTA +GTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATAGT +GATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGAGCCATGCCT +AACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGCAAACATACAACTTGTTGTAGC +TTGTCACACCGTTTCTATAGATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTC 
+ATGTGTGGCGGTTCACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACT +GCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCACTT +TTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTACAACATAGACTT +TATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGCA +TATTTGCGTAAACATTTCTCAATGATGATACTTTCTGACGATGCTGTTGTGTGTTTCAAT +AGCACTTATGCATCACAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTAT +TATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAA +GGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGTGATGATTATGTG +TACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCCGGCTGTTTTGTAGATGATATC +GTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTAC +CCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATAC +ATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTT +ACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATGTACACACCG +CATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGA +TGTGGTGCTTGCATACGTAGACCATTCTTATGTTGTAAATGCTGTTACGACCATGTCATA +TCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGT +GATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCACAT +AAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAAAAAT +ACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACA +AATGCTGGTGATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCA +GAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGT +GAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCACCA +CTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGTAAAGTACAAATA +GGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTTTACCGAGGTACAACA +ACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCCATTA +AGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACA +CTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAA +AAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGGCCTA +GCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCTTGCTCTCATGCCGCTGTTGAT +GCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGATAAATGTAGTAGAATTATACCT +GCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTAT +GTCTTTTGTACTGTAAATGCATTGCCTGAGACTACAGCAGATATAGTTGTCTTTGATGAA 
+ATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTATGTGCTAAGCAC +TATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGC +ACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGAC +ATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTG +GTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGTTT +TATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCACAAATAGGCGTG +GTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTAT +AATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCA +TCACAGGGCTCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCT +TGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATA +ATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAAATTCCACGTAGG +AATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTCTTTAAAGATTGTAGTAAGGTA +ATCACTGGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACACTAAATTCAAA +ACTGAAGGTTTATGTGTTGACGTACCTGGCATACCTAAGGACATGACCTATAGAAGACTC +ATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATC +ACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTCGAGGGGTGT +CATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGT +GTTAACCTAGTTGCTGTACCTACAGGTTATGTTGATACACCTAATAATACAGATTTTTCC +AGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAACACCTCATACCGCTTATGTAC +AAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACACTT +AAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAGTTGACATCT +ATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACTTGTTGTCTATGTGATAGACGTGCC +ACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGAT +TACGTCTATAATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAA +AGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATGCA +ATCATGACTAGGTGTCTAGCTGTCCACGAGTGTTTTGTTAAGCGTGTTGACTGGACTATT +GAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGCTTGTAGAAAGGTTCAACAC +ATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAAC +CCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAG +CCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCT +GACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGATAGATATCCTGCT +AATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCTAACCTTAACTTGCCTGGTTGT 
+GATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCACACACCAGCTTTTGATAAAAGT +GCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCT +CATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTATA +ACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGATTGTAT +CTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGCTTGTGGGTTTACAAACAATTT +GATACTTATAACCTCTGGAACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTT +AATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATT +AATAACACTGTTTACACAAAGGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAACA +ACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTAAACCAGTACCA +GAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGAC +TACAAAAGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGACATA +GCCAAGAAACCAATTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTT +GATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAGGGTAGT +GTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAATGGAGTCACATTA +ATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAGAAAGTTGATGGTGTTGTCCAA +CAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGT +CAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTA +GAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGT +TTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAATTAGAAGAT +TTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTCA +TCTAAGTGTGTGTGTTCTGTTATTGATTTATTACTTGATGATTTTGTTGAAATAATAAAA +TCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATT +TCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAATCT +AGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGAATGCTA +TTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATG +ATGAATGTCGCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTA +CCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGT +ACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTAAT +GACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACTGTACATACAGCT +AATAAATGGGATCTCATTATTAGTGATATGTACGACCCTAAGACTAAAAATGTTACAAAA +GAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCTA +GCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTAT 
+AAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCA +TCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAACAAATAGATGGT +TATGTCATGCATGCAAATTACATATTTTGGAGGAATACAAATCCAATTCAGTTGTCTTCC +TATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCT +TTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATA +ATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAAC +AATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTCATGCCGCTGTT +TAATCTTATAACTACAACTCAATCATACACTAATTTCACACGTGGTGTTTATTACCCTGA +CAAAGTTTTCAGATCCTCAGTTTTACATTTAACTCAGGACTTGTTCTTACCTTTCTTTTC +CAATGTTACTTGGTTCCATGCTATCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCC +TGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAG +AGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAA +CGCTACTAATGTTTTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGA +TGTTTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTCAGGAGTTTATTCTAGTGC +GAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACA +GGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAAT +ATATTCTAAGCACACGCCTATTATAGGGCGTGATTTCCCTCAGGGTTTTTCGGCTTTAGA +ACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTT +AAATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGA +TTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAAC +CATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAA +ATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGA +ATCTATTGTTAGATTTCCTAATGTTACAAACTTGTGCCCTTTTCATGAAGTTTTTAACGC +CACCAGATTTGCATCTGTTTATGCTTGGAACAGGACGAGAATCAGCAACTGTGTTGCTGA +TTATTCTGTCCTATATAATTTCGCACCATTTTTCGCTTTTAAGTGTTATGGAGTGTCTCC +TACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAAAGG +TAATGAAGTCAGCCAAATCGCTCCAGGGCAAACTGGAAATATTGCTGATTATAATTATAA +ATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAGCTTGATTCTAA +GCATAGTGGTAATTATGATTACTGGTATAGATCGCTTAGGAAGTCTAAACTCAAACCTTT +TGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAACAAACCTTGTAAAGGTAAAGG +TCCTAATTGTTACTTTCCTTTAGAATCATATGGTTTCCGACCCACTTATGGTGTTGGTCA +CCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTG 
+TGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGG +TTTAACAGGCACAGGTGTTCTTACTAAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATT +TGGCAGAGACATTGTTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCT +TGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTC +TAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTCTCTGTTGCTATTCA +TGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAAC +ACGTGCAGGCTGTTTAATAGGGGCTGAATATGTCAACAACTCATATGAGTGTGACATACC +CATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAAGTCTCGTCGGCGGGCACG +TAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGT +TGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGA +AATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGA +TTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAAACG +TGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGT +CAAACAAATTTACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTCACAAAT +ATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAA +AGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGC +TGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCT +CACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGG +TTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAG +GTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAA +CCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTTTTCCACAGCAAGTGCACT +TGGAAAACTTCAAGATGTGGTCAACCATAATGCACAAGCTTTAAACACGCTTGTTAAACA +ACTTAGCTCCAAATTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGA +CAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCA +GACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGC +TGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAA +GGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGT +GACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGG +AAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTTTAAC +ACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAA +CTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACTTGAATT +AGATTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGA 
+TTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCG +CCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAA +GTATGAGCAGTATATAAAATGGCCATGGTATATTTGGCTAGGTTTTATAGCTGGCTTGAT +TGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAA +GGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCT +CAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTC +ACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTT +GTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTT +GGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTCCAAAATCATAACTCTCAAAAAGAGA +TGGCAACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTA +ACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTT +TATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGCTTTGG +CTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTATTTTCTTTGC +TGGCATACTAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTC +ATTACTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGT +TATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACT +TCAGACTATTACCAGCTGTACTCAACTCAATTGAGTACAGACATTGGTGTTGAACATGTT +ACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACATGTCCAAATTCACACA +ATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAATTTATGATGAACCGACG +ACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTACTCATTC +GTTTCGGAAGAGATAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTG +GTATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGC +AATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAAT +CTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTAAACGAACTAAATATTATATTAGTTT +TTCTGTTTGGAACTTTAATTTTAGCCATGGCACATTCCAACGGTACTATTACCGTTGAAG +AGCTTAAAAAGCTCCTTGAAGAATGGAACCTAGTAATAGGTTTCCTATTCCTTGCATGGA +TTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAAGTTAA +TTTTTCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTGCTTGCTGCTGTTTACA +GAATAAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGT +GGCTCAGCTACTTCATTGCTTCTTTCAGACTGTTTGTGCGTACGCGTTCCATGTGGTCAT +TTAATCCAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGAC +CGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCGTATTG 
+CTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTA +CATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAG +GTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCA +GTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTT +GACTTTCAGGTTACTATAGCAGAGATATTACTAATTATTATGCGGACTTTTAAAGTTTCC +ATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTATCTAAGTCACTAACT +GAGAATAAATATTCTCAATTAGATGAAGAGCAACCAATGGAGATTCTCTAAACGAACATG +AAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCACTACCAA +GAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAG +GGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACT +CAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCA +GTTTCACCTAAACTGTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTT +CTTATTGTTGCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAGACAGAA +TGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGTTACTCCTTG +TTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGAAACTT +GTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTG +CATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATG +ACCCGTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAG +CACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCG +ATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTAAAT +TGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTC +GTGTTGTTTTAGATTTCATCTAAACGAACAAACTTAAATGTCTGATAATGGACCCCAAAA +TCAGCGAAATGCACTCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCA +GAATGGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTC +TTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGG +CGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTAC +CAGACGAATTCGTGGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTA +CTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCAT +ATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCC +TGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTT +CTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCG +CAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAAACGAACTTCTCCTGCTAGAATGGC 
+TGGCAATGGCGGTGATGCAGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACAAGCTTGA +GAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGC +TGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTAAC +ACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACT +AATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGC +TTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTTGGGAACGTGGTT +GACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCAT +TTTGCTGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGA +CAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAAC +TGTGACTCTTCTTCCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCAT +GAGCCGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTA +TATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTC +TCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTAAT +CAGTGTGTAACATTAGGGAGGACTTGAAAGTGCCACCACATTTTCACCTACAGTGAACAA +TGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTA diff --git a/tests/data/test_sequences_updated.fasta b/tests/data/test_sequences_updated.fasta new file mode 100644 index 0000000..c4d1228 --- /dev/null +++ b/tests/data/test_sequences_updated.fasta @@ -0,0 +1,1485 @@ +>USA/VA-CDC-LC1109961/2024 +AACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGT +ATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAG +GCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGG +TGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCC +AACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCG +TGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAG +AAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGG +ATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCA +TTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATAC +CAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATA +GGTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATG +AAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGC +GTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATG 
+GCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGATTCATGCACTT +TGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATG +AGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTT +TTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTAT +TTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATG +GCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAA +TGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCG +ATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCA +CTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACA +ATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAA +CCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTG +GTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACC +ATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCC +AAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCA +TTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGG +ATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAA +AAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATG +CATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTG +CTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCAC +AGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATC +TAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTA +ACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGT +TTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCT +GTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTG +TTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTA +TTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGG +GATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAG +CCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGG +AAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTG +AAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAG +ACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACAC +TCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAG 
+GTTACAAGAGTGTGAATATCATTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATG +AGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTG +TGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCA +TTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTA +AATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATGAGGATGAAGAAGAAGGTG +ATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATT +ACCAAGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGC +AAGAAGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTG +AGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAGAGATGG +AACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTTAAAACTTA +CTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAA +CATTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCT +TAAATAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATG +GACCACTTAAAGTGGGCGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTC +TTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAAGAGTGCTT +ATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGGTATTTTTG +GTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACT +TAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGA +GTGAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTA +TAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTT +GTGTTGAAGAAGTTACAACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTAC +TTTATATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTG +ACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTG +TTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTGAAATGCTAGCGA +AAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAA +ATGGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCTTTTTACA +TTCTACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATT +TGCGAGAAATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAA +CTAAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAGAGGGTG +TGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGCGTCACTTA +TCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTATGTAACAC +ATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAG 
+TTTCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTA +AAACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGT +CCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTG +TATATTACACTAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCACCTTTGACA +ATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAG +ACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTG +GTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAG +GTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACT +ACCACACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAA +AGTGGAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTT +ATCTTGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTC +TACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCACTTATCT +TAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAATGAGTTACT +TGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTT +GTGGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTT +CTTATGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAA +AATATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATG +AACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCAGTGTGGTC +ACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTTACTTACAA +AGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAGTTACACAA +CAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTA +AGTTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGATCTTG +TACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATA +TCAAATTTGCTGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGC +TTAAAGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTATAGACACT +ACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTA +ACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCTTTGGA +GCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGG +GAATGGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATC +CTACCATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACA +TTATACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATC +TAATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATTATCTA 
+GAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATAGTGTCCCTT +GGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTACAACTACTA +ACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTT +TATTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGC +CGACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCAT +TTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTTGGTTTTTAC +TATTAAGTGTTTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGTTTTAATGT +CTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTA +ATGTCACTATTGCAACCTACTGTATTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTT +TAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTA +AATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCA +CTAGGTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTG +CAGTACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGG +CCCCGATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGA +AAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTACAAAC +GTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAGGTCCTTTT +ATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATT +GTGATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCAC +TACAGTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTA +CAGTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAA +GACATTCTCTCTCTCATTTTGTTAACTTAGACAGCCTGAGAGCTAATAACACTAAAGGTT +CATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATCATCTGCAA +AATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGG +CATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACG +TTAATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAA +CTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTT +CAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTC +TTAAATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAACTATATGC +TCACTTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTA +GTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTACTTTGATATGGAACG +TTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAA +AGAATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAA 
+CAACAAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTA +AAGTTACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATG +TCATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGATGGTG +GTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAACATGCTGATTTTG +ACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCCATTGATTG +CTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTAC +GCACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACA +TCTGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGTTT +TGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATG +ATACCAATGTACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACACACGTTATG +TGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAG +TGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTG +GTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTAC +CAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTATTTACTAATATGTTTACACCACTAA +TTCAACCTATTGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTGGCTA +TCGTAGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACA +GTCATGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCATTGTACTCTGTTTAA +CACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGACATTTT +ATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTTCACACCTT +TAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATT +GGTTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTT +TTGAAGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTA +GTGATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAAGTACA +AGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTTGTCATCTCG +CAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTCTTTACCAACCACCACAAA +TCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTA +AAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGC +TTGATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGATATGCTTAACC +CTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTA +ATGTTCAACTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTG +ATACAGCCAATCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTT +TTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGAC 
+ACAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTTTTAACA +TAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTC +ATGCTGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCAC +AAGCAGCTGGTACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTG +TTATAAATGGAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACC +TTGTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATACTAGGAC +CTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTCATTAAAAGAATTAC +TGCAAAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGATGAATTTA +CACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAA +CAATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTATTTTTAG +TCCAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTG +CTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCAT +TTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCACTGTAGCTTATTTTAATATGGTCTATA +TGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATACTAGTTTGA +AGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAA +GAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCG +TTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAA +TCTCTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTA +TTGTTTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGT +GTATAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTT +GTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGTTTCTACAC +AGGAGTTTAAATATATGAATTCACAGGGACTACTCCCACCCAAGAATAGCATAGATGCCT +TCAAACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTG +TACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAAC +AACTCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACA +TTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTTCTGTTT +TGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGAAGAAATGCTGGACAACA +GGGCAACCTTACAAGCTATAGCCTCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTG +CTACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTA +AAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGC +AACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGCTAGAT +CTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTA 
+GAAAGTTGGATAATGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTC +CCTTGAACATAATACCTCTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATA +ACACATATAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAA +TCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAGTATGGATA +ATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCA +AATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTA +CTACACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACAATAAAGGGAG +GTAGGTTTGTACTTGCATTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTA +AGAGTGATGGAACTGGTACTATTTATACAGAACTGGAACCACCTTGTAGGTTTGTTACAG +ACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAAAGGATTAAACAACCTAA +ATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAA +CAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTA +AAGCTTACAAAGATTATCTAGCTTGTGGGGGACAACCAATCACTAATTGTGTTAAGATGT +TGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATATGGATC +AAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAA +ATCCTAAAGGATTTTGTGACTTAAAAGGTAAGTATGTACAAATACCTACAACTTGTGCTA +ACGACCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAG +GTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAAT +CGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCACAGGCAC +TAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGC +TAAATTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAAT +TGATTCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAAT +TTATAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAAT +AGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACAATGGCAGA +CCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGACACATTAAAAGAAATACT +TGTCACATACAATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATGATTTTGT +AGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTT +GTTAAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGAC +ATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAAACCAC +GCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTGTTAATGCCTATATTAAC +CTTGACCAGGGCTTTAACTGCAGAGTCACATGTTGACACTGACTTAACAAAGCCTTACAT +TAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTA 
+TTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATG +CATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTTACAAGTTT +TGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATA +CCACTTCAGAGAGCTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACT +TAGTTTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAA +TCTATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGC +TTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCTGTGTCTAA +GGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTTCTTTGCTCAGGATGG +TAATGCTGCTATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATAT +CAGACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGG +CTGTATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATT +TAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGATCAAGATGC +ACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAACTCAAATGAATCTTAAGTA +TGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTAT +GACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTAC +TGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTA +TAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGAGCCAT +GCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGCAAACATACAACTTGTTG +TAGCTTGTCACACCGTTTCTATAGATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAAT +GGTCATGTGTGGCGGTTCACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCAC +AACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGC +ACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTACAACACAG +ACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTA +CGCATATTTGCGTAAACATTTCTCAATGATGATACTTTCTGACGATGCTGTTGTGTGTTT +CAATAGCACTTATGCATCACAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCT +TTATTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTAC +TAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGTGATGATTA +TGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCCGGCTGTTTTGTAGATGA +TATCGTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGC +TTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACA +ATACATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTAT +GCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATGTACAC 
+ACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATT +AAGATGTGGTGCTTGCATACGTAGACCATTCTTATGTTGTAAATGCTGTTACGACCATGT +CATATCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGG +TTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATC +ACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAA +AAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTG +GACAAATGCTGGTGATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGC +AGCAGAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGT +ACGTGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACC +ACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGTAAAGTACA +AATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTTTACCGAGGTAC +AACAACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCC +ATTAAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCC +AACACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTAT +GCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGG +CCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCTTGCTCTCATGCCGCTGT +TGATGCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGATAAATGTAGTAGAATTAT +ACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACA +GTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACTACAGCAGATATAGTTGTCTTTGA +TGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTATGTGCTAA +GCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAA +GGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCC +AGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGC +TTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAAT +GTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCACAAATAGG +CGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACC +TTATAATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGA +TTCATCACAGGGCTCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCA +CTCTTGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTG +CATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAAATTCCACG +TAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTCTTTAAAGATTGTAGTAA +GGTAATCACTGGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACACTAAATT 
+CAAAACTGAAGGTTTATGTGTTGACGTACCTGGCATACCTAAGGACATGACCTATAGAAG +ACTCATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTT +TATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTCGAGGG +GTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTAC +AGGTGTTAACCTAGTTGCTGTACCTACAGGTTATGTTGATACACCTAATAATACAGATTT +TTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAACACCTCATACCGCTTAT +GTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACAC +ACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAGTTGAC +ATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACG +TGCCACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATT +TGATTACGTCTATAATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCT +ACAAAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGA +TGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGTTTTGTTAAGCGTGTTGACTGGAC +TATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGCTTGTAGAAAGGTTCA +ACACATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGG +TAACCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGC +ACAGCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACA +TTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGATAGATATCC +TGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCTAACCTTAACTTGCCTGG +TTGTGATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCACACACCAGCTTTTGATAA +AAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGA +GTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTG +TATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGATT +GTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGCTTGTGGGTTTACAAACA +ATTTGATACTTATAACCTCTGGAACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGC +TTTTAATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTAT +CATTAATAACACTGTTTACACAAAGGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAA +AACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTAAACCAGT +ACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTG +GGACTACAAAAGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGA +CATAGCCAAGAAACCAATTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAG +AGTTGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAGGG 
+TAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAATGGAGTCAC +ATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAGAAAGTTGATGGTGTTGT +CCAACAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAG +GAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAA +ATTAGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGG +TGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAATTAGA +AGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATAACAGATGCGCAAACAGG +TTCATCTAAGTGTGTGTGTTCTGTTATTGATTTATTACTTGATGATTTTGTTGAAATAAT +AAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGA +AATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACA +ATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGAAT +GCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCAT +AATGATGAATGTCGCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGC +TGTACCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACC +AGGTACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCT +TAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACTGTACATAC +AGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCTAAGACTAAAAATGTTAC +AAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAA +GCTAGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCT +TTATAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTC +ATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAACAAATAGA +TGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATACAAATCCAATTCAGTTGTC +TTCCTATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTAT +GTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACT +TATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAAC +GAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTCATGCCGC +TGTTTAATCTTATAACTACAACTCAATCATACACTAATTTCACACGTGGTGTTTATTACC +CTGACAAAGTTTTCAGATCCTCAGTTTTACATTTAACTCAGGACTTGTTCTTACCTTTCT +TTTCCAATGTTACTTGGTTCCATGCTATCTCTGGGACCAATGGTACTAAGAGGTTTGATA +ACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAA +TAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTA +ATAACGCTACTAATGTTTTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTT 
+TGGATGTTTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTCAGGAGTTTATTCTA +GTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAA +AACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTA +AAATATATTCTAAGCACACGCCTATTATAGGGCGTGATTTCCCTCAGGGTTTTTCGGCTT +TAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTG +CTTTAAATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTG +CAGATTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATG +GAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGT +TGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAA +CAGAATCTATTGTTAGATTTCCTAATGTTACAAACTTGTGCCCTTTTCATGAAGTTTTTA +ACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGACGAGAATCAGCAACTGTGTTG +CTGATTATTCTGTCCTATATAATTTCGCACCATTTTTCGCTTTTAAGTGTTATGGAGTGT +CTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTA +AAGGTAATGAAGTCAGCCAAATCGCTCCAGGGCAAACTGGAAATATTGCTGATTATAATT +ATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAGCTTGATT +CTAAGCATAGTGGTAATTATGATTACTGGTATAGATCGCTTAGGAAGTCTAAACTCAAAC +CTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAACAAACCTTGTAAAGGTA +AAGGTCCTAATTGTTACTTTCCTTTAGAATCATATGGTTTCCGACCCACTTATGGTGTTG +GTCACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTG +TTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCA +ATGGTTTAACAGGCACAGGTGTTCTTACTAAGTCTAACAAAAAGTTTCTGCCTTTCCAAC +AATTTGGCAGAGACATTGTTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGA +TTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATA +CTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTCTCTGTTGCTA +TTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTC +AAACACGTGCAGGCTGTTTAATAGGGGCTGAATATGTCAACAACTCATATGAGTGTGACA +TACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAAGTCTCGTCGGCGGG +CACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATT +CAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCA +CAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTG +GTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAA +AACGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCAC 
+AAGTCAAACAAATTTACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTCAC +AAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCA +ACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATA +TTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTT +TGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTT +CTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTT +ATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTG +CCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTTTTCCACAGCAAGTG +CACTTGGAAAACTTCAAGATGTGGTCAACCATAATGCACAAGCTTTAAACACGCTTGTTA +AACAACTTAGCTCCAAATTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTC +TTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTT +TGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATC +TTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTG +GAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGC +ATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATG +ATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTT +TAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTG +GTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACTTG +AATTAGATTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATG +TTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTG +ACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTG +GAAAGTATGAGCAGTATATAAAATGGCCATGGTATATTTGGCTAGGTTTTATAGCTGGCT +TGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTC +TCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAG +TGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGAAT +CTTCACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGA +TTTTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTAT +TGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTCCAAAATCATAACTCTCAAAAA +GAGATGGCAACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTT +TGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTA +TCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGCT +TTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTATTTTCT 
+TTGCTGGCATACTAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAAT +TGTCATTACTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGG +TGGTTATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTT +CACTTCAGACTATTACCAGCTGTACTCAACTCAATTGAGTACAGACATTGGTGTTGAACA +TGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACATGTCCAAATTCA +CACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAATTTATGATGAACC +GACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTACTC +ATTCGTTTCGGAAGAGATAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTT +CGTGGTATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTG +CTGCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAA +AAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTAAACGAACTAAATATTATATTA +GTTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCACATTCCAACGGTACTATTACCGTT +GAAGAGCTTAAAAAGCTCCTTGAAGAATGGAACCTAGTAATAGGTTTCCTATTCCTTGCA +TGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAAG +TTAATTTTTCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTGCTTGCTGCTGTT +TACAGAATAAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTG +ATGTGGCTCAGCTACTTCATTGCTTCTTTCAGACTGTTTGTGCGTACGCGTTCCATGTGG +TCATTTAATCCAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACC +AGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCGT +ATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAGAAATCACTGTT +GCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGAC +TCAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCAT +TCCAGTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCT +CGTTGACTTTCAGGTTACTATAGCAGAGATATTACTAATTATTATGCGGACTTTTAAAGT +TTCCATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTATCTAAGTCACT +AACTGAGAATAAATATTCTCAATTAGATGAAGAGCAACCAATGGAGATTCTCTAAACGAA +CATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCACTA +CCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATA +CGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAG +CACTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAG +ATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAAT +TTTTCTTATTGTTGCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAGAC 
+AGAATGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGTTACTC +CTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGAA +ACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTGTA +GCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTT +GATGACCCGTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAA +TCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTAC +ATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCT +AAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGAC +GTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTTAAATGTCTGATAATGGACCCC +AAAATCAGCGAAATGCACTCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAGTA +ACCAGAATGGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTG +CGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGAC +AAGGCGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAG +CTACCAGACGAATTCGTGGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATT +TCTACTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCA +TCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCA +ATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAG +GCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTA +GTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAAACGAACTTCTCCTGCTAGAA +TGGCTGGCAATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACAAGC +TTGAGAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAAT +CTGCTGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATG +TAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGG +AACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCA +GCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGT +GGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAG +TCATTTTGCTGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAA +AGGACAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGC +AAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAAT +CCATGAGCCGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGG +GCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGA +ATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTT 
+TAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCTACAGTGA +ACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTA +>USA/MD-CDC-LC1110088/2024 +AACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAA +TTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTG +CTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTGTG +ACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACT +CAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGA +GGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGT +TGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGC +TCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCA +GTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGT +GGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGGTA +CGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGA +TTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGA +GCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTA +CCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGATTCATGCACTTTGTC +CGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCA +TGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGA +AATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCC +CTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTT +TATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTG +CCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTT +TGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTAC +TTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTC +AGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCAT +TCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTG +CCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGTGCTAACATAGGTTGTAACCATAC +AGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAA +AGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTAT +TTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTA +TAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGC 
+TAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATT +TGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCA +AAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTA +TTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGT +TGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACAT +CTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAA +GGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGC +TTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCA +GACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGG +TGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATT +GTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCC +AAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGT +TGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGC +TCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACAC +AGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAA +AGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTA +CAAGAGTGTGAATATCATTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAA +GTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGC +AGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGA +TTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAATT +GGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTG +TGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCA +AGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGA +AGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAGGA +CAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAGAGATGGAACT +TACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTTAAAACTTACTGA +CAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACATT +GGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAA +TAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACC +ACTTAAAGTGGGCGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCA +TGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAAGAGTGCTTATGA +AAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGGTATTTTTGGTGC 
+TGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGC +TGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGA +AAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTATAAC +TGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGT +TGAAGAAGTTACAACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTA +TATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACAT +CACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTT +AACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTGAAATGCTAGCGAAAGC +TTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAATGG +TTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCTTTTTACATTCT +ACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCG +AGAAATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAA +AGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAGAGGGTGTGGT +TGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGCGTCACTTATCAA +CACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTATGTAACACATGG +CTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTC +TGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAAC +ACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTCCTA +TTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATA +TTACACTAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCACCTTTGACAATCT +TAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAA +CATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCC +AACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAGGTAA +AACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCA +CACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTG +GAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCT +TGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTACA +AGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCACTTATCTTAGC +CTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAATGAGTTACTTGTT +TCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGTGG +ACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTA +TGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATA 
+TCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGAACT +TAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCAGTGTGGTCACTA +TAAACATATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTTACTTACAAAGTC +CTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAAC +CATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAGTT +GGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGATCTTGTACC +AAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAA +ATTTGCTGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAA +AGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTATAGACACTACAC +ACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAA +TGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCTTTGGAGCAC +AAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGAAT +GGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTAC +CATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTAT +ACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAAT +GGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATTATCTAGAGT +ATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATAGTGTCCCTTGGGA +TACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTACAACTACTAACAT +AGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATT +GCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGAC +TACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAA +TTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATT +AAGTGTTTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGTTTTAATGTCTAA +TTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGT +CACTATTGCAACCTACTGTATTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGA +TTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAATG +GGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAG +GTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGT +ACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCC +GATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAG +TTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTACAAACGTAA +TAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAGGTCCTTTTATGT 
+CTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGTGA +TACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACA +GTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGT +GAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACA +TTCTCTCTCTCATTTTGTTAACTTAGACAGCCTGAGAGCTAATAACACTAAAGGTTCATT +GCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATCATCTGCAAAATC +AGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATT +AGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAA +TACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGC +AGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGC +AGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAA +ATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCAC +TTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGC +GCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTACTTTGATATGGAACGTTAA +AGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAGAA +TAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAAC +AAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGT +TACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCAT +GTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGATGGTGGTGT +CACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAACATGCTGATTTTGACAC +ATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCCATTGATTGCTGC +AGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCAC +AACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTG +TTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGTTTTGGC +TGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATAC +CAATGTACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACACACGTTATGTGCT +CATGGATGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGT +AACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGT +TTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTACCAGG +AGTTTTCTGTGGTGTAGATGCTGTAAATTTATTTACTAATATGTTTACACCACTAATTCA +ACCTATTGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTGGCTATCGT +AGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCA 
+TGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCATTGTACTCTGTTTAACACC +AGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGACATTTTATCT +TACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTTCACACCTTTAGT +ACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATTGGTT +CTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGA +AGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGA +TGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAAGTACAAGTA +TTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTTGTCATCTCGCAAA +GGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTCTTTACCAACCACCACAAATCTC +TATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGT +TGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGA +TGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGATATGCTTAACCCTAA +TTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTAATGT +TCAACTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTGATAC +AGCCAATCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTC +AGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGACACAA +TTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTTTTAACATAGA +TTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTCATGC +TGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCACAAGC +AGCTGGTACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTAT +AAATGGAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGT +GGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATACTAGGACCTCT +TTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTCATTAAAAGAATTACTGCA +AAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGATGAATTTACACC +TTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAAT +CAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTATTTTTAGTCCA +GAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTGCTAT +GGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCATTTCT +CTGTTTGTTTTTGTTACCTTCTCTTGCCACTGTAGCTTATTTTAATATGGTCTATATGCC +TGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATACTAGTTTGAAGCT +AAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGAAC +TGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTA 
+TAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTC +TGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGT +TTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTAT +AATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTT +ACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGTTTCTACACAGGA +GTTTAAATATATGAATTCACAGGGACTACTCCCACCCAAGAATAGCATAGATGCCTTCAA +ACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACA +GTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACT +CAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCT +CTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTTCTGTTTTGCT +TTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGC +AACCTTACAAGCTATAGCCTCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTAC +TGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAA +GTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAACG +TAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGCTAGATCTGA +GGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAA +GTTGGATAATGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTT +GAACATAATACCTCTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACAC +ATATAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATCCA +ACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAGTATGGACAATTC +ACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCAAATT +ACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTAC +ACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACAATAAAGGGAGGTAG +GTTTGTACTTGCATTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAG +TGATGGAACTGGTACTATTTATACAGAACTGGAACCACCTTGTAGGTTTGTTACAGACAC +ACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAAAGGATTAAACAACCTAAATAG +AGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAACAGA +AGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGC +TTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGTG +TACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATATGGATCAAGA +ATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCC +TAAAGGATTTTGTGACTTAAAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAACGA 
+CCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTA +TGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCGTT +TTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCACAGGCACTAGT +ACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAAA +TTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGAT +TCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTAT +AATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGAC +GGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACAATGGCAGACCTC +GTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGACACATTAAAAGAAATACTTGTC +ACATACAATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATGATTTTGTAGAA +AACCCAGATATATTACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTA +AAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTA +GATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAAACCACGCCA +GGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTG +ACCAGGGCTTTAACTGCAGAGTCACATGTTGACACTGACTTAACAAAGCCTTACATTAAG +TGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTT +AAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCATT +CTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTTACAAGTTTTGGA +CCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCAC +TTCAGAGAGCTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGT +TTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTA +TTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTTTT +CAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCTGTGTCTAAGGGT +TTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTTCTTTGCTCAGGATGGTAAT +GCTGCTATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATATCAGA +CAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGT +ATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAAT +AAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGATCAAGATGCACTT +TTCGCATATACAAAACGTAATGTCATCCCTACTACAACTCAAATGAATCTTAAGTATGCC +ATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACC +AATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTA +GTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATAGT 
+GATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGAGCCATGCCT +AACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGCAAACATACAACGTGTTGTAGC +TTGTCACACCGTTTCTATAGATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTC +ATGTGTGGCGGTTCACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACT +GCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCACTT +TTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTACAACACAGACTT +TATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGCA +TATTTGCGTAAACATTTCTCAATGATGATACTTTCTGACGATGCTGTTGTGTGTTTCAAT +AGCACTTATGCATCACAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTAT +TATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAA +GGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGTGATGATTATGTG +TACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCCGGCTGTTTTGTAGATGATATC +GTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTAC +CCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATAC +ATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTT +ACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATGTACACACCG +CATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGA +TGTGGTGCTTGCATACGTAGACCATTCTTATGTTGTAAATGCTGTTACGACCATGTCATA +TCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGT +GATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCACAT +AAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAAAAAT +ACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACA +AATGCTGGTGATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCA +GAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGT +GAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCACCA +CTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGTAAAGTACAAATA +GGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTTTACCGAGGTACAACA +ACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCCATTA +AGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACA +CTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAA +AAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGGCCTA +GCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCTTGCTCTCATGCCGCTGTTGAT 
+GCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGATAAATGTAGTAGAATTATACCT +GCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTAT +GTCTTTTGTACTGTAAATGCATTGCCTGAGACTACAGCAGATATAGTTGTCTTTGATGAA +ATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTATGTGCTAAGCAC +TATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGC +ACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGAC +ATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTG +GTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGTTT +TATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCACAAATAGGCGTG +GTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTAT +AATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCA +TCACAGGGCTCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCT +TGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATA +ATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAAATTCCACGTAGG +AATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTCTTTAAAGATTGTAGTAAGGTA +ATCACTGGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACACTAAATTCAAA +ACTGAAGGTTTATGTGTTGACGTACCTGGCATACCTAAGGACATGACCTATAGAAGACTC +ATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATC +ACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTCGAGGGGTGT +CATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGT +GTTAACCTAGTTGCTGTACCTACAGGTTATGTTGATACACCTAATAATACAGATTTTTCC +AGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAACACCTCATACCGCTTATGTAC +AAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACACTT +AAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAGTTGACATCT +ATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCC +ACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGAT +TACGTCTATAATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAA +AGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATGCA +ATCATGACTAGGTGTCTAGCTGTCCACGAGTGTTTTGTTAAGCGTGTTGACTGGACTATT +GAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGCTTGTAGAAAGGTTCAACAC +ATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAAC +CCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAG 
+CCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCT +GACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGATAGATATCCTGCT +AATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCTAACCTTAACTTGCCTGGTTGT +GATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCACACACCAGCTTTTGATAAAAGT +GCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCT +CATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTATA +ACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGATTGTAT +CTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGCTTGTGGGTTTACAAACAATTT +GATACTTATAACCTCTGGAACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTT +AATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATT +AATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAACA +ACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTAAACCAGTACCA +GAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGAC +TACAAAAGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGACATA +GCCAAGAAACCAATTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTT +GATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAGGGTAGT +GTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAATGGAGTCACATTA +ATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAGAAAGTTGATGGTGTTGTCCAA +CAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGT +CAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTA +GAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGT +TTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAATTAGAAGAT +TTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTCA +TCTAAGTGTGTGTGTTCTGTTATTGATTTATTACTTGATGATTTTGTTGAAATAATAAAA +TCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATT +TCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAATCT +AGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGAATGCTA +TTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATG +ATGAATGTCGCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTA +CCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGT +ACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTAAT +GACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACTGTACATACAGCT 
+AATAAATGGGATCTCATTATTAGTGATATGTACGACCCTAAGACTAAAAATGTTACAAAA +GAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCTA +GCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTAT +AAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCA +TCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAACAAATAGATGGT +TATGTCATGCATGCAAATTACATATTTTGGAGGAATACAAATCCAATTCAGTTGTCTTCC +TATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCT +TTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATA +ATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAAC +AATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTCATGCCGCTGTT +TAATCTTATAACTACAACTCAATCATACACTAATTTCACACGTGGTGTTTATTACCCTGA +CAAAGTTTTCAGATCCTCAGTTTTACATTTAACTCAGGACTTGTTCTTACCTTTCTTTTC +CAATGTTACTTGGTTCCATGCTATCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCC +TGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAG +AGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAA +CGCTACTAATGTTTTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGA +TGTTTACCAAAAAAACAACAAAAGTTGGATGGAAAGTGAGTCAGGAGTTTATTCTAGTGC +GAATAATTGCACTTTTGAATATGTCTCTCGGCCTTTTCTTATGGACCTTGAAGGAAAACA +GGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAAT +ATATTCTAAGCACACGCCTATTATAGGGCGTGATTTCCCTCAGGGTTTTTCGGCTTTAGA +ACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTT +AAATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGA +TTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAAC +CATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAA +ATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGA +ATCTATTGTTAGATTTCCTAATGTTACAAACTTGTGCCCTTTTCATGAAGTTTTTAACGC +CACCACATTTGCATCTGTTTATGCTTGGAACAGGACGAGAATCAGCAACTGTGTTGCTGA +TTATTCTGTCCTATATAATTTCGCACCATTTTTCGCTTTTAAGTGTTATGGAGTGTCTCC +TACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAAAGG +TAATGAAGTCAGCCAAATCGCTCCAGGGCAAACTGGAAATATTGCTGATTATAATTATAA +ATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAGCTTGATTCTAA +GCATAGTGGTAATTATGATTACTGGTATAGATCGCTTAGGAAGTCTAAACTCAAACCTTT 
+TGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAACAAACCTTGTAAAGGTAAAGG +TCCTAATTGTTACTTTCCTTTACAATCATATGGTTTCCGACCCACTTATGGTGTTGGTCA +CCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTG +TGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGG +TTTAACAGGCACAGGTGTTCTTACTAAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATT +TGGCAGAGACATTGTTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCT +TGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTC +TAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTCTCTGTTGCTATTCA +TGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAAC +ACGTGCAGGCTGTTTAATAGGGGCTGAATATGTCAACAACTCATATGAGTGTGACATACC +CATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAAGTCTCGTCGGCGGGCACG +TAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGT +TGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGA +AATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGA +TTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAAACG +TGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGT +CAAACAAATTTACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTCACAAAT +ATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAA +AGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGC +TGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCT +CACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGG +TTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAG +GTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAA +CCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTTTTCCACAGCAAGTGCACT +TGGAAAACTTCAAGATGTGGTCAACCATAATGCACAAGCTTTAAACACGCTTGTTAAACA +ACTTAGCTCCAAATTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGA +CAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCA +GACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGC +TGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAA +GGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGT +GACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGG +AAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTTTAAC 
+ACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAA +CTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACTTGAATT +AGATTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGA +TTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCG +CCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAA +GTATGAGCAGTATATAAAATGGCCATGGTATATTTGGCTAGGTTTTATAGCTGGCTTGAT +TGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAA +GGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCT +CAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTC +ACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTT +GTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTT +GGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTCCAAAATCATAACTCTCAAAAATAGA +TGGCAACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTA +ACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTT +TATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGCTTTGG +CTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTATTTTCTTTGC +TGGCATACTAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTC +ATTACTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGT +TATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACT +TCAGACTATTACCAGCTGTACTCAACTCAATTGAGTACAGACATTGGTGTTGAACATGTT +ACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACATGTCCAAATTCACACA +ATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAATTTATGATGAACCGACG +ACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTACTCATTC +GTTTCGGAAGAGATAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTG +GTATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGC +AATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAAT +CTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTAAACGAACTAAATATTATATTAGTTT +TTCTGTTTGGAACTTTAATTTTAGCCATGGCACATTCCAACGGTACTATTACCGTTGAAG +AGCTTAAAAAGCTCCTTGAAGAATGGAACCTAGTAATAGGTTTCCTATTCCTTGCATGGA +TTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAAGTTAA +TTTTTCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTGCTTGCTGCTGTTTACA +GAATAAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGT 
+GGCTCAGCTACTTCATTGCTTCTTTCAGACTGTTTGTGCGTACGCGTTCCATGTGGTCAT +TTAATCCAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGAC +CGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCGTATTG +CTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTA +CATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAG +GTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCA +GTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTT +GACTTTCAGGTTACTATAGCAGAGATATTACTAATTATTATGCGGACTTTTAAAGTTTCC +ATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTATCTAAGTCACTAACT +GAGAATAAATATTCTCAATTAGATGAAGAGCAACCAATGGAGATTCTCTAAACGAACATG +AAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCACTACCAA +GAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAG +GGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACT +CAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCA +GTTTCACCTAAACTGTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTT +CTTATTGTTGCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAGACAGAA +TGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGTTACTCCTTG +TTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGAAACTT +GTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTG +CATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATG +ACCCGTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAG +CACCTTTAATTGAATTGTGCGTGGATGAGCCTGGTTCTAAATCACCCATTCAGTACATCG +ATATCGGTAATTATACAGTTTTCTGTTTACCTTTTACAATTAATTGCCAGGAACCTAAAT +TGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTC +GTGTTGTTTTAGATTTCATCTAAACGAACAAACTTAAATGTCTGATAATGGACCCCAAAA +TCAGCGAAATGCACTCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCA +GAATGGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTC +TTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGG +CGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTAC +CAGACGAATTCGTGGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTA +CTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCAT +ATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCC 
+TGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTT +CTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCG +CAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAAACGAACTTCTCCTGCTAGAATGGC +TGGCAATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACAAGCTTGA +GAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGC +TGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTAAC +ACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACT +AATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGC +TTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGGTT +GACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCAT +TTTGCTGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGA +CAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAAC +TGTGACTCTTCTTCCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCAT +GAGCCGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTA +TATAAACGTTTT +>USA/FL-CDC-LC1109983/2024 +AACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAA +TTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTG +CTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTGTG +ACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACT +CAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGA +GGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGT +TGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGC +TCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCA +GTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGT +GGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGGTA +CGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGA +TTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGA +GCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTA +CCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGATTCATGCACTTTGTC +CGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCA +TGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGA +AATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCC 
+CTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTT +TATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTG +CCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTT +TGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTAC +TTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTC +AGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCAT +TCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTG +CCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATAC +AGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAA +AGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTAT +TTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTA +TAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGC +TAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATT +TGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCA +AAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTANNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGAAACATTTGTNNNNNNNNNNNNNNNNNN +NTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCC +AAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGT +TGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGC +TCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACAC +AGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAA +AGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTA +CAAGAGTGTGAATATCATTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAA +GTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGC +AGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGA +TTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAATT 
+GGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTG +TGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCA +AGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGA +AGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAGGA +CAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAGAGATGGAACT +TACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTTAAAACTTACTGA +CAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACATT +GGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAA +TAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACC +ACTTAAAGTGGGCGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCA +TGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAAGAGTGCTTATGA +AAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGGTATTTTTGGTGC +TGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGC +TGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGA +AAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTATAAC +TGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGT +TGAAGAAGTTACAACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTA +TATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACAT +CACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTT +AACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTGAAATGCTAGCGAAAGC +TTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAATGG +TTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCTTTTTACATTCT +ACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCG +AGAAATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAA +AGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAGAGGGTGTGGT +TGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGCGTCACTTATCAA +CACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTATGTAACACATGG +CTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTC +TGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAAC +ACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTCCTA +TTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATA +TTACACTAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCACCTTTGACAATCT 
+TAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAA +CATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCC +AACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAGGTAA +AACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCA +CACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTG +GAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCT +TGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTACA +AGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCACTTATCTTAGC +CTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAATGAGTTACTTGTT +TCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGTGG +ACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTA +TGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATA +TCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGAACT +TAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCAGTGTGGTCACTA +TAAACATATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTTACTTACAAAGTC +CTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAAC +CATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAGTT +GGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGATCTTGTACC +AAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAA +ATTTGCTGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAA +AGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTATAGACACTACAC +ACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAA +TGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCTTTGGAGCAC +AAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGAAT +GGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTAC +CATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTAT +ACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAAT +GGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATTATCTAGAGT +ATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATAGTGTCCCTTGGGA +TACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTACAACTACTAACAT +AGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATT +GCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGAC 
+TACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAA +TTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATT +AAGTGTTTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGTTTTAATGTCTAA +TTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGT +CACTATTGCAACCTACTGTATTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGA +TTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAATG +GGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAG +GTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGT +ACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCC +GATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAG +TTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTACAAACGTAA +TAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAGGTCCTTTTATGT +CTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGTGA +TACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACA +GTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGT +GAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACA +TTCTCTCTCTCATTTTGTTAACTTAGACAGCCTGAGAGCTAATAACACTAAAGGTTCATT +GCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATCATCTGCAAAATC +AGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATT +AGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAA +TACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGC +AGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGC +AGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAA +ATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCAC +TTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGC +GCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTACTTTGATATGGAACGTTAA +AGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAGAA +TAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAAC +AAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGT +TACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCAT +GTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGATGGTGGTGT +CACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAACATGCTGATTTTGACAC 
+ATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCCATTGATTGCTGC +AGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCAC +AACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTG +TTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGTTTTGGC +TGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATAC +CAATGTACTAGAAGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCACTTGTGAAAGATCAGAAGCTGGTGT +TTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTACCAGG +AGTTTTCTGTGGTGTAGATGCTGTAAATTTATTTACTAATATGTTTACACCACTAATTCA +ACCTATTGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTGGCTATCGT +AGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCA +TGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCATTGTACTCTGTTTAACACC +AGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGACATTTTATCT +TACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTTCACACCTTTAGT +ACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATTGGTT +CTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGA +AGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGA +TGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAAGTACAAGTA +TTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTTGTCATCTCGCAAA +GGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTCTTTACCAACCACCACAAATCTC +TATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGT +TGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGA +TGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGATATGCTTAACCCTAA +TTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTAATGT +TCAACTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTGATAC +AGCCAATCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTC +AGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGACACAA +TTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTTTTAACATAGA +TTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTCATGC +TGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCACAAGC +AGCTGGTACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTAT 
+AAATGGAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGT +GGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATACTAGGACCTCT +TTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTCATTAAAAGAATTACTGCA +AAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGATGAATTTACACC +TTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAAT +CAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTATTTTTAGTCCA +GAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTGCTAT +GGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCATTTCT +CTGTTTGTTTTTGTTACCTTCTCTTGCCACTGTAGCTTATTTTAATATGGTCTATATGCC +TGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATACTAGTTTGAAGCT +AAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGAAC +TGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTA +TAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTC +TGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGT +TTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTAT +AATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTT +ACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGTTTCTACACAGGA +GTTTAGATATATGAATTCACAGGGACTACTCCCACCCAAGAATAGCATAGATGCCTTCAA +ACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACA +GTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACT +CAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCT +CTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTTCTGTTTTGCT +TTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGC +AACCTTACAAGCTATAGCCTCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTAC +TGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAA +GTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAACG +TAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGCTAGATCTGA +GGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAA +GTTGGATAATGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTT +GAACATAATACCTCTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACAC +ATATAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATCCA +ACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAGTATGGACAATTC 
+ACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCAAATT +ACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTAC +ACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACAATAAAGGGAGGTAG +GTTTGTACTTGCACTGTTATCCGATTTACAGGATTTGAANNNNNNNNNNNNNNNTAAGAG +TGATGGAACTGGTACTATTTATACAGAACTGGAACCACCTTGTAGGTTTGTTACAGACAC +ACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAAAGGATTAAACAACCTAAATAG +AGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAACAGA +AGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGC +TTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGTG +TACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATATGGATCAAGA +ATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCC +TAAAGGATTTTGTGACTTAAAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAACGA +CCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTA +TGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCGTT +TTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCACAGGCACTAGT +ACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAAA +TTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGAT +TCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTAT +AATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGAC +GGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACAATGGCAGACCTC +GTCTATGCTTTAAGGCANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGACATTA +GATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAAACCACGCCA +GGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTG +ACCAGGGCTTTAACTGCAGAGTCACATGTTGACACTGACTTAACAAAGCCTTACATTAAG +TGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTT +AAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCATT +CTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTTACAAGTTTTGGA +CCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCAC +TTCAGAGAGCTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGT 
+TTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTA +TTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTTTT +CAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCTGTGTCTAAGGGT +TTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTTCTTTGCTCAGGATGGTAAT +GCTGCTATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATATCAGA +CAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGT +ATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAAT +AAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGATCAAGATGCACTT +TTCGCATATACAAAACGTAATGTCATCCCTACTATAACTCAAATGAATCTTAAGTATGCC +ATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACC +AATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTA +GTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATAGT +GATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGAGCCATGCCT +AACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGCAAACATACAACGTGTTGTAGC +TTGTCACACCGTTTCTATAGATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTC +ATGTGTGGCGGTTCACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACT +GCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCACTT +TTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTACAACACAGACTT +TATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGCA +TATTTGCGTAAACATTTCTCAATGATGATACTTTCTGACGATGCTGTTGTGTGTTTCAAT +AGCACTTATGCATCACAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTAT +TATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAA +GGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGTGATGATTATGTG +TACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCCGGCTGTTTTGTAGATGATATC +GTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTAC +CCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATAC +ATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTT +ACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATGTACACACCG +CATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGA +TGTGGTGCTTGCATACGTAGACCATTCTTATGTTGTAAATGCTGTTACGACCATGTCATA +TCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGT +GATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCACAT 
+AAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAAAAAT +ACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACA +AATGCTGGTGATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCA +GAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGT +GAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCACCA +CTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGTAAAGTACAAATA +GGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTTTACCGAGGTACAACA +ACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCCATTA +AGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACA +CTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAA +AAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGGCCTA +GCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCTTGCTCTCATGCCGCTGTTGAT +GCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGATAAATGTAGTAGAATTATACCT +GCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTAT +GTCTTTTGTACTGTAAATGCATTGCCTGAGACTACAGCAGATATAGTTGTCTTTGATGAA +ATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTATGTGCTAAGCAC +TATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGC +ACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGAC +ATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTG +GTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGTTT +TATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCACAAATAGGCGTG +GTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTAC +AATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCA +TCACAGGGCTCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCT +TGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATA +ATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAAATTCCACGTAGG +AATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTCTTTAAAGATTGTAGTAAGGTA +ATCACTGGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACACTAAATTCAAA +ACTGAAGGTTTATGTGTTGACGTACCTGGCATACCTAAGGACATGACCTATAGAAGACTC +ATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATC +ACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTCGAGGGGTGT +CATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGT 
+GTTAACCTAGTTGCTGTACCTACAGGTTATGTTGATACACCTAATAATACAGATTTTTCC +AGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAACACCTCATACCGCTTATGTAC +AAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGATACACTT +AAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAGTTGACATCT +ATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCC +ACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGAT +TACGTCTATAATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAA +AGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATGCA +ATCATGACTAGGTGTCTAGCTGTCCACGAGTGTTTTGTTAAGCGTGTTGACTGGACTATT +GAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGCTTGTAGAAAGGTTCAACAC +ATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAAC +CCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAG +CCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCT +GACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGATAGATATCCTGCT +AATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCTAACCTTAACTTGCCTGGTTGT +GATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCACACACCAGCTTTTGATAAAAGT +GCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCT +CATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTATA +ACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGATTGTAT +CTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGCTTGTGGGTTTACAAACAATTT +GATACTTATAACCTCTGGAACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTT +AATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATT +AATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAACA +ACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTAAACCAGTACCA +GAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGAC +TACAAAAGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGACATA +GCCAAGAAACCAATTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTT +GATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAGGGTAGT +GTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAATGGAGTCACATTA +ATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAGAAAGTTGATGGTGTTGTCCAA +CAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGT +CAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTA 
+GAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGT +TTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAATTAGAAGAT +TTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTCA +TCTAAGTGTGTGTGTTCTGTTATTGATTTATTACTTGATGATTTTGTTGAAATAATAAAA +TCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATT +TCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAATCT +AGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGAATGCTA +TTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATG +ATGAATGTCGCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTA +CCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGT +ACAGCTGTTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNTCACTTACATTTGTGGGTTTATACAACAAAAGCTA +GCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTAT +AAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCA +TCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAACAAATAGATGGT +TATGTCATGCATGCAAATTACATATTTTGGAGGAATACAAATCCAATTCAGTTGTCTTCC +TATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCT +TTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATA +ATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAAC +AATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTCATGCCGCTGTT +TAATCTTATAACTACAACTCAATCATACACTAATTTCACACGTGGTGTTTATTACCCTGA +CAAAGTTTTCAGATCCTCAGTTTTACATTTAACTCAGGACTTGTTCTTACCTTTCTTTTC +CAATGTTACTTGGTTCCATGCTATCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCC +TGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAG +AGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAA +CGCTACTAATGTTTTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGA +TGTTTACCAAAAAAACAACAAAAGTTGGATGGAAAGTGAGTCAGGAGTTTATTCTAGTGC +GAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACA +GGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAAT +ATATTCTAAGCACACGCCTATTATAGGGCGTGATTTCCCTCAGGGTTTTTCGGCTTTAGA 
+ACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTT +AAATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGA +TTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAAC +CATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAA +ATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGA +ATCTATTGTTAGATTTCCTAATGTTACAAACTTGTGCCCTTTTCATGAAGTTTTTAACGC +CACCACATTTGCATCTGTTTATGCTTGGAACAGGACGAGAATCAGCAACTGTGTTGCTGA +TTATTCTGTCCTATATAATTTCGCACCATTTTTCGCTTTTAAGTGTTATGGAGTGTCTCC +TACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAAAGG +TAATGAAGTCAGCCAAATCGCTCCAGGGCAAACTGGAAATATTGCTGATTATAATTATAA +ATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAGCTTGATTCTAA +GCATAGTGGTAATTATGATTACTGGTATAGATCGCTTAGGAAGTCTAAACTCAAACCTTT +TGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAACAAACCTTGTAAAGGTAAAGG +TCCTAATTGTTACTTTCCTTTACAATCATATGGTTTCCGACCCACTTATGGTGTTGGTCA +CCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTG +TGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGG +TTTAACAGGCACAGGTGTTCTTACTAAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATT +TGGCAGAGACATTGTTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCT +TGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTC +TAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTCTCTGTTGCTATTCA +TGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAAC +ACGTGCAGGCTGTTTAATAGGGGCTGAATATGTCAACAACTCATATGAGTGTGACATACC +CATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAAGTCTCGTCGGCGGGCACG +TAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGT +TGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGA +AATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGA +TTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAAACG +TGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGT +CAAACAAATTTACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTCACAAAT +ATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAA +AGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGC +TGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCT 
+CACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGG +TTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAG +GTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAA +CCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTTTTCCACAGCAAGTGCACT +TGGAAAACTTCAAGATGTGGTCAACCATAATGCACAAGCTTTAAACACGCTTGTTAAACA +ACTTAGCTCCAAATTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGA +CAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCA +GACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGC +TGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAA +GGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGT +GACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGG +AAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTTTAAC +ACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAA +CTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACTTGAATT +AGATTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGA +TTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCG +CCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAA +GTATGAGCAGTATATAAAATGGCCATGGTATATTTGGCTAGGTTTTATAGCTGGCTTGAT +TGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAA +GGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCT +CAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTC +ACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTT +GTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTT +GGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTCCAAAATCATAACTCTCAAAAATAGA +TGGCAACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTA +ACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTT +TATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGCTTTGG +CTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTATTTTCTTTGC +TGGCATACTAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTC +ATTACTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGT +TATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACT +TCAGACTATTACCAGCTGTACTCAACTCAATTGAGTACAGACATTGGTGTTGAACATGTT 
+ACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACATGTCCAAATTCACACA +ATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAATTTATGATGAACCGACG +ACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTACTCATTC +GTTTCGGAAGAGATAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTG +GTATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGC +AATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAAT +CTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTAAACGAACTAAATATTATATTAGTTT +TTCTGTTTGGAACTTTAATTTTAGCCATGGCACATTCCAACGGTACTATTACCGTTGAAG +AGCTTAAAAAGCTCCTTGAAGAATGGAACCTAGTAATAGGTTTCCTATTCCTTGCATGGA +TTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAAGTTAA +TTTTTCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTGCTTGCTGCTGTTTACA +GAATAAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGT +GGCTCAGCTACTTCATTGCTTCTTTCAGACTGTTTGTGCGTACGCGTTCCATGTGGTCAT +TTAATCCAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGAC +CGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACACCTTCGTATTG +CTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTA +CATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAG +GTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCA +GTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTT +GACTTTCAGGTTACTATAGCAGAGATATTACTAATTATTATGCGGACTTTTAAAGTTTCC +ATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTATCTAAGTCACTAACT +GAGAATAAATATTCTCAATTAGATGAAGAGCAACCAATGGAGATTCTCTAAACGAACATG +AAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCACTACCAA +GAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAG +GGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACT +CAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCA +GTTTCACCTAAACTGTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTT +CTTATTGTTGCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAGACAGAA +TGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGTTACTCCTTG +TTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGAAACTT +GTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTG +CATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATG 
+ACCCGTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAG +CACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCACTCAGTACATCG +ATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTAAAT +TGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTC +GTGTTGTTTTAGATTTCATCTAAACGAACAAACTTAAATGTCTGATAATGGACCCCAAAA +TCAGCGAAATGCACTCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCA +GAATGGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTC +TTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGG +CGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTAC +CAGACGAATTCGTGGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTA +CTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCAT +ATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAAGATCACATTGGTACCCGCAATCC +TGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTT +CTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCG +CAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAAACGAACTTCTCCTGCTAGAATGGC +TGGCAATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACAAGCTTGA +GAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGC +TGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTAAC +ACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACT +AATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGC +TTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGGTT +GACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCAT +TTTGCTGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGA +CAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAAC +TGTGACTCTTCTTCCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCAT +GAGCCGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTA +TATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTC +TCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTAAT +CAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCTACAGTGAACAA +TGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTA diff --git a/tests/integration/test_cladetime_integration.py b/tests/integration/test_cladetime_integration.py new file mode 100644 index 
0000000..4a89f8e --- /dev/null +++ b/tests/integration/test_cladetime_integration.py @@ -0,0 +1,155 @@ +import lzma +from datetime import datetime, timezone +from unittest.mock import MagicMock, patch + +import polars as pl +import pytest +import requests +from freezegun import freeze_time +from polars.testing import assert_frame_equal, assert_frame_not_equal + +from cladetime import CladeTime, sequence +from cladetime.exceptions import CladeTimeSequenceWarning +from cladetime.util.config import Config +from cladetime.util.reference import _docker_installed, _get_s3_object_url + +docker_enabled = _docker_installed() + + +@pytest.fixture() +def metadata_100k(tmp_path) -> pl.LazyFrame: + "Return metadata for Nextstrain's 100k samples as of 2024-11-01" + config = Config() + metadata_url = _get_s3_object_url( + bucket_name=config.nextstrain_ncov_bucket, + object_key="files/ncov/open/100k/metadata.tsv.xz", + date=datetime(2024, 11, 1, tzinfo=timezone.utc), + )[1] + + # download test metadata for Nextstrain's 100k samples (we can't use polars to scan it from + # s3 like we usually do, because the 100k samples don't have ZSTD-compressed versions + # and lzma.open doesn't support https) + response = requests.get(metadata_url) + response.raise_for_status() + with open(tmp_path / "metadata.tsv.xz", "wb") as file: + file.write(response.content) + metadata = pl.read_csv(lzma.open(tmp_path / "metadata.tsv.xz"), separator="\t", infer_schema_length=100000).lazy() + + return metadata + + +@pytest.mark.skipif(not docker_enabled, reason="Docker is not installed") +def test_cladetime_assign_clades(tmp_path, metadata_100k): + config = Config() + assignment_file = tmp_path / "assignments.csv" + + with freeze_time("2024-11-01"): + ct = CladeTime() + + # override link to sequence .fasta to test against the 100k sample dataset + sequence_url = _get_s3_object_url( + bucket_name=config.nextstrain_ncov_bucket, + object_key="files/ncov/open/100k/sequences.fasta.xz", + date=datetime(2024, 11, 
1, tzinfo=timezone.utc), + )[1] + ct.url_sequence = sequence_url + + metadata_filtered = sequence.filter_metadata(metadata_100k, collection_min_date="2024-10-01") + + # store clade assignments as they exist on the metadata file downloaded from Nextstrain + original_clade_assignments = metadata_filtered.select(["strain", "clade"]) + + # assign clades to the same sequences using cladetime + assigned_clades = ct.assign_clades(metadata_filtered, output_file=assignment_file) + + # clade assignments via cladetime should match the original clade assignments + check_clade_assignments = original_clade_assignments.join(assigned_clades, on=["strain", "clade"]).collect() + assert len(check_clade_assignments) == len(metadata_filtered.collect()) + unmatched_clade_count = check_clade_assignments.filter(pl.col("clade").is_null()).shape[0] + assert unmatched_clade_count == 0 + + +@pytest.mark.skipif(not docker_enabled, reason="Docker is not installed") +def test_assign_old_tree(test_file_path, tmp_path, test_sequences): + sequence_file, sequence_set = test_sequences + + fasta_mock = MagicMock(return_value=test_file_path / sequence_file, name="cladetime.sequence.filter") + test_filtered_metadata = {"date": ["2022-01-01", "2022-01-02", "2023-12-27"], "strain": list(sequence_set)} + metadata_filtered = pl.LazyFrame(test_filtered_metadata) + + # expected clade assignments for 2024-08-02 (as retrieved from Nextstrain metadata) + expected_assignment_dict = { + "strain": ["USA/VA-CDC-LC1109961/2024", "USA/FL-CDC-LC1109983/2024", "USA/MD-CDC-LC1110088/2024"], + "clade": ["24C", "24B", "24B"], + } + expected_assignments = pl.DataFrame(expected_assignment_dict) + + with freeze_time("2024-11-01"): + current_file = tmp_path / "current_assignments.csv" + ct_current_tree = CladeTime() + with patch("cladetime.sequence.filter", fasta_mock): + current_assigned_clades = ct_current_tree.assign_clades(metadata_filtered, output_file=current_file) + current_assigned_clades = 
current_assigned_clades.select(["strain", "clade"]).collect() + + old_file = tmp_path / "old_assignments.csv" + ct_old_tree = CladeTime(tree_as_of="2024-08-02") + with patch("cladetime.sequence.filter", fasta_mock): + old_assigned_clades = ct_old_tree.assign_clades(metadata_filtered, output_file=old_file) + old_assigned_clades = old_assigned_clades.select(["strain", "clade"]).collect() + + assert_frame_equal(current_assigned_clades.select("strain"), old_assigned_clades.select("strain")) + assert_frame_not_equal(current_assigned_clades.select("clade"), old_assigned_clades.select("clade")) + assert_frame_equal(old_assigned_clades.sort("strain"), expected_assignments.sort("strain")) + + +@pytest.mark.skipif(not docker_enabled, reason="Docker is not installed") +@pytest.mark.parametrize( + "min_date, max_date, expected_rows", + [("2023-12-27", None, 1), (None, "2022-01-03", 2), ("2022-01-02", "2023-12-28", 2)], +) +def test_assign_date_filters(test_file_path, tmp_path, test_sequences, min_date, max_date, expected_rows): + sequence_file, sequence_set = test_sequences + fasta_mock = MagicMock(return_value=test_file_path / sequence_file, name="cladetime.sequence.filter") + test_metadata = { + "date": ["2022-01-01", "2022-01-03", "2023-12-27"], + "strain": list(sequence_set), + "clade_nextstrain": ["11C", "11B", "11B"], + "host": ["Homo sapiens", "Homo sapiens", "Homo sapiens"], + "country": ["USA", "USA", "USA"], + "division": ["Utah", "Utah", "Utah"], + "wombat_count": [2, 22, 222], + } + metadata = pl.LazyFrame(test_metadata) + metadata_filtered = sequence.filter_metadata( + metadata=metadata, collection_min_date=min_date, collection_max_date=max_date + ) + + ct = CladeTime() + assignment_file = tmp_path / "assignments.csv" + with patch("cladetime.sequence.filter", fasta_mock): + assigned_clades = ct.assign_clades(metadata_filtered, output_file=assignment_file) + assert len(assigned_clades.collect()) == expected_rows + + +def 
test_assign_too_many_sequences_warning(tmp_path, test_file_path, test_sequences): + sequence_file, sequence_set = test_sequences + + ct = CladeTime() + ct._config.clade_assignment_warning_threshold = 2 + test_filtered_metadata = {"date": ["2022-01-01", "2022-01-02", "2023-12-27"], "strain": ["aa", "bb", "cc"]} + metadata_filtered = pl.LazyFrame(test_filtered_metadata) + fasta_mock = MagicMock(return_value=test_file_path / sequence_file, name="cladetime.sequence.filter") + with patch("cladetime.sequence.filter", fasta_mock): + with pytest.warns(CladeTimeSequenceWarning): + assignments = ct.assign_clades(metadata_filtered, output_file=tmp_path / "assignments.csv") + # clade assignment should proceed, despite the warning + assert len(assignments.collect()) == 3 + + +def test_assign_clades_no_sequences(): + ct = CladeTime() + with pytest.warns(CladeTimeSequenceWarning): + assignments = ct.assign_clades( + pl.LazyFrame(), + ) + assert assignments.collect().shape == (0, 0) diff --git a/tests/integration/test_nextclade_integration.py b/tests/integration/test_nextclade_integration.py index b667781..12c199e 100644 --- a/tests/integration/test_nextclade_integration.py +++ b/tests/integration/test_nextclade_integration.py @@ -26,6 +26,7 @@ def test_get_clade_assignments(test_file_path, tmp_path): "USA/CA-CDPH-A3000000297958/2023", "USA/WV064580/2020", "USA/PA-CDC-LC1096774/2024", + "USA/NJ-CDC-LC1124615/2024", } sequence_file = test_file_path / "test_sequences.fasta" @@ -38,7 +39,7 @@ def test_get_clade_assignments(test_file_path, tmp_path): ["seqName", "clade", "clade_nextstrain", "Nextclade_pango"] ) - assert len(assignment_df) == 4 + assert len(assignment_df) == 5 assigned_sequence_set = set(assignment_df["seqName"].unique().to_list()) assert test_sequence_set == assigned_sequence_set assert assignment_df["clade"].is_null().any() is False diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 0000000..a45f063 --- /dev/null +++ 
b/tests/unit/conftest.py @@ -0,0 +1,12 @@ +from pathlib import Path + +import pytest + + +@pytest.fixture +def test_file_path() -> Path: + """ + Return path to the unit test files. + """ + test_file_path = Path(__file__).parents[1].joinpath("data") + return test_file_path diff --git a/tests/unit/test_cladetime.py b/tests/unit/test_cladetime.py index fd17484..9f2d0b0 100644 --- a/tests/unit/test_cladetime.py +++ b/tests/unit/test_cladetime.py @@ -7,7 +7,7 @@ from freezegun import freeze_time from cladetime.cladetime import CladeTime -from cladetime.exceptions import CladeTimeDateWarning, CladeTimeInvalidURLError, CladeTimeSequenceWarning +from cladetime.exceptions import CladeTimeDateWarning, CladeTimeInvalidURLError def test_cladetime_no_args(): From 358f7590dd4ff8d5a146e736a98ba2c80319f0fc Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Tue, 12 Nov 2024 12:37:07 -0500 Subject: [PATCH 11/13] Update the return value of assign_clades This changeset returns a summarized version of the clade assignments as well as some metadata about the clade assignment process. 
--- src/cladetime/_clade.py | 12 +++ src/cladetime/cladetime.py | 27 +++++- src/cladetime/sequence.py | 60 +++++++++++- .../integration/test_cladetime_integration.py | 69 +++++++++++-- tests/unit/test_sequence.py | 96 +++++++++++++++++++ 5 files changed, 249 insertions(+), 15 deletions(-) create mode 100644 src/cladetime/_clade.py diff --git a/src/cladetime/_clade.py b/src/cladetime/_clade.py new file mode 100644 index 0000000..ac0da60 --- /dev/null +++ b/src/cladetime/_clade.py @@ -0,0 +1,12 @@ +from dataclasses import dataclass + +import polars as pl + + +@dataclass +class Clade: + """Holds detailed and summarized information about clade assignments.""" + + meta: dict + detail: pl.LazyFrame + summary: pl.LazyFrame diff --git a/src/cladetime/cladetime.py b/src/cladetime/cladetime.py index 2e87b1d..49cc3e7 100644 --- a/src/cladetime/cladetime.py +++ b/src/cladetime/cladetime.py @@ -9,6 +9,7 @@ import structlog from cladetime import Tree, sequence +from cladetime._clade import Clade from cladetime.exceptions import CladeTimeDateWarning, CladeTimeInvalidURLError, CladeTimeSequenceWarning from cladetime.util.config import Config from cladetime.util.reference import _get_clade_assignments, _get_date, _get_nextclade_dataset, _get_s3_object_url @@ -233,9 +234,11 @@ def assign_clades(self, sequence_metadata: pl.LazyFrame, output_file: str | None Returns ------- - metadata_clades : polars.LazyFrame - Nextstrain sequence_metadata with an additional column for clade assignments + metadata_clades : Clade + A Clade object that contains detailed and summarized information + about clades assigned to the sequences in sequence_metadata. 
""" + assignment_date = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M") if output_file is not None: output_file = Path(output_file) else: @@ -249,7 +252,7 @@ def assign_clades(self, sequence_metadata: pl.LazyFrame, output_file: str | None msg, category=CladeTimeSequenceWarning, ) - return pl.LazyFrame() + return Clade(meta={}, detail=pl.LazyFrame(), summary=pl.LazyFrame()) # if there are many sequences in the filtered metadata, warn that clade assignment will # take a long time and require a lot of resources @@ -307,7 +310,23 @@ def assign_clades(self, sequence_metadata: pl.LazyFrame, output_file: str | None assigned_clades = pl.read_csv(assignments, separator=";", infer_schema_length=100000) + # join the assigned clades with the original sequence metadata, create a summarized LazyFrame + # of clade counts by location, date, and host, and return both (along with metadata) in a + # Clade object assigned_clades = sequence_metadata.join( assigned_clades.lazy(), left_on="strain", right_on="seqName", how="left" ) - return assigned_clades + summarized_clades = sequence.summarize_clades( + assigned_clades, group_by=["location", "date", "host", "clade_nextstrain", "country"] + ) + metadata = { + "sequence_as_of": self.sequence_as_of, + "tree_as_of": self.tree_as_of, + "nextclade_dataset_version": tree.ncov_metadata.get("nextclade_dataset_version"), + "nextclade_dataset_name": tree.ncov_metadata.get("nextclade_dataset_name"), + "nextclade_version_num": tree.ncov_metadata.get("nextclade_version_num"), + "assignment_as_of": assignment_date, + } + metadata_clades = Clade(meta=metadata, detail=assigned_clades, summary=summarized_clades) + + return metadata_clades diff --git a/src/cladetime/sequence.py b/src/cladetime/sequence.py index 3916ce4..59055f9 100644 --- a/src/cladetime/sequence.py +++ b/src/cladetime/sequence.py @@ -3,6 +3,7 @@ import lzma import os import re +import warnings from datetime import datetime from pathlib import Path from urllib.parse import urlparse 
@@ -14,6 +15,7 @@
 from Bio.SeqIO import FastaIO
 from requests import Session

+from cladetime.exceptions import CladeTimeSequenceWarning
 from cladetime.types import StateFormat
 from cladetime.util.reference import _get_date
 from cladetime.util.session import _get_session
@@ -258,7 +260,12 @@ def filter_metadata(


 def get_clade_counts(filtered_metadata: pl.LazyFrame) -> pl.LazyFrame:
-    """Return a count of clades by location and date."""
+    """Return a count of clades by location and date.
+
+    Notes
+    -----
+    Deprecated in favor of summarize_clades
+    """

     cols = [
         "clade",
@@ -273,6 +280,63 @@ def get_clade_counts(filtered_metadata: pl.LazyFrame) -> pl.LazyFrame:
     return counts


+def summarize_clades(
+    sequence_metadata: pl.DataFrame | pl.LazyFrame, group_by: list[str] | None = None
+) -> pl.DataFrame | pl.LazyFrame:
+    """Return clade counts summarized by specific sequence metadata columns.
+
+    Parameters
+    ----------
+    sequence_metadata : :class:`polars.DataFrame` or :class:`polars.LazyFrame`
+        A Polars DataFrame or LazyFrame that represents
+        Nextstrain SARS-CoV-2 sequence metadata
+    group_by : list
+        Optional. A list of columns to group the clade counts by. Defaults
+        to ["clade_nextstrain", "country", "date", "location", "host"]
+
+    Returns
+    -------
+    :class:`polars.DataFrame` | :class:`polars.LazyFrame`
+        A Frame that summarizes clade counts by the specified columns. If sequence_metadata
+        is a LazyFrame, returns a LazyFrame. Otherwise, returns a DataFrame.
+        If group_by fails validation, the returned Frame is empty (no rows, no columns).
+
+    Warns
+    -----
+    CladeTimeSequenceWarning
+        If group_by contains a column name that is not in sequence_metadata or
+        if group_by contains a column named 'count'
+    """
+    if group_by is None:
+        group_by = ["clade_nextstrain", "country", "date", "location", "host"]
+
+    # Validate group_by columns
+    metadata_cols = sequence_metadata.collect_schema().names()
+    warning_msg = ""
+    if not all(col in metadata_cols for col in group_by):
+        warning_msg = warning_msg + f"Invalid group_by columns: {group_by} \n"
+    # "count" is reserved for the aggregated column created below
+    if "count" in group_by:
+        warning_msg = warning_msg + "Group_by cannot contain 'count' column \n"
+    if len(warning_msg) > 0:
+        # warn with the complete message text (minus the trailing separator)
+        warnings.warn(
+            warning_msg.rstrip(),
+            category=CladeTimeSequenceWarning,
+        )
+        # hand back an empty Frame of the same kind as the input
+        if isinstance(sequence_metadata, pl.LazyFrame):
+            return pl.LazyFrame()
+        else:
+            return pl.DataFrame()
+
+    counts = (
+        sequence_metadata.select(group_by).group_by(group_by).agg(pl.len().alias("count")).cast({"count": pl.UInt32})
+    )
+
+    return counts
+
+
 def get_metadata_ids(sequence_metadata: pl.DataFrame | pl.LazyFrame) -> set:
     """Return sequence IDs for a specified set of Nextstrain sequence metadata.

diff --git a/tests/integration/test_cladetime_integration.py b/tests/integration/test_cladetime_integration.py index 4a89f8e..2a3d9c4 100644 --- a/tests/integration/test_cladetime_integration.py +++ b/tests/integration/test_cladetime_integration.py @@ -63,18 +63,44 @@ def test_cladetime_assign_clades(tmp_path, metadata_100k): assigned_clades = ct.assign_clades(metadata_filtered, output_file=assignment_file) # clade assignments via cladetime should match the original clade assignments - check_clade_assignments = original_clade_assignments.join(assigned_clades, on=["strain", "clade"]).collect() + check_clade_assignments = original_clade_assignments.join( + assigned_clades.detail, on=["strain", "clade"] + ).collect() assert len(check_clade_assignments) == len(metadata_filtered.collect()) unmatched_clade_count = check_clade_assignments.filter(pl.col("clade").is_null()).shape[0] assert unmatched_clade_count == 0 + # summarized clade assignments should also match summarized clade assignments from the + # original metadata file + assert_frame_equal( + sequence.summarize_clades(metadata_filtered.rename({"clade": "clade_nextstrain"})), + assigned_clades.summary, + check_column_order=False, + check_row_order=False, + ) + + # metadata should reflect ncov metadata as of 2024-11-01 + assert assigned_clades.meta.get("sequence_as_of") == datetime(2024, 11, 1, tzinfo=timezone.utc) + assert assigned_clades.meta.get("tree_as_of") == datetime(2024, 11, 1, tzinfo=timezone.utc) + assert assigned_clades.meta.get("nextclade_dataset_version") == "2024-10-17--16-48-48Z" + assert assigned_clades.meta.get("nextclade_version_num") == "3.9.1" + assert assigned_clades.meta.get("assignment_as_of") == "2024-11-01 00:00" + @pytest.mark.skipif(not docker_enabled, reason="Docker is not installed") def test_assign_old_tree(test_file_path, tmp_path, test_sequences): sequence_file, sequence_set = test_sequences + sequence_list = list(sequence_set) + sequence_list.sort() fasta_mock = 
MagicMock(return_value=test_file_path / sequence_file, name="cladetime.sequence.filter") - test_filtered_metadata = {"date": ["2022-01-01", "2022-01-02", "2023-12-27"], "strain": list(sequence_set)} + test_filtered_metadata = { + "country": ["USA", "USA", "USA"], + "date": ["2022-01-02", "2022-01-02", "2023-02-01"], + "host": ["Homo sapiens", "Homo sapiens", "Homo sapiens"], + "location": ["Hawaii", "Hawaii", "Utah"], + "strain": sequence_list, + } metadata_filtered = pl.LazyFrame(test_filtered_metadata) # expected clade assignments for 2024-08-02 (as retrieved from Nextrain metadata) @@ -89,17 +115,38 @@ def test_assign_old_tree(test_file_path, tmp_path, test_sequences): ct_current_tree = CladeTime() with patch("cladetime.sequence.filter", fasta_mock): current_assigned_clades = ct_current_tree.assign_clades(metadata_filtered, output_file=current_file) - current_assigned_clades = current_assigned_clades.select(["strain", "clade"]).collect() + current_assigned_clades = current_assigned_clades.detail.select(["strain", "clade"]).collect() old_file = tmp_path / "old_assignments.csv" ct_old_tree = CladeTime(tree_as_of="2024-08-02") with patch("cladetime.sequence.filter", fasta_mock): old_assigned_clades = ct_old_tree.assign_clades(metadata_filtered, output_file=old_file) - old_assigned_clades = old_assigned_clades.select(["strain", "clade"]).collect() + old_assigned_clade_detail = old_assigned_clades.detail.select(["strain", "clade"]).collect() + + assert_frame_equal(current_assigned_clades.select("strain"), old_assigned_clade_detail.select("strain")) + assert_frame_not_equal(current_assigned_clades.select("clade"), old_assigned_clade_detail.select("clade")) + assert_frame_equal(old_assigned_clade_detail.sort("strain"), expected_assignments.sort("strain")) + + expected_summary = pl.DataFrame( + { + "clade_nextstrain": ["24B", "24C"], + "country": ["USA", "USA"], + "date": ["2022-01-02", "2023-02-01"], + "host": ["Homo sapiens", "Homo sapiens"], + "location": ["Hawaii", 
"Utah"], + "count": [2, 1], + } + ).cast({"count": pl.UInt32}) + assert_frame_equal( + expected_summary, old_assigned_clades.summary.collect(), check_column_order=False, check_row_order=False + ) - assert_frame_equal(current_assigned_clades.select("strain"), old_assigned_clades.select("strain")) - assert_frame_not_equal(current_assigned_clades.select("clade"), old_assigned_clades.select("clade")) - assert_frame_equal(old_assigned_clades.sort("strain"), expected_assignments.sort("strain")) + # metadata should reflect ncov metadata as of 2024-11-01 + assert old_assigned_clades.meta.get("sequence_as_of") == datetime(2024, 11, 1, tzinfo=timezone.utc) + assert old_assigned_clades.meta.get("tree_as_of") == datetime(2024, 8, 2, tzinfo=timezone.utc) + assert old_assigned_clades.meta.get("nextclade_dataset_version") == "2024-07-17--12-57-03Z" + assert old_assigned_clades.meta.get("nextclade_version_num") == "3.8.2" + assert old_assigned_clades.meta.get("assignment_as_of") == "2024-11-01 00:00" @pytest.mark.skipif(not docker_enabled, reason="Docker is not installed") @@ -128,7 +175,7 @@ def test_assign_date_filters(test_file_path, tmp_path, test_sequences, min_date, assignment_file = tmp_path / "assignments.csv" with patch("cladetime.sequence.filter", fasta_mock): assigned_clades = ct.assign_clades(metadata_filtered, output_file=assignment_file) - assert len(assigned_clades.collect()) == expected_rows + assert len(assigned_clades.detail.collect()) == expected_rows def test_assign_too_many_sequences_warning(tmp_path, test_file_path, test_sequences): @@ -143,7 +190,7 @@ def test_assign_too_many_sequences_warning(tmp_path, test_file_path, test_sequen with pytest.warns(CladeTimeSequenceWarning): assignments = ct.assign_clades(metadata_filtered, output_file=tmp_path / "assignments.csv") # clade assignment should proceed, despite the warning - assert len(assignments.collect()) == 3 + assert len(assignments.detail.collect()) == 3 def test_assign_clades_no_sequences(): @@ -152,4 
+199,6 @@ def test_assign_clades_no_sequences():
         assignments = ct.assign_clades(
             pl.LazyFrame(),
         )
-    assert assignments.collect().shape == (0, 0)
+    assert assignments.detail.collect().shape == (0, 0)
+    assert assignments.summary.collect().shape == (0, 0)
+    assert assignments.meta == {}
diff --git a/tests/unit/test_sequence.py b/tests/unit/test_sequence.py
index 4030463..77865e9 100644
--- a/tests/unit/test_sequence.py
+++ b/tests/unit/test_sequence.py
@@ -6,8 +6,10 @@
 import polars as pl
 import pytest
 from Bio import SeqIO
+from polars.testing import assert_frame_equal

 from cladetime import sequence
+from cladetime.exceptions import CladeTimeSequenceWarning
 from cladetime.types import StateFormat


@@ -247,3 +249,104 @@ def test_filter_empty_fasta(tmpdir):
     seq_filtered = sequence.filter(test_sequence_set, "http://thisismocked.com", tmpdir)
     contents = seq_filtered.read_text(encoding=None)
     assert len(contents) == 0
+
+
+def test_summarize_clades():
+    """Default group_by on a DataFrame: identical rows collapse into one counted row."""
+    test_metadata = pl.DataFrame(
+        {
+            "clade_nextstrain": ["11C", "11C", "11C"],
+            "country": ["USA", "USA", "USA"],
+            "date": ["2022-01-01", "2022-01-01", "2023-12-27"],
+            "host": ["Homo sapiens", "Homo sapiens", "Homo sapiens"],
+            "location": ["Utah", "Utah", "Utah"],
+            "strain": ["abc/123", "abc/456", "def/123"],
+            "wombat_count": [2, 22, 222],
+        }
+    )
+
+    # strain and wombat_count are not in the default group_by, so the summary omits them
+    expected_summary = pl.DataFrame(
+        {
+            "clade_nextstrain": ["11C", "11C"],
+            "country": ["USA", "USA"],
+            "date": ["2022-01-01", "2023-12-27"],
+            "host": ["Homo sapiens", "Homo sapiens"],
+            "location": ["Utah", "Utah"],
+            "count": [2, 1],
+        }
+    ).cast({"count": pl.UInt32})
+
+    summarized = sequence.summarize_clades(test_metadata)
+    assert_frame_equal(expected_summary, summarized, check_column_order=False, check_row_order=False)
+
+
+def test_summarize_clades_custom_group():
+    """Custom group_by on LazyFrames, including a column outside the default set."""
+    test_metadata = pl.LazyFrame(
+        {
+            "clade_nextstrain": ["11C", "11C", "11C"],
+            "country": ["Canada", "USA", "USA"],
+            "date": ["2022-01-01", "2022-01-01", "2023-12-27"],
+            "host": ["Homo sapiens", "Homo sapiens", "Homo sapiens"],
+            "location": ["Utah", "Utah", "Utah"],
+            "strain": ["abc/123", "abc/456", "def/123"],
+            "wombat_count": [2, 22, 22],
+        }
+    )
+
+    expected_summary = pl.LazyFrame(
+        {
+            "country": ["Canada", "USA"],
+            "wombat_count": [2, 22],
+            "count": [1, 2],
+        }
+    ).cast({"count": pl.UInt32})
+
+    summarized = sequence.summarize_clades(test_metadata, group_by=["country", "wombat_count"])
+    assert_frame_equal(expected_summary, summarized, check_column_order=False, check_row_order=False)
+
+    # second scenario: group by a single column
+    test_metadata = pl.LazyFrame(
+        {
+            "clade_nextstrain": ["11C", "11C", "11C"],
+            "country": ["Canada", "USA", "USA"],
+            "date": ["2022-01-01", "2022-01-01", "2023-12-27"],
+        }
+    )
+
+    expected_summary = pl.LazyFrame(
+        {
+            "clade_nextstrain": ["11C"],
+            "count": [3],
+        }
+    ).cast({"count": pl.UInt32})
+
+    summarized = sequence.summarize_clades(test_metadata, group_by=["clade_nextstrain"])
+    assert_frame_equal(expected_summary, summarized, check_column_order=False, check_row_order=False)
+
+
+def test_summarize_clades_invalid_cols():
+    """Invalid group_by columns produce a CladeTimeSequenceWarning and an empty frame."""
+    # group_by names a column that is not present in the metadata
+    test_metadata = pl.DataFrame(
+        {
+            "clade_nextstrain": ["11C", "11C", "11C"],
+            "country": ["Canada", "USA", "USA"],
+            "date": ["2022-01-01", "2022-01-01", "2023-12-27"],
+        }
+    )
+    with pytest.warns(CladeTimeSequenceWarning):
+        summarized = sequence.summarize_clades(test_metadata, group_by=["country", "wombat_count"])
+    assert len(summarized) == 0
+
+    # group_by includes a column named "count", which is rejected even though it exists
+    test_metadata = pl.DataFrame(
+        {
+            "clade_nextstrain": ["11C", "11C", "11C"],
+            "count": [1, 2, 3],
+        }
+    )
+    with pytest.warns(CladeTimeSequenceWarning):
+        summarized = sequence.summarize_clades(test_metadata, group_by=["clade_nextstrain", "count"])
+    assert len(summarized) == 0
From 7c2aa5e893d6b32fabf7701b9cdad6ef74b60d23 Mon Sep 17 00:00:00 2001
From: Becky Sweger
Date: Tue, 12 Nov 2024 12:59:24 -0500
Subject: [PATCH 12/13] Run integration tests more frequently

---
 .github/workflows/run-integration-tests.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git
a/.github/workflows/run-integration-tests.yaml b/.github/workflows/run-integration-tests.yaml index 230746f..e4c7ce5 100644 --- a/.github/workflows/run-integration-tests.yaml +++ b/.github/workflows/run-integration-tests.yaml @@ -4,6 +4,8 @@ on: push: branches: - main + pull_request: + types: [opened, ready_for_review, reopened] workflow_dispatch: jobs: From c9c09d17ce23f786466fb3470bffec074350f7f3 Mon Sep 17 00:00:00 2001 From: Becky Sweger Date: Tue, 12 Nov 2024 14:12:01 -0500 Subject: [PATCH 13/13] Fix readthedocs build error --- docs/conf.py | 2 ++ src/cladetime/cladetime.py | 2 +- src/cladetime/sequence.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index f026308..b82ae08 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -142,6 +142,8 @@ ("py:class", "polars.dataframe.frame.DataFrame"), ("py:class", "polars.DataFrame"), ("py:class", "polars.lazyframe.frame.LazyFrame"), + ("py:class", "cladetime._clade.Clade"), + ("py:class", "Clade"), ] diff --git a/src/cladetime/cladetime.py b/src/cladetime/cladetime.py index 49cc3e7..4f77a9b 100644 --- a/src/cladetime/cladetime.py +++ b/src/cladetime/cladetime.py @@ -214,7 +214,7 @@ def _get_config(self) -> Config: return config - def assign_clades(self, sequence_metadata: pl.LazyFrame, output_file: str | None = None) -> pl.DataFrame: + def assign_clades(self, sequence_metadata: pl.LazyFrame, output_file: str | None = None) -> Clade: """Assign clades to a specified set of sequences. For each sequence in a sequence file (.fasta), assign a Nextstrain diff --git a/src/cladetime/sequence.py b/src/cladetime/sequence.py index 59055f9..922e0fc 100644 --- a/src/cladetime/sequence.py +++ b/src/cladetime/sequence.py @@ -346,7 +346,7 @@ def get_metadata_ids(sequence_metadata: pl.DataFrame | pl.LazyFrame) -> set: ------- set A set of - :external+ncov:doc:`strains` + :external+ncov:doc:`strains` Raises ------