From cf00328c94c3f844854c9d302dac7da8567241f5 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 16 Jul 2024 09:38:12 -0400 Subject: [PATCH 1/7] chore: update ebextensions for downloading latest seqrepo instance --- .ebextensions/01_download_data.config | 44 ++++++++------------------- 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/.ebextensions/01_download_data.config b/.ebextensions/01_download_data.config index e1358921..7724516f 100644 --- a/.ebextensions/01_download_data.config +++ b/.ebextensions/01_download_data.config @@ -3,11 +3,9 @@ commands: command: "yum install -y python-devel postgresql-devel" 02_install_aws_cli: command: "yum install -y awscli" - 03_install_p7zip: - command: "yum install -y p7zip" - 04_eb_packages: + 03_eb_packages: command: "/var/app/venv/staging-LQM1lest/bin/pip install uvloop websockets httptools typing-extensions" - 05_export_eb_env_var: + 04_export_eb_env_var: command: "export $(cat /opt/elasticbeanstalk/deployment/env | xargs)" container_commands: @@ -16,33 +14,17 @@ container_commands: command: "chmod -R 777 /var/app/venv/staging-LQM1lest/lib/python3.11/site-packages/cool_seq_tool/data" 02_s3_download: - test: test ! -d "/usr/local/share/seqrepo" - command: "aws s3 cp s3://${AWS_BUCKET_NAME}/${AWS_SEQREPO_OBJECT} /usr/local/share/seqrepo.zip --region us-east-2" + test: test ! 
-d "/usr/local/share/seqrepo/2024-02-20" + command: "aws s3 cp s3://${AWS_BUCKET_NAME}/${AWS_SEQREPO_OBJECT} /usr/local/share/seqrepo.tar.gz --region us-east-2" - 03_p7zip_seqrepo: - test: test -f "/usr/local/share/seqrepo.zip" - command: "7za x /usr/local/share/seqrepo.zip -o/usr/local/share -y" + 03_extract_seqrepo: + test: test -f "/usr/local/share/seqrepo.tar.gz" + command: "mkdir -p /usr/local/share/2024-02-20 && tar -xzvf /usr/local/share/seqrepo.tar.gz -C /usr/local/share/2024-02-20" - 04_seqrepo_permission: - test: test -d "/usr/local/share/seqrepo" - command: "chmod -R 777 /usr/local/share/seqrepo" + 04_seqrepo_zip_permission: + test: test -f "/usr/local/share/seqrepo.tar.gz" + command: "chmod +wr /usr/local/share/seqrepo.tar.gz" - 05_macosx_permission: - test: test -d "/usr/local/share/__MACOSX" - command: "chmod -R +wr /usr/local/share/__MACOSX" - - 06_seqrepo_zip_permission: - test: test -f "/usr/local/share/seqrepo.zip" - command: "chmod +wr /usr/local/share/seqrepo.zip" - - 07_remove_macosx: - test: test -d "/usr/local/share/__MACOSX" - command: "rm -R /usr/local/share/__MACOSX" - - 08_remove_seqrepo_zip: - test: test -f "/usr/local/share/seqrepo.zip" - command: "rm /usr/local/share/seqrepo.zip" - - 9_data_permission: - test: test -d "/usr/local/share/seqrepo" - command: "chmod -R +wrx /usr/local/share/seqrepo" + 05_remove_seqrepo_zip: + test: test -f "/usr/local/share/seqrepo.tar.gz" + command: "rm /usr/local/share/seqrepo.tar.gz" From 80e8a055c571c877af7308d26f1a76bd595314bd Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 16 Jul 2024 09:42:43 -0400 Subject: [PATCH 2/7] fix path --- .ebextensions/01_download_data.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ebextensions/01_download_data.config b/.ebextensions/01_download_data.config index 7724516f..868e36b2 100644 --- a/.ebextensions/01_download_data.config +++ b/.ebextensions/01_download_data.config @@ -19,7 +19,7 @@ container_commands: 03_extract_seqrepo: test: 
test -f "/usr/local/share/seqrepo.tar.gz" - command: "mkdir -p /usr/local/share/2024-02-20 && tar -xzvf /usr/local/share/seqrepo.tar.gz -C /usr/local/share/2024-02-20" + command: "mkdir -p /usr/local/share/seqrepo/2024-02-20 && tar -xzvf /usr/local/share/seqrepo.tar.gz -C /usr/local/share/seqrepo/2024-02-20" 04_seqrepo_zip_permission: test: test -f "/usr/local/share/seqrepo.tar.gz" From 18610d86ee9893de2a724700ffc4360038f93ccc Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 16 Jul 2024 10:19:15 -0400 Subject: [PATCH 3/7] test --- .ebextensions/01_download_data.config | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.ebextensions/01_download_data.config b/.ebextensions/01_download_data.config index 868e36b2..1bf6cb1f 100644 --- a/.ebextensions/01_download_data.config +++ b/.ebextensions/01_download_data.config @@ -9,10 +9,6 @@ commands: command: "export $(cat /opt/elasticbeanstalk/deployment/env | xargs)" container_commands: - 01_cool_seq_tool_permissions: - test: test -d "/var/app/venv/staging-LQM1lest/lib/python3.11/site-packages/cool_seq_tool" - command: "chmod -R 777 /var/app/venv/staging-LQM1lest/lib/python3.11/site-packages/cool_seq_tool/data" - 02_s3_download: test: test ! 
-d "/usr/local/share/seqrepo/2024-02-20" command: "aws s3 cp s3://${AWS_BUCKET_NAME}/${AWS_SEQREPO_OBJECT} /usr/local/share/seqrepo.tar.gz --region us-east-2" From cbb94db5e642cc288be7f333a785bc5a1fdeb6b2 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 16 Jul 2024 10:53:09 -0400 Subject: [PATCH 4/7] fix: TypeError for params that accept union of enums * Methods now raise ValueError if incorrect enum value passed --- src/variation/hgvs_dup_del_mode.py | 44 ++++++++++++++++--------- src/variation/translators/translator.py | 9 ++++- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/src/variation/hgvs_dup_del_mode.py b/src/variation/hgvs_dup_del_mode.py index fcfa8c5b..4c7bd369 100644 --- a/src/variation/hgvs_dup_del_mode.py +++ b/src/variation/hgvs_dup_del_mode.py @@ -11,6 +11,20 @@ # Define deletion alt types DELS = {AltType.DELETION_AMBIGUOUS, AltType.DELETION} +# Define supported alt types for HGVS Dup Del Mode +DELS_DUPS = {AltType.DELETION, AltType.DELETION_AMBIGUOUS, AltType.DUPLICATION, AltType.DUPLICATION_AMBIGUOUS} + + +def _check_supported_alt_type(alt_type: AltType) -> None: + """Check that ``alt_type`` is one of ``DELS_DUPS`` + + :param alt_type: Alteration type + :raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``. + """ + if alt_type not in DELS_DUPS: + err_msg = f"`alt_type` must be one of: {DELS_DUPS}" + raise ValueError(err_msg) + class HGVSDupDelMode: """Class for handling how to interpret HGVS duplications and deletions.""" @@ -24,10 +38,7 @@ def __init__(self, seqrepo_access: SeqRepoAccess) -> None: def default_mode( self, - alt_type: AltType.DELETION - | AltType.DELETION_AMBIGUOUS - | AltType.DUPLICATION - | AltType.DUPLICATION_AMBIGUOUS, + alt_type: AltType, location: dict, vrs_seq_loc_ac: str, baseline_copies: int | None = None, @@ -43,14 +54,17 @@ def default_mode( else allele - :param alt_type: The type of alteration + :param alt_type: The type of alteration. Must be one of ``DELS_DUPS``. 
:param location: Sequence Location object :param vrs_seq_loc_ac: Accession used in VRS Sequence Location :param baseline_copies: Baseline copies for Copy Number Count variation :param copy_change: copy change for Copy Number Change Variation :param alt: Alteration + :raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``. :return: VRS Variation object represented as a dict """ + _check_supported_alt_type(alt_type) + variation = None if not baseline_copies and alt_type in AMBIGUOUS_REGIONS: variation = self.copy_number_change_mode(alt_type, location, copy_change) @@ -62,20 +76,20 @@ def default_mode( def copy_number_count_mode( self, - alt_type: AltType.DELETION - | AltType.DELETION_AMBIGUOUS - | AltType.DUPLICATION - | AltType.DUPLICATION_AMBIGUOUS, + alt_type: AltType, location: dict, baseline_copies: int, ) -> dict: """Return a VRS Copy Number Variation. - :param alt_type: The type of alteration + :param alt_type: The type of alteration. Must be one of ``DELS_DUPS``. :param location: VRS SequenceLocation :param baseline_copies: Baseline copies number + :raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``. :return: VRS Copy Number object represented as a dict """ + _check_supported_alt_type(alt_type) + copies = baseline_copies - 1 if alt_type in DELS else baseline_copies + 1 seq_loc = models.SequenceLocation(**location) seq_loc.id = ga4gh_identify(seq_loc) @@ -85,20 +99,20 @@ def copy_number_count_mode( def copy_number_change_mode( self, - alt_type: AltType.DELETION - | AltType.DELETION_AMBIGUOUS - | AltType.DUPLICATION - | AltType.DUPLICATION_AMBIGUOUS, + alt_type: AltType, location: dict, copy_change: models.CopyChange | None = None, ) -> dict: """Return copy number change variation - :param alt_type: The type of alteration + :param alt_type: The type of alteration. Must be one of ``DELS_DUPS``. :param location: VRS SequenceLocation :param copy_change: The copy change + :raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``. 
:return: Copy Number Change variation as a dict """ + _check_supported_alt_type(alt_type) + if not copy_change: copy_change = ( models.CopyChange.EFO_0030067 diff --git a/src/variation/translators/translator.py b/src/variation/translators/translator.py index 09ce11aa..e8ddb481 100644 --- a/src/variation/translators/translator.py +++ b/src/variation/translators/translator.py @@ -164,7 +164,7 @@ async def get_p_or_cdna_translation_result( start_pos: int, end_pos: int, alt_type: AltType, - coordinate_type: AnnotationLayer.PROTEIN | AnnotationLayer.CDNA, + coordinate_type: AnnotationLayer, errors: list[str], cds_start: int | None = None, ref: str | None = None, @@ -184,8 +184,15 @@ async def get_p_or_cdna_translation_result( `coordinate_type == AnnotationLayer.CDNA`. :param ref: Expected reference sequence :param alt: Expected change + :raises ValueError: If ``coordinate_type`` not one of + ``AnnotationLayer.PROTEIN`` or ``AnnotationLayer.CDNA`` :return: Translation result if successful. Else, `None` """ + supported_coordinate_types = {AnnotationLayer.PROTEIN, AnnotationLayer.CDNA} + if coordinate_type not in supported_coordinate_types: + err_msg = f"`coordinate_type` must be one of {supported_coordinate_types}" + raise ValueError(err_msg) + vrs_allele = None vrs_seq_loc_ac = None vrs_seq_loc_ac_status = VrsSeqLocAcStatus.NA From 4de1a077dd4c12410046bea9daa214e9c3675834 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 16 Jul 2024 10:57:58 -0400 Subject: [PATCH 5/7] style: ruff format --- src/variation/gnomad_vcf_to_protein_variation.py | 2 +- src/variation/normalize.py | 2 +- src/variation/schemas/copy_number_schema.py | 2 +- src/variation/schemas/hgvs_to_copy_number_schema.py | 2 +- src/variation/schemas/service_schema.py | 2 +- src/variation/to_copy_number_variation.py | 2 +- src/variation/to_vrs.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/variation/gnomad_vcf_to_protein_variation.py 
b/src/variation/gnomad_vcf_to_protein_variation.py index 0bd91d8e..9607f4f4 100644 --- a/src/variation/gnomad_vcf_to_protein_variation.py +++ b/src/variation/gnomad_vcf_to_protein_variation.py @@ -10,6 +10,7 @@ from gene.query import QueryHandler as GeneQueryHandler from gene.schemas import MatchType as GeneMatchType +from variation import __version__ from variation.classify import Classify from variation.schemas.classification_response_schema import Nomenclature from variation.schemas.gnomad_vcf_to_protein_schema import GnomadVcfToProteinService @@ -19,7 +20,6 @@ from variation.tokenize import Tokenize from variation.translate import Translate from variation.validate import Validate -from variation import __version__ class GnomadVcfToProteinError(Exception): diff --git a/src/variation/normalize.py b/src/variation/normalize.py index 437c4256..b40faf3a 100644 --- a/src/variation/normalize.py +++ b/src/variation/normalize.py @@ -7,6 +7,7 @@ from cool_seq_tool.sources import UtaDatabase from ga4gh.vrs import models +from variation import __version__ from variation.classify import Classify from variation.schemas.app_schemas import Endpoint from variation.schemas.normalize_response_schema import ( @@ -25,7 +26,6 @@ from variation.translate import Translate from variation.utils import update_warnings_for_no_resp from variation.validate import Validate -from variation import __version__ class Normalize(ToVRS): diff --git a/src/variation/schemas/copy_number_schema.py b/src/variation/schemas/copy_number_schema.py index 68b31555..0af71e15 100644 --- a/src/variation/schemas/copy_number_schema.py +++ b/src/variation/schemas/copy_number_schema.py @@ -14,8 +14,8 @@ model_validator, ) -from variation.schemas.normalize_response_schema import ServiceResponse from variation import __version__ +from variation.schemas.normalize_response_schema import ServiceResponse class ParsedPosType(str, Enum): diff --git a/src/variation/schemas/hgvs_to_copy_number_schema.py 
b/src/variation/schemas/hgvs_to_copy_number_schema.py index 022d89db..b1dad176 100644 --- a/src/variation/schemas/hgvs_to_copy_number_schema.py +++ b/src/variation/schemas/hgvs_to_copy_number_schema.py @@ -3,8 +3,8 @@ from ga4gh.vrs import models from pydantic import ConfigDict, StrictStr -from variation.schemas.normalize_response_schema import ServiceResponse from variation import __version__ +from variation.schemas.normalize_response_schema import ServiceResponse class HgvsToCopyNumberCountService(ServiceResponse): diff --git a/src/variation/schemas/service_schema.py b/src/variation/schemas/service_schema.py index da9f0f73..87526131 100644 --- a/src/variation/schemas/service_schema.py +++ b/src/variation/schemas/service_schema.py @@ -6,8 +6,8 @@ from cool_seq_tool.schemas import ToGenomicService as ToGenomic from pydantic import ConfigDict -from variation.schemas.normalize_response_schema import ServiceMeta from variation import __version__ +from variation.schemas.normalize_response_schema import ServiceMeta class ClinVarAssembly(str, Enum): diff --git a/src/variation/to_copy_number_variation.py b/src/variation/to_copy_number_variation.py index 6138679f..f345bb3f 100644 --- a/src/variation/to_copy_number_variation.py +++ b/src/variation/to_copy_number_variation.py @@ -14,6 +14,7 @@ from gene.schemas import MatchType as GeneMatchType from pydantic import ValidationError +from variation import __version__ from variation.classify import Classify from variation.schemas.app_schemas import Endpoint from variation.schemas.classification_response_schema import ClassificationType @@ -43,7 +44,6 @@ from variation.translate import Translate from variation.utils import get_priority_sequence_location from variation.validate import Validate -from variation import __version__ VALID_CLASSIFICATION_TYPES = [ ClassificationType.GENOMIC_DUPLICATION, diff --git a/src/variation/to_vrs.py b/src/variation/to_vrs.py index 74ee3a24..857d9b36 100644 --- a/src/variation/to_vrs.py +++ 
b/src/variation/to_vrs.py @@ -6,6 +6,7 @@ from cool_seq_tool.handlers import SeqRepoAccess from ga4gh.vrs import models +from variation import __version__ from variation.classify import Classify from variation.schemas.app_schemas import Endpoint from variation.schemas.normalize_response_schema import ( @@ -18,7 +19,6 @@ from variation.tokenize import Tokenize from variation.translate import Translate from variation.validate import Validate -from variation import __version__ from variation.vrs_representation import VRSRepresentation From 8fd4680a6fa78998c4485fae128a1f6d23f0df03 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 16 Jul 2024 11:00:04 -0400 Subject: [PATCH 6/7] ruff --- src/variation/hgvs_dup_del_mode.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/variation/hgvs_dup_del_mode.py b/src/variation/hgvs_dup_del_mode.py index 4c7bd369..722c221e 100644 --- a/src/variation/hgvs_dup_del_mode.py +++ b/src/variation/hgvs_dup_del_mode.py @@ -12,7 +12,12 @@ DELS = {AltType.DELETION_AMBIGUOUS, AltType.DELETION} # Define supported alt types for HGVS Dup Del Mode -DELS_DUPS = {AltType.DELETION, AltType.DELETION_AMBIGUOUS, AltType.DUPLICATION, AltType.DUPLICATION_AMBIGUOUS} +DELS_DUPS = { + AltType.DELETION, + AltType.DELETION_AMBIGUOUS, + AltType.DUPLICATION, + AltType.DUPLICATION_AMBIGUOUS, +} def _check_supported_alt_type(alt_type: AltType) -> None: From 067a4c3a6f051c3f4964b735c809ab3e2cf4dca0 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 16 Jul 2024 11:10:11 -0400 Subject: [PATCH 7/7] fix --- .ebextensions/01_download_data.config | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.ebextensions/01_download_data.config b/.ebextensions/01_download_data.config index 1bf6cb1f..8ee01fd2 100644 --- a/.ebextensions/01_download_data.config +++ b/.ebextensions/01_download_data.config @@ -9,18 +9,18 @@ commands: command: "export $(cat /opt/elasticbeanstalk/deployment/env | xargs)" container_commands: - 
02_s3_download: + 01_s3_download: test: test ! -d "/usr/local/share/seqrepo/2024-02-20" command: "aws s3 cp s3://${AWS_BUCKET_NAME}/${AWS_SEQREPO_OBJECT} /usr/local/share/seqrepo.tar.gz --region us-east-2" - 03_extract_seqrepo: + 02_extract_seqrepo: test: test -f "/usr/local/share/seqrepo.tar.gz" command: "mkdir -p /usr/local/share/seqrepo/2024-02-20 && tar -xzvf /usr/local/share/seqrepo.tar.gz -C /usr/local/share/seqrepo/2024-02-20" - 04_seqrepo_zip_permission: + 03_seqrepo_zip_permission: test: test -f "/usr/local/share/seqrepo.tar.gz" command: "chmod +wr /usr/local/share/seqrepo.tar.gz" - 05_remove_seqrepo_zip: + 04_remove_seqrepo_zip: test: test -f "/usr/local/share/seqrepo.tar.gz" command: "rm /usr/local/share/seqrepo.tar.gz"