Skip to content

Commit

Permalink
enhancement!: clean up __init__.py files
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Nov 21, 2023
1 parent 125a841 commit 1f6bb9a
Show file tree
Hide file tree
Showing 11 changed files with 65 additions and 85 deletions.
2 changes: 1 addition & 1 deletion docs/scripts/generate_normalize_figure.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@

import gravis as gv

from gene import APP_ROOT
from gene.database import create_db
from gene.etl.base import APP_ROOT
from gene.query import QueryHandler
from gene.schemas import UnmergedNormalizationService

Expand Down
65 changes: 2 additions & 63 deletions src/gene/__init__.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,4 @@
"""The VICC library for normalizing genes."""
import logging
from os import environ
from pathlib import Path
from .version import __version__

from .version import __version__ # noqa: F401

APP_ROOT = Path(__file__).resolve().parent

logging.basicConfig(
filename="gene.log", format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s"
)
logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)
logger.handlers = []

logging.getLogger("boto3").setLevel(logging.INFO)
logging.getLogger("botocore").setLevel(logging.INFO)
logging.getLogger("urllib3").setLevel(logging.INFO)
logging.getLogger("python_jsonschema_objects").setLevel(logging.INFO)
logging.getLogger("biocommons.seqrepo.seqaliasdb.seqaliasdb").setLevel(logging.INFO)
logging.getLogger("biocommons.seqrepo.fastadir.fastadir").setLevel(logging.INFO)


SEQREPO_ROOT_DIR = Path(
environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo/latest")
)


class DownloadException(Exception): # noqa: N818
"""Exception for failures relating to source file downloads."""


from gene.schemas import ( # noqa: E402
NamespacePrefix,
RefType,
SourceIDAfterNamespace,
SourceName,
)

ITEM_TYPES = {k.lower(): v.value for k, v in RefType.__members__.items()}

# Sources we import directly (HGNC, Ensembl, NCBI)
SOURCES = {
source.value.lower(): source.value for source in SourceName.__members__.values()
}

# Set of sources we import directly
XREF_SOURCES = {src.lower() for src in SourceName.__members__}

# use to fetch source name from schema based on concept id namespace
# e.g. {"hgnc": "HGNC"}
PREFIX_LOOKUP = {
v.value: SourceName[k].value
for k, v in NamespacePrefix.__members__.items()
if k in SourceName.__members__.keys()
}

# use to generate namespace prefix from source ID value
# e.g. {"ensg": "ensembl"}
NAMESPACE_LOOKUP = {
v.value.lower(): NamespacePrefix[k].value
for k, v in SourceIDAfterNamespace.__members__.items()
if v.value != ""
}
__all__ = ["__version__"]
3 changes: 1 addition & 2 deletions src/gene/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,14 @@

import click

from gene import SOURCES
from gene.database import (
AbstractDatabase,
DatabaseReadException,
DatabaseWriteException,
create_db,
)
from gene.database.database import DatabaseException
from gene.schemas import SourceName
from gene.schemas import SOURCES, SourceName

logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)
Expand Down
10 changes: 8 additions & 2 deletions src/gene/database/dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from boto3.dynamodb.conditions import Key
from botocore.exceptions import ClientError

from gene import ITEM_TYPES, PREFIX_LOOKUP
from gene.database.database import (
AWS_ENV_VAR_NAME,
SKIP_AWS_DB_ENV_NAME,
Expand All @@ -23,7 +22,14 @@
DatabaseWriteException,
confirm_aws_db_use,
)
from gene.schemas import RecordType, RefType, SourceMeta, SourceName
from gene.schemas import (
ITEM_TYPES,
PREFIX_LOOKUP,
RecordType,
RefType,
SourceMeta,
SourceName,
)

logger = logging.getLogger(__name__)

Expand Down
11 changes: 8 additions & 3 deletions src/gene/etl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import shutil
from abc import ABC, abstractmethod
from ftplib import FTP
from os import remove
from os import environ, remove
from pathlib import Path
from typing import Callable, Dict, List, Optional

Expand All @@ -15,14 +15,19 @@
from dateutil import parser
from gffutils.feature import Feature

from gene import ITEM_TYPES, SEQREPO_ROOT_DIR
from gene.database import AbstractDatabase
from gene.schemas import Gene, GeneSequenceLocation, MatchType, SourceName
from gene.schemas import ITEM_TYPES, Gene, GeneSequenceLocation, MatchType, SourceName

# Package-wide logger, obtained by the fixed name "gene" and set to DEBUG.
logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)


# Absolute, symlink-resolved directory that contains this module.
APP_ROOT = Path(__file__).resolve().parent

# SeqRepo data directory: taken from the SEQREPO_ROOT_DIR environment variable
# when it is set, otherwise the hard-coded fallback path below.
SEQREPO_ROOT_DIR = Path(
    environ.get("SEQREPO_ROOT_DIR", default="/usr/local/share/seqrepo/latest")
)


class Base(ABC):
"""The ETL base class."""

Expand Down
3 changes: 1 addition & 2 deletions src/gene/etl/ensembl.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@
import requests
from gffutils.feature import Feature

from gene import APP_ROOT
from gene.database import AbstractDatabase
from gene.etl.base import Base
from gene.etl.base import APP_ROOT, Base
from gene.etl.exceptions import (
GeneFileVersionError,
GeneNormalizerEtlError,
Expand Down
4 changes: 2 additions & 2 deletions src/gene/etl/hgnc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@

from dateutil import parser

from gene import APP_ROOT, PREFIX_LOOKUP
from gene.database import AbstractDatabase
from gene.etl.base import Base
from gene.etl.base import APP_ROOT, Base
from gene.etl.exceptions import (
GeneFileVersionError,
GeneNormalizerEtlError,
GeneSourceFetchError,
)
from gene.schemas import (
PREFIX_LOOKUP,
Annotation,
Chromosome,
NamespacePrefix,
Expand Down
4 changes: 2 additions & 2 deletions src/gene/etl/ncbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@

import gffutils

from gene import APP_ROOT, PREFIX_LOOKUP
from gene.database import AbstractDatabase
from gene.etl.base import Base
from gene.etl.base import APP_ROOT, Base
from gene.etl.exceptions import (
GeneFileVersionError,
GeneNormalizerEtlError,
GeneSourceFetchError,
)
from gene.schemas import (
PREFIX_LOOKUP,
Annotation,
Chromosome,
NamespacePrefix,
Expand Down
3 changes: 2 additions & 1 deletion src/gene/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@

from fastapi import FastAPI, HTTPException, Query

from gene import SOURCES, __version__
from gene import __version__
from gene.database import create_db
from gene.query import QueryHandler
from gene.schemas import (
SOURCES,
NormalizeService,
SearchService,
SourceName,
Expand Down
18 changes: 11 additions & 7 deletions src/gene/query.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
"""Provides methods for handling queries."""
import logging
import re
from datetime import datetime
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, TypeVar

from ga4gh.core import core_models, ga4gh_identify
from ga4gh.vrs import models

from gene import ITEM_TYPES, NAMESPACE_LOOKUP, PREFIX_LOOKUP, logger
from gene.database import AbstractDatabase, DatabaseReadException
from gene.schemas import (
ITEM_TYPES,
NAMESPACE_LOOKUP,
PREFIX_LOOKUP,
BaseGene,
BaseNormalizationService,
Gene,
Expand All @@ -28,6 +31,7 @@
)
from gene.version import __version__

_logger = logging.getLogger(__name__)
NormService = TypeVar("NormService", bound=BaseNormalizationService)


Expand Down Expand Up @@ -72,7 +76,7 @@ def _emit_warnings(query_str: str) -> List:
"non_breaking_space_characters": "Query contains non-breaking space characters"
}
]
logger.warning(
_logger.warning(
f"Query ({query_str}) contains non-breaking space characters."
)
return warnings
Expand Down Expand Up @@ -188,14 +192,14 @@ def _fetch_record(
try:
match = self.db.get_record_by_id(concept_id, case_sensitive=False)
except DatabaseReadException as e:
logger.error(
_logger.error(
f"Encountered DatabaseReadException looking up {concept_id}: {e}"
)
else:
if match:
self._add_record(response, match, match_type)
else:
logger.error(
_logger.error(
f"Unable to find expected record for {concept_id} matching as {match_type}"
) # noqa: E501

Expand Down Expand Up @@ -263,7 +267,7 @@ def _get_search_response(self, query: str, sources: Iterable[SourceName]) -> Dic
matched_concept_ids.append(ref)

except DatabaseReadException as e:
logger.error(
_logger.error(
f"Encountered DatabaseReadException looking up {item_type}"
f" {term}: {e}"
)
Expand Down Expand Up @@ -492,7 +496,7 @@ def _handle_failed_merge_ref(record: Dict, response: Dict, query: str) -> Dict:
:param query: original query value
:return: response with no match
"""
logger.error(
_logger.error(
f"Merge ref lookup failed for ref {record['merge_ref']} "
f"in record {record['concept_id']} from query {query}"
)
Expand Down Expand Up @@ -557,7 +561,7 @@ def _resolve_merge(
merge = self.db.get_record_by_id(merge_ref, False, True)
if merge is None:
query = response.query
logger.error(
_logger.error(
f"Merge ref lookup failed for ref {record['merge_ref']} "
f"in record {record['concept_id']} from query `{query}`"
)
Expand Down
27 changes: 27 additions & 0 deletions src/gene/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ class SourceName(Enum):
NCBI = "NCBI"


# Maps the lowercased value of every SourceName member to the value as
# declared on the enum, e.g. {"ensembl": "Ensembl"}
SOURCES = {
    member.value.lower(): member.value
    for member in SourceName.__members__.values()
}


class SourcePriority(IntEnum):
"""Define priorities for sources when building merged concepts."""

Expand Down Expand Up @@ -196,6 +202,23 @@ class NamespacePrefix(Enum):
RFAM = "rfam"


# Use to fetch the source name from a concept ID namespace,
# e.g. {"hgnc": "HGNC"}.
# Only NamespacePrefix members whose name is also a SourceName member name are
# included; each maps the prefix value to the matching SourceName value.
PREFIX_LOOKUP = {
    prefix.value: SourceName[member_name].value
    for member_name, prefix in NamespacePrefix.__members__.items()
    # membership is tested on the mapping itself; no .keys() view is needed
    if member_name in SourceName.__members__
}

# Use to generate a namespace prefix from a source ID value,
# e.g. {"ensg": "ensembl"}.
# Each SourceIDAfterNamespace member with a non-empty value contributes one
# entry: its lowercased value mapped to the value of the NamespacePrefix member
# that has the same name (looked up by name, so that member must exist).
NAMESPACE_LOOKUP = {
    id_value.value.lower(): NamespacePrefix[member_name].value
    for member_name, id_value in SourceIDAfterNamespace.__members__.items()
    if id_value.value != ""
}


class DataLicenseAttributes(BaseModel):
"""Define constraints for data license attributes."""

Expand All @@ -222,6 +245,10 @@ class RefType(str, Enum):
ASSOCIATED_WITH = "associated_with"


# Maps each RefType member name, lowercased, to that member's value
# (collective name to singular name), e.g. {"previous_symbols": "prev_symbol"}
ITEM_TYPES = {
    member_name.lower(): ref_type.value
    for member_name, ref_type in RefType.__members__.items()
}


class SourceMeta(BaseModel):
"""Metadata for a given source to return in response object."""

Expand Down

0 comments on commit 1f6bb9a

Please sign in to comment.