Skip to content

Commit

Permalink
Format with ruff, fix mypy errors, update dev_dependencies
Browse files (browse the repository at this point in the history)
  • Loading branch information
corneliusroemer committed Jun 10, 2024
1 parent 63ea6e9 commit 8d4cb27
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 15 deletions.
2 changes: 2 additions & 0 deletions preprocessing/nextclade/dev_dependencies.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ mypy
ruff
types-PyYAML
types-requests
types-pytz
types-python-dateutil
10 changes: 5 additions & 5 deletions preprocessing/nextclade/src/loculus_preprocessing/backend.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
"""Functions to interface with the backend"""

import dataclasses
import datetime as dt
import json
import logging
import time
from collections.abc import Sequence
from http import HTTPStatus
import dataclasses
import json
from pathlib import Path

import jwt
import pytz
import requests
import time
from collections.abc import Sequence

from .config import Config
from .datatypes import (
Expand Down Expand Up @@ -76,7 +76,7 @@ def fetch_unprocessed_sequences(n: int, config: Config) -> str:
if response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY:
logging.debug(f"{response.text}.\nSleeping for a while.")
time.sleep(60 * 1)
return []
return ""
msg = f"Fetching unprocessed data failed. Status code: {
response.status_code}"
raise Exception(
Expand Down
2 changes: 1 addition & 1 deletion preprocessing/nextclade/src/loculus_preprocessing/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
@dataclass
class Config:
organism: str = "mpox"
backend_host: str = None # Set default to None or similar placeholder
backend_host: str = ""
keycloak_host: str = "http://127.0.0.1:8083"
keycloak_user: str = "preprocessing_pipeline"
keycloak_password: str = "preprocessing_pipeline"
Expand Down
20 changes: 11 additions & 9 deletions preprocessing/nextclade/src/loculus_preprocessing/prepro.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
AnnotationSource,
AnnotationSourceType,
GeneName,
SegmentName,
InputMetadata,
NucleotideInsertion,
NucleotideSequence,
Expand All @@ -31,6 +30,7 @@
ProcessingAnnotation,
ProcessingResult,
ProcessingSpec,
SegmentName,
UnprocessedAfterNextclade,
UnprocessedData,
UnprocessedEntry,
Expand Down Expand Up @@ -195,7 +195,10 @@ def enrich_with_nextclade(
logging.debug("Nextclade results available in %s", result_dir)

# Add aligned sequences to aligned_nucleotide_sequences
load_aligned_nuc_sequences(result_dir_seg, segment, aligned_nucleotide_sequences)
# Modifies aligned_nucleotide_sequences in place
aligned_nucleotide_sequences = load_aligned_nuc_sequences(
result_dir_seg, segment, aligned_nucleotide_sequences
)

for gene in config.genes:
translation_path = result_dir_seg + f"/nextclade.cds_translation.{gene}.fasta"
Expand Down Expand Up @@ -273,7 +276,7 @@ def load_aligned_nuc_sequences(
aligned_nucleotide_sequences: dict[
AccessionVersion, dict[SegmentName, NucleotideSequence | None]
],
) -> dict[AccessionVersion, NucleotideSequence]:
) -> dict[AccessionVersion, dict[SegmentName, NucleotideSequence | None]]:
"""
Load the nextclade alignment results into the aligned_nucleotide_sequences dict, mapping each
accession to a segmentName: NucleotideSequence dictionary.
Expand All @@ -284,6 +287,7 @@ def load_aligned_nuc_sequences(
sequence_id: str = aligned_sequence.id
sequence: NucleotideSequence = str(aligned_sequence.seq)
aligned_nucleotide_sequences[sequence_id][segment] = mask_terminal_gaps(sequence)
return aligned_nucleotide_sequences


def accession_from_str(id_str: AccessionVersion) -> str:
Expand Down Expand Up @@ -318,6 +322,8 @@ def get_metadata(
nextclade_prefix = "nextclade."
if input_path.startswith(nextclade_prefix):
# Remove "nextclade." prefix
if spec.args is None:
spec.args = {}
segment = spec.args.get("segment", "main")
if unprocessed.nextcladeMetadata is None:
errors.append(
Expand Down Expand Up @@ -374,15 +380,11 @@ def process_single(
"""Process a single sequence per config"""
errors: list[ProcessingAnnotation] = []
warnings: list[ProcessingAnnotation] = []
len_dict: dict[str, str | int] = {}
output_metadata: ProcessedMetadata = {}
for segment in config.nucleotideSequences:
sequence = unprocessed.unalignedNucleotideSequences[segment]
key = "length" if segment == "main" else "length_" + segment
if sequence:
len_dict[key] = len(sequence)
else:
len_dict[key] = 0
output_metadata: ProcessedMetadata = len_dict
output_metadata[key] = len(sequence) if sequence else 0

for output_field, spec_dict in config.processing_spec.items():
length_fields = [
Expand Down

0 comments on commit 8d4cb27

Please sign in to comment.