diff --git a/.env_template b/.env_template new file mode 100644 index 0000000..23398c8 --- /dev/null +++ b/.env_template @@ -0,0 +1 @@ +HOSTNAME=de.metabolomics-usi.gnps2.org \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..c7945e3 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,29 @@ +name: Docker Build Test + +on: + push: + branches: + master + pull_request: + branches: + master + schedule: + - cron: '0 0 * * 1' + +jobs: + build-test: + runs-on: ubuntu-latest + strategy: + max-parallel: 4 + matrix: + python-version: [3.8] +# TODO: We probably should switch to using the Docker version. + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Build Docker + run: | + docker build . diff --git a/Dockerfile b/Dockerfile index fa855a9..d96a91d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,20 @@ -FROM continuumio/miniconda3:4.8.2 +FROM continuumio/miniconda3:4.10.3 MAINTAINER Mingxun Wang "mwang87@gmail.com" WORKDIR /app RUN apt-get update -y && \ apt-get install -y libxrender-dev && \ - apt-get install -y git-core -RUN conda create -y -n usi -c conda-forge -c bioconda -c defaults celery \ + apt-get install -y git-core libarchive-dev +RUN conda install -c conda-forge mamba +RUN mamba create -y -n usi -c conda-forge -c bioconda -c defaults celery==5.3.6 \ dash=1.20.0 dash-bootstrap-components=0.9.2 flask gunicorn \ - joblib matplotlib numba numpy openssl qrcode rdkit requests \ - requests-cache scipy spectrum_utils werkzeug + joblib matplotlib==3.6.3 numba numpy openssl qrcode rdkit requests \ + requests-cache scipy spectrum_utils==0.3.5 werkzeug==2.0.0 + +# install redis with pypi +RUN /bin/bash -c 'source activate usi && pip install redis' + +# installing hash RUN /bin/bash -c 'source activate usi && pip install "git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python" && pip install celery-once' RUN echo "source activate usi" > ~/.bashrc diff --git a/Makefile b/Makefile index 5d5f5bc..586efe7 100644 --- a/Makefile +++ b/Makefile @@ -23,19 +23,19 @@ clear-cache: #Docker Compose server-compose-interactive: - docker-compose build - docker-compose up + docker-compose --compatibility build + docker-compose --compatibility up server-compose: - docker-compose build - docker-compose up -d + docker-compose --compatibility build + docker-compose --compatibility up -d server-compose-production-interactive: - docker-compose build + docker-compose --compatibility build docker-compose -f docker-compose.yml -f docker-compose-production.yml --compatibility up server-compose-production: - docker-compose build + docker-compose --compatibility build docker-compose -f docker-compose.yml -f docker-compose-production.yml --compatibility up -d attach: diff --git a/docker-compose-production.yml b/docker-compose-production.yml index 5423467..9bcbe7f 100644 --- a/docker-compose-production.yml +++ b/docker-compose-production.yml @@ -5,29 +5,44 @@ services: - default - nginx-net environment: - VIRTUAL_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org - VIRTUAL_PORT: 5087 - LETSENCRYPT_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org + VIRTUAL_HOST: ${HOSTNAME:-metabolomics-usi.gnps2.org} + VIRTUAL_PORT: 5000 + LETSENCRYPT_HOST: ${HOSTNAME:-metabolomics-usi.gnps2.org} LETSENCRYPT_EMAIL: mwang87@gmail.com command: /app/run_server.sh deploy: resources: limits: memory: 16000M + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" metabolomicsusi-worker: deploy: resources: limits: memory: 16000M + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" metabolomicsusi-redis: deploy: resources: limits: memory: 4000M + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" networks: nginx-net: external: - name: nginx-net \ No newline at end of file + name: nginx-net diff --git a/docker-compose.yml b/docker-compose.yml index a0432e7..76a343f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,7 +12,7 @@ services: - ./logs/:/app/logs:rw networks: - default - restart: on-failure + restart: always command: /app/run_dev_server.sh metabolomicsusi-worker: @@ -24,7 +24,7 @@ services: - ./tmp:/app/tmp:rw - ./logs:/app/logs:rw command: /app/run_worker.sh - restart: on-failure + restart: always depends_on: - metabolomicsusi-redis networks: @@ -33,10 +33,11 @@ services: metabolomicsusi-redis: container_name: metabolomicsusi-redis - image: redis + #image: valkey/valkey:alpine3.20 + image: redis:alpine networks: - default - restart: on-failure + restart: always networks: nginx-net: diff --git a/metabolomics_spectrum_resolver/dashinterface.py b/metabolomics_spectrum_resolver/dashinterface.py index 5069ef2..9e5c869 100644 --- a/metabolomics_spectrum_resolver/dashinterface.py +++ b/metabolomics_spectrum_resolver/dashinterface.py @@ -39,6 +39,8 @@ gtag('config', 'UA-8412213-8'); + + {%metas%} {%title%} @@ -59,10 +61,10 @@ children=[ dbc.NavbarBrand( html.Img( - src="https://gnps-cytoscape.ucsd.edu/static/img/GNPS_logo.png", + src="https://gnps2.org/static/img/logo.png", width="120px", ), - href="https://gnps.ucsd.edu", + href="https://gnps2.org", ), dbc.Nav( [ @@ -426,7 +428,7 @@ dbc.CardHeader(html.H5("Contributors")), dbc.CardBody( [ - "Mingxun Wang, PhD – UC San Diego", + "Mingxun Wang, PhD – UC Riverside", html.Br(), "Wout Bittremieux, PhD – UC San Diego", html.Br(), diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py index f488e4c..9ac9cdd 100644 --- a/metabolomics_spectrum_resolver/parsing.py +++ b/metabolomics_spectrum_resolver/parsing.py @@ -4,6 +4,8 @@ from typing import Tuple import requests +import pandas as pd +from io import StringIO import urllib.parse import spectrum_utils.spectrum as sus import splash @@ -14,7 +16,8 @@ MS2LDA_SERVER = "http://ms2lda.org/basicviz/" MOTIFDB_SERVER = "http://ms2lda.org/motifdb/" -MASSBANK_SERVER = "https://massbank.us/rest/spectra/" +MONA_SERVER = "https://massbank.us/rest/spectra/" +MASSBANKEUROPE_SERVER = "https://msbi.ipb-halle.de/MassBank-api/v1/records/" # USI specification: http://www.psidev.info/usi usi_pattern = re.compile( @@ -42,8 +45,8 @@ r"^mzspec" # collection identifier # Unofficial proteomics spectral library identifier: MASSIVEKB - # Metabolomics collection identifiers: GNPS, MASSBANK, MS2LDA, MOTIFDB - r":(MASSIVEKB|GNPS|MASSBANK|MS2LDA|MOTIFDB)" + # Metabolomics collection identifiers: GNPS, MASSBANK, MS2LDA, MOTIFDB, MTBLS, ST + r":(MASSIVEKB|GNPS|GNPS2|MASSBANK|MS2LDA|MOTIFDB|TINYMASS|MTBLS\d+|ST\d{6}|)" # msRun identifier r":(.*)" # index flag @@ -90,6 +93,9 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: Tuple[sus.MsmsSpectrum, str, str] A tuple of the `MsmsSpectrum`, its source link, and its SPLASH. """ + # Very basic cleanup + usi = str(usi).strip() + match = _match_usi(usi) try: collection = match.group(1).lower() @@ -100,7 +106,6 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: # changes, be sure to change this logic. if ( annotation is not None - or collection.startswith("msv") or collection.startswith("pxd") or collection.startswith("pxl") or collection.startswith("rpxd") @@ -108,14 +113,29 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]: or collection == "massive" ): spectrum, source_link = _parse_msv_pxd(usi) + elif collection.startswith("msv"): + # Lets try to use GNPS2 for this first + try: + spectrum, source_link = _parse_gnps2(usi) + except: + spectrum, source_link = _parse_msv_pxd(usi) elif collection == "gnps": spectrum, source_link = _parse_gnps(usi) + elif collection == "gnps2": + spectrum, source_link = _parse_gnps2(usi) + elif collection.startswith("mtbls"): + # Since they don't have their own resolver, we'll go here to GNPS2 for now + spectrum, source_link = _parse_gnps2(usi) elif collection == "massbank": spectrum, source_link = _parse_massbank(usi) elif collection == "ms2lda": spectrum, source_link = _parse_ms2lda(usi) elif collection == "motifdb": spectrum, source_link = _parse_motifdb(usi) + elif collection.startswith("st"): + spectrum, source_link = _parse_metabolomics_workbench(usi) + elif collection.startswith("tinymass"): + spectrum, source_link = _parse_tinymass(usi) else: raise UsiError(f"Unknown USI collection: {match.group(1)}", 400) splash_key = splash_builder.splash( @@ -318,6 +338,14 @@ def _parse_gnps(usi: str) -> Tuple[sus.MsmsSpectrum, str]: else: return _parse_gnps_library(usi) +def _parse_gnps2(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + ms_run = match.group(2) + if ms_run.lower().startswith("task"): + return _parse_gnps2_task(usi) + else: + # We are likely dealing with a dataset on the GNPS2 side + return _parse_gnps2_dataset(usi) # Parse GNPS clustered spectra in Molecular Networking. def _parse_gnps_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: @@ -358,6 +386,119 @@ def _parse_gnps_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: raise UsiError("Unknown GNPS task USI", 404) +# Parse GNPS2 task spectra +def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + gnps_task_match = gnps_task_pattern.match(match.group(2)) + if gnps_task_match is None: + raise UsiError("Incorrectly formatted GNPS2 task", 400) + task = gnps_task_match.group(1) + filename = gnps_task_match.group(2) + index_flag = match.group(3) + + if not (index_flag.lower() == "scan" or index_flag.lower() == "nativeid"): + raise UsiError("Currently supported GNPS2 TASK index flags: scan and nativeId", 400) + + scan = match.group(4) + + # We will try in order these GNPS2 URLs to see if the task is actually there + gnps2_server_url_list = [ + "https://gnps2.org", + "https://beta.gnps2.org", + "https://dev.gnps2.org", + "https://de.gnps2.org", + ] + + for gnps2server_url in gnps2_server_url_list: + try: + request_url = ( + f"{gnps2server_url}/spectrumpeaks?format=json&usi={usi}" + ) + lookup_request = requests.get(request_url, timeout=timeout) + lookup_request.raise_for_status() + spectrum_dict = lookup_request.json() + mz, intensity = zip(*spectrum_dict["peaks"]) + source_link = ( + f"{gnps2server_url}/status?task={task}" + ) + if "precursor_mz" in spectrum_dict: + precursor_mz = float(spectrum_dict["precursor_mz"]) + charge = 0 + else: + precursor_mz, charge = 0, 0 + + spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity) + return spectrum, source_link + except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError): + pass + + raise UsiError("Unknown GNPS2 task USI", 404) + +def _parse_gnps2_dataset(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + dataset_identifier = match.group(1) + index_flag = match.group(3) + scan = match.group(4) + + if not (index_flag.lower() == "scan" or index_flag.lower() == "nativeid"): + raise UsiError("Currently supported GNPS2 Dataset index flags: scan and nativeId", 400) + + try: + request_url = ( + f"https://gnps2.org/spectrumpeaks?format=json&usi={usi}" + ) + lookup_request = requests.get(request_url, timeout=timeout) + lookup_request.raise_for_status() + spectrum_dict = lookup_request.json() + mz, intensity = zip(*spectrum_dict["peaks"]) + + if "MTBLS" in dataset_identifier: + source_link = ( + f"https://www.ebi.ac.uk/metabolights/editor/{dataset_identifier}/descriptors" + ) + elif "MSV" in dataset_identifier: + source_link = ( + f"https://massive.ucsd.edu/ProteoSAFe/" + f"QueryMSV?id={dataset_identifier}" + ) + + if "precursor_mz" in spectrum_dict: + precursor_mz = float(spectrum_dict["precursor_mz"]) + charge = 0 + else: + precursor_mz, charge = 0, 0 + + spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity) + return spectrum, source_link + except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError): + raise UsiError("Unknown GNPS2 Dataset USI", 404) + +# Parse TINYMASS task spectra +def _parse_tinymass(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + + try: + request_url = ( + f"https://tinymass.gnps2.org/resolve?usi={usi}" + ) + lookup_request = requests.get(request_url, timeout=timeout) + lookup_request.raise_for_status() + spectrum_dict = lookup_request.json() + mz, intensity = zip(*spectrum_dict["peaks"]) + source_link = ( + f"https://tinymass.gnps2.org/resolve?usi={usi}" + ) + if "precursor" in spectrum_dict: + precursor_mz = float(spectrum_dict["precursor"]) + charge = 0 + else: + precursor_mz, charge = 0, 0 + + spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity) + return spectrum, source_link + except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError): + raise UsiError("Unknown Tiny Mass task USI", 404) + # Parse GNPS library. def _parse_gnps_library(usi: str) -> Tuple[sus.MsmsSpectrum, str]: match = _match_usi(usi) @@ -369,8 +510,8 @@ def _parse_gnps_library(usi: str) -> Tuple[sus.MsmsSpectrum, str]: index = match.group(4) try: request_url = ( - f"https://gnps.ucsd.edu/ProteoSAFe/" - f"SpectrumCommentServlet?SpectrumID={index}" + f"https://external.gnps2.org/" + f"gnpsspectrum?SpectrumID={index}" ) lookup_request = requests.get(request_url, timeout=timeout) lookup_request.raise_for_status() @@ -407,6 +548,23 @@ def _parse_gnps_library(usi: str) -> Tuple[sus.MsmsSpectrum, str]: # Parse MassBank entry. def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + """ Parse a MassBank or MoNA USI and return the corresponding spectrum/source url. + + MassBank USIs are of the form: MSBNK-[A-Za-z0-9_]{1,32}-[A-Z0-9_]{1,64} + + Fall back to MoNA if MassBank EU fails to respond. Note that partial MassBank ids + (e.g., SM858102) will only resolve to MoNA. + + Parameters + ---------- + usi : str + The USI to be parsed. + + Returns + ------- + Tuple[sus.MsmsSpectrum, str] + The parsed spectrum and the source link. + """ match = _match_usi(usi) index_flag = match.group(3) if index_flag.lower() != "accession": @@ -416,16 +574,63 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: index = match.group(4) # Clean up the new MassBank accessions if necessary. massbank_accession = re.match( - r"MSBNK-[A-Z0-9_]{1,32}-([A-Z0-9_]{1,64})", index + # See https://github.com/MassBank/MassBank-web/blob/main/Documentation/MassBankRecordFormat.md#211-accession + r"(MSBNK-[A-Za-z0-9_]{1,32}-[A-Z0-9_]{1,64})", index ) if massbank_accession is not None: - index = massbank_accession.group(1) + # It's certiainly MassBank EU/JP + try: + return _parse_massbankEurope(usi) + + except UsiError: + pass + + # Either MassBank EU Failed or it's a MoNA entry, fallback to MoNA. + # Let the exception propagate if it fails + return _parse_mona(usi) + + +# Parse MONA entry. +def _parse_mona(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + """ Parse a MONA USI and return the corresponding spectrum. Performs a web request to + MONA_SERVER. + + Parameters + ---------- + usi : str + The USI to be parsed. + + Globals + ------- + MONA_SERVER : str + The base URL for the MONA server. + + Returns + ------- + Tuple[sus.MsmsSpectrum, str] + The parsed spectrum and the source link. + + Raises + ------ + UsiError + If the USI could not be parsed because it is incorrectly formatted. + """ + match = _match_usi(usi) + index_flag = match.group(3) + if index_flag.lower() != "accession": + raise UsiError( + "Currently supported MassBank index flags: accession", 400 + ) + + index = match.group(4) + try: lookup_request = requests.get( - f"{MASSBANK_SERVER}{index}", timeout=timeout + f"{MONA_SERVER}{index}", timeout=timeout ) lookup_request.raise_for_status() spectrum_dict = lookup_request.json() + mz, intensity = [], [] for peak in spectrum_dict["spectrum"].split(): peak_mz, peak_intensity = peak.split(":") @@ -437,14 +642,82 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]: precursor_mz = float(metadata["value"]) break source_link = ( - f"https://massbank.eu/MassBank/" f"RecordDisplay.jsp?id={index}" + f"https://massbank.us/spectra/display/{index}" ) spectrum = sus.MsmsSpectrum(usi, precursor_mz, 0, mz, intensity) + return spectrum, source_link + except requests.exceptions.HTTPError: raise UsiError("Unknown MassBank USI", 404) +# Parse MassBank entry. +def _parse_massbankEurope(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + """ Parse a MassBank[EU|JP] USI and return the corresponding spectrum. Performs a web request to + MassBank Server. + + Parameters + ---------- + usi : str + The USI to be parsed. + + Globals + ------- + MassBank Server : str + The base URL for the MONA server. + + Returns + ------- + Tuple[sus.MsmsSpectrum, str] + The parsed spectrum and the source link. + + Raises + ------ + UsiError + If the USI could not be parsed because it is incorrectly formatted. + """ + match = _match_usi(usi) + index_flag = match.group(3) + if index_flag.lower() != "accession": + raise UsiError( + "Currently supported MassBank index flags: accession", 400 + ) + + index = match.group(4) + + try: + # Try requesting from massbankeurope first + lookup_request = requests.get( + f"{MASSBANKEUROPE_SERVER}{index}", timeout=timeout + ) + + lookup_request.raise_for_status() + spectrum_dict = lookup_request.json() + + # If request is successful we know it was massbankeurope and parse accordingly + peaks = spectrum_dict["peak"]["peak"]["values"] + + mz = [peak["mz"] for peak in peaks] + intensity = [peak["intensity"] for peak in peaks] + + precursor_mz = next( + (float(item["value"]) for item in spectrum_dict['mass_spectrometry']['focused_ion'] if item["subtag"] == "PRECURSOR_M/Z"), + 0 + ) + + source_link = ( + f"https://massbank.eu/MassBank/" f"RecordDisplay?id={index}" + ) + + spectrum = sus.MsmsSpectrum(usi, precursor_mz, 0, mz, intensity) + return spectrum, source_link + + + #show what error + except requests.exceptions.HTTPError: + raise UsiError("Unknown MassBank USI", 404) + # Parse MS2LDA from ms2lda.org. def _parse_ms2lda(usi: str) -> Tuple[sus.MsmsSpectrum, str]: @@ -490,29 +763,44 @@ def _parse_msv_pxd(usi: str) -> Tuple[sus.MsmsSpectrum, str]: scan = match.group(4) try: lookup_url = ( - f"https://massive.ucsd.edu/ProteoSAFe/" + f"https://proteomics3.ucsd.edu/ProteoSAFe/" f"QuerySpectrum?id={urllib.parse.quote_plus(usi)}" ) lookup_request = requests.get(lookup_url, timeout=timeout) - lookup_request.raise_for_status() + try: + lookup_request.raise_for_status() + except: + lookup_url = ( + f"https://proteomics3.ucsd.edu/ProteoSAFe/" + f"QuerySpectrum?id={urllib.parse.quote_plus(usi)}" + ) + lookup_request = requests.get(lookup_url, timeout=timeout) + lookup_request.raise_for_status() + lookup_json = lookup_request.json() for spectrum_file in lookup_json["row_data"]: + # Checking if its an actual file we can resolve or if MSV will go to PX directly if any( spectrum_file["file_descriptor"].lower().endswith(extension) for extension in ["mzml", "mzxml", "mgf"] - ): - request_url = ( + ) or spectrum_file["file_descriptor"].startswith("f.ProteomeCentral"): + file_descriptor = spectrum_file['file_descriptor'] + if file_descriptor.startswith("f."): + file_descriptor = file_descriptor[2:] + + peaks_request_url = ( f"https://massive.ucsd.edu/ProteoSAFe/" f"DownloadResultFile?" f"task=4f2ac74ea114401787a7e96e143bb4a1&" f"invoke=annotatedSpectrumImageText&block=0&file=FILE->" - f"{urllib.parse.quote(spectrum_file['file_descriptor'])}" + f"{urllib.parse.quote(file_descriptor)}" f"&scan={scan}&peptide=*..*&force=false&" f"format=JSON&uploadfile=True" ) + try: spectrum_request = requests.get( - request_url, timeout=timeout + peaks_request_url, timeout=timeout ) spectrum_request.raise_for_status() spectrum_dict = spectrum_request.json() @@ -569,6 +857,7 @@ def _parse_msv_pxd(usi: str) -> Tuple[sus.MsmsSpectrum, str]: return spectrum, source_link except requests.exceptions.HTTPError: + raise pass raise UsiError("Unsupported/unknown USI", 404) @@ -596,6 +885,57 @@ def _parse_motifdb(usi: str) -> Tuple[sus.MsmsSpectrum, str]: raise UsiError("Unknown MOTIFDB USI", 404) +# Parse GNPS library. +def _parse_metabolomics_workbench(usi: str) -> Tuple[sus.MsmsSpectrum, str]: + match = _match_usi(usi) + accession = match.group(1) + filename = match.group(2) + index_flag = match.group(3) + index = match.group(4) + + if index_flag.lower() != "scan": + raise UsiError( + "Currently supported MW index flags: scan", 400 + ) + try: + request_url = ( + f"https://www.metabolomicsworkbench.org/" + f"data/ms2.php?A={accession}.zip" + f"&F={urllib.parse.quote_plus(filename)}&S={index}" + ) + + # TODO: Do some extra exception handling if we don't find the filename directly. We might need to his another API to get the full filename + # Given the just the basename + + lookup_request = requests.get(request_url, timeout=timeout) + lookup_request.raise_for_status() + + response_text = lookup_request.text + response_text = (response_text.replace("
", "").replace("

", "").lstrip().rstrip()) + + # Parsing the MW Response + precursor_mz = float(response_text.split("\n")[0].split(":")[-1].replace("\"", "")) + charge = int(response_text.split("\n")[2].split(":")[-1].replace("\"", "")) + peaks_df = pd.read_csv(StringIO(response_text), sep=r" +", skiprows=4) + mz = list(peaks_df["m/z"]) + intensity = list(peaks_df["intensity"]) + + source_link = ( + f"https://www.metabolomicsworkbench.org/" + f"data/DRCCMetadata.php?Mode=Study&StudyID={accession}&StudyType=MS&ResultType=1" + ) + + spectrum = sus.MsmsSpectrum( + usi, + float(precursor_mz), + int(charge), + mz, + intensity, + ) + return spectrum, source_link + except requests.exceptions.HTTPError: + raise UsiError("Unknown MW USI", 404) + def _parse_sequence(peptide: str, peptide_clean: str) -> Tuple[str, str, list]: # Parse out gapped sequence (e.g. X+129.04259), faking it # with Glycine as the base residue and adding more mods to diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html index 2e827bd..4efe0e0 100644 --- a/metabolomics_spectrum_resolver/templates/homepage.html +++ b/metabolomics_spectrum_resolver/templates/homepage.html @@ -80,24 +80,8 @@

diff --git a/metabolomics_spectrum_resolver/views.py b/metabolomics_spectrum_resolver/views.py index 17becf0..61a20eb 100644 --- a/metabolomics_spectrum_resolver/views.py +++ b/metabolomics_spectrum_resolver/views.py @@ -625,6 +625,10 @@ def generate_qr(): qr_bytes.seek(0) return flask.send_file(qr_bytes, "image/png") +@blueprint.route("/robot.txt") +def robot(): + # Disallow all + return "User-agent: *\nDisallow: /", 200 @blueprint.errorhandler(Exception) def render_error(error): diff --git a/requirements.txt b/requirements.txt index fdddf45..62ea414 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,5 +19,5 @@ requests requests_cache scipy spectrum_utils -werkzeug -git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python +werkzeug==2.0.0 +git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python \ No newline at end of file diff --git a/run_worker.sh b/run_worker.sh index 2073a4f..7b9f227 100755 --- a/run_worker.sh +++ b/run_worker.sh @@ -3,4 +3,4 @@ source activate usi export C_FORCE_ROOT="true" #TODO: Make sure we don't run this worker as root -celery -A metabolomics_spectrum_resolver.tasks worker -l info --autoscale=12,1 -Q worker --max-tasks-per-child 10 --loglevel INFO +celery -A metabolomics_spectrum_resolver.tasks worker -l info --autoscale=16,1 -Q worker --max-tasks-per-child 10 --loglevel INFO diff --git a/test/usi_test_data.py b/test/usi_test_data.py index b80eee0..08357aa 100644 --- a/test/usi_test_data.py +++ b/test/usi_test_data.py @@ -10,6 +10,8 @@ "mzspec:GNPS:GNPS-LIBRARY:accession:CCMSLIB00005436077", "mzspec:MASSBANK::accession:SM858102", "mzspec:MASSBANK::accession:MSBNK-AAFC-AC000646", + # New Massbank identifier with lowercase + "mzspec:MASSBANK::accession:MSBNK-Athens_Univ-AU259904", "mzspec:MS2LDA:TASK-190:accession:270684", "mzspec:MOTIFDB::accession:171163", "mzspec:MSV000082791:(-)-epigallocatechin:scan:2", @@ -29,6 +31,8 @@ "mzspec:MassIVE:TASK-f4b86b150a164ee4a440b661e97a7193-spectra/specs_ms.mgf:scan:287215:HPYFYAPELLF[-10.059]FAKR/3", # MassIVE Task USIs disguised as GNPS Task USIs "mzspec:GNPS:TASK-f4b86b150a164ee4a440b661e97a7193-spectra/specs_ms.mgf:scan:287215:HPYFYAPELLF[-10.059]FAKR/3", + # Metabolomics Workbench USIs + "mzspec:ST000003:StemCell+Data+and+Raw+Files/iPSC-T1R1:scan:3", # Legacy cases. "mzspec:GNPSTASK-c95481f0c53d42e78a61bf899e9f9adb:spectra/specs_ms.mgf:scan:1943", "mzspec:GNPSTASK-64b22841ab3548f987b3cfc18696a581:spectra/specs_ms.mgf:scan:1469",