diff --git a/.env_template b/.env_template
new file mode 100644
index 0000000..23398c8
--- /dev/null
+++ b/.env_template
@@ -0,0 +1 @@
+HOSTNAME=de.metabolomics-usi.gnps2.org
\ No newline at end of file
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..c7945e3
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,29 @@
+name: Docker Build Test
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+  schedule:
+    - cron: '0 0 * * 1'  # every Monday at 00:00
+
+jobs:
+  build-test:
+    runs-on: ubuntu-latest
+    strategy:
+      max-parallel: 4
+      matrix:
+        python-version: ['3.8']  # quoted so the version is never read as a float
+    # TODO: We probably should switch to using the Docker version.
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Build Docker
+        run: |
+          docker build .
diff --git a/Dockerfile b/Dockerfile
index fa855a9..d96a91d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,14 +1,20 @@
-FROM continuumio/miniconda3:4.8.2
+FROM continuumio/miniconda3:4.10.3
MAINTAINER Mingxun Wang "mwang87@gmail.com"
WORKDIR /app
RUN apt-get update -y && \
apt-get install -y libxrender-dev && \
- apt-get install -y git-core
-RUN conda create -y -n usi -c conda-forge -c bioconda -c defaults celery \
+ apt-get install -y git-core libarchive-dev
+RUN conda install -c conda-forge mamba
+RUN mamba create -y -n usi -c conda-forge -c bioconda -c defaults celery==5.3.6 \
dash=1.20.0 dash-bootstrap-components=0.9.2 flask gunicorn \
- joblib matplotlib numba numpy openssl qrcode rdkit requests \
- requests-cache scipy spectrum_utils werkzeug
+ joblib matplotlib==3.6.3 numba numpy openssl qrcode rdkit requests \
+ requests-cache scipy spectrum_utils==0.3.5 werkzeug==2.0.0
+
+# install redis with pypi
+RUN /bin/bash -c 'source activate usi && pip install redis'
+
+# installing hash
RUN /bin/bash -c 'source activate usi && pip install "git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python" && pip install celery-once'
RUN echo "source activate usi" > ~/.bashrc
diff --git a/Makefile b/Makefile
index 5d5f5bc..586efe7 100644
--- a/Makefile
+++ b/Makefile
@@ -23,19 +23,19 @@ clear-cache:
#Docker Compose
server-compose-interactive:
- docker-compose build
- docker-compose up
+ docker-compose --compatibility build
+ docker-compose --compatibility up
server-compose:
- docker-compose build
- docker-compose up -d
+ docker-compose --compatibility build
+ docker-compose --compatibility up -d
server-compose-production-interactive:
- docker-compose build
+ docker-compose --compatibility build
docker-compose -f docker-compose.yml -f docker-compose-production.yml --compatibility up
server-compose-production:
- docker-compose build
+ docker-compose --compatibility build
docker-compose -f docker-compose.yml -f docker-compose-production.yml --compatibility up -d
attach:
diff --git a/docker-compose-production.yml b/docker-compose-production.yml
index 5423467..9bcbe7f 100644
--- a/docker-compose-production.yml
+++ b/docker-compose-production.yml
@@ -5,29 +5,44 @@ services:
- default
- nginx-net
environment:
- VIRTUAL_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org
- VIRTUAL_PORT: 5087
- LETSENCRYPT_HOST: metabolomics-usi.ucsd.edu,metabolomics-usi.gnps2.org
+ VIRTUAL_HOST: ${HOSTNAME:-metabolomics-usi.gnps2.org}
+ VIRTUAL_PORT: 5000
+ LETSENCRYPT_HOST: ${HOSTNAME:-metabolomics-usi.gnps2.org}
LETSENCRYPT_EMAIL: mwang87@gmail.com
command: /app/run_server.sh
deploy:
resources:
limits:
memory: 16000M
+ logging:
+ driver: "json-file"
+ options:
+ max-size: "10m"
+ max-file: "3"
metabolomicsusi-worker:
deploy:
resources:
limits:
memory: 16000M
+ logging:
+ driver: "json-file"
+ options:
+ max-size: "10m"
+ max-file: "3"
metabolomicsusi-redis:
deploy:
resources:
limits:
memory: 4000M
+ logging:
+ driver: "json-file"
+ options:
+ max-size: "10m"
+ max-file: "3"
networks:
nginx-net:
external:
- name: nginx-net
\ No newline at end of file
+ name: nginx-net
diff --git a/docker-compose.yml b/docker-compose.yml
index a0432e7..76a343f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -12,7 +12,7 @@ services:
- ./logs/:/app/logs:rw
networks:
- default
- restart: on-failure
+ restart: always
command: /app/run_dev_server.sh
metabolomicsusi-worker:
@@ -24,7 +24,7 @@ services:
- ./tmp:/app/tmp:rw
- ./logs:/app/logs:rw
command: /app/run_worker.sh
- restart: on-failure
+ restart: always
depends_on:
- metabolomicsusi-redis
networks:
@@ -33,10 +33,11 @@ services:
metabolomicsusi-redis:
container_name: metabolomicsusi-redis
- image: redis
+ #image: valkey/valkey:alpine3.20
+ image: redis:alpine
networks:
- default
- restart: on-failure
+ restart: always
networks:
nginx-net:
diff --git a/metabolomics_spectrum_resolver/dashinterface.py b/metabolomics_spectrum_resolver/dashinterface.py
index 5069ef2..9e5c869 100644
--- a/metabolomics_spectrum_resolver/dashinterface.py
+++ b/metabolomics_spectrum_resolver/dashinterface.py
@@ -39,6 +39,8 @@
gtag('config', 'UA-8412213-8');
+
+
{%metas%}
{%title%}
@@ -59,10 +61,10 @@
children=[
dbc.NavbarBrand(
html.Img(
- src="https://gnps-cytoscape.ucsd.edu/static/img/GNPS_logo.png",
+ src="https://gnps2.org/static/img/logo.png",
width="120px",
),
- href="https://gnps.ucsd.edu",
+ href="https://gnps2.org",
),
dbc.Nav(
[
@@ -426,7 +428,7 @@
dbc.CardHeader(html.H5("Contributors")),
dbc.CardBody(
[
- "Mingxun Wang, PhD – UC San Diego",
+ "Mingxun Wang, PhD – UC Riverside",
html.Br(),
"Wout Bittremieux, PhD – UC San Diego",
html.Br(),
diff --git a/metabolomics_spectrum_resolver/parsing.py b/metabolomics_spectrum_resolver/parsing.py
index f488e4c..9ac9cdd 100644
--- a/metabolomics_spectrum_resolver/parsing.py
+++ b/metabolomics_spectrum_resolver/parsing.py
@@ -4,6 +4,8 @@
from typing import Tuple
import requests
+import pandas as pd
+from io import StringIO
import urllib.parse
import spectrum_utils.spectrum as sus
import splash
@@ -14,7 +16,8 @@
MS2LDA_SERVER = "http://ms2lda.org/basicviz/"
MOTIFDB_SERVER = "http://ms2lda.org/motifdb/"
-MASSBANK_SERVER = "https://massbank.us/rest/spectra/"
+MONA_SERVER = "https://massbank.us/rest/spectra/"
+MASSBANKEUROPE_SERVER = "https://msbi.ipb-halle.de/MassBank-api/v1/records/"
# USI specification: http://www.psidev.info/usi
usi_pattern = re.compile(
@@ -42,8 +45,8 @@
r"^mzspec"
# collection identifier
# Unofficial proteomics spectral library identifier: MASSIVEKB
- # Metabolomics collection identifiers: GNPS, MASSBANK, MS2LDA, MOTIFDB
- r":(MASSIVEKB|GNPS|MASSBANK|MS2LDA|MOTIFDB)"
+ # Metabolomics collection identifiers: GNPS, GNPS2, MASSBANK, MS2LDA, MOTIFDB, TINYMASS, MTBLS, ST
+ r":(MASSIVEKB|GNPS|GNPS2|MASSBANK|MS2LDA|MOTIFDB|TINYMASS|MTBLS\d+|ST\d{6}|)"
# msRun identifier
r":(.*)"
# index flag
@@ -90,6 +93,9 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]:
Tuple[sus.MsmsSpectrum, str, str]
A tuple of the `MsmsSpectrum`, its source link, and its SPLASH.
"""
+ # Very basic cleanup
+ usi = str(usi).strip()
+
match = _match_usi(usi)
try:
collection = match.group(1).lower()
@@ -100,7 +106,6 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]:
# changes, be sure to change this logic.
if (
annotation is not None
- or collection.startswith("msv")
or collection.startswith("pxd")
or collection.startswith("pxl")
or collection.startswith("rpxd")
@@ -108,14 +113,29 @@ def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]:
or collection == "massive"
):
spectrum, source_link = _parse_msv_pxd(usi)
+ elif collection.startswith("msv"):
+ # Let's try to use GNPS2 for this first
+ try:
+ spectrum, source_link = _parse_gnps2(usi)
+ except:
+ spectrum, source_link = _parse_msv_pxd(usi)
elif collection == "gnps":
spectrum, source_link = _parse_gnps(usi)
+ elif collection == "gnps2":
+ spectrum, source_link = _parse_gnps2(usi)
+ elif collection.startswith("mtbls"):
+ # Since they don't have their own resolver, we'll go here to GNPS2 for now
+ spectrum, source_link = _parse_gnps2(usi)
elif collection == "massbank":
spectrum, source_link = _parse_massbank(usi)
elif collection == "ms2lda":
spectrum, source_link = _parse_ms2lda(usi)
elif collection == "motifdb":
spectrum, source_link = _parse_motifdb(usi)
+ elif collection.startswith("st"):
+ spectrum, source_link = _parse_metabolomics_workbench(usi)
+ elif collection.startswith("tinymass"):
+ spectrum, source_link = _parse_tinymass(usi)
else:
raise UsiError(f"Unknown USI collection: {match.group(1)}", 400)
splash_key = splash_builder.splash(
@@ -318,6 +338,14 @@ def _parse_gnps(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
else:
return _parse_gnps_library(usi)
+def _parse_gnps2(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
+    """
+    Route a GNPS2 USI to the task resolver or the dataset resolver.
+
+    Parameters
+    ----------
+    usi : str
+        The USI to be parsed.
+
+    Returns
+    -------
+    Tuple[sus.MsmsSpectrum, str]
+        Whatever the selected resolver returns: the spectrum and its source
+        link.
+    """
+    run_identifier = _match_usi(usi).group(2)
+    looks_like_task = run_identifier.lower().startswith("task")
+    # Anything that is not a task is likely a dataset on the GNPS2 side.
+    resolver = _parse_gnps2_task if looks_like_task else _parse_gnps2_dataset
+    return resolver(usi)
# Parse GNPS clustered spectra in Molecular Networking.
def _parse_gnps_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
@@ -358,6 +386,119 @@ def _parse_gnps_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
raise UsiError("Unknown GNPS task USI", 404)
+# Parse GNPS2 task spectra
+def _parse_gnps2_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
+    """
+    Resolve a GNPS2 task USI by asking each known GNPS2 deployment in turn.
+
+    Parameters
+    ----------
+    usi : str
+        The USI to be parsed. Its msRun part must match `gnps_task_pattern`.
+
+    Returns
+    -------
+    Tuple[sus.MsmsSpectrum, str]
+        The spectrum and a link to the task status page on the server that
+        answered.
+
+    Raises
+    ------
+    UsiError
+        400 if the task part is malformed or the index flag is not `scan` or
+        `nativeId`; 404 if none of the servers could resolve the USI.
+    """
+    match = _match_usi(usi)
+    gnps_task_match = gnps_task_pattern.match(match.group(2))
+    if gnps_task_match is None:
+        raise UsiError("Incorrectly formatted GNPS2 task", 400)
+    task = gnps_task_match.group(1)
+    index_flag = match.group(3)
+
+    if index_flag.lower() not in ("scan", "nativeid"):
+        raise UsiError(
+            "Currently supported GNPS2 TASK index flags: scan and nativeId",
+            400,
+        )
+
+    # We will try in order these GNPS2 URLs to see if the task is actually there
+    gnps2_server_url_list = [
+        "https://gnps2.org",
+        "https://beta.gnps2.org",
+        "https://dev.gnps2.org",
+        "https://de.gnps2.org",
+    ]
+
+    # Percent-encode the USI so characters such as "+", "&" or "#" survive as
+    # part of the `usi` query parameter (same approach as _parse_msv_pxd).
+    encoded_usi = urllib.parse.quote_plus(usi)
+
+    for gnps2server_url in gnps2_server_url_list:
+        try:
+            request_url = (
+                f"{gnps2server_url}/spectrumpeaks?format=json"
+                f"&usi={encoded_usi}"
+            )
+            lookup_request = requests.get(request_url, timeout=timeout)
+            lookup_request.raise_for_status()
+            spectrum_dict = lookup_request.json()
+            mz, intensity = zip(*spectrum_dict["peaks"])
+            source_link = f"{gnps2server_url}/status?task={task}"
+            if "precursor_mz" in spectrum_dict:
+                precursor_mz = float(spectrum_dict["precursor_mz"])
+            else:
+                precursor_mz = 0
+            # The JSON fields read here carry no charge, so it is always 0.
+            charge = 0
+
+            spectrum = sus.MsmsSpectrum(
+                usi, precursor_mz, charge, mz, intensity
+            )
+            return spectrum, source_link
+        except (
+            requests.exceptions.RequestException,
+            ValueError,
+            KeyError,
+        ):
+            # An unreachable server, an HTTP error, invalid JSON (a ValueError
+            # subclass) or a payload without usable peaks only means this
+            # deployment cannot serve the task: move on to the next one.
+            continue
+
+    raise UsiError("Unknown GNPS2 task USI", 404)
+
+
+def _parse_gnps2_dataset(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
+    """
+    Resolve a dataset USI (e.g. MSV or MTBLS) through the GNPS2 resolver.
+
+    Parameters
+    ----------
+    usi : str
+        The USI to be parsed.
+
+    Returns
+    -------
+    Tuple[sus.MsmsSpectrum, str]
+        The spectrum and its source link. The link points to MetaboLights
+        or MassIVE when the collection identifier names one of them, and to
+        the GNPS2 resolver request otherwise.
+
+    Raises
+    ------
+    UsiError
+        400 if the index flag is not `scan` or `nativeId`; 404 if GNPS2
+        returns an HTTP error or an unusable payload.
+    """
+    match = _match_usi(usi)
+    dataset_identifier = match.group(1)
+    index_flag = match.group(3)
+
+    if index_flag.lower() not in ("scan", "nativeid"):
+        raise UsiError(
+            "Currently supported GNPS2 Dataset index flags: scan and nativeId",
+            400,
+        )
+
+    # Percent-encode the USI so characters such as "+", "&" or "#" survive as
+    # part of the `usi` query parameter.
+    request_url = (
+        f"https://gnps2.org/spectrumpeaks?format=json"
+        f"&usi={urllib.parse.quote_plus(usi)}"
+    )
+
+    try:
+        lookup_request = requests.get(request_url, timeout=timeout)
+        lookup_request.raise_for_status()
+        spectrum_dict = lookup_request.json()
+        mz, intensity = zip(*spectrum_dict["peaks"])
+
+        if "precursor_mz" in spectrum_dict:
+            precursor_mz = float(spectrum_dict["precursor_mz"])
+        else:
+            precursor_mz = 0
+    except (requests.exceptions.HTTPError, ValueError, KeyError):
+        # ValueError also covers JSON decoding errors and an empty peak list.
+        raise UsiError("Unknown GNPS2 Dataset USI", 404)
+
+    # Compare case-insensitively: the dispatcher lower-cases the collection,
+    # so a lower-case identifier can reach this function.
+    identifier_upper = dataset_identifier.upper()
+    if "MTBLS" in identifier_upper:
+        source_link = (
+            f"https://www.ebi.ac.uk/metabolights/editor/"
+            f"{dataset_identifier}/descriptors"
+        )
+    elif "MSV" in identifier_upper:
+        source_link = (
+            f"https://massive.ucsd.edu/ProteoSAFe/"
+            f"QueryMSV?id={dataset_identifier}"
+        )
+    else:
+        # Other collections (e.g. a plain GNPS2 USI) used to leave
+        # `source_link` unbound; fall back to the resolver request itself.
+        source_link = request_url
+
+    # The JSON fields read here carry no charge, so it is always 0.
+    charge = 0
+    spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity)
+    return spectrum, source_link
+
+
+# Parse TINYMASS task spectra
+def _parse_tinymass(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
+    """
+    Resolve a TINYMASS USI through the Tiny Mass resolver.
+
+    Parameters
+    ----------
+    usi : str
+        The USI to be parsed.
+
+    Returns
+    -------
+    Tuple[sus.MsmsSpectrum, str]
+        The spectrum and its source link (the resolver URL for this USI).
+
+    Raises
+    ------
+    UsiError
+        404 if the resolver returns an HTTP error or an unusable payload.
+    """
+    # NOTE(review): the match result is not used here; the call is kept in
+    # case _match_usi rejects malformed USIs -- confirm against its definition.
+    _match_usi(usi)
+
+    # Percent-encode the USI so characters such as "+", "&" or "#" survive as
+    # part of the `usi` query parameter. The same URL doubles as source link.
+    request_url = (
+        f"https://tinymass.gnps2.org/resolve"
+        f"?usi={urllib.parse.quote_plus(usi)}"
+    )
+
+    try:
+        lookup_request = requests.get(request_url, timeout=timeout)
+        lookup_request.raise_for_status()
+        spectrum_dict = lookup_request.json()
+        mz, intensity = zip(*spectrum_dict["peaks"])
+        if "precursor" in spectrum_dict:
+            precursor_mz = float(spectrum_dict["precursor"])
+        else:
+            precursor_mz = 0
+    except (requests.exceptions.HTTPError, ValueError, KeyError):
+        # ValueError also covers JSON decoding errors and an empty peak list.
+        raise UsiError("Unknown Tiny Mass task USI", 404)
+
+    # The JSON fields read here carry no charge, so it is always 0.
+    charge = 0
+    spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity)
+    return spectrum, request_url
+
+
# Parse GNPS library.
def _parse_gnps_library(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
match = _match_usi(usi)
@@ -369,8 +510,8 @@ def _parse_gnps_library(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
index = match.group(4)
try:
request_url = (
- f"https://gnps.ucsd.edu/ProteoSAFe/"
- f"SpectrumCommentServlet?SpectrumID={index}"
+ f"https://external.gnps2.org/"
+ f"gnpsspectrum?SpectrumID={index}"
)
lookup_request = requests.get(request_url, timeout=timeout)
lookup_request.raise_for_status()
@@ -407,6 +548,23 @@ def _parse_gnps_library(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
# Parse MassBank entry.
def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
+ """ Parse a MassBank or MoNA USI and return the corresponding spectrum/source url.
+
+ MassBank USIs are of the form: MSBNK-[A-Za-z0-9_]{1,32}-[A-Z0-9_]{1,64}
+
+ Fall back to MoNA if MassBank EU fails to respond. Note that partial MassBank ids
+ (e.g., SM858102) will only resolve to MoNA.
+
+ Parameters
+ ----------
+ usi : str
+ The USI to be parsed.
+
+ Returns
+ -------
+ Tuple[sus.MsmsSpectrum, str]
+ The parsed spectrum and the source link.
+ """
match = _match_usi(usi)
index_flag = match.group(3)
if index_flag.lower() != "accession":
@@ -416,16 +574,63 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
index = match.group(4)
# Clean up the new MassBank accessions if necessary.
massbank_accession = re.match(
- r"MSBNK-[A-Z0-9_]{1,32}-([A-Z0-9_]{1,64})", index
+ # See https://github.com/MassBank/MassBank-web/blob/main/Documentation/MassBankRecordFormat.md#211-accession
+ r"(MSBNK-[A-Za-z0-9_]{1,32}-[A-Z0-9_]{1,64})", index
)
if massbank_accession is not None:
- index = massbank_accession.group(1)
+ # It's certainly MassBank EU/JP
+ try:
+ return _parse_massbankEurope(usi)
+
+ except UsiError:
+ pass
+
+ # Either MassBank EU Failed or it's a MoNA entry, fallback to MoNA.
+ # Let the exception propagate if it fails
+ return _parse_mona(usi)
+
+
+# Parse MONA entry.
+def _parse_mona(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
+ """ Parse a MONA USI and return the corresponding spectrum. Performs a web request to
+ MONA_SERVER.
+
+ Parameters
+ ----------
+ usi : str
+ The USI to be parsed.
+
+ Globals
+ -------
+ MONA_SERVER : str
+ The base URL for the MONA server.
+
+ Returns
+ -------
+ Tuple[sus.MsmsSpectrum, str]
+ The parsed spectrum and the source link.
+
+ Raises
+ ------
+ UsiError
+ If the USI could not be parsed because it is incorrectly formatted.
+ """
+ match = _match_usi(usi)
+ index_flag = match.group(3)
+ if index_flag.lower() != "accession":
+ raise UsiError(
+ "Currently supported MassBank index flags: accession", 400
+ )
+
+ index = match.group(4)
+
try:
lookup_request = requests.get(
- f"{MASSBANK_SERVER}{index}", timeout=timeout
+ f"{MONA_SERVER}{index}", timeout=timeout
)
lookup_request.raise_for_status()
spectrum_dict = lookup_request.json()
+
mz, intensity = [], []
for peak in spectrum_dict["spectrum"].split():
peak_mz, peak_intensity = peak.split(":")
@@ -437,14 +642,82 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
precursor_mz = float(metadata["value"])
break
source_link = (
- f"https://massbank.eu/MassBank/" f"RecordDisplay.jsp?id={index}"
+ f"https://massbank.us/spectra/display/{index}"
)
spectrum = sus.MsmsSpectrum(usi, precursor_mz, 0, mz, intensity)
+
return spectrum, source_link
+
except requests.exceptions.HTTPError:
raise UsiError("Unknown MassBank USI", 404)
+# Parse MassBank Europe/Japan entry.
+def _parse_massbankEurope(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
+    """ Parse a MassBank[EU|JP] USI and return the corresponding spectrum. Performs a
+    web request to MASSBANKEUROPE_SERVER.
+
+    Parameters
+    ----------
+    usi : str
+        The USI to be parsed.
+
+    Globals
+    -------
+    MASSBANKEUROPE_SERVER : str
+        The base URL of the MassBank Europe record API.
+
+    Returns
+    -------
+    Tuple[sus.MsmsSpectrum, str]
+        The parsed spectrum and the source link.
+
+    Raises
+    ------
+    UsiError
+        400 if the index flag is not `accession`; 404 if the record could not
+        be retrieved or its JSON does not have the expected layout. Every
+        retrieval failure is reported as UsiError so that `_parse_massbank`
+        can fall back to MoNA.
+    """
+    match = _match_usi(usi)
+    index_flag = match.group(3)
+    if index_flag.lower() != "accession":
+        raise UsiError(
+            "Currently supported MassBank index flags: accession", 400
+        )
+
+    index = match.group(4)
+
+    try:
+        lookup_request = requests.get(
+            f"{MASSBANKEUROPE_SERVER}{index}", timeout=timeout
+        )
+        lookup_request.raise_for_status()
+        spectrum_dict = lookup_request.json()
+
+        # Peak list as delivered by the MassBank Europe record API.
+        peaks = spectrum_dict["peak"]["peak"]["values"]
+        mz = [peak["mz"] for peak in peaks]
+        intensity = [peak["intensity"] for peak in peaks]
+
+        # The precursor is optional: use 0 when the record has no focused-ion
+        # section or no PRECURSOR_M/Z entry in it.
+        focused_ions = (
+            (spectrum_dict.get("mass_spectrometry") or {}).get("focused_ion")
+            or []
+        )
+        precursor_mz = next(
+            (
+                float(item["value"])
+                for item in focused_ions
+                if item["subtag"] == "PRECURSOR_M/Z"
+            ),
+            0,
+        )
+    except (
+        requests.exceptions.RequestException,
+        ValueError,
+        KeyError,
+        TypeError,
+    ):
+        # Covers an unreachable server, timeouts, HTTP errors, invalid JSON
+        # (a ValueError subclass) and unexpected payload shapes.
+        raise UsiError("Unknown MassBank USI", 404)
+
+    source_link = (
+        f"https://massbank.eu/MassBank/" f"RecordDisplay?id={index}"
+    )
+    spectrum = sus.MsmsSpectrum(usi, precursor_mz, 0, mz, intensity)
+    return spectrum, source_link
+
+
# Parse MS2LDA from ms2lda.org.
def _parse_ms2lda(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
@@ -490,29 +763,44 @@ def _parse_msv_pxd(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
scan = match.group(4)
try:
lookup_url = (
- f"https://massive.ucsd.edu/ProteoSAFe/"
+ f"https://proteomics3.ucsd.edu/ProteoSAFe/"
f"QuerySpectrum?id={urllib.parse.quote_plus(usi)}"
)
lookup_request = requests.get(lookup_url, timeout=timeout)
- lookup_request.raise_for_status()
+ try:
+ lookup_request.raise_for_status()
+ except:
+ lookup_url = (
+ f"https://proteomics3.ucsd.edu/ProteoSAFe/"
+ f"QuerySpectrum?id={urllib.parse.quote_plus(usi)}"
+ )
+ lookup_request = requests.get(lookup_url, timeout=timeout)
+ lookup_request.raise_for_status()
+
lookup_json = lookup_request.json()
for spectrum_file in lookup_json["row_data"]:
+ # Checking if its an actual file we can resolve or if MSV will go to PX directly
if any(
spectrum_file["file_descriptor"].lower().endswith(extension)
for extension in ["mzml", "mzxml", "mgf"]
- ):
- request_url = (
+ ) or spectrum_file["file_descriptor"].startswith("f.ProteomeCentral"):
+ file_descriptor = spectrum_file['file_descriptor']
+ if file_descriptor.startswith("f."):
+ file_descriptor = file_descriptor[2:]
+
+ peaks_request_url = (
f"https://massive.ucsd.edu/ProteoSAFe/"
f"DownloadResultFile?"
f"task=4f2ac74ea114401787a7e96e143bb4a1&"
f"invoke=annotatedSpectrumImageText&block=0&file=FILE->"
- f"{urllib.parse.quote(spectrum_file['file_descriptor'])}"
+ f"{urllib.parse.quote(file_descriptor)}"
f"&scan={scan}&peptide=*..*&force=false&"
f"format=JSON&uploadfile=True"
)
+
try:
spectrum_request = requests.get(
- request_url, timeout=timeout
+ peaks_request_url, timeout=timeout
)
spectrum_request.raise_for_status()
spectrum_dict = spectrum_request.json()
@@ -569,6 +857,7 @@ def _parse_msv_pxd(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
return spectrum, source_link
except requests.exceptions.HTTPError:
+ raise
pass
raise UsiError("Unsupported/unknown USI", 404)
@@ -596,6 +885,57 @@ def _parse_motifdb(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
raise UsiError("Unknown MOTIFDB USI", 404)
+# Parse Metabolomics Workbench (MW) study spectra.
+def _parse_metabolomics_workbench(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
+ """
+ Resolve a Metabolomics Workbench USI via the MW `data/ms2.php` endpoint.
+
+ The collection identifier is used as the study accession, the msRun part as
+ the file name and the index as the scan. The text response is parsed as
+ follows: precursor m/z from its first line, charge from its third line, and
+ the peak table (columns "m/z" and "intensity") after skipping four rows.
+
+ Parameters
+ ----------
+ usi : str
+ The USI to be parsed.
+
+ Returns
+ -------
+ Tuple[sus.MsmsSpectrum, str]
+ The parsed spectrum and a link to the MW study page.
+
+ Raises
+ ------
+ UsiError
+ 400 if the index flag is not `scan`; 404 if MW returns an HTTP error.
+ """
+ match = _match_usi(usi)
+ accession = match.group(1)
+ filename = match.group(2)
+ index_flag = match.group(3)
+ index = match.group(4)
+
+ if index_flag.lower() != "scan":
+ raise UsiError(
+ "Currently supported MW index flags: scan", 400
+ )
+ try:
+ request_url = (
+ f"https://www.metabolomicsworkbench.org/"
+ f"data/ms2.php?A={accession}.zip"
+ f"&F={urllib.parse.quote_plus(filename)}&S={index}"
+ )
+
+ # TODO: Do some extra exception handling if we don't find the filename directly. We might need to hit another API to get the full filename
+ # given just the basename
+
+ lookup_request = requests.get(request_url, timeout=timeout)
+ lookup_request.raise_for_status()
+
+ response_text = lookup_request.text
+ # NOTE(review): the two replace() literals below look damaged in this copy
+ # of the patch (the first pattern is empty and the second literal spans a
+ # raw line break) -- confirm against the repository before applying.
+ response_text = (response_text.replace("", "").replace("
", "").lstrip().rstrip())
+
+ # Parsing the MW Response
+ # Line 1 holds the precursor m/z and line 3 the charge: take the text after
+ # the last ":" and drop the double quotes.
+ precursor_mz = float(response_text.split("\n")[0].split(":")[-1].replace("\"", ""))
+ charge = int(response_text.split("\n")[2].split(":")[-1].replace("\"", ""))
+ # The peak table is separated by runs of spaces; skip the first four rows.
+ peaks_df = pd.read_csv(StringIO(response_text), sep=r" +", skiprows=4)
+ mz = list(peaks_df["m/z"])
+ intensity = list(peaks_df["intensity"])
+
+ source_link = (
+ f"https://www.metabolomicsworkbench.org/"
+ f"data/DRCCMetadata.php?Mode=Study&StudyID={accession}&StudyType=MS&ResultType=1"
+ )
+
+ spectrum = sus.MsmsSpectrum(
+ usi,
+ float(precursor_mz),
+ int(charge),
+ mz,
+ intensity,
+ )
+ return spectrum, source_link
+ except requests.exceptions.HTTPError:
+ raise UsiError("Unknown MW USI", 404)
+
def _parse_sequence(peptide: str, peptide_clean: str) -> Tuple[str, str, list]:
# Parse out gapped sequence (e.g. X+129.04259), faking it
# with Glycine as the base residue and adding more mods to
diff --git a/metabolomics_spectrum_resolver/templates/homepage.html b/metabolomics_spectrum_resolver/templates/homepage.html
index 2e827bd..4efe0e0 100644
--- a/metabolomics_spectrum_resolver/templates/homepage.html
+++ b/metabolomics_spectrum_resolver/templates/homepage.html
@@ -80,24 +80,8 @@
-
-
-
-
-
-
-
-
@@ -136,7 +120,7 @@
diff --git a/metabolomics_spectrum_resolver/views.py b/metabolomics_spectrum_resolver/views.py
index 17becf0..61a20eb 100644
--- a/metabolomics_spectrum_resolver/views.py
+++ b/metabolomics_spectrum_resolver/views.py
@@ -625,6 +625,10 @@ def generate_qr():
qr_bytes.seek(0)
return flask.send_file(qr_bytes, "image/png")
+@blueprint.route("/robots.txt")
+@blueprint.route("/robot.txt")
+def robot():
+    """
+    Serve a robots file that disallows all crawling.
+
+    Crawlers request `/robots.txt`; the original `/robot.txt` path is kept
+    as an alias so existing links keep working. The body is sent as plain
+    text rather than Flask's default HTML content type.
+    """
+    # Disallow all
+    return (
+        "User-agent: *\nDisallow: /\n",
+        200,
+        {"Content-Type": "text/plain; charset=utf-8"},
+    )
@blueprint.errorhandler(Exception)
def render_error(error):
diff --git a/requirements.txt b/requirements.txt
index fdddf45..62ea414 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,5 +19,5 @@ requests
requests_cache
scipy
spectrum_utils
-werkzeug
-git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python
+werkzeug==2.0.0
+git+https://github.com/berlinguyinca/spectra-hash.git#subdirectory=python
\ No newline at end of file
diff --git a/run_worker.sh b/run_worker.sh
index 2073a4f..7b9f227 100755
--- a/run_worker.sh
+++ b/run_worker.sh
@@ -3,4 +3,4 @@ source activate usi
export C_FORCE_ROOT="true"
#TODO: Make sure we don't run this worker as root
-celery -A metabolomics_spectrum_resolver.tasks worker -l info --autoscale=12,1 -Q worker --max-tasks-per-child 10 --loglevel INFO
+celery -A metabolomics_spectrum_resolver.tasks worker -l info --autoscale=16,1 -Q worker --max-tasks-per-child 10 --loglevel INFO
diff --git a/test/usi_test_data.py b/test/usi_test_data.py
index b80eee0..08357aa 100644
--- a/test/usi_test_data.py
+++ b/test/usi_test_data.py
@@ -10,6 +10,8 @@
"mzspec:GNPS:GNPS-LIBRARY:accession:CCMSLIB00005436077",
"mzspec:MASSBANK::accession:SM858102",
"mzspec:MASSBANK::accession:MSBNK-AAFC-AC000646",
+ # New Massbank identifier with lowercase
+ "mzspec:MASSBANK::accession:MSBNK-Athens_Univ-AU259904",
"mzspec:MS2LDA:TASK-190:accession:270684",
"mzspec:MOTIFDB::accession:171163",
"mzspec:MSV000082791:(-)-epigallocatechin:scan:2",
@@ -29,6 +31,8 @@
"mzspec:MassIVE:TASK-f4b86b150a164ee4a440b661e97a7193-spectra/specs_ms.mgf:scan:287215:HPYFYAPELLF[-10.059]FAKR/3",
# MassIVE Task USIs disguised as GNPS Task USIs
"mzspec:GNPS:TASK-f4b86b150a164ee4a440b661e97a7193-spectra/specs_ms.mgf:scan:287215:HPYFYAPELLF[-10.059]FAKR/3",
+ # Metabolomics Workbench USIs
+ "mzspec:ST000003:StemCell+Data+and+Raw+Files/iPSC-T1R1:scan:3",
# Legacy cases.
"mzspec:GNPSTASK-c95481f0c53d42e78a61bf899e9f9adb:spectra/specs_ms.mgf:scan:1943",
"mzspec:GNPSTASK-64b22841ab3548f987b3cfc18696a581:spectra/specs_ms.mgf:scan:1469",