-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
update logic for no SRR accessions and invalid samples
- Loading branch information
1 parent
26d8c49
commit 770233c
Showing
30 changed files
with
403 additions
and
42 deletions.
There are no files selected for viewing
36 changes: 36 additions & 0 deletions
36
20241122_125512_fetch_srr_accession/call-fetch_srr/command
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
|
||
# Shell command as rendered for the fetch_srr call; the accession SAMD00010204 is already substituted in. | ||
# Result contract: srr_accession.txt holds either a comma-separated list of accessions or the | ||
# literal text "No SRR accession found"; an invalid accession or a missing TSV ends with exit 1. | ||
# Exit on any failing command, unset variable, or failing pipeline stage. | ||
set -euo pipefail | ||
|
||
# Record the current UTC date and the fastq-dl version (echoed to stdout and saved to DATE / VERSION) | ||
date -u | tee DATE | ||
fastq-dl --version | tee VERSION | ||
|
||
echo "Fetching metadata for accession: SAMD00010204" | ||
|
||
# Run fastq-dl in metadata-only mode and capture its stderr in stderr.log. | ||
# '|| true' stops a non-zero fastq-dl exit from aborting the script under 'set -e'; | ||
# the outcome is decided from stderr.log and the TSV below instead. | ||
# NOTE(review): '-m 2' presumably sets the maximum query attempts — confirm against fastq-dl docs. | ||
fastq-dl --accession SAMD00010204 --only-download-metadata -m 2 --verbose 2> stderr.log || true | ||
|
||
# Classify the run from messages in stderr.log, then fall back to the metadata TSV | ||
# First two branches: no data for this accession -> write the sentinel and finish normally | ||
if grep -q "No results found for" stderr.log; then | ||
echo "No SRR accession found" > srr_accession.txt | ||
echo "No SRR accession found for accession: SAMD00010204" | ||
elif grep -q "received an empty response" stderr.log; then | ||
echo "No SRR accession found" > srr_accession.txt | ||
echo "No SRR accession found for accession: SAMD00010204" | ||
# Accession rejected as invalid -> report on stderr and fail | ||
elif grep -q "is not a Study, Sample, Experiment, or Run accession" stderr.log; then | ||
echo "Invalid accession: SAMD00010204" >&2 | ||
exit 1 | ||
# None of the known messages matched and no TSV was written -> fail | ||
elif [[ ! -f fastq-run-info.tsv ]]; then | ||
echo "No metadata file found for accession: SAMD00010204" >&2 | ||
exit 1 | ||
else | ||
# Take column 1 of every row after the header line and join the values with commas | ||
SRR_accessions=$(awk -F'\t' 'NR>1 {print $1}' fastq-run-info.tsv | paste -sd ',' -) | ||
# A TSV with no data rows yields an empty string -> same sentinel as the no-results branches | ||
if [[ -z "${SRR_accessions}" ]]; then | ||
echo "No SRR accession found" > srr_accession.txt | ||
else | ||
echo "Extracted SRR accessions: ${SRR_accessions}" | ||
echo "${SRR_accessions}" > srr_accession.txt | ||
fi | ||
fi | ||
|
3 changes: 3 additions & 0 deletions
3
20241122_125512_fetch_srr_accession/call-fetch_srr/inputs.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"sample_accession": "SAMD00010204" | ||
} |
4 changes: 4 additions & 0 deletions
4
20241122_125512_fetch_srr_accession/call-fetch_srr/outputs.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
{ | ||
"fetch_srr_accession.fastq_dl_version": "fastq-dl, version 2.0.4", | ||
"fetch_srr_accession.srr_accession": "No SRR accession found" | ||
} |
Empty file.
2 changes: 2 additions & 0 deletions
2
20241122_125512_fetch_srr_accession/call-fetch_srr/stderr.txt.offset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
17043145 | ||
0 |
6 changes: 6 additions & 0 deletions
6
20241122_125512_fetch_srr_accession/call-fetch_srr/stdout.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
Fri Nov 22 18:55:14 UTC 2024 | ||
fastq-dl, version 2.0.4 | ||
Fetching metadata for accession: SAMD00010204 | ||
No results found for SAMD00010204 | ||
No results found for SAMD00010204 | ||
No SRR accession found for accession: SAMD00010204 |
17 changes: 17 additions & 0 deletions
17
20241122_125512_fetch_srr_accession/call-fetch_srr/task.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
2024-11-22 12:55:12.808 wdl.w:fetch_srr_accession.t:call-fetch_srr NOTICE task setup :: name: "fetch_srr_accession", source: "../../../tasks/utilities/data_handling/task_fetch_srr_accession.wdl", line: 3, column: 1, dir: "/home/frasc/bioinformatics_projects/public_health_bioinformatics/20241122_125512_fetch_srr_accession/call-fetch_srr", thread: 132198141265472 | ||
2024-11-22 12:55:12.958 wdl.w:fetch_srr_accession.t:call-fetch_srr NOTICE docker swarm resources :: workers: 1, max_cpus: 4, max_mem_bytes: 16767336448, total_cpus: 4, total_mem_bytes: 16767336448 | ||
2024-11-22 12:55:12.958 wdl.w:fetch_srr_accession.t:call-fetch_srr INFO input :: name: "sample_accession", value: "SAMD00010204" | ||
2024-11-22 12:55:12.959 wdl.w:fetch_srr_accession.t:call-fetch_srr INFO eval :: name: "memory", value: 8 | ||
2024-11-22 12:55:12.960 wdl.w:fetch_srr_accession.t:call-fetch_srr INFO eval :: name: "docker", value: "us-docker.pkg.dev/general-theiagen/biocontainers/fastq-dl:2.0.4--pyhdfd78af_0" | ||
2024-11-22 12:55:12.961 wdl.w:fetch_srr_accession.t:call-fetch_srr INFO eval :: name: "cpu", value: 2 | ||
2024-11-22 12:55:12.962 wdl.w:fetch_srr_accession.t:call-fetch_srr INFO eval :: name: "disk_size", value: 10 | ||
2024-11-22 12:55:12.963 wdl.w:fetch_srr_accession.t:call-fetch_srr INFO effective runtime :: docker: "us-docker.pkg.dev/general-theiagen/biocontainers/fastq-dl:2.0.4--pyhdfd78af_0", cpu: 2, memory_reservation: 8000000000, preemptible: 1 | ||
2024-11-22 12:55:12.964 wdl.w:fetch_srr_accession.t:call-fetch_srr WARNING ignored runtime settings :: keys: ["disks", "disk"] | ||
2024-11-22 12:55:12.978 wdl.w:fetch_srr_accession.t:call-fetch_srr NOTICE docker image :: tag: "us-docker.pkg.dev/general-theiagen/biocontainers/fastq-dl:2.0.4--pyhdfd78af_0", id: "sha256:c6689b7f5754d89574331af9a748cdb84e89107ecfafe8855fcdc745d41f0674", RepoDigest: "us-docker.pkg.dev/general-theiagen/biocontainers/fastq-dl@sha256:c0a1484561017e0f14e9cb8ceddfac2f28e3576a9bf1a8b743bd12183f4e38b4" | ||
2024-11-22 12:55:14.613 wdl.w:fetch_srr_accession.t:call-fetch_srr NOTICE docker task running :: service: "b752vpzdc7g1", task: "rl8xyzauey", node: "t2vz2h1tc6", message: "started" | ||
2024-11-22 12:55:40.942 wdl.w:fetch_srr_accession.t:call-fetch_srr INFO docker task complete :: service: "b752vpzdc7g1", task: "rl8xyzauey", node: "t2vz2h1tc6", message: "finished" | ||
2024-11-22 12:55:40.942 wdl.w:fetch_srr_accession.t:call-fetch_srr NOTICE docker task exit :: state: "complete", exit_code: 0 | ||
2024-11-22 12:55:41.247 wdl.w:fetch_srr_accession.t:call-fetch_srr INFO command stdout unused; consider output `File cmd_out = stdout()` or redirect command to stderr log >&2 :: stdout_file: "/home/frasc/bioinformatics_projects/public_health_bioinformatics/20241122_125512_fetch_srr_accession/call-fetch_srr/stdout.txt" | ||
2024-11-22 12:55:41.247 wdl.w:fetch_srr_accession.t:call-fetch_srr INFO output :: name: "srr_accession", value: "No SRR accession found" | ||
2024-11-22 12:55:41.248 wdl.w:fetch_srr_accession.t:call-fetch_srr INFO output :: name: "fastq_dl_version", value: "fastq-dl, version 2.0.4" | ||
2024-11-22 12:55:41.250 wdl.w:fetch_srr_accession.t:call-fetch_srr NOTICE done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Fri Nov 22 18:55:14 UTC 2024 |
1 change: 1 addition & 0 deletions
1
20241122_125512_fetch_srr_accession/call-fetch_srr/work/VERSION
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
fastq-dl, version 2.0.4 |
1 change: 1 addition & 0 deletions
1
20241122_125512_fetch_srr_accession/call-fetch_srr/work/srr_accession.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
No SRR accession found |
98 changes: 98 additions & 0 deletions
98
20241122_125512_fetch_srr_accession/call-fetch_srr/work/stderr.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
2024-11-22 18:55:17 DEBUG 2024-11-22 18:55:17:root:DEBUG - fastq_dl.py:500 | ||
Querying ENA for metadata (Attempt | ||
1 of 2) | ||
DEBUG 2024-11-22 connectionpool.py:1048 | ||
18:55:17:urllib3.connectionp | ||
ool:DEBUG - Starting new | ||
HTTPS connection (1): | ||
www.ebi.ac.uk:443 | ||
2024-11-22 18:55:18 DEBUG 2024-11-22 connectionpool.py:546 | ||
18:55:18:urllib3.connectionpo | ||
ol:DEBUG - | ||
https://www.ebi.ac.uk:443 | ||
"GET | ||
/ena/portal/api/search?result | ||
=read_run&format=tsv&query=%2 | ||
2(sample_accession=SAMD000102 | ||
04%20OR%20secondary_sample_ac | ||
cession=SAMD00010204)%22&fiel | ||
ds=all HTTP/1.1" 200 2973 | ||
WARNING 2024-11-22 18:55:18:root:WARNING - fastq_dl.py:531 | ||
Querying ENA was unsuccessful, | ||
retrying after (10 seconds) | ||
2024-11-22 18:55:28 DEBUG 2024-11-22 18:55:28:root:DEBUG - fastq_dl.py:504 | ||
Querying SRA for metadata (Attempt | ||
1 of 2) | ||
DEBUG 2024-11-22 connectionpool.py:1048 | ||
18:55:28:urllib3.connectionp | ||
ool:DEBUG - Starting new | ||
HTTPS connection (1): | ||
www.ebi.ac.uk:443 | ||
2024-11-22 18:55:29 DEBUG 2024-11-22 connectionpool.py:546 | ||
18:55:29:urllib3.connectionpo | ||
ol:DEBUG - | ||
https://www.ebi.ac.uk:443 | ||
"GET | ||
/ena/portal/api/search?result | ||
=read_run&format=tsv&query=%2 | ||
2(sample_accession=SAMD000102 | ||
04%20OR%20secondary_sample_ac | ||
cession=SAMD00010204)%22&fiel | ||
ds=all HTTP/1.1" 200 2973 | ||
DEBUG 2024-11-22 18:55:29:root:DEBUG - fastq_dl.py:514 | ||
Failed to get metadata from ENA. | ||
Trying SRA... | ||
DEBUG 2024-11-22 connectionpool.py:1048 | ||
18:55:29:urllib3.connectionp | ||
ool:DEBUG - Starting new | ||
HTTPS connection (1): | ||
eutils.ncbi.nlm.nih.gov:443 | ||
DEBUG 2024-11-22 connectionpool.py:546 | ||
18:55:29:urllib3.connectionpo | ||
ol:DEBUG - | ||
https://eutils.ncbi.nlm.nih.g | ||
ov:443 "POST | ||
/entrez/eutils/esearch.fcgi | ||
HTTP/1.1" 200 None | ||
WARNING 2024-11-22 18:55:29:root:WARNING - fastq_dl.py:525 | ||
Querying SRA was unsuccessful, | ||
retrying after (10 seconds) | ||
2024-11-22 18:55:39 DEBUG 2024-11-22 18:55:39:root:DEBUG - fastq_dl.py:504 | ||
Querying SRA for metadata (Attempt | ||
2 of 2) | ||
DEBUG 2024-11-22 connectionpool.py:1048 | ||
18:55:39:urllib3.connectionp | ||
ool:DEBUG - Starting new | ||
HTTPS connection (1): | ||
www.ebi.ac.uk:443 | ||
DEBUG 2024-11-22 connectionpool.py:546 | ||
18:55:39:urllib3.connectionpo | ||
ol:DEBUG - | ||
https://www.ebi.ac.uk:443 | ||
"GET | ||
/ena/portal/api/search?result | ||
=read_run&format=tsv&query=%2 | ||
2(sample_accession=SAMD000102 | ||
04%20OR%20secondary_sample_ac | ||
cession=SAMD00010204)%22&fiel | ||
ds=all HTTP/1.1" 200 2973 | ||
DEBUG 2024-11-22 connectionpool.py:1048 | ||
18:55:39:urllib3.connectionp | ||
ool:DEBUG - Starting new | ||
HTTPS connection (1): | ||
eutils.ncbi.nlm.nih.gov:443 | ||
2024-11-22 18:55:40 DEBUG 2024-11-22 connectionpool.py:546 | ||
18:55:40:urllib3.connectionpo | ||
ol:DEBUG - | ||
https://eutils.ncbi.nlm.nih.g | ||
ov:443 "POST | ||
/entrez/eutils/esearch.fcgi | ||
HTTP/1.1" 200 None | ||
ERROR 2024-11-22 18:55:40:root:ERROR - fastq_dl.py:519 | ||
There was an issue querying ENA and | ||
SRA, exiting... | ||
ERROR 2024-11-22 18:55:40:root:ERROR - fastq_dl.py:520 | ||
STATUS: 200 | ||
ERROR 2024-11-22 18:55:40:root:ERROR - fastq_dl.py:521 | ||
TEXT: Query was successful, but | ||
received an empty response |
6 changes: 6 additions & 0 deletions
6
20241122_125512_fetch_srr_accession/call-version_capture/command
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
|
||
# Shell command as rendered for the version_capture call. | ||
# Release label that is written to PHB_VERSION below | ||
PHB_Version="PHB v2.2.1" | ||
|
||
# Write today's date (YYYY-MM-DD) to TODAY and the release label to PHB_VERSION | ||
date +"%Y-%m-%d" > TODAY | ||
echo "$PHB_Version" > PHB_VERSION | ||
|
1 change: 1 addition & 0 deletions
1
20241122_125512_fetch_srr_accession/call-version_capture/inputs.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{} |
4 changes: 4 additions & 0 deletions
4
20241122_125512_fetch_srr_accession/call-version_capture/outputs.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
{ | ||
"version_capture.date": "2024-11-22", | ||
"version_capture.phb_version": "PHB v2.2.1" | ||
} |
Empty file.
2 changes: 2 additions & 0 deletions
2
20241122_125512_fetch_srr_accession/call-version_capture/stderr.txt.offset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
17043147 | ||
0 |
Empty file.
11 changes: 11 additions & 0 deletions
11
20241122_125512_fetch_srr_accession/call-version_capture/task.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
2024-11-22 12:55:12.807 wdl.w:fetch_srr_accession.t:call-version_capture NOTICE task setup :: name: "version_capture", source: "../../../tasks/task_versioning.wdl", line: 3, column: 1, dir: "/home/frasc/bioinformatics_projects/public_health_bioinformatics/20241122_125512_fetch_srr_accession/call-version_capture", thread: 132198151751232 | ||
2024-11-22 12:55:12.960 wdl.w:fetch_srr_accession.t:call-version_capture INFO eval :: name: "docker", value: "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | ||
2024-11-22 12:55:12.960 wdl.w:fetch_srr_accession.t:call-version_capture INFO eval :: name: "timezone", value: null | ||
2024-11-22 12:55:12.963 wdl.w:fetch_srr_accession.t:call-version_capture INFO effective runtime :: docker: "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0", cpu: 1, memory_reservation: 1000000000, preemptible: 1 | ||
2024-11-22 12:55:12.964 wdl.w:fetch_srr_accession.t:call-version_capture WARNING ignored runtime settings :: keys: ["disks", "dx_instance_type"] | ||
2024-11-22 12:55:12.980 wdl.w:fetch_srr_accession.t:call-version_capture NOTICE docker image :: tag: "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0", id: "sha256:e5b3b43b59e1cd3267788b867d9d4c84d4ffc8236278541b3cc6963784c57a5f", RepoDigest: "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash@sha256:f62289e07dea809f88322fbed3a42057f95177e44c8622a38baf22e8113d1ab0" | ||
2024-11-22 12:55:15.058 wdl.w:fetch_srr_accession.t:call-version_capture INFO docker task complete :: service: "q7zvoncm26cc", task: "k6un27duii", node: "t2vz2h1tc6", message: "finished" | ||
2024-11-22 12:55:15.058 wdl.w:fetch_srr_accession.t:call-version_capture NOTICE docker task exit :: state: "complete", exit_code: 0 | ||
2024-11-22 12:55:15.412 wdl.w:fetch_srr_accession.t:call-version_capture INFO output :: name: "date", value: "2024-11-22" | ||
2024-11-22 12:55:15.413 wdl.w:fetch_srr_accession.t:call-version_capture INFO output :: name: "phb_version", value: "PHB v2.2.1" | ||
2024-11-22 12:55:15.415 wdl.w:fetch_srr_accession.t:call-version_capture NOTICE done |
1 change: 1 addition & 0 deletions
1
20241122_125512_fetch_srr_accession/call-version_capture/work/PHB_VERSION
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
PHB v2.2.1 |
1 change: 1 addition & 0 deletions
1
20241122_125512_fetch_srr_accession/call-version_capture/work/TODAY
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
2024-11-22 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"fetch_srr_accession.sample_accession": "SAMD00010204" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"fetch_srr_accession.fetch_srr_accession_analysis_date": "2024-11-22", | ||
"fetch_srr_accession.fetch_srr_accession_version": "PHB v2.2.1", | ||
"fetch_srr_accession.srr_accession": "No SRR accession found" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
pushd /home/frasc/bioinformatics_projects/public_health_bioinformatics && miniwdl run --verbose /home/frasc/bioinformatics_projects/public_health_bioinformatics/workflows/utilities/data_import/wf_fetch_srr_accession.wdl -- sample_accession=SAMD00010204; popd |
30 changes: 30 additions & 0 deletions
30
20241122_125512_fetch_srr_accession/wdl/tasks/task_versioning.wdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
version 1.0 | ||
|
||
# Captures the PHB release label and the current date so workflows can report them. | ||
task version_capture { | ||
input { | ||
# Container image the command runs in | ||
String docker = "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | ||
# Optional TZ value; when supplied it is exported before `date` is called | ||
String? timezone | ||
} | ||
meta { | ||
volatile: true | ||
} | ||
command { | ||
# Renders to nothing when no timezone is given | ||
~{default='' 'export TZ=' + timezone} | ||
# Release label goes to PHB_VERSION, the date (YYYY-MM-DD) to TODAY | ||
echo "PHB v2.2.1" > PHB_VERSION | ||
date "+%Y-%m-%d" > TODAY | ||
} | ||
output { | ||
String phb_version = read_string("PHB_VERSION") | ||
String date = read_string("TODAY") | ||
} | ||
runtime { | ||
docker: docker | ||
cpu: 1 | ||
memory: "1 GB" | ||
disks: "local-disk 10 HDD" | ||
preemptible: 1 | ||
dx_instance_type: "mem1_ssd1_v2_x2" | ||
} | ||
} | ||
|
65 changes: 65 additions & 0 deletions
65
...125512_fetch_srr_accession/wdl/tasks/utilities/data_handling/task_fetch_srr_accession.wdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
version 1.0 | ||
|
||
# Looks up the run accession(s) recorded for `sample_accession` with fastq-dl's metadata-only mode. | ||
# | ||
# Outputs: | ||
#   srr_accession    - comma-separated values from column 1 of fastq-run-info.tsv, or the | ||
#                      literal "No SRR accession found" when nothing is available | ||
#   fastq_dl_version - the text printed by `fastq-dl --version` | ||
# | ||
# The command exits 1 (failing the task) when fastq-dl reports the accession as invalid, or when | ||
# no known message matched and no fastq-run-info.tsv was produced. | ||
task fetch_srr_accession { | ||
input { | ||
# Accession to look up | ||
String sample_accession | ||
# Container image providing fastq-dl | ||
String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/fastq-dl:2.0.4--pyhdfd78af_0" | ||
# Disk size in GB, CPU count, and memory in GB requested in the runtime section | ||
Int disk_size = 10 | ||
Int cpu = 2 | ||
Int memory = 8 | ||
} | ||
meta { | ||
volatile: true | ||
} | ||
|
||
command <<< | ||
# Exit on any failing command, unset variable, or failing pipeline stage | ||
set -euo pipefail | ||
|
||
# Output the current date and fastq-dl version for debugging | ||
date -u | tee DATE | ||
fastq-dl --version | tee VERSION | ||
|
||
echo "Fetching metadata for accession: ~{sample_accession}" | ||
|
||
# Run fastq-dl and capture stderr. The accession is quoted so that whitespace or glob | ||
# characters in the input cannot split it into several arguments or expand to file names. | ||
# '|| true' keeps a non-zero fastq-dl exit from aborting under 'set -e'; the outcome is | ||
# decided from stderr.log and the TSV below. | ||
fastq-dl --accession "~{sample_accession}" --only-download-metadata -m 2 --verbose 2> stderr.log || true | ||
|
||
# Handle whether the ID/accession is valid and contains SRR metadata based on stderr. | ||
# Both "no data" messages lead to the same result, so they are matched in one grep. | ||
# NOTE(review): confirm fastq-dl writes "No results found for" to stderr rather than stdout. | ||
if grep -q -e "No results found for" -e "received an empty response" stderr.log; then | ||
echo "No SRR accession found" > srr_accession.txt | ||
echo "No SRR accession found for accession: ~{sample_accession}" | ||
elif grep -q "is not a Study, Sample, Experiment, or Run accession" stderr.log; then | ||
echo "Invalid accession: ~{sample_accession}" >&2 | ||
exit 1 | ||
elif [[ ! -f fastq-run-info.tsv ]]; then | ||
echo "No metadata file found for accession: ~{sample_accession}" >&2 | ||
exit 1 | ||
else | ||
# Extract SRR accessions from the TSV file: column 1 of every row after the header, comma-joined | ||
SRR_accessions=$(awk -F'\t' 'NR>1 {print $1}' fastq-run-info.tsv | paste -sd ',' -) | ||
if [[ -z "${SRR_accessions}" ]]; then | ||
# TSV exists but has no data rows: same sentinel and log line as the no-results branch | ||
echo "No SRR accession found" > srr_accession.txt | ||
echo "No SRR accession found for accession: ~{sample_accession}" | ||
else | ||
echo "Extracted SRR accessions: ${SRR_accessions}" | ||
echo "${SRR_accessions}" > srr_accession.txt | ||
fi | ||
fi | ||
>>> | ||
|
||
output { | ||
String srr_accession = read_string("srr_accession.txt") | ||
String fastq_dl_version = read_string("VERSION") | ||
} | ||
|
||
runtime { | ||
docker: docker | ||
memory: "~{memory} GB" | ||
cpu: cpu | ||
disks: "local-disk " + disk_size + " SSD" | ||
disk: disk_size + " GB" | ||
preemptible: 1 | ||
} | ||
} | ||
26 changes: 26 additions & 0 deletions
26
...125512_fetch_srr_accession/wdl/workflows/utilities/data_import/wf_fetch_srr_accession.wdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
version 1.0 | ||
|
||
import "../../../tasks/utilities/data_handling/task_fetch_srr_accession.wdl" as srr_task | ||
import "../../../tasks/task_versioning.wdl" as versioning_task | ||
|
||
# Workflow wrapper: runs the fetch_srr_accession task for one accession and reports the | ||
# result together with the PHB version and analysis date from version_capture. | ||
workflow fetch_srr_accession { | ||
meta { | ||
description: "This workflow retrieves the Sequence Read Archive (SRA) accession (SRR) associated with a given sample accession. It uses the fastq-dl tool to fetch metadata from SRA and outputs the SRR accession." | ||
} | ||
input { | ||
String sample_accession | ||
} | ||
# Look up the run accession(s) for the requested sample | ||
call srr_task.fetch_srr_accession as fetch_srr { | ||
input: | ||
sample_accession = sample_accession | ||
} | ||
# No inputs are passed, so the task runs with its defaults | ||
call versioning_task.version_capture | ||
output { | ||
# Version Captures | ||
String fetch_srr_accession_analysis_date = version_capture.date | ||
String fetch_srr_accession_version = version_capture.phb_version | ||
# Lookup result from the fetch_srr call | ||
String srr_accession = fetch_srr.srr_accession | ||
} | ||
} | ||
Oops, something went wrong.