Skip to content

Commit

Permalink
WIP specify threads for compression
Browse files Browse the repository at this point in the history
  • Loading branch information
jameshadfield committed Jun 18, 2024
1 parent de35468 commit fdfb83d
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 5 deletions.
10 changes: 6 additions & 4 deletions vendored/upload-to-s3
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ main() {

local src="${1:?A source file is required as the first argument.}"
local dst="${2:?A destination s3:// URL is required as the second argument.}"
local cloudfront_domain="${3:-}"
# local cloudfront_domain="${3:-}"
local cloudfront_domain=""
local threads="${3:?Need 3 positional arguments for this prototype.}"

local s3path="${dst#s3://}"
local bucket="${s3path%%/*}"
Expand All @@ -32,13 +34,13 @@ main() {
# The record count may have changed
src_record_count="$(wc -l < "$src")"

echo "Uploading $src$dst"
echo "Uploading $src$dst using ${threads} threads"
if [[ "$dst" == *.gz ]]; then
gzip -c "$src"
elif [[ "$dst" == *.xz ]]; then
xz -2 -T0 -c "$src"
xz -2 -T${threads} -c "$src"
elif [[ "$dst" == *.zst ]]; then
zstd -T0 -c "$src"
zstd -T${threads} -c "$src"
else
cat "$src"
fi | aws s3 cp --no-progress - "$dst" --metadata sha256sum="$src_hash",recordcount="$src_record_count" "$(content-type "$dst")"
Expand Down
11 changes: 10 additions & 1 deletion workflow/snakemake_rules/upload.smk
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ def compute_files_to_upload():

files_to_upload = compute_files_to_upload()

def compression_threads(w):
## PROTOTYPE ##
if w.remote_filename.startswith("aligned.fasta"): return 7
if w.remote_filename.startswith("sequences.fasta"): return 7
if w.remote_filename.startswith("gisaid.ndjson"): return 7
return 1

rule upload_single:
input:
file_to_upload = lambda w: files_to_upload[w.remote_filename],
Expand All @@ -91,13 +98,15 @@ rule upload_single:
quiet = "" if send_notifications else "--quiet",
s3_bucket = config.get("s3_dst",""),
cloudfront_domain = config.get("cloudfront_domain", ""),
threads: compression_threads
shell:
"""
./vendored/upload-to-s3 \
{params.quiet} \
{input.file_to_upload:q} \
{params.s3_bucket:q}/{wildcards.remote_filename:q} \
{params.cloudfront_domain} 2>&1 | tee {output}
{params.cloudfront_domain} \
{threads} 2>&1 | tee {output}
"""

rule remove_rerun_touchfile:
Expand Down

0 comments on commit fdfb83d

Please sign in to comment.