From 1645651295dcec6f25f27e619d79266eaa3c1c04 Mon Sep 17 00:00:00 2001 From: Kilian Cavalotti Date: Sun, 18 Feb 2024 23:23:48 -0800 Subject: [PATCH 01/13] docs: updated facts --- includes/data/facts.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/includes/data/facts.yml b/includes/data/facts.yml index 749ded1b9..dbee2c2a1 100644 --- a/includes/data/facts.yml +++ b/includes/data/facts.yml @@ -2,9 +2,9 @@ facts: - name: users fields: - name: user accounts - value: 7085 + value: 7116 - name: PI groups - value: 1116 + value: 1117 desc: "from all Stanford's seven Schools, SLAC, Stanford Institutes, _etc._" - name: owner groups value: 201 @@ -41,7 +41,7 @@ facts: icon: plug fields: - name: kW - value: 548.42 + value: 506.39 desc: total power usage - name: PDUs value: 57 @@ -71,11 +71,11 @@ facts: - name: Slurm partitions value: 178 - name: CPU.hours/day - value: 45679 + value: 44505 desc: over **5 years** of computing in a single day - name: /month fmt: "${:,.0f}" - value: 3052120.7296 + value: 2973675.0336 desc: to run the same workload on t2.large on-demand cloud instances - name: partitions From b66c3549b82a8ae8d58645c48f3c0448d686f1ef Mon Sep 17 00:00:00 2001 From: Kilian Cavalotti Date: Wed, 21 Feb 2024 09:30:49 -0800 Subject: [PATCH 02/13] docs: updated software list --- includes/data/software.yml | 4 ++- src/docs/software/updates.xml | 54 ++++++----------------------------- 2 files changed, 12 insertions(+), 46 deletions(-) diff --git a/includes/data/software.yml b/includes/data/software.yml index 53a1a6156..eedf7bac1 100644 --- a/includes/data/software.yml +++ b/includes/data/software.yml @@ -4201,7 +4201,7 @@ software_modules: from different cohorts, or time-series from the same subject.', markedDefault: false, versionName: '12'} - categories: biology, genomics - defaultVersionName: 0.3.4 + defaultVersionName: 0.5.3 description: Dorado is a high-performance, easy-to-use, open source basecaller for Oxford Nanopore reads. 
package: dorado @@ -4209,6 +4209,8 @@ software_modules: versions: - {description: 'Dorado is a high-performance, easy-to-use, open source basecaller for Oxford Nanopore reads.', markedDefault: false, versionName: 0.3.4} + - {description: 'Dorado is a high-performance, easy-to-use, open source basecaller + for Oxford Nanopore reads.', markedDefault: false, versionName: 0.5.3} - categories: biology, neurology defaultVersionName: 21.3.00 description: AFNI (Analysis of Functional NeuroImages) is a set of C programs diff --git a/src/docs/software/updates.xml b/src/docs/software/updates.xml index 6c38e90ff..db091b01b 100644 --- a/src/docs/software/updates.xml +++ b/src/docs/software/updates.xml @@ -5,6 +5,15 @@ Sherlock software update feed https://www.sherlock.stanford.edu/docs/software/list + + New version: biology/dorado version 0.5.3 + Dorado is a high-performance, easy-to-use, open source basecaller for Oxford Nanopore reads. + https://github.com/nanoporetech/dorado + https://www.sherlock.stanford.edu/docs/software/list/?add:v=0.5.3#dorado + biology, genomics + kilian@stanford.edu (Kilian Cavalotti) + Wed, 21 Feb 2024 09:29:37 -0800 + New module: biology/bcl-convert version 4.2.7 The BCL Convert App generates demultiplexed FASTQ files from a run as input. @@ -203,50 +212,5 @@ kilian@stanford.edu (Kilian Cavalotti) Wed, 29 Nov 2023 10:37:32 -0800 - - New version: system/rclone version 1.65.0 - Rclone is a command line program to sync files and directories to and from - https://rclone.org - https://www.sherlock.stanford.edu/docs/software/list/?add:v=1.65.0#rclone - system, file transfer - kilian@stanford.edu (Kilian Cavalotti) - Tue, 28 Nov 2023 07:59:13 -0800 - - - New module: devel/darshan version 3.4.4 - Darshan is a scalable HPC I/O characterization tool. 
- https://www.mcs.anl.gov/research/projects/darshan/ - https://www.sherlock.stanford.edu/docs/software/list/?add:v=3.4.4#darshan - devel, profiling - kilian@stanford.edu (Kilian Cavalotti) - Thu, 16 Nov 2023 17:34:11 -0800 - - - New module: system/py-matlab-proxy version 0.9.1_py39 - matlab-proxy is a Python package which enables you to launch MATLAB and access it from a web browser. - https://github.com/mathworks/matlab-proxy - https://www.sherlock.stanford.edu/docs/software/list/?add:v=0.9.1_py39#py-matlab-proxy - system, tools - kilian@stanford.edu (Kilian Cavalotti) - Thu, 16 Nov 2023 13:19:14 -0800 - - - New version: system/py-globus-cli version 3.19.0_py39 - A command line wrapper over the Globus SDK for Python. - https://github.com/globus/globus-cli - https://www.sherlock.stanford.edu/docs/software/list/?add:v=3.19.0_py39#py-globus-cli - system, file transfer - kilian@stanford.edu (Kilian Cavalotti) - Thu, 16 Nov 2023 13:18:58 -0800 - - - New version: math/rstudio version 2023.09.1 - RStudio is an integrated development environment (IDE) for R. It includes a console, syntax-highlighting editor that supports direct code execution, as well as tools for plotting, history, debugging and workspace management. 
- http://www.rstudio.com - https://www.sherlock.stanford.edu/docs/software/list/?add:v=2023.09.1#rstudio - math, statistics - kilian@stanford.edu (Kilian Cavalotti) - Thu, 16 Nov 2023 13:18:43 -0800 - From 327602a2379422a5a08c28e51b13bb4bcbd68085 Mon Sep 17 00:00:00 2001 From: Kilian Cavalotti Date: Wed, 21 Feb 2024 10:03:31 -0800 Subject: [PATCH 03/13] docs: updated software list --- includes/data/software.yml | 6 +++++- src/docs/software/updates.xml | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/includes/data/software.yml b/includes/data/software.yml index eedf7bac1..2e6055401 100644 --- a/includes/data/software.yml +++ b/includes/data/software.yml @@ -175,7 +175,7 @@ software_modules: quality figures in a variety of hardcopy formats and interactive environments across platforms., family: matplotlib, markedDefault: true, versionName: 2.2.2_py27} - categories: viz, plotting - defaultVersionName: 2.4.1_py27 + defaultVersionName: 5.19.0_py312 description: Plotly's Python graphing library makes interactive, publication-quality graphs online. 
package: py-plotly @@ -183,6 +183,10 @@ software_modules: versions: - {description: 'Plotly''s Python graphing library makes interactive, publication-quality graphs online.', markedDefault: false, versionName: 2.4.1_py27} + - {description: 'Plotly''s Python graphing library makes interactive, publication-quality + graphs online.', markedDefault: false, versionName: 5.19.0_py39} + - {description: 'Plotly''s Python graphing library makes interactive, publication-quality + graphs online.', markedDefault: false, versionName: 5.19.0_py312} - categories: viz, imaging defaultVersionName: 9.2.0_py39 description: Pillow-SIMD is an optimized version of Pillow diff --git a/src/docs/software/updates.xml b/src/docs/software/updates.xml index db091b01b..c5546a045 100644 --- a/src/docs/software/updates.xml +++ b/src/docs/software/updates.xml @@ -5,6 +5,24 @@ Sherlock software update feed https://www.sherlock.stanford.edu/docs/software/list + + New version: viz/py-plotly version 5.19.0_py312 + Plotly's Python graphing library makes interactive, publication-quality graphs online. + https://plot.ly/python/ + https://www.sherlock.stanford.edu/docs/software/list/?add:v=5.19.0_py312#py-plotly + viz, plotting + kilian@stanford.edu (Kilian Cavalotti) + Wed, 21 Feb 2024 10:02:42 -0800 + + + New version: viz/py-plotly version 5.19.0_py39 + Plotly's Python graphing library makes interactive, publication-quality graphs online. + https://plot.ly/python/ + https://www.sherlock.stanford.edu/docs/software/list/?add:v=5.19.0_py39#py-plotly + viz, plotting + kilian@stanford.edu (Kilian Cavalotti) + Wed, 21 Feb 2024 10:02:42 -0800 + New version: biology/dorado version 0.5.3 Dorado is a high-performance, easy-to-use, open source basecaller for Oxford Nanopore reads. 
From c4dd18545d6560833cee25057470ec86c4e251d9 Mon Sep 17 00:00:00 2001 From: Kilian Cavalotti Date: Sun, 25 Feb 2024 23:24:07 -0800 Subject: [PATCH 04/13] docs: updated facts --- includes/data/facts.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/includes/data/facts.yml b/includes/data/facts.yml index dbee2c2a1..99cccbc78 100644 --- a/includes/data/facts.yml +++ b/includes/data/facts.yml @@ -2,9 +2,9 @@ facts: - name: users fields: - name: user accounts - value: 7116 + value: 7135 - name: PI groups - value: 1117 + value: 1119 desc: "from all Stanford's seven Schools, SLAC, Stanford Institutes, _etc._" - name: owner groups value: 201 @@ -41,7 +41,7 @@ facts: icon: plug fields: - name: kW - value: 506.39 + value: 596.96 desc: total power usage - name: PDUs value: 57 @@ -61,7 +61,7 @@ facts: value: 104 desc: across **2** Infiniband fabrics (EDR, HDR) - name: Infiniband cables - value: 5739 + value: 5740 desc: spanning about **30.23 km** - name: Ethernet switches value: 53 @@ -71,11 +71,11 @@ facts: - name: Slurm partitions value: 178 - name: CPU.hours/day - value: 44505 + value: 44222 desc: over **5 years** of computing in a single day - name: /month fmt: "${:,.0f}" - value: 2973675.0336 + value: 2954803.4112 desc: to run the same workload on t2.large on-demand cloud instances - name: partitions From a4df1f1216dc8dee66efbf81c32686575736d528 Mon Sep 17 00:00:00 2001 From: cagancayco Date: Tue, 27 Feb 2024 16:33:34 -0800 Subject: [PATCH 05/13] updating Rclone documentation (#146) * updating Rclone documentation --- src/docs/software/using/rclone.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/docs/software/using/rclone.md b/src/docs/software/using/rclone.md index b6e4e5566..874dc61f6 100644 --- a/src/docs/software/using/rclone.md +++ b/src/docs/software/using/rclone.md @@ -21,6 +21,13 @@ browser, so you will need to connect to Sherlock with **local port forwarding** (`ssh -L`). 
You only need to do this when you are configuring `rclone` for the first time. +!!! Note "Use local terminal for `rclone config`" + + **This method will not work in the Sherlock OnDemand shell.** You will need + to use your local machine's terminal to enable local port forwarding and to + allow `rclone` to communicate with your browser. On Linux and macOS, you + can use the Terminal app; on Windows, you can use the PowerShell app. + When running `rclone config` you will be prompted to enter names and values, indicated by the `>` symbol. To leave it empty, press Enter. From f7e8f975cbe5e4ce463baee544dd5b80596eacbe Mon Sep 17 00:00:00 2001 From: Kilian Cavalotti Date: Sun, 3 Mar 2024 23:24:02 -0800 Subject: [PATCH 06/13] docs: updated facts --- includes/data/facts.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/includes/data/facts.yml b/includes/data/facts.yml index 99cccbc78..d1b47660a 100644 --- a/includes/data/facts.yml +++ b/includes/data/facts.yml @@ -2,9 +2,9 @@ facts: - name: users fields: - name: user accounts - value: 7135 + value: 7155 - name: PI groups - value: 1119 + value: 1121 desc: "from all Stanford's seven Schools, SLAC, Stanford Institutes, _etc._" - name: owner groups value: 201 @@ -41,7 +41,7 @@ facts: icon: plug fields: - name: kW - value: 596.96 + value: 548.15 desc: total power usage - name: PDUs value: 57 @@ -61,7 +61,7 @@ facts: value: 104 desc: across **2** Infiniband fabrics (EDR, HDR) - name: Infiniband cables - value: 5740 + value: 5737 desc: spanning about **30.23 km** - name: Ethernet switches value: 53 @@ -71,11 +71,11 @@ facts: - name: Slurm partitions value: 178 - name: CPU.hours/day - value: 44222 - desc: over **5 years** of computing in a single day + value: 41412 + desc: over **4 years** of computing in a single day - name: /month fmt: "${:,.0f}" - value: 2954803.4112 + value: 2766987.1616 desc: to run the same workload on t2.large on-demand cloud instances - name: partitions From 
e63790745bc3d0ba975af7aa1d79f5254a3b420b Mon Sep 17 00:00:00 2001 From: Kilian Cavalotti Date: Wed, 6 Mar 2024 14:18:08 -0800 Subject: [PATCH 07/13] docs: updated software list --- includes/data/software.yml | 22 ++++++++++++++++- src/docs/software/updates.xml | 45 ++++++++++++++--------------------- 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/includes/data/software.yml b/includes/data/software.yml index 2e6055401..2cfa4fc38 100644 --- a/includes/data/software.yml +++ b/includes/data/software.yml @@ -2302,6 +2302,9 @@ software_modules: - {description: The SciPy library provides many user-friendly and efficient numerical routines such as routines for numerical integration and optimization., family: scipy, markedDefault: false, versionName: 1.10.1_py39} + - {description: The SciPy library provides many user-friendly and efficient + numerical routines such as routines for numerical integration and optimization., + family: scipy, markedDefault: false, versionName: 1.12.0_py312} - {description: The SciPy library provides many user-friendly and efficient numerical routines such as routines for numerical integration and optimization., family: scipy, markedDefault: true, versionName: 1.1.0_py27} @@ -3710,6 +3713,12 @@ software_modules: properties: arch: {gpu: 1} versionName: 4.7.0 + - description: OpenCV (Open Source Computer Vision Library) is an open source + computer vision and machine learning software library. + markedDefault: false + properties: + arch: {gpu: 1} + versionName: 4.9.0 - description: OpenCV (Open Source Computer Vision Library) is an open source computer vision and machine learning software library. 
markedDefault: true @@ -5155,7 +5164,7 @@ software_modules: - {description: A Python framework for the analysis and visualization of trees., markedDefault: false, versionName: 3.0.0_py27} - categories: biology, genomics - defaultVersionName: 0.12.0 + defaultVersionName: 1.10.0 description: Highly-accurate & wicked fast transcript-level quantification from RNA-seq reads using lightweight alignments. package: salmon @@ -5164,6 +5173,9 @@ software_modules: - {description: Highly-accurate & wicked fast transcript-level quantification from RNA-seq reads using lightweight alignments., markedDefault: false, versionName: 0.12.0} + - {description: Highly-accurate & wicked fast transcript-level quantification + from RNA-seq reads using lightweight alignments., markedDefault: false, + versionName: 1.10.0} - categories: biology, genomics defaultVersionName: 1.1.3_py27 description: Fit-Hi-C is a tool for assigning statistical confidence estimates @@ -7097,6 +7109,14 @@ software_modules: properties: arch: {gpu: 1} versionName: 12.2.0 + - description: CUDA is a parallel computing platform and application programming + interface (API) model created by Nvidia. It allows software developers and + software engineers to use a CUDA-enabled graphics processing unit (GPU) + for general purpose processing. + markedDefault: false + properties: + arch: {gpu: 1} + versionName: 12.4.0 - description: CUDA is a parallel computing platform and application programming interface (API) model created by Nvidia. 
It allows software developers and software engineers to use a CUDA-enabled graphics processing unit (GPU) diff --git a/src/docs/software/updates.xml b/src/docs/software/updates.xml index c5546a045..de3debadf 100644 --- a/src/docs/software/updates.xml +++ b/src/docs/software/updates.xml @@ -5,6 +5,24 @@ Sherlock software update feed https://www.sherlock.stanford.edu/docs/software/list + + New version: biology/salmon version 1.10.0 + Highly-accurate & wicked fast transcript-level quantification from RNA-seq reads using lightweight alignments. + https://combine-lab.github.io/salmon + https://www.sherlock.stanford.edu/docs/software/list/?add:v=1.10.0#salmon + biology, genomics + kilian@stanford.edu (Kilian Cavalotti) + Wed, 6 Mar 2024 14:17:16 -0800 + + + New version: devel/cuda version 12.4.0 + CUDA is a parallel computing platform and application programming interface (API) model created by Nvidia. It allows software developers and software engineers to use a CUDA-enabled graphics processing unit (GPU) for general purpose processing. + https://developer.nvidia.com/cuda-toolkit + https://www.sherlock.stanford.edu/docs/software/list/?add:v=12.4.0#cuda + devel, language + kilian@stanford.edu (Kilian Cavalotti) + Tue, 5 Mar 2024 14:39:10 -0800 + New version: viz/py-plotly version 5.19.0_py312 Plotly's Python graphing library makes interactive, publication-quality graphs online. @@ -203,32 +221,5 @@ kilian@stanford.edu (Kilian Cavalotti) Mon, 8 Jan 2024 14:23:50 -0800 - - New version: physics/geos version 3.12.1 - GEOS is a C/C++ library for computational geometry with a focus on algorithms used in geographic information systems (GIS) software. 
- https://libgeos.org - https://www.sherlock.stanford.edu/docs/software/list/?add:v=3.12.1#geos - physics, geoscience - kilian@stanford.edu (Kilian Cavalotti) - Tue, 12 Dec 2023 11:17:10 -0800 - - - New version: biology/kallisto version 0.50.1 - kallisto is a program for quantifying abundances of transcripts from RNA-Seq data using high-throughput sequencing reads. - https://pachterlab.github.io/kallisto/ - https://www.sherlock.stanford.edu/docs/software/list/?add:v=0.50.1#kallisto - biology, genomics - kilian@stanford.edu (Kilian Cavalotti) - Wed, 29 Nov 2023 13:50:00 -0800 - - - New version: system/py-matlab-proxy version 0.10.0_py39 - matlab-proxy is a Python package which enables you to launch MATLAB and access it from a web browser. - https://github.com/mathworks/matlab-proxy - https://www.sherlock.stanford.edu/docs/software/list/?add:v=0.10.0_py39#py-matlab-proxy - system, tools - kilian@stanford.edu (Kilian Cavalotti) - Wed, 29 Nov 2023 10:37:32 -0800 - From 56c8721d6da326a6541c07cae42698e8de6439e6 Mon Sep 17 00:00:00 2001 From: Kilian Cavalotti Date: Sun, 10 Mar 2024 23:23:57 -0700 Subject: [PATCH 08/13] docs: updated facts --- includes/data/facts.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/includes/data/facts.yml b/includes/data/facts.yml index d1b47660a..96f6872b9 100644 --- a/includes/data/facts.yml +++ b/includes/data/facts.yml @@ -2,9 +2,9 @@ facts: - name: users fields: - name: user accounts - value: 7155 + value: 7189 - name: PI groups - value: 1121 + value: 1124 desc: "from all Stanford's seven Schools, SLAC, Stanford Institutes, _etc._" - name: owner groups value: 201 @@ -41,7 +41,7 @@ facts: icon: plug fields: - name: kW - value: 548.15 + value: 558.16 desc: total power usage - name: PDUs value: 57 @@ -61,7 +61,7 @@ facts: value: 104 desc: across **2** Infiniband fabrics (EDR, HDR) - name: Infiniband cables - value: 5737 + value: 5739 desc: spanning about **30.23 km** - name: Ethernet switches value: 53 @@ 
-71,11 +71,11 @@ facts: - name: Slurm partitions value: 178 - name: CPU.hours/day - value: 41412 + value: 41145 desc: over **4 years** of computing in a single day - name: /month fmt: "${:,.0f}" - value: 2766987.1616 + value: 2749185.3376 desc: to run the same workload on t2.large on-demand cloud instances - name: partitions From fa48ab52f630acef04d194c57c5e28222281b167 Mon Sep 17 00:00:00 2001 From: Kilian Cavalotti Date: Sun, 17 Mar 2024 23:23:47 -0700 Subject: [PATCH 09/13] docs: updated facts --- includes/data/facts.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/includes/data/facts.yml b/includes/data/facts.yml index 96f6872b9..f5fab26ce 100644 --- a/includes/data/facts.yml +++ b/includes/data/facts.yml @@ -2,9 +2,9 @@ facts: - name: users fields: - name: user accounts - value: 7189 + value: 7190 - name: PI groups - value: 1124 + value: 1126 desc: "from all Stanford's seven Schools, SLAC, Stanford Institutes, _etc._" - name: owner groups value: 201 @@ -41,7 +41,7 @@ facts: icon: plug fields: - name: kW - value: 558.16 + value: 574.39 desc: total power usage - name: PDUs value: 57 @@ -61,8 +61,8 @@ facts: value: 104 desc: across **2** Infiniband fabrics (EDR, HDR) - name: Infiniband cables - value: 5739 - desc: spanning about **30.23 km** + value: 5733 + desc: spanning about **30.14 km** - name: Ethernet switches value: 53 - name: scheduler @@ -71,11 +71,11 @@ facts: - name: Slurm partitions value: 178 - name: CPU.hours/day - value: 41145 + value: 41542 desc: over **4 years** of computing in a single day - name: /month fmt: "${:,.0f}" - value: 2749185.3376 + value: 2775676.0256 desc: to run the same workload on t2.large on-demand cloud instances - name: partitions From 67cf2c44432d800a97a74ab53967a2c3a4f5acc1 Mon Sep 17 00:00:00 2001 From: Kilian Cavalotti Date: Wed, 20 Mar 2024 11:02:49 -0700 Subject: [PATCH 10/13] pyspelling config: refactor ignores --- .github/workflows/config/spellcheck.yml | 9 +++++---- 1 file changed, 
5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/config/spellcheck.yml b/.github/workflows/config/spellcheck.yml index c73e4d63d..d154de0a6 100644 --- a/.github/workflows/config/spellcheck.yml +++ b/.github/workflows/config/spellcheck.yml @@ -3,6 +3,7 @@ matrix: aspell: lang: en d: en_US + mode: markdown ignore-case: true dictionary: wordlists: @@ -15,6 +16,9 @@ matrix: - open: ':' content: '[\w-]+' close: ':' + # ignore attr_list {: .* :} + - open: '{:' + close: ':}' - pyspelling.filters.markdown: markdown_extensions: - pymdownx.superfences @@ -26,10 +30,7 @@ matrix: - title - alt ignores: - - ':matches(code, pre)' - - code - - pre - - img + - 'code, pre, img' - pyspelling.filters.url: sources: - '**/*.md' From ed9fb613a01fa8aff7a72bb6a4e4f8b1b784842a Mon Sep 17 00:00:00 2001 From: Kilian Cavalotti Date: Wed, 20 Mar 2024 11:03:48 -0700 Subject: [PATCH 11/13] storage: add bit about ncdu --- src/docs/storage/index.md | 63 +++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/src/docs/storage/index.md b/src/docs/storage/index.md index 6bef605ed..d4c0344a8 100644 --- a/src/docs/storage/index.md +++ b/src/docs/storage/index.md @@ -1,7 +1,7 @@ # Storage on Sherlock Sherlock provides access to several file systems, each with distinct storage -characteristics. Each user and PI group get access to a set of pre-defined +characteristics. Each user and PI group get access to a set of predefined directories in these file systems to store their data. !!! danger "Sherlock is a compute cluster, not a storage system" @@ -26,7 +26,7 @@ and for some of them, purge policies (time-residency limits). 
| Name | Type | Backups / Snapshots | Performance | Purpose | Cost | | ------------------------ |--------------------- |------------------ | ----------- | ------- | ---- | -|`$HOME`, `$GROUP_HOME` | [NFS][url_NFS] | :fontawesome-solid-check:{: .chk_yes :} / :fontawesome-solid-check:{: .chk_yes :} | low | small, important files (source code, executables, configuration files...) | free | +|`$HOME`, `$GROUP_HOME` | [NFS][url_NFS] | :fontawesome-solid-check:{: .chk_yes :} / :fontawesome-solid-check:{: .chk_yes :} | low | small, important files (source code, executable files, configuration files...) | free | |`$SCRATCH`, `$GROUP_SCRATCH` | [Lustre][url_lustre] | :fontawesome-solid-xmark:{: .chk_no :} / :fontawesome-solid-xmark:{: .chk_no :} | high bandwidth | large, temporary files (checkpoints, raw application output...) | free | |`$L_SCRATCH` | local SSD | :fontawesome-solid-xmark:{: .chk_no :} / :fontawesome-solid-xmark:{: .chk_no :} | low latency, high IOPS | job specific output requiring high IOPS | free | |`$OAK` | [Lustre][url_lustre] | option / :fontawesome-solid-xmark:{: .chk_no :} | moderate | long term storage of research data | volume-based[^oak_sd] | @@ -86,12 +86,12 @@ Retention types: * **job lifetime**: files are only kept for the duration of the job and are automatically purged when the job ends. -!!! info "Global failsafe user and quota groups on `/scratch`" +!!! info "Global fail-safe user and quota groups on `/scratch`" To prevent potential issues which would result in the file system filling up completely and making it unusable for everyone, additional user and group-level quotas are in place on the `/scratch` file system, as a - failsafe: + fail-safe: * a user will not be able to use more than 250 TB (50M inodes) in total, in all the `/scratch` directories they have access to. 
@@ -176,9 +176,59 @@ $ sh_quota -f SCRATCH -j } ``` + +#### Locating large directories + +It's not always easy to identify files and directories that take the most space +when getting close to the quota limits. Some tools can help with that. + +* [`du`][url_du] can be used to display the volume used by files and + directories, in a given folder: + + ``` none + $ cd mydir/ + $ du --human-readable --summarize * + 101M dir + 2.0M file + ``` + + !!! note + + `du` will ignore hidden entries (everything that starts with a dot (`.`)). + So when using it in your `$HOME` directory, it will skip things like + `.cache` or `.conda`, which can contain significant volumes. + + +* [`ncdu`][url_ncdu] is an interactive disk usage analyzer that generates a + visual representation of the volume (and inode count) for directories. To run + it, you need to load the `ncdu` module, and then run it on your directory of + choice: + + ``` none + $ ml system ncdu + $ ncdu $HOME + ``` + + For very large directories, running `ncdu` in an interactive shell on a + compute node is recommended, via [`sh_dev`][url_sh_dev]. + + You'll then be presented with an interactive file browser, showing + information about the volume used by your directories, which should make it easy + to pinpoint where most space is used. + +!!! info + + Note that any tool you use to view directory contents will only be able to + show files that your user account has read access to. So on group-shared + spaces, if you see a major difference between the totals from a tool like + `ncdu` and the information reported by `sh_quota`, that can be an indicator + that one of your group members has restricted permissions on a large number + of items in your space. + + ## Where should I store my files? -!!! 
warning "Not all filesystems are equivalent" Choosing the appropriate storage location for your files is an essential step towards making your utilization of the cluster the most efficient @@ -247,6 +297,9 @@ Transfer][url_data_sshfs] page. [url_oak]: //uit.stanford.edu/service/oak-storage [url_data_sshfs]: /docs/storage/data-transfer#sshfs [url_purge]: /docs/storage/filesystems/#expiration-policy +[url_du]: //www.gnu.org/software/coreutils/manual/html_node/du-invocation.html#du-invocation +[url_ncdu]: //dev.yorhel.nl/ncdu +[url_sh_dev]: /docs/user-guide/running-jobs/#interactive-jobs [comment]: # (footnotes -----------------------------------------------------) From d97ede3ef19848e60bed000e441b8b17ee257bee Mon Sep 17 00:00:00 2001 From: Kilian Cavalotti Date: Wed, 20 Mar 2024 11:05:55 -0700 Subject: [PATCH 12/13] update wordlist --- .github/workflows/config/spellcheck.wordlist.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/config/spellcheck.wordlist.txt b/.github/workflows/config/spellcheck.wordlist.txt index 1b1269bbd..25cb27f68 100644 --- a/.github/workflows/config/spellcheck.wordlist.txt +++ b/.github/workflows/config/spellcheck.wordlist.txt @@ -70,3 +70,10 @@ GeForce unsatisfiable reproducibility Skylake +inode +inodes +IOPS +Lustre +JSON +NFS +SSD From 9826ff4404e28a092b2b388aba39046c02cbc64e Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 21 Mar 2024 16:19:09 -0700 Subject: [PATCH 13/13] Adding instructions on mounting SSHFS to the sherlock docs. (#148) --- src/docs/storage/data-transfer.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/docs/storage/data-transfer.md b/src/docs/storage/data-transfer.md index 12a875f51..a7e916094 100644 --- a/src/docs/storage/data-transfer.md +++ b/src/docs/storage/data-transfer.md @@ -219,6 +219,18 @@ and then should not be typed in). 
$ umount ~/sherlock_home ``` + On Windows, once SSHFS is installed, you can mount the `$SCRATCH` + filesystem as a network drive through the Windows File Explorer. To do + this, go to "This PC", right-click in the "Network Locations" section of + the window and select "Add a Network Drive". Then, in the "Add Network + Location Wizard", you would use the following network address: + + ``` + \\sshfs\@dtn.sherlock.stanford.edu + ``` + + This will mount the `$SCRATCH` filesystem as a network drive on your PC. + For more information about using SSHFS on your local machine, you can refer to this [tutorial][url_sshfs_tuto] for more details and examples.