From cf03b1d2e1b3915d37c3e8411f81845355043e0c Mon Sep 17 00:00:00 2001 From: Mitchell Robert Vollger Date: Mon, 10 Jun 2024 22:26:07 -0700 Subject: [PATCH] feat: starting the book --- docs/book.toml | 4 +- docs/make_help_docs.sh | 1 + docs/src/creating/fire.md | 9 ++- docs/src/help.md | 148 +++++++++++++++++++++++--------------- 4 files changed, 102 insertions(+), 60 deletions(-) diff --git a/docs/book.toml b/docs/book.toml index 47d46a46..1920a21b 100644 --- a/docs/book.toml +++ b/docs/book.toml @@ -29,6 +29,6 @@ renderer = ["html"] max-level = 4 -[preprocessor.auto-links] -command = "python auto-links.py" +#[preprocessor.auto-links] +#command = "python auto-links.py" diff --git a/docs/make_help_docs.sh b/docs/make_help_docs.sh index 25e6bc01..9d0ed32b 100755 --- a/docs/make_help_docs.sh +++ b/docs/make_help_docs.sh @@ -6,6 +6,7 @@ export DYLD_LIBRARY_PATH=${LIBTORCH}/lib:$LD_LIBRARY_PATH out="src/help.md" echo "# Help pages for fibertools subcommands" >$out +echo "" >>$out echo "" >>$out for subcommand in "" "predict-m6a" "fire" "extract" "center" "add-nucleosomes" "footprint" "clear-kinetics" "strip-basemods" "track-decorators" "pileup"; do diff --git a/docs/src/creating/fire.md b/docs/src/creating/fire.md index 169ef546..5b6eb34f 100644 --- a/docs/src/creating/fire.md +++ b/docs/src/creating/fire.md @@ -5,4 +5,11 @@ This command identifies **Fiber-seq Inferred Re This command can be run in isolation; however, it is usually preferable to run the [FIRE pipeline](https://github.com/fiberseq/FIRE), which runs `ft fire` and performs many additional analyses and visualizations. -[**The help page**](../help.md#ft-fire). \ No newline at end of file +[**The help page**](../help.md#ft-fire). + +## Extracting from a FIRE BAM +`ft fire` can also be used as an extraction tool to extract Fiber-seq data from an already processed FIRE BAM file. 
+```bash +ft fire --extract fire.bam > all.bed +``` +This produces a file in BED format that contains all the MSPs, FIREs, and nucleosomes in the FIRE BAM file. This command produces output analogous to the [now removed](https://github.com/fiberseq/FIRE/issues/24) `model.results.bed.gz` result from older versions of the FIRE pipeline. diff --git a/docs/src/help.md b/docs/src/help.md index e06db81b..fe5a3e53 100644 --- a/docs/src/help.md +++ b/docs/src/help.md @@ -1,4 +1,5 @@ # Help pages for fibertools subcommands + ## `ft ` ```console @@ -7,13 +8,16 @@ Fiber-seq toolkit in rust Usage: ft [OPTIONS] Commands: - predict-m6a Predict m6A positions using HiFi kinetics data and encode the results in the MM and ML bam tags. Also adds - nucleosome (nl, ns) and MTase sensitive patches (al, as) [aliases: m6A, m6a] + predict-m6a Predict m6A positions using HiFi kinetics data and encode the results in the + MM and ML bam tags. Also adds nucleosome (nl, ns) and MTase sensitive + patches (al, as) [aliases: m6A, m6a] add-nucleosomes Add nucleosomes to a bam file with m6a predictions - fire Add FIREs (Fiber-seq Inferred Regulatory Elements) to a bam file with m6a predictions + fire Add FIREs (Fiber-seq Inferred Regulatory Elements) to a bam file with m6a + predictions extract Extract fiberseq data into plain text files [aliases: ex, e] - center This command centers fiberseq data around given reference positions. This is useful for making aggregate m6A - and CpG observations, as well as visualization of SVs [aliases: c, ct] + center This command centers fiberseq data around given reference positions. 
This is + useful for making aggregate m6A and CpG observations, as well as + visualization of SVs [aliases: c, ct] footprint Infer footprints from fiberseq data track-decorators Make decorated bed files for fiberseq data pileup Make a pileup track of Fiber-seq features from a FIRE bam @@ -35,14 +39,15 @@ Debug-Options: ## `ft predict-m6a` ```console -Predict m6A positions using HiFi kinetics data and encode the results in the MM and ML bam tags. Also adds nucleosome (nl, ns) and -MTase sensitive patches (al, as) +Predict m6A positions using HiFi kinetics data and encode the results in the MM and ML bam tags. +Also adds nucleosome (nl, ns) and MTase sensitive patches (al, as) Usage: ft predict-m6a [OPTIONS] [BAM] [OUT] Arguments: - [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a HiFi bam file with kinetics data. - For other commands, this should be a bam file with m6A calls [default: -] + [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a + HiFi bam file with kinetics data. 
For other commands, this should be a bam file with m6A + calls [default: -] [OUT] Output bam file with m6A calls in new/extended MM and ML bam tags [default: -] Options: @@ -62,7 +67,8 @@ Options: Print version BAM-Options: - -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: 0] + -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: + 0] --ml Minium score in the ML tag to use or include in the output [default: 125] Global-Options: @@ -73,9 +79,12 @@ Debug-Options: --quiet Turn off all logging Developer-Options: - --force-min-ml-score Force a different minimum ML score - --all-calls Keep all m6A calls regardless of how low the ML value is - -b, --batch-size Number of reads to include in batch prediction [default: 1] + --force-min-ml-score + Force a different minimum ML score + --all-calls + Keep all m6A calls regardless of how low the ML value is + -b, --batch-size + Number of reads to include in batch prediction [default: 1] ``` ## `ft fire` @@ -85,10 +94,11 @@ Add FIREs (Fiber-seq Inferred Regulatory Elements) to a bam file with m6a predic Usage: ft fire [OPTIONS] [BAM] [OUT] Arguments: - [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a HiFi bam file with kinetics data. - For other commands, this should be a bam file with m6A calls [default: -] - [OUT] Output file (BAM by default, table of MSP features if `--feats-to-text` is used, and bed9 + if `--extract`` is used) - [default: -] + [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a + HiFi bam file with kinetics data. 
For other commands, this should be a bam file with m6A + calls [default: -] + [OUT] Output file (BAM by default, table of MSP features if `--feats-to-text` is used, and bed9 + + if `--extract`` is used) [default: -] Options: -e, --extract @@ -102,18 +112,21 @@ Options: --min-msp Skip reads without at least `N` MSP calls [env: MIN_MSP=] [default: 0] --min-ave-msp-size - Skip reads without an average MSP size greater than `N` [env: MIN_AVE_MSP_SIZE=] [default: 0] + Skip reads without an average MSP size greater than `N` [env: MIN_AVE_MSP_SIZE=] [default: + 0] -w, --width-bin Width of bin for feature collection [env: WIDTH_BIN=] [default: 40] -b, --bin-num Number of bins to collect [env: BIN_NUM=] [default: 9] --best-window-size - Calculate stats for the highest X bp window within each MSP Should be a fair amount higher than the expected linker length - [env: BEST_WINDOW_SIZE=] [default: 100] + Calculate stats for the highest X bp window within each MSP Should be a fair amount higher + than the expected linker length [env: BEST_WINDOW_SIZE=] [default: 100] --min-msp-length-for-positive-fire-call - Minium length of msp to call a FIRE [env: MIN_MSP_LENGTH_FOR_POSITIVE_FIRE_CALL=] [default: 85] + Minium length of msp to call a FIRE [env: MIN_MSP_LENGTH_FOR_POSITIVE_FIRE_CALL=] + [default: 85] --model - Optional path to a model json file. If not provided ft will use the default model (recommended) [env: FIRE_MODEL=] + Optional path to a model json file. 
If not provided ft will use the default model + (recommended) [env: FIRE_MODEL=] --fdr-table Optional path to a FDR table [env: FDR_TABLE=] -h, --help @@ -122,7 +135,8 @@ Options: Print version BAM-Options: - -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: 0] + -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: + 0] --ml Minium score in the ML tag to use or include in the output [default: 125] Global-Options: @@ -140,8 +154,9 @@ Extract fiberseq data into plain text files Usage: ft extract [OPTIONS] [BAM] Arguments: - [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a HiFi bam file with kinetics data. - For other commands, this should be a bam file with m6A calls [default: -] + [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a + HiFi bam file with kinetics data. For other commands, this should be a bam file with m6A + calls [default: -] Options: -r, --reference Report in reference sequence coordinates @@ -155,7 +170,8 @@ Options: -V, --version Print version BAM-Options: - -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: 0] + -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: + 0] --ml Minium score in the ML tag to use or include in the output [default: 125] Global-Options: @@ -172,21 +188,25 @@ All-Format-Options: ## `ft center` ```console -This command centers fiberseq data around given reference positions. This is useful for making aggregate m6A and CpG observations, as -well as visualization of SVs +This command centers fiberseq data around given reference positions. This is useful for making +aggregate m6A and CpG observations, as well as visualization of SVs Usage: ft center [OPTIONS] --bed [BAM] Arguments: - [BAM] Input BAM file. If no path is provided stdin is used. 
For m6A prediction, this should be a HiFi bam file with kinetics data. - For other commands, this should be a bam file with m6A calls [default: -] + [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a + HiFi bam file with kinetics data. For other commands, this should be a bam file with m6A + calls [default: -] Options: - -b, --bed Bed file on which to center fiberseq reads. Data is adjusted to the start position of the bed file and corrected - for strand if the strand is indicated in the 6th column of the bed file. The 4th column will also be checked for - the strand but only after the 6th is. If you include strand information in the 4th (or 6th) column it will - orient data accordingly and use the end position of bed record instead of the start if on the minus strand. This - means that profiles of motifs in both the forward and minus orientation will align to the same central position + -b, --bed Bed file on which to center fiberseq reads. Data is adjusted to the start + position of the bed file and corrected for strand if the strand is indicated in + the 6th column of the bed file. The 4th column will also be checked for the + strand but only after the 6th is. If you include strand information in the 4th + (or 6th) column it will orient data accordingly and use the end position of bed + record instead of the start if on the minus strand. 
This means that profiles of + motifs in both the forward and minus orientation will align to the same central + position -d, --dist Set a maximum distance from the start of the motif to keep a feature -w, --wide Provide data in wide format, one row per read -r, --reference Return relative reference position instead of relative molecular position @@ -195,7 +215,8 @@ Options: -V, --version Print version BAM-Options: - -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: 0] + -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: + 0] --ml Minium score in the ML tag to use or include in the output [default: 125] Global-Options: @@ -213,8 +234,9 @@ Add nucleosomes to a bam file with m6a predictions Usage: ft add-nucleosomes [OPTIONS] [BAM] [OUT] Arguments: - [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a HiFi bam file with kinetics data. - For other commands, this should be a bam file with m6A calls [default: -] + [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a + HiFi bam file with kinetics data. For other commands, this should be a bam file with m6A + calls [default: -] [OUT] Output bam file with nucleosome calls [default: -] Options: @@ -232,7 +254,8 @@ Options: Print version BAM-Options: - -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: 0] + -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: + 0] --ml Minium score in the ML tag to use or include in the output [default: 125] Global-Options: @@ -250,19 +273,21 @@ Infer footprints from fiberseq data Usage: ft footprint [OPTIONS] --bed --yaml [BAM] Arguments: - [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a HiFi bam file with kinetics data. 
- For other commands, this should be a bam file with m6A calls [default: -] + [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a + HiFi bam file with kinetics data. For other commands, this should be a bam file with m6A + calls [default: -] Options: - -b, --bed BED file with the regions to footprint. Should all contain the same motif with proper strand information, and - ideally be ChIP-seq peaks + -b, --bed BED file with the regions to footprint. Should all contain the same motif with + proper strand information, and ideally be ChIP-seq peaks -y, --yaml yaml describing the modules of the footprint -o, --out Output bam [default: -] -h, --help Print help -V, --version Print version BAM-Options: - -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: 0] + -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: + 0] --ml Minium score in the ML tag to use or include in the output [default: 125] Global-Options: @@ -280,8 +305,9 @@ Remove HiFi kinetics tags from the input bam file Usage: ft clear-kinetics [OPTIONS] [BAM] [OUT] Arguments: - [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a HiFi bam file with kinetics data. - For other commands, this should be a bam file with m6A calls [default: -] + [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a + HiFi bam file with kinetics data. 
For other commands, this should be a bam file with m6A + calls [default: -] [OUT] Output bam file without hifi kinetics [default: -] Options: @@ -289,7 +315,8 @@ Options: -V, --version Print version BAM-Options: - -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: 0] + -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: + 0] --ml Minium score in the ML tag to use or include in the output [default: 125] Global-Options: @@ -307,17 +334,20 @@ Strip out select base modifications Usage: ft strip-basemods [OPTIONS] [BAM] [OUT] Arguments: - [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a HiFi bam file with kinetics data. - For other commands, this should be a bam file with m6A calls [default: -] + [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a + HiFi bam file with kinetics data. For other commands, this should be a bam file with m6A + calls [default: -] [OUT] Output bam file [default: -] Options: - -b, --basemod base modification to strip out of the bam file [default: m6A] [possible values: m6A, 6mA, 5mC, CpG] + -b, --basemod base modification to strip out of the bam file [default: m6A] [possible + values: m6A, 6mA, 5mC, CpG] -h, --help Print help -V, --version Print version BAM-Options: - -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: 0] + -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: + 0] --ml Minium score in the ML tag to use or include in the output [default: 125] Global-Options: @@ -335,8 +365,9 @@ Make decorated bed files for fiberseq data Usage: ft track-decorators [OPTIONS] --bed12 [BAM] Arguments: - [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a HiFi bam file with kinetics data. 
- For other commands, this should be a bam file with m6A calls [default: -] + [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a + HiFi bam file with kinetics data. For other commands, this should be a bam file with m6A + calls [default: -] Options: -b, --bed12 Output path for bed12 file to be decorated @@ -345,7 +376,8 @@ Options: -V, --version Print version BAM-Options: - -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: 0] + -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: + 0] --ml Minium score in the ML tag to use or include in the output [default: 125] Global-Options: @@ -363,8 +395,9 @@ Make a pileup track of Fiber-seq features from a FIRE bam Usage: ft pileup [OPTIONS] [BAM] [RGN] Arguments: - [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a HiFi bam file with kinetics data. - For other commands, this should be a bam file with m6A calls [default: -] + [BAM] Input BAM file. If no path is provided stdin is used. For m6A prediction, this should be a + HiFi bam file with kinetics data. For other commands, this should be a bam file with m6A + calls [default: -] [RGN] Region string to make a pileup of. If not provided will make a pileup of the whole genome Options: @@ -378,7 +411,8 @@ Options: -V, --version Print version BAM-Options: - -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: 0] + -F, --filter BAM bit flags to filter on, equivalent to `-F` in samtools view [default: + 0] --ml Minium score in the ML tag to use or include in the output [default: 125] Global-Options: