diff --git a/docs/_build/.buildinfo b/docs/_build/.buildinfo new file mode 100644 index 0000000..4cfac3b --- /dev/null +++ b/docs/_build/.buildinfo @@ -0,0 +1,4 @@ +# Sphinx build info version 1 +# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. +config: db814d73acc7f62239ef257c51c0498e +tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/_build/.doctrees/ReporterScreen_api.doctree b/docs/_build/.doctrees/ReporterScreen_api.doctree new file mode 100644 index 0000000..fe5ba5d Binary files /dev/null and b/docs/_build/.doctrees/ReporterScreen_api.doctree differ diff --git a/docs/_build/.doctrees/cds.doctree b/docs/_build/.doctrees/cds.doctree new file mode 100644 index 0000000..6ba50ae Binary files /dev/null and b/docs/_build/.doctrees/cds.doctree differ diff --git a/docs/_build/.doctrees/commands/count.doctree b/docs/_build/.doctrees/commands/count.doctree new file mode 100644 index 0000000..965fff2 Binary files /dev/null and b/docs/_build/.doctrees/commands/count.doctree differ diff --git a/docs/_build/.doctrees/commands/create-screen.doctree b/docs/_build/.doctrees/commands/create-screen.doctree new file mode 100644 index 0000000..a393e37 Binary files /dev/null and b/docs/_build/.doctrees/commands/create-screen.doctree differ diff --git a/docs/_build/.doctrees/commands/filter.doctree b/docs/_build/.doctrees/commands/filter.doctree new file mode 100644 index 0000000..f475ac6 Binary files /dev/null and b/docs/_build/.doctrees/commands/filter.doctree differ diff --git a/docs/_build/.doctrees/commands/input.doctree b/docs/_build/.doctrees/commands/input.doctree new file mode 100644 index 0000000..72dc7a1 Binary files /dev/null and b/docs/_build/.doctrees/commands/input.doctree differ diff --git a/docs/_build/.doctrees/commands/profile.doctree b/docs/_build/.doctrees/commands/profile.doctree new file mode 100644 index 0000000..ce55d68 Binary files /dev/null and 
b/docs/_build/.doctrees/commands/profile.doctree differ diff --git a/docs/_build/.doctrees/commands/qc.doctree b/docs/_build/.doctrees/commands/qc.doctree new file mode 100644 index 0000000..e0bb8f7 Binary files /dev/null and b/docs/_build/.doctrees/commands/qc.doctree differ diff --git a/docs/_build/.doctrees/commands/run.doctree b/docs/_build/.doctrees/commands/run.doctree new file mode 100644 index 0000000..ca7f98f Binary files /dev/null and b/docs/_build/.doctrees/commands/run.doctree differ diff --git a/docs/_build/.doctrees/count.doctree b/docs/_build/.doctrees/count.doctree new file mode 100644 index 0000000..c45e629 Binary files /dev/null and b/docs/_build/.doctrees/count.doctree differ diff --git a/docs/_build/.doctrees/count_samples.doctree b/docs/_build/.doctrees/count_samples.doctree new file mode 100644 index 0000000..01cce7a Binary files /dev/null and b/docs/_build/.doctrees/count_samples.doctree differ diff --git a/docs/_build/.doctrees/environment.pickle b/docs/_build/.doctrees/environment.pickle new file mode 100644 index 0000000..64c36af Binary files /dev/null and b/docs/_build/.doctrees/environment.pickle differ diff --git a/docs/_build/.doctrees/exon_fa_format.doctree b/docs/_build/.doctrees/exon_fa_format.doctree new file mode 100644 index 0000000..404eb35 Binary files /dev/null and b/docs/_build/.doctrees/exon_fa_format.doctree differ diff --git a/docs/_build/.doctrees/filter.doctree b/docs/_build/.doctrees/filter.doctree new file mode 100644 index 0000000..70319eb Binary files /dev/null and b/docs/_build/.doctrees/filter.doctree differ diff --git a/docs/_build/.doctrees/gwas.doctree b/docs/_build/.doctrees/gwas.doctree new file mode 100644 index 0000000..709e67e Binary files /dev/null and b/docs/_build/.doctrees/gwas.doctree differ diff --git a/docs/_build/.doctrees/index.doctree b/docs/_build/.doctrees/index.doctree new file mode 100644 index 0000000..225e5e5 Binary files /dev/null and b/docs/_build/.doctrees/index.doctree differ diff --git 
a/docs/_build/.doctrees/index_.doctree b/docs/_build/.doctrees/index_.doctree new file mode 100644 index 0000000..d378d5c Binary files /dev/null and b/docs/_build/.doctrees/index_.doctree differ diff --git a/docs/_build/.doctrees/input.doctree b/docs/_build/.doctrees/input.doctree new file mode 100644 index 0000000..5810f03 Binary files /dev/null and b/docs/_build/.doctrees/input.doctree differ diff --git a/docs/_build/.doctrees/profile.doctree b/docs/_build/.doctrees/profile.doctree new file mode 100644 index 0000000..dca5e96 Binary files /dev/null and b/docs/_build/.doctrees/profile.doctree differ diff --git a/docs/_build/.doctrees/qc.doctree b/docs/_build/.doctrees/qc.doctree new file mode 100644 index 0000000..232dc84 Binary files /dev/null and b/docs/_build/.doctrees/qc.doctree differ diff --git a/docs/_build/.doctrees/run.doctree b/docs/_build/.doctrees/run.doctree new file mode 100644 index 0000000..f4aa6ac Binary files /dev/null and b/docs/_build/.doctrees/run.doctree differ diff --git a/docs/_build/.doctrees/subcommands.doctree b/docs/_build/.doctrees/subcommands.doctree new file mode 100644 index 0000000..1e1a601 Binary files /dev/null and b/docs/_build/.doctrees/subcommands.doctree differ diff --git a/docs/_build/.doctrees/tutorials/ldl_cds.doctree b/docs/_build/.doctrees/tutorials/ldl_cds.doctree new file mode 100644 index 0000000..66cc78f Binary files /dev/null and b/docs/_build/.doctrees/tutorials/ldl_cds.doctree differ diff --git a/docs/_build/.doctrees/tutorials/ldl_var.doctree b/docs/_build/.doctrees/tutorials/ldl_var.doctree new file mode 100644 index 0000000..9274c17 Binary files /dev/null and b/docs/_build/.doctrees/tutorials/ldl_var.doctree differ diff --git a/docs/_build/ReporterScreen_api.html b/docs/_build/ReporterScreen_api.html new file mode 100644 index 0000000..265ff5f --- /dev/null +++ b/docs/_build/ReporterScreen_api.html @@ -0,0 +1,1983 @@ + + + + + + + + ReporterScreen API tutorial — bean 1.0.0 documentation + + + + + + + + + + + + + 
+ + + + + +
+
+
+ + +
+ +
+

ReporterScreen API tutorial

+

Load the required packages. (Anndata import isn’t required to use the package).

+
import numpy as np
+import pandas as pd
+import anndata as ad
+import seaborn as sns
+import matplotlib.pyplot as plt
+import bean as br
+
+
+

The bean ReporterScreen object and the perturb-seq Screen object are both anndata-compatible.

+
adata = ad.read_h5ad("bean_count_07+1021_LDLvar.h5ad")
+
+
+
adata
+
+
+
AnnData object with n_obs × n_vars = 3455 × 12
+    obs: 'name', 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'Group', 'sequence', 'Reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'edit_rate'
+    var: 'index', 'sort', 'replicate'
+    uns: 'allele_counts', 'edit_counts'
+    layers: 'X_bcmatch', 'edits'
+
+
+
cdata = br.read_h5ad("bean_count_07+1021_LDLvar.h5ad")
+
+
+
cdata
+
+
+
Genome Editing Screen comprised of n_guides x n_conditions = 3455 x 12
+   guides:    'name', 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'Group', 'sequence', 'Reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'edit_rate'
+   samples:    'index', 'sort', 'replicate'
+   condit_m:
+   condit_p:
+   layers:    'X_bcmatch', 'edits'
+   uns:       'allele_counts', 'edit_counts'
+
+
+
    +
  • cdata.X: guide count

  • +
  • cdata.guides: guide metadata

  • +
  • cdata.samples: sample/condition metadata

  • +
  • cdata.layers["X_bcmatch"]: barcode-matched guide counts

  • +
  • cdata.layers["edits"]: edit counts

  • +
  • cdata.uns["allele_counts"]: allele counts per guide and condition

  • +
  • cdata.uns["edit_counts"]: edit counts per guide and condition

  • +
+

The guides attribute contains information about each guide.

+
cdata.guides
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
nameUnnamed: 0Target gene/variantTarget descriptorArbitrary numbergRNA position categoryTarget base position in gRNATarget base position in reporterBEGroup...Reporterbarcode5-nt PAMoffsettargettarget_posGroup2masked_sequencemasked_barcodeedit_rate
0CONTROL_1_g10CONTROLNaN1g1410ABENegCtrl...CCAAGCCCTACGCGGTAGGGAACTTTGGGAGCGTTTGGGAG-10CONTROL_19NegCtrlCCTGCGCGGTGGGGGGCTTTGTTT0.531163
1CONTROL_1_g21CONTROLNaN1g2511ABENegCtrl...TCCAAGCCCTACGCGGTAGGGAACTTTGGGAGAACATGGGA-11CONTROL_110NegCtrlCCCTGCGCGGTGGGGGGCTTGGCG0.640765
2CONTROL_1_g32CONTROLNaN1g3512ABENegCtrl...GTCCAAGCCCTACGCGGTAGGGAACTTTGGGACGCTTTGGG-12CONTROL_111NegCtrlCCCTGCGCGGTGGGGGGCTCGCT0.417709
3CONTROL_1_g43CONTROLNaN1g4713ABENegCtrl...CGTCCAAGCCCTACGCGGTAGGGAACTTTGGGTGAGTTTGG-13CONTROL_112NegCtrlGGCCCTGCGCGGTGGGGGGCTGGG0.126400
4CONTROL_1_g54CONTROLNaN1g5814ABENegCtrl...ACGTCCAAGCCCTACGCGGTAGGGAACTTTGGGTATCTTTG-14CONTROL_113NegCtrlGGGCCCTGCGCGGTGGGGGGGTGT0.201104
..................................................................
3450rs9987289_Maj_ABE_347_g13450rs9987289Maj347g1310ABEVariant...TGCTTGGGCATCAATATCACGTGGAACCAGCCCAGTCCAGC-10rs9987289_Maj_ABE_3479VariantGCGTCGGTGTCGCGTGGGGCGGT0.087379
3451rs9987289_Maj_ABE_347_g23451rs9987289Maj347g2411ABEVariant...ATGCTTGGGCATCAATATCACGTGGAACCAGCTCGCACCAG-11rs9987289_Maj_ABE_34710VariantGGCGTCGGTGTCGCGTGGGTCGC0.299923
3452rs9987289_Maj_ABE_347_g33452rs9987289Maj347g3612ABEVariant...GATGCTTGGGCATCAATATCACGTGGAACCAGGCACAACCA-12rs9987289_Maj_ABE_34711VariantTGGGCGTCGGTGTCGCGTGGGCGC0.224973
3453rs9987289_Maj_ABE_347_g43453rs9987289Maj347g4713ABEVariant...AGATGCTTGGGCATCAATATCACGTGGAACCATTGCGAACC-13rs9987289_Maj_ABE_34712VariantTTGGGCGTCGGTGTCGCGTGTTGC0.265378
3454rs9987289_Maj_ABE_347_g53454rs9987289Maj347g5814ABEVariant...TAGATGCTTGGGCATCAATATCACGTGGAACCGCGAGGAAC-14rs9987289_Maj_ABE_34713VariantCTTGGGCGTCGGTGTCGCGTGCGG0.266573
+

3455 rows × 21 columns

+

The samples attribute contains sample- and condition-specific information.

+
cdata.samples
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
indexsortreplicate
0rep1_botbotrep1
1rep2_botbotrep2
2rep3_VPA_botbotrep3_VPA
3rep4_VPA_botbotrep4_VPA
4rep1_bulkbulkrep1
5rep2_bulkbulkrep2
6rep3_VPA_bulkbulkrep3_VPA
7rep4_VPA_bulkbulkrep4_VPA
8rep1_toptoprep1
9rep2_toptoprep2
10rep3_VPA_toptoprep3_VPA
11rep4_VPA_toptoprep4_VPA
+

Allele count information is stored in .uns["allele_counts"].

+
cdata.uns["allele_counts"]
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
guideallelerep1_botrep2_botrep3_VPA_botrep4_VPA_botrep1_bulkrep2_bulkrep3_VPA_bulkrep4_VPA_bulkrep1_toprep2_toprep3_VPA_toprep4_VPA_top
012:51779544AGA_Maj_ABE_2_g10:9:+:A>G,5:14:+:A>G14201306152172214343
112:51779544AGA_Maj_ABE_2_g1-4:5:+:A>G,-2:7:+:A>G,5:14:+:A>G,10:19:+:A>G100000000000
212:51779544AGA_Maj_ABE_2_g1-7:2:+:A>G,0:9:+:A>G,5:14:+:A>G342010520010
312:51779544AGA_Maj_ABE_2_g1-9:0:+:G>A,-8:1:+:G>A,-7:2:+:A>C,-6:3:+:C>A,-4...100102100010
412:51779544AGA_Maj_ABE_2_g1-7:2:+:A>G,10:19:+:A>G110000000000
.............................................
438407rs9987289_Maj_ABE_347_g54:17:+:A>G,6:19:+:A>G,9:22:+:A>G000000000020
438408rs9987289_Maj_ABE_347_g5-12:1:+:A>G,6:19:+:A>G,9:22:+:A>G,11:24:+:G>A000000000010
438409rs9987289_Maj_ABE_347_g5-12:1:+:A>G,6:19:+:A>G,9:22:+:A>G,16:29:+:A>G000000000001
438410rs9987289_Maj_ABE_347_g5-12:1:+:A>G,0:13:+:A>G,6:19:+:A>G,9:22:+:A>G,1...000000000100
438411rs9987289_Maj_ABE_347_g5-12:1:+:A>G,6:19:+:A>G,9:22:+:A>G,12:25:+:T>G000000000001
+

438412 rows × 14 columns

+

Base-level edit counts can be saved in .uns["edit_counts"].

+
cdata.uns["edit_counts"]
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
guideeditrep1_botrep2_botrep3_VPA_botrep4_VPA_botrep1_bulkrep2_bulkrep3_VPA_bulkrep4_VPA_bulkrep1_toprep2_toprep3_VPA_toprep4_VPA_topref_basealt_base
012:51779544AGA_Maj_ABE_2_g1-1:8:+:G>A000010000000GA
112:51779544AGA_Maj_ABE_2_g1-1:8:+:G>C000000001010GC
212:51779544AGA_Maj_ABE_2_g1-1:8:+:G>T000010000000GT
312:51779544AGA_Maj_ABE_2_g1-2:7:+:A>C000000002010AC
412:51779544AGA_Maj_ABE_2_g1-2:7:+:A>G1934404592566768481492AG
...................................................
217563rs9987289_Maj_ABE_347_g58:21:+:C>A070001101000CA
217564rs9987289_Maj_ABE_347_g58:21:+:C>G002008000180CG
217565rs9987289_Maj_ABE_347_g58:21:+:C>T007000700000CT
217566rs9987289_Maj_ABE_347_g59:22:+:A>G92130513746122058235947AG
217567rs9987289_Maj_ABE_347_g59:22:+:A>T000000070000AT
+

217568 rows × 16 columns

+
+

Subsetting & addition

+

Subsetting and addition work as in anndata, and also support allele and edit count operations.

+
+

Subsetting & selection

+
cdata_subset = cdata[:10,cdata.samples.sort == "bulk"]
+
+
+
['rep1_bulk', 'rep2_bulk', 'rep3_VPA_bulk', 'rep4_VPA_bulk']
+
+
+
cdata_subset.uns["allele_counts"]
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
guideallelerep1_bulkrep2_bulkrep3_VPA_bulkrep4_VPA_bulk
14979CONTROL_10_g1-4:5:+:A>G,0:9:+:A>G8130
14980CONTROL_10_g1-7:2:+:C>T00010
14981CONTROL_10_g1-4:5:+:A>G2922925
14982CONTROL_10_g11:10:+:A>G0641
14983CONTROL_10_g1-4:5:+:A>G,1:10:+:A>G111512
.....................
22837CONTROL_1_g5-13:0:+:A>-,-12:1:+:C>T,-9:4:+:C>G,-8:5:+:C>T,...0000
22838CONTROL_1_g5-6:7:+:A>C,7:20:+:A>G0000
22839CONTROL_1_g5-13:0:+:A>G,-10:3:+:T>G,0:13:+:A>G,7:20:+:A>G0000
22840CONTROL_1_g50:13:+:A>T0000
22841CONTROL_1_g50:13:+:A>G,18:31:+:G>A0000
+

1080 rows × 6 columns

+
+
+

LFC calculation & Addition

+
cdata1 = br.read_h5ad("/data/pinello/PROJECTS/2021_08_ANBE/data/072121_ABE_topbot/bean_counts/LDLvar/032422_crispresso/bean_count_072121_ABE_topbot_LDLvar.h5ad")
+cdata2 = br.read_h5ad("/data/pinello/PROJECTS/2021_08_ANBE/data/102121_ABE_topbot/bean_counts/LDLvar/032422_crispresso/bean_count_102121_ABE_topbot_LDLvar.h5ad")
+
+
+
cdata1.samples["sort"] = cdata1.samples["index"].map(lambda s: s.rsplit("_", 1)[-1])
+cdata1.samples["replicate"] = cdata1.samples["index"].map(lambda s: s.rsplit("_", 1)[0])
+cdata2.samples["sort"] = cdata2.samples["index"].map(lambda s: s.rsplit("_", 1)[-1])
+cdata2.samples["replicate"] = cdata2.samples["index"].map(lambda s: s.rsplit("_", 1)[0])
+
+
+
cdata1.log_norm()
+lfc1 = cdata1.log_fold_change_reps("bot", "top")
+cdata2.log_norm()
+lfc2 = cdata2.log_fold_change_reps("bot", "top")
+lfcs = lfc1.join(lfc2, lsuffix = "_1", rsuffix = "_2")
+sns.pairplot(lfcs)
+
+
+_images/output_20_2.png +

LFC can be aggregated for biological replicates.

+
cdata1.log_fold_change_aggregate("bot", "top", aggregate_condit = "replicate")
+
+
+
cdata1.guides
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
nameUnnamed: 0Target gene/variantTarget descriptorArbitrary numbergRNA position categoryTarget base position in gRNATarget base position in reporterBEGroup...Reporterbarcode5-nt PAMoffsettargettarget_posGroup2masked_sequencemasked_barcodebot_top.lfc.median
0CONTROL_1_g10CONTROLNaN1g1410ABENegCtrl...CCAAGCCCTACGCGGTAGGGAACTTTGGGAGCGTTTGGGAG-10CONTROL_19NegCtrlCCTGCGCGGTGGGGGGCTTTGTTT-0.158787
1CONTROL_1_g21CONTROLNaN1g2511ABENegCtrl...TCCAAGCCCTACGCGGTAGGGAACTTTGGGAGAACATGGGA-11CONTROL_110NegCtrlCCCTGCGCGGTGGGGGGCTTGGCG-0.212254
2CONTROL_1_g32CONTROLNaN1g3512ABENegCtrl...GTCCAAGCCCTACGCGGTAGGGAACTTTGGGACGCTTTGGG-12CONTROL_111NegCtrlCCCTGCGCGGTGGGGGGCTCGCT0.186679
3CONTROL_1_g43CONTROLNaN1g4713ABENegCtrl...CGTCCAAGCCCTACGCGGTAGGGAACTTTGGGTGAGTTTGG-13CONTROL_112NegCtrlGGCCCTGCGCGGTGGGGGGCTGGG-0.022441
4CONTROL_1_g54CONTROLNaN1g5814ABENegCtrl...ACGTCCAAGCCCTACGCGGTAGGGAACTTTGGGTATCTTTG-14CONTROL_113NegCtrlGGGCCCTGCGCGGTGGGGGGGTGT0.457033
..................................................................
3450rs9987289_Maj_ABE_347_g13450rs9987289Maj347g1310ABEVariant...TGCTTGGGCATCAATATCACGTGGAACCAGCCCAGTCCAGC-10rs9987289_Maj_ABE_3479VariantGCGTCGGTGTCGCGTGGGGCGGT-0.418312
3451rs9987289_Maj_ABE_347_g23451rs9987289Maj347g2411ABEVariant...ATGCTTGGGCATCAATATCACGTGGAACCAGCTCGCACCAG-11rs9987289_Maj_ABE_34710VariantGGCGTCGGTGTCGCGTGGGTCGC-0.084936
3452rs9987289_Maj_ABE_347_g33452rs9987289Maj347g3612ABEVariant...GATGCTTGGGCATCAATATCACGTGGAACCAGGCACAACCA-12rs9987289_Maj_ABE_34711VariantTGGGCGTCGGTGTCGCGTGGGCGC-0.339419
3453rs9987289_Maj_ABE_347_g43453rs9987289Maj347g4713ABEVariant...AGATGCTTGGGCATCAATATCACGTGGAACCATTGCGAACC-13rs9987289_Maj_ABE_34712VariantTTGGGCGTCGGTGTCGCGTGTTGC-0.517138
3454rs9987289_Maj_ABE_347_g53454rs9987289Maj347g5814ABEVariant...TAGATGCTTGGGCATCAATATCACGTGGAACCGCGAGGAAC-14rs9987289_Maj_ABE_34713VariantCTTGGGCGTCGGTGTCGCGTGCGG0.002245
+

3455 rows × 21 columns

+

Technical replicates show decent LFC correlation.

+
cdata = cdata1 + cdata2
+
+
+
cdata
+
+
+
Genome Editing Screen comprised of n_guides x n_conditions = 3455 x 12
+   guides:    'name', 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'Group', 'sequence', 'Reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'bot_top.lfc.median'
+   samples:    'index', 'sort', 'replicate'
+   condit_m:
+   condit_p:
+   layers:    'edits', 'X_bcmatch'
+   uns:       'allele_counts'
+
+
+

You can concatenate different samples with shared guides.

+
br.concat((cdata1, cdata2))
+
+
+
Genome Editing Screen comprised of n_guides x n_conditions = 3455 x 24
+   guides:    'name', 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'Group', 'sequence', 'Reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'bot_top.lfc.median'
+   samples:    'index', 'sort', 'replicate'
+   condit_m:
+   condit_p:
+   layers:    'X', 'X_bcmatch', 'edits', 'lognorm_counts', 'lognorm_edits'
+   uns:       'allele_counts'
+
+
+
+
+
+

Getting edit rates from allele counts

+
cdata.get_edit_rate(normalize_by_editable_base = False,
+                   edited_base = "A",
+                   editable_base_start = 3,
+                   editable_base_end = 8,
+                   bcmatch_thres = 10,
+                   prior_weight = 1)
+
+
+
cdata.uns["edit_counts"] = cdata.get_edit_from_allele()
+
+
+
cdata.get_edit_mat_from_uns("A", "G", match_target_position = True)
+cdata.get_edit_rate(edited_base = "A", bcmatch_thres = 10)
+plt.hist(cdata.guides.edit_rate, bins=30)
+plt.show()
+
+
+_images/output_34_1.png +
+

Calculating LFC

+
cdata.log_norm()
+cdata.log_fold_change_aggregate("bot", "top", aggregate_condit = "replicate")
+
+
+
cdata.guides
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
nameUnnamed: 0Target gene/variantTarget descriptorArbitrary numbergRNA position categoryTarget base position in gRNATarget base position in reporterBEGroup...barcode5-nt PAMoffsettargettarget_posGroup2masked_sequencemasked_barcodebot_top.lfc.medianedit_rate
0CONTROL_1_g10CONTROLNaN1g1410ABENegCtrl...GTTTGGGAG-10CONTROL_19NegCtrlCCTGCGCGGTGGGGGGCTTTGTTT-0.1355500.531163
1CONTROL_1_g21CONTROLNaN1g2511ABENegCtrl...AACATGGGA-11CONTROL_110NegCtrlCCCTGCGCGGTGGGGGGCTTGGCG-0.0593910.640765
2CONTROL_1_g32CONTROLNaN1g3512ABENegCtrl...CGCTTTGGG-12CONTROL_111NegCtrlCCCTGCGCGGTGGGGGGCTCGCT0.1412900.417709
3CONTROL_1_g43CONTROLNaN1g4713ABENegCtrl...TGAGTTTGG-13CONTROL_112NegCtrlGGCCCTGCGCGGTGGGGGGCTGGG-0.0723580.126400
4CONTROL_1_g54CONTROLNaN1g5814ABENegCtrl...GTATCTTTG-14CONTROL_113NegCtrlGGGCCCTGCGCGGTGGGGGGGTGT0.2696500.201104
..................................................................
3450rs9987289_Maj_ABE_347_g13450rs9987289Maj347g1310ABEVariant...CAGTCCAGC-10rs9987289_Maj_ABE_3479VariantGCGTCGGTGTCGCGTGGGGCGGT-0.2302640.087379
3451rs9987289_Maj_ABE_347_g23451rs9987289Maj347g2411ABEVariant...TCGCACCAG-11rs9987289_Maj_ABE_34710VariantGGCGTCGGTGTCGCGTGGGTCGC-0.1821510.299923
3452rs9987289_Maj_ABE_347_g33452rs9987289Maj347g3612ABEVariant...GCACAACCA-12rs9987289_Maj_ABE_34711VariantTGGGCGTCGGTGTCGCGTGGGCGC-0.1657780.224973
3453rs9987289_Maj_ABE_347_g43453rs9987289Maj347g4713ABEVariant...TTGCGAACC-13rs9987289_Maj_ABE_34712VariantTTGGGCGTCGGTGTCGCGTGTTGC-0.3405900.265378
3454rs9987289_Maj_ABE_347_g53454rs9987289Maj347g5814ABEVariant...GCGAGGAAC-14rs9987289_Maj_ABE_34713VariantCTTGGGCGTCGGTGTCGCGTGCGG0.0343650.266573
+

3455 rows × 22 columns

+
+
+

Allele translation

+
cdata_tiling = br.read_h5ad("../../072121_ABE_topbot/bean_counts/LDLRCDS/032422_crispresso/bean_count_072121_ABE_topbot_LDLRCDS.h5ad")
+
+
+
cdata_tiling.uns["allele_counts"].allele
+
+
+
0                                         11224415:14:+:A>G
+1                        11224401:0:+:A>G,11224415:14:+:A>G
+2                        11224410:9:+:A>G,11224415:14:+:A>G
+3         11224401:0:+:A>G,11224402:1:+:A>G,11224410:9:+...
+4                                          11224401:0:+:A>G
+                                ...
+438001    11203000:4:+:A>G,11203002:6:+:A>G,11203006:10:...
+438002    11224074:0:+:A>G,11224086:12:+:A>G,11224092:18...
+438003    0:0:+:A>G,3:3:+:A>G,11:11:+:A>G,13:13:+:A>G,17...
+438004                  11217409:23:+:G>-,11217417:31:+:->C
+438005    11226735:30:-:A>G,11226742:23:-:A>G,11226747:1...
+Name: allele, Length: 438006, dtype: object
+
+
+
+
+

Writing

+
cdata.to_Excel("tmp.xlsx")
+
+
+
Writing to: tmp.xlsx
+
+    Sheet 1:        X
+    Sheet 2:        edits
+    Sheet 3:        X_bcmatch
+    Sheet 4:        lognorm_counts
+    Sheet 5:        lognorm_edits
+    Sheet 6:        guides
+    Sheet 7:        samples
+    Sheet 8:        screen.uns.allele_counts
+    Sheet 9:        screen.uns.edit_counts
+
+
+
cdata.to_mageck_input("mageck_input.txt", target_column='target')
+
+
+
%%bash
+head mageck_input.txt
+
+
+
sgRNA       gene    0       1       2       3       4       5       6       7       8       9       10      11
+CONTROL_1_g1        CONTROL_1       171     451     251     422     573     389     456     420     835     435     794     439
+CONTROL_1_g2        CONTROL_1       145     278     257     206     364     273     389     254     527     498     768     195
+CONTROL_1_g3        CONTROL_1       333     835     488     632     898     899     780     713     1189    626     1146    603
+CONTROL_1_g4        CONTROL_1       246     663     387     448     823     595     705     600     921     595     1143    506
+CONTROL_1_g5        CONTROL_1       243     647     434     529     776     451     700     676     1062    611     928     379
+CONTROL_10_g1       CONTROL_10      138     329     229     213     422     292     432     352     409     243     390     274
+CONTROL_10_g2       CONTROL_10      187     468     402     479     643     369     428     469     796     422     787     404
+CONTROL_10_g3       CONTROL_10      57      126     83      131     281     114     184     115     300     106     299     106
+CONTROL_10_g4       CONTROL_10      66      112     120     136     182     128     169     181     256     144     258     179
+
+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/_images/output_20_2.png b/docs/_build/_images/output_20_2.png new file mode 100644 index 0000000..00de7f8 Binary files /dev/null and b/docs/_build/_images/output_20_2.png differ diff --git a/docs/_build/_images/output_34_1.png b/docs/_build/_images/output_34_1.png new file mode 100644 index 0000000..591f374 Binary files /dev/null and b/docs/_build/_images/output_34_1.png differ diff --git a/docs/_build/_sources/ReporterScreen_api.rst.txt b/docs/_build/_sources/ReporterScreen_api.rst.txt new file mode 100644 index 0000000..d489ffc --- /dev/null +++ b/docs/_build/_sources/ReporterScreen_api.rst.txt @@ -0,0 +1,2015 @@ +ReporterScreen API tutorial +================== + +Load the required packages. (Anndata import isn't required to use the package). + +.. code:: ipython3 + + import numpy as np + import pandas as pd + import anndata as ad + import seaborn as sns + import matplotlib.pyplot as plt + import bean as br + +.. role:: bash(code) + :language: bash +bean :bash:`ReporterScreen` object and perturb-seq :bash:`Screen` object are both :bash:`anndata` compatible. + +.. code:: ipython3 + + adata = ad.read_h5ad("bean_count_07+1021_LDLvar.h5ad") + +.. code:: ipython3 + + adata + + +.. parsed-literal:: + + AnnData object with n_obs × n_vars = 3455 × 12 + obs: 'name', 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'Group', 'sequence', 'Reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'edit_rate' + var: 'index', 'sort', 'replicate' + uns: 'allele_counts', 'edit_counts' + layers: 'X_bcmatch', 'edits' + + + +.. code:: ipython3 + + cdata = br.read_h5ad("bean_count_07+1021_LDLvar.h5ad") + +.. code:: ipython3 + + cdata + + + + + +.. 
parsed-literal:: + + Genome Editing Screen comprised of n_guides x n_conditions = 3455 x 12 + guides: 'name', 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'Group', 'sequence', 'Reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'edit_rate' + samples: 'index', 'sort', 'replicate' + condit_m: + condit_p: + layers: 'X_bcmatch', 'edits' + uns: 'allele_counts', 'edit_counts' + +- :bash:`cdata.X`: guide count +- :bash:`cdata.guides`: guide metadata +- :bash:`cdata.samples`: sample/condition metadata +- :bash:`cdata.layers["X_bcmatch"]`: barcode-matched guide counts +- :bash:`cdata.layers["edits"]`: edit counts +- :bash:`cdata.uns["allele_counts"]`: allele counts per guide and condition +- :bash:`cdata.uns["edit_counts"]`: edit counts per guide and condition + +:bash:`guides` attribute contains the information about each guide. + +.. code:: ipython3 + + cdata.guides + + + + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
nameUnnamed: 0Target gene/variantTarget descriptorArbitrary numbergRNA position categoryTarget base position in gRNATarget base position in reporterBEGroup...Reporterbarcode5-nt PAMoffsettargettarget_posGroup2masked_sequencemasked_barcodeedit_rate
0CONTROL_1_g10CONTROLNaN1g1410ABENegCtrl...CCAAGCCCTACGCGGTAGGGAACTTTGGGAGCGTTTGGGAG-10CONTROL_19NegCtrlCCTGCGCGGTGGGGGGCTTTGTTT0.531163
1CONTROL_1_g21CONTROLNaN1g2511ABENegCtrl...TCCAAGCCCTACGCGGTAGGGAACTTTGGGAGAACATGGGA-11CONTROL_110NegCtrlCCCTGCGCGGTGGGGGGCTTGGCG0.640765
2CONTROL_1_g32CONTROLNaN1g3512ABENegCtrl...GTCCAAGCCCTACGCGGTAGGGAACTTTGGGACGCTTTGGG-12CONTROL_111NegCtrlCCCTGCGCGGTGGGGGGCTCGCT0.417709
3CONTROL_1_g43CONTROLNaN1g4713ABENegCtrl...CGTCCAAGCCCTACGCGGTAGGGAACTTTGGGTGAGTTTGG-13CONTROL_112NegCtrlGGCCCTGCGCGGTGGGGGGCTGGG0.126400
4CONTROL_1_g54CONTROLNaN1g5814ABENegCtrl...ACGTCCAAGCCCTACGCGGTAGGGAACTTTGGGTATCTTTG-14CONTROL_113NegCtrlGGGCCCTGCGCGGTGGGGGGGTGT0.201104
..................................................................
3450rs9987289_Maj_ABE_347_g13450rs9987289Maj347g1310ABEVariant...TGCTTGGGCATCAATATCACGTGGAACCAGCCCAGTCCAGC-10rs9987289_Maj_ABE_3479VariantGCGTCGGTGTCGCGTGGGGCGGT0.087379
3451rs9987289_Maj_ABE_347_g23451rs9987289Maj347g2411ABEVariant...ATGCTTGGGCATCAATATCACGTGGAACCAGCTCGCACCAG-11rs9987289_Maj_ABE_34710VariantGGCGTCGGTGTCGCGTGGGTCGC0.299923
3452rs9987289_Maj_ABE_347_g33452rs9987289Maj347g3612ABEVariant...GATGCTTGGGCATCAATATCACGTGGAACCAGGCACAACCA-12rs9987289_Maj_ABE_34711VariantTGGGCGTCGGTGTCGCGTGGGCGC0.224973
3453rs9987289_Maj_ABE_347_g43453rs9987289Maj347g4713ABEVariant...AGATGCTTGGGCATCAATATCACGTGGAACCATTGCGAACC-13rs9987289_Maj_ABE_34712VariantTTGGGCGTCGGTGTCGCGTGTTGC0.265378
3454rs9987289_Maj_ABE_347_g53454rs9987289Maj347g5814ABEVariant...TAGATGCTTGGGCATCAATATCACGTGGAACCGCGAGGAAC-14rs9987289_Maj_ABE_34713VariantCTTGGGCGTCGGTGTCGCGTGCGG0.266573
+

3455 rows × 21 columns

+
+ + +:bash:`samples` attribute contains the sample and condition specific information. + +.. code:: ipython3 + + cdata.samples + + + + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
indexsortreplicate
0rep1_botbotrep1
1rep2_botbotrep2
2rep3_VPA_botbotrep3_VPA
3rep4_VPA_botbotrep4_VPA
4rep1_bulkbulkrep1
5rep2_bulkbulkrep2
6rep3_VPA_bulkbulkrep3_VPA
7rep4_VPA_bulkbulkrep4_VPA
8rep1_toptoprep1
9rep2_toptoprep2
10rep3_VPA_toptoprep3_VPA
11rep4_VPA_toptoprep4_VPA
+
+ + +Allele_counts information is stored in :bash:`.uns["allele_counts"]`. + +.. code:: ipython3 + + cdata.uns["allele_counts"] + + + + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
guideallelerep1_botrep2_botrep3_VPA_botrep4_VPA_botrep1_bulkrep2_bulkrep3_VPA_bulkrep4_VPA_bulkrep1_toprep2_toprep3_VPA_toprep4_VPA_top
012:51779544AGA_Maj_ABE_2_g10:9:+:A>G,5:14:+:A>G14201306152172214343
112:51779544AGA_Maj_ABE_2_g1-4:5:+:A>G,-2:7:+:A>G,5:14:+:A>G,10:19:+:A>G100000000000
212:51779544AGA_Maj_ABE_2_g1-7:2:+:A>G,0:9:+:A>G,5:14:+:A>G342010520010
312:51779544AGA_Maj_ABE_2_g1-9:0:+:G>A,-8:1:+:G>A,-7:2:+:A>C,-6:3:+:C>A,-4...100102100010
412:51779544AGA_Maj_ABE_2_g1-7:2:+:A>G,10:19:+:A>G110000000000
.............................................
438407rs9987289_Maj_ABE_347_g54:17:+:A>G,6:19:+:A>G,9:22:+:A>G000000000020
438408rs9987289_Maj_ABE_347_g5-12:1:+:A>G,6:19:+:A>G,9:22:+:A>G,11:24:+:G>A000000000010
438409rs9987289_Maj_ABE_347_g5-12:1:+:A>G,6:19:+:A>G,9:22:+:A>G,16:29:+:A>G000000000001
438410rs9987289_Maj_ABE_347_g5-12:1:+:A>G,0:13:+:A>G,6:19:+:A>G,9:22:+:A>G,1...000000000100
438411rs9987289_Maj_ABE_347_g5-12:1:+:A>G,6:19:+:A>G,9:22:+:A>G,12:25:+:T>G000000000001
+

438412 rows × 14 columns

+
+ + +Base-level edit counts can be saved at `.uns["edit_counts"]`. + +.. code:: ipython3 + + cdata.uns["edit_counts"] + + + + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
guideeditrep1_botrep2_botrep3_VPA_botrep4_VPA_botrep1_bulkrep2_bulkrep3_VPA_bulkrep4_VPA_bulkrep1_toprep2_toprep3_VPA_toprep4_VPA_topref_basealt_base
012:51779544AGA_Maj_ABE_2_g1-1:8:+:G>A000010000000GA
112:51779544AGA_Maj_ABE_2_g1-1:8:+:G>C000000001010GC
212:51779544AGA_Maj_ABE_2_g1-1:8:+:G>T000010000000GT
312:51779544AGA_Maj_ABE_2_g1-2:7:+:A>C000000002010AC
412:51779544AGA_Maj_ABE_2_g1-2:7:+:A>G1934404592566768481492AG
...................................................
217563rs9987289_Maj_ABE_347_g58:21:+:C>A070001101000CA
217564rs9987289_Maj_ABE_347_g58:21:+:C>G002008000180CG
217565rs9987289_Maj_ABE_347_g58:21:+:C>T007000700000CT
217566rs9987289_Maj_ABE_347_g59:22:+:A>G92130513746122058235947AG
217567rs9987289_Maj_ABE_347_g59:22:+:A>T000000070000AT
+

217568 rows × 16 columns

+
+ + + + + +Subsetting & addition +--------------------- + +Works as anndata, supports allele & edit count operations. + +Subsetting & selection +~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + cdata_subset = cdata[:10,cdata.samples.sort == "bulk"] + + +.. parsed-literal:: + + ['rep1_bulk', 'rep2_bulk', 'rep3_VPA_bulk', 'rep4_VPA_bulk'] + + +.. code:: ipython3 + + cdata_subset.uns["allele_counts"] + + + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
guideallelerep1_bulkrep2_bulkrep3_VPA_bulkrep4_VPA_bulk
14979CONTROL_10_g1-4:5:+:A>G,0:9:+:A>G8130
14980CONTROL_10_g1-7:2:+:C>T00010
14981CONTROL_10_g1-4:5:+:A>G2922925
14982CONTROL_10_g11:10:+:A>G0641
14983CONTROL_10_g1-4:5:+:A>G,1:10:+:A>G111512
.....................
22837CONTROL_1_g5-13:0:+:A>-,-12:1:+:C>T,-9:4:+:C>G,-8:5:+:C>T,...0000
22838CONTROL_1_g5-6:7:+:A>C,7:20:+:A>G0000
22839CONTROL_1_g5-13:0:+:A>G,-10:3:+:T>G,0:13:+:A>G,7:20:+:A>G0000
22840CONTROL_1_g50:13:+:A>T0000
22841CONTROL_1_g50:13:+:A>G,18:31:+:G>A0000
+

1080 rows × 6 columns

+
+ + + +LFC calculation & Addition +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + cdata1 = br.read_h5ad("/data/pinello/PROJECTS/2021_08_ANBE/data/072121_ABE_topbot/bean_counts/LDLvar/032422_crispresso/bean_count_072121_ABE_topbot_LDLvar.h5ad") + cdata2 = br.read_h5ad("/data/pinello/PROJECTS/2021_08_ANBE/data/102121_ABE_topbot/bean_counts/LDLvar/032422_crispresso/bean_count_102121_ABE_topbot_LDLvar.h5ad") + + +.. code:: ipython3 + + cdata1.samples["sort"] = cdata1.samples["index"].map(lambda s: s.rsplit("_", 1)[-1]) + cdata1.samples["replicate"] = cdata1.samples["index"].map(lambda s: s.rsplit("_", 1)[0]) + cdata2.samples["sort"] = cdata2.samples["index"].map(lambda s: s.rsplit("_", 1)[-1]) + cdata2.samples["replicate"] = cdata2.samples["index"].map(lambda s: s.rsplit("_", 1)[0]) + +.. code:: ipython3 + + cdata1.log_norm() + lfc1 = cdata1.log_fold_change_reps("bot", "top") + cdata2.log_norm() + lfc2 = cdata2.log_fold_change_reps("bot", "top") + lfcs = lfc1.join(lfc2, lsuffix = "_1", rsuffix = "_2") + sns.pairplot(lfcs) + + +.. image:: ../imgs/output_20_2.png + + +LFC can be aggregated for biological replicates. + +.. code:: ipython3 + + cdata1.log_fold_change_aggregate("bot", "top", aggregate_condit = "replicate") + +.. code:: ipython3 + + cdata1.guides + + + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
nameUnnamed: 0Target gene/variantTarget descriptorArbitrary numbergRNA position categoryTarget base position in gRNATarget base position in reporterBEGroup...Reporterbarcode5-nt PAMoffsettargettarget_posGroup2masked_sequencemasked_barcodebot_top.lfc.median
0CONTROL_1_g10CONTROLNaN1g1410ABENegCtrl...CCAAGCCCTACGCGGTAGGGAACTTTGGGAGCGTTTGGGAG-10CONTROL_19NegCtrlCCTGCGCGGTGGGGGGCTTTGTTT-0.158787
1CONTROL_1_g21CONTROLNaN1g2511ABENegCtrl...TCCAAGCCCTACGCGGTAGGGAACTTTGGGAGAACATGGGA-11CONTROL_110NegCtrlCCCTGCGCGGTGGGGGGCTTGGCG-0.212254
2CONTROL_1_g32CONTROLNaN1g3512ABENegCtrl...GTCCAAGCCCTACGCGGTAGGGAACTTTGGGACGCTTTGGG-12CONTROL_111NegCtrlCCCTGCGCGGTGGGGGGCTCGCT0.186679
3CONTROL_1_g43CONTROLNaN1g4713ABENegCtrl...CGTCCAAGCCCTACGCGGTAGGGAACTTTGGGTGAGTTTGG-13CONTROL_112NegCtrlGGCCCTGCGCGGTGGGGGGCTGGG-0.022441
4CONTROL_1_g54CONTROLNaN1g5814ABENegCtrl...ACGTCCAAGCCCTACGCGGTAGGGAACTTTGGGTATCTTTG-14CONTROL_113NegCtrlGGGCCCTGCGCGGTGGGGGGGTGT0.457033
..................................................................
3450rs9987289_Maj_ABE_347_g13450rs9987289Maj347g1310ABEVariant...TGCTTGGGCATCAATATCACGTGGAACCAGCCCAGTCCAGC-10rs9987289_Maj_ABE_3479VariantGCGTCGGTGTCGCGTGGGGCGGT-0.418312
3451rs9987289_Maj_ABE_347_g23451rs9987289Maj347g2411ABEVariant...ATGCTTGGGCATCAATATCACGTGGAACCAGCTCGCACCAG-11rs9987289_Maj_ABE_34710VariantGGCGTCGGTGTCGCGTGGGTCGC-0.084936
3452rs9987289_Maj_ABE_347_g33452rs9987289Maj347g3612ABEVariant...GATGCTTGGGCATCAATATCACGTGGAACCAGGCACAACCA-12rs9987289_Maj_ABE_34711VariantTGGGCGTCGGTGTCGCGTGGGCGC-0.339419
3453rs9987289_Maj_ABE_347_g43453rs9987289Maj347g4713ABEVariant...AGATGCTTGGGCATCAATATCACGTGGAACCATTGCGAACC-13rs9987289_Maj_ABE_34712VariantTTGGGCGTCGGTGTCGCGTGTTGC-0.517138
3454rs9987289_Maj_ABE_347_g53454rs9987289Maj347g5814ABEVariant...TAGATGCTTGGGCATCAATATCACGTGGAACCGCGAGGAAC-14rs9987289_Maj_ABE_34713VariantCTTGGGCGTCGGTGTCGCGTGCGG0.002245
+

3455 rows × 21 columns

+
+ + + +Technical replicates show decent LFC correlation. + +.. code:: ipython3 + + cdata = cdata1 + cdata2 + + +.. code:: ipython3 + + cdata + + + + + +.. parsed-literal:: + + Genome Editing Screen comprised of n_guides x n_conditions = 3455 x 12 + guides: 'name', 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'Group', 'sequence', 'Reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'bot_top.lfc.median' + samples: 'index', 'sort', 'replicate' + condit_m: + condit_p: + layers: 'edits', 'X_bcmatch' + uns: 'allele_counts' + + + +You can concatenate different samples with shared guides. + +.. code:: ipython3 + + br.concat((cdata1, cdata2)) + + +.. parsed-literal:: + + Genome Editing Screen comprised of n_guides x n_conditions = 3455 x 24 + guides: 'name', 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'Group', 'sequence', 'Reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'bot_top.lfc.median' + samples: 'index', 'sort', 'replicate' + condit_m: + condit_p: + layers: 'X', 'X_bcmatch', 'edits', 'lognorm_counts', 'lognorm_edits' + uns: 'allele_counts' + + + +Getting edit rates from allele counts +------------------------------------- + +.. code:: ipython3 + + cdata.get_edit_rate(normalize_by_editable_base = False, + edited_base = "A", + editable_base_start = 3, + editable_base_end = 8, + bcmatch_thres = 10, + prior_weight = 1) + + +.. code:: ipython3 + + cdata.uns["edit_counts"] = cdata.get_edit_from_allele() + + + +.. 
code:: ipython3 + + cdata.get_edit_mat_from_uns("A", "G", match_target_position = True) + cdata.get_edit_rate(edited_base = "A", bcmatch_thres = 10) + plt.hist(cdata.guides.edit_rate, bins=30) + plt.show() + + +.. image:: ../imgs/output_34_1.png + + + +Calculating LFC +~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + cdata.log_norm() + cdata.log_fold_change_aggregate("bot", "top", aggregate_condit = "replicate") + +.. code:: ipython3 + + cdata.guides + + + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
nameUnnamed: 0Target gene/variantTarget descriptorArbitrary numbergRNA position categoryTarget base position in gRNATarget base position in reporterBEGroup...barcode5-nt PAMoffsettargettarget_posGroup2masked_sequencemasked_barcodebot_top.lfc.medianedit_rate
0CONTROL_1_g10CONTROLNaN1g1410ABENegCtrl...GTTTGGGAG-10CONTROL_19NegCtrlCCTGCGCGGTGGGGGGCTTTGTTT-0.1355500.531163
1CONTROL_1_g21CONTROLNaN1g2511ABENegCtrl...AACATGGGA-11CONTROL_110NegCtrlCCCTGCGCGGTGGGGGGCTTGGCG-0.0593910.640765
2CONTROL_1_g32CONTROLNaN1g3512ABENegCtrl...CGCTTTGGG-12CONTROL_111NegCtrlCCCTGCGCGGTGGGGGGCTCGCT0.1412900.417709
3CONTROL_1_g43CONTROLNaN1g4713ABENegCtrl...TGAGTTTGG-13CONTROL_112NegCtrlGGCCCTGCGCGGTGGGGGGCTGGG-0.0723580.126400
4CONTROL_1_g54CONTROLNaN1g5814ABENegCtrl...GTATCTTTG-14CONTROL_113NegCtrlGGGCCCTGCGCGGTGGGGGGGTGT0.2696500.201104
..................................................................
3450rs9987289_Maj_ABE_347_g13450rs9987289Maj347g1310ABEVariant...CAGTCCAGC-10rs9987289_Maj_ABE_3479VariantGCGTCGGTGTCGCGTGGGGCGGT-0.2302640.087379
3451rs9987289_Maj_ABE_347_g23451rs9987289Maj347g2411ABEVariant...TCGCACCAG-11rs9987289_Maj_ABE_34710VariantGGCGTCGGTGTCGCGTGGGTCGC-0.1821510.299923
3452rs9987289_Maj_ABE_347_g33452rs9987289Maj347g3612ABEVariant...GCACAACCA-12rs9987289_Maj_ABE_34711VariantTGGGCGTCGGTGTCGCGTGGGCGC-0.1657780.224973
3453rs9987289_Maj_ABE_347_g43453rs9987289Maj347g4713ABEVariant...TTGCGAACC-13rs9987289_Maj_ABE_34712VariantTTGGGCGTCGGTGTCGCGTGTTGC-0.3405900.265378
3454rs9987289_Maj_ABE_347_g53454rs9987289Maj347g5814ABEVariant...GCGAGGAAC-14rs9987289_Maj_ABE_34713VariantCTTGGGCGTCGGTGTCGCGTGCGG0.0343650.266573
+

3455 rows × 22 columns

+
+ + + +Allele translation +~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + cdata_tiling = br.read_h5ad("../../072121_ABE_topbot/bean_counts/LDLRCDS/032422_crispresso/bean_count_072121_ABE_topbot_LDLRCDS.h5ad") + + +.. code:: ipython3 + + cdata_tiling.uns["allele_counts"].allele + + + + + + +.. parsed-literal:: + + 0 11224415:14:+:A>G + 1 11224401:0:+:A>G,11224415:14:+:A>G + 2 11224410:9:+:A>G,11224415:14:+:A>G + 3 11224401:0:+:A>G,11224402:1:+:A>G,11224410:9:+... + 4 11224401:0:+:A>G + ... + 438001 11203000:4:+:A>G,11203002:6:+:A>G,11203006:10:... + 438002 11224074:0:+:A>G,11224086:12:+:A>G,11224092:18... + 438003 0:0:+:A>G,3:3:+:A>G,11:11:+:A>G,13:13:+:A>G,17... + 438004 11217409:23:+:G>-,11217417:31:+:->C + 438005 11226735:30:-:A>G,11226742:23:-:A>G,11226747:1... + Name: allele, Length: 438006, dtype: object + + + +Writing +~~~~~~~ + +.. code:: ipython3 + + cdata.to_Excel("tmp.xlsx") + + +.. parsed-literal:: + + Writing to: tmp.xlsx + + Sheet 1: X + Sheet 2: edits + Sheet 3: X_bcmatch + Sheet 4: lognorm_counts + Sheet 5: lognorm_edits + Sheet 6: guides + Sheet 7: samples + Sheet 8: screen.uns.allele_counts + Sheet 9: screen.uns.edit_counts + + +.. code:: ipython3 + + cdata.to_mageck_input("mageck_input.txt", target_column='target') + +.. code:: bash + + %%bash + head mageck_input.txt + + +.. 
parsed-literal:: + + sgRNA gene 0 1 2 3 4 5 6 7 8 9 10 11 + CONTROL_1_g1 CONTROL_1 171 451 251 422 573 389 456 420 835 435 794 439 + CONTROL_1_g2 CONTROL_1 145 278 257 206 364 273 389 254 527 498 768 195 + CONTROL_1_g3 CONTROL_1 333 835 488 632 898 899 780 713 1189 626 1146 603 + CONTROL_1_g4 CONTROL_1 246 663 387 448 823 595 705 600 921 595 1143 506 + CONTROL_1_g5 CONTROL_1 243 647 434 529 776 451 700 676 1062 611 928 379 + CONTROL_10_g1 CONTROL_10 138 329 229 213 422 292 432 352 409 243 390 274 + CONTROL_10_g2 CONTROL_10 187 468 402 479 643 369 428 469 796 422 787 404 + CONTROL_10_g3 CONTROL_10 57 126 83 131 281 114 184 115 300 106 299 106 + CONTROL_10_g4 CONTROL_10 66 112 120 136 182 128 169 181 256 144 258 179 + diff --git a/docs/_build/_sources/cds.rst.txt b/docs/_build/_sources/cds.rst.txt new file mode 100644 index 0000000..5dabfde --- /dev/null +++ b/docs/_build/_sources/cds.rst.txt @@ -0,0 +1,5 @@ +Coding sequence tiling library +*********************** +.. mdinclude:: tutorials/ldl_cds.md + +See :ref:`subcommands` for the full details. diff --git a/docs/_build/_sources/commands/count.md.txt b/docs/_build/_sources/commands/count.md.txt new file mode 100644 index 0000000..5f04b4c --- /dev/null +++ b/docs/_build/_sources/commands/count.md.txt @@ -0,0 +1,31 @@ +# `bean count[-samples]`: Count (reporter) screen data +`bean count-samples` (or `bean count` for a single sample) maps guide into guide counts, **allowing for base transition in spacer sequence**. When the matched reporter information is provided, it can count the **target site edits** and **alleles produced by each guide**. Mapping is efficiently done based on [CRISPResso2](https://github.com/pinellolab/CRISPResso2) modified for base-edit-aware mapping. + + + +```python +bean count-samples \ + --input sample_list.csv `# sample with lines 'R1_filepath,R2_filepath,sample_name\n'` \ + -b A `# base that is being edited (A/G)` \ + -f sgRNA_info_table.csv `# sgRNA information` \ + -o . 
`# output directory` \ + -r `# read edit/allele information from reporter` \ + -t 12 `# number of threads` \ + --name my_sorting_screen `# name of this sample run` \ +``` +```python +bean count --R1 R1.fq --R2 R2.fq -b A -f sgRNA_info_table.csv -r +``` +By default, `bean count[-samples]` assumes R1 and R2 are trimmed off of the adapter sequence. You may need to adjust the command arguments according to your read structure. + + Read structure + +See full detail [below](#full-parameters). + +# Input file format +See :ref:`input` for input file formats. + +# Output file format +`count` or `count-samples` produces `.h5ad` and `.xlsx` files with guide and per-guide allele counts. +* `.h5ad`: This output file follows annotated matrix format compatible with `AnnData` and is based on `Screen` object in [perturb-tools](https://github.com/pinellolab/perturb-tools). See [Data Structure](#data-structure) section for more information. +* `.xlsx`: This output file contains `.guides`, `.samples`, `.X[_bcmatch,_edits]`. (`allele_tables` are often too large to write into an Excel!) diff --git a/docs/_build/_sources/commands/create-screen.md.txt b/docs/_build/_sources/commands/create-screen.md.txt new file mode 100644 index 0000000..d9200b7 --- /dev/null +++ b/docs/_build/_sources/commands/create-screen.md.txt @@ -0,0 +1,9 @@ +# `bean create-screen`: Create ReporterScreen object from flat files +```bash +bean create-screen gRNA_library.csv sample_list.csv gRNA_counts_table.csv +``` +## Input + * gRNA_library.csv + * sample_list.csv + * gRNA_counts_table.csv: Table with gRNA ID in the first column and sample IDs as the column names (first row) +`gRNA_library.csv` and `sample_list.csv` should be formatted as :ref:`input`.
\ No newline at end of file diff --git a/docs/_build/_sources/commands/filter.md.txt b/docs/_build/_sources/commands/filter.md.txt new file mode 100644 index 0000000..53d6ec1 --- /dev/null +++ b/docs/_build/_sources/commands/filter.md.txt @@ -0,0 +1,37 @@ +# `filter`: Filtering (and optionally translating) alleles +As `tiling` mode of `bean run` accounts for any robustly observed alleles, `bean filter` filters for such alleles. +```bash +bean filter my_sorting_screen_masked.h5ad \ +-o my_sorting_screen_filtered.h5ad `# Output file path` \ +``` + +# Output +Above command produces +* `my_sorting_screen_filtered.h5ad` with filtered alleles stored in `.uns`, +* `my_sorting_screen_filtered.filtered_allele_stats.pdf`, and `my_sorting_screen_filtered.filter_log.txt` that report allele count stats in each filtering step. + +You may want to adjust the filtering parameters to obtain optimal balance between # guides per variant & # variants that are scored. See example outputs of filtering step [here](docs/example_filtering_output/). + + +# Translating alleles +If you want to obtain **amino acid level variant** for coding sequence tiling screens, provide the coding sequence positions so that variants occurring within the coding sequence are translated. *This is optional, but **highly recommended** to increase per-(coding)variant support.* + +Allele translation + + +```bash +bean filter my_sorting_screen.h5ad \ +-o my_sorting_screen_masked.h5ad \ +--translate `# Translate coding variants` \ +[ --translate-gene-name GENE_SYMBOL OR + --translate-genes-list path_to_gene_names_file.txt OR + --translate-fasta gene_exon.fa, OR + --translate-fastas-csv gene_exon_fas.csv] +``` +* When library covers a single gene, do either of the following: + 1. Feed `--translate-gene-name GENE_SYMBOL` if your `genomic_pos` column of `sgRNA_info_tbl` is compatible with [MANE transcript](https://useast.ensembl.org/info/genome/genebuild/mane.html)'s reference genome. (Per 10/23/2023, GRCh38).
This will automatically load the exon positions based on MANE transcript annotation. + 2. To use your custom coding sequence and exon positions, feed `--translate-fasta gene_exon.fa` argument where `gene_exon.fa` is the FASTA file with entries of exons. [See full details here](docs/exon_fa_format.md). +* When library covers multiple genes, do either of the following: + 1. Feed `--translate-genes-list path_to_gene_names_file.txt` where `path_to_gene_names_file.txt` is file with one gene symbol per line. + 2. Feed `--translate-fastas-csv gene_exon_fas.csv` where `gene_exon_fas.csv` is the csv file with lines `gene_id,gene_exon_fasta_path` without header. Each FASTA file in `gene_exon_fasta_path` is formatted [as the single-gene FASTA file](docs/exon_fa_format.md). +* Translation will keep the variants outside the coding sequence as nucleotide-level variants, while aggregating variants leading to the same coding sequence variants. diff --git a/docs/_build/_sources/commands/input.md.txt b/docs/_build/_sources/commands/input.md.txt new file mode 100644 index 0000000..9fa1de9 --- /dev/null +++ b/docs/_build/_sources/commands/input.md.txt @@ -0,0 +1,37 @@ +This document describes the input files of :ref:`count_samples`. +## sgRNA_info_table.csv +File should contain following columns. +* `name`: gRNA ID column +* `sequence`: gRNA sequence +* `barcode`: R2 barcode to help match reporter to gRNA, written in the sense direction (as in R1) +* In order to use accessibility in the [variant effect quantification](#bean-run-quantify-variant-effects), provide accessibility information in one of two options. (For non-targeting guides, provide NA values (empty cell).) + * Option 1: `chrom` & `genomic_pos`: Chromosome (ex. `chr19`) and genomic position of guide sequence. You will have to provide the path to the bigwig file with matching reference version in `bean run`. + * Option 2: `accessibility_signal`: ATAC-seq signal value of the target loci of each guide. 
+* For variant library (gRNAs are designed to target specific variants and ignore bystander edits) + * `target`: This column denotes the target variant/element of each gRNA. This is not used in `bean count[-samples]` but required to run `bean run` in later steps. + * `target_group`: If negative/positive control gRNA will be considered in `bean qc` and/or `bean run`, specify as "NegCtrl"/"PosCtrl" in this column. + * `target_pos`: If `--match_target_pos` flag is used, input file needs `target_pos` which specifies 0-based relative position of targeted base within Reporter sequence. +* For tiling library (gRNAs tile coding / noncoding sequences) + * `strand`: Specifies gRNA strand information relative to the reference genome. + * `chrom`: Chromosome of gRNA targeted locus. + * `start_pos`: gRNA starting position in the genome. Required when you provide `strand` column. Should specify the smaller coordinate value among start and end position regardless of gRNA strandedness. + +Also see examples for [variant library](tests/data/test_guide_info.csv) and [tiling library](tests/data/test_guide_info_tiling.csv). + +## sample_list.csv +File should contain following columns with header. +* `R1_filepath`: Path to read 1 `.fastq[.gz]` file +* `R2_filepath`: Path to read 2 `.fastq[.gz]` file +* `sample_id`: ID of sequencing sample +* `replicate`: Replicate # of this sample (Should NOT contain `.`) +* `condition`: Name of the sorting bin (ex. `top`, `bot`), or label of timepoint (ex. `D5`, `D18`) + +For FACS sorting screens: +* `upper_quantile`: FACS sorting upper quantile +* `lower_quantile`: FACS sorting lower quantile + +For proliferation / survival screens: +* `time`: Numeric time following the base editing of each sample. + + +Also see examples for [FACS sorting screen](tests/data/sample_list.csv).
\ No newline at end of file diff --git a/docs/_build/_sources/commands/profile.md.txt b/docs/_build/_sources/commands/profile.md.txt new file mode 100644 index 0000000..bbe8a8c --- /dev/null +++ b/docs/_build/_sources/commands/profile.md.txt @@ -0,0 +1,8 @@ +# `bean profile`: Profile editing patterns +```bash +bean profile my_sorting_screen.h5ad -o output_prefix `# Prefix for editing profile report` +``` +# Output +Above command produces `prefix_editing_preference.[html,ipynb]` as editing preferences ([see example](../../notebooks/profile_editing_preference.ipynb)). + +Allele translation \ No newline at end of file diff --git a/docs/_build/_sources/commands/qc.md.txt b/docs/_build/_sources/commands/qc.md.txt new file mode 100644 index 0000000..82b719e --- /dev/null +++ b/docs/_build/_sources/commands/qc.md.txt @@ -0,0 +1,94 @@ +# `bean qc`: QC of reporter screen data +```bash +bean qc \ + my_sorting_screen.h5ad `# Input ReporterScreen .h5ad file path` \ + -o my_sorting_screen_masked.h5ad `# Output ReporterScreen .h5ad file path` \ + -r qc_report_my_sorting_screen `# Prefix for QC report` \ + --ctrl-cond presort `# "condition" column in the control sample before selection. Mean gRNA editing rates in these samples are reported. ` \ +# Inspect the output qc_report_my_sorting_screen.html to tweak QC threshold + +bean qc \ + my_sorting_screen.h5ad \ + -o my_sorting_screen_masked.h5ad \ + -r qc_report_my_sorting_screen \ + #[--count-correlation-thres 0.7 ...]\ + -b +``` + +`bean qc` supports following quality control and masks samples with low quality. 
Specifically: + +Allele translation + +* Plots guide coverage and the uniformity of coverage +* Guide count correlation between samples +* Log fold change correlation when positive controls are provided +* Plots editing rate distribution +* Identify samples with low guide coverage/guide count correlation/editing rate and mask the sample in `bdata.samples.mask` +* Identify outlier guides to filter out + +# Output +Above command produces +* `my_sorting_screen_masked.h5ad` without problematic replicates and guides and with sample masks, and +* `qc_report_my_sorting_screen.[html,ipynb]` as QC report. +##### Optional arguments: +* `-o OUT_SCREEN_PATH`, `--out-screen-path OUT_SCREEN_PATH` + Path where the quality-filtered ReporterScreen object is written +* `-r OUT_REPORT_PREFIX`, `--out-report-prefix OUT_REPORT_PREFIX` + Output prefix of qc report (prefix.html, prefix.ipynb) + +##### QC thresholds: +* `--count-correlation-thres COUNT_CORRELATION_THRES` + Correlation threshold to mask out. +* `--edit-rate-thres EDIT_RATE_THRES` + Mean editing rate threshold per sample to mask out. +* `--lfc-thres LFC_THRES` + Positive guides' correlation threshold to filter out. + +##### Run options: +* `-b`, `--remove-bad-replicates` + Remove replicates with at least two of its samples meet the QC threshold (bean run does not support having only one sorting bin sample for a replicate). +* `-i`, `--ignore-missing-samples` + If the flag is not provided and the ReporterScreen object does not contain all conditions for + each replicate, make fake empty samples. If the flag is provided, don't add dummy samples. +* `--no-editing` Ignore QC about editing. Can be used for QC of other editing modalities. +* `--dont-recalculate-edits` + When ReporterScreen.layers['edit_count'] exists, do not recalculate the edit counts from + ReporterScreen.uns['allele_count'].
+ +##### Input `.h5ad` formatting: +Note that these arguments will change the way the QC metrics are calculated for guides, samples, or replicates. +* `--tiling TILING` Specify that the guide library is a tiling library without 'n guides per target' design +* `--replicate-label REPLICATE_LABEL` + Label of column in `bdata.samples` that describes replicate ID. +* `--sample-covariates SAMPLE_COVARIATES` + Comma-separated list of column names in `bdata.samples` that describes non-selective + experimental condition. (drug treatment, etc.) +* `--condition-label CONDITION_LABEL` + Label of column in `bdata.samples` that describes experimental condition. (sorting bin, time, + etc.) +###### Editing rate calculation + * `--control-condition CTRL_COND` + Values in the column `ReporterScreen.samples[condition_label]` for which guide-level editing rate + is calculated. Default is `None`, which considers all samples. + * `--rel-pos-is-reporter` + Specifies whether `edit_start_pos` and `edit_end_pos` are relative to reporter position. If + `False`, those are relative to spacer position. + Editing rate is calculated with the following parameters in + * Variant screens: + * `--target-pos-col TARGET_POS_COL` + Target position column in `bdata.guides` specifying target edit position in reporter + * tiling screens: + * `--edit-start-pos EDIT_START_POS` + Edit start position to quantify editing rate on, 0-based inclusive. + * `--edit-end-pos EDIT_END_POS` + Edit end position to quantify editing rate on, 0-based exclusive. +###### LFC of positive controls + * `--posctrl-col POSCTRL_COL` + Column name in ReporterScreen.guides DataFrame that specifies guide category. To use all + gRNAs, feed empty string ''. + * `--posctrl-val POSCTRL_VAL` + Value in ReporterScreen.guides[`posctrl_col`] that specifies guide will be used as the + positive control in calculating log fold change.
+ * `--lfc-conds LFC_CONDS` + Values in of column in `ReporterScreen.samples[condition_label]` for LFC will be calculated + between, delimited by comma \ No newline at end of file diff --git a/docs/_build/_sources/commands/run.md.txt b/docs/_build/_sources/commands/run.md.txt new file mode 100644 index 0000000..4c45259 --- /dev/null +++ b/docs/_build/_sources/commands/run.md.txt @@ -0,0 +1,64 @@ +# `bean run`: Quantify variant effects +BEAN uses Bayesian network to incorporate gRNA editing outcome to provide posterior estimate of variant phenotype. The Bayesian network reflects data generation process. Briefly, +1. Cellular phenotype (either for cells are sorted upon for sorting screen, or log(proliferation rate)) is modeled as the Gaussian mixture distribution of wild-type phenotype and variant phenotype. +2. The weight of the mixture components are inferred from the reporter editing outcome and the chromatin accessibility of the loci. +3. Cells with each gRNA, formulated as the mixture distribution, is sorted by the phenotypic quantile to produce the gRNA counts. + +For the full detail, see the method section of the [BEAN manuscript](https://www.medrxiv.org/content/10.1101/2023.09.08.23295253v1). + +model + +

+ +# Usage example +```bash +bean run sorting[survival] variant[tiling] my_sorting_screen_filtered.h5ad \ +[--uniform-edit, --scale-by-acc [--acc-bw-path accessibility_signal.bw, --acc-col accessibility]] \ +-o output_prefix/ \ +--fit-negctrl +``` +See full list of parameters [below](#full-parameters). + + +# Input +`my_sorting_screen_filtered.h5ad` can be produced by one of the following: +1. [`bean count-samples`](#bean-count-samples-count-reporter-screen-data) when you have raw `.fastq` files +2. (Limited to `bean run variant` mode) `bean create-screen` when you have flat `.csv` tables of gRNA metadata table, sample metadata table, gRNA counts table (# guides x # samples), and optionally # edits table. + ```bash + bean create-screen gRNA_info_table.csv sample_info_table.csv gRNA_counts_table.csv \ + [--edits edit_counts_table.csv -o output.h5ad] + ``` + * `gRNA_info_table.csv` should have following columns. + * `name`: gRNA ID column + * `target`: This column denotes the target variant/element of each gRNA. + * `target_group [Optional]`: If negative control gRNA will be used, specify as "NegCtrl" in this column. + * `sample_info_table.csv` should have following columns. + * `sample_id`: ID of sequencing sample + * `replicate`: Replicate # of this sample + * `bin`: Name of the sorting bin + * `upper_quantile`: FACS sorting upper quantile + * `lower_quantile`: FACS sorting lower quantile + * `gRNA_counts_table.csv` should be formatted as follows. + * Columns include one of `sample_id` columns in `sample_info_table.csv` file. + * 1st column (row index) follows `name` (gRNA ID) in `gRNA_info_table.csv` file. +3. You can manually create the `AnnData` object with more annotations including allele counts: see [API tutorial](#using-bean-as-python-module) for full detail.
+ + +# Output +model + +Above command produces +* `output_prefix/bean_element_result.[model_type].csv` with following columns: + * Estimated variant effect sizes + * `mu` (Effect size): Mean of variant phenotype, given the wild type has standard normal phenotype distribution of `mu = 0, sd = 1`. + * `mu_sd`: Mean of variant phenotype `mu` is modeled as normal distribution. The column shows fitted standard deviation of `mu` that quantify the uncertainty of the variant effect. + * `mu_z`: z-score of `mu` + * `sd`: Standard deviation of variant phenotype, given the wild type has standard normal phenotype distribution of `mu = 0, sd = 1`. + * `CI[0.025`, `0.975]`: Credible interval of `mu` + * When negative control is provided, above columns with `_adj` suffix are provided, which are the corresponding values adjusted for negative control. + * Metrics on per-variant evidence provided in input (provided in `tiling` mode) + * `effective_edit_rate`: Sum of per-variant editing rates over all alleles observed in the input. Allele-level editing rate is divided by the number of variants observed in the allele prior to summing up. + * `n_guides`: # of guides covering the variant. + * `n_coocc`: # of cooccurring variants with a given variant in any alleles observed in the input. +* `output_prefix/bean_sgRNA_result.[model_type].csv`: + * `edit_rate`: Estimated editing rate at the target loci. diff --git a/docs/_build/_sources/count.rst.txt b/docs/_build/_sources/count.rst.txt new file mode 100644 index 0000000..206dbd2 --- /dev/null +++ b/docs/_build/_sources/count.rst.txt @@ -0,0 +1,10 @@ +`bean count` +*********************** +.. mdinclude:: commands/count.md + +Full parameters +================== +.. 
argparse:: + :filename: ../bean/mapping/utils.py + :func: get_input_parser_count + :prog: bean count \ No newline at end of file diff --git a/docs/_build/_sources/count_samples.rst.txt b/docs/_build/_sources/count_samples.rst.txt new file mode 100644 index 0000000..fbbf8f1 --- /dev/null +++ b/docs/_build/_sources/count_samples.rst.txt @@ -0,0 +1,11 @@ +.. _count_samples: +`bean count-samples` +*********************** +.. mdinclude:: commands/count.md + +Full parameters +================== +.. argparse:: + :filename: ../bean/mapping/utils.py + :func: get_input_parser + :prog: bean count-samples \ No newline at end of file diff --git a/docs/_build/_sources/exon_fa_format.md.txt b/docs/_build/_sources/exon_fa_format.md.txt new file mode 100644 index 0000000..2498b39 --- /dev/null +++ b/docs/_build/_sources/exon_fa_format.md.txt @@ -0,0 +1,8 @@ +# Input .fa file format for `bean-filter` +You can provide custom FASTA file with exon sequence entries. Currently only supports positive strand genes. + +* Exon FASTA files can be downloaded from UCSC Genomic sequences / Table Browser: [see the instruction video](https://www.youtube.com/watch?v=T4E0Ez5Vjz8) +* You can manually format as: + * Header line has ` range=chrom:start-end ` and `strand=+/-` tag that is parsed. + * fasta entry has the sequence of exons, where the first (includes 5'-UTR) and last (includes 3'-UTR) exon sequence has lower-case sequence denoting noncoding sequences. +* See the example .fa [here](../tests/data/ldlr_exons.fa). \ No newline at end of file diff --git a/docs/_build/_sources/filter.rst.txt b/docs/_build/_sources/filter.rst.txt new file mode 100644 index 0000000..ce485ab --- /dev/null +++ b/docs/_build/_sources/filter.rst.txt @@ -0,0 +1,11 @@ +.. _filter: +`bean filter` +*********************** +.. mdinclude:: commands/filter.md + +Full parameters +================== +.. 
argparse:: + :filename: ../bean/annotate/utils.py + :func: parse_args + :prog: bean filter \ No newline at end of file diff --git a/docs/_build/_sources/gwas.rst.txt b/docs/_build/_sources/gwas.rst.txt new file mode 100644 index 0000000..e407ad9 --- /dev/null +++ b/docs/_build/_sources/gwas.rst.txt @@ -0,0 +1,5 @@ +GWAS variant library +*********************** +.. mdinclude:: tutorials/ldl_var.md + +See :ref:`subcommands` for the full details. diff --git a/docs/_build/_sources/index.md.txt b/docs/_build/_sources/index.md.txt new file mode 100644 index 0000000..45e2414 --- /dev/null +++ b/docs/_build/_sources/index.md.txt @@ -0,0 +1,4 @@ +--- +layout: default +title: CRISPR-BEAN +--- diff --git a/docs/_build/_sources/index_.rst.txt b/docs/_build/_sources/index_.rst.txt new file mode 100644 index 0000000..d78bc9a --- /dev/null +++ b/docs/_build/_sources/index_.rst.txt @@ -0,0 +1,38 @@ +.. bean documentation master file, created by + sphinx-quickstart on Fri Mar 29 19:10:46 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to `bean`'s documentation! +================================ +=================== +Workflows +=================== +.. toctree:: + :maxdepth: 2 + + gwas + cds + input + +=================== +`bean` subcommands +=================== +.. toctree:: + :maxdepth: 3 + + subcommands + +=================== +Screen data structure +=================== +.. toctree:: + ReporterScreen_api + +================== +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/_build/_sources/input.rst.txt b/docs/_build/_sources/input.rst.txt new file mode 100644 index 0000000..d63417e --- /dev/null +++ b/docs/_build/_sources/input.rst.txt @@ -0,0 +1,4 @@ +.. _input: +Input file format +*********************** +.. 
mdinclude:: commands/input.md \ No newline at end of file diff --git a/docs/_build/_sources/profile.rst.txt b/docs/_build/_sources/profile.rst.txt new file mode 100644 index 0000000..3ee525f --- /dev/null +++ b/docs/_build/_sources/profile.rst.txt @@ -0,0 +1,10 @@ +`bean profile` +*********************** +.. mdinclude:: commands/profile.md + +Full parameters +================== +.. argparse:: + :filename: ../bean/plotting/utils.py + :func: parse_args + :prog: bean profile \ No newline at end of file diff --git a/docs/_build/_sources/qc.rst.txt b/docs/_build/_sources/qc.rst.txt new file mode 100644 index 0000000..604e77a --- /dev/null +++ b/docs/_build/_sources/qc.rst.txt @@ -0,0 +1,11 @@ +.. _qc: +`bean qc` +*********************** +.. mdinclude:: commands/qc.md + +Full parameters +================== +.. argparse:: + :filename: ../bean/qc/parser.py + :func: parse_args + :prog: bean qc \ No newline at end of file diff --git a/docs/_build/_sources/run.rst.txt b/docs/_build/_sources/run.rst.txt new file mode 100644 index 0000000..a6dc1fc --- /dev/null +++ b/docs/_build/_sources/run.rst.txt @@ -0,0 +1,11 @@ +.. _run: +`bean run` +*********************** +.. mdinclude:: commands/run.md + +Full parameters +================== +.. argparse:: + :filename: ../bean/model/parser.py + :func: parse_args + :prog: bean run \ No newline at end of file diff --git a/docs/_build/_sources/subcommands.rst.txt b/docs/_build/_sources/subcommands.rst.txt new file mode 100644 index 0000000..56319ca --- /dev/null +++ b/docs/_build/_sources/subcommands.rst.txt @@ -0,0 +1,14 @@ +.. _subcommands: +=================== +Subcommands +=================== +.. 
toctree:: + :maxdepth: 2 + + count + count_samples + profile + qc + filter + run + create_screen \ No newline at end of file diff --git a/docs/_build/_sources/tutorials/ldl_cds.md.txt b/docs/_build/_sources/tutorials/ldl_cds.md.txt new file mode 100644 index 0000000..ba78a00 --- /dev/null +++ b/docs/_build/_sources/tutorials/ldl_cds.md.txt @@ -0,0 +1,148 @@ +# Tiling sorting screen tutorial +Tiling screen that tiles gRNA densely across locus or multiple loci, selected based on FACS signal quantiles. + + + + + + + + + + +
Library designTiling (gRNAs tile each locus densely)
tiling library design
SelectionCells are sorted based on FACS signal quantiles
variant library design
+ +

+ +## Example workflow +```bash +screen_id=my_sorting_tiling_screen + +# 1. Count gRNA & reporter +bean-count-samples \ +--input tests/data/sample_list_tiling.csv `# Contains fastq file path; see test file for example.`\ +-b A `# Base A is edited (into G)` \ +-f tests/data/test_guide_info_tiling_chrom.csv `# Contains gRNA metadata; see test file for example.`\ +-o ./ `# Output directory` \ +-r `# Quantify reporter edits` \ +-n ${screen_id} `# ID of the screen` \ +--tiling + +# 2. QC samples & guides +bean-qc \ + bean_count_${screen_id}.h5ad `# Input ReporterScreen .h5ad file path` \ + -o bean_count_${screen_id}_masked.h5ad `# Output ReporterScreen .h5ad file path` \ + -r qc_report_${screen_id} `# Prefix for QC report` \ + +# 3. Filter & translate alleles +bean-filter ./bean_count_${screen_id}_masked.h5ad \ +-o ./bean_count_${screen_id}_alleleFiltered \ +--filter-target-basechange `# Filter based on intended base changes. If -b A was provided in bean-count, filters for A>G edit. If -b C was provided, filters for C>T edit.`\ +--filter-window --edit-start-pos 0 --edit-end-pos 19 `# Filter based on editing window in spacer position within reporter.`\ +--filter-allele-proportion 0.1 --filter-sample-proportion 0.3 `#Filter based on allele proportion larger than 0.1 in at least 0.3 (30%) of the control samples.` \ +--translate --translate-genes-list tests/data/gene_symbols.txt + +# 4. Quantify variant effect +bean-run sorting tiling \ + ./bean_count_${screen_id}_alleleFiltered.h5ad \ + -o tests/test_res/var/ \ + --fit-negctrl \ + --scale-by-acc \ + --accessibility-col accessibility +``` +See more details below. + +## 1. 
Count gRNA & reporter (:ref:`count_samples`) +``` +screen_id=my_sorting_tiling_screen + +bean-count-samples \ +--input tests/data/sample_list_tiling.csv `# Contains fastq file path; see test file for example.`\ +-b A `# Base A is edited (into G)` \ +-f tests/data/test_guide_info_tiling_chrom.csv `# Contains gRNA metadata; see test file for example.`\ +-o ./ `# Output directory` \ +-r `# Quantify reporter edits` \ +-n ${screen_id} `# ID of the screen` \ +--tiling +``` +Make sure you follow the [input file format](../../README#input-file-format) for seamless downstream steps. This will produce `./bean_count_${screen_id}.h5ad`. + +## 2. QC (:ref:`qc`) +Base editing data will include QC about editing efficiency. As QC uses predefined column names and values, be sure to follow the [input file guideline](../../README#input-file-format), but you can change the parameters with the full argument list of [`bean-qc`](../../README#bean-qc-qc-of-reporter-screen-data). (Common factors you may want to tweak are `--ctrl-cond=bulk` and `--lfc-conds=top,bot` if you have different sample condition labels.) +``` +bean-qc \ + bean_count_${screen_id}.h5ad `# Input ReporterScreen .h5ad file path` \ + -o bean_count_${screen_id}_masked.h5ad `# Output ReporterScreen .h5ad file path` \ + -r qc_report_${screen_id} `# Prefix for QC report` \ + [--tiling] `# Not required if you have passed --tiling in counting step` +``` + + + +If the data does not include reporter editing data, you can provide the `--no-editing` flag to omit the editing rate QC. + +## 3. Filter alleles (:ref:`filter`) +As a tiling library doesn't have a designated per-gRNA target variant, any base edit observed in the reporter may be a candidate variant, while having too many variants with very low editing rate significantly decreases the power. Variants are filtered based on multiple criteria in `bean-filter`. 
+ +If the screen targets coding sequence, it's beneficial to translate edits into coding variants whenever possible for better power. For translation, provide `--translate` and one of the following: +``` +[ --translate-gene-name GENE_SYMBOL OR + --translate-genes-list path_to_gene_names_file.txt OR + --translate-fasta gene_exon.fa, OR + --translate-fastas-csv gene_exon_fas.csv] +``` +where `path_to_gene_names_file.txt` has one gene symbol per line, and gene symbol uses its MANE transcript (hg38) coordinates of exons. In order to use other reference versions or transcript ID, you'll need to feed in fasta file. See detailed formatting of fasta file [here](../../README#translating-alleles). + +Example allele filtering given we're translating based on MANE transcript exons of multiple gene symbols: + +```bash +bean-filter ./bean_count_${screen_id}_masked.h5ad \ +-o ./bean_count_${screen_id}_alleleFiltered \ +--filter-target-basechange `# Filter based on intended base changes. If -b A was provided in bean-count, filters for A>G edit. If -b C was provided, filters for C>T edit.`\ +--filter-window --edit-start-pos 0 --edit-end-pos 19 `# Filter based on editing window in spacer position within reporter.`\ +--filter-allele-proportion 0.1 --filter-sample-proportion 0.3 `#Filter based on allele proportion larger than 0.1 in at least 0.3 (30%) of the control samples.` \ +--translate --translate-genes-list tests/data/gene_symbols.txt +``` + +Output file `` shows number of alleles per guide and number of guides per variant, where we want high enough values for the latter. See the typical output for dataset with good editing coverage & filtering result [here](../example_filtering_ouptut/). + +## 4. Quantify variant effect (:ref:`run`) +By default, `bean-run [sorting,survival] tiling` uses the most filtered allele counts table for variant identification and quantification of their effects. 
**Check [allele filtering output](../example_filtering_ouptut/)** and choose alternative filtered allele counts table if necessary. + +`bean-run` can take 3 run options to quantify editing rate: +1. From **reporter + accessibility** + 1-1. If your gRNA metadata table (`tests/data/test_guide_info.csv` above) included per-gRNA accessibility score, + ``` + bean-run sorting tiling \ + ./bean_count_${screen_id}_alleleFiltered.h5ad \ + -o tests/test_res/var/ \ + --fit-negctrl \ + --scale-by-acc \ + --accessibility-col accessibility + ``` + 1-2. If your gRNA metadata table (`tests/data/test_guide_info.csv` above) included per-gRNA chromosome & position and you have bigWig file with accessibility signal, + ``` + bean-run sorting tiling \ + ./bean_count_${screen_id}_alleleFiltered.h5ad \ + -o tests/test_res/var/ \ + --fit-negctrl \ + --scale-by-acc \ + --accessibility-bw accessibility.bw + ``` + +2. From **reporter** + ``` + bean-run sorting tiling \ + ./bean_count_${screen_id}_alleleFiltered.h5ad \ + -o tests/test_res/var/ \ + --fit-negctrl + ``` +3. No reporter information, assume the same editing efficiency of all gRNAs. + Use this option if your data don't have editing rate information. + ``` + bean-run sorting tiling \ + ./bean_count_${screen_id}_alleleFiltered.h5ad \ + -o tests/test_res/var/ \ + --fit-negctrl \ + --uniform-edit + ``` \ No newline at end of file diff --git a/docs/_build/_sources/tutorials/ldl_var.md.txt b/docs/_build/_sources/tutorials/ldl_var.md.txt new file mode 100644 index 0000000..1542536 --- /dev/null +++ b/docs/_build/_sources/tutorials/ldl_var.md.txt @@ -0,0 +1,116 @@ +# Variant sorting screen tutorial +GWAS variant screen with per-variant gRNA tiling design, selected based on FACS signal quantiles. + + + + + + + + + + +
Library designVariant (gRNAs tile each target variant)
variant library design
SelectionCells are sorted based on FACS signal quantiles
variant library design
+ +

+ +## Example workflow +```bash +screen_id=my_sorting_tiling_screen + +# 1. Count gRNA & reporter +bean-count-samples \ +--input tests/data/sample_list.csv `# Contains fastq file path; see test file for example.`\ +-b A `# Base A is edited (into G)` \ +-f tests/data/test_guide_info.csv `# Contains gRNA metadata; see test file for example.`\ +-o ./ `# Output directory` \ +-r `# Quantify reporter edits` \ +-n ${screen_id} `# ID of the screen to be counted` + +# 2. QC samples & guides +bean-qc \ + bean_count_${screen_id}.h5ad `# Input ReporterScreen .h5ad file path` \ + -o bean_count_${screen_id}_masked.h5ad `# Output ReporterScreen .h5ad file path` \ + -r qc_report_${screen_id} `# Prefix for QC report` \ + -b `# Remove replicates with no good samples.` + +# 3. Quantify variant effect +bean-run sorting variant \ + tests/data/bean_count_${screen_id}_masked.h5ad \ + -o tests/test_res/var/ \ + --fit-negctrl \ + --scale-by-acc \ + --accessibility-col accessibility +``` +See more details below. + +## 1. Count gRNA & reporter (:ref:`count_samples`) +```bash +screen_id=my_sorting_tiling_screen + +# 1. Count gRNA & reporter +bean-count-samples \ +--input tests/data/sample_list.csv `# Contains fastq file path; see test file for example.`\ +-b A `# Base A is edited (into G)` \ +-f tests/data/test_guide_info.csv `# Contains gRNA metadata; see test file for example.`\ +-o ./ `# Output directory` \ +-r `# Quantify reporter edits` \ +-n ${screen_id} `# ID of the screen to be counted` +``` +Make sure you follow the [input file format](../../README#input-file-format) for seamless downstream steps. This will produce `./bean_count_${screen_id}.h5ad`. + +## 2. QC samples & guides (:ref:`qc`) +Base editing data will include QC about editing efficiency. 
As QC uses predefined column names and values, be sure to follow the [input file guideline](../../README#input-file-format), but you can change the parameters with the full argument list of [`bean-qc`](../../README#bean-qc-qc-of-reporter-screen-data). (Common factors you may want to tweak are `--ctrl-cond=bulk` and `--lfc-conds=top,bot` if you have different sample condition labels.) +``` +bean-qc \ + bean_count_${screen_id}.h5ad `# Input ReporterScreen .h5ad file path` \ + -o bean_count_${screen_id}_masked.h5ad `# Output ReporterScreen .h5ad file path` \ + -r qc_report_${screen_id} `# Prefix for QC report` +``` + + + +If the data does not include reporter editing data, you can provide the `--no-editing` flag to omit the editing rate QC. + + +## 3. Quantify variant effect (:ref:`run`) + +`bean-run` can take 3 run options to quantify editing rate: +1. From **reporter + accessibility** + If your gRNA metadata table (`tests/data/test_guide_info.csv` above) included per-gRNA accessibility score, + ``` + bean-run sorting variant \ + tests/data/bean_count_${screen_id}_masked.h5ad \ + -o tests/test_res/var/ \ + --fit-negctrl \ + --scale-by-acc \ + --accessibility-col accessibility + ``` + If your gRNA metadata table (`tests/data/test_guide_info.csv` above) included per-gRNA chromosome & position and you have bigWig file with accessibility signal, + ``` + bean-run sorting variant \ + tests/data/bean_count_${screen_id}_masked.h5ad \ + -o tests/test_res/var/ \ + --fit-negctrl \ + --scale-by-acc \ + --accessibility-bw accessibility.bw + ``` + +2. From **reporter**, without accessibility + + This assumes that all target sites have uniform chromatin accessibility. + ``` + bean-run sorting variant \ + tests/data/bean_count_${screen_id}_masked.h5ad \ + -o tests/test_res/var/ \ + --fit-negctrl + ``` +3. No reporter information, assume the same editing efficiency of all gRNAs. + Use this option if your data don't have editing outcome information. 
+ ``` + bean-run sorting variant \ + tests/data/bean_count_${screen_id}_masked.h5ad \ + -o tests/test_res/var/ \ + --fit-negctrl \ + --uniform-edit + ``` \ No newline at end of file diff --git a/docs/_build/_static/alabaster.css b/docs/_build/_static/alabaster.css new file mode 100644 index 0000000..e3174bf --- /dev/null +++ b/docs/_build/_static/alabaster.css @@ -0,0 +1,708 @@ +@import url("basic.css"); + +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: Georgia, serif; + font-size: 17px; + background-color: #fff; + color: #000; + margin: 0; + padding: 0; +} + + +div.document { + width: 940px; + margin: 30px auto 0 auto; +} + +div.documentwrapper { + float: left; + width: 100%; +} + +div.bodywrapper { + margin: 0 0 0 220px; +} + +div.sphinxsidebar { + width: 220px; + font-size: 14px; + line-height: 1.5; +} + +hr { + border: 1px solid #B1B4B6; +} + +div.body { + background-color: #fff; + color: #3E4349; + padding: 0 30px 0 30px; +} + +div.body > .section { + text-align: left; +} + +div.footer { + width: 940px; + margin: 20px auto 30px auto; + font-size: 14px; + color: #888; + text-align: right; +} + +div.footer a { + color: #888; +} + +p.caption { + font-family: inherit; + font-size: inherit; +} + + +div.relations { + display: none; +} + + +div.sphinxsidebar { + max-height: 100%; + overflow-y: auto; +} + +div.sphinxsidebar a { + color: #444; + text-decoration: none; + border-bottom: 1px dotted #999; +} + +div.sphinxsidebar a:hover { + border-bottom: 1px solid #999; +} + +div.sphinxsidebarwrapper { + padding: 18px 10px; +} + +div.sphinxsidebarwrapper p.logo { + padding: 0; + margin: -10px 0 0 0px; + text-align: center; +} + +div.sphinxsidebarwrapper h1.logo { + margin-top: -10px; + text-align: center; + margin-bottom: 5px; + text-align: left; +} + +div.sphinxsidebarwrapper h1.logo-name { + margin-top: 0px; +} + +div.sphinxsidebarwrapper p.blurb { + margin-top: 0; + font-style: normal; +} + +div.sphinxsidebar 
h3, +div.sphinxsidebar h4 { + font-family: Georgia, serif; + color: #444; + font-size: 24px; + font-weight: normal; + margin: 0 0 5px 0; + padding: 0; +} + +div.sphinxsidebar h4 { + font-size: 20px; +} + +div.sphinxsidebar h3 a { + color: #444; +} + +div.sphinxsidebar p.logo a, +div.sphinxsidebar h3 a, +div.sphinxsidebar p.logo a:hover, +div.sphinxsidebar h3 a:hover { + border: none; +} + +div.sphinxsidebar p { + color: #555; + margin: 10px 0; +} + +div.sphinxsidebar ul { + margin: 10px 0; + padding: 0; + color: #000; +} + +div.sphinxsidebar ul li.toctree-l1 > a { + font-size: 120%; +} + +div.sphinxsidebar ul li.toctree-l2 > a { + font-size: 110%; +} + +div.sphinxsidebar input { + border: 1px solid #CCC; + font-family: Georgia, serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox input[type="text"] { + width: 160px; +} + +div.sphinxsidebar .search > div { + display: table-cell; +} + +div.sphinxsidebar hr { + border: none; + height: 1px; + color: #AAA; + background: #AAA; + + text-align: left; + margin-left: 0; + width: 50%; +} + +div.sphinxsidebar .badge { + border-bottom: none; +} + +div.sphinxsidebar .badge:hover { + border-bottom: none; +} + +/* To address an issue with donation coming after search */ +div.sphinxsidebar h3.donation { + margin-top: 10px; +} + +/* -- body styles ----------------------------------------------------------- */ + +a { + color: #004B6B; + text-decoration: underline; +} + +a:hover { + color: #6D4100; + text-decoration: underline; +} + +div.body h1, +div.body h2, +div.body h3, +div.body h4, +div.body h5, +div.body h6 { + font-family: Georgia, serif; + font-weight: normal; + margin: 30px 0px 10px 0px; + padding: 0; +} + +div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; } +div.body h2 { font-size: 180%; } +div.body h3 { font-size: 150%; } +div.body h4 { font-size: 130%; } +div.body h5 { font-size: 100%; } +div.body h6 { font-size: 100%; } + +a.headerlink { + color: #DDD; + padding: 0 4px; + text-decoration: none; +} + 
+a.headerlink:hover { + color: #444; + background: #EAEAEA; +} + +div.body p, div.body dd, div.body li { + line-height: 1.4em; +} + +div.admonition { + margin: 20px 0px; + padding: 10px 30px; + background-color: #EEE; + border: 1px solid #CCC; +} + +div.admonition tt.xref, div.admonition code.xref, div.admonition a tt { + background-color: #FBFBFB; + border-bottom: 1px solid #fafafa; +} + +div.admonition p.admonition-title { + font-family: Georgia, serif; + font-weight: normal; + font-size: 24px; + margin: 0 0 10px 0; + padding: 0; + line-height: 1; +} + +div.admonition p.last { + margin-bottom: 0; +} + +div.highlight { + background-color: #fff; +} + +dt:target, .highlight { + background: #FAF3E8; +} + +div.warning { + background-color: #FCC; + border: 1px solid #FAA; +} + +div.danger { + background-color: #FCC; + border: 1px solid #FAA; + -moz-box-shadow: 2px 2px 4px #D52C2C; + -webkit-box-shadow: 2px 2px 4px #D52C2C; + box-shadow: 2px 2px 4px #D52C2C; +} + +div.error { + background-color: #FCC; + border: 1px solid #FAA; + -moz-box-shadow: 2px 2px 4px #D52C2C; + -webkit-box-shadow: 2px 2px 4px #D52C2C; + box-shadow: 2px 2px 4px #D52C2C; +} + +div.caution { + background-color: #FCC; + border: 1px solid #FAA; +} + +div.attention { + background-color: #FCC; + border: 1px solid #FAA; +} + +div.important { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.note { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.tip { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.hint { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.seealso { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.topic { + background-color: #EEE; +} + +p.admonition-title { + display: inline; +} + +p.admonition-title:after { + content: ":"; +} + +pre, tt, code { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; + font-size: 0.9em; +} + +.hll { + background-color: #FFC; + margin: 0 -12px; + padding: 
0 12px; + display: block; +} + +img.screenshot { +} + +tt.descname, tt.descclassname, code.descname, code.descclassname { + font-size: 0.95em; +} + +tt.descname, code.descname { + padding-right: 0.08em; +} + +img.screenshot { + -moz-box-shadow: 2px 2px 4px #EEE; + -webkit-box-shadow: 2px 2px 4px #EEE; + box-shadow: 2px 2px 4px #EEE; +} + +table.docutils { + border: 1px solid #888; + -moz-box-shadow: 2px 2px 4px #EEE; + -webkit-box-shadow: 2px 2px 4px #EEE; + box-shadow: 2px 2px 4px #EEE; +} + +table.docutils td, table.docutils th { + border: 1px solid #888; + padding: 0.25em 0.7em; +} + +table.field-list, table.footnote { + border: none; + -moz-box-shadow: none; + -webkit-box-shadow: none; + box-shadow: none; +} + +table.footnote { + margin: 15px 0; + width: 100%; + border: 1px solid #EEE; + background: #FDFDFD; + font-size: 0.9em; +} + +table.footnote + table.footnote { + margin-top: -15px; + border-top: none; +} + +table.field-list th { + padding: 0 0.8em 0 0; +} + +table.field-list td { + padding: 0; +} + +table.field-list p { + margin-bottom: 0.8em; +} + +/* Cloned from + * https://github.com/sphinx-doc/sphinx/commit/ef60dbfce09286b20b7385333d63a60321784e68 + */ +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +table.footnote td.label { + width: .1px; + padding: 0.3em 0 0.3em 0.5em; +} + +table.footnote td { + padding: 0.3em 0.5em; +} + +dl { + margin-left: 0; + margin-right: 0; + margin-top: 0; + padding: 0; +} + +dl dd { + margin-left: 30px; +} + +blockquote { + margin: 0 0 0 30px; + padding: 0; +} + +ul, ol { + /* Matches the 30px from the narrow-screen "li > ul" selector below */ + margin: 10px 0 10px 30px; + padding: 0; +} + +pre { + background: #EEE; + padding: 7px 30px; + margin: 15px 0px; + line-height: 1.3em; +} + +div.viewcode-block:target { + background: #ffd; +} + +dl pre, blockquote pre, li pre { + margin-left: 0; + padding-left: 30px; +} + +tt, code { + background-color: #ecf0f3; + 
color: #222; + /* padding: 1px 2px; */ +} + +tt.xref, code.xref, a tt { + background-color: #FBFBFB; + border-bottom: 1px solid #fff; +} + +a.reference { + text-decoration: none; + border-bottom: 1px dotted #004B6B; +} + +/* Don't put an underline on images */ +a.image-reference, a.image-reference:hover { + border-bottom: none; +} + +a.reference:hover { + border-bottom: 1px solid #6D4100; +} + +a.footnote-reference { + text-decoration: none; + font-size: 0.7em; + vertical-align: top; + border-bottom: 1px dotted #004B6B; +} + +a.footnote-reference:hover { + border-bottom: 1px solid #6D4100; +} + +a:hover tt, a:hover code { + background: #EEE; +} + + +@media screen and (max-width: 870px) { + + div.sphinxsidebar { + display: none; + } + + div.document { + width: 100%; + + } + + div.documentwrapper { + margin-left: 0; + margin-top: 0; + margin-right: 0; + margin-bottom: 0; + } + + div.bodywrapper { + margin-top: 0; + margin-right: 0; + margin-bottom: 0; + margin-left: 0; + } + + ul { + margin-left: 0; + } + + li > ul { + /* Matches the 30px from the "ul, ol" selector above */ + margin-left: 30px; + } + + .document { + width: auto; + } + + .footer { + width: auto; + } + + .bodywrapper { + margin: 0; + } + + .footer { + width: auto; + } + + .github { + display: none; + } + + + +} + + + +@media screen and (max-width: 875px) { + + body { + margin: 0; + padding: 20px 30px; + } + + div.documentwrapper { + float: none; + background: #fff; + } + + div.sphinxsidebar { + display: block; + float: none; + width: 102.5%; + margin: 50px -30px -20px -30px; + padding: 10px 20px; + background: #333; + color: #FFF; + } + + div.sphinxsidebar h3, div.sphinxsidebar h4, div.sphinxsidebar p, + div.sphinxsidebar h3 a { + color: #fff; + } + + div.sphinxsidebar a { + color: #AAA; + } + + div.sphinxsidebar p.logo { + display: none; + } + + div.document { + width: 100%; + margin: 0; + } + + div.footer { + display: none; + } + + div.bodywrapper { + margin: 0; + } + + div.body { + min-height: 0; + 
padding: 0; + } + + .rtd_doc_footer { + display: none; + } + + .document { + width: auto; + } + + .footer { + width: auto; + } + + .footer { + width: auto; + } + + .github { + display: none; + } +} + + +/* misc. */ + +.revsys-inline { + display: none!important; +} + +/* Hide ugly table cell borders in ..bibliography:: directive output */ +table.docutils.citation, table.docutils.citation td, table.docutils.citation th { + border: none; + /* Below needed in some edge cases; if not applied, bottom shadows appear */ + -moz-box-shadow: none; + -webkit-box-shadow: none; + box-shadow: none; +} + + +/* relbar */ + +.related { + line-height: 30px; + width: 100%; + font-size: 0.9rem; +} + +.related.top { + border-bottom: 1px solid #EEE; + margin-bottom: 20px; +} + +.related.bottom { + border-top: 1px solid #EEE; +} + +.related ul { + padding: 0; + margin: 0; + list-style: none; +} + +.related li { + display: inline; +} + +nav#rellinks { + float: right; +} + +nav#rellinks li+li:before { + content: "|"; +} + +nav#breadcrumbs li+li:before { + content: "\00BB"; +} + +/* Hide certain items when printing */ +@media print { + div.related { + display: none; + } +} \ No newline at end of file diff --git a/docs/_build/_static/basic.css b/docs/_build/_static/basic.css new file mode 100644 index 0000000..4157edf --- /dev/null +++ b/docs/_build/_static/basic.css @@ -0,0 +1,925 @@ +/* + * basic.css + * ~~~~~~~~~ + * + * Sphinx stylesheet -- basic theme. + * + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. 
+ * + */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +div.section::after { + display: block; + content: ''; + clear: left; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox form.search { + overflow: hidden; +} + +div.sphinxsidebar #searchbox input[type="text"] { + float: left; + width: 80%; + padding: 0.25em; + box-sizing: border-box; +} + +div.sphinxsidebar #searchbox input[type="submit"] { + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; +} + + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin: 10px 0 0 20px; + padding: 0; +} + +ul.search li { + padding: 5px 0 5px 20px; + background-image: url(file.png); + background-repeat: no-repeat; + background-position: 0 7px; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li 
p.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body { + min-width: inherit; + max-width: 800px; +} + +div.body p, div.body dd, div.body li, div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +a:visited { + color: #551A8B; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > 
a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, figure.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, figure.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, figure.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +img.align-default, figure.align-default, .figure.align-default { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-default { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar, +aside.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px; + background-color: #ffe; + width: 40%; + float: right; + clear: right; + overflow-x: auto; +} + +p.sidebar-title { + font-weight: bold; +} + +nav.contents, +aside.topic, +div.admonition, div.topic, blockquote { + clear: left; +} + +/* -- topics ---------------------------------------------------------------- */ + +nav.contents, +aside.topic, +div.topic { + border: 1px solid #ccc; + padding: 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + 
+p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- content of sidebars/topics/admonitions -------------------------------- */ + +div.sidebar > :last-child, +aside.sidebar > :last-child, +nav.contents > :last-child, +aside.topic > :last-child, +div.topic > :last-child, +div.admonition > :last-child { + margin-bottom: 0; +} + +div.sidebar::after, +aside.sidebar::after, +nav.contents::after, +aside.topic::after, +div.topic::after, +div.admonition::after, +blockquote::after { + display: block; + content: ''; + clear: both; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + margin-top: 10px; + margin-bottom: 10px; + border: 0; + border-collapse: collapse; +} + +table.align-center { + margin-left: auto; + margin-right: auto; +} + +table.align-default { + margin-left: auto; + margin-right: auto; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +th > :first-child, +td > :first-child { + margin-top: 0px; +} + +th > :last-child, +td > :last-child { + margin-bottom: 0px; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure, figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption, figcaption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number, +figcaption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text, +figcaption span.caption-text { +} + +/* -- field list styles 
----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +/* -- hlist styles ---------------------------------------------------------- */ + +table.hlist { + margin: 1em 0; +} + +table.hlist td { + vertical-align: top; +} + +/* -- object description styles --------------------------------------------- */ + +.sig { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; +} + +.sig-name, code.descname { + background-color: transparent; + font-weight: bold; +} + +.sig-name { + font-size: 1.1em; +} + +code.descname { + font-size: 1.2em; +} + +.sig-prename, code.descclassname { + background-color: transparent; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.sig-param.n { + font-style: italic; +} + +/* C++ specific styling */ + +.sig-inline.c-texpr, +.sig-inline.cpp-texpr { + font-family: unset; +} + +.sig.c .k, .sig.c .kt, +.sig.cpp .k, .sig.cpp .kt { + color: #0033B3; +} + +.sig.c .m, +.sig.cpp .m { + color: #1750EB; +} + +.sig.c .s, .sig.c .sc, +.sig.cpp .s, .sig.cpp .sc { + color: #067D17; +} + + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +:not(li) > ol > li:first-child > :first-child, +:not(li) > ul > li:first-child > :first-child { + margin-top: 0px; +} + +:not(li) > ol > li:last-child > :last-child, +:not(li) > ul > li:last-child > :last-child { + margin-bottom: 0px; +} + +ol.simple ol p, +ol.simple ul p, +ul.simple ol p, +ul.simple ul p { + 
margin-top: 0; +} + +ol.simple > li:not(:first-child) > p, +ul.simple > li:not(:first-child) > p { + margin-top: 0; +} + +ol.simple p, +ul.simple p { + margin-bottom: 0; +} + +aside.footnote > span, +div.citation > span { + float: left; +} +aside.footnote > span:last-of-type, +div.citation > span:last-of-type { + padding-right: 0.5em; +} +aside.footnote > p { + margin-left: 2em; +} +div.citation > p { + margin-left: 4em; +} +aside.footnote > p:last-of-type, +div.citation > p:last-of-type { + margin-bottom: 0em; +} +aside.footnote > p:last-of-type:after, +div.citation > p:last-of-type:after { + content: ""; + clear: both; +} + +dl.field-list { + display: grid; + grid-template-columns: fit-content(30%) auto; +} + +dl.field-list > dt { + font-weight: bold; + word-break: break-word; + padding-left: 0.5em; + padding-right: 5px; +} + +dl.field-list > dd { + padding-left: 0.5em; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0em; +} + +dl { + margin-bottom: 15px; +} + +dd > :first-child { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +.sig dd { + margin-top: 0px; + margin-bottom: 0px; +} + +.sig dl { + margin-top: 0px; + margin-bottom: 0px; +} + +dl > dd:last-child, +dl > dd:last-child > :last-child { + margin-bottom: 0; +} + +dt:target, span.highlighted { + background-color: #fbe54e; +} + +rect.highlighted { + fill: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + 
+.classifier { + font-style: oblique; +} + +.classifier:before { + font-style: normal; + margin: 0 0.5em; + content: ":"; + display: inline-block; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +.translated { + background-color: rgba(207, 255, 207, 0.2) +} + +.untranslated { + background-color: rgba(255, 207, 207, 0.2) +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +pre, div[class*="highlight-"] { + clear: both; +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; + white-space: nowrap; +} + +div[class*="highlight-"] { + margin: 1em 0; +} + +td.linenos pre { + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + display: block; +} + +table.highlighttable tbody { + display: block; +} + +table.highlighttable tr { + display: flex; +} + +table.highlighttable td { + margin: 0; + padding: 0; +} + +table.highlighttable td.linenos { + padding-right: 0.5em; +} + +table.highlighttable td.code { + flex: 1; + overflow: hidden; +} + +.highlight .hll { + display: block; +} + +div.highlight pre, +table.highlighttable pre { + margin: 0; +} + +div.code-block-caption + div { + margin-top: 0; +} + +div.code-block-caption { + margin-top: 1em; + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +table.highlighttable td.linenos, +span.linenos, +div.highlight span.gp { /* gp: Generic.Prompt */ + user-select: none; + -webkit-user-select: text; /* Safari fallback only */ + -webkit-user-select: none; /* Chrome/Safari */ + -moz-user-select: none; /* Firefox */ + -ms-user-select: none; /* IE10+ */ +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + margin: 
1em 0; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: absolute; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file diff --git a/docs/_build/_static/custom.css b/docs/_build/_static/custom.css new file mode 100644 index 0000000..2a924f1 --- /dev/null +++ b/docs/_build/_static/custom.css @@ -0,0 +1 @@ +/* This file intentionally left blank. */ diff --git a/docs/_build/_static/doctools.js b/docs/_build/_static/doctools.js new file mode 100644 index 0000000..d06a71d --- /dev/null +++ b/docs/_build/_static/doctools.js @@ -0,0 +1,156 @@ +/* + * doctools.js + * ~~~~~~~~~~~ + * + * Base JavaScript utilities for all Sphinx HTML documentation. + * + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. 
+ * + */ +"use strict"; + +const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ + "TEXTAREA", + "INPUT", + "SELECT", + "BUTTON", +]); + +const _ready = (callback) => { + if (document.readyState !== "loading") { + callback(); + } else { + document.addEventListener("DOMContentLoaded", callback); + } +}; + +/** + * Small JavaScript module for the documentation. + */ +const Documentation = { + init: () => { + Documentation.initDomainIndexTable(); + Documentation.initOnKeyListeners(); + }, + + /** + * i18n support + */ + TRANSLATIONS: {}, + PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), + LOCALE: "unknown", + + // gettext and ngettext don't access this so that the functions + // can safely bound to a different name (_ = Documentation.gettext) + gettext: (string) => { + const translated = Documentation.TRANSLATIONS[string]; + switch (typeof translated) { + case "undefined": + return string; // no translation + case "string": + return translated; // translation exists + default: + return translated[0]; // (singular, plural) translation tuple exists + } + }, + + ngettext: (singular, plural, n) => { + const translated = Documentation.TRANSLATIONS[singular]; + if (typeof translated !== "undefined") + return translated[Documentation.PLURAL_EXPR(n)]; + return n === 1 ? 
singular : plural; + }, + + addTranslations: (catalog) => { + Object.assign(Documentation.TRANSLATIONS, catalog.messages); + Documentation.PLURAL_EXPR = new Function( + "n", + `return (${catalog.plural_expr})` + ); + Documentation.LOCALE = catalog.locale; + }, + + /** + * helper function to focus on search bar + */ + focusSearchBar: () => { + document.querySelectorAll("input[name=q]")[0]?.focus(); + }, + + /** + * Initialise the domain index toggle buttons + */ + initDomainIndexTable: () => { + const toggler = (el) => { + const idNumber = el.id.substr(7); + const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); + if (el.src.substr(-9) === "minus.png") { + el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; + toggledRows.forEach((el) => (el.style.display = "none")); + } else { + el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; + toggledRows.forEach((el) => (el.style.display = "")); + } + }; + + const togglerElements = document.querySelectorAll("img.toggler"); + togglerElements.forEach((el) => + el.addEventListener("click", (event) => toggler(event.currentTarget)) + ); + togglerElements.forEach((el) => (el.style.display = "")); + if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); + }, + + initOnKeyListeners: () => { + // only install a listener if it is really needed + if ( + !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && + !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS + ) + return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.altKey || event.ctrlKey || event.metaKey) return; + + if (!event.shiftKey) { + switch (event.key) { + case "ArrowLeft": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const prevLink = document.querySelector('link[rel="prev"]'); + if (prevLink && prevLink.href) { + window.location.href = prevLink.href; + 
event.preventDefault(); + } + break; + case "ArrowRight": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const nextLink = document.querySelector('link[rel="next"]'); + if (nextLink && nextLink.href) { + window.location.href = nextLink.href; + event.preventDefault(); + } + break; + } + } + + // some keyboard layouts may need Shift to get / + switch (event.key) { + case "/": + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; + Documentation.focusSearchBar(); + event.preventDefault(); + } + }); + }, +}; + +// quick alias for translations +const _ = Documentation.gettext; + +_ready(Documentation.init); diff --git a/docs/_build/_static/documentation_options.js b/docs/_build/_static/documentation_options.js new file mode 100644 index 0000000..89435bb --- /dev/null +++ b/docs/_build/_static/documentation_options.js @@ -0,0 +1,13 @@ +const DOCUMENTATION_OPTIONS = { + VERSION: '1.0.0', + LANGUAGE: 'en', + COLLAPSE_INDEX: false, + BUILDER: 'html', + FILE_SUFFIX: '.html', + LINK_SUFFIX: '.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt', + NAVIGATION_WITH_KEYS: false, + SHOW_SEARCH_SUMMARY: true, + ENABLE_SEARCH_SHORTCUTS: true, +}; \ No newline at end of file diff --git a/docs/_build/_static/file.png b/docs/_build/_static/file.png new file mode 100644 index 0000000..a858a41 Binary files /dev/null and b/docs/_build/_static/file.png differ diff --git a/docs/_build/_static/language_data.js b/docs/_build/_static/language_data.js new file mode 100644 index 0000000..250f566 --- /dev/null +++ b/docs/_build/_static/language_data.js @@ -0,0 +1,199 @@ +/* + * language_data.js + * ~~~~~~~~~~~~~~~~ + * + * This script contains the language-specific data used by searchtools.js, + * namely the list of stopwords, stemmer, scorer and splitter. + * + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. 
+ * + */ + +var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; + + +/* Non-minified version is copied as a separate JS file, is available */ + +/** + * Porter Stemmer + */ +var Stemmer = function() { + + var step2list = { + ational: 'ate', + tional: 'tion', + enci: 'ence', + anci: 'ance', + izer: 'ize', + bli: 'ble', + alli: 'al', + entli: 'ent', + eli: 'e', + ousli: 'ous', + ization: 'ize', + ation: 'ate', + ator: 'ate', + alism: 'al', + iveness: 'ive', + fulness: 'ful', + ousness: 'ous', + aliti: 'al', + iviti: 'ive', + biliti: 'ble', + logi: 'log' + }; + + var step3list = { + icate: 'ic', + ative: '', + alize: 'al', + iciti: 'ic', + ical: 'ic', + ful: '', + ness: '' + }; + + var c = "[^aeiou]"; // consonant + var v = "[aeiouy]"; // vowel + var C = c + "[^aeiouy]*"; // consonant sequence + var V = v + "[aeiou]*"; // vowel sequence + + var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" 
+ v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if 
(re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + diff --git a/docs/_build/_static/minus.png b/docs/_build/_static/minus.png new file mode 100644 index 0000000..d96755f Binary files /dev/null and b/docs/_build/_static/minus.png differ diff --git a/docs/_build/_static/plus.png b/docs/_build/_static/plus.png new file mode 100644 index 0000000..7107cec Binary files /dev/null and b/docs/_build/_static/plus.png differ diff --git a/docs/_build/_static/pygments.css b/docs/_build/_static/pygments.css new file mode 100644 index 0000000..04a4174 --- /dev/null +++ b/docs/_build/_static/pygments.css @@ -0,0 +1,84 @@ +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #f8f8f8; } +.highlight .c { color: #8f5902; font-style: italic } /* Comment */ +.highlight .err { color: #a40000; border: 1px solid #ef2929 } /* Error */ +.highlight .g { color: #000000 } /* Generic */ +.highlight .k { color: 
#004461; font-weight: bold } /* Keyword */ +.highlight .l { color: #000000 } /* Literal */ +.highlight .n { color: #000000 } /* Name */ +.highlight .o { color: #582800 } /* Operator */ +.highlight .x { color: #000000 } /* Other */ +.highlight .p { color: #000000; font-weight: bold } /* Punctuation */ +.highlight .ch { color: #8f5902; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #8f5902; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #8f5902 } /* Comment.Preproc */ +.highlight .cpf { color: #8f5902; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #8f5902; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #8f5902; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #a40000 } /* Generic.Deleted */ +.highlight .ge { color: #000000; font-style: italic } /* Generic.Emph */ +.highlight .ges { color: #000000 } /* Generic.EmphStrong */ +.highlight .gr { color: #ef2929 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #888888 } /* Generic.Output */ +.highlight .gp { color: #745334 } /* Generic.Prompt */ +.highlight .gs { color: #000000; font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #a40000; font-weight: bold } /* Generic.Traceback */ +.highlight .kc { color: #004461; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #004461; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #004461; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #004461; font-weight: bold } /* Keyword.Pseudo */ +.highlight .kr { color: #004461; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #004461; font-weight: bold } /* Keyword.Type */ +.highlight .ld { color: #000000 } /* Literal.Date */ 
+.highlight .m { color: #990000 } /* Literal.Number */ +.highlight .s { color: #4e9a06 } /* Literal.String */ +.highlight .na { color: #c4a000 } /* Name.Attribute */ +.highlight .nb { color: #004461 } /* Name.Builtin */ +.highlight .nc { color: #000000 } /* Name.Class */ +.highlight .no { color: #000000 } /* Name.Constant */ +.highlight .nd { color: #888888 } /* Name.Decorator */ +.highlight .ni { color: #ce5c00 } /* Name.Entity */ +.highlight .ne { color: #cc0000; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #000000 } /* Name.Function */ +.highlight .nl { color: #f57900 } /* Name.Label */ +.highlight .nn { color: #000000 } /* Name.Namespace */ +.highlight .nx { color: #000000 } /* Name.Other */ +.highlight .py { color: #000000 } /* Name.Property */ +.highlight .nt { color: #004461; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #000000 } /* Name.Variable */ +.highlight .ow { color: #004461; font-weight: bold } /* Operator.Word */ +.highlight .pm { color: #000000; font-weight: bold } /* Punctuation.Marker */ +.highlight .w { color: #f8f8f8 } /* Text.Whitespace */ +.highlight .mb { color: #990000 } /* Literal.Number.Bin */ +.highlight .mf { color: #990000 } /* Literal.Number.Float */ +.highlight .mh { color: #990000 } /* Literal.Number.Hex */ +.highlight .mi { color: #990000 } /* Literal.Number.Integer */ +.highlight .mo { color: #990000 } /* Literal.Number.Oct */ +.highlight .sa { color: #4e9a06 } /* Literal.String.Affix */ +.highlight .sb { color: #4e9a06 } /* Literal.String.Backtick */ +.highlight .sc { color: #4e9a06 } /* Literal.String.Char */ +.highlight .dl { color: #4e9a06 } /* Literal.String.Delimiter */ +.highlight .sd { color: #8f5902; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #4e9a06 } /* Literal.String.Double */ +.highlight .se { color: #4e9a06 } /* Literal.String.Escape */ +.highlight .sh { color: #4e9a06 } /* Literal.String.Heredoc */ +.highlight .si { color: #4e9a06 } /* 
Literal.String.Interpol */ +.highlight .sx { color: #4e9a06 } /* Literal.String.Other */ +.highlight .sr { color: #4e9a06 } /* Literal.String.Regex */ +.highlight .s1 { color: #4e9a06 } /* Literal.String.Single */ +.highlight .ss { color: #4e9a06 } /* Literal.String.Symbol */ +.highlight .bp { color: #3465a4 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #000000 } /* Name.Function.Magic */ +.highlight .vc { color: #000000 } /* Name.Variable.Class */ +.highlight .vg { color: #000000 } /* Name.Variable.Global */ +.highlight .vi { color: #000000 } /* Name.Variable.Instance */ +.highlight .vm { color: #000000 } /* Name.Variable.Magic */ +.highlight .il { color: #990000 } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/docs/_build/_static/searchtools.js b/docs/_build/_static/searchtools.js new file mode 100644 index 0000000..7918c3f --- /dev/null +++ b/docs/_build/_static/searchtools.js @@ -0,0 +1,574 @@ +/* + * searchtools.js + * ~~~~~~~~~~~~~~~~ + * + * Sphinx JavaScript utilities for the full-text search. + * + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ +"use strict"; + +/** + * Simple result scoring code. + */ +if (typeof Scorer === "undefined") { + var Scorer = { + // Implement the following function to further tweak the score for each result + // The function takes a result array [docname, title, anchor, descr, score, filename] + // and returns the new score. 
+ /* + score: result => { + const [docname, title, anchor, descr, score, filename] = result + return score + }, + */ + + // query matches the full name of an object + objNameMatch: 11, + // or matches in the last dotted part of the object name + objPartialMatch: 6, + // Additive scores depending on the priority of the object + objPrio: { + 0: 15, // used to be importantResults + 1: 5, // used to be objectResults + 2: -5, // used to be unimportantResults + }, + // Used when the priority is not in the mapping. + objPrioDefault: 0, + + // query found in title + title: 15, + partialTitle: 7, + // query found in terms + term: 5, + partialTerm: 2, + }; +} + +const _removeChildren = (element) => { + while (element && element.lastChild) element.removeChild(element.lastChild); +}; + +/** + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping + */ +const _escapeRegExp = (string) => + string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string + +const _displayItem = (item, searchTerms, highlightTerms) => { + const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; + const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; + const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX; + const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY; + const contentRoot = document.documentElement.dataset.content_root; + + const [docName, title, anchor, descr, score, _filename] = item; + + let listItem = document.createElement("li"); + let requestUrl; + let linkUrl; + if (docBuilder === "dirhtml") { + // dirhtml builder + let dirname = docName + "/"; + if (dirname.match(/\/index\/$/)) + dirname = dirname.substring(0, dirname.length - 6); + else if (dirname === "index/") dirname = ""; + requestUrl = contentRoot + dirname; + linkUrl = requestUrl; + } else { + // normal html builders + requestUrl = contentRoot + docName + docFileSuffix; + linkUrl = docName + docLinkSuffix; + } + let linkEl = 
listItem.appendChild(document.createElement("a")); + linkEl.href = linkUrl + anchor; + linkEl.dataset.score = score; + linkEl.innerHTML = title; + if (descr) { + listItem.appendChild(document.createElement("span")).innerHTML = + " (" + descr + ")"; + // highlight search terms in the description + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + } + else if (showSearchSummary) + fetch(requestUrl) + .then((responseData) => responseData.text()) + .then((data) => { + if (data) + listItem.appendChild( + Search.makeSearchSummary(data, searchTerms) + ); + // highlight search terms in the summary + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + }); + Search.output.appendChild(listItem); +}; +const _finishSearch = (resultCount) => { + Search.stopPulse(); + Search.title.innerText = _("Search Results"); + if (!resultCount) + Search.status.innerText = Documentation.gettext( + "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories." + ); + else + Search.status.innerText = _( + `Search finished, found ${resultCount} page(s) matching the search query.` + ); +}; +const _displayNextItem = ( + results, + resultCount, + searchTerms, + highlightTerms, +) => { + // results left, load the summary and display it + // this is intended to be dynamic (don't sub resultsCount) + if (results.length) { + _displayItem(results.pop(), searchTerms, highlightTerms); + setTimeout( + () => _displayNextItem(results, resultCount, searchTerms, highlightTerms), + 5 + ); + } + // search finished, update title and status message + else _finishSearch(resultCount); +}; + +/** + * Default splitQuery function. Can be overridden in ``sphinx.search`` with a + * custom function per language. 
+ * + * The regular expression works by splitting the string on consecutive characters + * that are not Unicode letters, numbers, underscores, or emoji characters. + * This is the same as ``\W+`` in Python, preserving the surrogate pair area. + */ +if (typeof splitQuery === "undefined") { + var splitQuery = (query) => query + .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) + .filter(term => term) // remove remaining empty strings +} + +/** + * Search Module + */ +const Search = { + _index: null, + _queued_query: null, + _pulse_status: -1, + + htmlToText: (htmlString) => { + const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); + htmlElement.querySelectorAll(".headerlink").forEach((el) => { el.remove() }); + const docContent = htmlElement.querySelector('[role="main"]'); + if (docContent !== undefined) return docContent.textContent; + console.warn( + "Content block not found. Sphinx search tries to obtain it via '[role=main]'. Could you check your theme or template." 
+ ); + return ""; + }, + + init: () => { + const query = new URLSearchParams(window.location.search).get("q"); + document + .querySelectorAll('input[name="q"]') + .forEach((el) => (el.value = query)); + if (query) Search.performSearch(query); + }, + + loadIndex: (url) => + (document.body.appendChild(document.createElement("script")).src = url), + + setIndex: (index) => { + Search._index = index; + if (Search._queued_query !== null) { + const query = Search._queued_query; + Search._queued_query = null; + Search.query(query); + } + }, + + hasIndex: () => Search._index !== null, + + deferQuery: (query) => (Search._queued_query = query), + + stopPulse: () => (Search._pulse_status = -1), + + startPulse: () => { + if (Search._pulse_status >= 0) return; + + const pulse = () => { + Search._pulse_status = (Search._pulse_status + 1) % 4; + Search.dots.innerText = ".".repeat(Search._pulse_status); + if (Search._pulse_status >= 0) window.setTimeout(pulse, 500); + }; + pulse(); + }, + + /** + * perform a search for something (or wait until index is loaded) + */ + performSearch: (query) => { + // create the required interface elements + const searchText = document.createElement("h2"); + searchText.textContent = _("Searching"); + const searchSummary = document.createElement("p"); + searchSummary.classList.add("search-summary"); + searchSummary.innerText = ""; + const searchList = document.createElement("ul"); + searchList.classList.add("search"); + + const out = document.getElementById("search-results"); + Search.title = out.appendChild(searchText); + Search.dots = Search.title.appendChild(document.createElement("span")); + Search.status = out.appendChild(searchSummary); + Search.output = out.appendChild(searchList); + + const searchProgress = document.getElementById("search-progress"); + // Some themes don't use the search progress node + if (searchProgress) { + searchProgress.innerText = _("Preparing search..."); + } + Search.startPulse(); + + // index already loaded, the 
browser was quick! + if (Search.hasIndex()) Search.query(query); + else Search.deferQuery(query); + }, + + /** + * execute search (requires search index to be loaded) + */ + query: (query) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + const allTitles = Search._index.alltitles; + const indexEntries = Search._index.indexentries; + + // stem the search terms and add them to the correct list + const stemmer = new Stemmer(); + const searchTerms = new Set(); + const excludedTerms = new Set(); + const highlightTerms = new Set(); + const objectTerms = new Set(splitQuery(query.toLowerCase().trim())); + splitQuery(query.trim()).forEach((queryTerm) => { + const queryTermLower = queryTerm.toLowerCase(); + + // maybe skip this "word" + // stopwords array is from language_data.js + if ( + stopwords.indexOf(queryTermLower) !== -1 || + queryTerm.match(/^\d+$/) + ) + return; + + // stem the word + let word = stemmer.stemWord(queryTermLower); + // select the correct list + if (word[0] === "-") excludedTerms.add(word.substr(1)); + else { + searchTerms.add(word); + highlightTerms.add(queryTermLower); + } + }); + + if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js + localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" ")) + } + + // console.debug("SEARCH: searching for:"); + // console.info("required: ", [...searchTerms]); + // console.info("excluded: ", [...excludedTerms]); + + // array of [docname, title, anchor, descr, score, filename] + let results = []; + _removeChildren(document.getElementById("search-progress")); + + const queryLower = query.toLowerCase(); + for (const [title, foundTitles] of Object.entries(allTitles)) { + if (title.toLowerCase().includes(queryLower) && (queryLower.length >= title.length/2)) { + for (const [file, id] of foundTitles) { + let score = Math.round(100 * queryLower.length / title.length) + results.push([ + docNames[file], + 
titles[file] !== title ? `${titles[file]} > ${title}` : title, + id !== null ? "#" + id : "", + null, + score, + filenames[file], + ]); + } + } + } + + // search for explicit entries in index directives + for (const [entry, foundEntries] of Object.entries(indexEntries)) { + if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { + for (const [file, id] of foundEntries) { + let score = Math.round(100 * queryLower.length / entry.length) + results.push([ + docNames[file], + titles[file], + id ? "#" + id : "", + null, + score, + filenames[file], + ]); + } + } + } + + // lookup as object + objectTerms.forEach((term) => + results.push(...Search.performObjectSearch(term, objectTerms)) + ); + + // lookup as search terms in fulltext + results.push(...Search.performTermsSearch(searchTerms, excludedTerms)); + + // let the scorer override scores with a custom scoring function + if (Scorer.score) results.forEach((item) => (item[4] = Scorer.score(item))); + + // now sort the results by score (in opposite order of appearance, since the + // display function below uses pop() to retrieve items) and then + // alphabetically + results.sort((a, b) => { + const leftScore = a[4]; + const rightScore = b[4]; + if (leftScore === rightScore) { + // same score: sort alphabetically + const leftTitle = a[1].toLowerCase(); + const rightTitle = b[1].toLowerCase(); + if (leftTitle === rightTitle) return 0; + return leftTitle > rightTitle ? -1 : 1; // inverted is intentional + } + return leftScore > rightScore ? 
1 : -1; + }); + + // remove duplicate search results + // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept + let seen = new Set(); + results = results.reverse().reduce((acc, result) => { + let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(','); + if (!seen.has(resultStr)) { + acc.push(result); + seen.add(resultStr); + } + return acc; + }, []); + + results = results.reverse(); + + // for debugging + //Search.lastresults = results.slice(); // a copy + // console.info("search results:", Search.lastresults); + + // print the results + _displayNextItem(results, results.length, searchTerms, highlightTerms); + }, + + /** + * search for object names + */ + performObjectSearch: (object, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const objects = Search._index.objects; + const objNames = Search._index.objnames; + const titles = Search._index.titles; + + const results = []; + + const objectSearchCallback = (prefix, match) => { + const name = match[4] + const fullname = (prefix ? prefix + "." : "") + name; + const fullnameLower = fullname.toLowerCase(); + if (fullnameLower.indexOf(object) < 0) return; + + let score = 0; + const parts = fullnameLower.split("."); + + // check for different match types: exact matches of full name or + // "last name" (i.e. 
last dotted part) + if (fullnameLower === object || parts.slice(-1)[0] === object) + score += Scorer.objNameMatch; + else if (parts.slice(-1)[0].indexOf(object) > -1) + score += Scorer.objPartialMatch; // matches in last name + + const objName = objNames[match[1]][2]; + const title = titles[match[0]]; + + // If more than one term searched for, we require other words to be + // found in the name/title/description + const otherTerms = new Set(objectTerms); + otherTerms.delete(object); + if (otherTerms.size > 0) { + const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase(); + if ( + [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0) + ) + return; + } + + let anchor = match[3]; + if (anchor === "") anchor = fullname; + else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname; + + const descr = objName + _(", in ") + title; + + // add custom score for some objects according to scorer + if (Scorer.objPrio.hasOwnProperty(match[2])) + score += Scorer.objPrio[match[2]]; + else score += Scorer.objPrioDefault; + + results.push([ + docNames[match[0]], + fullname, + "#" + anchor, + descr, + score, + filenames[match[0]], + ]); + }; + Object.keys(objects).forEach((prefix) => + objects[prefix].forEach((array) => + objectSearchCallback(prefix, array) + ) + ); + return results; + }, + + /** + * search for full-text terms in the index + */ + performTermsSearch: (searchTerms, excludedTerms) => { + // prepare search + const terms = Search._index.terms; + const titleTerms = Search._index.titleterms; + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + + const scoreMap = new Map(); + const fileMap = new Map(); + + // perform the search on the required terms + searchTerms.forEach((word) => { + const files = []; + const arr = [ + { files: terms[word], score: Scorer.term }, + { files: titleTerms[word], score: Scorer.title }, + ]; + // add support for partial matches + 
if (word.length > 2) { + const escapedWord = _escapeRegExp(word); + Object.keys(terms).forEach((term) => { + if (term.match(escapedWord) && !terms[word]) + arr.push({ files: terms[term], score: Scorer.partialTerm }); + }); + Object.keys(titleTerms).forEach((term) => { + if (term.match(escapedWord) && !titleTerms[word]) + arr.push({ files: titleTerms[word], score: Scorer.partialTitle }); + }); + } + + // no match but word was a required one + if (arr.every((record) => record.files === undefined)) return; + + // found search word in contents + arr.forEach((record) => { + if (record.files === undefined) return; + + let recordFiles = record.files; + if (recordFiles.length === undefined) recordFiles = [recordFiles]; + files.push(...recordFiles); + + // set score for the word in each file + recordFiles.forEach((file) => { + if (!scoreMap.has(file)) scoreMap.set(file, {}); + scoreMap.get(file)[word] = record.score; + }); + }); + + // create the mapping + files.forEach((file) => { + if (fileMap.has(file) && fileMap.get(file).indexOf(word) === -1) + fileMap.get(file).push(word); + else fileMap.set(file, [word]); + }); + }); + + // now check if the files don't contain excluded terms + const results = []; + for (const [file, wordList] of fileMap) { + // check if all requirements are matched + + // as search terms with length < 3 are discarded + const filteredTermCount = [...searchTerms].filter( + (term) => term.length > 2 + ).length; + if ( + wordList.length !== searchTerms.size && + wordList.length !== filteredTermCount + ) + continue; + + // ensure that none of the excluded terms is in the search result + if ( + [...excludedTerms].some( + (term) => + terms[term] === file || + titleTerms[term] === file || + (terms[term] || []).includes(file) || + (titleTerms[term] || []).includes(file) + ) + ) + break; + + // select one (max) score for the file. 
+ const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w])); + // add result to the result list + results.push([ + docNames[file], + titles[file], + "", + null, + score, + filenames[file], + ]); + } + return results; + }, + + /** + * helper function to return a node containing the + * search summary for a given text. keywords is a list + * of stemmed words. + */ + makeSearchSummary: (htmlText, keywords) => { + const text = Search.htmlToText(htmlText); + if (text === "") return null; + + const textLower = text.toLowerCase(); + const actualStartPosition = [...keywords] + .map((k) => textLower.indexOf(k.toLowerCase())) + .filter((i) => i > -1) + .slice(-1)[0]; + const startWithContext = Math.max(actualStartPosition - 120, 0); + + const top = startWithContext === 0 ? "" : "..."; + const tail = startWithContext + 240 < text.length ? "..." : ""; + + let summary = document.createElement("p"); + summary.classList.add("context"); + summary.textContent = top + text.substr(startWithContext, 240).trim() + tail; + + return summary; + }, +}; + +_ready(Search.init); diff --git a/docs/_build/_static/sphinx_highlight.js b/docs/_build/_static/sphinx_highlight.js new file mode 100644 index 0000000..8a96c69 --- /dev/null +++ b/docs/_build/_static/sphinx_highlight.js @@ -0,0 +1,154 @@ +/* Highlighting utilities for Sphinx HTML documentation. */ +"use strict"; + +const SPHINX_HIGHLIGHT_ENABLED = true + +/** + * highlight a given string on a node by wrapping it in + * span elements with the given class name. 
+ */ +const _highlight = (node, addItems, text, className) => { + if (node.nodeType === Node.TEXT_NODE) { + const val = node.nodeValue; + const parent = node.parentNode; + const pos = val.toLowerCase().indexOf(text); + if ( + pos >= 0 && + !parent.classList.contains(className) && + !parent.classList.contains("nohighlight") + ) { + let span; + + const closestNode = parent.closest("body, svg, foreignObject"); + const isInSVG = closestNode && closestNode.matches("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.classList.add(className); + } + + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + const rest = document.createTextNode(val.substr(pos + text.length)); + parent.insertBefore( + span, + parent.insertBefore( + rest, + node.nextSibling + ) + ); + node.nodeValue = val.substr(0, pos); + /* There may be more occurrences of search term in this node. So call this + * function recursively on the remaining fragment. + */ + _highlight(rest, addItems, text, className); + + if (isInSVG) { + const rect = document.createElementNS( + "http://www.w3.org/2000/svg", + "rect" + ); + const bbox = parent.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute("class", className); + addItems.push({ parent: parent, target: rect }); + } + } + } else if (node.matches && !node.matches("button, select, textarea")) { + node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); + } +}; +const _highlightText = (thisNode, text, className) => { + let addItems = []; + _highlight(thisNode, addItems, text, className); + addItems.forEach((obj) => + obj.parent.insertAdjacentElement("beforebegin", obj.target) + ); +}; + +/** + * Small JavaScript module for the documentation. 
+ */ +const SphinxHighlight = { + + /** + * highlight the search words provided in localstorage in the text + */ + highlightSearchWords: () => { + if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight + + // get and clear terms from localstorage + const url = new URL(window.location); + const highlight = + localStorage.getItem("sphinx_highlight_terms") + || url.searchParams.get("highlight") + || ""; + localStorage.removeItem("sphinx_highlight_terms") + url.searchParams.delete("highlight"); + window.history.replaceState({}, "", url); + + // get individual terms from highlight string + const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); + if (terms.length === 0) return; // nothing to do + + // There should never be more than one element matching "div.body" + const divBody = document.querySelectorAll("div.body"); + const body = divBody.length ? divBody[0] : document.querySelector("body"); + window.setTimeout(() => { + terms.forEach((term) => _highlightText(body, term, "highlighted")); + }, 10); + + const searchBox = document.getElementById("searchbox"); + if (searchBox === null) return; + searchBox.appendChild( + document + .createRange() + .createContextualFragment( + '" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + 
if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query. + */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/docs/_build/cds.html b/docs/_build/cds.html new file mode 100644 index 0000000..8b93f22 --- /dev/null +++ b/docs/_build/cds.html @@ -0,0 +1,272 @@ + + + + + + + + Coding sequence tiling library — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Coding sequence tiling library

+
+

Tiling sorting screen tutorial

+

A tiling screen that tiles gRNAs densely across a locus or multiple loci, with cells selected based on FACS signal quantiles.

+ + + + + + + + + +
Library design: Tiling (gRNAs tile each locus densely)
tiling library design
Selection: Cells are sorted based on FACS signal quantiles
variant library design



+
+

Example workflow

+
screen_id=my_sorting_tiling_screen
+
+# 1. Count gRNA & reporter
+bean-count-samples \
+--input tests/data/sample_list_tiling.csv          `# Contains fastq file path; see test file for example.`\
+-b A                                               `# Base A is edited (into G)` \
+-f tests/data/test_guide_info_tiling_chrom.csv     `# Contains gRNA metadata; see test file for example.`\
+-o ./                                              `# Output directory` \
+-r                                                 `# Quantify reporter edits` \
+-n ${screen_id}                                       `# ID of the screen` \
+--tiling
+
+# 2. QC samples & guides
+bean-qc \
+  bean_count_${screen_id}.h5ad           `# Input ReporterScreen .h5ad file path` \
+  -o bean_count_${screen_id}_masked.h5ad `# Output ReporterScreen .h5ad file path` \
+  -r qc_report_${screen_id}              `# Prefix for QC report` \
+
+# 3. Filter & translate alleles
+bean-filter ./bean_count_${screen_id}_masked.h5ad \
+-o ./bean_count_${screen_id}_alleleFiltered \
+--filter-target-basechange                             `# Filter based on intended base changes. If -b A was provided in bean-count, filters for A>G edit. If -b C was provided, filters for C>T edit.`\
+--filter-window --edit-start-pos 0 --edit-end-pos 19   `# Filter based on editing window in spacer position within reporter.`\
+--filter-allele-proportion 0.1 --filter-sample-proportion 0.3 `#Filter based on allele proportion larger than 0.1 in at least 0.3 (30%) of the control samples.` \
+--translate --translate-genes-list tests/data/gene_symbols.txt
+
+# 4. Quantify variant effect
+bean-run sorting tiling \
+    ./bean_count_${screen_id}_alleleFiltered.h5ad \
+    -o tests/test_res/var/ \
+    --fit-negctrl \
+    --scale-by-acc \
+    --accessibility-col accessibility
+
+
+

See more details below.

+
+
+

1. Count gRNA & reporter (bean count-samples)

+
screen_id=my_sorting_tiling_screen
+
+bean-count-samples \
+--input tests/data/sample_list_tiling.csv          `# Contains fastq file path; see test file for example.`\
+-b A                                               `# Base A is edited (into G)` \
+-f tests/data/test_guide_info_tiling_chrom.csv     `# Contains gRNA metadata; see test file for example.`\
+-o ./                                              `# Output directory` \
+-r                                                 `# Quantify reporter edits` \
+-n ${screen_id}                                       `# ID of the screen` \
+--tiling
+
+
+

Make sure you follow the input file format for seamless downstream steps. This will produce ./bean_count_${screen_id}.h5ad.

+
+
+

2. QC (bean qc)

+

Base editing data will include QC about editing efficiency. As QC uses predefined column names and values, be sure to follow the input file guideline; you can change the parameters using the full argument list of bean-qc (see ../../README#bean-qc-qc-of-reporter-screen-data). (Common parameters you may want to tweak are --ctrl-cond=bulk and --lfc-conds=top,bot if you have different sample condition labels.)

+
bean-qc \
+  bean_count_${screen_id}.h5ad           `# Input ReporterScreen .h5ad file path` \
+  -o bean_count_${screen_id}_masked.h5ad `# Output ReporterScreen .h5ad file path` \
+  -r qc_report_${screen_id}              `# Prefix for QC report` \
+  [--tiling]                          `# Not required if you have passed --tiling in counting step`
+
+
+

If the data does not include reporter editing data, you can provide the --no-editing flag to omit the editing rate QC.

+
+
+

3. Filter alleles (bean filter)

+

As a tiling library doesn’t have a designated per-gRNA target variant, any base edit observed in the reporter may be a candidate variant, while having too many variants with very low editing rates significantly decreases the power. Variants are filtered based on multiple criteria in bean-filter.

+

If the screen targets coding sequence, it’s beneficial to translate edits into coding variants whenever possible for better power. For translation, provide --translate and one of the following:

+
[ --translate-gene-name GENE_SYMBOL OR
+  --translate-genes-list path_to_gene_names_file.txt OR
+  --translate-fasta gene_exon.fa, OR
+  --translate-fastas-csv gene_exon_fas.csv]
+
+
+

where path_to_gene_names_file.txt has one gene symbol per line, and gene symbol uses its MANE transcript (hg38) coordinates of exons. In order to use other reference versions or transcript ID, you’ll need to feed in fasta file. See detailed formatting of fasta file here.

+

Example allele filtering given we’re translating based on MANE transcript exons of multiple gene symbols:

+
bean-filter ./bean_count_${screen_id}_masked.h5ad \
+-o ./bean_count_${screen_id}_alleleFiltered \
+--filter-target-basechange                             `# Filter based on intended base changes. If -b A was provided in bean-count, filters for A>G edit. If -b C was provided, filters for C>T edit.`\
+--filter-window --edit-start-pos 0 --edit-end-pos 19   `# Filter based on editing window in spacer position within reporter.`\
+--filter-allele-proportion 0.1 --filter-sample-proportion 0.3 `#Filter based on allele proportion larger than 0.1 in at least 0.3 (30%) of the control samples.` \
+--translate --translate-genes-list tests/data/gene_symbols.txt
+
+
+

Output file `` shows the number of alleles per guide and the number of guides per variant, where we want high enough values for the latter. See the typical output for a dataset with good editing coverage & filtering result here.

+
+
+

4. Quantify variant effect (bean run)

+

By default, bean-run [sorting,survival] tiling uses most filtered allele counts table for variant identification and quantification of their effects. **Check allele filtering output** and choose alternative filtered allele counts table if necessary.

+

bean-run can take 3 run options to quantify editing rate:

+
    +
  1. +
    From reporter + accessibility

    1-1. If your gRNA metadata table (tests/data/test_guide_info.csv above) included per-gRNA accessibility score,

    +
    +
    +
       bean-run sorting tiling \
    +   ./bean_count_${screen_id}_alleleFiltered.h5ad \
    +   -o tests/test_res/var/ \
    +   --fit-negctrl \
    +   --scale-by-acc \
    +   --accessibility-col accessibility
    +
    +1-2. If your gRNA metadata table (\ ``tests/data/test_guide_info.csv`` above) included per-gRNA chromosome & position and you have bigWig file with accessibility signal,
    +
    +
    +
    bean-run sorting tiling \
    +./bean_count_${screen_id}_alleleFiltered.h5ad \
    +-o tests/test_res/var/ \
    +--fit-negctrl \
    +--scale-by-acc \
    +--accessibility-bw accessibility.bw
    +
    +
    +
  2. +
  3. From reporter

    +
    bean-run sorting tiling \
    +./bean_count_${screen_id}_alleleFiltered.h5ad \
    +-o tests/test_res/var/ \
    +--fit-negctrl
    +
    +
    +
  4. +
  5. +
    No reporter information, assume the same editing efficiency of all gRNAs.

    Use this option if your data don’t have editing rate information.

    +
    +
    +
    bean-run sorting tiling \
    +./bean_count_${screen_id}_alleleFiltered.h5ad \
    +-o tests/test_res/var/ \
    +--fit-negctrl \
    +--uniform-edit
    +
    +
    +
  6. +
+

See Subcommands for the full details.

+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/commands/count.html b/docs/_build/commands/count.html new file mode 100644 index 0000000..4b364ed --- /dev/null +++ b/docs/_build/commands/count.html @@ -0,0 +1,142 @@ + + + + + + + + bean count[-samples]: Count (reporter) screen data — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

bean count[-samples]: Count (reporter) screen data

+

bean count-samples (or bean count for a single sample) maps guide into guide counts, allowing for base transition in spacer sequence. When the matched reporter information is provided, it can count the target site edits and alleles produced by each guide. Mapping is efficiently done based on CRISPResso2 modified for base-edit-aware mapping.

+
bean count-samples \
+  --input sample_list.csv   `# sample with lines 'R1_filepath,R2_filepath,sample_name\n'` \
+  -b A                      `# base that is being edited (A/G)` \
+  -f sgRNA_info_table.csv   `# sgRNA information` \
+  -o .                      `# output directory` \
+  -r                        `# read edit/allele information from reporter` \
+  -t 12                     `# number of threads` \
+  --name my_sorting_screen  `# name of this sample run` \
+
+
+
bean count --R1 R1.fq --R2 R2.fq -b A -f sgRNA_info_table.csv -r
+
+
+

By default, bean count[-samples] assumes R1 and R2 are trimmed of the adapter sequence. You may need to adjust the command arguments according to your read structure.

+
+

Read structure

+
+

See full detail below.

+
+
+

Input file format

+

See Input file format for input file formats.

+
+
+

Output file format

+

count or count-samples produces .h5ad and .xlsx file with guide and per-guide allele counts.

+
    +
  • .h5ad: This output file follows annotated matrix format compatible with AnnData and is based on Screen object in [perturb-tools](https://github.com/pinellolab/perturb-tools). See Data Structure section for more information.

  • +
  • .xlsx: This output file contains .guides, .samples, .X[_bcmatch,_edits]. (allele_tables are often too large to write into an Excel!)

  • +
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/commands/create-screen.html b/docs/_build/commands/create-screen.html new file mode 100644 index 0000000..8ea5daf --- /dev/null +++ b/docs/_build/commands/create-screen.html @@ -0,0 +1,123 @@ + + + + + + + + bean create-screen: Create ReporterScreen object from flat files — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

bean create-screen: Create ReporterScreen object from flat files

+
bean create-screen gRNA_library.csv sample_list.csv gRNA_counts_table.csv
+
+
+
+

Input

+
    +
  • gRNA_library.csv

  • +
  • sample_list.csv

  • +
  • gRNA_counts_table.csv: Table with gRNA ID in the first column and sample IDs as the column names (first row). gRNA_library.csv and sample_list.csv should be formatted as described in Input file format.

  • +
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/commands/filter.html b/docs/_build/commands/filter.html new file mode 100644 index 0000000..22d1c8e --- /dev/null +++ b/docs/_build/commands/filter.html @@ -0,0 +1,154 @@ + + + + + + + + filter: Filtering (and optionally translating) alleles — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

filter: Filtering (and optionally translating) alleles

+

As tiling mode of bean run accounts for any robustly observed alleles, bean filter filters for such alleles.

+
bean filter my_sorting_screen_masked.h5ad \
+-o my_sorting_screen_filtered.h5ad  `# Output file path` \
+
+
+
+
+

Output

+

Above command produces

+
    +
  • my_sorting_screen_filtered.h5ad with filtered alleles stored in .uns,

  • +
  • my_sorting_screen_filtered.filtered_allele_stats.pdf, and my_sorting_screen_filtered.filter_log.txt that report allele count stats in each filtering step.

  • +
+

You may want to adjust the filtering parameters to obtain optimal balance between # guides per variant & # variants that are scored. See example outputs of filtering step here.

+
+
+

Translating alleles

+

If you want to obtain amino acid level variants for coding sequence tiling screens, provide the coding sequence positions so that variants occurring within the coding sequence will be translated. This is optional, but **highly recommended** to increase per-(coding)variant support.

+

Allele translation

+
bean filter my_sorting_screen.h5ad \
+-o my_sorting_screen_masked.h5ad \
+--translate   `# Translate coding variants` \
+[ --translate-gene-name GENE_SYMBOL OR
+  --translate-genes-list path_to_gene_names_file.txt OR
+  --translate-fasta gene_exon.fa, OR
+  --translate-fastas-csv gene_exon_fas.csv]
+
+
+
    +
  • When library covers a single gene, do either of the following:

    +
      +
    1. Feed --translate-gene-name GENE_SYMBOL if your genomic_pos column of sgRNA_info_tbl is compatible with MANE transcript’s reference genome. (As of 10/23/2023, GRCh38). This will automatically load the exon positions based on MANE transcript annotation.

    2. +
    3. To use your custom coding sequence and exon positions, feed --translate-fasta gene_exon.fa argument where gene_exon.fa is the FASTA file with entries of exons. See full details here.

    4. +
    +
  • +
  • When library covers multiple genes, do either of the following:

    +
      +
    1. Feed --translate-genes-list path_to_gene_names_file.txt where path_to_gene_names_file.txt is file with one gene symbol per line.

    2. +
    3. Feed --translate-fastas-csv gene_exon_fas.csv where gene_exon_fas.csv is the csv file with lines gene_id,gene_exon_fasta_path without header. Each FASTA file in gene_exon_fasta_path is formatted as the single-gene FASTA file.

    4. +
    +
  • +
  • Translation will keep the variants outside the coding sequence as nucleotide-level variants, while aggregating variants leading to the same coding sequence variants.

  • +
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/commands/input.html b/docs/_build/commands/input.html new file mode 100644 index 0000000..f0b9c24 --- /dev/null +++ b/docs/_build/commands/input.html @@ -0,0 +1,160 @@ + + + + + + + + sgRNA_info_table.csv — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

This document describes the input files of bean count-samples.

+
+

sgRNA_info_table.csv

+

File should contain following columns.

+
    +
  • name: gRNA ID column

  • +
  • sequence: gRNA sequence

  • +
  • barcode: R2 barcode to help match reporter to gRNA, written in the sense direction (as in R1)

  • +
  • In order to use accessibility in the variant effect quantification, provide accessibility information in one of two options. (For non-targeting guides, provide NA values (empty cell).)

    +
      +
    • Option 1: chrom & genomic_pos: Chromosome (ex. chr19) and genomic position of guide sequence. You will have to provide the path to the bigwig file with matching reference version in bean run.

    • +
    • Option 2: accessibility_signal: ATAC-seq signal value of the target loci of each guide.

    • +
    +
  • +
  • For variant library (gRNAs are designed to target specific variants and ignores bystander edits)

    +
      +
    • target: This column denotes which target variant/element of each gRNA. This is not used in bean count[-samples] but required to run bean run in later steps.

    • +
    • target_group: If negative/positive control gRNA will be considered in bean qc and/or bean run, specify as “NegCtrl”/“PosCtrl” in this column.

    • +
    • target_pos: If --match_target_pos flag is used, input file needs target_pos which specifies 0-based relative position of targeted base within Reporter sequence.

    • +
    +
  • +
  • For tiling library (gRNAs tile coding / noncoding sequences)

    +
      +
    • strand: Specifies gRNA strand information relative to the reference genome.

    • +
    • chrom: Chromosome of gRNA targeted locus.

    • +
    • start_pos: gRNA starting position in the genome. Required when you provide strand column. Should specify the smaller coordinate value among start and end position regardless of gRNA strandedness.

    • +
    +
  • +
+

Also see examples for variant library and tiling library.

+
+
+

sample_list.csv

+

File should contain following columns with header.

+
    +
  • R1_filepath: Path to read 1 .fastq[.gz] file

  • +
  • R2_filepath: Path to read 2 .fastq[.gz] file

  • +
  • sample_id: ID of sequencing sample

  • +
  • replicate: Replicate # of this sample (Should NOT contain .)

  • +
  • condition: Name of the sorting bin (ex. top, bot), or label of timepoint (ex. D5, D18)

  • +
+

For FACS sorting screens:

+
    +
  • upper_quantile: FACS sorting upper quantile

  • +
  • lower_quantile: FACS sorting lower quantile

  • +
+

For proliferation / survival screens:

+
    +
  • time: Numeric time following the base editing of each sample.

  • +
+

Also see examples for FACS sorting screen.

+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/commands/profile.html b/docs/_build/commands/profile.html new file mode 100644 index 0000000..32f78d4 --- /dev/null +++ b/docs/_build/commands/profile.html @@ -0,0 +1,119 @@ + + + + + + + + bean profile: Profile editing patterns — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

bean profile: Profile editing patterns

+
bean profile my_sorting_screen.h5ad -o output_prefix `# Prefix for editing profile report`
+
+
+
+
+

Output

+

Above command produces prefix_editing_preference.[html,ipynb] as editing preferences (see example).

+

Allele translation

+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/commands/qc.html b/docs/_build/commands/qc.html new file mode 100644 index 0000000..5526162 --- /dev/null +++ b/docs/_build/commands/qc.html @@ -0,0 +1,282 @@ + + + + + + + + bean qc: QC of reporter screen data — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

bean qc: QC of reporter screen data

+
bean qc \
+  my_sorting_screen.h5ad             `# Input ReporterScreen .h5ad file path` \
+  -o my_sorting_screen_masked.h5ad   `# Output ReporterScreen .h5ad file path` \
+  -r qc_report_my_sorting_screen     `# Prefix for QC report` \
+  --ctrl-cond presort                `# "condition" column in the control sample before selection. Mean gRNA editing rates in these samples are reported. ` \
+# Inspect the output qc_report_my_sorting_screen.html to tweak QC threshold
+
+bean qc \
+  my_sorting_screen.h5ad              \
+  -o my_sorting_screen_masked.h5ad    \
+  -r qc_report_my_sorting_screen      \
+  #[--count-correlation-thres 0.7 ...]\
+  -b
+
+
+

bean qc supports following quality control and masks samples with low quality. Specifically:

+

Allele translation

+
    +
  • Plots guide coverage and the uniformity of coverage

  • +
  • Guide count correlation between samples

  • +
  • Log fold change correlation when positive controls are provided

  • +
  • Plots editing rate distribution

  • +
  • Identify samples with low guide coverage/guide count correlation/editing rate and mask the sample in bdata.samples.mask

  • +
  • Identify outlier guides to filter out

  • +
+
+
+

Output

+

Above command produces

+
    +
  • my_sorting_screen_masked.h5ad without problematic replicate and guides and with sample masks, and

  • +
  • qc_report_my_sorting_screen.[html,ipynb] as QC report.
    +##### Optional arguments:

  • +
  • -o OUT_SCREEN_PATH, --out-screen-path OUT_SCREEN_PATH

    +
    Path where quality-filtered ReporterScreen object to be written to
    +
    +
    +
  • +
  • -r OUT_REPORT_PREFIX, --out-report-prefix OUT_REPORT_PREFIX

    +
    Output prefix of qc report (prefix.html, prefix.ipynb)
    +
    +
    +
  • +
+
+

QC thresholds:

+
    +
  • --count-correlation-thres COUNT_CORRELATION_THRES

    +
    Correlation threshold to mask out.
    +
    +
    +
  • +
  • --edit-rate-thres EDIT_RATE_THRES

    +
    Mean editing rate threshold per sample to mask out.
    +
    +
    +
  • +
  • --lfc-thres LFC_THRES

    +
    Positive guides' correlation threshold to filter out.
    +
    +
    +
  • +
+
+
+

Run options:

+
    +
  • -b, --remove-bad-replicates

    +
    Remove replicates with at least two of its samples meet the QC threshold (bean run does not support having only one sorting bin sample for a replicate).
    +
    +
    +
  • +
  • -i, --ignore-missing-samples

    +
    If the flag is not provided, if the ReporterScreen object does not contain all conditions for
    +each replicate, make fake empty samples. If the flag is provided, don't add dummy samples.
    +
    +
    +
  • +
  • --no-editing Ignore QC about editing. Can be used for QC of other editing modalities.

  • +
  • --dont-recalculate-edits

    +
    When ReporterScreen.layers['edit_count'] exists, do not recalculate the edit counts from
    +ReporterScreen.uns['allele_count'].
    +
    +
    +
  • +
+
+
+

Input .h5ad formatting:

+

Note that these arguments will change the way the QC metrics are calculated for guides, samples, or replicates.

+
    +
  • --tiling TILING Specify that the guide library is tiling library without ‘n guides per target’ design

  • +
  • --replicate-label REPLICATE_LABEL

    +
    Label of column in `bdata.samples` that describes replicate ID.
    +
    +
    +
  • +
  • --sample-covariates SAMPLE_COVARIATES

    +
    Comma-separated list of column names in `bdata.samples` that describes non-selective
    +experimental condition. (drug treatment, etc.)
    +
    +
    +
  • +
  • --condition-label CONDITION_LABEL

    +
    Label of column in `bdata.samples` that describes experimental condition. (sorting bin, time,
    +etc.)
    +
    +
    +

    ###### Editing rate calculation

    +
      +
    • --control-condition CTRL_COND

      +
      Values of column `ReporterScreen.samples[condition_label]` for guide-level editing rate
      +to be calculated. Default is `None`, which considers all samples.
      +
      +
      +
    • +
    • --rel-pos-is-reporter

      +
      Specifies whether `edit_start_pos` and `edit_end_pos` are relative to reporter position. If
      +`False`, those are relative to spacer position.
      +
      +
      +

      Editing rate is calculated with following parameters in

      +
        +
      • Variant screens:

        +
          +
        • --target-pos-col TARGET_POS_COL

          +
          Target position column in `bdata.guides` specifying target edit position in reporter
          +
          +
          +
        • +
        +
      • +
      • tiling screens:

        +
          +
        • --edit-start-pos EDIT_START_POS

          +
          Edit start position to quantify editing rate on, 0-based inclusive.
          +
          +
          +
        • +
        • --edit-end-pos EDIT_END_POS

          +
          Edit end position to quantify editing rate on, 0-based exclusive.
          +
          +
          +

          ###### LFC of positive controls

          +
        • +
        +
      • +
      +
    • +
    • --posctrl-col POSCTRL_COL

      +
      Column name in ReporterScreen.guides DataFrame that specifies guide category. To use all
      +gRNAs, feed empty string ''.
      +
      +
      +
    • +
    • --posctrl-val POSCTRL_VAL

      +
      Value in ReporterScreen.guides[`posctrl_col`] that specifies guide will be used as the
      +positive control in calculating log fold change.
      +
      +
      +
    • +
    • --lfc-conds LFC_CONDS

      +
      Values of column `ReporterScreen.samples[condition_label]` that LFC will be calculated
      +between, delimited by comma
      +
      +
      +
    • +
    +
  • +
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/commands/run.html b/docs/_build/commands/run.html new file mode 100644 index 0000000..4711982 --- /dev/null +++ b/docs/_build/commands/run.html @@ -0,0 +1,201 @@ + + + + + + + + bean run: Quantify variant effects — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

bean run: Quantify variant effects

+

BEAN uses Bayesian network to incorporate gRNA editing outcome to provide posterior estimate of variant phenotype. The Bayesian network reflects data generation process. Briefly,

+
    +
  1. Cellular phenotype (either the phenotype that cells are sorted on for sorting screens, or log(proliferation rate) for survival screens) is modeled as the Gaussian mixture distribution of wild-type phenotype and variant phenotype.

  2. +
  3. The weight of the mixture components are inferred from the reporter editing outcome and the chromatin accessibility of the loci.

  4. +
  5. Cells with each gRNA, whose phenotype is formulated as the mixture distribution, are sorted by the phenotypic quantile to produce the gRNA counts.

  6. +
+

For the full detail, see the method section of the BEAN manuscript.

+

model

+



+
+
+

Usage example

+
bean run sorting[survival] variant[tiling] my_sorting_screen_filtered.h5ad \
+[--uniform-edit, --scale-by-acc [--acc-bw-path accessibility_signal.bw, --acc-col accessibility]] \
+-o output_prefix/ \
+--fit-negctrl
+
+
+

See full list of parameters below.

+
+
+

Input

+

my_sorting_screen_filtered.h5ad can be produced by one of the following:

+
    +
  1. `bean count-samples <#bean-count-samples-count-reporter-screen-data>`_ when you have raw .fastq file

  2. +
  3. (Limited to bean run variant mode) bean create-screen when you have flat .csv tables of gRNA metadata table, sample metadata table, gRNA counts table (# guides x # samples), and optionally # edits table:

    +
    +

    bean create-screen gRNA_info_table.csv sample_info_table.csv gRNA_counts_table.csv [--edits edit_counts_table.csv -o output.h5ad]

    +
    +
      +
    • gRNA_info_table.csv should have following columns.

      +
        +
      • name: gRNA ID column

      • +
      • target: This column denotes which target variant/element of each gRNA.

      • +
      • target_group [Optional]: If negative control gRNA will be used, specify as “NegCtrl” in this column.

      • +
      +
    • +
    • sample_info_table.csv should have following columns.

      +
        +
      • sample_id: ID of sequencing sample

      • +
      • replicate: Replicate # of this sample

      • +
      • bin: Name of the sorting bin

      • +
      • upper_quantile: FACS sorting upper quantile

      • +
      • lower_quantile: FACS sorting lower quantile

      • +
      +
    • +
    • gRNA_counts_table.csv should be formatted as follows.

      +
        +
      • Columns include one of sample_id columns in sample_info_table.csv file.

      • +
      • 1st row (row index) follows name (gRNA ID) in gRNA_info_table.csv file.

      • +
      +
    • +
    +
  4. +
  5. You can manually create the AnnData object with more annotations including allele counts: see API tutorial for full detail.

  6. +
+
+
+

Output

+

model

+

Above command produces

+
    +
  • output_prefix/bean_element_result.[model_type].csv with following columns:

    +
      +
    • Estimated variant effect sizes

      +
        +
      • mu (Effect size): Mean of variant phenotype, given the wild type has standard normal phenotype distribution of mu = 0, sd = 1.

      • +
      • mu_sd: Mean of variant phenotype mu is modeled as normal distribution. The column shows fitted standard deviation of mu that quantify the uncertainty of the variant effect.

      • +
      • mu_z: z-score of mu

      • +
      • sd: Standard deviation of variant phenotype, given the wild type has standard normal phenotype distribution of mu = 0, sd = 1.

      • +
      • CI[0.025, 0.975]: Credible interval of mu

      • +
      • When negative control is provided, above columns with _adj suffix are provided, which are the corresponding values adjusted for negative control.

      • +
      +
    • +
    • Metrics on per-variant evidence provided in input (provided in tiling mode)

      +
        +
      • effective_edit_rate: Sum of per-variant editing rates over all alleles observed in the input. Allele-level editing rate is divided by the number of variants observed in the allele prior to summing up.

      • +
      • n_guides: # of guides covering the variant.

      • +
      • n_coocc: # of cooccurring variants with a given variant in any alleles observed in the input.

      • +
      +
    • +
    +
  • +
  • output_prefix/bean_sgRNA_result.[model_type].csv:

    +
      +
    • edit_rate: Estimated editing rate at the target loci.

    • +
    +
  • +
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/count.html b/docs/_build/count.html new file mode 100644 index 0000000..2aa8294 --- /dev/null +++ b/docs/_build/count.html @@ -0,0 +1,299 @@ + + + + + + + + bean count — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

bean count

+
+

bean count[-samples]: Count (reporter) screen data

+

bean count-samples (or bean count for a single sample) maps guide into guide counts, allowing for base transition in spacer sequence. When the matched reporter information is provided, it can count the target site edits and alleles produced by each guide. Mapping is efficiently done based on CRISPResso2 modified for base-edit-aware mapping.

+
bean count-samples \
+  --input sample_list.csv   `# sample with lines 'R1_filepath,R2_filepath,sample_name\n'` \
+  -b A                      `# base that is being edited (A/G)` \
+  -f sgRNA_info_table.csv   `# sgRNA information` \
+  -o .                      `# output directory` \
+  -r                        `# read edit/allele information from reporter` \
+  -t 12                     `# number of threads` \
+  --name my_sorting_screen  `# name of this sample run` \
+
+
+
bean count --R1 R1.fq --R2 R2.fq -b A -f sgRNA_info_table.csv -r
+
+
+

By default, bean count[-samples] assumes R1 and R2 are trimmed of the adapter sequence. You may need to adjust the command arguments according to your read structure.

+
+

Read structure

+
+

See full detail below.

+
+
+

Input file format

+

See Input file format for input file formats.

+
+
+

Output file format

+

count or count-samples produces .h5ad and .xlsx file with guide and per-guide allele counts.

+
    +
  • .h5ad: This output file follows annotated matrix format compatible with AnnData and is based on Screen object in [perturb-tools](https://github.com/pinellolab/perturb-tools). See Data Structure section for more information.

  • +
  • .xlsx: This output file contains .guides, .samples, .X[_bcmatch,_edits]. (allele_tables are often too large to write into an Excel!)

  • +
+
+
+

Full parameters

+
usage: bean count [-h] --R1 R1 --R2 R2 -b EDITED_BASE -f SGRNA_FILENAME
+                  [--guide-start-seq GUIDE_START_SEQ]
+                  [--guide-end-seq GUIDE_END_SEQ]
+                  [--barcode-start-seq BARCODE_START_SEQ] [-r]
+                  [-q MIN_AVERAGE_READ_QUALITY] [-s MIN_SINGLE_BP_QUALITY]
+                  [-n NAME] [-o OUTPUT_FOLDER] [-l REPORTER_LENGTH]
+                  [--keep-intermediate] [--qstart-R1 QSTART_R1]
+                  [--qend-R1 QEND_R1] [--qstart-R2 QSTART_R2]
+                  [--qend-R2 QEND_R2] [--gstart-reporter GSTART_REPORTER]
+                  [--match-target-pos] [--target-pos-col TARGET_POS_COL]
+                  [--guide-bc GUIDE_BC] [--guide-bc-len GUIDE_BC_LEN]
+                  [--offset] [--align-fasta ALIGN_FASTA] [--string-allele]
+                  [-g] [-m] [--tiling]
+
+
+
+

Named Arguments

+
+
--R1
+

FASTQ file path for read 1

+
+
--R2
+

FASTQ file path for read 2.

+
+
-b, --edited-base
+

For base editors, the base that should be ignored when matching the gRNA sequence

+
+
-f, --sgRNA-filename
+

sgRNA description file. The format requires three columns: name, sequence, barcode [ reporter [,strand, target_pos], [start_pos, offset] ].

+
+
--guide-start-seq
+

Guide starts after this sequence in R1

+

Default: “”

+
+
--guide-end-seq
+

Guide ends before this sequence in R1

+

Default: “”

+
+
--barcode-start-seq
+

Barcode + reporter starts after this sequence in R2, denoted as the sense direction (the same sequence direction as R1).

+

Default: “”

+
+
-r, --count-reporter
+

Count reporter edits.

+

Default: False

+
+
-q, --min-average-read-quality
+

Minimum average quality score (phred33) to keep a read

+

Default: 30

+
+
-s, --min-single-bp-quality
+

Minimum single bp score (phred33) to keep a read

+

Default: 0

+
+
-n, --name
+

Output name

+

Default: “”

+
+
-o, --output-folder
+

Default: “”

+
+
-l, --reporter-length
+

length of the reporter

+

Default: 32

+
+
--keep-intermediate
+

Keep all the intermediate files

+

Default: False

+
+
--qstart-R1
+

Start position of the read when filtering for quality score of the read 1

+

Default: 0

+
+
--qend-R1
+

End position of the read when filtering for quality score of the read 1

+

Default: 47

+
+
--qstart-R2
+

Same as qstart_R1, for read 2 fastq file

+

Default: 0

+
+
--qend-R2
+

Same as qend_R1, for read 2 fastq file

+

Default: 36

+
+
--gstart-reporter
+

Start position of the guide sequence in the reporter

+

Default: 6

+
+
--match-target-pos
+

Count the edit in the exact target position.

+

Default: False

+
+
--target-pos-col
+

Column name specifying the relative target position within reporter sequence.

+

Default: “target_pos”

+
+
--guide-bc
+

Construct has guide barcode

+

Default: True

+
+
--guide-bc-len
+

Guide barcode sequence length at the beginning of the R2

+

Default: 4

+
+
--offset
+

Guide file has offset column that will be added to the relative position of reporters.

+

Default: False

+
+
--align-fasta
+

gRNA is aligned to this sequence to infer the offset. Can be used when the exact offset is not provided.

+

Default: “”

+
+
--string-allele
+

Store allele as quality filtered string instead of Allele object

+

Default: False

+
+
-g, --count-guide-edits
+

count the self editing of guides

+

Default: False

+
+
-m, --count-guide-reporter-alleles
+

count the matched allele of guide and reporter edit

+

Default: False

+
+
--tiling
+

Specify that the guide library is tiling library without ‘n guides per target’ design

+

Default: False

+
+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/count_samples.html b/docs/_build/count_samples.html new file mode 100644 index 0000000..6f7d4af --- /dev/null +++ b/docs/_build/count_samples.html @@ -0,0 +1,320 @@ + + + + + + + + bean count-samples — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

bean count-samples

+
+

bean count[-samples]: Count (reporter) screen data

+

bean count-samples (or bean count for a single sample) maps guide into guide counts, allowing for base transition in spacer sequence. When the matched reporter information is provided, it can count the target site edits and alleles produced by each guide. Mapping is efficiently done based on CRISPResso2 modified for base-edit-aware mapping.

+
bean count-samples \
+  --input sample_list.csv   `# sample with lines 'R1_filepath,R2_filepath,sample_name\n'` \
+  -b A                      `# base that is being edited (A/G)` \
+  -f sgRNA_info_table.csv   `# sgRNA information` \
+  -o .                      `# output directory` \
+  -r                        `# read edit/allele information from reporter` \
+  -t 12                     `# number of threads` \
+  --name my_sorting_screen  `# name of this sample run` \
+
+
+
bean count --R1 R1.fq --R2 R2.fq -b A -f sgRNA_info_table.csv -r
+
+
+

By default, bean count[-samples] assumes R1 and R2 are trimmed of the adapter sequence. You may need to adjust the command arguments according to your read structure.

+
+

Read structure

+
+

See full detail below.

+
+
+

Input file format

+

See Input file format for input file formats.

+
+
+

Output file format

+

count or count-samples produces .h5ad and .xlsx file with guide and per-guide allele counts.

+
    +
  • .h5ad: This output file follows annotated matrix format compatible with AnnData and is based on Screen object in [perturb-tools](https://github.com/pinellolab/perturb-tools). See Data Structure section for more information.

  • +
  • .xlsx: This output file contains .guides, .samples, .X[_bcmatch,_edits]. (allele_tables are often too large to write into an Excel!)

  • +
+
+
+

Full parameters

+
usage: bean count-samples [-h] -i SAMPLE_LIST -b EDITED_BASE -f SGRNA_FILENAME
+                          [--guide-start-seq GUIDE_START_SEQ]
+                          [--guide-end-seq GUIDE_END_SEQ]
+                          [--barcode-start-seq BARCODE_START_SEQ] [-r]
+                          [-q MIN_AVERAGE_READ_QUALITY]
+                          [-s MIN_SINGLE_BP_QUALITY] [-n NAME]
+                          [-o OUTPUT_FOLDER] [-l REPORTER_LENGTH]
+                          [--keep-intermediate] [--qstart-R1 QSTART_R1]
+                          [--qend-R1 QEND_R1] [--qstart-R2 QSTART_R2]
+                          [--qend-R2 QEND_R2]
+                          [--gstart-reporter GSTART_REPORTER]
+                          [--match-target-pos]
+                          [--target-pos-col TARGET_POS_COL]
+                          [--guide-bc GUIDE_BC] [--guide-bc-len GUIDE_BC_LEN]
+                          [--offset] [--align-fasta ALIGN_FASTA]
+                          [--string-allele] [-g] [-m] [--tiling] [-t THREADS]
+                          [--guide-start-seqs-file GUIDE_START_SEQS_FILE]
+                          [--guide-end-seqs-file GUIDE_END_SEQS_FILE]
+                          [--barcode-start-seqs-file BARCODE_START_SEQS_FILE]
+                          [--rerun]
+
+
+
+

Named Arguments

+
+
-i, --sample-list
+

List of fastq and sample ids. Formatted as R1_filepath,R2_filepath,sample_id

+
+
-b, --edited-base
+

For base editors, the base that should be ignored when matching the gRNA sequence

+
+
-f, --sgRNA-filename
+

sgRNA description file. The format requires three columns: name, sequence, barcode [ reporter [,strand, target_pos], [start_pos, offset] ].

+
+
--guide-start-seq
+

Guide starts after this sequence in R1

+

Default: “”

+
+
--guide-end-seq
+

Guide ends before this sequence in R1

+

Default: “”

+
+
--barcode-start-seq
+

Barcode + reporter starts after this sequence in R2, denoted as the sense direction (the same sequence direction as R1).

+

Default: “”

+
+
-r, --count-reporter
+

Count reporter edits.

+

Default: False

+
+
-q, --min-average-read-quality
+

Minimum average quality score (phred33) to keep a read

+

Default: 30

+
+
-s, --min-single-bp-quality
+

Minimum single bp score (phred33) to keep a read

+

Default: 0

+
+
-n, --name
+

Output name

+

Default: “”

+
+
-o, --output-folder
+

Default: “”

+
+
-l, --reporter-length
+

length of the reporter

+

Default: 32

+
+
--keep-intermediate
+

Keep all the intermediate files

+

Default: False

+
+
--qstart-R1
+

Start position of the read when filtering for quality score of the read 1

+

Default: 0

+
+
--qend-R1
+

End position of the read when filtering for quality score of the read 1

+

Default: 47

+
+
--qstart-R2
+

Same as qstart_R1, for read 2 fastq file

+

Default: 0

+
+
--qend-R2
+

Same as qend_R1, for read 2 fastq file

+

Default: 36

+
+
--gstart-reporter
+

Start position of the guide sequence in the reporter

+

Default: 6

+
+
--match-target-pos
+

Count the edit in the exact target position.

+

Default: False

+
+
--target-pos-col
+

Column name specifying the relative target position within reporter sequence.

+

Default: “target_pos”

+
+
--guide-bc
+

Construct has guide barcode

+

Default: True

+
+
--guide-bc-len
+

Guide barcode sequence length at the beginning of the R2

+

Default: 4

+
+
--offset
+

Guide file has an offset column that will be added to the relative position of reporters.

+

Default: False

+
+
--align-fasta
+

gRNA is aligned to this sequence to infer the offset. Can be used when the exact offset is not provided.

+

Default: “”

+
+
--string-allele
+

Store allele as quality filtered string instead of Allele object

+

Default: False

+
+
-g, --count-guide-edits
+

count the self editing of guides

+

Default: False

+
+
-m, --count-guide-reporter-alleles
+

count the matched allele of guide and reporter edit

+

Default: False

+
+
--tiling
+

Specify that the guide library is tiling library without ‘n guides per target’ design

+

Default: False

+
+
-t, --threads
+

Number of threads

+

Default: 10

+
+
--guide-start-seqs-file
+

CSV file path with per-sample guide_start_seq to be used. Formatted as sample_id,guide_start_seq

+
+
--guide-end-seqs-file
+

CSV file path with per-sample guide_end_seq to be used. Formatted as sample_id,guide_end_seq

+
+
--barcode-start-seqs-file
+

CSV file path with per-sample barcode_start_seq to be used. Formatted as sample_id,barcode_start_seq

+
+
--rerun
+

Recount each sample

+

Default: False

+
+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/exon_fa_format.html b/docs/_build/exon_fa_format.html new file mode 100644 index 0000000..36916fe --- /dev/null +++ b/docs/_build/exon_fa_format.html @@ -0,0 +1,122 @@ + + + + + + + + Input .fa file format for bean-filter — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Input .fa file format for bean-filter

+

You can provide a custom FASTA file with exon sequence entries. Currently only positive-strand genes are supported.

+
    +
  • Exon FASTA files can be downloaded from UCSC Genomic sequences / Table Browser: see the instruction video

  • +
  • You can manually format as:

    +
      +
    • Header line has range=chrom:start-end and strand=+/- tag that is parsed.

    • +
    • fasta entry has the sequence of exons, where the first (includes 5’-UTR) and last (includes 3’-UTR) exon sequence has lower-case sequence denoting noncoding sequences.

    • +
    +
  • +
  • See the example .fa here.

  • +
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/filter.html b/docs/_build/filter.html new file mode 100644 index 0000000..688d41f --- /dev/null +++ b/docs/_build/filter.html @@ -0,0 +1,274 @@ + + + + + + + + bean filter — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

bean filter

+
+

filter: Filtering (and optionally translating) alleles

+

As tiling mode of bean run accounts for any robustly observed alleles, bean filter filters for such alleles.

+
bean filter my_sorting_screen_masked.h5ad \
+-o my_sorting_screen_filtered.h5ad  `# Output file path` \
+
+
+
+
+

Output

+

Above command produces

+
    +
  • my_sorting_screen_filtered.h5ad with filtered alleles stored in .uns,

  • +
  • my_sorting_screen_filtered.filtered_allele_stats.pdf, and my_sorting_screen_filtered.filter_log.txt that report allele count stats in each filtering step.

  • +
+

You may want to adjust the filtering parameters to obtain an optimal balance between # guides per variant & # variants that are scored. See example outputs of filtering step here.

+
+
+

Translating alleles

+

If you want to obtain amino acid level variants for coding sequence tiling screens, provide coding sequence positions so that variants occurring within the coding sequence will be translated. This is optional, but highly recommended to increase per-(coding)variant support.

+

Allele translation

+
bean filter my_sorting_screen.h5ad \
+-o my_sorting_screen_masked.h5ad \
+--translate   `# Translate coding variants` \
+[ --translate-gene GENE_SYMBOL OR
+  --translate-genes-list path_to_gene_names_file.txt OR
+  --translate-fasta gene_exon.fa, OR
+  --translate-fastas-csv gene_exon_fas.csv]
+
+
+
    +
  • When library covers a single gene, do either of the following:

    +
      +
    1. Feed --translate-gene GENE_SYMBOL if your genomic_pos column of sgRNA_info_tbl is compatible with MANE transcript’s reference genome (as of 10/23/2023, GRCh38). This will automatically load the exon positions based on MANE transcript annotation.

    2. +
    3. To use your custom coding sequence and exon positions, feed --translate-fasta gene_exon.fa argument where gene_exon.fa is the FASTA file with entries of exons. See full details here.

    4. +
    +
  • +
  • When library covers multiple genes, do either of the following:

    +
      +
    1. Feed --translate-genes-list path_to_gene_names_file.txt where path_to_gene_names_file.txt is file with one gene symbol per line.

    2. +
    3. Feed --translate-fastas-csv gene_exon_fas.csv where gene_exon_fas.csv is the csv file with lines gene_id,gene_exon_fasta_path without header. Each FASTA file in gene_exon_fasta_path is formatted as the single-gene FASTA file.

    4. +
    +
  • +
  • Translation will keep the variants outside the coding sequence as nucleotide-level variants, while aggregating variants leading to the same coding sequence variants.

  • +
+
+
+

Full parameters

+

Filter alleles based on edit position in spacer and frequency across samples.

+

+
usage: bean filter [-h] [--output-prefix OUTPUT_PREFIX]
+                   [--plasmid-path PLASMID_PATH]
+                   [--edit-start-pos EDIT_START_POS]
+                   [--edit-end-pos EDIT_END_POS]
+                   [--jaccard-threshold JACCARD_THRESHOLD] [--filter-window]
+                   [--keep-indels] [--filter-target-basechange] [--translate]
+                   [--translate-fasta TRANSLATE_FASTA]
+                   [--translate-fastas-csv TRANSLATE_FASTAS_CSV]
+                   [--translate-gene TRANSLATE_GENE]
+                   [--translate-genes-list TRANSLATE_GENES_LIST]
+                   [--filter-allele-proportion FILTER_ALLELE_PROPORTION]
+                   [--filter-allele-count FILTER_ALLELE_COUNT]
+                   [--filter-sample-proportion FILTER_SAMPLE_PROPORTION]
+                   [--load-tmp]
+                   bdata_path
+
+
+
+

Positional Arguments

+
+
bdata_path
+

Input ReporterScreen file whose alleles will be filtered.

+
+
+
+
+

Named Arguments

+
+
--output-prefix, -o
+

Output prefix for log and ReporterScreen file with allele assignment

+
+
--plasmid-path, -p
+

Plasmid ReporterScreen object path. If provided, alleles are filtered based on if a nucleotide edit is more significantly enriched in sample compared to the plasmid data. Negative control data where no edit is expected can be fed in instead of plasmid library.

+
+
--edit-start-pos, -s
+

0-based start position (inclusive) of edit relative to the start of guide spacer.

+

Default: 2

+
+
--edit-end-pos, -e
+

0-based end position (exclusive) of edit relative to the start of guide spacer.

+

Default: 7

+
+
--jaccard-threshold, -j
+

Jaccard Index threshold when the alleles are mapped to the most similar alleles. In each filtering step, allele counts of filtered out alleles will be mapped to the most similar allele only if they have Jaccard Index of shared edit higher than this threshold.

+

Default: 0.3

+
+
--filter-window, -w
+

Only consider edits within the window provided by (edit-start-pos, edit-end-pos). If this flag is not provided, --edit-start-pos and --edit-end-pos flags are ignored.

+

Default: False

+
+
--keep-indels, -i
+

Include indels.

+

Default: False

+
+
--filter-target-basechange, -b
+

Only consider target edit (stored in bdata.uns[‘target_base_change’])

+

Default: False

+
+
--translate, -t
+

Translate alleles

+

Default: False

+
+
--translate-fasta, -f
+

fasta file path with exon positions. If not provided, LDLR hg19 coordinates will be used.

+
+
--translate-fastas-csv, -fs
+

.csv with two columns with gene IDs and FASTA file path corresponding to each gene.

+
+
--translate-gene, -g
+

Gene symbol if a gene is tiled. If not provided, LDLR hg19 coordinates will be used.

+
+
--translate-genes-list, -gs
+

File with gene symbols, one per line, if multiple genes are tiled.

+
+
--filter-allele-proportion, -ap
+

If provided, alleles that exceed filter_allele_proportion in filter-sample-proportion will be retained.

+

Default: 0.05

+
+
--filter-allele-count, -ac
+

If provided, alleles that exceed filter_allele_proportion AND filter_allele_count in filter-sample-proportion will be retained.

+

Default: 5

+
+
--filter-sample-proportion, -sp
+

If filter_allele_proportion is provided, alleles that exceed filter_allele_proportion in filter-sample-proportion will be retained.

+

Default: 0.2

+
+
--load-tmp
+

Load temporary file and work from there.

+

Default: False

+
+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/genindex.html b/docs/_build/genindex.html new file mode 100644 index 0000000..a301425 --- /dev/null +++ b/docs/_build/genindex.html @@ -0,0 +1,110 @@ + + + + + + + Index — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ + +

Index

+ +
+ +
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/gwas.html b/docs/_build/gwas.html new file mode 100644 index 0000000..7655e7f --- /dev/null +++ b/docs/_build/gwas.html @@ -0,0 +1,243 @@ + + + + + + + + GWAS variant library — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

GWAS variant library

+
+

Variant sorting screen tutorial

+

GWAS variant screen with per-variant gRNA tiling design, selected based on FACS signal quantiles.

+ + + + + + + + + +
Library design: Variant (gRNAs tile each target variant)
variant library design
Selection: Cells are sorted based on FACS signal quantiles
variant library design



+
+

Example workflow

+
screen_id=my_sorting_tiling_screen
+
+# 1. Count gRNA & reporter
+bean-count-samples \
+--input tests/data/sample_list.csv    `# Contains fastq file path; see test file for example.`\
+-b A                                  `# Base A is edited (into G)` \
+-f tests/data/test_guide_info.csv     `# Contains gRNA metadata; see test file for example.`\
+-o ./                                 `# Output directory` \
+-r                                    `# Quantify reporter edits` \
+-n ${screen_id}                          `# ID of the screen to be counted`
+
+# 2. QC samples & guides
+bean-qc \
+  bean_count_${screen_id}.h5ad             `# Input ReporterScreen .h5ad file path` \
+  -o bean_count_${screen_id}_masked.h5ad   `# Output ReporterScreen .h5ad file path` \
+  -r qc_report_${screen_id}                `# Prefix for QC report` \
+  -b                                       `# Remove replicates with no good samples.`
+
+# 3. Quantify variant effect
+bean-run sorting variant \
+    tests/data/bean_count_${screen_id}_masked.h5ad \
+    -o tests/test_res/var/ \
+    --fit-negctrl \
+    --scale-by-acc \
+    --accessibility-col accessibility
+
+
+

See more details below.

+
+
+

1. Count gRNA & reporter (bean count-samples)

+
screen_id=my_sorting_tiling_screen
+
+# 1. Count gRNA & reporter
+bean-count-samples \
+--input tests/data/sample_list.csv    `# Contains fastq file path; see test file for example.`\
+-b A                                  `# Base A is edited (into G)` \
+-f tests/data/test_guide_info.csv     `# Contains gRNA metadata; see test file for example.`\
+-o ./                                 `# Output directory` \
+-r                                    `# Quantify reporter edits` \
+-n ${screen_id}                          `# ID of the screen to be counted`
+
+
+

Make sure you follow the input file format for seamless downstream steps. This will produce ./bean_count_${screen_id}.h5ad.

+
+
+

2. QC samples & guides (bean qc)

+

Base editing data will include QC about editing efficiency. As QC uses predefined column names and values, be sure to follow the input file guideline, but you can change the parameters with the full argument list of `bean-qc <../../README#bean-qc-qc-of-reporter-screen-data>`_. (Common factors you may want to tweak are --control-condition=bulk and --lfc-conds=top,bot if you have different sample condition labels.)

+
bean-qc \
+  bean_count_${screen_id}.h5ad    `# Input ReporterScreen .h5ad file path` \
+  -o bean_count_${screen_id}_masked.h5ad   `# Output ReporterScreen .h5ad file path` \
+  -r qc_report_${screen_id}   `# Prefix for QC report`
+
+
+

If the data does not include reporter editing data, you can provide --no-editing flag to omit the editing rate QC.

+
+
+

3. Quantify variant effect (bean run)

+

bean-run can take 3 run options to quantify editing rate:

+
    +
  1. +
    From reporter + accessibility

    If your gRNA metadata table (tests/data/test_guide_info.csv above) included per-gRNA accessibility score,

    +
    +
    +
       bean-run sorting variant \
    +   tests/data/bean_count_${screen_id}_masked.h5ad \
    +   -o tests/test_res/var/ \
    +   --fit-negctrl \
    +   --scale-by-acc \
    +   --accessibility-col accessibility
    +
    +If your gRNA metadata table (\ ``tests/data/test_guide_info.csv`` above) included per-gRNA chromosome & position and you have bigWig file with accessibility signal,
    +
    +
    +
    bean-run sorting variant \
    +tests/data/bean_count_${screen_id}_masked.h5ad \
    +-o tests/test_res/var/ \
    +--fit-negctrl \
    +--scale-by-acc \
    +--accessibility-bw accessibility.bw
    +
    +
    +
  2. +
  3. From reporter, without accessibility

    +
    +

    This assumes that all target sites have uniform chromatin accessibility.

    +
    +
    bean-run sorting variant \
    +tests/data/bean_count_${screen_id}_masked.h5ad \
    +-o tests/test_res/var/ \
    +--fit-negctrl
    +
    +
    +
  4. +
  5. +
    No reporter information, assume the same editing efficiency of all gRNAs.

    Use this option if your data don’t have editing outcome information.

    +
    +
    +
    bean-run sorting variant \
    +tests/data/bean_count_${screen_id}_masked.h5ad \
    +-o tests/test_res/var/ \
    +--fit-negctrl \
    +--uniform-edit
    +
    +
    +
  6. +
+

See Subcommands for the full details.

+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/index.html b/docs/_build/index.html new file mode 100644 index 0000000..06c90d1 --- /dev/null +++ b/docs/_build/index.html @@ -0,0 +1,113 @@ + + + + + + + + title: CRISPR-BEAN — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

layout: default

+
+

title: CRISPR-BEAN

+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/index_.html b/docs/_build/index_.html new file mode 100644 index 0000000..bc8e2dc --- /dev/null +++ b/docs/_build/index_.html @@ -0,0 +1,213 @@ + + + + + + + + Welcome to bean’s documentation! — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/input.html b/docs/_build/input.html new file mode 100644 index 0000000..b55703d --- /dev/null +++ b/docs/_build/input.html @@ -0,0 +1,171 @@ + + + + + + + + Input file format — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Input file format

+

This document describes the input files of bean count-samples.

+
+

sgRNA_info_table.csv

+

File should contain following columns.

+
    +
  • name: gRNA ID column

  • +
  • sequence: gRNA sequence

  • +
  • barcode: R2 barcode to help match reporter to gRNA, written in the sense direction (as in R1)

  • +
  • In order to use accessibility in the variant effect quantification, provide accessibility information in one of two options. (For non-targeting guides, provide NA values (empty cell).)

    +
      +
    • Option 1: chrom & genomic_pos: Chromosome (ex. chr19) and genomic position of guide sequence. You will have to provide the path to the bigwig file with matching reference version in bean run.

    • +
    • Option 2: accessibility_signal: ATAC-seq signal value of the target loci of each guide.

    • +
    +
  • +
  • For variant library (gRNAs are designed to target specific variants and ignore bystander edits)

    +
      +
    • target: This column denotes the target variant/element of each gRNA. This is not used in bean count[-samples] but is required to run bean run in later steps.

    • +
    • target_group: If negative/positive control gRNA will be considered in bean qc and/or bean run, specify as “NegCtrl”/“PosCtrl” in this column.

    • +
    • target_pos: If --match-target-pos flag is used, input file needs target_pos which specifies 0-based relative position of targeted base within Reporter sequence.

    • +
    +
  • +
  • For tiling library (gRNAs tile coding / noncoding sequences)

    +
      +
    • strand: Specifies gRNA strand information relative to the reference genome.

    • +
    • chrom: Chromosome of gRNA targeted locus.

    • +
    • start_pos: gRNA starting position in the genome. Required when you provide strand column. Should specify the smaller coordinate value among start and end position regardless of gRNA strandedness.

    • +
    +
  • +
+

Also see examples for variant library and tiling library.

+
+
+

sample_list.csv

+

File should contain following columns with header.

+
    +
  • R1_filepath: Path to read 1 .fastq[.gz] file

  • +
  • R2_filepath: Path to read 2 .fastq[.gz] file

  • +
  • sample_id: ID of sequencing sample

  • +
  • replicate: Replicate # of this sample (Should NOT contain .)

  • +
  • condition: Name of the sorting bin (ex. top, bot), or label of timepoint (ex. D5, D18)

  • +
+

For FACS sorting screens:

+
    +
  • upper_quantile: FACS sorting upper quantile

  • +
  • lower_quantile: FACS sorting lower quantile

  • +
+

For proliferation / survival screens:

+
    +
  • time: Numeric time following the base editing of each sample.

  • +
+

Also see examples for FACS sorting screen.

+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/objects.inv b/docs/_build/objects.inv new file mode 100644 index 0000000..b01eb0a Binary files /dev/null and b/docs/_build/objects.inv differ diff --git a/docs/_build/profile.html b/docs/_build/profile.html new file mode 100644 index 0000000..9d55d75 --- /dev/null +++ b/docs/_build/profile.html @@ -0,0 +1,185 @@ + + + + + + + + bean profile — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

bean profile

+
+

bean profile: Profile editing patterns

+
bean profile my_sorting_screen.h5ad -o output_prefix `# Prefix for editing profile report`
+
+
+
+
+

Output

+

Above command produces prefix_editing_preference.[html,ipynb] as editing preferences (see example).

+

Allele translation

+
+
+

Full parameters

+
usage: bean profile [-h] [-o OUTPUT_PREFIX] [--replicate-col REPLICATE_COL]
+                    [--condition-col CONDITION_COL] [--pam-col PAM_COL]
+                    [--control-condition CONTROL_CONDITION] [-w WINDOW_LENGTH]
+                    bdata_path
+
+
+
+

Positional Arguments

+
+
bdata_path
+

Path to the ReporterScreen object to profile editing patterns on

+
+
+
+
+

Named Arguments

+
+
-o, --output-prefix
+

Output prefix of editing pattern report (prefix.html, prefix.ipynb). If not provided, base name of bdata_path is used.

+
+
--replicate-col
+

Column name in bdata.samples that describes replicate ID.

+

Default: “replicate”

+
+
--condition-col
+

Column name in bdata.samples that describes experimental condition. (sorting bin, time, etc.)

+

Default: “bin”

+
+
--pam-col
+

Column name describing PAM of each gRNA in bdata.guides.

+
+
--control-condition
+

Control condition where editing preference would be profiled at. Pre-filters data where bdata.samples[condition_col] == control_condition.

+

Default: “bulk”

+
+
-w, --window-length
+

Window length of editing window of maximal editing efficiency to be identified. This window is used to quantify context specificity within the window.

+

Default: 6

+
+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/qc.html b/docs/_build/qc.html new file mode 100644 index 0000000..f08de74 --- /dev/null +++ b/docs/_build/qc.html @@ -0,0 +1,429 @@ + + + + + + + + bean qc — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

bean qc

+
+

bean qc: QC of reporter screen data

+
bean qc \
+  my_sorting_screen.h5ad             `# Input ReporterScreen .h5ad file path` \
+  -o my_sorting_screen_masked.h5ad   `# Output ReporterScreen .h5ad file path` \
+  -r qc_report_my_sorting_screen     `# Prefix for QC report` \
+  --control-condition presort        `# Value of the "condition" column for the control sample before selection. Mean gRNA editing rates in these samples are reported.` \
+# Inspect the output qc_report_my_sorting_screen.html to tweak QC threshold
+
+bean qc \
+  my_sorting_screen.h5ad              \
+  -o my_sorting_screen_masked.h5ad    \
+  -r qc_report_my_sorting_screen      \
+  `#[--count-correlation-thres 0.7 ...]` \
+  -b
+
+
+

bean qc supports following quality control and masks samples with low quality. Specifically:

+

Allele translation

+
    +
  • Plots guide coverage and the uniformity of coverage

  • +
  • Guide count correlation between samples

  • +
  • Log fold change correlation when positive controls are provided

  • +
  • Plots editing rate distribution

  • +
  • Identify samples with low guide coverage/guide count correlation/editing rate and mask the sample in bdata.samples.mask

  • +
  • Identify outlier guides to filter out

  • +
+
+
+

Output

+

Above command produces

+
    +
  • my_sorting_screen_masked.h5ad without problematic replicate and guides and with sample masks, and

  • +
  • qc_report_my_sorting_screen.[html,ipynb] as QC report.
    +##### Optional arguments:

  • +
  • -o OUT_SCREEN_PATH, --out-screen-path OUT_SCREEN_PATH

    +
    Path where quality-filtered ReporterScreen object to be written to
    +
    +
    +
  • +
  • -r OUT_REPORT_PREFIX, --out-report-prefix OUT_REPORT_PREFIX

    +
    Output prefix of qc report (prefix.html, prefix.ipynb)
    +
    +
    +
  • +
+
+

QC thresholds:

+
    +
  • --count-correlation-thres COUNT_CORRELATION_THRES

    +
    Correlation threshold to mask out.
    +
    +
    +
  • +
  • --edit-rate-thres EDIT_RATE_THRES

    +
    Mean editing rate threshold per sample to mask out.
    +
    +
    +
  • +
  • --lfc-thres LFC_THRES

    +
    Positive guides' correlation threshold to filter out.
    +
    +
    +
  • +
+
+
+

Run options:

+
    +
  • -b, --remove-bad-replicates

    +
    Remove replicates unless at least two of their samples meet the QC threshold (bean run does not support having only one sorting bin sample for a replicate).
    +
    +
    +
  • +
  • -i, --ignore-missing-samples

    +
    If the flag is not provided, if the ReporterScreen object does not contain all conditions for
    +each replicate, make fake empty samples. If the flag is provided, don't add dummy samples.
    +
    +
    +
  • +
  • --no-editing Ignore QC about editing. Can be used for QC of other editing modalities.

  • +
  • --dont-recalculate-edits

    +
    When ReporterScreen.layers['edit_count'] exists, do not recalculate the edit counts from
    +ReporterScreen.uns['allele_count'].
    +
    +
    +
  • +
+
+
+

Input .h5ad formatting:

+

Note that these arguments will change the way the QC metrics are calculated for guides, samples, or replicates.

+
    +
  • --tiling TILING Specify that the guide library is tiling library without ‘n guides per target’ design

  • +
  • --replicate-label REPLICATE_LABEL

    +
    Label of column in `bdata.samples` that describes replicate ID.
    +
    +
    +
  • +
  • --sample-covariates SAMPLE_COVARIATES

    +
    Comma-separated list of column names in `bdata.samples` that describes non-selective
    +experimental condition. (drug treatment, etc.)
    +
    +
    +
  • +
  • --condition-label CONDITION_LABEL

    +
    Label of column in `bdata.samples` that describes experimental condition. (sorting bin, time,
    +etc.)
    +
    +
    +

    ###### Editing rate calculation

    +
      +
    • --control-condition CTRL_COND

      +
      Values in the column `ReporterScreen.samples[condition_label]` for guide-level editing rate
      +to be calculated. Default is `None`, which considers all samples.
      +
      +
      +
    • +
    • --rel-pos-is-reporter

      +
      Specifies whether `edit_start_pos` and `edit_end_pos` are relative to reporter position. If
      +`False`, those are relative to spacer position.
      +
      +
      +

      Editing rate is calculated with following parameters in

      +
        +
      • Variant screens:

        +
          +
        • --target-pos-col TARGET_POS_COL

          +
          Target position column in `bdata.guides` specifying target edit position in reporter
          +
          +
          +
        • +
        +
      • +
      • tiling screens:

        +
          +
        • --edit-start-pos EDIT_START_POS

          +
          Edit start position to quantify editing rate on, 0-based inclusive.
          +
          +
          +
        • +
        • --edit-end-pos EDIT_END_POS

          +
          Edit end position to quantify editing rate on, 0-based exclusive.
          +
          +
          +

          ###### LFC of positive controls

          +
        • +
        +
      • +
      +
    • +
    • --posctrl-col POSCTRL_COL

      +
      Column name in ReporterScreen.guides DataFrame that specifies guide category. To use all
      +gRNAs, feed empty string ''.
      +
      +
      +
    • +
    • --posctrl-val POSCTRL_VAL

      +
      Value in ReporterScreen.guides[`posctrl_col`] that specifies guide will be used as the
      +positive control in calculating log fold change.
      +
      +
      +
    • +
    • --lfc-conds LFC_CONDS

      +
      Values in the column `ReporterScreen.samples[condition_label]` that LFC will be calculated
      +between, delimited by comma
      +
      +
      +
    • +
    +
  • +
+
+
+
+

Full parameters

+
usage: bean qc [-h] [--count-correlation-thres COUNT_CORRELATION_THRES]
+               [--edit-rate-thres EDIT_RATE_THRES] [--lfc-thres LFC_THRES]
+               [-o OUT_SCREEN_PATH] [-r OUT_REPORT_PREFIX] [-b] [-i]
+               [--no-editing] [--dont-recalculate-edits] [--tiling TILING]
+               [--replicate-label REPLICATE_LABEL]
+               [--sample-covariates SAMPLE_COVARIATES]
+               [--condition-label CONDITION_LABEL]
+               [--target-pos-col TARGET_POS_COL] [--rel-pos-is-reporter]
+               [--edit-start-pos EDIT_START_POS] [--edit-end-pos EDIT_END_POS]
+               [--posctrl-col POSCTRL_COL] [--posctrl-val POSCTRL_VAL]
+               [--lfc-conds LFC_CONDS] [--control-condition CONTROL_CONDITION]
+               bdata_path
+
+
+
+

Positional Arguments

+
+
bdata_path
+

Path to the ReporterScreen object to run QC on

+
+
+
+
+

Named Arguments

+
+
-o, --out-screen-path
+

Path where quality-filtered ReporterScreen object to be written to

+
+
-r, --out-report-prefix
+

Output prefix of qc report (prefix.html, prefix.ipynb)

+
+
+
+
+

QC thresholds

+
+
--count-correlation-thres
+

Correlation threshold to mask out.

+

Default: 0.7

+
+
--edit-rate-thres
+

Mean editing rate threshold per sample to mask out.

+

Default: 0.1

+
+
--lfc-thres
+

Positive guides’ correlation threshold to filter out.

+

Default: -0.1

+
+
+
+
+

Run options

+
+
-b, --remove-bad-replicates
+

Remove replicates unless at least two of their samples meet the QC threshold.

+

Default: False

+
+
-i, --ignore-missing-samples
+

If the flag is not provided, if the ReporterScreen object does not contain all conditions for each replicate, make fake empty samples. If the flag is provided, don’t add dummy samples.

+

Default: False

+
+
--no-editing
+

Ignore QC about editing. Can be used for QC of other editing modalities.

+

Default: False

+
+
--dont-recalculate-edits
+

When ReporterScreen.layers[‘edit_count’] exists, do not recalculate the edit counts from ReporterScreen.uns[‘allele_count’].

+

Default: False

+
+
+
+
+

Input .h5ad formatting

+
+
--tiling
+

Specify that the guide library is tiling library without ‘n guides per target’ design

+
+
--replicate-label
+

Label of column in bdata.samples that describes replicate ID.

+

Default: “replicate”

+
+
--sample-covariates
+

Comma-separated list of column names in bdata.samples that describes non-selective experimental condition. (drug treatment, etc.)

+
+
--condition-label
+

Label of column in bdata.samples that describes experimental condition. (sorting bin, time, etc.)

+

Default: “condition”

+
+
--target-pos-col
+

Target position column in bdata.guides specifying target edit position in reporter

+

Default: “target_pos”

+
+
--rel-pos-is-reporter
+

Specifies whether edit_start_pos and edit_end_pos are relative to reporter position. If False, those are relative to spacer position.

+

Default: False

+
+
--edit-start-pos
+

Edit start position to quantify editing rate on, 0-based inclusive.

+

Default: 2

+
+
--edit-end-pos
+

Edit end position to quantify editing rate on, 0-based exclusive.

+

Default: 7

+
+
--posctrl-col
+

Column name in ReporterScreen.guides DataFrame that specifies guide category. To use all gRNAs, feed empty string ‘’.

+

Default: “target_group”

+
+
--posctrl-val
+

Value in ReporterScreen.guides[posctrl_col] that specifies which guides will be used as positive controls when calculating log fold change.

+

Default: “PosCtrl”

+
+
--lfc-conds
+

Values of the column ReporterScreen.samples[condition_label] between which the LFC will be calculated, delimited by comma.

+

Default: “top,bot”

+
+
--control-condition
+

Value of the column ReporterScreen.samples[condition_label] for which the guide-level editing rate will be calculated.

+

Default: “bulk”

+
+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/run.html b/docs/_build/run.html new file mode 100644 index 0000000..d9e232b --- /dev/null +++ b/docs/_build/run.html @@ -0,0 +1,401 @@ + + + + + + + + bean run — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

bean run

+
+

bean run: Quantify variant effects

+

BEAN uses a Bayesian network to incorporate gRNA editing outcomes and provide a posterior estimate of the variant phenotype. The Bayesian network reflects the data generation process. Briefly,

+
    +
  1. Cellular phenotype (either the phenotype that cells are sorted on for a sorting screen, or log(proliferation rate)) is modeled as a Gaussian mixture distribution of the wild-type phenotype and the variant phenotype.

  2. +
  3. The weights of the mixture components are inferred from the reporter editing outcome and the chromatin accessibility of the loci.

  4. +
  5. Cells with each gRNA, formulated as the mixture distribution, are sorted by the phenotypic quantile to produce the gRNA counts.

  6. +
+

For the full detail, see the method section of the BEAN manuscript.

+

model

+



+
+
+

Usage example

+
bean run sorting[survival] variant[tiling] my_sorting_screen_filtered.h5ad \
+[--uniform-edit, --scale-by-acc [--acc-bw-path accessibility_signal.bw, --acc-col accessibility]] \
+-o output_prefix/ \
+--fit-negctrl
+
+
+

See full list of parameters below.

+
+
+

Input

+

my_sorting_screen_filtered.h5ad can be produced by one of the following:

+
    +
  1. bean count-samples (see the “bean count-samples” documentation), when you have raw .fastq files

  2. +
  3. (Limited to bean run variant mode) bean create-screen, when you have flat .csv tables: a gRNA metadata table, a sample metadata table, a gRNA counts table (# guides x # samples), and optionally an # edits table. Example command:

    +
    +

    bean create-screen gRNA_info_table.csv sample_info_table.csv gRNA_counts_table.csv [--edits edit_counts_table.csv -o output.h5ad]

    +
    +
      +
    • gRNA_info_table.csv should have following columns.

      +
        +
      • name: gRNA ID column

      • +
      • target: This column denotes the target variant/element of each gRNA.

      • +
      • target_group [Optional]: If negative control gRNA will be used, specify as “NegCtrl” in this column.

      • +
      +
    • +
    • sample_info_table.csv should have following columns.

      +
        +
      • sample_id: ID of sequencing sample

      • +
      • replicate: Replicate # of this sample

      • +
      • bin: Name of the sorting bin

      • +
      • upper_quantile: FACS sorting upper quantile

      • +
      • lower_quantile: FACS sorting lower quantile

      • +
      +
    • +
    • gRNA_counts_table.csv should be formatted as follows.

      +
        +
      • Columns include one of sample_id columns in sample_info_table.csv file.

      • +
      • 1st row (row index) follows name (gRNA ID) in gRNA_info_table.csv file.

      • +
      +
    • +
    +
  4. +
  5. You can manually create the AnnData object with more annotations including allele counts: see API tutorial for full detail.

  6. +
+
+
+

Output

+

model

+

Above command produces

+
    +
  • output_prefix/bean_element_result.[model_type].csv with following columns:

    +
      +
    • Estimated variant effect sizes

      +
        +
      • mu (Effect size): Mean of variant phenotype, given the wild type has standard normal phenotype distribution of mu = 0, sd = 1.

      • +
      • mu_sd: The mean of the variant phenotype, mu, is modeled as a normal distribution. This column shows the fitted standard deviation of mu, which quantifies the uncertainty of the variant effect.

      • +
      • mu_z: z-score of mu

      • +
      • sd: Standard deviation of variant phenotype, given the wild type has standard normal phenotype distribution of mu = 0, sd = 1.

      • +
      • CI[0.025, 0.975]: Credible interval of mu

      • +
      • When negative control is provided, above columns with _adj suffix are provided, which are the corresponding values adjusted for negative control.

      • +
      +
    • +
    • Metrics on per-variant evidence provided in input (provided in tiling mode)

      +
        +
      • effective_edit_rate: Sum of per-variant editing rates over all alleles observed in the input. Allele-level editing rate is divided by the number of variants observed in the allele prior to summing up.

      • +
      • n_guides: # of guides covering the variant.

      • +
      • n_coocc: # of cooccurring variants with a given variant in any alleles observed in the input.

      • +
      +
    • +
    +
  • +
  • output_prefix/bean_sgRNA_result.[model_type].csv:

    +
      +
    • edit_rate: Estimated editing rate at the target loci.

    • +
    +
  • +
+
+
+

Full parameters

+

Run model on data.

+

+
usage: bean run [-h] [--rep-pi] [--uniform-edit] [--scale-by-acc]
+                [--acc-bw-path ACC_BW_PATH] [--acc-col ACC_COL] [--const-pi]
+                [--shrink-alpha] [--condition-col CONDITION_COL]
+                [--time-col TIME_COL] [--control-condition CONTROL_CONDITION]
+                [--include-control-condition-for-inference]
+                [--replicate-col REPLICATE_COL] [--target-col TARGET_COL]
+                [--guide-activity-col GUIDE_ACTIVITY_COL] [--outdir OUTDIR]
+                [--result-suffix RESULT_SUFFIX]
+                [--sorting-bin-upper-quantile-col SORTING_BIN_UPPER_QUANTILE_COL]
+                [--sorting-bin-lower-quantile-col SORTING_BIN_LOWER_QUANTILE_COL]
+                [--alpha-if-overdispersion-fitting-fails ALPHA_IF_OVERDISPERSION_FITTING_FAILS]
+                [--cuda] [--sample-mask-col SAMPLE_MASK_COL] [--fit-negctrl]
+                [--negctrl-col NEGCTRL_COL]
+                [--negctrl-col-value NEGCTRL_COL_VALUE]
+                [--repguide-mask REPGUIDE_MASK] [--device DEVICE]
+                [--ignore-bcmatch] [--allele-df-key ALLELE_DF_KEY]
+                [--splice-site-path SPLICE_SITE_PATH]
+                [--control-guide-tag CONTROL_GUIDE_TAG] [--dont-fit-noise]
+                [--dont-adjust-confidence-by-negative-control]
+                [--n-iter N_ITER] [--load-existing]
+                {sorting,survival} {variant,tiling} bdata_path
+
+
+
+

Positional Arguments

+
+
selection
+

Possible choices: sorting, survival

+

Screen selection type whether cells are sorted based on continuous phenotype (‘sorting’) or proliferated based on their viability (‘survival’).

+
+
library_design
+

Possible choices: variant, tiling

+

Library design type whether to run variant or tiling screen model. +Variant library design assumes gRNA has specific target variant and bystander edits are ignored. Tiling library design considers all alleles generated by gRNA in reporter.

+
+
bdata_path
+

Path of a ReporterScreen object

+
+
+
+
+

Named Arguments

+
+
--rep-pi, -r
+

Fit replicate specific scaling factor. Recommended to set as True if you expect variable editing activity across biological replicates.

+

Default: False

+
+
--uniform-edit, -p
+

Assume uniform editing rate for all guides.

+

Default: False

+
+
--scale-by-acc
+

Scale guide editing efficiency by the target loci accessibility

+

Default: False

+
+
--acc-bw-path
+

Accessibility .bigWig file to be used to assign accessibility of guides.

+
+
--acc-col
+

Column name in bdata.guides that specifies the raw ATAC-seq signal.

+
+
--const-pi
+

Use constant pi provided in --guide-activity-col (instead of fitting from reporter data)

+

Default: False

+
+
--shrink-alpha
+

Instead of using the trend-fitted alpha values, use estimated alpha values for each gRNA that are shrunk towards the fitted trend.

+

Default: False

+
+
--condition-col
+

Column key in bdata.samples that describes experimental condition.

+

Default: “condition”

+
+
--time-col
+

Column key in bdata.samples that describes time elapsed.

+

Default: “time”

+
+
--control-condition
+

Value in bdata.samples[condition_col] that indicates control experimental condition.

+

Default: “bulk”

+
+
--include-control-condition-for-inference, -ic
+

Include control conditions for inference. Currently only supported for survival screens.

+

Default: False

+
+
--replicate-col
+

Column key in bdata.samples that describes experimental replicates.

+

Default: “replicate”

+
+
--target-col
+

Column key in bdata.guides that describes the target element of each guide.

+

Default: “target”

+
+
--guide-activity-col, -a
+

Column in ReporterScreen.guides DataFrame showing the editing rate estimated via external tools

+
+
--outdir, -o
+

Directory to save the run result.

+

Default: “.”

+
+
--result-suffix
+

Suffix of the output files

+

Default: “”

+
+
--sorting-bin-upper-quantile-col, -uq
+

Column name with upper quantile values of each sorting bin in [Reporter]Screen.samples (or AnnData.var)

+

Default: “upper_quantile”

+
+
--sorting-bin-lower-quantile-col, -lq
+

Column name with lower quantile values of each sorting bin in [Reporter]Screen.samples (or AnnData var)

+

Default: “lower_quantile”

+
+
--alpha-if-overdispersion-fitting-fails, -af
+

Comma-separated regression coefficient (b0, b1) of log(a0) ~ log(q) that will be used if fitting dispersion on the data fails.

+
+
--cuda
+

run on GPU

+

Default: False

+
+
--sample-mask-col
+

Name of the column indicating the sample mask in [Reporter]Screen.samples (or AnnData.var). Sample is ignored if the value in this column is 0. This can be used to mask out low-quality samples.

+
+
--fit-negctrl
+

Fit the shared negative control distribution to normalize the fitted parameters

+

Default: False

+
+
--negctrl-col
+

Column in bdata.guides specifying if a guide is a negative control. If bdata.guides[negctrl_col].lower() == negctrl_col_value, it is treated as a negative control guide.

+

Default: “target_group”

+
+
--negctrl-col-value
+

Column value in bdata.guides specifying if a guide is negative control. If the bdata.guides[negctrl_col].lower() == negctrl_col_value, it is treated as negative control guide.

+

Default: “negctrl”

+
+
--repguide-mask
+

n_replicate x n_guide mask to mask the outlier guides. screen.uns[repguide_mask] will be used.

+

Default: repguide_mask

+
+
--device
+

Optionally use GPU if provided valid GPU device name (ex. cuda:0)

+
+
--ignore-bcmatch
+

If provided, even if the screen object has .X_bcmatch, ignore the count when fitting.

+

Default: False

+
+
--allele-df-key
+

screen.uns[allele_df_key] will be used as the allele count.

+
+
--splice-site-path
+

Path to splicing site

+
+
--control-guide-tag
+

If this string is in guide name, treat each guide separately not to mix the position. Used for negative controls.

+
+
--dont-fit-noise
+

Default: False

+
+
--dont-adjust-confidence-by-negative-control
+

If provided, do not adjust confidence by negative controls. By default, confidence is adjusted by negative controls: for variant library_design, this uses negative control variants; for tiling library_design, it adjusts confidence by synonymous edits.

+

Default: False

+
+
--n-iter
+

# of SVI steps taken for inference.

+

Default: 2000

+
+
--load-existing
+

Load existing .pkl file if present.

+

Default: False

+
+
+
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/search.html b/docs/_build/search.html new file mode 100644 index 0000000..6c852dd --- /dev/null +++ b/docs/_build/search.html @@ -0,0 +1,129 @@ + + + + + + + Search — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +

Search

+ + + + +

+ Searching for multiple words only shows matches that contain + all words. +

+ + +
+ + + +
+ + + +
+ +
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/searchindex.js b/docs/_build/searchindex.js new file mode 100644 index 0000000..0f17e54 --- /dev/null +++ b/docs/_build/searchindex.js @@ -0,0 +1 @@ +Search.setIndex({"docnames": ["ReporterScreen_api", "cds", "commands/count", "commands/create-screen", "commands/filter", "commands/input", "commands/profile", "commands/qc", "commands/run", "count", "count_samples", "exon_fa_format", "filter", "gwas", "index", "index_", "input", "profile", "qc", "run", "subcommands", "tutorials/ldl_cds", "tutorials/ldl_var"], "filenames": ["ReporterScreen_api.rst", "cds.rst", "commands/count.md", "commands/create-screen.md", "commands/filter.md", "commands/input.md", "commands/profile.md", "commands/qc.md", "commands/run.md", "count.rst", "count_samples.rst", "exon_fa_format.md", "filter.rst", "gwas.rst", "index.md", "index_.rst", "input.rst", "profile.rst", "qc.rst", "run.rst", "subcommands.rst", "tutorials/ldl_cds.md", "tutorials/ldl_var.md"], "titles": ["ReporterScreen API tutorial", "Coding sequence tiling library", "bean count[-samples]: Count (reporter) screen data", "bean create-screen: Create ReporterScreen object from flat files", "filter: Filtering (and optionally translating) alleles", "sgRNA_info_table.csv", "bean profile: Profile editing patterns", "bean qc: QC of reporter screen data", "bean run: Quantify variant effects", "bean count", "bean count-samples", "Input .fa file format for bean-filter", "bean filter", "GWAS variant library", "title: CRISPR-BEAN", "Welcome to bean\u2019s documentation!", "Input file format", "bean profile", "bean qc", "bean run", "Subcommands", "Tiling sorting screen tutorial", "Variant sorting screen tutorial"], "terms": {"load": [0, 4, 12, 19], "requir": [0, 1, 5, 9, 10, 16, 21], "packag": 0, "anndata": [0, 2, 8, 9, 10, 19], "import": 0, "isn": 0, "t": [0, 1, 2, 7, 9, 10, 12, 13, 18, 21, 22], "us": [0, 1, 4, 5, 7, 8, 9, 10, 12, 13, 16, 17, 18, 19, 21, 22], "numpi": 0, "np": 
0, "panda": 0, "pd": 0, "ad": [0, 9, 10], "seaborn": 0, "sn": 0, "matplotlib": 0, "pyplot": 0, "plt": 0, "bean": [0, 4, 5, 16, 20], "br": 0, "object": [0, 2, 7, 8, 9, 10, 12, 17, 18, 19], "perturb": [0, 2, 9, 10], "seq": [0, 5, 9, 10, 16, 19], "screen": [0, 4, 5, 8, 12, 16, 19, 20], "ar": [0, 1, 2, 4, 5, 7, 8, 9, 10, 12, 13, 16, 18, 19, 21, 22], "both": 0, "compat": [0, 2, 4, 9, 10, 12], "adata": 0, "read_h5ad": 0, "bean_count_07": 0, "1021_ldlvar": 0, "h5ad": [0, 1, 2, 4, 6, 8, 9, 10, 12, 13, 17, 19, 21, 22], "n_ob": 0, "n_var": 0, "3455": 0, "12": [0, 2, 9, 10], "ob": [0, 19], "name": [0, 1, 2, 3, 4, 5, 7, 8, 13, 16, 21, 22], "unnam": 0, "0": [0, 1, 5, 7, 8, 9, 10, 12, 16, 18, 19, 21], "target": [0, 1, 2, 5, 7, 8, 9, 10, 12, 13, 16, 18, 19, 21, 22], "gene": [0, 1, 4, 11, 12, 21], "variant": [0, 4, 5, 7, 12, 15, 16, 18, 20], "descriptor": 0, "arbitrari": 0, "number": [0, 1, 2, 8, 9, 10, 19, 21], "grna": [0, 3, 5, 7, 8, 9, 10, 16, 17, 18, 19], "posit": [0, 1, 4, 5, 7, 9, 10, 11, 13, 16, 21, 22], "categori": [0, 7, 18], "base": [0, 1, 2, 4, 5, 7, 9, 10, 12, 13, 16, 17, 18, 19, 21, 22], "report": [0, 4, 5, 6, 8, 12, 15, 16, 17, 19, 20], "BE": 0, "group": 0, "sequenc": [0, 2, 4, 5, 8, 9, 10, 11, 12, 15, 16, 19, 21], "barcod": [0, 5, 9, 10, 16], "5": [0, 11, 12], "nt": 0, "pam": [0, 17], "offset": [0, 9, 10], "target_po": [0, 5, 9, 10, 16, 18], "group2": 0, "masked_sequ": 0, "masked_barcod": 0, "edit_r": [0, 8, 19], "var": [0, 1, 13, 19, 21, 22], "index": [0, 8, 12, 15, 19], "sort": [0, 5, 7, 8, 15, 16, 17, 18, 19], "replic": [0, 5, 7, 8, 13, 16, 17, 18, 19, 22], "un": [0, 4, 7, 12, 18, 19], "allele_count": [0, 7, 18], "edit_count": [0, 7, 18], "layer": [0, 7, 18], "x_bcmatch": [0, 19], "cdata": 0, "genom": [0, 4, 5, 11, 12, 16], "compris": 0, "n_guid": [0, 8, 19], "x": [0, 2, 8, 9, 10, 19], "n_condit": 0, "guid": [0, 1, 2, 4, 5, 7, 8, 9, 10, 12, 16, 17, 18, 19, 21], "sampl": [0, 3, 5, 7, 8, 12, 15, 16, 17, 18, 19, 20], "condit_m": 0, "condit_p": 0, "metadata": [0, 1, 
8, 13, 19, 21, 22], "condit": [0, 1, 5, 7, 13, 16, 17, 18, 19, 21, 22], "match": [0, 2, 5, 9, 10, 16], "per": [0, 1, 2, 4, 7, 8, 9, 10, 12, 13, 18, 19, 21, 22], "attribut": 0, "contain": [0, 1, 2, 5, 7, 9, 10, 13, 16, 18, 21, 22], "inform": [0, 1, 2, 5, 9, 10, 13, 16, 21, 22], "about": [0, 1, 7, 13, 18, 21, 22], "each": [0, 1, 2, 4, 5, 7, 8, 9, 10, 12, 13, 16, 17, 18, 19, 21, 22], "control_1_g1": 0, "control": [0, 1, 5, 7, 8, 12, 16, 17, 18, 19, 21], "nan": 0, "1": [0, 5, 8, 9, 10, 16, 18, 19], "g1": 0, "4": [0, 9, 10], "10": [0, 4, 10, 12], "ab": 0, "negctrl": [0, 1, 5, 8, 13, 16, 19, 21, 22], "ccaagccctacgcggtagggaactttgggagc": 0, "gttt": 0, "gggag": 0, "control_1": 0, "9": 0, "cctgcgcggtggggggcttt": 0, "531163": 0, "control_1_g2": 0, "g2": 0, "11": 0, "tccaagccctacgcggtagggaactttgggag": 0, "aaca": 0, "tggga": 0, "ccctgcgcggtggggggctt": 0, "ggcg": 0, "640765": 0, "2": [0, 5, 9, 10, 12, 16, 18], "control_1_g3": 0, "g3": 0, "gtccaagccctacgcggtagggaactttggga": 0, "cgct": 0, "ttggg": 0, "ccctgcgcggtggggggct": 0, "417709": 0, "3": [0, 11, 12], "control_1_g4": 0, "g4": 0, "7": [0, 7, 12, 18], "13": 0, "cgtccaagccctacgcggtagggaactttggg": 0, "tgag": 0, "tttgg": 0, "ggccctgcgcggtggggggc": 0, "tggg": 0, "126400": 0, "control_1_g5": 0, "g5": 0, "8": 0, "14": 0, "acgtccaagccctacgcggtagggaactttgg": 0, "gtat": 0, "ctttg": 0, "gggccctgcgcggtgggggg": 0, "gtgt": 0, "201104": 0, "3450": 0, "rs9987289_maj_abe_347_g1": 0, "rs9987289": 0, "maj": 0, "347": 0, "tgcttgggcatcaatatcacgtggaaccagcc": 0, "cagt": 0, "ccagc": 0, "rs9987289_maj_abe_347": 0, "gcgtcggtgtcgcgtgggg": 0, "cggt": 0, "087379": 0, "3451": 0, "rs9987289_maj_abe_347_g2": 0, "atgcttgggcatcaatatcacgtggaaccagc": 0, "tcgc": 0, "accag": 0, "ggcgtcggtgtcgcgtggg": 0, "299923": 0, "3452": 0, "rs9987289_maj_abe_347_g3": 0, "6": [0, 9, 10, 17], "gatgcttgggcatcaatatcacgtggaaccag": 0, "gcac": 0, "aacca": 0, "tgggcgtcggtgtcgcgtgg": 0, "gcgc": 0, "224973": 0, "3453": 0, "rs9987289_maj_abe_347_g4": 0, 
"agatgcttgggcatcaatatcacgtggaacca": 0, "ttgc": 0, "gaacc": 0, "ttgggcgtcggtgtcgcgtg": 0, "265378": 0, "3454": 0, "rs9987289_maj_abe_347_g5": 0, "tagatgcttgggcatcaatatcacgtggaacc": 0, "gcga": 0, "ggaac": 0, "cttgggcgtcggtgtcgcgt": 0, "gcgg": 0, "266573": 0, "row": [0, 3, 8, 19], "21": 0, "column": [0, 1, 3, 4, 5, 7, 8, 9, 10, 12, 13, 16, 17, 18, 19, 21, 22], "specif": [0, 5, 7, 16, 17, 18, 19], "rep1_bot": 0, "bot": [0, 1, 5, 13, 16, 18, 21, 22], "rep1": 0, "rep2_bot": 0, "rep2": 0, "rep3_vpa_bot": 0, "rep3_vpa": 0, "rep4_vpa_bot": 0, "rep4_vpa": 0, "rep1_bulk": 0, "bulk": [0, 1, 13, 17, 18, 19, 21, 22], "rep2_bulk": 0, "rep3_vpa_bulk": 0, "rep4_vpa_bulk": 0, "rep1_top": 0, "top": [0, 1, 5, 13, 16, 18, 21, 22], "rep2_top": 0, "rep3_vpa_top": 0, "rep4_vpa_top": 0, "i": [0, 1, 2, 4, 5, 7, 8, 9, 10, 11, 12, 13, 16, 17, 18, 19, 21, 22], "store": [0, 4, 9, 10, 12], "51779544aga_maj_abe_2_g1": 0, "A": [0, 1, 2, 9, 10, 13, 21, 22], "gt": 0, "g": [0, 1, 2, 9, 10, 12, 13, 21, 22], "20": 0, "15": 0, "17": 0, "22": 0, "34": 0, "19": [0, 1, 21], "c": [0, 1, 21], "438407": 0, "438408": 0, "24": 0, "438409": 0, "16": 0, "29": 0, "438410": 0, "438411": 0, "25": 0, "438412": 0, "level": [0, 4, 7, 8, 12, 18, 19], "can": [0, 1, 2, 7, 8, 9, 10, 11, 12, 13, 18, 19, 21, 22], "save": [0, 19], "ref_bas": 0, "alt_bas": 0, "40": 0, "59": 0, "66": 0, "68": 0, "48": 0, "149": 0, "217563": 0, "217564": 0, "217565": 0, "217566": 0, "30": [0, 1, 9, 10, 21], "51": 0, "37": 0, "46": 0, "58": 0, "23": [0, 4, 12], "47": [0, 9, 10], "217567": 0, "217568": 0, "work": [0, 12], "support": [0, 4, 7, 11, 12, 18, 19], "oper": 0, "cdata_subset": 0, "14979": 0, "control_10_g1": 0, "14980": 0, "14981": 0, "14982": 0, "14983": 0, "22837": 0, "22838": 0, "22839": 0, "22840": 0, "22841": 0, "18": 0, "31": 0, "1080": 0, "cdata1": 0, "data": [0, 1, 8, 12, 13, 17, 19, 20, 21, 22], "pinello": 0, "project": 0, "2021_08_anb": 0, "072121_abe_topbot": 0, "bean_count": 0, "ldlvar": 0, "032422_crispresso": 0, 
"bean_count_072121_abe_topbot_ldlvar": 0, "cdata2": 0, "102121_abe_topbot": 0, "bean_count_102121_abe_topbot_ldlvar": 0, "map": [0, 2, 9, 10, 12], "lambda": 0, "": [0, 1, 4, 9, 10, 12, 21], "rsplit": 0, "_": [0, 1, 8, 13, 19, 21, 22], "log_norm": 0, "lfc1": 0, "log_fold_change_rep": 0, "lfc2": 0, "join": 0, "lsuffix": 0, "_1": 0, "rsuffix": 0, "_2": 0, "pairplot": 0, "aggreg": [0, 4, 12], "biolog": [0, 19], "log_fold_change_aggreg": 0, "aggregate_condit": 0, "bot_top": 0, "median": 0, "158787": 0, "212254": 0, "186679": 0, "022441": 0, "457033": 0, "418312": 0, "084936": 0, "339419": 0, "517138": 0, "002245": 0, "technic": 0, "show": [0, 1, 8, 19, 21], "decent": 0, "correl": [0, 7, 18], "you": [0, 1, 2, 4, 5, 8, 9, 10, 11, 12, 13, 16, 19, 21, 22], "concaten": 0, "differ": [0, 1, 13, 21, 22], "share": [0, 12, 19], "concat": 0, "lognorm_count": 0, "lognorm_edit": 0, "get_edit_r": 0, "normalize_by_editable_bas": 0, "fals": [0, 7, 9, 10, 12, 18, 19], "edited_bas": [0, 9, 10], "editable_base_start": 0, "editable_base_end": 0, "bcmatch_thr": 0, "prior_weight": 0, "get_edit_from_allel": 0, "get_edit_mat_from_un": 0, "match_target_posit": 0, "true": [0, 9, 10, 19], "hist": 0, "bin": [0, 5, 7, 8, 16, 17, 18, 19], "135550": 0, "059391": 0, "141290": 0, "072358": 0, "269650": 0, "230264": 0, "182151": 0, "165778": 0, "340590": 0, "034365": 0, "cdata_til": 0, "ldlrcd": 0, "bean_count_072121_abe_topbot_ldlrcd": 0, "11224415": 0, "11224401": 0, "11224410": 0, "11224402": 0, "438001": 0, "11203000": 0, "11203002": 0, "11203006": 0, "438002": 0, "11224074": 0, "11224086": 0, "11224092": 0, "438003": 0, "438004": 0, "11217409": 0, "11217417": 0, "438005": 0, "11226735": 0, "11226742": 0, "11226747": 0, "length": [0, 9, 10, 17], "438006": 0, "dtype": 0, "to_excel": 0, "tmp": [0, 12], "xlsx": [0, 2, 9, 10], "sheet": 0, "to_mageck_input": 0, "mageck_input": 0, "txt": [0, 1, 4, 12, 21], "target_column": 0, "bash": [0, 8, 19], "head": 0, "sgrna": [0, 2, 9, 10], "171": 0, "451": 0, 
"251": 0, "422": 0, "573": 0, "389": 0, "456": 0, "420": 0, "835": 0, "435": 0, "794": 0, "439": 0, "145": 0, "278": 0, "257": 0, "206": 0, "364": 0, "273": 0, "254": 0, "527": 0, "498": 0, "768": 0, "195": 0, "333": 0, "488": 0, "632": 0, "898": 0, "899": 0, "780": 0, "713": 0, "1189": 0, "626": 0, "1146": 0, "603": 0, "246": 0, "663": 0, "387": 0, "448": 0, "823": 0, "595": 0, "705": 0, "600": 0, "921": 0, "1143": 0, "506": 0, "243": 0, "647": 0, "434": 0, "529": 0, "776": 0, "700": 0, "676": 0, "1062": 0, "611": 0, "928": 0, "379": 0, "control_10": 0, "138": 0, "329": 0, "229": 0, "213": 0, "292": 0, "432": 0, "352": 0, "409": 0, "390": 0, "274": 0, "control_10_g2": 0, "187": 0, "468": 0, "402": 0, "479": 0, "643": 0, "369": 0, "428": 0, "469": 0, "796": 0, "787": 0, "404": 0, "control_10_g3": 0, "57": 0, "126": 0, "83": 0, "131": 0, "281": 0, "114": 0, "184": 0, "115": 0, "300": 0, "106": 0, "299": 0, "control_10_g4": 0, "112": 0, "120": 0, "136": 0, "182": 0, "128": 0, "169": 0, "181": 0, "256": 0, "144": 0, "258": 0, "179": 0, "dens": [1, 21], "across": [1, 12, 19, 21], "locu": [1, 5, 16, 21], "multipl": [1, 4, 12, 21], "loci": [1, 5, 8, 16, 19, 21], "select": [1, 7, 13, 15, 18, 19, 21, 22], "fac": [1, 5, 8, 13, 16, 19, 21, 22], "signal": [1, 5, 13, 16, 19, 21, 22], "quantil": [1, 5, 8, 13, 16, 19, 21, 22], "design": [1, 5, 7, 9, 10, 13, 16, 18, 19, 21, 22], "cell": [1, 5, 8, 13, 16, 19, 21, 22], "screen_id": [1, 13, 21, 22], "my_sorting_tiling_screen": [1, 13, 21, 22], "input": [1, 5, 12, 13, 15, 20, 21, 22], "test": [1, 13, 21, 22], "sample_list_til": [1, 21], "csv": [1, 2, 3, 4, 8, 9, 10, 12, 13, 15, 19, 21, 22], "fastq": [1, 5, 8, 9, 10, 13, 16, 19, 21, 22], "file": [1, 4, 5, 7, 8, 12, 13, 15, 18, 19, 20, 21, 22], "path": [1, 4, 5, 7, 8, 9, 10, 12, 13, 16, 17, 18, 19, 21, 22], "see": [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 16, 17, 19, 21, 22], "b": [1, 2, 7, 9, 10, 12, 13, 18, 21, 22], "edit": [1, 2, 5, 7, 8, 9, 10, 12, 13, 15, 16, 18, 19, 20, 21, 22], "f": 
[1, 2, 9, 10, 12, 13, 21, 22], "test_guide_info_tiling_chrom": [1, 21], "o": [1, 2, 4, 6, 7, 8, 9, 10, 12, 13, 17, 18, 19, 21, 22], "output": [1, 13, 15, 20, 21, 22], "directori": [1, 2, 9, 10, 13, 19, 21, 22], "r": [1, 2, 7, 9, 10, 13, 18, 19, 21, 22], "n": [1, 2, 7, 9, 10, 13, 18, 19, 21, 22], "id": [1, 3, 5, 7, 8, 10, 12, 13, 16, 17, 18, 19, 21, 22], "bean_count_": [1, 13, 21, 22], "reporterscreen": [1, 7, 12, 13, 15, 17, 18, 19, 21, 22], "_mask": [1, 13, 21, 22], "qc_report_": [1, 13, 21, 22], "prefix": [1, 6, 7, 12, 13, 17, 18, 21, 22], "translat": [1, 15, 20, 21], "_allelefilt": [1, 21], "basechang": [1, 12, 21], "intend": [1, 21], "chang": [1, 7, 13, 18, 21, 22], "If": [1, 4, 5, 7, 8, 12, 13, 16, 17, 18, 19, 21, 22], "wa": [1, 21], "provid": [1, 2, 4, 5, 7, 8, 9, 10, 11, 12, 13, 16, 17, 18, 19, 21, 22], "window": [1, 12, 17, 21], "start": [1, 5, 7, 9, 10, 11, 12, 16, 18, 21], "po": [1, 7, 9, 10, 12, 18, 21], "end": [1, 5, 7, 9, 10, 11, 12, 16, 18, 21], "spacer": [1, 2, 7, 9, 10, 12, 18, 21], "within": [1, 4, 5, 9, 10, 12, 16, 17, 21], "proport": [1, 12, 21], "larger": [1, 21], "than": [1, 12, 21], "least": [1, 7, 18, 21], "list": [1, 4, 7, 8, 10, 12, 13, 18, 19, 21, 22], "gene_symbol": [1, 4, 12, 21], "test_r": [1, 13, 21, 22], "fit": [1, 8, 13, 19, 21, 22], "scale": [1, 8, 13, 19, 21, 22], "acc": [1, 8, 13, 19, 21, 22], "access": [1, 5, 8, 13, 16, 19, 21, 22], "col": [1, 7, 8, 9, 10, 13, 17, 18, 19, 21, 22], "more": [1, 2, 8, 9, 10, 12, 13, 19, 21, 22], "detail": [1, 2, 4, 8, 9, 10, 12, 13, 19, 21, 22], "below": [1, 2, 8, 9, 10, 13, 19, 21, 22], "make": [1, 7, 13, 18, 21, 22], "sure": [1, 13, 21, 22], "follow": [1, 2, 4, 5, 7, 8, 9, 10, 12, 13, 16, 18, 19, 21, 22], "format": [1, 3, 4, 8, 12, 13, 15, 19, 20, 21, 22], "seamless": [1, 13, 21, 22], "downstream": [1, 13, 21, 22], "step": [1, 4, 5, 12, 13, 16, 19, 21, 22], "thi": [1, 2, 4, 5, 8, 9, 10, 12, 13, 16, 17, 19, 21, 22], "produc": [1, 2, 4, 6, 7, 8, 9, 10, 12, 13, 17, 18, 19, 21, 22], "includ": [1, 8, 
11, 12, 13, 19, 21, 22], "effici": [1, 2, 9, 10, 13, 17, 19, 21, 22], "As": [1, 4, 12, 13, 21, 22], "predefin": [1, 13, 21, 22], "valu": [1, 5, 7, 8, 13, 16, 18, 19, 21, 22], "bewar": [1, 13, 21, 22], "guidelin": [1, 13, 21, 22], "paramet": [1, 4, 7, 8, 13, 15, 20, 21, 22], "full": [1, 2, 4, 8, 13, 15, 20, 21, 22], "argument": [1, 2, 4, 7, 13, 21, 22], "readm": [1, 13, 21, 22], "common": [1, 13, 21, 22], "factor": [1, 13, 19, 21, 22], "mai": [1, 2, 4, 9, 10, 12, 13, 21, 22], "want": [1, 4, 12, 13, 21, 22], "tweak": [1, 7, 13, 18, 21, 22], "ctrl": [1, 7, 13, 18, 21, 22], "cond": [1, 7, 13, 18, 21, 22], "lfc": [1, 7, 13, 15, 18, 21, 22], "have": [1, 5, 7, 8, 12, 13, 16, 18, 19, 21, 22], "label": [1, 5, 7, 13, 16, 18, 21, 22], "Not": [1, 21], "pass": [1, 21], "doe": [1, 7, 13, 18, 21, 22], "flag": [1, 5, 7, 12, 13, 16, 18, 21, 22], "omit": [1, 13, 21, 22], "rate": [1, 7, 8, 13, 15, 18, 19, 21, 22], "doesn": [1, 21], "ani": [1, 4, 8, 12, 19, 21], "observ": [1, 4, 8, 12, 19, 21], "candid": [1, 21], "while": [1, 4, 12, 21], "too": [1, 2, 9, 10, 21], "mani": [1, 21], "veri": [1, 21], "low": [1, 7, 18, 19, 21], "significantli": [1, 12, 21], "decreas": [1, 21], "power": [1, 21], "criteria": [1, 21], "fitler": [1, 21], "benefici": [1, 21], "varaint": [1, 21], "whenev": [1, 21], "possibl": [1, 19, 21], "better": [1, 21], "For": [1, 5, 8, 9, 10, 16, 19, 21], "one": [1, 4, 5, 7, 8, 12, 16, 18, 19, 21], "OR": [1, 4, 12, 21], "path_to_gene_names_fil": [1, 4, 12, 21], "fasta": [1, 4, 9, 10, 11, 12, 21], "gene_exon": [1, 4, 12, 21], "fa": [1, 4, 12, 21], "gene_exon_fa": [1, 4, 12, 21], "where": [1, 4, 7, 11, 12, 17, 18, 21], "ha": [1, 8, 9, 10, 11, 19, 21], "symbol": [1, 4, 12, 21], "line": [1, 2, 4, 9, 10, 11, 12, 21], "its": [1, 7, 18, 21], "mane": [1, 4, 12, 21], "transcript": [1, 4, 12, 21], "hg38": [1, 21], "coordin": [1, 5, 12, 16, 21], "exon": [1, 4, 11, 12, 21], "In": [1, 5, 12, 16, 21], "order": [1, 5, 16, 21], "other": [1, 7, 18, 21], "refer": [1, 4, 5, 12, 16, 21], 
"version": [1, 5, 16, 21], "ll": [1, 21], "need": [1, 2, 5, 9, 10, 16, 21], "feed": [1, 4, 7, 12, 18, 21], "here": [1, 4, 11, 12, 21], "given": [1, 8, 19, 21], "we": [1, 21], "re": [1, 21], "ouptut": [1, 21], "high": [1, 21], "enough": [1, 21], "latter": [1, 21], "typic": [1, 21], "dataset": [1, 21], "good": [1, 13, 21, 22], "coverag": [1, 7, 18, 21], "result": [1, 19, 21], "By": [1, 2, 9, 10, 21], "default": [1, 2, 7, 9, 10, 12, 14, 17, 18, 19, 21], "surviv": [1, 5, 8, 16, 19, 21], "most": [1, 12, 21], "tabl": [1, 3, 8, 11, 13, 19, 21, 22], "identif": [1, 21], "quantif": [1, 5, 16, 21], "check": [1, 21], "choos": [1, 21], "altern": [1, 21], "necessari": [1, 21], "take": [1, 13, 21, 22], "option": [1, 5, 8, 13, 15, 16, 19, 20, 21, 22], "from": [1, 2, 7, 8, 9, 10, 11, 12, 13, 15, 18, 19, 21, 22], "your": [1, 2, 4, 9, 10, 12, 13, 21, 22], "test_guide_info": [1, 13, 21, 22], "abov": [1, 4, 6, 7, 8, 12, 13, 17, 18, 19, 21, 22], "score": [1, 4, 8, 9, 10, 12, 13, 19, 21, 22], "chromosom": [1, 5, 13, 16, 21, 22], "bigwig": [1, 5, 13, 16, 19, 21, 22], "bw": [1, 8, 13, 19, 21, 22], "No": [1, 13, 21, 22], "assum": [1, 2, 9, 10, 13, 19, 21, 22], "same": [1, 4, 9, 10, 12, 13, 21, 22], "all": [1, 7, 8, 9, 10, 13, 18, 19, 21, 22], "don": [1, 7, 13, 18, 21, 22], "uniform": [1, 7, 8, 13, 18, 19, 21, 22], "subcommand": [1, 13], "singl": [2, 4, 9, 10, 12], "allow": [2, 9, 10], "transit": [2, 9, 10], "when": [2, 4, 5, 7, 8, 9, 10, 12, 16, 18, 19], "site": [2, 9, 10, 13, 19, 22], "allel": [2, 8, 9, 10, 15, 19, 20], "done": [2, 9, 10], "crispresso2": [2, 9, 10], "modifi": [2, 9, 10], "awar": [2, 9, 10], "sample_list": [2, 3, 9, 10, 13, 15, 22], "r1_filepath": [2, 5, 9, 10, 16], "r2_filepath": [2, 5, 9, 10, 16], "sample_nam": [2, 9, 10], "being": [2, 9, 10], "sgrna_info_t": [2, 9, 10, 15], "read": [2, 5, 9, 10, 16], "thread": [2, 9, 10], "my_sorting_screen": [2, 4, 6, 7, 9, 10, 12, 17, 18], "run": [2, 4, 5, 9, 10, 12, 15, 16, 17, 20], "r1": [2, 5, 9, 10, 16], "fq": [2, 9, 10], "r2": [2, 
5, 9, 10, 16], "trim": [2, 9, 10], "off": [2, 9, 10], "adapt": [2, 9, 10], "adjust": [2, 4, 8, 9, 10, 12, 19], "command": [2, 4, 6, 7, 8, 9, 10, 12, 17, 18, 19], "accord": [2, 9, 10], "structur": [2, 9, 10], "annot": [2, 4, 8, 9, 10, 12, 19], "matrix": [2, 9, 10], "purturb_tool": [2, 9, 10], "http": [2, 9, 10], "github": [2, 9, 10], "com": [2, 9, 10], "pinellolab": [2, 9, 10], "tool": [2, 9, 10, 19], "section": [2, 8, 9, 10, 19], "_bcmatch": [2, 9, 10], "_edit": [2, 9, 10], "allele_t": [2, 9, 10], "often": [2, 9, 10], "larg": [2, 9, 10], "write": [2, 9, 10, 15], "an": [2, 9, 10, 19], "excel": [2, 9, 10], "grna_librari": 3, "grna_counts_t": [3, 8, 19], "first": [3, 11], "should": [3, 5, 8, 9, 10, 16, 19], "tile": [4, 5, 7, 8, 9, 10, 12, 13, 15, 16, 18, 19, 22], "mode": [4, 8, 12, 19], "account": [4, 12], "robustli": [4, 12], "my_sorting_screen_mask": [4, 7, 12, 18], "my_sorting_screen_filt": [4, 8, 12, 19], "filtered_allele_stat": [4, 12], "pdf": [4, 12], "filter_log": [4, 12], "count": [4, 5, 7, 8, 12, 15, 16, 18, 19, 20], "stat": [4, 12], "fliter": [4, 12], "obtain": [4, 12], "optim": [4, 12], "balanc": [4, 12], "between": [4, 7, 12, 18], "exampl": [4, 5, 6, 11, 12, 15, 16, 17, 20], "amino": [4, 12], "acid": [4, 12], "code": [4, 5, 8, 12, 15, 16, 19, 21], "which": [4, 5, 7, 8, 12, 16, 18, 19], "occur": [4, 12], "highli": [4, 12], "recommend": [4, 12, 19], "increas": [4, 12], "librari": [4, 5, 7, 9, 10, 12, 15, 16, 18, 19, 21, 22], "cover": [4, 8, 12, 19], "do": [4, 7, 12, 18], "either": [4, 8, 12, 19], "genomic_po": [4, 5, 12, 16], "sgrna_info_tbl": [4, 12], "2023": [4, 12], "grch38": [4, 12], "automat": [4, 12], "To": [4, 7, 12, 18], "custom": [4, 11, 12], "entri": [4, 11, 12], "gene_id": [4, 12], "gene_exon_fasta_path": [4, 12], "without": [4, 7, 9, 10, 12, 13, 18, 22], "header": [4, 5, 11, 12, 16], "keep": [4, 9, 10, 12], "outsid": [4, 12], "nucleotid": [4, 12], "lead": [4, 12], "document": [5, 16], "describ": [5, 7, 16, 17, 18, 19], "help": [5, 16], "written": 
[5, 7, 16, 18], "sens": [5, 9, 10, 16], "direct": [5, 9, 10, 16], "effect": [5, 15, 16, 20], "two": [5, 7, 12, 16, 18], "non": [5, 7, 16, 18], "na": [5, 16], "empti": [5, 7, 16, 18], "chrom": [5, 11, 16], "ex": [5, 16, 19], "chr19": [5, 16], "accessibility_sign": [5, 8, 16, 19], "atac": [5, 16, 19], "ignor": [5, 7, 9, 10, 12, 16, 18, 19], "bystand": [5, 16, 19], "denot": [5, 8, 9, 10, 11, 16, 19], "element": [5, 8, 16, 19], "later": [5, 16], "target_group": [5, 8, 16, 18, 19], "neg": [5, 8, 12, 16, 19], "consid": [5, 7, 12, 16, 18, 19], "qc": [5, 15, 16, 17, 20], "specifi": [5, 7, 8, 9, 10, 16, 18, 19], "posctrl": [5, 7, 16, 18], "match_target_po": [5, 16], "rel": [5, 7, 9, 10, 12, 16, 18], "noncod": [5, 11, 16], "strand": [5, 9, 10, 11, 16], "start_po": [5, 9, 10, 16], "smaller": [5, 16], "among": [5, 16], "regardless": [5, 16], "stranded": [5, 16], "also": [5, 16], "gz": [5, 16], "sample_id": [5, 8, 10, 16, 19], "NOT": [5, 16], "timepoint": [5, 16], "d5": [5, 16], "d18": [5, 16], "upper_quantil": [5, 8, 16, 19], "upper": [5, 8, 16, 19], "lower_quantil": [5, 8, 16, 19], "lower": [5, 8, 11, 16, 19], "prolifer": [5, 8, 16, 19], "time": [5, 7, 16, 17, 18, 19], "numer": [5, 16], "output_prefix": [6, 8, 12, 17, 19], "prefix_editing_prefer": [6, 17], "html": [6, 7, 17, 18], "ipynb": [6, 7, 17, 18], "prefer": [6, 17], "qc_report_my_sorting_screen": [7, 18], "presort": [7, 18], "befor": [7, 18], "mean": [7, 8, 18, 19], "inspect": [7, 18], "thre": [7, 18], "qualiti": [7, 9, 10, 18, 19], "mask": [7, 18, 19], "plot": [7, 18], "log": [7, 8, 12, 18, 19], "fold": [7, 18], "distribut": [7, 8, 18, 19], "identifi": [7, 17, 18], "bdata": [7, 12, 17, 18, 19], "outlier": [7, 18, 19], "filter": [7, 9, 10, 15, 17, 18, 20], "out": [7, 12, 18, 19], "problemat": [7, 18], "out_screen_path": [7, 18], "out_report_prefix": [7, 18], "count_correlation_thr": [7, 18], "edit_rate_thr": [7, 18], "lfc_thre": [7, 18], "remov": [7, 13, 18, 22], "bad": [7, 18], "meet": [7, 18], "onli": [7, 11, 12, 18, 
19], "miss": [7, 18], "condiiton": [7, 18], "fake": [7, 18], "add": [7, 18], "dummi": [7, 18], "modal": [7, 18], "dont": [7, 18, 19], "recalcul": [7, 18], "exist": [7, 18, 19], "note": [7, 18], "arguement": [7, 18], "wai": [7, 18], "metric": [7, 8, 18, 19], "calcul": [7, 15, 18], "replicate_label": [7, 18], "covari": [7, 18], "sample_covari": [7, 18], "comma": [7, 18, 19], "separ": [7, 18, 19], "experiment": [7, 17, 18, 19], "drug": [7, 18], "treatment": [7, 18], "etc": [7, 17, 18], "condition_label": [7, 18], "ctrl_cond": [7, 18], "none": [7, 18], "whether": [7, 18, 19], "edit_start_po": [7, 12, 18], "edit_end_po": [7, 12, 18], "those": [7, 18], "target_pos_col": [7, 9, 10, 18], "quantifi": [7, 15, 17, 18, 20], "inclus": [7, 12, 18], "exclus": [7, 12, 18], "posctrl_col": [7, 18], "datafram": [7, 18, 19], "string": [7, 9, 10, 18, 19], "val": [7, 18], "posctrl_val": [7, 18], "lfc_cond": [7, 18], "delimit": [7, 18], "bayesian": [8, 19], "network": [8, 19], "incorpor": [8, 19], "outcom": [8, 13, 19, 22], "posterior": [8, 19], "estim": [8, 19], "phenotyp": [8, 19], "The": [8, 9, 10, 19], "reflect": [8, 19], "gener": [8, 19], "process": [8, 19], "briefli": [8, 19], "cellular": [8, 19], "upon": [8, 19], "model": [8, 19], "gaussian": [8, 19], "mixtur": [8, 19], "wild": [8, 19], "type": [8, 19], "weight": [8, 19], "compon": [8, 19], "infer": [8, 9, 10, 19], "chromatin": [8, 13, 19, 22], "formul": [8, 19], "method": [8, 19], "manuscript": [8, 19], "raw": [8, 19], "limit": [8, 19], "creat": [8, 19], "flat": [8, 19], "block": [8, 19], "grna_info_t": [8, 19], "sample_info_t": [8, 19], "edit_counts_t": [8, 19], "1st": [8, 19], "manual": [8, 11, 19], "api": [8, 15, 19], "tutori": [8, 15, 19], "bean_element_result": [8, 19], "model_typ": [8, 19], "size": [8, 19], "mu": [8, 19], "standard": [8, 19], "normal": [8, 19], "sd": [8, 19], "mu_sd": [8, 19], "deviat": [8, 19], "uncertainti": [8, 19], "mu_z": [8, 19], "z": [8, 19], "ci": [8, 19], "025": [8, 19], "975": [8, 19], "credibl": 
[8, 19], "interv": [8, 19], "_adj": [8, 19], "suffix": [8, 19], "correspond": [8, 12, 19], "evid": [8, 19], "effective_edit_r": [8, 19], "sum": [8, 19], "over": [8, 19], "divid": [8, 19], "prior": [8, 19], "up": [8, 19], "n_coocc": [8, 19], "cooccur": [8, 19], "bean_sgrna_result": [8, 19], "usag": [9, 10, 12, 15, 17, 18, 20], "h": [9, 10, 12, 17, 18, 19], "sgrna_filenam": [9, 10], "guide_start_seq": [9, 10], "guide_end_seq": [9, 10], "barcode_start_seq": [9, 10], "q": [9, 10, 19], "min_average_read_qu": [9, 10], "min_single_bp_qu": [9, 10], "output_fold": [9, 10], "l": [9, 10], "reporter_length": [9, 10], "intermedi": [9, 10], "qstart": [9, 10], "qstart_r1": [9, 10], "qend": [9, 10], "qend_r1": [9, 10], "qstart_r2": [9, 10], "qend_r2": [9, 10], "gstart": [9, 10], "gstart_report": [9, 10], "bc": [9, 10], "guide_bc": [9, 10], "len": [9, 10], "guide_bc_len": [9, 10], "align": [9, 10], "align_fasta": [9, 10], "m": [9, 10], "editor": [9, 10], "filenam": [9, 10], "descript": [9, 10], "three": [9, 10], "after": [9, 10], "min": [9, 10], "averag": [9, 10], "minimum": [9, 10], "phred33": [9, 10], "bp": [9, 10], "folder": [9, 10], "32": [9, 10], "36": [9, 10], "exact": [9, 10], "construct": [9, 10], "begin": [9, 10], "offest": [9, 10], "instead": [9, 10, 12, 19], "self": [9, 10], "guide_start_seqs_fil": 10, "guide_end_seqs_fil": 10, "barcode_start_seqs_fil": 10, "rerun": 10, "recount": 10, "current": [11, 19], "download": 11, "ucsc": 11, "browser": 11, "instruct": 11, "video": 11, "rang": 11, "tag": [11, 19], "pars": 11, "utr": 11, "last": 11, "case": 11, "frequenc": 12, "plasmid": 12, "plasmid_path": 12, "jaccard": 12, "threshold": 12, "jaccard_threshold": 12, "indel": 12, "translate_fasta": 12, "translate_fastas_csv": 12, "translate_gen": 12, "translate_genes_list": 12, "filter_allele_proport": 12, "filter_allele_count": 12, "filter_sample_proport": 12, "bdata_path": [12, 17, 18, 19], "assign": [12, 19], "p": [12, 19], "enrich": 12, "compar": 12, "expect": [12, 19], "fed": 
12, "posiiton": 12, "e": 12, "j": 12, "similar": 12, "thei": 12, "higher": 12, "w": [12, 17], "target_base_chang": 12, "ldlr": 12, "hg19": 12, "ap": 12, "exce": 12, "retain": 12, "05": 12, "ac": 12, "AND": 12, "sp": 12, "temporari": 12, "layout": 14, "gwa": [15, 22], "profil": [15, 20], "pattern": [15, 20], "subset": 15, "addit": 15, "get": 15, "modul": 15, "search": 15, "page": 15, "replicate_col": [17, 19], "condition_col": [17, 19], "pam_col": 17, "control_condit": [17, 18, 19], "window_length": 17, "would": 17, "pre": 17, "maxim": 17, "context": 17, "rep": 19, "pi": 19, "acc_bw_path": 19, "acc_col": 19, "const": 19, "shrink": 19, "alpha": 19, "time_col": 19, "target_col": 19, "activ": 19, "guide_activity_col": 19, "outdir": 19, "result_suffix": 19, "sorting_bin_upper_quantile_col": 19, "sorting_bin_lower_quantile_col": 19, "overdispers": 19, "fail": 19, "alpha_if_overdispersion_fitting_fail": 19, "cuda": 19, "sample_mask_col": 19, "negctrl_col": 19, "negctrl_col_valu": 19, "repguid": 19, "repguide_mask": 19, "devic": 19, "bcmatch": 19, "df": 19, "kei": 19, "allele_df_kei": 19, "splice": 19, "splice_site_path": 19, "control_guide_tag": 19, "nois": 19, "confid": 19, "iter": 19, "n_iter": 19, "choic": 19, "continu": 19, "viabil": 19, "library_design": 19, "set": 19, "variabl": 19, "constant": 19, "trend": 19, "shrunk": 19, "toward": 19, "elaps": 19, "indic": 19, "ic": 19, "via": 19, "extern": 19, "uq": 19, "lq": 19, "af": 19, "regress": 19, "coeffici": 19, "b0": 19, "b1": 19, "a0": 19, "dispers": 19, "gpu": 19, "treat": 19, "n_replic": 19, "valid": 19, "even": 19, "mix": 19, "synonym": 19, "svi": 19, "taken": 19, "2000": 19, "pkl": 19, "present": 19}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"reporterscreen": [0, 3], "api": 0, "tutori": [0, 1, 13, 21, 22], "subset": 0, "addit": 0, "select": 0, "lfc": 0, "calcul": 0, "get": 0, "edit": [0, 6, 17], "rate": 0, "from": [0, 3], "allel": [0, 1, 4, 12, 21], "count": [0, 1, 2, 9, 10, 13, 21, 22], 
"translat": [0, 4, 12], "write": 0, "code": 1, "sequenc": 1, "tile": [1, 21], "librari": [1, 13], "sort": [1, 13, 21, 22], "screen": [1, 2, 3, 7, 9, 10, 13, 15, 18, 21, 22], "exampl": [1, 8, 13, 19, 21, 22], "workflow": [1, 13, 15, 21, 22], "1": [1, 13, 21, 22], "grna": [1, 13, 21, 22], "report": [1, 2, 7, 9, 10, 13, 18, 21, 22], "bean": [1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 21, 22], "sampl": [1, 2, 9, 10, 13, 21, 22], "2": [1, 13, 21, 22], "qc": [1, 7, 13, 18, 21, 22], "3": [1, 13, 21, 22], "filter": [1, 4, 11, 12, 21], "4": [1, 21], "quantifi": [1, 8, 13, 19, 21, 22], "variant": [1, 8, 13, 19, 21, 22], "effect": [1, 8, 13, 19, 21, 22], "run": [1, 7, 8, 13, 18, 19, 21, 22], "data": [2, 7, 9, 10, 15, 18], "input": [2, 3, 7, 8, 9, 10, 11, 16, 18, 19], "file": [2, 3, 9, 10, 11, 16], "format": [2, 7, 9, 10, 11, 16, 18], "output": [2, 4, 6, 7, 8, 9, 10, 12, 17, 18, 19], "creat": 3, "object": 3, "flat": 3, "option": [4, 7, 12, 18], "sgrna_info_t": [5, 16], "csv": [5, 16], "sample_list": [5, 16], "profil": [6, 17], "pattern": [6, 17], "threshold": [7, 18], "h5ad": [7, 18], "usag": [8, 19], "full": [9, 10, 12, 17, 18, 19], "paramet": [9, 10, 12, 17, 18, 19], "name": [9, 10, 12, 17, 18, 19], "argument": [9, 10, 12, 17, 18, 19], "fa": 11, "posit": [12, 17, 18, 19], "gwa": 13, "guid": [13, 22], "titl": 14, "crispr": 14, "welcom": 15, "": 15, "document": 15, "subcommand": [15, 20], "structur": 15, "indic": 15, "tabl": 15}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 60}, "alltitles": {"ReporterScreen API tutorial": [[0, "reporterscreen-api-tutorial"]], "Subsetting & addition": [[0, "subsetting-addition"]], "Subsetting & selection": [[0, "subsetting-selection"]], "LFC calculation & Addition": [[0, 
"lfc-calculation-addition"]], "Getting edit rates from allele counts": [[0, "getting-edit-rates-from-allele-counts"]], "Calculating LFC": [[0, "calculating-lfc"]], "Allele translation": [[0, "allele-translation"]], "Writing": [[0, "writing"]], "Coding sequence tiling library": [[1, "coding-sequence-tiling-library"]], "Tiling sorting screen tutorial": [[1, "tiling-sorting-screen-tutorial"], [21, "tiling-sorting-screen-tutorial"]], "Example workflow": [[1, "example-workflow"], [13, "example-workflow"], [21, "example-workflow"], [22, "example-workflow"]], "1. Count gRNA & reporter (bean count-samples)": [[1, "count-grna-reporter-count-samples"], [13, "count-grna-reporter-count-samples"], [21, "count-grna-reporter-count-samples"], [22, "count-grna-reporter-count-samples"]], "2. QC (bean qc)": [[1, "qc-qc"], [21, "qc-qc"]], "3. Filter alleles (bean filter)": [[1, "filter-alleles-filter"], [21, "filter-alleles-filter"]], "4. Quantify variant effect (bean run)": [[1, "quantify-variant-effect-run"], [21, "quantify-variant-effect-run"]], "bean count[-samples]: Count (reporter) screen data": [[2, "bean-count-samples-count-reporter-screen-data"], [9, "bean-count-samples-count-reporter-screen-data"], [10, "bean-count-samples-count-reporter-screen-data"]], "Input file format": [[2, "input-file-format"], [9, "input-file-format"], [10, "input-file-format"], [16, "input-file-format"]], "Output file format": [[2, "output-file-format"], [9, "output-file-format"], [10, "output-file-format"]], "bean create-screen: Create ReporterScreen object from flat files": [[3, "bean-create-screen-create-reporterscreen-object-from-flat-files"]], "Input": [[3, "input"], [8, "input"], [19, "input"]], "filter: Filtering (and optionally translating) alleles": [[4, "filter-filtering-and-optionally-translating-alleles"], [12, "filter-filtering-and-optionally-translating-alleles"]], "Output": [[4, "output"], [6, "output"], [7, "output"], [8, "output"], [12, "output"], [17, "output"], [18, "output"], [19, 
"output"]], "Translating alleles": [[4, "translating-alleles"], [12, "translating-alleles"]], "sgRNA_info_table.csv": [[5, "sgrna-info-table-csv"], [16, "sgrna-info-table-csv"]], "sample_list.csv": [[5, "sample-list-csv"], [16, "sample-list-csv"]], "bean profile: Profile editing patterns": [[6, "bean-profile-profile-editing-patterns"], [17, "bean-profile-profile-editing-patterns"]], "bean qc: QC of reporter screen data": [[7, "bean-qc-qc-of-reporter-screen-data"], [18, "bean-qc-qc-of-reporter-screen-data"]], "QC thresholds:": [[7, "qc-thresholds"], [18, "qc-thresholds"]], "Run options:": [[7, "run-options"], [18, "run-options"]], "Input .h5ad formatting:": [[7, "input-h5ad-formatting"], [18, "input-h5ad-formatting"]], "bean run: Quantify variant effects": [[8, "bean-run-quantify-variant-effects"], [19, "bean-run-quantify-variant-effects"]], "Usage example": [[8, "usage-example"], [19, "usage-example"]], "bean count": [[9, "bean-count"]], "Full parameters": [[9, "full-parameters"], [10, "full-parameters"], [12, "full-parameters"], [17, "full-parameters"], [18, "full-parameters"], [19, "full-parameters"]], "Named Arguments": [[9, "named-arguments"], [10, "named-arguments"], [12, "named-arguments"], [17, "named-arguments"], [18, "named-arguments"], [19, "named-arguments"]], "bean count-samples": [[10, "bean-count-samples"]], "Input .fa file format for bean-filter": [[11, "input-fa-file-format-for-bean-filter"]], "bean filter": [[12, "bean-filter"]], "Positional Arguments": [[12, "positional-arguments"], [17, "positional-arguments"], [18, "positional-arguments"], [19, "positional-arguments"]], "GWAS variant library": [[13, "gwas-variant-library"]], "Variant sorting screen tutorial": [[13, "variant-sorting-screen-tutorial"], [22, "variant-sorting-screen-tutorial"]], "2. QC samples & guides (bean qc)": [[13, "qc-samples-guides-qc"], [22, "qc-samples-guides-qc"]], "3. 
Quantify variant effect (bean run)": [[13, "quantify-variant-effect-run"], [22, "quantify-variant-effect-run"]], "title: CRISPR-BEAN": [[14, "title-crispr-bean"]], "Welcome to bean\u2019s documentation!": [[15, "welcome-to-bean-s-documentation"]], "Workflows": [[15, "workflows"]], "bean subcommands": [[15, "bean-subcommands"]], "Screen data structure": [[15, "screen-data-structure"]], "Indices and tables": [[15, "indices-and-tables"]], "bean profile": [[17, "bean-profile"]], "bean qc": [[18, "bean-qc"]], "QC thresholds": [[18, "qc-thresholds"]], "Run options": [[18, "run-options"]], "Input .h5ad formatting": [[18, "input-.h5ad-formatting"]], "bean run": [[19, "bean-run"]], "Subcommands": [[20, "subcommands"]]}, "indexentries": {}}) \ No newline at end of file diff --git a/docs/_build/subcommands.html b/docs/_build/subcommands.html new file mode 100644 index 0000000..bb2154a --- /dev/null +++ b/docs/_build/subcommands.html @@ -0,0 +1,168 @@ + + + + + + + + Subcommands — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/tutorials/ldl_cds.html b/docs/_build/tutorials/ldl_cds.html new file mode 100644 index 0000000..09e4c5e --- /dev/null +++ b/docs/_build/tutorials/ldl_cds.html @@ -0,0 +1,261 @@ + + + + + + + + Tiling sorting screen tutorial — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Tiling sorting screen tutorial

+

Tiling screen that tiles gRNA densely across locus or multiple loci, selected based on FACS signal quantiles.

+ + + + + + + + + +
Library designTiling (gRNAs tile each locus densely)
tiling library design
SelectionCells are sorted based on FACS signal quantiles
variant library design



+
+

Example workflow

+
screen_id=my_sorting_tiling_screen
+
+# 1. Count gRNA & reporter
+bean-count-samples \
+--input tests/data/sample_list_tiling.csv          `# Contains fastq file path; see test file for example.`\
+-b A                                               `# Base A is edited (into G)` \
+-f tests/data/test_guide_info_tiling_chrom.csv     `# Contains gRNA metadata; see test file for example.`\
+-o ./                                              `# Output directory` \
+-r                                                 `# Quantify reporter edits` \
+-n ${screen_id}                                       `# ID of the screen` \
+--tiling
+
+# 2. QC samples & guides
+bean-qc \
+  bean_count_${screen_id}.h5ad           `# Input ReporterScreen .h5ad file path` \
+  -o bean_count_${screen_id}_masked.h5ad `# Output ReporterScreen .h5ad file path` \
+  -r qc_report_${screen_id}              `# Prefix for QC report` \
+
+# 3. Filter & translate alleles
+bean-filter ./bean_count_${screen_id}_masked.h5ad \
+-o ./bean_count_${screen_id}_alleleFiltered \
+--filter-target-basechange                             `# Filter based on intended base changes. If -b A was provided in bean-count, filters for A>G edit. If -b C was provided, filters for C>T edit.`\
+--filter-window --edit-start-pos 0 --edit-end-pos 19   `# Filter based on editing window in spacer position within reporter.`\
+--filter-allele-proportion 0.1 --filter-sample-proportion 0.3 `#Filter based on allele proportion larger than 0.1 in at least 0.3 (30%) of the control samples.` \
+--translate --translate-genes-list tests/data/gene_symbols.txt
+
+# 4. Quantify variant effect
+bean-run sorting tiling \
+    ./bean_count_${screen_id}_alleleFiltered.h5ad \
+    -o tests/test_res/var/ \
+    --fit-negctrl \
+    --scale-by-acc \
+    --accessibility-col accessibility
+
+
+

See more details below.

+
+
+

1. Count gRNA & reporter (bean count-samples)

+
screen_id=my_sorting_tiling_screen
+
+bean-count-samples \
+--input tests/data/sample_list_tiling.csv          `# Contains fastq file path; see test file for example.`\
+-b A                                               `# Base A is edited (into G)` \
+-f tests/data/test_guide_info_tiling_chrom.csv     `# Contains gRNA metadata; see test file for example.`\
+-o ./                                              `# Output directory` \
+-r                                                 `# Quantify reporter edits` \
+-n ${screen_id}                                       `# ID of the screen` \
+--tiling
+
+
+

Make sure you follow the input file format for seamless downstream steps. This will produce ./bean_count_${screen_id}.h5ad.

+
+
+

2. QC (bean qc)

+

Base editing data will include QC about editing efficiency. As QC uses predefined column names and values, be sure to follow the input file guideline, but you can change the parameters with the full argument list of bean-qc (../../README#bean-qc-qc-of-reporter-screen-data). (Common factors you may want to tweak are --ctrl-cond=bulk and --lfc-conds=top,bot if you have different sample condition labels.)

+
bean-qc \
+  bean_count_${screen_id}.h5ad           `# Input ReporterScreen .h5ad file path` \
+  -o bean_count_${screen_id}_masked.h5ad `# Output ReporterScreen .h5ad file path` \
+  -r qc_report_${screen_id}              `# Prefix for QC report` \
+  [--tiling]                          `# Not required if you have passed --tiling in counting step`
+
+
+

If the data does not include reporter editing data, you can provide --no-editing flag to omit the editing rate QC.

+
+
+

3. Filter alleles (bean filter)

+

As a tiling library doesn’t have a designated per-gRNA target variant, any base edit observed in the reporter may be a candidate variant, while having too many variants with very low editing rates significantly decreases the power. Variants are filtered based on multiple criteria in bean-filter.

+

If the screen targets coding sequence, it’s beneficial to translate edits into coding variants whenever possible for better power. For translation, provide --translate and one of the following:

+
[ --translate-gene-name GENE_SYMBOL OR
+  --translate-genes-list path_to_gene_names_file.txt OR
+  --translate-fasta gene_exon.fa, OR
+  --translate-fastas-csv gene_exon_fas.csv]
+
+
+

where path_to_gene_names_file.txt has one gene symbol per line, and gene symbol uses its MANE transcript (hg38) coordinates of exons. In order to use other reference versions or transcript ID, you’ll need to feed in fasta file. See detailed formatting of fasta file here.

+

Example allele filtering given we’re translating based on MANE transcript exons of multiple gene symbols:

+
bean-filter ./bean_count_${screen_id}_masked.h5ad \
+-o ./bean_count_${screen_id}_alleleFiltered \
+--filter-target-basechange                             `# Filter based on intended base changes. If -b A was provided in bean-count, filters for A>G edit. If -b C was provided, filters for C>T edit.`\
+--filter-window --edit-start-pos 0 --edit-end-pos 19   `# Filter based on editing window in spacer position within reporter.`\
+--filter-allele-proportion 0.1 --filter-sample-proportion 0.3 `#Filter based on allele proportion larger than 0.1 in at least 0.3 (30%) of the control samples.` \
+--translate --translate-genes-list tests/data/gene_symbols.txt
+
+
+

The output file shows the number of alleles per guide and the number of guides per variant, where we want high enough values for the latter. See the typical output for a dataset with good editing coverage & filtering result here.

+
+
+

4. Quantify variant effect (bean run)

+

By default, bean-run [sorting,survival] tiling uses the most filtered allele counts table for variant identification and quantification of their effects. Check the allele filtering output and choose an alternative filtered allele counts table if necessary.

+

bean-run can take 3 run options to quantify editing rate:

+
    +
  1. +
    From reporter + accessibility

    1-1. If your gRNA metadata table (tests/data/test_guide_info.csv above) included per-gRNA accessibility score,

    +
    +
    +
       bean-run sorting tiling \
    +   ./bean_count_${screen_id}_alleleFiltered.h5ad \
    +   -o tests/test_res/var/ \
    +   --fit-negctrl \
    +   --scale-by-acc \
    +   --accessibility-col accessibility
    +
    +1-2. If your gRNA metadata table (tests/data/test_guide_info.csv above) included per-gRNA chromosome & position and you have a bigWig file with accessibility signal,
    +
    +
    +
    bean-run sorting tiling \
    +./bean_count_${screen_id}_alleleFiltered.h5ad \
    +-o tests/test_res/var/ \
    +--fit-negctrl \
    +--scale-by-acc \
    +--accessibility-bw accessibility.bw
    +
    +
    +
  2. +
  3. From reporter

    +
    bean-run sorting tiling \
    +./bean_count_${screen_id}_alleleFiltered.h5ad \
    +-o tests/test_res/var/ \
    +--fit-negctrl
    +
    +
    +
  4. +
  5. +
    No reporter information, assume the same editing efficiency of all gRNAs.

    Use this option if your data don’t have editing rate information.

    +
    +
    +
    bean-run sorting tiling \
    +./bean_count_${screen_id}_alleleFiltered.h5ad \
    +-o tests/test_res/var/ \
    +--fit-negctrl \
    +--uniform-edit
    +
    +
    +
  6. +
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file diff --git a/docs/_build/tutorials/ldl_var.html b/docs/_build/tutorials/ldl_var.html new file mode 100644 index 0000000..d03177f --- /dev/null +++ b/docs/_build/tutorials/ldl_var.html @@ -0,0 +1,232 @@ + + + + + + + + Variant sorting screen tutorial — bean 1.0.0 documentation + + + + + + + + + + + + + + + + + +
+
+
+ + +
+ +
+

Variant sorting screen tutorial

+

GWAS variant screen with per-variant gRNA tiling design, selected based on FACS signal quantiles.

+ + + + + + + + + +
Library designVariant (gRNAs tile each target variant)
variant library design
SelectionCells are sorted based on FACS signal quantiles
variant library design



+
+

Example workflow

+
screen_id=my_sorting_tiling_screen
+
+# 1. Count gRNA & reporter
+bean-count-samples \
+--input tests/data/sample_list.csv    `# Contains fastq file path; see test file for example.`\
+-b A                                  `# Base A is edited (into G)` \
+-f tests/data/test_guide_info.csv     `# Contains gRNA metadata; see test file for example.`\
+-o ./                                 `# Output directory` \
+-r                                    `# Quantify reporter edits` \
+-n ${screen_id}                          `# ID of the screen to be counted`
+
+# 2. QC samples & guides
+bean-qc \
+  bean_count_${screen_id}.h5ad             `# Input ReporterScreen .h5ad file path` \
+  -o bean_count_${screen_id}_masked.h5ad   `# Output ReporterScreen .h5ad file path` \
+  -r qc_report_${screen_id}                `# Prefix for QC report` \
+  -b                                       `# Remove replicates with no good samples.`
+
+# 3. Quantify variant effect
+bean-run sorting variant \
+    tests/data/bean_count_${screen_id}_masked.h5ad \
+    -o tests/test_res/var/ \
+    --fit-negctrl \
+    --scale-by-acc \
+    --accessibility-col accessibility
+
+
+

See more details below.

+
+
+

1. Count gRNA & reporter (bean count-samples)

+
screen_id=my_sorting_tiling_screen
+
+# 1. Count gRNA & reporter
+bean-count-samples \
+--input tests/data/sample_list.csv    `# Contains fastq file path; see test file for example.`\
+-b A                                  `# Base A is edited (into G)` \
+-f tests/data/test_guide_info.csv     `# Contains gRNA metadata; see test file for example.`\
+-o ./                                 `# Output directory` \
+-r                                    `# Quantify reporter edits` \
+-n ${screen_id}                          `# ID of the screen to be counted`
+
+
+

Make sure you follow the input file format for seamless downstream steps. This will produce ./bean_count_${screen_id}.h5ad.

+
+
+

2. QC samples & guides (bean qc)

+

Base editing data will include QC about editing efficiency. As QC uses predefined column names and values, be sure to follow the input file guideline, but you can change the parameters with the full argument list of bean-qc (../../README#bean-qc-qc-of-reporter-screen-data). (Common factors you may want to tweak are --ctrl-cond=bulk and --lfc-conds=top,bot if you have different sample condition labels.)

+
bean-qc \
+  bean_count_${screen_id}.h5ad    `# Input ReporterScreen .h5ad file path` \
+  -o bean_count_${screen_id}_masked.h5ad   `# Output ReporterScreen .h5ad file path` \
+  -r qc_report_${screen_id}   `# Prefix for QC report`
+
+
+

If the data does not include reporter editing data, you can provide --no-editing flag to omit the editing rate QC.

+
+
+

3. Quantify variant effect (bean run)

+

bean-run can take 3 run options to quantify editing rate:

+
    +
  1. +
    From reporter + accessibility

    If your gRNA metadata table (tests/data/test_guide_info.csv above) included per-gRNA accessibility score,

    +
    +
    +
       bean-run sorting variant \
    +   tests/data/bean_count_${screen_id}_masked.h5ad \
    +   -o tests/test_res/var/ \
    +   --fit-negctrl \
    +   --scale-by-acc \
    +   --accessibility-col accessibility
    +
    +If your gRNA metadata table (tests/data/test_guide_info.csv above) included per-gRNA chromosome & position and you have a bigWig file with accessibility signal,
    +
    +
    +
    bean-run sorting variant \
    +tests/data/bean_count_${screen_id}_masked.h5ad \
    +-o tests/test_res/var/ \
    +--fit-negctrl \
    +--scale-by-acc \
    +--accessibility-bw accessibility.bw
    +
    +
    +
  2. +
  3. From reporter, without accessibility

    +
    +

    This assumes that all target sites have uniform chromatin accessibility.

    +
    +
    bean-run sorting variant \
    +tests/data/bean_count_${screen_id}_masked.h5ad \
    +-o tests/test_res/var/ \
    +--fit-negctrl
    +
    +
    +
  4. +
  5. +
    No reporter information, assume the same editing efficiency of all gRNAs.

    Use this option if your data don’t have editing outcome information.

    +
    +
    +
    bean-run sorting variant \
    +tests/data/bean_count_${screen_id}_masked.h5ad \
    +-o tests/test_res/var/ \
    +--fit-negctrl \
    +--uniform-edit
    +
    +
    +
  6. +
+
+
+ + +
+ +
+
+ +
+
+ + + + + + + \ No newline at end of file