ReporterScreen API tutorial¶
+Load the required packages. (Importing anndata is optional; it is not required to use bean.)
+import numpy as np
+import pandas as pd
+import anndata as ad
+import seaborn as sns
+import matplotlib.pyplot as plt
+import bean as br
+
The bean `ReporterScreen` object and the perturb-seq `Screen` object are both `anndata`-compatible.
adata = ad.read_h5ad("bean_count_07+1021_LDLvar.h5ad")
+
adata
+
AnnData object with n_obs × n_vars = 3455 × 12
+ obs: 'name', 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'Group', 'sequence', 'Reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'edit_rate'
+ var: 'index', 'sort', 'replicate'
+ uns: 'allele_counts', 'edit_counts'
+ layers: 'X_bcmatch', 'edits'
+
cdata = br.read_h5ad("bean_count_07+1021_LDLvar.h5ad")
+
cdata
+
Genome Editing Screen comprised of n_guides x n_conditions = 3455 x 12
+ guides: 'name', 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'Group', 'sequence', 'Reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'edit_rate'
+ samples: 'index', 'sort', 'replicate'
+ condit_m:
+ condit_p:
+ layers: 'X_bcmatch', 'edits'
+ uns: 'allele_counts', 'edit_counts'
+
-
+
`cdata.X`: guide counts
+`cdata.guides`: guide metadata
+`cdata.samples`: sample/condition metadata
+`cdata.layers["X_bcmatch"]`: barcode-matched guide counts
+`cdata.layers["edits"]`: edit counts
+`cdata.uns["allele_counts"]`: allele counts per guide and condition
+`cdata.uns["edit_counts"]`: edit counts per guide and condition
+
The `guides` attribute contains the information about each guide.
cdata.guides
+
+ | name | +Unnamed: 0 | +Target gene/variant | +Target descriptor | +Arbitrary number | +gRNA position category | +Target base position in gRNA | +Target base position in reporter | +BE | +Group | +... | +Reporter | +barcode | +5-nt PAM | +offset | +target | +target_pos | +Group2 | +masked_sequence | +masked_barcode | +edit_rate | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | +CONTROL_1_g1 | +0 | +CONTROL | +NaN | +1 | +g1 | +4 | +10 | +ABE | +NegCtrl | +... | +CCAAGCCCTACGCGGTAGGGAACTTTGGGAGC | +GTTT | +GGGAG | +-10 | +CONTROL_1 | +9 | +NegCtrl | +CCTGCGCGGTGGGGGGCTTT | +GTTT | +0.531163 | +
1 | +CONTROL_1_g2 | +1 | +CONTROL | +NaN | +1 | +g2 | +5 | +11 | +ABE | +NegCtrl | +... | +TCCAAGCCCTACGCGGTAGGGAACTTTGGGAG | +AACA | +TGGGA | +-11 | +CONTROL_1 | +10 | +NegCtrl | +CCCTGCGCGGTGGGGGGCTT | +GGCG | +0.640765 | +
2 | +CONTROL_1_g3 | +2 | +CONTROL | +NaN | +1 | +g3 | +5 | +12 | +ABE | +NegCtrl | +... | +GTCCAAGCCCTACGCGGTAGGGAACTTTGGGA | +CGCT | +TTGGG | +-12 | +CONTROL_1 | +11 | +NegCtrl | +CCCTGCGCGGTGGGGGGCT | +CGCT | +0.417709 | +
3 | +CONTROL_1_g4 | +3 | +CONTROL | +NaN | +1 | +g4 | +7 | +13 | +ABE | +NegCtrl | +... | +CGTCCAAGCCCTACGCGGTAGGGAACTTTGGG | +TGAG | +TTTGG | +-13 | +CONTROL_1 | +12 | +NegCtrl | +GGCCCTGCGCGGTGGGGGGC | +TGGG | +0.126400 | +
4 | +CONTROL_1_g5 | +4 | +CONTROL | +NaN | +1 | +g5 | +8 | +14 | +ABE | +NegCtrl | +... | +ACGTCCAAGCCCTACGCGGTAGGGAACTTTGG | +GTAT | +CTTTG | +-14 | +CONTROL_1 | +13 | +NegCtrl | +GGGCCCTGCGCGGTGGGGGG | +GTGT | +0.201104 | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
3450 | +rs9987289_Maj_ABE_347_g1 | +3450 | +rs9987289 | +Maj | +347 | +g1 | +3 | +10 | +ABE | +Variant | +... | +TGCTTGGGCATCAATATCACGTGGAACCAGCC | +CAGT | +CCAGC | +-10 | +rs9987289_Maj_ABE_347 | +9 | +Variant | +GCGTCGGTGTCGCGTGGGG | +CGGT | +0.087379 | +
3451 | +rs9987289_Maj_ABE_347_g2 | +3451 | +rs9987289 | +Maj | +347 | +g2 | +4 | +11 | +ABE | +Variant | +... | +ATGCTTGGGCATCAATATCACGTGGAACCAGC | +TCGC | +ACCAG | +-11 | +rs9987289_Maj_ABE_347 | +10 | +Variant | +GGCGTCGGTGTCGCGTGGG | +TCGC | +0.299923 | +
3452 | +rs9987289_Maj_ABE_347_g3 | +3452 | +rs9987289 | +Maj | +347 | +g3 | +6 | +12 | +ABE | +Variant | +... | +GATGCTTGGGCATCAATATCACGTGGAACCAG | +GCAC | +AACCA | +-12 | +rs9987289_Maj_ABE_347 | +11 | +Variant | +TGGGCGTCGGTGTCGCGTGG | +GCGC | +0.224973 | +
3453 | +rs9987289_Maj_ABE_347_g4 | +3453 | +rs9987289 | +Maj | +347 | +g4 | +7 | +13 | +ABE | +Variant | +... | +AGATGCTTGGGCATCAATATCACGTGGAACCA | +TTGC | +GAACC | +-13 | +rs9987289_Maj_ABE_347 | +12 | +Variant | +TTGGGCGTCGGTGTCGCGTG | +TTGC | +0.265378 | +
3454 | +rs9987289_Maj_ABE_347_g5 | +3454 | +rs9987289 | +Maj | +347 | +g5 | +8 | +14 | +ABE | +Variant | +... | +TAGATGCTTGGGCATCAATATCACGTGGAACC | +GCGA | +GGAAC | +-14 | +rs9987289_Maj_ABE_347 | +13 | +Variant | +CTTGGGCGTCGGTGTCGCGT | +GCGG | +0.266573 | +
3455 rows × 21 columns
+The `samples` attribute contains the sample- and condition-specific information.
cdata.samples
+
+ | index | +sort | +replicate | +
---|---|---|---|
0 | +rep1_bot | +bot | +rep1 | +
1 | +rep2_bot | +bot | +rep2 | +
2 | +rep3_VPA_bot | +bot | +rep3_VPA | +
3 | +rep4_VPA_bot | +bot | +rep4_VPA | +
4 | +rep1_bulk | +bulk | +rep1 | +
5 | +rep2_bulk | +bulk | +rep2 | +
6 | +rep3_VPA_bulk | +bulk | +rep3_VPA | +
7 | +rep4_VPA_bulk | +bulk | +rep4_VPA | +
8 | +rep1_top | +top | +rep1 | +
9 | +rep2_top | +top | +rep2 | +
10 | +rep3_VPA_top | +top | +rep3_VPA | +
11 | +rep4_VPA_top | +top | +rep4_VPA | +
Allele count information is stored in `.uns["allele_counts"]`.
cdata.uns["allele_counts"]
+
+ | guide | +allele | +rep1_bot | +rep2_bot | +rep3_VPA_bot | +rep4_VPA_bot | +rep1_bulk | +rep2_bulk | +rep3_VPA_bulk | +rep4_VPA_bulk | +rep1_top | +rep2_top | +rep3_VPA_top | +rep4_VPA_top | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | +12:51779544AGA_Maj_ABE_2_g1 | +0:9:+:A>G,5:14:+:A>G | +14 | +20 | +13 | +0 | +6 | +15 | +2 | +17 | +22 | +14 | +34 | +3 | +
1 | +12:51779544AGA_Maj_ABE_2_g1 | +-4:5:+:A>G,-2:7:+:A>G,5:14:+:A>G,10:19:+:A>G | +1 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +
2 | +12:51779544AGA_Maj_ABE_2_g1 | +-7:2:+:A>G,0:9:+:A>G,5:14:+:A>G | +3 | +4 | +2 | +0 | +1 | +0 | +5 | +2 | +0 | +0 | +1 | +0 | +
3 | +12:51779544AGA_Maj_ABE_2_g1 | +-9:0:+:G>A,-8:1:+:G>A,-7:2:+:A>C,-6:3:+:C>A,-4... | +1 | +0 | +0 | +1 | +0 | +2 | +1 | +0 | +0 | +0 | +1 | +0 | +
4 | +12:51779544AGA_Maj_ABE_2_g1 | +-7:2:+:A>G,10:19:+:A>G | +1 | +1 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
438407 | +rs9987289_Maj_ABE_347_g5 | +4:17:+:A>G,6:19:+:A>G,9:22:+:A>G | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +2 | +0 | +
438408 | +rs9987289_Maj_ABE_347_g5 | +-12:1:+:A>G,6:19:+:A>G,9:22:+:A>G,11:24:+:G>A | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +1 | +0 | +
438409 | +rs9987289_Maj_ABE_347_g5 | +-12:1:+:A>G,6:19:+:A>G,9:22:+:A>G,16:29:+:A>G | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +1 | +
438410 | +rs9987289_Maj_ABE_347_g5 | +-12:1:+:A>G,0:13:+:A>G,6:19:+:A>G,9:22:+:A>G,1... | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +1 | +0 | +0 | +
438411 | +rs9987289_Maj_ABE_347_g5 | +-12:1:+:A>G,6:19:+:A>G,9:22:+:A>G,12:25:+:T>G | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +1 | +
438412 rows × 14 columns
+Base-level edit counts can be stored in `.uns["edit_counts"]`.
+cdata.uns["edit_counts"]
+
+ | guide | +edit | +rep1_bot | +rep2_bot | +rep3_VPA_bot | +rep4_VPA_bot | +rep1_bulk | +rep2_bulk | +rep3_VPA_bulk | +rep4_VPA_bulk | +rep1_top | +rep2_top | +rep3_VPA_top | +rep4_VPA_top | +ref_base | +alt_base | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | +12:51779544AGA_Maj_ABE_2_g1 | +-1:8:+:G>A | +0 | +0 | +0 | +0 | +1 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +G | +A | +
1 | +12:51779544AGA_Maj_ABE_2_g1 | +-1:8:+:G>C | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +1 | +0 | +1 | +0 | +G | +C | +
2 | +12:51779544AGA_Maj_ABE_2_g1 | +-1:8:+:G>T | +0 | +0 | +0 | +0 | +1 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +G | +T | +
3 | +12:51779544AGA_Maj_ABE_2_g1 | +-2:7:+:A>C | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +2 | +0 | +1 | +0 | +A | +C | +
4 | +12:51779544AGA_Maj_ABE_2_g1 | +-2:7:+:A>G | +19 | +34 | +40 | +4 | +59 | +25 | +66 | +7 | +68 | +48 | +149 | +2 | +A | +G | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
217563 | +rs9987289_Maj_ABE_347_g5 | +8:21:+:C>A | +0 | +7 | +0 | +0 | +0 | +1 | +1 | +0 | +1 | +0 | +0 | +0 | +C | +A | +
217564 | +rs9987289_Maj_ABE_347_g5 | +8:21:+:C>G | +0 | +0 | +2 | +0 | +0 | +8 | +0 | +0 | +0 | +1 | +8 | +0 | +C | +G | +
217565 | +rs9987289_Maj_ABE_347_g5 | +8:21:+:C>T | +0 | +0 | +7 | +0 | +0 | +0 | +7 | +0 | +0 | +0 | +0 | +0 | +C | +T | +
217566 | +rs9987289_Maj_ABE_347_g5 | +9:22:+:A>G | +9 | +21 | +30 | +51 | +37 | +46 | +12 | +20 | +58 | +23 | +59 | +47 | +A | +G | +
217567 | +rs9987289_Maj_ABE_347_g5 | +9:22:+:A>T | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +7 | +0 | +0 | +0 | +0 | +A | +T | +
217568 rows × 16 columns
+Subsetting & addition¶
+Works like anndata, with additional support for allele and edit count operations.
+Subsetting & selection¶
+cdata_subset = cdata[:10,cdata.samples.sort == "bulk"]
+
['rep1_bulk', 'rep2_bulk', 'rep3_VPA_bulk', 'rep4_VPA_bulk']
+
cdata_subset.uns["allele_counts"]
+
+ | guide | +allele | +rep1_bulk | +rep2_bulk | +rep3_VPA_bulk | +rep4_VPA_bulk | +
---|---|---|---|---|---|---|
14979 | +CONTROL_10_g1 | +-4:5:+:A>G,0:9:+:A>G | +8 | +1 | +3 | +0 | +
14980 | +CONTROL_10_g1 | +-7:2:+:C>T | +0 | +0 | +0 | +10 | +
14981 | +CONTROL_10_g1 | +-4:5:+:A>G | +29 | +2 | +29 | +25 | +
14982 | +CONTROL_10_g1 | +1:10:+:A>G | +0 | +6 | +4 | +1 | +
14983 | +CONTROL_10_g1 | +-4:5:+:A>G,1:10:+:A>G | +1 | +11 | +5 | +12 | +
... | +... | +... | +... | +... | +... | +... | +
22837 | +CONTROL_1_g5 | +-13:0:+:A>-,-12:1:+:C>T,-9:4:+:C>G,-8:5:+:C>T,... | +0 | +0 | +0 | +0 | +
22838 | +CONTROL_1_g5 | +-6:7:+:A>C,7:20:+:A>G | +0 | +0 | +0 | +0 | +
22839 | +CONTROL_1_g5 | +-13:0:+:A>G,-10:3:+:T>G,0:13:+:A>G,7:20:+:A>G | +0 | +0 | +0 | +0 | +
22840 | +CONTROL_1_g5 | +0:13:+:A>T | +0 | +0 | +0 | +0 | +
22841 | +CONTROL_1_g5 | +0:13:+:A>G,18:31:+:G>A | +0 | +0 | +0 | +0 | +
1080 rows × 6 columns
+LFC calculation & Addition¶
+cdata1 = br.read_h5ad("/data/pinello/PROJECTS/2021_08_ANBE/data/072121_ABE_topbot/bean_counts/LDLvar/032422_crispresso/bean_count_072121_ABE_topbot_LDLvar.h5ad")
+cdata2 = br.read_h5ad("/data/pinello/PROJECTS/2021_08_ANBE/data/102121_ABE_topbot/bean_counts/LDLvar/032422_crispresso/bean_count_102121_ABE_topbot_LDLvar.h5ad")
+
cdata1.samples["sort"] = cdata1.samples["index"].map(lambda s: s.rsplit("_", 1)[-1])
+cdata1.samples["replicate"] = cdata1.samples["index"].map(lambda s: s.rsplit("_", 1)[0])
+cdata2.samples["sort"] = cdata2.samples["index"].map(lambda s: s.rsplit("_", 1)[-1])
+cdata2.samples["replicate"] = cdata2.samples["index"].map(lambda s: s.rsplit("_", 1)[0])
+
cdata1.log_norm()
+lfc1 = cdata1.log_fold_change_reps("bot", "top")
+cdata2.log_norm()
+lfc2 = cdata2.log_fold_change_reps("bot", "top")
+lfcs = lfc1.join(lfc2, lsuffix = "_1", rsuffix = "_2")
+sns.pairplot(lfcs)
+
LFC can be aggregated for biological replicates.
+cdata1.log_fold_change_aggregate("bot", "top", aggregate_condit = "replicate")
+
cdata1.guides
+
+ | name | +Unnamed: 0 | +Target gene/variant | +Target descriptor | +Arbitrary number | +gRNA position category | +Target base position in gRNA | +Target base position in reporter | +BE | +Group | +... | +Reporter | +barcode | +5-nt PAM | +offset | +target | +target_pos | +Group2 | +masked_sequence | +masked_barcode | +bot_top.lfc.median | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | +CONTROL_1_g1 | +0 | +CONTROL | +NaN | +1 | +g1 | +4 | +10 | +ABE | +NegCtrl | +... | +CCAAGCCCTACGCGGTAGGGAACTTTGGGAGC | +GTTT | +GGGAG | +-10 | +CONTROL_1 | +9 | +NegCtrl | +CCTGCGCGGTGGGGGGCTTT | +GTTT | +-0.158787 | +
1 | +CONTROL_1_g2 | +1 | +CONTROL | +NaN | +1 | +g2 | +5 | +11 | +ABE | +NegCtrl | +... | +TCCAAGCCCTACGCGGTAGGGAACTTTGGGAG | +AACA | +TGGGA | +-11 | +CONTROL_1 | +10 | +NegCtrl | +CCCTGCGCGGTGGGGGGCTT | +GGCG | +-0.212254 | +
2 | +CONTROL_1_g3 | +2 | +CONTROL | +NaN | +1 | +g3 | +5 | +12 | +ABE | +NegCtrl | +... | +GTCCAAGCCCTACGCGGTAGGGAACTTTGGGA | +CGCT | +TTGGG | +-12 | +CONTROL_1 | +11 | +NegCtrl | +CCCTGCGCGGTGGGGGGCT | +CGCT | +0.186679 | +
3 | +CONTROL_1_g4 | +3 | +CONTROL | +NaN | +1 | +g4 | +7 | +13 | +ABE | +NegCtrl | +... | +CGTCCAAGCCCTACGCGGTAGGGAACTTTGGG | +TGAG | +TTTGG | +-13 | +CONTROL_1 | +12 | +NegCtrl | +GGCCCTGCGCGGTGGGGGGC | +TGGG | +-0.022441 | +
4 | +CONTROL_1_g5 | +4 | +CONTROL | +NaN | +1 | +g5 | +8 | +14 | +ABE | +NegCtrl | +... | +ACGTCCAAGCCCTACGCGGTAGGGAACTTTGG | +GTAT | +CTTTG | +-14 | +CONTROL_1 | +13 | +NegCtrl | +GGGCCCTGCGCGGTGGGGGG | +GTGT | +0.457033 | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
3450 | +rs9987289_Maj_ABE_347_g1 | +3450 | +rs9987289 | +Maj | +347 | +g1 | +3 | +10 | +ABE | +Variant | +... | +TGCTTGGGCATCAATATCACGTGGAACCAGCC | +CAGT | +CCAGC | +-10 | +rs9987289_Maj_ABE_347 | +9 | +Variant | +GCGTCGGTGTCGCGTGGGG | +CGGT | +-0.418312 | +
3451 | +rs9987289_Maj_ABE_347_g2 | +3451 | +rs9987289 | +Maj | +347 | +g2 | +4 | +11 | +ABE | +Variant | +... | +ATGCTTGGGCATCAATATCACGTGGAACCAGC | +TCGC | +ACCAG | +-11 | +rs9987289_Maj_ABE_347 | +10 | +Variant | +GGCGTCGGTGTCGCGTGGG | +TCGC | +-0.084936 | +
3452 | +rs9987289_Maj_ABE_347_g3 | +3452 | +rs9987289 | +Maj | +347 | +g3 | +6 | +12 | +ABE | +Variant | +... | +GATGCTTGGGCATCAATATCACGTGGAACCAG | +GCAC | +AACCA | +-12 | +rs9987289_Maj_ABE_347 | +11 | +Variant | +TGGGCGTCGGTGTCGCGTGG | +GCGC | +-0.339419 | +
3453 | +rs9987289_Maj_ABE_347_g4 | +3453 | +rs9987289 | +Maj | +347 | +g4 | +7 | +13 | +ABE | +Variant | +... | +AGATGCTTGGGCATCAATATCACGTGGAACCA | +TTGC | +GAACC | +-13 | +rs9987289_Maj_ABE_347 | +12 | +Variant | +TTGGGCGTCGGTGTCGCGTG | +TTGC | +-0.517138 | +
3454 | +rs9987289_Maj_ABE_347_g5 | +3454 | +rs9987289 | +Maj | +347 | +g5 | +8 | +14 | +ABE | +Variant | +... | +TAGATGCTTGGGCATCAATATCACGTGGAACC | +GCGA | +GGAAC | +-14 | +rs9987289_Maj_ABE_347 | +13 | +Variant | +CTTGGGCGTCGGTGTCGCGT | +GCGG | +0.002245 | +
3455 rows × 21 columns
+Technical replicates show decent LFC correlation (see the pairplot above), so the two screens can be combined by addition.
+cdata = cdata1 + cdata2
+
cdata
+
Genome Editing Screen comprised of n_guides x n_conditions = 3455 x 12
+ guides: 'name', 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'Group', 'sequence', 'Reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'bot_top.lfc.median'
+ samples: 'index', 'sort', 'replicate'
+ condit_m:
+ condit_p:
+ layers: 'edits', 'X_bcmatch'
+ uns: 'allele_counts'
+
You can concatenate different samples with shared guides.
+br.concat((cdata1, cdata2))
+
Genome Editing Screen comprised of n_guides x n_conditions = 3455 x 24
+ guides: 'name', 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'Group', 'sequence', 'Reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'bot_top.lfc.median'
+ samples: 'index', 'sort', 'replicate'
+ condit_m:
+ condit_p:
+ layers: 'X', 'X_bcmatch', 'edits', 'lognorm_counts', 'lognorm_edits'
+ uns: 'allele_counts'
+
Getting edit rates from allele counts¶
+cdata.get_edit_rate(normalize_by_editable_base = False,
+ edited_base = "A",
+ editable_base_start = 3,
+ editable_base_end = 8,
+ bcmatch_thres = 10,
+ prior_weight = 1)
+
cdata.uns["edit_counts"] = cdata.get_edit_from_allele()
+
cdata.get_edit_mat_from_uns("A", "G", match_target_position = True)
+cdata.get_edit_rate(edited_base = "A", bcmatch_thres = 10)
+plt.hist(cdata.guides.edit_rate, bins=30)
+plt.show()
+
Calculating LFC¶
+cdata.log_norm()
+cdata.log_fold_change_aggregate("bot", "top", aggregate_condit = "replicate")
+
cdata.guides
+
+ | name | +Unnamed: 0 | +Target gene/variant | +Target descriptor | +Arbitrary number | +gRNA position category | +Target base position in gRNA | +Target base position in reporter | +BE | +Group | +... | +barcode | +5-nt PAM | +offset | +target | +target_pos | +Group2 | +masked_sequence | +masked_barcode | +bot_top.lfc.median | +edit_rate | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | +CONTROL_1_g1 | +0 | +CONTROL | +NaN | +1 | +g1 | +4 | +10 | +ABE | +NegCtrl | +... | +GTTT | +GGGAG | +-10 | +CONTROL_1 | +9 | +NegCtrl | +CCTGCGCGGTGGGGGGCTTT | +GTTT | +-0.135550 | +0.531163 | +
1 | +CONTROL_1_g2 | +1 | +CONTROL | +NaN | +1 | +g2 | +5 | +11 | +ABE | +NegCtrl | +... | +AACA | +TGGGA | +-11 | +CONTROL_1 | +10 | +NegCtrl | +CCCTGCGCGGTGGGGGGCTT | +GGCG | +-0.059391 | +0.640765 | +
2 | +CONTROL_1_g3 | +2 | +CONTROL | +NaN | +1 | +g3 | +5 | +12 | +ABE | +NegCtrl | +... | +CGCT | +TTGGG | +-12 | +CONTROL_1 | +11 | +NegCtrl | +CCCTGCGCGGTGGGGGGCT | +CGCT | +0.141290 | +0.417709 | +
3 | +CONTROL_1_g4 | +3 | +CONTROL | +NaN | +1 | +g4 | +7 | +13 | +ABE | +NegCtrl | +... | +TGAG | +TTTGG | +-13 | +CONTROL_1 | +12 | +NegCtrl | +GGCCCTGCGCGGTGGGGGGC | +TGGG | +-0.072358 | +0.126400 | +
4 | +CONTROL_1_g5 | +4 | +CONTROL | +NaN | +1 | +g5 | +8 | +14 | +ABE | +NegCtrl | +... | +GTAT | +CTTTG | +-14 | +CONTROL_1 | +13 | +NegCtrl | +GGGCCCTGCGCGGTGGGGGG | +GTGT | +0.269650 | +0.201104 | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
3450 | +rs9987289_Maj_ABE_347_g1 | +3450 | +rs9987289 | +Maj | +347 | +g1 | +3 | +10 | +ABE | +Variant | +... | +CAGT | +CCAGC | +-10 | +rs9987289_Maj_ABE_347 | +9 | +Variant | +GCGTCGGTGTCGCGTGGGG | +CGGT | +-0.230264 | +0.087379 | +
3451 | +rs9987289_Maj_ABE_347_g2 | +3451 | +rs9987289 | +Maj | +347 | +g2 | +4 | +11 | +ABE | +Variant | +... | +TCGC | +ACCAG | +-11 | +rs9987289_Maj_ABE_347 | +10 | +Variant | +GGCGTCGGTGTCGCGTGGG | +TCGC | +-0.182151 | +0.299923 | +
3452 | +rs9987289_Maj_ABE_347_g3 | +3452 | +rs9987289 | +Maj | +347 | +g3 | +6 | +12 | +ABE | +Variant | +... | +GCAC | +AACCA | +-12 | +rs9987289_Maj_ABE_347 | +11 | +Variant | +TGGGCGTCGGTGTCGCGTGG | +GCGC | +-0.165778 | +0.224973 | +
3453 | +rs9987289_Maj_ABE_347_g4 | +3453 | +rs9987289 | +Maj | +347 | +g4 | +7 | +13 | +ABE | +Variant | +... | +TTGC | +GAACC | +-13 | +rs9987289_Maj_ABE_347 | +12 | +Variant | +TTGGGCGTCGGTGTCGCGTG | +TTGC | +-0.340590 | +0.265378 | +
3454 | +rs9987289_Maj_ABE_347_g5 | +3454 | +rs9987289 | +Maj | +347 | +g5 | +8 | +14 | +ABE | +Variant | +... | +GCGA | +GGAAC | +-14 | +rs9987289_Maj_ABE_347 | +13 | +Variant | +CTTGGGCGTCGGTGTCGCGT | +GCGG | +0.034365 | +0.266573 | +
3455 rows × 22 columns
+Allele translation¶
+cdata_tiling = br.read_h5ad("../../072121_ABE_topbot/bean_counts/LDLRCDS/032422_crispresso/bean_count_072121_ABE_topbot_LDLRCDS.h5ad")
+
cdata_tiling.uns["allele_counts"].allele
+
0 11224415:14:+:A>G
+1 11224401:0:+:A>G,11224415:14:+:A>G
+2 11224410:9:+:A>G,11224415:14:+:A>G
+3 11224401:0:+:A>G,11224402:1:+:A>G,11224410:9:+...
+4 11224401:0:+:A>G
+ ...
+438001 11203000:4:+:A>G,11203002:6:+:A>G,11203006:10:...
+438002 11224074:0:+:A>G,11224086:12:+:A>G,11224092:18...
+438003 0:0:+:A>G,3:3:+:A>G,11:11:+:A>G,13:13:+:A>G,17...
+438004 11217409:23:+:G>-,11217417:31:+:->C
+438005 11226735:30:-:A>G,11226742:23:-:A>G,11226747:1...
+Name: allele, Length: 438006, dtype: object
+
Writing¶
+cdata.to_Excel("tmp.xlsx")
+
Writing to: tmp.xlsx
+
+ Sheet 1: X
+ Sheet 2: edits
+ Sheet 3: X_bcmatch
+ Sheet 4: lognorm_counts
+ Sheet 5: lognorm_edits
+ Sheet 6: guides
+ Sheet 7: samples
+ Sheet 8: screen.uns.allele_counts
+ Sheet 9: screen.uns.edit_counts
+
cdata.to_mageck_input("mageck_input.txt", target_column='target')
+
%%bash
+head mageck_input.txt
+
sgRNA gene 0 1 2 3 4 5 6 7 8 9 10 11
+CONTROL_1_g1 CONTROL_1 171 451 251 422 573 389 456 420 835 435 794 439
+CONTROL_1_g2 CONTROL_1 145 278 257 206 364 273 389 254 527 498 768 195
+CONTROL_1_g3 CONTROL_1 333 835 488 632 898 899 780 713 1189 626 1146 603
+CONTROL_1_g4 CONTROL_1 246 663 387 448 823 595 705 600 921 595 1143 506
+CONTROL_1_g5 CONTROL_1 243 647 434 529 776 451 700 676 1062 611 928 379
+CONTROL_10_g1 CONTROL_10 138 329 229 213 422 292 432 352 409 243 390 274
+CONTROL_10_g2 CONTROL_10 187 468 402 479 643 369 428 469 796 422 787 404
+CONTROL_10_g3 CONTROL_10 57 126 83 131 281 114 184 115 300 106 299 106
+CONTROL_10_g4 CONTROL_10 66 112 120 136 182 128 169 181 256 144 258 179
+