Skip to content

Commit

Permalink
Unify control condition arguments, add sanity check for control condi…
Browse files Browse the repository at this point in the history
…tion samples
  • Loading branch information
jykr committed Mar 28, 2024
1 parent 97d464e commit 25c0a1b
Show file tree
Hide file tree
Showing 9 changed files with 51 additions and 35 deletions.
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,9 @@ Above command produces `prefix_editing_preference.[html,ipynb]` as editing prefe
### Parameters
* `-o`, `--output-prefix` (default: `None`): Output prefix of editing pattern report (prefix.html, prefix.ipynb). If not provided, base name of `bdata_path` is used.
* `--replicate-col` (default: `"rep"`): Column name in `bdata.samples` that describes replicate ID.
* `--condition-col` (default: `"bin"`): Column name in `bdata.samples` that describes experimental condition. (sorting bin, time, etc.)
* `--condition-col` (default: `"condition"`): Column name in `bdata.samples` that describes experimental condition. (sorting bin, time, etc.)
* `--pam-col` (default: `None`): Column name describing PAM of each gRNA in `bdata.guides`.
* `--control-condition` (default: `"bulk"`): Control condition where editing preference would be profiled at. Pre-filters data where `bdata.samples[condition_col] == control_condition`.
* `--control-condition` (default: `"bulk"`): Control condition in which editing preference is profiled. Pre-filters data where `bdata.samples[condition_col] == control_condition`. Do NOT use the plasmid library as the control here, because no editing is expected in it.
* `-w`, `--window-length` (default: `6`): Length of the editing window with maximal editing efficiency to be identified. This window is used to quantify context specificity within the window.


Expand All @@ -215,7 +215,7 @@ bean-qc \
my_sorting_screen.h5ad `# Input ReporterScreen .h5ad file path` \
-o my_sorting_screen_masked.h5ad `# Output ReporterScreen .h5ad file path` \
-r qc_report_my_sorting_screen `# Prefix for QC report` \

--control-condition presort `# Value of the "condition" column for the control samples before selection. Mean gRNA editing rates in these samples are reported.` \
# Inspect the output qc_report_my_sorting_screen.html to tweak QC threshold

bean-qc \
Expand Down Expand Up @@ -281,9 +281,9 @@ Note that these arguments will change the way the QC metrics are calculated for
Label of column in `bdata.samples` that describes experimental condition. (sorting bin, time,
etc.)
###### Editing rate calculation
* `--ctrl-cond CTRL_COND`
* `--control-condition CTRL_COND`
Values in the `ReporterScreen.samples[condition_label]` column for guide-level editing rate
to be calculated
to be calculated. Default is `None`, which considers all samples.
* `--rel-pos-is-reporter`
Specifies whether `edit_start_pos` and `edit_end_pos` are relative to reporter position. If
`False`, those are relative to spacer position.
Expand Down Expand Up @@ -453,6 +453,7 @@ Above command produces
* `--guide-activity-col`: Column in `bdata.guides` DataFrame showing the editing rate estimated via external tools.
* Sample annotations (`bdata.samples` column keys)
* `--condition-column` (default: `condition`): Column key in `bdata.samples` that describes experimental condition.
* `--control-condition` (default: `bulk`): Value in `bdata.samples[condition_col]` that indicates control experimental condition.
* `-uq`, `--sorting-bin-upper-quantile-column` (default: `upper_quantile`): Column name with upper quantile values of each sorting bin in bdata.samples
* `-lq`, `--sorting-bin-lower-quantile-column` (default: `lower_quantile`): Column name with lower quantile values of each sorting bin in bdata.samples
Expand Down
6 changes: 3 additions & 3 deletions bean/framework/Edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def from_str(cls, edit_str): # pos:strand:start>end
def match_str(cls, edit_str):
if isinstance(edit_str, Edit):
return True
pattern = r"(((chr)?\d+|nan):)?-?\d+:-?\d+:[+-]:[A-Z*-]>[A-Z*-]"
pattern = r"(((chr)?\w+|nan):)?-?\d+:-?\d+:[+-]:[A-Z*-]>[A-Z*-]"
pattern2 = r"[\w*]!-?\d+:-?\d+:[+-]:[A-Z*-]>[A-Z*-]"
return re.fullmatch(pattern, edit_str) or re.fullmatch(pattern2, edit_str)

Expand Down Expand Up @@ -175,11 +175,11 @@ def get_range(self):
min(edit.pos for edit in self.edits),
max(edit.pos for edit in self.edits),
)

def set_uid(self, uid):
self.edits = {edit.set_uid(uid) for edit in self.edits}
return self

def get_uid(self):
uid = None
if (
Expand Down
13 changes: 9 additions & 4 deletions bean/framework/ReporterScreen.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def get_guide_edit_rate(
return_result=False,
count_layer="X_bcmatch",
edit_layer="edits",
unsorted_condition_label="bulk",
unsorted_condition_label=None,
):
"""
prior_weight:
Expand All @@ -497,9 +497,14 @@ def get_guide_edit_rate(
num_targetable_sites = self.guides.sequence.map(
lambda s: s[editable_base_start:editable_base_end].count(edited_base)
)
bulk_idx = np.where(
self.samples.index.astype(str).map(lambda s: unsorted_condition_label in s)
)[0]
if unsorted_condition_label is not None:
bulk_idx = np.where(
self.samples.index.astype(str).map(
lambda s: unsorted_condition_label in s
)
)[0]
else:
bulk_idx = np.arange(0, len(self.samples)).astype(int)

if prior_weight is None:
prior_weight = 1
Expand Down
20 changes: 11 additions & 9 deletions bean/model/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def parse_args():
help="Column key in `bdata.samples` that describes time elapsed.",
)
parser.add_argument(
"--control-condition-label",
"--control-condition",
default="bulk",
type=str,
help="Value in `bdata.samples[condition_col]` that indicates control experimental condition.",
Expand Down Expand Up @@ -334,19 +334,21 @@ def check_args(args, bdata):
raise ValueError(
f"Condition column set by `--replicate-col` {args.replicate_col} not in ReporterScreen.samples.columns:{bdata.samples.columns}. Check your input."
)
if (
args.control_guide_tag is not None
and not bdata.guides.index.map(lambda s: args.control_guide_tag in s).any()
):
raise ValueError(
f"Negative control guide label {args.control_guide_tag} provided by `--control-guide-tag` doesn't appear in any of the guide names. Check your input."
)
if args.control_guide_tag is not None:
if args.library_design == "variant":
raise ValueError(
"`--control-guide-tag` is not used for the variant mode. Make sure you provide the separate `target` column for negative control guide that targets different negative control variant."
)
elif not bdata.guides.index.map(lambda s: args.control_guide_tag in s).any():
raise ValueError(
f"Negative control guide label {args.control_guide_tag} provided by `--control-guide-tag` doesn't appear in any of the guide names. Check your input."
)
if args.alpha_if_overdispersion_fitting_fails is not None:
try:
b0, b1 = args.alpha_if_overdispersion_fitting_fails.split(",")
args.popt = (float(b0), float(b1))
except TypeError as e:
raise e(
raise ValueError(
f"Input --alpha-if-overdispersion-fitting-fails {args.alpha_if_overdispersion_fitting_fails} is malformatted! Provide [float].[float] format."
)
else:
Expand Down
2 changes: 1 addition & 1 deletion bean/qc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def parse_args():
default="top,bot",
)
input_parser.add_argument(
"--ctrl-cond",
"--control-condition",
help="Values in of column in `ReporterScreen.samples[condition_label]` for guide-level editing rate to be calculated",
type=str,
default="bulk",
Expand Down
2 changes: 1 addition & 1 deletion bin/bean-qc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def main():
condition_label=args.condition_label,
comp_cond1=args.lfc_cond1,
comp_cond2=args.lfc_cond2,
ctrl_cond=args.ctrl_cond,
ctrl_cond=args.control_condition,
exp_id=args.out_report_prefix,
recalculate_edits=~args.dont_recalculate_edits,
base_edit_data=args.base_edit_data,
Expand Down
22 changes: 13 additions & 9 deletions bin/bean-run
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def main(args, bdata):
use_const_pi=args.const_pi,
condition_column=args.condition_col,
time_column=args.time_col,
control_condition=args.control_condition_label,
control_condition=args.control_condition,
control_can_be_selected=args.include_control_condition_for_inference,
allele_df_key=args.allele_df_key,
control_guide_tag=args.control_guide_tag,
Expand All @@ -101,7 +101,9 @@ def main(args, bdata):
if "edit_rate" not in ndata.screen.guides.columns:
ndata.screen.get_edit_from_allele()
ndata.screen.get_edit_mat_from_uns(rel_pos_is_reporter=True)
ndata.screen.get_guide_edit_rate(unsorted_condition_label=args.control_condition_label)
ndata.screen.get_guide_edit_rate(
unsorted_condition_label=args.control_condition
)
target_info_df = _get_guide_target_info(
ndata.screen, args, cols_include=[args.negctrl_col]
)
Expand Down Expand Up @@ -182,16 +184,18 @@ def main(args, bdata):
prefix=f"{prefix}/",
suffix=args.result_suffix,
guide_index=guide_index,
guide_acc=ndata.guide_accessibility.cpu().numpy()
if hasattr(ndata, "guide_accessibility")
and ndata.guide_accessibility is not None
else None,
guide_acc=(
ndata.guide_accessibility.cpu().numpy()
if hasattr(ndata, "guide_accessibility")
and ndata.guide_accessibility is not None
else None
),
adjust_confidence_by_negative_control=args.adjust_confidence_by_negative_control,
adjust_confidence_negatives=adj_negctrl_idx,
sd_is_fitted=(args.selection == "sorting"),
sample_covariates=ndata.sample_covariates
if hasattr(ndata, "sample_covariates")
else None,
sample_covariates=(
ndata.sample_covariates if hasattr(ndata, "sample_covariates") else None
),
)
info("Done!")

Expand Down
8 changes: 6 additions & 2 deletions notebooks/profile_editing_preference.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
"source": [
"bdata_path = \"../../../bean_manuscript/workflow/results/mapped/LDLRCDS/bean_count_LDLRCDS_combined.h5ad\"\n",
"replicate_col = \"rep\"\n",
"condition_col = \"bin\"\n",
"condition_col = \"condition\"\n",
"control_condition = \"bulk\"\n",
"output_prefix = \"editing pattern\"\n",
"max_editing_window_length = 6\n",
Expand Down Expand Up @@ -97,7 +97,11 @@
}
],
"source": [
"cdata_bulk = cdata[:,cdata.samples[condition_col] == control_condition]"
"cdata_bulk = cdata[:,cdata.samples[condition_col] == control_condition]\n",
"if len(cdata_bulk) == 0:\n",
" raise ValueError(\n",
" f\"No samples with bdata.samples['{condition_col}'] == {control_condition}\"\n",
" )"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

setup(
name="crispr-bean",
version="0.3.1",
version="1.0.0",
python_requires=">=3.8.0",
author="Jayoung Ryu",
author_email="[email protected]",
Expand Down

0 comments on commit 25c0a1b

Please sign in to comment.