From f52937ccd9942dd021da47897231a758522da53f Mon Sep 17 00:00:00 2001 From: jykr Date: Sat, 30 Mar 2024 14:29:16 -0400 Subject: [PATCH] Fix QC notebook --- notebooks/sample_quality_report.ipynb | 42 +++++++++++++++++---------- tests/test_qc.py | 7 ++--- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/notebooks/sample_quality_report.ipynb b/notebooks/sample_quality_report.ipynb index 2390ed7..23f49ec 100755 --- a/notebooks/sample_quality_report.ipynb +++ b/notebooks/sample_quality_report.ipynb @@ -60,7 +60,7 @@ "recalculate_edits = True\n", "tiling = None\n", "base_edit_data = True\n", - "remove_bad_replciates = False" + "remove_bad_replicates = False" ] }, { @@ -128,7 +128,7 @@ "metadata": {}, "outputs": [], "source": [ - "bdata.X" + "bdata.guides" ] }, { @@ -397,13 +397,31 @@ "source": [ "bdata.samples[\"mask\"] = 1\n", "bdata.samples.loc[\n", - " bdata.samples.median_corr_X.isnull() | (bdata.samples.median_corr_X < count_correlation_thres),\n", + " (\"PosCtrl\" in bdata.guides.target_group) & bdata.samples.median_corr_X.isnull() | (bdata.samples.median_corr_X < count_correlation_thres),\n", " \"mask\",\n", "] = 0\n", "if \"mean_editing_rate\" in bdata.samples.columns.tolist():\n", " bdata.samples.loc[bdata.samples.mean_editing_rate < edit_rate_thres, \"mask\"] = 0\n", - "if f\"median_lfc_corr.{comp_cond1}_{comp_cond2}\" in bdata.samples.columns.tolist():\n", - " bdata.samples.loc[bdata.samples[f\"median_lfc_corr.{comp_cond1}_{comp_cond2}\"] < lfc_thres, \"mask\"] = 0" + "\n", + "bdata.samples.loc[\n", + " bdata.samples[f\"median_lfc_corr.{comp_cond1}_{comp_cond2}\"] < lfc_thres,\n", + " \"mask\",\n", + "] = 0\n", + "if posctrl_val in bdata.guides[posctrl_col].values:\n", + " print(\"filter with posctrl LFC\")\n", + " bdata.samples.loc[\n", + " bdata.samples[f\"median_lfc_corr.{comp_cond1}_{comp_cond2}\"].isnull(),\n", + " \"mask\",\n", + " ] = 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bdata.samples.style.background_gradient(cmap=\"coolwarm_r\")" ] }, { @@ -413,7 +431,7 @@ "outputs": [], "source": [ "# leave replicate with more than 1 sorting bin data\n", - "if remove_bad_replciates:\n", + "if remove_bad_replicates:\n", " rep_n_samples = bdata.samples.groupby(replicate_label)[\"mask\"].sum()\n", " print(rep_n_samples)\n", " rep_has_too_small_sample = rep_n_samples.loc[rep_n_samples < 2].index.tolist()\n", @@ -431,20 +449,12 @@ " samples_include = ~bdata.samples[\"_rc\"].isin(rep_has_too_small_sample)\n", " bdata.samples.pop(\"_rc\")\n", " bdata_filtered = bdata[:, samples_include]\n", + " if isinstance(replicate_label, str) and len(bdata_filtered.samples[replicate_label].unique()) <= 1 or isinstance(replicate_label, list) and len(bdata_filtered.samples[replicate_label].drop_duplicates() <= 1): \n", + " raise ValueError(\"Too small number of replicate left after QC. Check the input data or adjust the QC metric thresholds.\")\n", "else:\n", " bdata_filtered = bdata" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if isinstance(replicate_label, str) and len(bdata_filtered.samples[replicate_label].unique()) <= 1 or isinstance(replicate_label, list) and len(bdata_filtered.samples[replicate_label].drop_duplicates() <= 1): \n", - " raise ValueError(\"Too small number of replicate left after QC. Check the input data or adjust the QC metric thresholds.\")" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/tests/test_qc.py b/tests/test_qc.py index 3910432..241ad01 100755 --- a/tests/test_qc.py +++ b/tests/test_qc.py @@ -17,7 +17,7 @@ def test_qc(): @pytest.mark.order(8) def test_qc_tiling(): - cmd = "bean qc tests/data/tiling_mini_screen.h5ad -o tests/data/tiling_mini_screen_masked.h5ad -r tests/test_res/qc_report_tiling_mini_screen --count-correlation-thres 0.6 --posctrl-col ''" + cmd = "bean qc tests/data/tiling_mini_screen.h5ad -o tests/data/tiling_mini_screen_masked.h5ad -r tests/test_res/qc_report_tiling_mini_screen --count-correlation-thres 0.6 " try: subprocess.check_output( cmd, @@ -56,12 +56,11 @@ def test_dummy_insertion_varscreen(): @pytest.mark.order(10) def test_dummy_insertion_tilingscreen(): - cmd = "bean qc tests/data/tiling_mini_screen_missing.h5ad -o tests/data/tiling_mini_screen_missing_masked.h5ad -r tests/test_res/qc_report_tiling_mini_screen_missing --count-correlation-thres 0.6 --posctrl-col -b''" + cmd = "bean qc tests/data/tiling_mini_screen_missing.h5ad -o tests/data/tiling_mini_screen_missing_masked.h5ad -r tests/test_res/qc_report_tiling_mini_screen_missing --count-correlation-thres 0.6 -b" try: subprocess.check_output( cmd, shell=True, universal_newlines=True, stderr=subprocess.STDOUT ) raise ValueError("Filtering should fail with too small number of replicates.") except subprocess.CalledProcessError as exc: - if "Too small number of replicate left after QC" not in exc.output: - raise exc + raise exc