Merge pull request #44 from bcgsc/feat/DEVSU-2494-add-custom-text-to-analysis-summary

elewis2 · web-flow · commit c0fd9c8c8bee · 2025-01-30T13:02:22.000-08:00
Feat/devsu 2494 add custom text to analysis summary
diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py
@@ -38,7 +38,7 @@
     multi_variant_filtering,
     select_expression_plots,
 )
-from .summary import auto_analyst_comments
+from .summary import auto_analyst_comments, get_ipr_analyst_comments
 from .therapeutic_options import create_therapeutic_options
 from .util import LOG_LEVELS, logger, trim_empty_values
 
@@ -195,6 +195,7 @@ def clean_unsupported_content(upload_content: Dict, ipr_spec: Dict = {}) -> Dict
         "copyVariants",
         "structuralVariants",
         "probeResults",
+        "signatureVariants",
         "msi",
     ]
     for variant_list_section in VARIANT_LIST_KEYS:
@@ -247,6 +248,10 @@ def ipr_report(
     custom_kb_match_filter=None,
     async_upload: bool = False,
     mins_to_wait: int = 5,
+    include_ipr_variant_text: bool = True,
+    include_nonspecific_disease: bool = False,
+    include_nonspecific_project: bool = False,
+    include_nonspecific_template: bool = False,
     multi_variant_filter: bool = True,
 ) -> Dict:
     """Run the matching and create the report JSON for upload to IPR.
@@ -271,6 +276,10 @@ def ipr_report(
         custom_kb_match_filter: function(List[kbMatch]) -> List[kbMatch]
         async_upload: use report_async endpoint to upload reports
         mins_to_wait: if using report_async, number of minutes to wait for success before exception raised
+        include_ipr_variant_text: if True, include output from the ipr variant-texts endpoint in analysis comments
+        include_nonspecific_disease: if include_ipr_variant_text is True, if no disease match is found use disease-nonspecific variant comment
+        include_nonspecific_project: if include_ipr_variant_text is True, if no project match is found use project-nonspecific variant comment
+        include_nonspecific_template: if include_ipr_variant_text is True, if no template match is found use template-nonspecific variant comment
         multi_variant_filter: filters out matches that doesn't match to all required variants on multi-variant statements
 
     Returns:
@@ -469,14 +478,27 @@ def ipr_report(
 
     # ANALYST COMMENTS
     logger.info("generating analyst comments")
+
+    comments_list = []
     if generate_comments:
-        comments = {
-            "comments": auto_analyst_comments(
-                graphkb_conn, gkb_matches, disease_name=kb_disease_match, variants=all_variants
-            )
-        }
-    else:
-        comments = {"comments": ""}
+        graphkb_comments = auto_analyst_comments(
+            graphkb_conn, gkb_matches, disease_name=kb_disease_match, variants=all_variants
+        )
+        comments_list.append(graphkb_comments)
+
+    if include_ipr_variant_text:
+        ipr_comments = get_ipr_analyst_comments(
+            ipr_conn,
+            gkb_matches,
+            disease_name=kb_disease_match,
+            project_name=content['project'],
+            report_type=content['template'],
+            include_nonspecific_disease=include_nonspecific_disease,
+            include_nonspecific_project=include_nonspecific_project,
+            include_nonspecific_template=include_nonspecific_template,
+        )
+        comments_list.append(ipr_comments)
+    comments = "\n".join(comments_list)
 
     # OUTPUT CONTENT
     # thread safe deep-copy the original content
diff --git a/pori_python/ipr/summary.py b/pori_python/ipr/summary.py
@@ -11,7 +11,15 @@
 from pori_python.graphkb.util import convert_to_rid_list
 from pori_python.graphkb.vocab import get_term_tree
 from pori_python.ipr.inputs import create_graphkb_sv_notation
-from pori_python.types import Hashabledict, IprVariant, KbMatch, Ontology, Record, Statement
+from pori_python.ipr.connection import IprConnection
+from pori_python.types import (
+    Hashabledict,
+    IprVariant,
+    KbMatch,
+    Ontology,
+    Record,
+    Statement,
+)
 
 from .util import (
     convert_to_rid_set,
@@ -264,7 +272,9 @@ def create_section_html(
     for statement_id, sentence in sentences_by_statement_id.items():
         relevance = statements[statement_id]["relevance"]["@rid"]
         category = categorize_relevance(
-            graphkb_conn, relevance, RELEVANCE_BASE_TERMS + [("resistance", ["no sensitivity"])]
+            graphkb_conn,
+            relevance,
+            RELEVANCE_BASE_TERMS + [("resistance", ["no sensitivity"])],
         )
         sentence_categories[sentence] = category
 
@@ -274,7 +284,12 @@ def create_section_html(
             "target": "Feature",
             "filters": {
                 "AND": [
-                    {"source": {"target": "Source", "filters": {"name": "entrez gene"}}},
+                    {
+                        "source": {
+                            "target": "Source",
+                            "filters": {"name": "entrez gene"},
+                        }
+                    },
                     {"name": gene_name},
                     {"biotype": "gene"},
                 ]
@@ -311,7 +326,14 @@ def create_section_html(
         {
             s
             for (s, v) in sentence_categories.items()
-            if v not in ["diagnostic", "biological", "therapeutic", "prognostic", "resistance"]
+            if v
+            not in [
+                "diagnostic",
+                "biological",
+                "therapeutic",
+                "prognostic",
+                "resistance",
+            ]
         },
         {s for (s, v) in sentence_categories.items() if v == "resistance"},
     ]:
@@ -342,6 +364,112 @@ def section_statements_by_genes(
     return genes
 
 
+def prep_single_ipr_variant_comment(variant_text):
+    """Formats single item of custom variant text for inclusion in the analyst comments.
+
+    Params:
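+        variant_text: dict for one IPR variant text record; reads the 'variantName', 'cancerType' (list of names) and 'text' keys
+
+    Returns:
+        section: list of html-formatted strings (an <h2> heading followed by a <p> paragraph)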
+    """
+    cancer_type = ",".join(variant_text["cancerType"])
+    if not cancer_type:
+        cancer_type = "no specific cancer types"
+    cancer_type = f" ({cancer_type})"
+    section = [f"<h2>{variant_text['variantName']}{cancer_type}</h2>"]
+    section.append(f"<p>{variant_text['text']}</p>")
+    return section
+
+
+def get_ipr_analyst_comments(
+    ipr_conn: IprConnection,
+    matches: Sequence[KbMatch] | Sequence[Hashabledict],
+    disease_name: str,
+    project_name: str,
+    report_type: str,
+    include_nonspecific_disease: bool = False,
+    include_nonspecific_project: bool = False,
+    include_nonspecific_template: bool = False,
+) -> str:
+    """
+    Given a list of kbmatches, checks the variant_texts table in IPR-API to get any
+    pre-prepared text for each matched variant for inclusion in the analyst comments.
+    Looks up texts by variant name, then narrows them by project, template and disease,
+    in that order. If no match is found for a field and the related include_nonspecific
+    arg is True, uses results with no specified value for that field, if any exist (eg
+    a result with no cancer type specified); otherwise no text is used for that variant.
+
+    Params:
+        ipr_conn: connection to the ipr db
+        matches: list of kbmatches which will be included in the report
+        disease_name: str, eg 'colorectal cancer'
+        project_name: str, eg TEST or pog
+        report_type: str, eg genomic or rapid
+        include_nonspecific_disease: bool - true if variant texts that don't explicitly
+            name a cancer type should be included
+        include_nonspecific_project: bool - true if variant texts that don't explicitly
+            name a project should be included
+        include_nonspecific_template: bool - true if variant texts that don't explicitly
+            name a template should be included
+    Returns:
+        html-formatted string
+    """
+    output_header = "<h3>The comments below were automatically drawn from curated text stored in IPR for variant matches in this report, and have not been manually reviewed</h3>"
+    no_comments_found_output = "No comments found in IPR for variants in this report"
+    output = []
+    # get the list of variants to check for custom text for
+    match_set = list(set([item["kbVariant"] for item in matches]))
+
+    for variant in match_set:
+        data = {
+            "variantName": variant,
+        }
+        itemlist: list[dict] = []
+        itemlist = ipr_conn.get("variant-text", data=data)  # type: ignore
+        if itemlist:
+            project_matches = [
+                item
+                for item in itemlist
+                if 'project' in item.keys() and item['project']['name'] == project_name
+            ]
+            if project_matches:
+                itemlist = project_matches
+            elif include_nonspecific_project:
+                itemlist = [item for item in itemlist if 'project' not in item.keys()]
+            else:
+                itemlist = []
+
+            template_matches = [
+                item
+                for item in itemlist
+                if 'template' in item.keys() and item['template']['name'] == report_type
+            ]
+            if template_matches:
+                itemlist = template_matches
+            elif include_nonspecific_template:
+                itemlist = [item for item in itemlist if 'template' not in item.keys()]
+            else:
+                itemlist = []
+
+            disease_matches = [item for item in itemlist if disease_name in item['cancerType']]
+            if disease_matches:
+                itemlist = disease_matches
+            elif include_nonspecific_disease:
+                itemlist = [item for item in itemlist if not item['cancerType']]
+            else:
+                itemlist = []
+
+            for item in itemlist:
+                section = prep_single_ipr_variant_comment(item)
+                output.extend(section)
+
+    if not output:
+        return no_comments_found_output
+    output.insert(0, output_header)
+    return "\n".join(output)
+
+
 def auto_analyst_comments(
     graphkb_conn: GraphKBConnection,
     matches: Sequence[KbMatch] | Sequence[Hashabledict],
diff --git a/tests/test_ipr/test_main.py b/tests/test_ipr/test_main.py
@@ -40,7 +40,10 @@ def report_upload_content(tmp_path_factory) -> Dict:
                     {"analysisRole": "expression (disease)", "name": "1"},
                     {"analysisRole": "expression (primary site)", "name": "2"},
                     {"analysisRole": "expression (biopsy site)", "name": "3"},
-                    {"analysisRole": "expression (internal pancancer cohort)", "name": "4"},
+                    {
+                        "analysisRole": "expression (internal pancancer cohort)",
+                        "name": "4",
+                    },
                 ],
                 "patientId": "PATIENT001",
                 "project": "TEST",
@@ -67,6 +70,15 @@ def report_upload_content(tmp_path_factory) -> Dict:
             allow_nan=False,
         )
     )
+
+    def side_effect_function(*args, **kwargs):
+        if 'templates' in args[0]:
+            return [{"name": "genomic", "ident": "001"}]
+        elif args[0] == "project":
+            return [{"name": "TEST", "ident": "001"}]
+        else:
+            return []
+
     with patch.object(
         sys,
         "argv",
@@ -91,7 +103,8 @@ def report_upload_content(tmp_path_factory) -> Dict:
     ):
         with patch.object(IprConnection, "upload_report", new=mock):
             with patch.object(IprConnection, "get_spec", return_value=get_test_spec()):
-                command_interface()
+                with patch.object(IprConnection, "get", side_effect=side_effect_function):
+                    command_interface()
 
     assert mock.called
 
diff --git a/tests/test_ipr/test_probe.py b/tests/test_ipr/test_probe.py
@@ -5,8 +5,8 @@
 from unittest.mock import MagicMock, patch
 
 from pori_python.ipr.connection import IprConnection
+from pori_python.ipr import main
 from pori_python.ipr.main import create_report
-
 from .constants import EXCLUDE_INTEGRATION_TESTS
 
 EXCLUDE_BCGSC_TESTS = os.environ.get("EXCLUDE_BCGSC_TESTS") == "1"
@@ -19,31 +19,41 @@ def get_test_file(name: str) -> str:
 @pytest.fixture(scope="module")
 def probe_upload_content() -> Dict:
     mock = MagicMock()
+
+    def side_effect_function(*args, **kwargs):
+        if "templates" in args[0]:
+            return [{"name": "genomic", "ident": "001"}]
+        elif args[0] == "project":
+            return [{"name": "TEST", "ident": "001"}]
+        else:
+            return []
+
     with patch.object(IprConnection, "upload_report", new=mock):
         with patch.object(IprConnection, "get_spec", return_value={}):
-            create_report(
-                content={
-                    "patientId": "PATIENT001",
-                    "project": "TEST",
-                    "smallMutations": pd.read_csv(
-                        get_test_file("small_mutations_probe.tab"),
-                        sep="\t",
-                        dtype={"chromosome": "string"},
-                    ).to_dict("records"),
-                    "structuralVariants": pd.read_csv(
-                        get_test_file("fusions.tab"), sep="\t"
-                    ).to_dict("records"),
-                    "blargh": "some fake content",
-                    "kbDiseaseMatch": "colorectal cancer",
-                },
-                username=os.environ["IPR_USER"],
-                password=os.environ["IPR_PASS"],
-                log_level="info",
-                ipr_url="http://fake.url.ca",
-                graphkb_username=os.environ.get("GRAPHKB_USER", os.environ["IPR_USER"]),
-                graphkb_password=os.environ.get("GRAPHKB_PASS", os.environ["IPR_PASS"]),
-                graphkb_url=os.environ.get("GRAPHKB_URL", False),
-            )
+            with patch.object(IprConnection, "get", side_effect=side_effect_function):
+                create_report(
+                    content={
+                        "patientId": "PATIENT001",
+                        "project": "TEST",
+                        "smallMutations": pd.read_csv(
+                            get_test_file("small_mutations_probe.tab"),
+                            sep="\t",
+                            dtype={"chromosome": "string"},
+                        ).to_dict("records"),
+                        "structuralVariants": pd.read_csv(
+                            get_test_file("fusions.tab"), sep="\t"
+                        ).to_dict("records"),
+                        "blargh": "some fake content",
+                        "kbDiseaseMatch": "colorectal cancer",
+                    },
+                    username=os.environ["IPR_USER"],
+                    password=os.environ["IPR_PASS"],
+                    log_level="info",
+                    ipr_url="http://fake.url.ca",
+                    graphkb_username=os.environ.get("GRAPHKB_USER", os.environ["IPR_USER"]),
+                    graphkb_password=os.environ.get("GRAPHKB_PASS", os.environ["IPR_PASS"]),
+                    graphkb_url=os.environ.get("GRAPHKB_URL", False),
+                )
 
     assert mock.called
 
diff --git a/tests/test_ipr/test_summary.py b/tests/test_ipr/test_summary.py