Skip to content

Commit c0fd9c8

Browse files
authored
Merge pull request #44 from bcgsc/feat/DEVSU-2494-add-custom-text-to-analysis-summary
Feat/devsu 2494 add custom text to analysis summary
2 parents ad1dbfc + 175a9c3 commit c0fd9c8

File tree

5 files changed

+409
-38
lines changed

5 files changed

+409
-38
lines changed

pori_python/ipr/main.py

+30-8
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
multi_variant_filtering,
3939
select_expression_plots,
4040
)
41-
from .summary import auto_analyst_comments
41+
from .summary import auto_analyst_comments, get_ipr_analyst_comments
4242
from .therapeutic_options import create_therapeutic_options
4343
from .util import LOG_LEVELS, logger, trim_empty_values
4444

@@ -195,6 +195,7 @@ def clean_unsupported_content(upload_content: Dict, ipr_spec: Dict = {}) -> Dict
195195
"copyVariants",
196196
"structuralVariants",
197197
"probeResults",
198+
"signatureVariants",
198199
"msi",
199200
]
200201
for variant_list_section in VARIANT_LIST_KEYS:
@@ -247,6 +248,10 @@ def ipr_report(
247248
custom_kb_match_filter=None,
248249
async_upload: bool = False,
249250
mins_to_wait: int = 5,
251+
include_ipr_variant_text: bool = True,
252+
include_nonspecific_disease: bool = False,
253+
include_nonspecific_project: bool = False,
254+
include_nonspecific_template: bool = False,
250255
multi_variant_filter: bool = True,
251256
) -> Dict:
252257
"""Run the matching and create the report JSON for upload to IPR.
@@ -271,6 +276,10 @@ def ipr_report(
271276
custom_kb_match_filter: function(List[kbMatch]) -> List[kbMatch]
272277
async_upload: use report_async endpoint to upload reports
273278
mins_to_wait: if using report_async, number of minutes to wait for success before exception raised
279+
include_ipr_variant_text: if True, include output from the ipr variant-texts endpoint in analysis comments
280+
include_nonspecific_disease: if include_ipr_variant_text is True, if no disease match is found use disease-nonspecific variant comment
281+
include_nonspecific_project: if include_ipr_variant_text is True, if no project match is found use project-nonspecific variant comment
282+
include_nonspecific_template: if include_ipr_variant_text is True, if no template match is found use template-nonspecific variant comment
274283
multi_variant_filter: filters out matches that doesn't match to all required variants on multi-variant statements
275284
276285
Returns:
@@ -469,14 +478,27 @@ def ipr_report(
469478

470479
# ANALYST COMMENTS
471480
logger.info("generating analyst comments")
481+
482+
comments_list = []
472483
if generate_comments:
473-
comments = {
474-
"comments": auto_analyst_comments(
475-
graphkb_conn, gkb_matches, disease_name=kb_disease_match, variants=all_variants
476-
)
477-
}
478-
else:
479-
comments = {"comments": ""}
484+
graphkb_comments = auto_analyst_comments(
485+
graphkb_conn, gkb_matches, disease_name=kb_disease_match, variants=all_variants
486+
)
487+
comments_list.append(graphkb_comments)
488+
489+
if include_ipr_variant_text:
490+
ipr_comments = get_ipr_analyst_comments(
491+
ipr_conn,
492+
gkb_matches,
493+
disease_name=kb_disease_match,
494+
project_name=content['project'],
495+
report_type=content['template'],
496+
include_nonspecific_disease=include_nonspecific_disease,
497+
include_nonspecific_project=include_nonspecific_project,
498+
include_nonspecific_template=include_nonspecific_template,
499+
)
500+
comments_list.append(ipr_comments)
501+
comments = "\n".join(comments_list)
480502

481503
# OUTPUT CONTENT
482504
# thread safe deep-copy the original content

pori_python/ipr/summary.py

+132-4
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,15 @@
1111
from pori_python.graphkb.util import convert_to_rid_list
1212
from pori_python.graphkb.vocab import get_term_tree
1313
from pori_python.ipr.inputs import create_graphkb_sv_notation
14-
from pori_python.types import Hashabledict, IprVariant, KbMatch, Ontology, Record, Statement
14+
from pori_python.ipr.connection import IprConnection
15+
from pori_python.types import (
16+
Hashabledict,
17+
IprVariant,
18+
KbMatch,
19+
Ontology,
20+
Record,
21+
Statement,
22+
)
1523

1624
from .util import (
1725
convert_to_rid_set,
@@ -264,7 +272,9 @@ def create_section_html(
264272
for statement_id, sentence in sentences_by_statement_id.items():
265273
relevance = statements[statement_id]["relevance"]["@rid"]
266274
category = categorize_relevance(
267-
graphkb_conn, relevance, RELEVANCE_BASE_TERMS + [("resistance", ["no sensitivity"])]
275+
graphkb_conn,
276+
relevance,
277+
RELEVANCE_BASE_TERMS + [("resistance", ["no sensitivity"])],
268278
)
269279
sentence_categories[sentence] = category
270280

@@ -274,7 +284,12 @@ def create_section_html(
274284
"target": "Feature",
275285
"filters": {
276286
"AND": [
277-
{"source": {"target": "Source", "filters": {"name": "entrez gene"}}},
287+
{
288+
"source": {
289+
"target": "Source",
290+
"filters": {"name": "entrez gene"},
291+
}
292+
},
278293
{"name": gene_name},
279294
{"biotype": "gene"},
280295
]
@@ -311,7 +326,14 @@ def create_section_html(
311326
{
312327
s
313328
for (s, v) in sentence_categories.items()
314-
if v not in ["diagnostic", "biological", "therapeutic", "prognostic", "resistance"]
329+
if v
330+
not in [
331+
"diagnostic",
332+
"biological",
333+
"therapeutic",
334+
"prognostic",
335+
"resistance",
336+
]
315337
},
316338
{s for (s, v) in sentence_categories.items() if v == "resistance"},
317339
]:
@@ -342,6 +364,112 @@ def section_statements_by_genes(
342364
return genes
343365

344366

367+
def prep_single_ipr_variant_comment(variant_text: dict) -> list[str]:
    """Format a single custom variant text record for the analyst comments.

    Params:
        variant_text: one variant text record; this function reads its
            'variantName', 'text' and 'cancerType' entries. 'cancerType' is
            iterated as a collection of cancer type names; a missing, null or
            empty value is rendered as "no specific cancer types".

    Returns:
        section: list of two html fragments, the <h2> heading (variant name
            followed by the cancer types in parentheses) and the <p> body text.
            The caller is responsible for joining the fragments.
    """
    # `or []` tolerates a null/absent cancerType instead of raising on join.
    cancer_types = variant_text.get("cancerType") or []
    cancer_type = ",".join(cancer_types) or "no specific cancer types"
    return [
        f"<h2>{variant_text['variantName']} ({cancer_type})</h2>",
        f"<p>{variant_text['text']}</p>",
    ]
383+
384+
385+
def _has_linked_name(item: dict, field: str, wanted: str) -> bool:
    """Return True if item[field] is set and its 'name' equals `wanted`."""
    linked = item.get(field)
    return bool(linked) and linked.get("name") == wanted


def _narrow_variant_texts(items: list, is_match, is_unspecified, allow_unspecified: bool) -> list:
    """Keep exact matches if any exist, else optionally fall back to unspecified records.

    Params:
        items: candidate variant text records
        is_match: predicate(record) -> bool, true when the record explicitly matches
        is_unspecified: predicate(record) -> bool, true when the record leaves the
            field unset
        allow_unspecified: whether to fall back to unspecified records when nothing
            matches explicitly

    Returns:
        the explicit matches; or, if there are none and the fallback is allowed, the
        unspecified records; otherwise an empty list
    """
    exact = [item for item in items if is_match(item)]
    if exact:
        return exact
    if allow_unspecified:
        return [item for item in items if is_unspecified(item)]
    return []


def get_ipr_analyst_comments(
    ipr_conn: IprConnection,
    matches: Sequence[KbMatch] | Sequence[Hashabledict],
    disease_name: str,
    project_name: str,
    report_type: str,
    include_nonspecific_disease: bool = False,
    include_nonspecific_project: bool = False,
    include_nonspecific_template: bool = False,
) -> str:
    """
    Given a list of kbmatches, checks the variant_texts table in IPR-API to get any
    pre-prepared text for each matched variant for inclusion in the analyst comments.

    Records are looked up by variant name, then narrowed by project, then by template,
    then by disease. At each step, records that explicitly match are kept; if none
    match and the related include_nonspecific arg is True, records with no value
    for that field are kept instead (eg a record with no cancer type specified);
    otherwise nothing is kept for that variant.

    Variants are processed in the order they first appear in `matches`, so the
    generated text is reproducible between runs.

    Params:
        ipr_conn: connection to the ipr db
        matches: list of kbmatches which will be included in the report
        disease_name: str, eg 'colorectal cancer'
        project_name: str, eg TEST or pog
        report_type: str, eg genomic or rapid
        include_nonspecific_disease: bool - true if variant texts that don't explicitly
            name a cancer type should be included
        include_nonspecific_project: bool - true if variant texts that don't explicitly
            name a project should be included
        include_nonspecific_template: bool - true if variant texts that don't explicitly
            name a template should be included
    Returns:
        html-formatted string
    """
    output_header = "<h3>The comments below were automatically drawn from curated text stored in IPR for variant matches in this report, and have not been manually reviewed</h3>"
    no_comments_found_output = "No comments found in IPR for variants in this report"
    output: list[str] = []

    # De-duplicate the variants to look up. dict.fromkeys keeps first-seen order,
    # whereas a set would make the section order vary with string-hash randomisation.
    variant_names = list(dict.fromkeys(item["kbVariant"] for item in matches))

    for variant in variant_names:
        itemlist = ipr_conn.get("variant-text", data={"variantName": variant})  # type: ignore
        if not itemlist:
            continue

        # A null/absent project, template or cancerType is treated as "unspecified"
        # rather than raising.
        itemlist = _narrow_variant_texts(
            itemlist,
            is_match=lambda item: _has_linked_name(item, "project", project_name),
            is_unspecified=lambda item: not item.get("project"),
            allow_unspecified=include_nonspecific_project,
        )
        itemlist = _narrow_variant_texts(
            itemlist,
            is_match=lambda item: _has_linked_name(item, "template", report_type),
            is_unspecified=lambda item: not item.get("template"),
            allow_unspecified=include_nonspecific_template,
        )
        itemlist = _narrow_variant_texts(
            itemlist,
            is_match=lambda item: disease_name in (item.get("cancerType") or []),
            is_unspecified=lambda item: not item.get("cancerType"),
            allow_unspecified=include_nonspecific_disease,
        )

        for item in itemlist:
            output.extend(prep_single_ipr_variant_comment(item))

    if not output:
        return no_comments_found_output
    return "\n".join([output_header, *output])
471+
472+
345473
def auto_analyst_comments(
346474
graphkb_conn: GraphKBConnection,
347475
matches: Sequence[KbMatch] | Sequence[Hashabledict],

tests/test_ipr/test_main.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,10 @@ def report_upload_content(tmp_path_factory) -> Dict:
4040
{"analysisRole": "expression (disease)", "name": "1"},
4141
{"analysisRole": "expression (primary site)", "name": "2"},
4242
{"analysisRole": "expression (biopsy site)", "name": "3"},
43-
{"analysisRole": "expression (internal pancancer cohort)", "name": "4"},
43+
{
44+
"analysisRole": "expression (internal pancancer cohort)",
45+
"name": "4",
46+
},
4447
],
4548
"patientId": "PATIENT001",
4649
"project": "TEST",
@@ -67,6 +70,15 @@ def report_upload_content(tmp_path_factory) -> Dict:
6770
allow_nan=False,
6871
)
6972
)
73+
74+
def side_effect_function(*args, **kwargs):
75+
if 'templates' in args[0]:
76+
return [{"name": "genomic", "ident": "001"}]
77+
elif args[0] == "project":
78+
return [{"name": "TEST", "ident": "001"}]
79+
else:
80+
return []
81+
7082
with patch.object(
7183
sys,
7284
"argv",
@@ -91,7 +103,8 @@ def report_upload_content(tmp_path_factory) -> Dict:
91103
):
92104
with patch.object(IprConnection, "upload_report", new=mock):
93105
with patch.object(IprConnection, "get_spec", return_value=get_test_spec()):
94-
command_interface()
106+
with patch.object(IprConnection, "get", side_effect=side_effect_function):
107+
command_interface()
95108

96109
assert mock.called
97110

tests/test_ipr/test_probe.py

+34-24
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
from unittest.mock import MagicMock, patch
66

77
from pori_python.ipr.connection import IprConnection
8+
from pori_python.ipr import main
89
from pori_python.ipr.main import create_report
9-
1010
from .constants import EXCLUDE_INTEGRATION_TESTS
1111

1212
EXCLUDE_BCGSC_TESTS = os.environ.get("EXCLUDE_BCGSC_TESTS") == "1"
@@ -19,31 +19,41 @@ def get_test_file(name: str) -> str:
1919
@pytest.fixture(scope="module")
2020
def probe_upload_content() -> Dict:
2121
mock = MagicMock()
22+
23+
def side_effect_function(*args, **kwargs):
24+
if "templates" in args[0]:
25+
return [{"name": "genomic", "ident": "001"}]
26+
elif args[0] == "project":
27+
return [{"name": "TEST", "ident": "001"}]
28+
else:
29+
return []
30+
2231
with patch.object(IprConnection, "upload_report", new=mock):
2332
with patch.object(IprConnection, "get_spec", return_value={}):
24-
create_report(
25-
content={
26-
"patientId": "PATIENT001",
27-
"project": "TEST",
28-
"smallMutations": pd.read_csv(
29-
get_test_file("small_mutations_probe.tab"),
30-
sep="\t",
31-
dtype={"chromosome": "string"},
32-
).to_dict("records"),
33-
"structuralVariants": pd.read_csv(
34-
get_test_file("fusions.tab"), sep="\t"
35-
).to_dict("records"),
36-
"blargh": "some fake content",
37-
"kbDiseaseMatch": "colorectal cancer",
38-
},
39-
username=os.environ["IPR_USER"],
40-
password=os.environ["IPR_PASS"],
41-
log_level="info",
42-
ipr_url="http://fake.url.ca",
43-
graphkb_username=os.environ.get("GRAPHKB_USER", os.environ["IPR_USER"]),
44-
graphkb_password=os.environ.get("GRAPHKB_PASS", os.environ["IPR_PASS"]),
45-
graphkb_url=os.environ.get("GRAPHKB_URL", False),
46-
)
33+
with patch.object(IprConnection, "get", side_effect=side_effect_function):
34+
create_report(
35+
content={
36+
"patientId": "PATIENT001",
37+
"project": "TEST",
38+
"smallMutations": pd.read_csv(
39+
get_test_file("small_mutations_probe.tab"),
40+
sep="\t",
41+
dtype={"chromosome": "string"},
42+
).to_dict("records"),
43+
"structuralVariants": pd.read_csv(
44+
get_test_file("fusions.tab"), sep="\t"
45+
).to_dict("records"),
46+
"blargh": "some fake content",
47+
"kbDiseaseMatch": "colorectal cancer",
48+
},
49+
username=os.environ["IPR_USER"],
50+
password=os.environ["IPR_PASS"],
51+
log_level="info",
52+
ipr_url="http://fake.url.ca",
53+
graphkb_username=os.environ.get("GRAPHKB_USER", os.environ["IPR_USER"]),
54+
graphkb_password=os.environ.get("GRAPHKB_PASS", os.environ["IPR_PASS"]),
55+
graphkb_url=os.environ.get("GRAPHKB_URL", False),
56+
)
4757

4858
assert mock.called
4959

0 commit comments

Comments
 (0)