diff --git a/CHANGELOG.md b/CHANGELOG.md index 78f86c63..0be19d33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.2.0dev - [25-Oct-2024] +## v2.2.0dev - [31-Oct-2024] ### `Added` @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 5. Added `text/html` as content mime type for the report file [#146](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/146) 6. Added a sequence labels table below the HiC contact map [#147](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/147) 7. Added parameter `hic_samtools_ext_args` and set its default value to `-F 3852` [#159](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/159) +8. Added the HiC QC report to the final report so that users don't have to navigate to the results folder [#162](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/162) ### `Fixed` diff --git a/bin/report_modules/parsers/hic_parser.py b/bin/report_modules/parsers/hic_parser.py index 01992ecd..17231e17 100644 --- a/bin/report_modules/parsers/hic_parser.py +++ b/bin/report_modules/parsers/hic_parser.py @@ -15,28 +15,38 @@ def parse_hic_folder(folder_name="hic_outputs"): return {} list_of_hic_files = hic_folder_path.glob("*.html") + list_of_hic_files = [ + x for x in list_of_hic_files if re.match(r"^\w+\.html$", x.name) + ] data = {"HIC": []} for hic_path in list_of_hic_files: hic_file_name = os.path.basename(str(hic_path)) - file_tokens = re.findall( + tag = re.findall( r"([\w]+).html", hic_file_name, )[0] - labels_table = pd.read_csv(f"{folder_name}/{file_tokens}.agp.assembly", sep=" ") - + # Get the labels table + labels_table = pd.read_csv(f"{folder_name}/{tag}.agp.assembly", sep=" ") labels_table = labels_table[labels_table.iloc[:, 0].str.startswith(">")].iloc[ :, [0, 2] ] labels_table.columns = ["Sequence", "Length"] labels_table.Length = labels_table.Length.astype(int) + # Get the HiC QC report + hicqc_report = [ + x + for x in hic_folder_path.glob("*.pdf") + if re.match(rf"[\S]+\.on\.{tag}_qc_report\.pdf", x.name) + ][0] + data["HIC"].append( { - "hap": file_tokens, + "hap": tag, "hic_html_file_name": hic_file_name, "labels_table": labels_table.to_dict("records"), "labels_table_html": tabulate( @@ -46,6 +56,7 @@ def parse_hic_folder(folder_name="hic_outputs"): numalign="left", showindex=False, ), + "hicqc_report_pdf": os.path.basename(str(hicqc_report)), } ) diff --git a/bin/report_modules/templates/header.html b/bin/report_modules/templates/header.html index 989b37f0..795ecd3d 100644 --- a/bin/report_modules/templates/header.html +++ b/bin/report_modules/templates/header.html @@ -213,6 +213,18 @@ .iframe-wrapper { text-align: center; + width: 90%; + margin-left: auto; + margin-right: auto; + margin-bottom: 32px; + } + + .iframe-wrapper-hic { + width: 700px; + height: 850px; + margin-left: auto; + margin-right: auto; + margin-bottom: 32px; } .tab { diff --git a/bin/report_modules/templates/hic/report_contents.html b/bin/report_modules/templates/hic/report_contents.html index 4a7f3089..b35fa5dd 100644 --- a/bin/report_modules/templates/hic/report_contents.html +++ b/bin/report_modules/templates/hic/report_contents.html @@ -5,14 +5,23 @@
{{ all_stats_dicts['HIC'][item]['hap'] }}
-
- +
+
+
+

Sequence labels and lengths

+
{{ all_stats_dicts['HIC'][item]['labels_table_html'] }}
+
+

HiC QC report

+
+
+ +
{% if vars.update({'is_first': False}) %} {% endif %} {% endfor %} diff --git a/docs/images/hicqc.png b/docs/images/hicqc.png new file mode 100644 index 00000000..edff46d9 Binary files /dev/null and b/docs/images/hicqc.png differ diff --git a/docs/output.md b/docs/output.md index f5bfacff..f124df2d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -198,7 +198,12 @@ Kraken2 [assigns taxonomic labels](https://ccb.jhu.edu/software/kraken2/) to seq Hi-C contact mapping experiments measure the frequency of physical contact between loci in the genome. The resulting dataset, called a “contact map,” is represented using a [two-dimensional heatmap](https://github.com/igvteam/juicebox.js) where the intensity of each pixel indicates the frequency of contact between a pair of loci. -
AssemblyQC - HiC interactive contact map
AssemblyQC - HiC interactive contact map
+
+AssemblyQC - HiC QC report +AssemblyQC - HiC interactive contact map +
+AssemblyQC - HiC results +
### Synteny diff --git a/subworkflows/local/fq2hic.nf b/subworkflows/local/fq2hic.nf index a37a325a..7b00cae2 100644 --- a/subworkflows/local/fq2hic.nf +++ b/subworkflows/local/fq2hic.nf @@ -64,6 +64,7 @@ workflow FQ2HIC { HICQC ( ch_bam_and_ref.map { meta3, bam, fa -> [ meta3, bam ] } ) + ch_hicqc_pdf = HICQC.out.pdf ch_versions = ch_versions.mix(HICQC.out.versions) // MODULE: MAKEAGPFROMFASTA | AGP2ASSEMBLY | ASSEMBLY2BEDPE @@ -95,6 +96,7 @@ workflow FQ2HIC { ch_versions = ch_versions.mix(HIC2HTML.out.versions.first()) emit: + hicqc_pdf = ch_hicqc_pdf hic = ch_hic html = HIC2HTML.out.html assembly = AGP2ASSEMBLY.out.assembly diff --git a/workflows/assemblyqc.nf b/workflows/assemblyqc.nf index c892263c..103e1618 100644 --- a/workflows/assemblyqc.nf +++ b/workflows/assemblyqc.nf @@ -590,12 +590,16 @@ workflow ASSEMBLYQC { params.hic_skip_fastqc ) + ch_hicqc_pdf = FQ2HIC.out.hicqc_pdf ch_hic_html = FQ2HIC.out.html ch_hic_assembly = FQ2HIC.out.assembly ch_hic_report_files = ch_hic_html | mix( ch_hic_assembly.map { tag, assembly -> assembly } ) + | mix( + ch_hicqc_pdf.map { meta, pdf -> pdf } + ) ch_versions = ch_versions.mix(FQ2HIC.out.versions) // SUBWORKFLOW: FASTA_SYNTENY