# multiqc/utils/search_patterns.yaml

# MultiQC search patterns.
# Default configurations for how modules can find their log files.
# Loaded by the config module so that these patterns can be overwritten in user config files.

# AdapterRemoval: '*.settings' files with 'AdapterRemoval' in the contents.
# NOTE(review): num_lines presumably limits the contents search to the first
# N lines of a file - confirm against the file-search code.
adapterRemoval:
    fn: '*.settings'
    contents: 'AdapterRemoval'
    num_lines: 1
# AfterQC: '*.json' files containing the 'allow_mismatch_in_poly' string.
afterqc:
    fn: '*.json'
    contents: 'allow_mismatch_in_poly'
# bamtools stats: contents string only (no filename filter); flagged `shared`.
bamtools/stats:
    contents: 'Stats for BAM file(s):'
    shared: true
    num_lines: 2
# BBMap outputs. Each entry is identified by a contents string, in most cases
# a '#'-prefixed column-header line. Tab separators are spelled as \t escapes
# inside double quotes so they stay visible and cannot be silently converted
# to spaces by an editor; the parsed strings are unchanged.
bbmap/stats:
    contents: "#Name\tReads\tReadsPct"
    num_lines: 4
bbmap/aqhist:
    contents: "#Quality\tcount1\tfraction1\tcount2\tfraction2"
    num_lines: 1
bbmap/bhist:
    contents: "#Pos\tA\tC\tG\tT\tN"
    num_lines: 1
bbmap/bincov:
    # Header sits on the 3rd line (the 1st is #Mean)
    contents: "#RefName\tCov\tPos\tRunningPos"
    num_lines: 3
bbmap/bqhist:
    contents: "#BaseNum\tcount_1\tmin_1\tmax_1\tmean_1\tQ1_1\tmed_1\tQ3_1\tLW_1\tRW_1\tcount_2\tmin_2\tmax_2\tmean_2\tQ1_2\tmed_2\tQ3_2\tLW_2\tRW_2"
    num_lines: 1
bbmap/covhist:
    contents: "#Coverage\tnumBases"
    num_lines: 1
bbmap/covstats:
    contents: "#ID\tAvg_fold"
    num_lines: 1
bbmap/ehist:
    contents: "#Errors\tCount"
    num_lines: 1
bbmap/gchist:
    # Header sits on the 5th line (the 1st is #Mean)
    contents: "#GC\tCount"
    num_lines: 5
bbmap/idhist:
    contents: '#Mean_reads'
    num_lines: 1
bbmap/ihist:
    # Header sits on the 6th line (the 1st is #Mean)
    contents: "#InsertSize\tCount"
    num_lines: 6
bbmap/indelhist:
    contents: "#Length\tDeletions\tInsertions"
    num_lines: 1
bbmap/lhist:
    contents: "#Length\tCount"
    num_lines: 1
bbmap/mhist:
    contents: "#BaseNum\tMatch1\tSub1\tDel1\tIns1\tN1\tOther1\tMatch2\tSub2\tDel2\tIns2\tN2\tOther2"
    num_lines: 1
bbmap/qahist:
    contents: '#Deviation'
    num_lines: 1
bbmap/qhist:
    contents: "#BaseNum\tRead1_linear\tRead1_log\tRead1_measured\tRead2_linear\tRead2_log\tRead2_measured"
    num_lines: 1
bbmap/rpkm:
    # The trailing tab is part of the match string
    contents: "#File\t"
    num_lines: 1
# The two statsfile variants differ only in the character after 'Reads Used'
bbmap/statsfile_machine:
    contents: 'Reads Used='
    num_lines: 1
bbmap/statsfile:
    contents: 'Reads Used:'
    num_lines: 1
# bcftools stats: contents string only (no filename filter); flagged `shared`.
bcftools/stats:
    contents: 'This file was produced by bcftools stats'
    shared: true
# bcl2fastq: 'Stats.json' files that contain 'DemuxResults'.
# fn, contents and num_lines must live in ONE pattern: as a separate list item,
# `num_lines` would form a second pattern with no fn/contents criteria and
# would not be attached to the Stats.json pattern it is meant to limit.
bcl2fastq:
    fn: 'Stats.json'
    contents: 'DemuxResults'
    num_lines: 300
# biobambam2 bamsormadup: contents string only.
biobambam2/bamsormadup:
    contents: '# bamsormadup'
    num_lines: 2
# BioBloom Tools: identified by the tab-separated column header. Tabs are
# written as \t escapes (double-quoted) so they remain visible; the parsed
# string is unchanged.
biobloomtools:
    contents: "filter_id\thits\tmisses\tshared\trate_hit\trate_miss\trate_shared"
    num_lines: 2
# BISCUIT QC outputs. Every entry pairs a filename glob with a contents string;
# all but markdup_report use the table title ('BISCUITqc ...').
# -- Alignment tables
biscuit/align_mapq:
    fn: '*_mapq_table.txt'
    contents: 'BISCUITqc Mapping Quality Table'
biscuit/align_strand:
    fn: '*_strand_table.txt'
    contents: 'BISCUITqc Strand Table'
biscuit/align_isize:
    fn: '*_isize_score_table.txt'
    contents: 'BISCUITqc Insert Size, Score Table'
# -- CpG depth distributions (all / Q40 / Q40 low-GC / Q40 high-GC)
biscuit/covdist_cpg:
    fn: '*_covdist_cpg_table.txt'
    contents: 'BISCUITqc CpG Depth Distribution (All)'
biscuit/covdist_cpg_q40:
    fn: '*_covdist_cpg_q40_table.txt'
    contents: 'BISCUITqc CpG Depth Distribution (Q40)'
biscuit/covdist_cpg_q40_botgc:
    fn: '*_covdist_cpg_q40_botgc_table.txt'
    contents: 'BISCUITqc CpG Depth Distribution (low GC, Q40)'
biscuit/covdist_cpg_q40_topgc:
    fn: '*_covdist_cpg_q40_topgc_table.txt'
    contents: 'BISCUITqc CpG Depth Distribution (high GC, Q40)'
# -- Depth distributions (all / Q40 / Q40 low-GC / Q40 high-GC)
biscuit/covdist:
    fn: '*_covdist_table.txt'
    contents: 'BISCUITqc Depth Distribution (All)'
biscuit/covdist_q40:
    fn: '*_covdist_q40_table.txt'
    contents: 'BISCUITqc Depth Distribution (Q40)'
biscuit/covdist_q40_botgc:
    fn: '*_covdist_q40_botgc_table.txt'
    contents: 'BISCUITqc Depth Distribution (low GC, Q40)'
biscuit/covdist_q40_topgc:
    fn: '*_covdist_q40_topgc_table.txt'
    contents: 'BISCUITqc Depth Distribution (high GC, Q40)'
# -- Uniformity and CpG distribution tables
biscuit/qc_cv:
    fn: '*_all_cv_table.txt'
    contents: 'BISCUITqc Uniformity Table'
biscuit/qc_cpg_cv:
    fn: '*_cpg_cv_table.txt'
    contents: 'BISCUITqc CpG Uniformity Table'
biscuit/qc_cpg_dist:
    fn: '*_cpg_dist_table.txt'
    contents: 'BISCUITqc CpG Distribution Table'
# -- Duplication reports
biscuit/dup_report:
    fn: '*_dup_report.txt'
    contents: 'BISCUITqc Read Duplication Table'
# markdup_report is the one entry keyed on a log prefix rather than a table title
biscuit/markdup_report:
    fn: '*_markdup_report.txt'
    contents: '[mark_dup] parsed'
# -- Retention / conversion tables
biscuit/retention_cph_readpos:
    fn: '*_CpHRetentionByReadPos.txt'
    contents: 'BISCUITqc CpH Retention by Read Position Table'
biscuit/retention_cpg_readpos:
    fn: '*_CpGRetentionByReadPos.txt'
    contents: 'BISCUITqc CpG Retention by Read Position Table'
biscuit/retention_dist:
    fn: '*_CpGRetentionDist.txt'
    contents: 'BISCUITqc Retention Distribution Table'
biscuit/retention_dist_byread:
    fn: '*_freqOfTotalRetentionPerRead.txt'
    contents: 'BISCUITqc Frequency of Total Retention per Read Table'
biscuit/retention_rate_bybase:
    fn: '*_totalBaseConversionRate.txt'
    contents: 'BISCUITqc Conversion Rate by Base Average Table'
biscuit/retention_rate_byread:
    fn: '*_totalReadConversionRate.txt'
    contents: 'BISCUITqc Conversion Rate by Read Average Table'
# Bismark reports: every entry is a filename glob only (no contents string).
bismark/align:
    # [SP]E covers both the _SE_ and _PE_ report names
    fn: '*_[SP]E_report.txt'
bismark/dedup:
    fn: '*.deduplication_report.txt'
bismark/meth_extract:
    fn: '*_splitting_report.txt'
bismark/m_bias:
    fn: '*M-bias.txt'
bismark/bam2nuc:
    fn: '*.nucleotide_stats.txt'
# Bowtie 1: contents string plus a list of file names to exclude; `shared`.
bowtie1:
    contents: '# reads processed:'
    exclude_fn:
        # Tophat log files (left/right reads, plus m2g_um and seg1/seg2 variants)
        - 'bowtie.left_kept_reads.log'
        - 'bowtie.left_kept_reads.m2g_um.log'
        - 'bowtie.left_kept_reads.m2g_um_seg1.log'
        - 'bowtie.left_kept_reads.m2g_um_seg2.log'
        - 'bowtie.right_kept_reads.log'
        - 'bowtie.right_kept_reads.m2g_um.log'
        - 'bowtie.right_kept_reads.m2g_um_seg1.log'
        - 'bowtie.right_kept_reads.m2g_um_seg2.log'
    shared: true
# Bowtie 2: contents string plus strings listed under exclude_contents.
# NOTE(review): presumably files containing 'bisulfite' or 'HiC-Pro' are
# skipped so other tools' logs are not picked up here - confirm.
bowtie2:
    contents: 'reads; of these:'
    exclude_contents:
        - 'bisulfite'
        - 'HiC-Pro'
    shared: true
# BUSCO: 'short_summary_*' files containing the version banner.
busco:
    fn: 'short_summary_*'
    contents: 'BUSCO version is:'
    num_lines: 1
# Custom content: filename regex - one or more characters, then '_mqc.', then
# one of the listed extensions. The pattern carries no trailing '$' anchor.
custom_content:
    fn_re: '.+_mqc\.(yaml|yml|json|txt|csv|tsv|log|out|png|jpg|jpeg)'
# ClipAndMerge: contents string only (note the trailing ' (' in the string).
clipandmerge:
    contents: 'ClipAndMerge ('
    num_lines: 5
# Cluster Flow: log files by name; run files by name plus contents string.
clusterflow/logs:
    fn: '*_clusterFlow.txt'
    shared: true
clusterflow/runfiles:
    fn: '*.run'
    contents: 'Cluster Flow Run File'
    num_lines: 2
# Conpair: both entries are contents strings with num_lines: 3.
conpair/concordance:
    contents: 'markers (coverage per marker threshold : '
    num_lines: 3
conpair/contamination:
    contents: 'Tumor sample contamination level: '
    num_lines: 3
# Cutadapt: contents string only; an alternative string for very old versions
# is kept commented out below.
cutadapt:
    contents: 'This is cutadapt'
    # contents: 'cutadapt version' # Use this instead if using very old versions of cutadapt (eg. v1.2)
    shared: true
# DamageProfiler: filename glob only.
damageprofiler:
    fn: '*dmgprof.json'
# DeDup: exact file name 'dedup.log'.
dedup:
    fn: 'dedup.log'
# deepTools outputs. Every entry is a contents string with num_lines: 1:
# either a '#<command> ...' marker or a tab-separated column header.
# Tabs are written as \t escapes (double-quoted) so they stay visible; strings
# without tabs use the file's usual single quotes. Parsed values are unchanged.
deeptools/bamPEFragmentSizeTable:
    # The header starts with a tab (empty first column)
    contents: "\tFrag. Sampled\tFrag. Len. Min.\tFrag. Len. 1st. Qu.\tFrag. Len. Mean\tFrag. Len. Median\tFrag. Len. 3rd Qu."
    num_lines: 1
deeptools/bamPEFragmentSizeDistribution:
    contents: '#bamPEFragmentSize'
    num_lines: 1
deeptools/estimateReadFiltering:
    contents: "Sample\tTotal Reads\tMapped Reads\tAlignments in blacklisted regions\tEstimated mapped reads"
    num_lines: 1
deeptools/plotCorrelationData:
    contents: '#plotCorrelation --outFileCorMatrix'
    num_lines: 1
deeptools/plotCoverageStdout:
    contents: "sample\tmean\tstd\tmin\t25%\t50%\t75%\tmax"
    num_lines: 1
deeptools/plotCoverageOutRawCounts:
    contents: '#plotCoverage --outRawCounts'
    num_lines: 1
deeptools/plotEnrichment:
    contents: "file\tfeatureType\tpercent\tfeatureReadCount\ttotalReadCount"
    num_lines: 1
deeptools/plotFingerprintOutRawCounts:
    contents: '#plotFingerprint --outRawCounts'
    num_lines: 1
deeptools/plotFingerprintOutQualityMetrics:
    contents: "Sample\tAUC\tSynthetic AUC\tX-intercept\tSynthetic X-intercept\tElbow Point\tSynthetic Elbow Point"
    num_lines: 1
deeptools/plotPCAData:
    contents: '#plotPCA --outFileNameData'
    num_lines: 1
deeptools/plotProfile:
    contents: 'bin labels'
    num_lines: 1
# fastp: filename glob only.
fastp:
    fn: '*fastp.json'
# FastQ Screen: filename glob only.
fastq_screen:
    fn: '*_screen.txt'
# FastQC: the data file by exact name, zip archives and theoretical-GC files
# by glob.
fastqc/data:
    fn: 'fastqc_data.txt'
fastqc/zip:
    fn: '*_fastqc.zip'
fastqc/theoretical_gc:
    fn: '*fastqc_theoretical_gc*'
# featureCounts: filename glob only; flagged `shared`.
featurecounts:
    fn: '*.summary'
    shared: true
# FLASh: logs by contents string, histograms by filename glob.
flash/log:
    contents: '[FLASH]'
    shared: true
flash/hist:
    fn: '*flash*.hist'
# Flexbar: contents string only; flagged `shared`.
flexbar:
    contents: 'Flexbar - flexible barcode and adapter removal'
    shared: true
# GATK: both entries key on a '#:GATKTable:' header string.
gatk/varianteval:
    contents: '#:GATKTable:TiTvVariantEvaluator'
    shared: true
gatk/base_recalibrator:
    contents: '#:GATKTable:Arguments:Recalibration'
    num_lines: 3
# goleft indexcov: filename globs only.
goleft_indexcov/roc:
    fn: '*-indexcov.roc'
goleft_indexcov/ped:
    fn: '*-indexcov.ped'
# hap.py: '*.summary.csv' files containing the CSV header prefix.
happy:
    fn: '*.summary.csv'
    contents: 'Type,Filter,TRUTH'
# HTSeq: contents string only.
htseq:
    contents: '__too_low_aQual'
# HiCExplorer: contents string with both max_filesize and num_lines set.
# NOTE(review): max_filesize is presumably in bytes - confirm.
hicexplorer:
    contents: 'Min rest. site distance'
    max_filesize: 4096
    num_lines: 5
# HiCUP: filename glob only.
hicup:
    fn: 'HiCUP_summary_report*'
# HiC-Pro: one filename glob per stat-file extension.
hicpro/mmapstat:
    fn: '*.mmapstat'
hicpro/mpairstat:
    fn: '*.mpairstat'
hicpro/mergestat:
    fn: '*.mergestat'
hicpro/mRSstat:
    fn: '*.mRSstat'
hicpro/assplit:
    fn: '*.assplit.stat'
# HISAT2: contents string only; flagged `shared`.
hisat2:
    contents: 'HISAT2 summary stats:'
    shared: true
# HOMER: findpeaks by contents string; the remaining entries by file name.
homer/findpeaks:
    contents: '# HOMER Peaks'
    num_lines: 3
homer/GCcontent:
    fn: 'tagGCcontent.txt'
homer/genomeGCcontent:
    fn: 'genomeGCcontent.txt'
homer/RestrictionDistribution:
    # The only HOMER file name here that contains a wildcard
    fn: 'petagRestrictionDistribution.*.txt'
homer/LengthDistribution:
    fn: 'tagLengthDistribution.txt'
homer/tagInfo:
    fn: 'tagInfo.txt'
homer/FreqDistribution:
    fn: 'petag.FreqDistribution_1000.txt'
# InterOp: both entries key on a comma-separated column header.
interop/summary:
    contents: 'Level,Yield,Projected Yield,Aligned,Error Rate,Intensity C1,%>=Q30'
interop/index-summary:
    contents: 'Total Reads,PF Reads,% Read Identified (PF),CV,Min,Max'
# Jellyfish: filename glob only.
jellyfish:
    fn: '*_jf.hist'
# Kallisto: contents string only; flagged `shared`.
kallisto:
    contents: '[quant] finding pseudoalignments for the reads'
    shared: true
# KAT: filename glob only.
kat:
    fn: '*.dist_analysis.json'
# leeHom: contents string only; flagged `shared`.
leehom:
    contents: 'Adapter dimers/chimeras'
    shared: true
# Long Ranger: summary CSVs by glob plus column-header prefix; invocation
# files by exact name plus contents string, with a max_filesize set.
longranger/summary:
    fn: '*summary.csv'
    contents: 'longranger_version,instrument_ids,gems_detected,mean_dna_per_gem,bc_on_whitelist,bc_mean_qscore,n50_linked_reads_per_molecule'
    num_lines: 2
longranger/invocation:
    fn: '_invocation'
    contents: 'call PHASER_SVCALLER_CS('
    max_filesize: 2048
# MACS2: filename glob only.
macs2:
    fn: '*_peaks.xls'
# methylQA: filename glob only; flagged `shared`.
methylQA:
    fn: '*.report'
    shared: true
# MinIONQC: 'summary.yaml' files that contain 'total.gigabases'.
minionqc:
    fn: 'summary.yaml'
    contents: 'total.gigabases'
# miRTrace: exact file names only.
mirtrace/summary:
    fn: 'mirtrace-results.json'
mirtrace/length:
    fn: 'mirtrace-stats-length.tsv'
mirtrace/contaminationbasic:
    fn: 'mirtrace-stats-contamination_basic.tsv'
mirtrace/mirnacomplexity:
    fn: 'mirtrace-stats-mirna-complexity.tsv'
# Disambiguate: contents string only. (This key sits outside the otherwise
# roughly alphabetical ordering of the file.)
disambiguate:
    contents: 'unique species A pairs'
    num_lines: 2
# Peddy: one filename glob per output file type.
peddy/summary_table:
    fn: '*.peddy.ped'
peddy/het_check:
    fn: '*.het_check.csv'
peddy/ped_check:
    fn: '*.ped_check.csv'
peddy/sex_check:
    fn: '*.sex_check.csv'
peddy/background_pca:
    fn: '*.background_pca.json'
# phantompeakqualtools: filename glob only.
phantompeakqualtools/out:
    fn: '*.spp.out'
# Picard: most entries key on a metrics/tool name appearing in the file
# contents and are flagged `shared`. Exceptions are noted inline.
picard/alignment_metrics:
    contents: 'AlignmentSummaryMetrics'
    shared: true
picard/basedistributionbycycle:
    contents: 'BaseDistributionByCycleMetrics'
    shared: true
picard/gcbias:
    contents: 'GcBias'
    shared: true
picard/hsmetrics:
    contents: 'HsMetrics'
    shared: true
picard/insertsize:
    contents: 'InsertSizeMetrics'
    shared: true
picard/markdups:
    contents: 'DuplicationMetrics'
    shared: true
picard/oxogmetrics:
    contents: 'CollectOxoGMetrics'
    shared: true
picard/pcr_metrics:
    contents: 'TargetedPcrMetrics'
    shared: true
picard/rnaseqmetrics:
    # Regex form: accepts upper- or lower-case r/s/m (e.g. RnaSeqMetrics, rnaseqmetrics)
    contents_re: 'Collect[Rr]na[Ss]eq[Mm]etrics'
    shared: true
picard/rrbs_metrics:
    contents: 'RrbsSummaryMetrics'
    shared: true
picard/sam_file_validation:
    # Filename glob only; character classes accept either case for V/S/F
    fn: '*[Vv]alidate[Ss]am[Ff]ile*'
picard/variant_calling_metrics:
    # The only Picard entry with both a filename glob and a contents string
    fn: '*.variant_calling_detail_metrics'
    contents: 'CollectVariantCallingMetrics'
    shared: true
picard/wgs_metrics:
    contents: 'CollectWgsMetrics'
    shared: true
# Preseq: a list of two alternative patterns, each a contents string with
# num_lines: 2.
# NOTE(review): list items are presumably tried independently (a file matching
# either one is accepted) - confirm against the file-search code.
preseq:
    - contents: 'EXPECTED_DISTINCT'
      num_lines: 2
    - contents: 'distinct_reads'
      num_lines: 2
preseq/real_counts:
    fn: '*preseq_real_counts*'
# Prokka: contents string only.
prokka:
    contents: 'contigs:'
    num_lines: 2
# QoRTs: contents string, with num_lines: 100.
qorts:
    contents: 'BENCHMARK_MinutesOnSamIteration'
    num_lines: 100
# Qualimap: every entry is an exact file name (no wildcards, no contents).
# -- BamQC outputs
qualimap/bamqc/genome_results:
    fn: 'genome_results.txt'
qualimap/bamqc/coverage:
    fn: 'coverage_histogram.txt'
qualimap/bamqc/insert_size:
    fn: 'insert_size_histogram.txt'
qualimap/bamqc/genome_fraction:
    fn: 'genome_fraction_coverage.txt'
qualimap/bamqc/gc_dist:
    fn: 'mapped_reads_gc-content_distribution.txt'
# -- RNA-seq QC outputs
qualimap/rnaseq/rnaseq_results:
    fn: 'rnaseq_qc_results.txt'
qualimap/rnaseq/coverage:
    # The parentheses are part of the literal file name
    fn: 'coverage_profile_along_genes_(total).txt'
# QUAST: exact file name; flagged `shared`.
quast:
    fn: 'report.tsv'
    shared: true
# RNA-SeQC: metrics by exact file name; coverage and correlation files by
# filename regex with an alternation for the variant part of the name.
rna_seqc/metrics:
    fn: 'metrics.tsv'
    shared: true
rna_seqc/coverage:
    fn_re: 'meanCoverageNorm_(high|medium|low)\.txt'
rna_seqc/correlation:
    fn_re: 'corrMatrix(Pearson|Spearman)\.txt'
# RSEM: written as a single-item list rather than a plain mapping.
# NOTE(review): presumably equivalent to the mapping form used elsewhere - confirm.
rsem:
    - fn: '*.cnt'
# RSeQC: entries keyed on a contents string also set max_filesize: 500000;
# the others are filename globs.
rseqc/bam_stat:
    contents: 'Proper-paired reads map to different chrom:'
    max_filesize: 500000
rseqc/gene_body_coverage:
    fn: '*.geneBodyCoverage.txt'
rseqc/inner_distance:
    fn: '*.inner_distance_freq.txt'
rseqc/junction_annotation:
    contents: 'Partial Novel Splicing Junctions:'
    max_filesize: 500000
rseqc/junction_saturation:
    fn: '*.junctionSaturation_plot.r'
rseqc/read_gc:
    fn: '*.GC.xls'
rseqc/read_distribution:
    # Column header padded with runs of spaces; the spacing is part of the match string
    contents: 'Group               Total_bases         Tag_count           Tags/Kb'
    max_filesize: 500000
rseqc/read_duplication_pos:
    fn: '*.pos.DupRate.xls'
rseqc/infer_experiment:
    # Two alternative patterns: one by file name, one by contents string
    - fn: '*infer_experiment.txt'
    - contents: 'Fraction of reads explained by'
      max_filesize: 500000
# Salmon: meta_info.json containing 'salmon_version'; flenDist.txt by name.
salmon/meta:
    fn: 'meta_info.json'
    contents: 'salmon_version'
salmon/fld:
    fn: 'flenDist.txt'
# samblaster: contents string only; flagged `shared`.
samblaster:
    contents: 'samblaster: Version'
    shared: true
# Samtools: stats, flagstat and rmdup by contents string (all `shared`);
# idxstats by filename glob.
samtools/stats:
    contents: 'This file was produced by samtools stats'
    shared: true
samtools/flagstat:
    contents: 'in total (QC-passed reads + QC-failed reads)'
    shared: true
samtools/idxstats:
    fn: '*idxstat*'
samtools/rmdup:
    # The string intentionally has no closing bracket
    contents: '[bam_rmdup'
    shared: true
# Sargasso: exact file name.
sargasso:
    fn: 'overall_filtering_summary.txt'
# Skewer: contents string only; flagged `shared`.
skewer:
    contents: 'maximum error ratio allowed (-r):'
    shared: true
# Slamdunk: every entry keys on a '# slamdunk <name>' string, num_lines: 1.
slamdunk/summary:
    contents: '# slamdunk summary'
    num_lines: 1
slamdunk/PCA:
    contents: '# slamdunk PCA'
    num_lines: 1
slamdunk/rates:
    contents: '# slamdunk rates'
    num_lines: 1
slamdunk/utrrates:
    contents: '# slamdunk utrrates'
    num_lines: 1
slamdunk/tcperreadpos:
    contents: '# slamdunk tcperreadpos'
    num_lines: 1
slamdunk/tcperutrpos:
    # NOTE(review): the string is 'tcperutr' without the 'pos' suffix used in
    # the key; presumably this matches the tool's actual header - confirm.
    contents: '# slamdunk tcperutr'
    num_lines: 1
# SnpEff: contents string with a max_filesize of 5000000.
snpeff:
    contents: 'SnpEff_version'
    max_filesize: 5000000
# SortMeRNA: contents string only; flagged `shared`.
sortmerna:
    contents: 'Minimal SW score based on E-value'
    shared: true
# Stacks: each entry pairs a file name (or glob) with a contents string.
stacks/gstacks:
    fn: 'gstacks.log.distribs'
    contents: 'BEGIN effective_coverages_per_sample'
stacks/populations:
    fn: 'populations.log.distribs'
    contents: 'BEGIN missing_samples_per_loc_prefilters'
stacks/sumstats:
    fn: '*.sumstats_summary.tsv'
    # Tab-separated column header; tabs are written as \t escapes
    # (double-quoted) so they stay visible. The parsed string is unchanged.
    contents: "# Pop ID\tPrivate\tNum_Indv\tVar\tStdErr\tP\tVar"
    max_filesize: 1000000
# STAR: filename globs only, for the final log and the per-gene read counts.
star:
    fn: '*Log.final.out'
star/genecounts:
    fn: '*ReadsPerGene.out.tab'
# Supernova: each entry combines a file name (or glob), a contents string and
# a num_lines value. Keys are listed as fn / contents / num_lines, the order
# used by the other entries in this file.
supernova/report:
    fn: '*report*.txt'
    contents: '- assembly checksum ='
    num_lines: 100
supernova/summary:
    fn: 'summary.json'
    contents: '"lw_mean_mol_len":'
    num_lines: 120
# The two histogram files are told apart by their "description" field
supernova/molecules:
    fn: 'histogram_molecules.json'
    contents: '"description": "molecules",'
    num_lines: 10
supernova/kmers:
    fn: 'histogram_kmer_count.json'
    contents: '"description": "kmer_count",'
    num_lines: 10
# THetA2: filename glob only.
theta2:
    fn: '*.BEST.results'
# TopHat: filename glob only; flagged `shared`.
tophat:
    fn: '*align_summary.txt'
    shared: true
# Trimmomatic: contents string only; flagged `shared`.
trimmomatic:
    contents: 'Trimmomatic'
    shared: true
# VCFTools: one filename glob per output extension.
vcftools/relatedness2:
    fn: '*.relatedness2'
vcftools/tstv_by_count:
    fn: '*.TsTv.count'
vcftools/tstv_by_qual:
    fn: '*.TsTv.qual'
vcftools/tstv_summary:
    fn: '*.TsTv.summary'
# VerifyBAMID: filename glob only.
verifybamid/selfsm:
    fn: '*.selfSM'