diff --git a/Changes b/Changes index 0b20d1340..290c33485 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,15 @@ LIST OF CHANGES FOR NPG-QC PACKAGE + - remove attribute qc_report_dir from check object: the output directory + is created by default using the sample's filename root + - upgrade_schema-60.x to create rna_seqc table has been included. + - npg_qc::autoqc::qc_store - load rna_seqc results into npg_qc db: + requires rna_seqc table exists in database. + - updated data/schema.txt including definition for rna_seqc table. + - SeQC: + - added template for rna_seqc check with selected metrics shown + in summary + - include a link to original RNA-SeQC report in check's template - translation from a database composition representation to the npg_tracking::glossary::composition type object - db query for compisition-based tables should include a condition diff --git a/MANIFEST b/MANIFEST index a79558926..5b79d02d3 100644 --- a/MANIFEST +++ b/MANIFEST @@ -264,6 +264,7 @@ lib/npg_qc/Schema/Result/QXYield.pm lib/npg_qc/Schema/Result/RecipeFile.pm lib/npg_qc/Schema/Result/RefMatch.pm lib/npg_qc/Schema/Result/RefSnpInfo.pm +lib/npg_qc/Schema/Result/RnaSeqc.pm lib/npg_qc/Schema/Result/RunAndPair.pm lib/npg_qc/Schema/Result/RunConfig.pm lib/npg_qc/Schema/Result/RunGraph.pm diff --git a/data/schema.txt b/data/schema.txt index 7b29e697f..3e4071ce7 100644 --- a/data/schema.txt +++ b/data/schema.txt @@ -1,8 +1,8 @@ -- MySQL dump 10.13 Distrib 5.5.41, for debian-linux-gnu (x86_64) -- --- Host: npgtest-db Database: npgtest_mg8qc +-- Host: npgtest-db Database: npgtest_rb11 -- ------------------------------------------------------ --- Server version 5.5.31-log +-- Server version 5.7.13-log /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; @@ -1130,6 +1130,40 @@ CREATE TABLE `ref_snp_info` ( ) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; +-- +-- Table 
structure for table `rna_seqc` +-- + +DROP TABLE IF EXISTS `rna_seqc`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `rna_seqc` ( + `id_rna_seqc` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT 'Auto-generated primary key', + `id_seq_composition` bigint(20) unsigned NOT NULL COMMENT 'A foreign key referencing the id_seq_composition column of the seq_composition table', + `info` text, + `rrna` float unsigned DEFAULT NULL, + `rrna_rate` float unsigned DEFAULT NULL, + `exonic_rate` float unsigned DEFAULT NULL, + `expression_profiling_efficiency` float unsigned DEFAULT NULL, + `genes_detected` float unsigned DEFAULT NULL, + `end_1_sense` float unsigned DEFAULT NULL, + `end_1_antisense` float unsigned DEFAULT NULL, + `end_2_sense` float unsigned DEFAULT NULL, + `end_2_antisense` float unsigned DEFAULT NULL, + `end_1_pct_sense` float unsigned DEFAULT NULL, + `end_2_pct_sense` float unsigned DEFAULT NULL, + `mean_per_base_cov` float unsigned DEFAULT NULL, + `mean_cv` float unsigned DEFAULT NULL, + `end_5_norm` float unsigned DEFAULT NULL, + `end_3_norm` float unsigned DEFAULT NULL, + `other_metrics` text, + PRIMARY KEY (`id_rna_seqc`), + UNIQUE KEY `rna_seqc_id_compos_unq` (`id_seq_composition`), + KEY `rna_seqc_compos` (`id_seq_composition`), + CONSTRAINT `rna_seqc_compos` FOREIGN KEY (`id_seq_composition`) REFERENCES `seq_composition` (`id_seq_composition`) ON DELETE NO ACTION ON UPDATE NO ACTION +) ENGINE=InnoDB DEFAULT CHARSET=latin1; +/*!40101 SET character_set_client = @saved_cs_client */; + -- -- Table structure for table `run_and_pair` -- @@ -1697,4 +1731,4 @@ CREATE TABLE `verify_bam_id` ( /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; --- Dump completed on 2016-02-25 13:46:17 +-- Dump completed on 2016-12-06 11:53:49 diff --git a/lib/npg_qc/Schema/Result/RnaSeqc.pm b/lib/npg_qc/Schema/Result/RnaSeqc.pm new file mode 100644 
index 000000000..a2ac66045 --- /dev/null +++ b/lib/npg_qc/Schema/Result/RnaSeqc.pm @@ -0,0 +1,394 @@ + +package npg_qc::Schema::Result::RnaSeqc; + +# Created by DBIx::Class::Schema::Loader +# DO NOT MODIFY THE FIRST PART OF THIS FILE + +##no critic(RequirePodAtEnd RequirePodLinksIncludeText ProhibitMagicNumbers ProhibitEmptyQuotes) + +=head1 NAME + +npg_qc::Schema::Result::RnaSeqc + +=cut + +use strict; +use warnings; + +use Moose; +use MooseX::NonMoose; +use MooseX::MarkAsMethods autoclean => 1; +extends 'DBIx::Class::Core'; + +=head1 ADDITIONAL CLASSES USED + +=over 4 + +=item * L + +=back + +=cut + +use namespace::autoclean; + +=head1 COMPONENTS LOADED + +=over 4 + +=item * L + +=item * L + +=back + +=cut + +__PACKAGE__->load_components('InflateColumn::DateTime', 'InflateColumn::Serializer'); + +=head1 TABLE: C + +=cut + +__PACKAGE__->table('rna_seqc'); + +=head1 ACCESSORS + +=head2 id_rna_seqc + + data_type: 'bigint' + extra: {unsigned => 1} + is_auto_increment: 1 + is_nullable: 0 + +Auto-generated primary key + +=head2 id_seq_composition + + data_type: 'bigint' + extra: {unsigned => 1} + is_foreign_key: 1 + is_nullable: 0 + +A foreign key referencing the id_seq_composition column of the seq_composition table + +=head2 info + + data_type: 'text' + is_nullable: 1 + +=head2 rrna + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 rrna_rate + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 exonic_rate + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 expression_profiling_efficiency + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 genes_detected + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_1_sense + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_1_antisense + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_2_sense + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 
1 + +=head2 end_2_antisense + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_1_pct_sense + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_2_pct_sense + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 mean_per_base_cov + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 mean_cv + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_5_norm + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_3_norm + + data_type: 'float' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 other_metrics + + data_type: 'text' + is_nullable: 1 + +=cut + +__PACKAGE__->add_columns( + 'id_rna_seqc', + { + data_type => 'bigint', + extra => { unsigned => 1 }, + is_auto_increment => 1, + is_nullable => 0, + }, + 'id_seq_composition', + { + data_type => 'bigint', + extra => { unsigned => 1 }, + is_foreign_key => 1, + is_nullable => 0, + }, + 'info', + { data_type => 'text', is_nullable => 1 }, + 'rrna', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'rrna_rate', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'exonic_rate', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'expression_profiling_efficiency', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'genes_detected', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_1_sense', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_1_antisense', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_2_sense', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_2_antisense', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_1_pct_sense', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_2_pct_sense', + { 
data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'mean_per_base_cov', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'mean_cv', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_5_norm', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_3_norm', + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, + 'other_metrics', + { data_type => 'text', is_nullable => 1 }, +); + +=head1 PRIMARY KEY + +=over 4 + +=item * L + +=back + +=cut + +__PACKAGE__->set_primary_key('id_rna_seqc'); + +=head1 UNIQUE CONSTRAINTS + +=head2 C + +=over 4 + +=item * L + +=back + +=cut + +__PACKAGE__->add_unique_constraint('rna_seqc_id_compos_unq', ['id_seq_composition']); + +=head1 RELATIONS + +=head2 seq_composition + +Type: belongs_to + +Related object: L + +=cut + +__PACKAGE__->belongs_to( + 'seq_composition', + 'npg_qc::Schema::Result::SeqComposition', + { id_seq_composition => 'id_seq_composition' }, + { is_deferrable => 1, on_delete => 'NO ACTION', on_update => 'NO ACTION' }, +); + +=head1 L ROLES APPLIED + +=over 4 + +=item * L + +=item * L + +=back + +=cut + + +with 'npg_qc::Schema::Flators', 'npg_qc::autoqc::role::result'; + + +# Created by DBIx::Class::Schema::Loader v0.07046 @ 2017-01-19 11:15:22 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:2mZZ7aTIqW6xAZ63EQaX1g + +# You can replace this text with custom code or comments, and it will be preserved on regeneration + +our $VERSION = '0'; + +__PACKAGE__->set_flators4non_scalar(qw( other_metrics info )); + +=head2 seq_component_compositions + +Type: has_many + +Related object: L + +To simplify queries, skip SeqComposition and link directly to the linking table. 
+ +=cut + +__PACKAGE__->has_many( + 'seq_component_compositions', + 'npg_qc::Schema::Result::SeqComponentComposition', + { 'foreign.id_seq_composition' => 'self.id_seq_composition' }, + { cascade_copy => 0, cascade_delete => 0 }, +); + +=head1 SYNOPSIS + +=head1 DESCRIPTION + +Result class definition in DBIx binding for npg-qc database. + +=head1 DIAGNOSTICS + +=head1 CONFIGURATION AND ENVIRONMENT + +=head1 SUBROUTINES/METHODS + +=cut + +=head2 composition + +An lazy-build attribute representing a composition this result +corresponds to. + +=cut + +__PACKAGE__->create_composition_attribute(); + +__PACKAGE__->meta->make_immutable; + +1; + +__END__ + +=head1 DEPENDENCIES + +=over + +=item strict + +=item warnings + +=item Moose + +=item namespace::autoclean + +=item MooseX::NonMoose + +=item MooseX::MarkAsMethods + +=item DBIx::Class::Core + +=item DBIx::Class::InflateColumn::DateTime + +=item DBIx::Class::InflateColumn::Serializer + +=back + +=head1 INCOMPATIBILITIES + +=head1 BUGS AND LIMITATIONS + +=head1 AUTHOR + +Ruben Bautista Erb11@sanger.ac.ukE + +=head1 LICENSE AND COPYRIGHT + +Copyright (C) 2016 GRL + +This file is part of NPG. + +NPG is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +=cut + diff --git a/lib/npg_qc/Schema/Result/SeqComposition.pm b/lib/npg_qc/Schema/Result/SeqComposition.pm index 5267d0616..2bfb655d6 100644 --- a/lib/npg_qc/Schema/Result/SeqComposition.pm +++ b/lib/npg_qc/Schema/Result/SeqComposition.pm @@ -135,6 +135,21 @@ __PACKAGE__->add_unique_constraint('unq_seq_compos_ps', ['id_seq_composition', ' =head1 RELATIONS +=head2 rna_seqc + +Type: might_have + +Related object: L + +=cut + +__PACKAGE__->might_have( + 'rna_seqc', + 'npg_qc::Schema::Result::RnaSeqc', + { 'foreign.id_seq_composition' => 'self.id_seq_composition' }, + { cascade_copy => 0, cascade_delete => 0 }, +); + =head2 samtools_stats Type: has_many @@ -184,8 +199,8 @@ __PACKAGE__->has_many( ); -# Created by DBIx::Class::Schema::Loader v0.07043 @ 2015-09-09 17:35:44 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:zzx4SOq+Lh95G58mYLuWWw +# Created by DBIx::Class::Schema::Loader v0.07046 @ 2017-01-19 11:15:22 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:sUtyrkK1I4aaxjciaI6mLQ # You can replace this text with custom code or comments, and it will be preserved on regeneration diff --git a/lib/npg_qc/autoqc/checks/rna_seqc.pm b/lib/npg_qc/autoqc/checks/rna_seqc.pm index fb7b32f85..2e25e741f 100644 --- a/lib/npg_qc/autoqc/checks/rna_seqc.pm +++ b/lib/npg_qc/autoqc/checks/rna_seqc.pm @@ -32,8 +32,8 @@ Readonly::Scalar my $METRICS_FILE_NAME => q[metrics.tsv]; Readonly::Scalar my $MINUS_ONE => -1; Readonly::Hash my %RNASEQC_METRICS_FIELDS_MAPPING => { - '3\' Norm' => 'end_3_norm', - '5\' Norm' => 'end_5_norm', + '3\' Norm' => 'end_3_norm', + '5\' Norm' => 'end_5_norm', 'End 1 % Sense' => 'end_1_pct_sense', 'End 1 Antisense' => 'end_1_antisense', 'End 1 Sense' => 'end_1_sense', @@ -55,25 +55,35 @@ has '+aligner' => (default => 'fasta', is => 'ro', writer => '_set_aligner',); -has 'qc_report_dir' => (is => 'ro', - isa => 'NpgTrackingDirectory', - required => 1,); +has 'output_dir' => (is => 'ro', + isa => 'Str', + required => 0, + lazy => 1, + builder => 
'_build_output_dir',); -has '_java_jar_path' => (is => 'ro', - isa => 'NpgCommonResolvedPathJarFile', - coerce => 1, - default => $RNASEQC_JAR_NAME, - init_arg => undef,); +sub _build_output_dir { + my ($self) = @_; + my $qc_out_path = $self->qc_out; + my $output_dir = File::Spec->catdir($qc_out_path, $self->result->filename_root . q[_rna_seqc]); + return $output_dir; +} + +has '_java_jar_path' => (is => 'ro', + isa => 'NpgCommonResolvedPathJarFile', + coerce => 1, + default => $RNASEQC_JAR_NAME, + init_arg => undef,); has '_ttype_gtf_column' => (is => 'ro', isa => 'Int', default => 2,); -has '_alignments_in_bam' => (is => 'ro', - isa => 'Maybe[Bool]', - lazy_build => 1,); +has '_alignments_in_bam' => (is => 'ro', + isa => 'Bool', + lazy => 1, + builder => '_build_alignments_in_bam',); -sub _build__alignments_in_bam { +sub _build_alignments_in_bam { my $self = shift; my $aligned = 0; my $command = $self->samtools_irods_cmd . ' view -H ' . $self->_bam_file . ' |'; @@ -87,11 +97,12 @@ sub _build__alignments_in_bam { return $aligned; } -has '_is_paired_end' => (is => 'ro', - isa => 'Maybe[Bool]', - lazy_build => 1,); +has '_is_paired_end' => (is => 'ro', + isa => 'Bool', + lazy => 1, + builder => '_build_is_paired_end',); -sub _build__is_paired_end { +sub _build_is_paired_end { my ($self) = @_; my $paired = 0; my $flag; @@ -114,11 +125,12 @@ sub _build__is_paired_end { return $paired; } -has '_is_rna_alignment' => (is => 'ro', - isa => 'Maybe[Bool]', - lazy_build => 1,); +has '_is_rna_alignment' => (is => 'ro', + isa => 'Bool', + lazy => 1, + builder => '_build_is_rna_alignment',); -sub _build__is_rna_alignment { +sub _build_is_rna_alignment { my ($self) = @_; my $rna_alignment = 0; my $command = $self->samtools_irods_cmd . ' view -H ' . $self->_bam_file . 
' |'; @@ -133,12 +145,13 @@ sub _build__is_rna_alignment { } -has '_input_str' => (is => 'ro', - isa => 'Str', - lazy_build => 1, - init_arg => undef,); +has '_input_str' => (is => 'ro', + isa => 'Str', + lazy => 1, + builder => '_build_input_str', + init_arg => undef,); -sub _build__input_str { +sub _build_input_str { my ($self) = @_; my $sample_id = $self->lims->sample_id; my $library_name = $self->lims->library_name // $sample_id; @@ -147,40 +160,47 @@ sub _build__input_str { return qq["$library_names[0]|$input_file|$sample_id"]; } -has '_ref_genome' => (is => 'ro', - isa => 'Maybe[Str]', - lazy_build => 1,); +has '_ref_genome' => (is => 'ro', + isa => 'Str', + required => 0, + lazy => 1, + builder => '_build_ref_genome',); -sub _build__ref_genome { +sub _build_ref_genome { my ($self) = @_; my $reference_fasta = $self->refs->[0] // q[]; return $reference_fasta; } -has '_bam_file' => (is => 'ro', - isa => 'NpgTrackingReadableFile', - lazy_build => 1,); +has '_bam_file' => (is => 'ro', + isa => 'NpgTrackingReadableFile', + lazy => 1, + builder => '_build_bam_file',); -sub _build__bam_file { +sub _build_bam_file { my $self = shift; return $self->input_files->[0]; } -has '_annotation_gtf' => (is => 'ro', - isa => 'Maybe[Str]', - lazy_build => 1,); +has '_annotation_gtf' => (is => 'ro', + isa => 'Str', + required => 0, + lazy => 1, + builder => '_build_annotation_gtf',); -sub _build__annotation_gtf { +sub _build_annotation_gtf { my $self = shift; my $trans_gtf = $self->rnaseqc_gtf_file // q[]; return $trans_gtf; } -has '_ref_rrna' => (is => 'ro', - isa => 'Maybe[Str]', - lazy_build => 1,); +has '_ref_rrna' => (is => 'ro', + isa => 'Str', + required => 0, + lazy => 1, + builder => '_build_ref_rrna',); -sub _build__ref_rrna { +sub _build_ref_rrna { my $self = shift; my ($organism, $strain, $transcriptome) = $self->parse_reference_genome($self->lims->reference_genome); $self->_set_aligner($RRNA_ALIGNER); @@ -192,7 +212,8 @@ sub _build__ref_rrna { sub _command { my 
($self) = @_; - my ($ref_rrna_option, $single_end_option) = q[]; + my $ref_rrna_option = q[]; + my $single_end_option = q[]; if(!$self->_is_paired_end){ $single_end_option = q[-singleEnd]; } @@ -202,7 +223,7 @@ sub _command { my $command = $self->java_cmd. sprintf q[ -Xmx4000m -XX:+UseSerialGC -XX:-UsePerfData -jar %s -s %s -o %s -r %s -t %s -ttype %d %s %s], $self->_java_jar_path, $self->_input_str, - $self->qc_report_dir, + $self->output_dir, $self->_ref_genome, $self->_annotation_gtf, $self->_ttype_gtf_column, @@ -246,9 +267,7 @@ override 'execute' => sub { return 1; } my $command = $self->_command(); - $self->result->set_info('Command', $command); carp qq[EXECUTING $command time ]. DateTime->now(); - if (system $command) { my $error = $CHILD_ERROR >> $CHILD_ERROR_SHIFT; croak sprintf "Child %s exited with value %d\n", $command, $error; @@ -261,9 +280,9 @@ override 'execute' => sub { sub _parse_metrics { my ($self) = @_; - my $filename = File::Spec->catfile($self->qc_report_dir, $METRICS_FILE_NAME); + my $filename = File::Spec->catfile($self->output_dir, $METRICS_FILE_NAME); if (! 
-e $filename) { - croak q[Metrics file is not available, cannot parse RNA-SeQC metrics]; + croak qq[No such file $filename: cannot parse RNA-SeQC metrics]; } my $fh = IO::File->new($filename, 'r'); my @lines; @@ -278,9 +297,10 @@ sub _parse_metrics { } my $i = 0; my $results = {}; - foreach(@keys){ + foreach my $key (@keys){ chomp $values[$i]; - $results->{$_} = $values[$i]; + chomp $key; + $results->{$key} = $values[$i]; $i++; } return $results; @@ -292,13 +312,16 @@ sub _save_results { my $value = $results->{$key}; if (defined $value) { my $attr_name = $RNASEQC_METRICS_FIELDS_MAPPING{$key}; - if ($value eq q[?]) { - carp qq[Field $attr_name is set to '?', skipping...]; - } else { - $self->result->$attr_name($value); + if ($value eq q[NaN]) { + carp qq[Value of $attr_name is 'NaN', skipping...]; + } else { + $self->result->$attr_name($value); } } + delete $results->{$key}; } + $self->result->other_metrics($results); + $self->result->output_dir($self->output_dir); return; } __PACKAGE__->meta->make_immutable(); @@ -316,10 +339,11 @@ npg_qc::autoqc::checks::rna_seqc =head1 DESCRIPTION -QC check that runs Broad Institute's RNA-SeQC software over an RNA sample. -Files generated by RNA-SeQC are overwriten everytime it is executed and except -for the directory where the metrics are stored (named after Sample ID) all use -the same names. The user must consider this when passing the value of qc_report_dir. +QC check that runs Broad Institute's RNA-SeQC; a java program which computes a +series of quality control metrics for RNA-seq data. The output consists of +HTML reports and tab delimited files of metrics data from which a selection of +them are extracted to generate an autoqc result. The output directory is +created by default using the sample's filename root. 
=head1 SUBROUTINES/METHODS diff --git a/lib/npg_qc/autoqc/qc_store.pm b/lib/npg_qc/autoqc/qc_store.pm index db29058bb..e9aa03b2b 100644 --- a/lib/npg_qc/autoqc/qc_store.pm +++ b/lib/npg_qc/autoqc/qc_store.pm @@ -14,8 +14,6 @@ use npg_qc::autoqc::results::collection; our $VERSION = '0'; -Readonly::Scalar my $NON_STORABLE_CHECK => qr/rna_seqc/sm; - ## no critic (Documentation::RequirePodAtEnd Subroutines::ProhibitManyArgs) =head1 NAME @@ -193,7 +191,6 @@ sub run_from_db { my $ti_key = 'tag_index'; foreach my $check_name (@{$c->checks_list()}) { - next if ($check_name =~ $NON_STORABLE_CHECK); my $dbix_query = { 'id_run' => $query->id_run}; if (@{$query->positions}) { $dbix_query->{'position'} = $query->positions; diff --git a/lib/npg_qc/autoqc/results/collection.pm b/lib/npg_qc/autoqc/results/collection.pm index 62d4a107f..cec2a3dc9 100644 --- a/lib/npg_qc/autoqc/results/collection.pm +++ b/lib/npg_qc/autoqc/results/collection.pm @@ -556,7 +556,6 @@ sub check_names { my @check_names = (); my $map = {}; foreach my $check (@{$self->checks_list}) { - if ($check eq 'rna_seqc') { next; } push @check_names, @{$classes->{$check}}; foreach my $name (@{$classes->{$check}}) { $map->{$name} = $check; diff --git a/lib/npg_qc/autoqc/results/rna_seqc.pm b/lib/npg_qc/autoqc/results/rna_seqc.pm index efa1e7691..3185cb2b0 100644 --- a/lib/npg_qc/autoqc/results/rna_seqc.pm +++ b/lib/npg_qc/autoqc/results/rna_seqc.pm @@ -1,17 +1,49 @@ -######### -# Author: Ruben Bautista -# Created: 2015-08-13 -# - package npg_qc::autoqc::results::rna_seqc; use Moose; use namespace::autoclean; +use Readonly; -extends qw(npg_qc::autoqc::results::result); +extends 'npg_qc::autoqc::results::result'; our $VERSION = '0'; -## no critic (Documentation::RequirePodAtEnd) + +Readonly::Array my @ATTRIBUTES => qw/ rrna + rrna_rate + exonic_rate + expression_profiling_efficiency + genes_detected + end_1_sense + end_1_antisense + end_2_sense + end_2_antisense + end_1_pct_sense + end_2_pct_sense + mean_per_base_cov + 
mean_cv + end_5_norm + end_3_norm + /; + +has [ @ATTRIBUTES ] => ( + is => 'rw', + isa => 'Num', + required => 0,); + +has 'other_metrics' => (isa => 'HashRef[Str]', + is => 'rw', + default => sub { {} }, + required => 0,); + +has 'output_dir' => (is => 'rw', + isa => 'Str', + required => 0,); + +__PACKAGE__->meta->make_immutable; + +1; + +__END__ =head1 NAME @@ -21,157 +53,112 @@ npg_qc::autoqc::results::rna_seqc =head1 DESCRIPTION +A class for wrapping some of the metrics generated by RNA-SeQC. + =head1 SUBROUTINES/METHODS =head2 rrna -rRNA reads are non-duplicate and duplicate reads aligning to rRNA regions as defined in the transcript model definition. +rRNA reads are non-duplicate and duplicate reads aligning to rRNA +regions as defined in the transcript model definition. =cut -has 'rrna' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 rrna_rate Rate of rRNA per total reads. =cut -has 'rrna_rate' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 exonic_rate Fraction mapping within exons. =cut -has 'exonic_rate' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 expression_profiling_efficiency Ratio of exon reads to total reads. =cut -has 'expression_profiling_efficiency' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 genes_detected Number of Genes with at least 5 reads. =cut -has 'genes_detected' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_1_sense Number of End 1 reads that were sequenced in the sense direction. =cut -has 'end_1_sense' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_1_antisense Number of End 1 reads that were sequenced in the antisense direction. =cut -has 'end_1_antisense' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_2_sense Number of End 1 reads that were sequenced in the sense direction. 
=cut -has 'end_2_sense' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_2_antisense Number of End 2 reads that were sequenced in the antisense direction. =cut -has 'end_2_antisense' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_1_pct_sense -Percentage of intragenic End 1 reads that were sequenced in the sense direction. +Percentage of intragenic End 1 reads that were sequenced in the sense +direction. =cut -has 'end_1_pct_sense' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_2_pct_sense -Percentage of intragenic End 2 reads that were sequenced in the sense direction. +Percentage of intragenic End 2 reads that were sequenced in the sense +direction. =cut -has 'end_2_pct_sense' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 mean_per_base_cov -Mean Per Base Coverage of the middle 1000 expressed transcripts determined to have the highest expression levels. +Mean Per Base Coverage of the middle 1000 expressed transcripts +determined to have the highest expression levels. =cut -has 'mean_per_base_cov' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 mean_cv -Mean Coverage of the middle 1000 expressed transcripts determined to have the highest expression levels. +Mean Coverage of the middle 1000 expressed transcripts determined to +have the highest expression levels. =cut -has 'mean_cv' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_5_norm -Norm denotes that the end coverage is divided by the mean coverage for that transcript. +Norm denotes that the end coverage is divided by the mean coverage +for that transcript. =cut -has 'end_5_norm' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_3_norm -Norm denotes that the end coverage is divided by the mean coverage for that transcript. +Norm denotes that the end coverage is divided by the mean coverage +for that transcript. 
=cut -has 'end_3_norm' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_5_norm All remaining RNA-SeQC metrics as a key-values pairs =cut -has 'other_metrics' => (isa => 'HashRef', - is => 'rw', - required => 0,); - -__PACKAGE__->meta->make_immutable; - -1; - -__END__ =head1 DIAGNOSTICS @@ -183,6 +170,8 @@ __END__ =item Moose +=item MooseX::StrictConstructor + =item namespace::autoclean =item npg_qc::autoqc::results::result diff --git a/npg_qc_viewer/Changes b/npg_qc_viewer/Changes index 04eb21463..1ecb48c5d 100644 --- a/npg_qc_viewer/Changes +++ b/npg_qc_viewer/Changes @@ -1,6 +1,7 @@ Changes for NPG SeqQC - add composite alternate matches to genotype check display + - add metrics for RNA-SeQC qc check: new template and an "about" entry with information explaining metrics release 20.6 - added norm_fit information @@ -232,7 +233,7 @@ Links within the SeqQC pages are shown as blue arrows - the focus icon (4 outward arrows) to the left of the pool/library name takes you to the page of all lanes/plexes of this library - the focus icon to the left of the run number takes you to the main run page -New ± icon on the headers for lane results and an individual result: +New ± icon on the headers for lane results and an individual result: - collapse/expand relevant part of the page. release 8.4 diff --git a/npg_qc_viewer/root/src/about.tt2 b/npg_qc_viewer/root/src/about.tt2 index 6c864fa19..a80cb4587 100644 --- a/npg_qc_viewer/root/src/about.tt2 +++ b/npg_qc_viewer/root/src/about.tt2 @@ -13,18 +13,19 @@

Currently implemented checksLink to information about qc checks

    -
  • qX yield (threshold quality 20)
  • -
  • insert size
  • adapter
  • -
  • ref_match
  • -
  • split statistics
  • -
  • sequence mismatch
  • -
  • gc fraction
  • -
  • qc bias
  • -
  • tag metrics (formely tag decode stats)
  • bam flag stats
  • fastqcheck files rendering as heatmaps
  • +
  • gc fraction
  • genotype - sample ID check (Sequenom genotypes)
  • +
  • insert size
  • +
  • qc bias
  • +
  • qX yield (threshold quality 20)
  • +
  • ref_match
  • +
  • rna seqc (subset)
  • +
  • sequence mismatch
  • +
  • split statistics
  • +
  • tag metrics (formerly tag decode stats)
  • upstream tags - check for contamination of tag#0 BAM file
  • verify BAM ID - check for sample contamination
diff --git a/npg_qc_viewer/root/src/about_qc_checks.tt2 b/npg_qc_viewer/root/src/about_qc_checks.tt2 index cd9441d75..58e33d05c 100644 --- a/npg_qc_viewer/root/src/about_qc_checks.tt2 +++ b/npg_qc_viewer/root/src/about_qc_checks.tt2 @@ -10,25 +10,6 @@
- -

Tag metrics

-

Description

-

This check displays statistics about decoding of the index read. The semantics of the summary table data differs for pools and individual libraries.

-

For a pool the overall decoding percent is displayed, followed by the coefficient of variance that characterises the uniformity of tag distribution in a pool. The background of the table cell is blue if the overall decoding rate is over 80%, for lesser values the background is red.

-

For an individual library (tag), the decoding percent for this library within a pool is displayed. If the number of reads for this tag is at least one tenth the average number of reads per tag for this pool, the background is grey; otherwise, the background is red.

-
-

Coefficient of variance calculation:

-

mean = Σ(xi) ⁄ N
- rms = √( Σ(xi-mean)² ⁄ (N-1))
- coef_of_var = rms/mean * 100

-
- - -

Upstream Tags Check

-

Description

-Check for contamination of tag#0 BAM file by reads from upstream runs -

-

Genotype check

@@ -138,6 +119,46 @@ Any close genotype matches found under a different sample name, allowing homozyg
+ +

RNA-SeQC metrics

+

Description

+

Subset of metrics obtained from Broad Institute's RNA-SeQC package. The following summary statistics are calculated by counting the number of reads that have the given characteristics.

+
+

Mapped Reads

+

rRNA reads are non-duplicate and duplicate reads aligning to rRNA regions as defined in the transcript model definition. rRNA Rate is per total reads.

+
+

Transcript-associated Reads

+

Rates are per mapped read. Exonic Rate is the fraction mapping within exons. Expression Profiling Efficiency is the ratio of exon reads to total reads. Transcripts/Genes Detected is the number of transcripts/genes with at least 5 reads.

+
+

Strand Specificity

+

End 1/2 Sense are the number of End 1 or 2 reads that were sequenced in the sense direction. Similarly, End 1/2 Antisense are the number of End 1 or 2 reads that were sequenced in the antisense direction. End 1/2 Sense % are percentages of intragenic End 1/2 reads that were sequenced in the sense direction.

+
+

Coverage Metrics Highest 1000 Expressed Transcripts

+

The metrics in this table are calculated across the transcripts that were determined to have the highest expression levels. 5' and 3' values are per-base coverage averaged across all top transcripts. 5' and 3' ends are 200 base pairs. Gap % is the total cumulative gap length divided by the total cumulative transcript lengths.

+
+
+ + +

Tag metrics

+

Description

+

This check displays statistics about decoding of the index read. The semantics of the summary table data differs for pools and individual libraries.

+

For a pool the overall decoding percent is displayed, followed by the coefficient of variance that characterises the uniformity of tag distribution in a pool. The background of the table cell is blue if the overall decoding rate is over 80%, for lesser values the background is red.

+

For an individual library (tag), the decoding percent for this library within a pool is displayed. If the number of reads for this tag is at least one tenth the average number of reads per tag for this pool, the background is grey; otherwise, the background is red.

+
+

Coefficient of variance calculation:

+

mean = Σ(xi) ⁄ N
+ rms = √( Σ(xi-mean)² ⁄ (N-1))
+ coef_of_var = rms/mean * 100

+
+ + +

Upstream Tags Check

+

Description

+Check for contamination of tag#0 BAM file by reads from upstream runs +

+ +
+

VerifyBamID contamination check



diff --git a/npg_qc_viewer/root/src/ui_checks/checks_in_full.tt2 b/npg_qc_viewer/root/src/ui_checks/checks_in_full.tt2 index cd82a3d83..37111efae 100644 --- a/npg_qc_viewer/root/src/ui_checks/checks_in_full.tt2 +++ b/npg_qc_viewer/root/src/ui_checks/checks_in_full.tt2 @@ -3,7 +3,7 @@ lane_collection = rl_map.${lane_key}; NEXT IF (!lane_collection || lane_collection.size == 0); temp = lane_collection.sort; - to_remove = ['split stats phix', 'spatial filter', 'split stats', 'rna seqc']; + to_remove = ['split stats phix', 'spatial filter', 'split stats']; IF !run_view; to_remove.push('qX yield', 'gc fraction'); END; diff --git a/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 b/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 new file mode 100644 index 000000000..842d3730e --- /dev/null +++ b/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 @@ -0,0 +1,73 @@ +[% USE Number.Format(THOUSANDS_SEP=',') %] + +[% IF !check.genes_detected.defined; %] +
+

Reason: not an RNA alignment

+
+[% ELSE %] +
mapped reads
+
+ + + + + +
rRNArRNA rate
[% IF check.rrna.defined; check.rrna | format_number; END %][% IF check.rrna_rate.defined; check.rrna_rate | format_number(3,1); END %]
+
+
transcript-associated reads
+
+ + + + + + +
exonic rateexpression profiling efficiencytranscripts/genes detected
[% IF check.exonic_rate.defined; check.exonic_rate | format_number(3,1); END %][% IF check.expression_profiling_efficiency.defined; check.expression_profiling_efficiency | format_number(3,1); END %][% IF check.genes_detected.defined; check.genes_detected | format_number; END %]
+
+
strand specificity
+
+ + + + + + + + + +
end 1 senseend 1 antisenseend 2 senseend 2 antisenseend 1 % senseend 2 % sense
[% IF check.end_1_sense.defined; check.end_1_sense | format_number; END %][% IF check.end_1_antisense.defined; check.end_1_antisense | format_number; END %][% IF check.end_2_sense.defined; check.end_2_sense | format_number; END %][% IF check.end_2_antisense.defined; check.end_2_antisense | format_number; END %][% IF check.end_1_pct_sense.defined; check.end_1_pct_sense | format_number(3,1); END %][% IF check.end_2_pct_sense.defined; check.end_2_pct_sense | format_number(3,1); END %]
+
+
coverage metrics
+
+ + + + + + + +
mean per base cov.mean CVno. covered 3' (norm)no. covered 5' (norm)
[% IF check.mean_per_base_cov.defined; check.mean_per_base_cov | format_number(2,1); END %][% IF check.mean_cv.defined; check.mean_cv | format_number(2,1); END %][% IF check.end_3_norm.defined; check.end_3_norm | format_number(3,1); END %][% IF check.end_5_norm.defined; check.end_5_norm | format_number(3,1); END %]
+
+
+[% IF check.id_run.defined; + IF c.model('NpgDB').resultset('Run').find(id_run).is_tag_set('staging'); + base = base_url _ '/cgi-bin/locate_runfolder'; + run_folder_glob = id_run; + npg_run_row = c.model('NpgDB').resultset('Run').find(id_run); + IF npg_run_row; + rfglob = npg_run_row.folder_path_glob; + rfname = npg_run_row.folder_name; + IF rfglob && rfname; + run_folder_glob = rfglob _ rfname; + END; + END; + rna_seqc_output_dir = check.id_run _ '_' _ check.position; + IF check.tag_index.defined; + rna_seqc_output_dir = rna_seqc_output_dir _ '%23' _ check.tag_index; + END; + rna_seqc_output_dir = rna_seqc_output_dir _ '_rna_seqc' %] +

Other RNA-SeQC metrics.

+[% END; + END %] +
+[% END %] diff --git a/npg_qc_viewer/root/src/ui_lanes/checks_header.tt2 b/npg_qc_viewer/root/src/ui_lanes/checks_header.tt2 index aba5deb33..bca40eaea 100644 --- a/npg_qc_viewer/root/src/ui_lanes/checks_header.tt2 +++ b/npg_qc_viewer/root/src/ui_lanes/checks_header.tt2 @@ -1,24 +1,25 @@ [%- labels = { - qX_yield = 'yield,
Kb' - insert_size = 'quartiles,
bases' adapter = 'adapters,
%' - split_stats = 'aligned
reads,
%' + alignment_filter_metrics = 'target, %' + bam_flagstats = 'mapped %
duplicates %' contamination = 'top two' + gc_bias = 'plot created
' + gc_fraction = 'fraction,
%' + genotype = 'match
mean cvg.
depth
' + insert_size = 'quartiles,
bases' + pulldown_metrics = 'coverage at 20X, %
mean depth per Gb
on bait bases, %
on target bases, %' + qX_yield = 'yield,
Kb' + rna_seqc = 'exonic rate,
rRNA Rate,
mean CV' ref_match = 'top two' sequence_error = 'average
mismatch,
%' + spatial_filter = 'filter fail,
total' + split_stats = 'aligned
reads,
%' tag_decode_stats = 'decode rate %
CV %' - gc_fraction = 'fraction,
%' - gc_bias = 'plot created
' - bam_flagstats = 'mapped %
duplicates %' - genotype = 'match
mean cvg.
depth
' tag_metrics = 'decode rate, %
CV %' - pulldown_metrics = 'coverage at 20X, %
mean depth per Gb
on bait bases, %
on target bases, %' - alignment_filter_metrics = 'target, %' - spatial_filter = 'filter fail,
total' - upstream_tags = 'tag0 reads (%)
perf.match reads (%)' - varify_bam_id = 'pass' - tags_reporters = '' + tags_reporters = '' + upstream_tags = 'tag0 reads (%)
perf.match reads (%)' + varify_bam_id = 'pass' } %] Lane
No diff --git a/npg_qc_viewer/root/src/ui_lanes/lane.tt2 b/npg_qc_viewer/root/src/ui_lanes/lane.tt2 index 66a248921..8fcb8ef38 100644 --- a/npg_qc_viewer/root/src/ui_lanes/lane.tt2 +++ b/npg_qc_viewer/root/src/ui_lanes/lane.tt2 @@ -428,4 +428,21 @@ END [% result.freemix %]
[%- END -%] +[% BLOCK rna_seqc %] +[% IF result.exonic_rate.defined %] + [% result.exonic_rate | format_number(3,1) %] +[% ELSE %] + na +[% END %] +[% IF result.rrna_rate.defined %] + [% result.rrna_rate | format_number(3,1) %] +[% ELSE %] + na +[% END %] +[% IF result.mean_cv.defined %] + [% result.mean_cv | format_number(2,1) %] +[% ELSE %] + na +[% END %] +[% END %] diff --git a/scripts/npgqc_dbix_schema_loader.pl b/scripts/npgqc_dbix_schema_loader.pl index e0ea62ad6..6cf3d2a1d 100755 --- a/scripts/npgqc_dbix_schema_loader.pl +++ b/scripts/npgqc_dbix_schema_loader.pl @@ -8,6 +8,7 @@ use npg_qc::autoqc::results::collection; use npg_qc::autoqc::role::result; +use npg_qc::autoqc::results::collection; our $VERSION = '0'; diff --git a/scripts/upgrade_schema/upgrade_schema-60.x b/scripts/upgrade_schema/upgrade_schema-60.x new file mode 100644 index 000000000..d594287b2 --- /dev/null +++ b/scripts/upgrade_schema/upgrade_schema-60.x @@ -0,0 +1,34 @@ +-- +-- Table structure for table `rna_seqc` +-- + +CREATE TABLE `rna_seqc` ( + `id_rna_seqc` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT 'Auto-generated primary key', + `id_seq_composition` bigint(20) unsigned NOT NULL COMMENT 'A foreign key referencing the id_seq_composition column of the seq_composition table', + `info` text, + `rrna` float unsigned DEFAULT NULL, + `rrna_rate` float unsigned DEFAULT NULL, + `exonic_rate` float unsigned DEFAULT NULL, + `expression_profiling_efficiency` float unsigned DEFAULT NULL, + `genes_detected` float unsigned DEFAULT NULL, + `end_1_sense` float unsigned DEFAULT NULL, + `end_1_antisense` float unsigned DEFAULT NULL, + `end_2_sense` float unsigned DEFAULT NULL, + `end_2_antisense` float unsigned DEFAULT NULL, + `end_1_pct_sense` float unsigned DEFAULT NULL, + `end_2_pct_sense` float unsigned DEFAULT NULL, + `mean_per_base_cov` float unsigned DEFAULT NULL, + `mean_cv` float unsigned DEFAULT NULL, + `end_5_norm` float unsigned DEFAULT NULL, + `end_3_norm` float unsigned DEFAULT 
NULL, + `other_metrics` text, + PRIMARY KEY (`id_rna_seqc`), + UNIQUE KEY `rna_seqc_id_compos_unq` (`id_seq_composition`), + KEY `rna_seqc_compos` (`id_seq_composition`), + CONSTRAINT `rna_seqc_compos` FOREIGN KEY (`id_seq_composition`) REFERENCES `seq_composition` (`id_seq_composition`) ON DELETE NO ACTION ON UPDATE NO ACTION +) ENGINE=InnoDB DEFAULT CHARSET=latin1; +-- +-- +-- +GRANT SELECT ON `rna_seqc` TO nqcro; + diff --git a/t/50-schema-result-RnaSeqc.t b/t/50-schema-result-RnaSeqc.t new file mode 100644 index 000000000..2a5c9aee1 --- /dev/null +++ b/t/50-schema-result-RnaSeqc.t @@ -0,0 +1,56 @@ +use strict; +use warnings; +use Test::More tests => 2; +use Test::Exception; +use File::Temp qw/ tempdir /; +use Cwd qw/getcwd abs_path/; +use Archive::Extract; +use Perl6::Slurp; +use JSON; +use npg_testing::db; + +use_ok('npg_qc::Schema::Result::RnaSeqc'); + + +my $schema = Moose::Meta::Class->create_anon_class( + roles => [qw/npg_testing::db/]) + ->new_object({})->create_test_db(q[npg_qc::Schema]); + +my $tempdir = tempdir( CLEANUP => 1); +my $repos = getcwd . 
q[/t/data/autoqc/rna_seqc]; +my $archive = join q[/], $repos, q[data]; + +my $rs = $schema->resultset('RnaSeqc'); +my $rc = $rs->result_class; + +sub _get_data { + my $file_name = shift; + my $json = slurp join(q[/], $archive, $file_name); + my $values = from_json($json); + foreach my $key (keys %{$values}) { + if (!$rc->has_column($key)) { + delete $values->{$key}; + } + } + return $values; +} + +subtest 'load results with a composition fk' => sub { + plan tests => 4; + + my $values = _get_data('18407_1#7.rna_seqc.json'); + my $fk_row = $schema->resultset('SeqComposition')->create({digest => '45678', size => 2}); + + my $object = $rs->new_result($values); + isa_ok($object, 'npg_qc::Schema::Result::RnaSeqc'); + throws_ok {$object->insert()} + qr/NOT NULL constraint failed: rna_seqc.id_seq_composition/, + 'foreign key referencing the composition table absent - error'; + + $object->id_seq_composition($fk_row->id_seq_composition); + lives_ok { $object->insert() } 'insert with fk is ok'; + my $a_rs = $rs->search({}); + is ($a_rs->count, 1, q[one row created in the table]); +}; + +1; \ No newline at end of file diff --git a/t/60-autoqc-checks-rna_seqc.t b/t/60-autoqc-checks-rna_seqc.t index 917f00998..a5c491d79 100644 --- a/t/60-autoqc-checks-rna_seqc.t +++ b/t/60-autoqc-checks-rna_seqc.t @@ -1,8 +1,9 @@ use strict; use warnings; use Cwd qw/getcwd abs_path/; -use Test::More tests => 17; +use Test::More tests => 5; use Test::Exception; +use Test::Warn; use File::Temp qw/ tempdir /; use_ok ('npg_qc::autoqc::checks::rna_seqc'); @@ -19,47 +20,73 @@ my $repos = getcwd . 
'/t/data/autoqc/rna_seqc'; `touch $dir/RNA-SeQC.jar`; -{ +subtest 'Find CLASSPATH' => sub { + plan tests => 3; my $rnaseqc = npg_qc::autoqc::checks::rna_seqc->new( id_run => 17550, position => 3, tag_index => 8, path => 't/data/autoqc/rna_seqc/data', - repository => $repos, - qc_report_dir => q[t/data],); + repository => $repos,); isa_ok ($rnaseqc, 'npg_qc::autoqc::checks::rna_seqc'); lives_ok { $rnaseqc->result; } 'result object created'; local $ENV{CLASSPATH} = q[]; - throws_ok {npg_qc::autoqc::checks::rna_seqc->new(id_run => 2, path => q[mypath], position => 1, qc_report_dir => q[t/data])} + throws_ok {npg_qc::autoqc::checks::rna_seqc->new(id_run => 2, path => q[mypath], position => 1,)} qr/Can\'t find \'RNA-SeQC\.jar\' because CLASSPATH is not set/, q[Fails to create object when RNA-SeQC.jar not found]; -} +}; -{ +subtest 'Input and output paths' => sub { + plan tests => 3; throws_ok { my $qc = npg_qc::autoqc::checks::rna_seqc->new( id_run => 17550, position => 3, tag_index => 8, path => q[nonexisting], - repository => $repos, - qc_report_dir => q[t/data],); + repository => $repos,); $qc->execute() } qr/directory nonexisting does not exist/, 'execute: error on nonexisting path'; -} - -{ + my $run = 17550; + my $pos = 3; + my $tag = 13; my $check = npg_qc::autoqc::checks::rna_seqc->new( - id_run => 17550, - position => 3, - tag_index => 13, + id_run => $run, + position => $pos, + tag_index => $tag, path => 't/data/autoqc/rna_seqc/data', - repository => $repos, - qc_report_dir => q[t/data],); + repository => $repos); lives_ok { $check->execute } 'no error when input not found'; -} + my $filename_root = $check->result->filename_root; + my $output_dir_shouldbe = join q[/], $check->path, $filename_root.q[_rna_seqc]; + is($check->output_dir, $output_dir_shouldbe, q[output directory is formed correctly]); +}; -{ +subtest 'Parse metrics' => sub { + plan tests => 4; + my $rnaseqc = npg_qc::autoqc::checks::rna_seqc->new( + id_run => 17550, + position => 3, + tag_index => 
8, + path => 't/data/autoqc/rna_seqc/data', + repository => $repos,); + my $metrics_hash; + my $results_hash; + throws_ok {$rnaseqc->_parse_metrics()} qr/No\ such\ file\ t\/data\/autoqc\/rna_seqc\/data\/17550\_3\#8\_rna\_seqc\/metrics\.tsv\:\ cannot\ parse\ RNA-SeQC\ metrics/, + 'error if metrics file is not found where expected'; + $rnaseqc = npg_qc::autoqc::checks::rna_seqc->new( + id_run => 18407, + position => 1, + tag_index => 7, + path => 't/data/autoqc/rna_seqc/data', + repository => $repos,); + lives_ok {$metrics_hash = $rnaseqc->_parse_metrics()} q[parsing RNA-SeQC metrics.tsv ok]; + warning_like {$results_hash = $rnaseqc->_save_results($metrics_hash)} {carped => qr/Value of .* is 'NaN'/}, q[saving results ok - a NaN carp was caught]; + is ($results_hash->{'end_3_norm'}, undef, q[fields with value NaN are skipped]); +}; + +subtest 'Argument input files' => sub { + plan tests => 11; my $ref_repos_dir = join q[/],$dir,'references'; my $ref_dir = join q[/], $ref_repos_dir,'Mus_musculus','GRCm38','all'; `mkdir -p $ref_dir/fasta`; @@ -85,8 +112,7 @@ my $repos = getcwd . '/t/data/autoqc/rna_seqc'; repository => $repos, ref_repository => $ref_repos_dir, transcriptome_repository => $trans_repos_dir, - _alignments_in_bam => 0, - qc_report_dir => q[t/data],); + _alignments_in_bam => 0); is($check->_bam_file, 't/data/autoqc/rna_seqc/data/17550_3#8.bam', 'bam file path for id run 17550 lane 3 tag 8'); lives_ok { $check->execute } 'execution ok for no alignments in BAM'; like ($check->result->comments, qr/BAM file is not aligned/, 'comment when bam file is not aligned'); @@ -97,7 +123,6 @@ my $repos = getcwd . '/t/data/autoqc/rna_seqc'; tag_index => 8, path => 't/data/autoqc/rna_seqc/data', repository => $repos, - qc_report_dir => q[t/data], _ref_genome => q[], transcriptome_repository => $trans_repos_dir,); lives_ok { $check->execute } 'execution ok for no reference genome file'; @@ -109,7 +134,6 @@ my $repos = getcwd . 
'/t/data/autoqc/rna_seqc'; tag_index => 8, path => 't/data/autoqc/rna_seqc/data', repository => $repos, - qc_report_dir => q[t/data], _annotation_gtf => q[], ref_repository => $ref_repos_dir,); lives_ok { $check->execute } 'execution ok for no annotation file'; @@ -125,7 +149,6 @@ my $repos = getcwd . '/t/data/autoqc/rna_seqc'; tag_index => 1, path => 't/data/autoqc/rna_seqc/data', repository => $repos, - qc_report_dir => q[t/data], ref_repository => $ref_repos_dir, transcriptome_repository => $trans_repos_dir,); throws_ok { $check->execute } qr/Binary fasta reference for Danio_rerio, zv9, all does not exist/, @@ -142,11 +165,10 @@ my $repos = getcwd . '/t/data/autoqc/rna_seqc'; path => 't/data/autoqc/rna_seqc/data', repository => $repos, ref_repository => $ref_repos_dir, - transcriptome_repository => $trans_repos_dir, - qc_report_dir => q[t/data],); + transcriptome_repository => $trans_repos_dir,); is($check->_bam_file, 't/data/autoqc/rna_seqc/data/17550_1#1.bam', 'bam file path for id run 17550 lane 1 tag 1'); lives_ok { $check->execute } 'execution ok for no RNA alignment'; like ($check->result->comments, qr/BAM file is not RNA alignment/, 'comment when bam file is not RNA alignment'); -} +}; 1; diff --git a/t/60-autoqc-results-rna_seqc.t b/t/60-autoqc-results-rna_seqc.t index 0e29f9d92..985dc6822 100644 --- a/t/60-autoqc-results-rna_seqc.t +++ b/t/60-autoqc-results-rna_seqc.t @@ -1,15 +1,24 @@ use strict; use warnings; -use Test::More tests => 4; +use Test::More tests => 3; use Test::Exception; use_ok ('npg_qc::autoqc::results::rna_seqc'); -{ - my $r = npg_qc::autoqc::results::rna_seqc->new(id_run => 12, position => 3, path => q[mypath]); +subtest 'Loading check' => sub { + plan tests => 4; + my $r = npg_qc::autoqc::results::rna_seqc->new(id_run => 18407, position => 1, tag_index => 7, path => q[mypath]); isa_ok ($r, 'npg_qc::autoqc::results::rna_seqc'); is($r->check_name(), 'rna seqc', 'check name'); is($r->class_name(), 'rna_seqc', 'class name'); -} + is 
($r->filename4serialization(), '18407_1#7.rna_seqc.json', 'default file name'); +}; + +subtest 'Testing utility methods' => sub { + plan tests => 2; + my $r; + lives_ok {$r = npg_qc::autoqc::results::rna_seqc->load('t/data/autoqc/rna_seqc/data/15911_1#1.rna_seqc.json');} 'load serialised empty result'; + lives_ok {$r = npg_qc::autoqc::results::rna_seqc->load('t/data/autoqc/rna_seqc/data/18407_1#7.rna_seqc.json');} 'load serialised valid result'; +}; 1; \ No newline at end of file diff --git a/t/data/autoqc/rna_seqc/data/15911_1#1.rna_seqc.json b/t/data/autoqc/rna_seqc/data/15911_1#1.rna_seqc.json new file mode 100644 index 000000000..fdcbad542 --- /dev/null +++ b/t/data/autoqc/rna_seqc/data/15911_1#1.rna_seqc.json @@ -0,0 +1 @@ +{"__CLASS__":"npg_qc::autoqc::results::rna_seqc","comments":"BAM file is not RNA alignment","composition":{"__CLASS__":"npg_tracking::glossary::composition-85.5","components":[{"__CLASS__":"npg_tracking::glossary::composition::component::illumina-85.5","id_run":15911,"position":1,"subset":"all","tag_index":1}]},"id_run":15911,"info":{"Check":"npg_qc::autoqc::checks::rna_seqc","Check_version":"0","Jar":"RNA-SeqQC RNA-SeQC.jar"},"other_metrics":{},"path":"data","position":1,"tag_index":1} \ No newline at end of file diff --git a/t/data/autoqc/rna_seqc/data/18407_1#7.rna_seqc.json b/t/data/autoqc/rna_seqc/data/18407_1#7.rna_seqc.json new file mode 100644 index 000000000..a57a854db --- /dev/null +++ b/t/data/autoqc/rna_seqc/data/18407_1#7.rna_seqc.json @@ -0,0 +1 @@ 
+{"__CLASS__":"npg_qc::autoqc::results::rna_seqc","composition":{"__CLASS__":"npg_tracking::glossary::composition-85.4","components":[{"__CLASS__":"npg_tracking::glossary::composition::component::illumina-85.4","id_run":18407,"position":1,"tag_index":7}]},"end_1_antisense":1530,"end_1_pct_sense":6.9908814,"end_1_sense":115,"end_2_antisense":95,"end_2_pct_sense":93.716934,"end_2_sense":1417,"end_3_norm":1.0583231,"end_5_norm":0.7867761,"exonic_rate":0.001790527,"expression_profiling_efficiency":0.0014548181,"genes_detected":12,"id_run":18407,"info":{"Check":"npg_qc::autoqc::checks::rna_seqc","Check_version":"0","Jar":"RNA-SeqQC RNA-SeQC.jar"},"mean_cv":1.4219271,"mean_per_base_cov":2.9021204,"other_metrics":{"Alternative Aligments":"23685","Base Mismatch Rate":"0.001572254","Chimeric Pairs":"9098","Cumul. Gap Length":"62246","Duplication Rate of Mapped":"0.041722357","End 1 Mapping Rate":"0.84337074","End 1 Mismatch Rate":"0.0015389597","End 2 Mapping Rate":"0.78164583","End 2 Mismatch Rate":"0.0016081773","Estimated Library Size":"58717226","Failed Vendor QC Check":"0","Fragment Length Mean":"147","Fragment Length StdDev":"70","Gap %":"0.4247801","Intergenic Rate":"0.99788785","Intragenic Rate":"0.0021107893","Intronic Rate":"3.202623E-4","Mapped":"1495649","Mapped Pairs":"660763","Mapped Unique":"1433247","Mapped Unique Rate of Total":"0.7786085","Mapping Rate":"0.8125083","No. Covered 5'":"21","Note":"2477423","Num. 
Gaps":"227","Read Length":"150","Sample":"15358794","Split Reads":"50","Total Purity Filtered Reads Sequenced":"1840780","Transcripts Detected":"28","Unique Rate of Mapped":"0.95827764","Unpaired Reads":"0"},"path":"data","position":1,"rrna":4257,"rrna_rate":0.0023126067,"tag_index":7} diff --git a/t/data/autoqc/rna_seqc/data/18407_1#7_rna_seqc/metrics.tsv b/t/data/autoqc/rna_seqc/data/18407_1#7_rna_seqc/metrics.tsv new file mode 100644 index 000000000..02d53d585 --- /dev/null +++ b/t/data/autoqc/rna_seqc/data/18407_1#7_rna_seqc/metrics.tsv @@ -0,0 +1,2 @@ +Sample Note End 2 Mapping Rate Chimeric Pairs Intragenic Rate Num. Gaps Exonic Rate Mapping Rate 5' Norm Genes Detected Unique Rate of Mapped 3' Norm Read Length Mean Per Base Cov. End 1 Mismatch Rate Fragment Length StdDev Estimated Library Size Mapped Intergenic Rate Total Purity Filtered Reads Sequenced rRNA Failed Vendor QC Check Mean CV Transcripts Detected Mapped Pairs Cumul. Gap Length Gap % Unpaired Reads Intronic Rate Mapped Unique Rate of Total Expression Profiling Efficiency Mapped Unique End 2 Mismatch Rate End 2 Antisense Alternative Aligments End 2 Sense Fragment Length Mean End 1 Antisense Split Reads Base Mismatch Rate End 1 Sense End 1 % Sense rRNA rate End 1 Mapping Rate No. Covered 5' Duplication Rate of Mapped End 2 % Sense +15358794 2477423 0.78164583 9098 NaN 227 0.001790527 0.8125083 0.7867761 12 0.95827764 NaN 150 2.9021204 0.0015389597 70 58717226 1495649 NaN 1840780 4257 0 1.4219271 28 660763 62246 0.4247801 0 3.202623E-4 0.7786085 0.0014548181 1433247 0.0016081773 95 23685 1417 147 1530 50 0.001572254 115 6.9908814 0.0023126067 0.84337074 21 0.041722357 93.716934