From 5bdd09027d869151393f6ec73486d47d592a152a Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Mon, 10 Aug 2015 14:37:36 +0100 Subject: [PATCH 01/24] add Eclipse's .project file to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e84d919fc..6afe23746 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ blib/ src/*/build/* *~ bower_components +.project From 87ad3df4218abc8f94a647cd7416f643f5c7753b Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Tue, 25 Oct 2016 10:03:38 +0100 Subject: [PATCH 02/24] Initialise undef variables - Done some code reformatting --- lib/npg_qc/autoqc/checks/rna_seqc.pm | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/lib/npg_qc/autoqc/checks/rna_seqc.pm b/lib/npg_qc/autoqc/checks/rna_seqc.pm index fb7b32f85..5a34bee14 100644 --- a/lib/npg_qc/autoqc/checks/rna_seqc.pm +++ b/lib/npg_qc/autoqc/checks/rna_seqc.pm @@ -32,8 +32,8 @@ Readonly::Scalar my $METRICS_FILE_NAME => q[metrics.tsv]; Readonly::Scalar my $MINUS_ONE => -1; Readonly::Hash my %RNASEQC_METRICS_FIELDS_MAPPING => { - '3\' Norm' => 'end_3_norm', - '5\' Norm' => 'end_5_norm', + '3\' Norm' => 'end_3_norm', + '5\' Norm' => 'end_5_norm', 'End 1 % Sense' => 'end_1_pct_sense', 'End 1 Antisense' => 'end_1_antisense', 'End 1 Sense' => 'end_1_sense', @@ -59,11 +59,11 @@ has 'qc_report_dir' => (is => 'ro', isa => 'NpgTrackingDirectory', required => 1,); -has '_java_jar_path' => (is => 'ro', - isa => 'NpgCommonResolvedPathJarFile', - coerce => 1, - default => $RNASEQC_JAR_NAME, - init_arg => undef,); +has '_java_jar_path' => (is => 'ro', + isa => 'NpgCommonResolvedPathJarFile', + coerce => 1, + default => $RNASEQC_JAR_NAME, + init_arg => undef,); has '_ttype_gtf_column' => (is => 'ro', isa => 'Int', @@ -192,7 +192,8 @@ sub _build__ref_rrna { sub _command { my ($self) = @_; - my ($ref_rrna_option, $single_end_option) = q[]; + my $ref_rrna_option = q[]; + my 
$single_end_option = q[]; if(!$self->_is_paired_end){ $single_end_option = q[-singleEnd]; } @@ -246,9 +247,7 @@ override 'execute' => sub { return 1; } my $command = $self->_command(); - $self->result->set_info('Command', $command); carp qq[EXECUTING $command time ]. DateTime->now(); - if (system $command) { my $error = $CHILD_ERROR >> $CHILD_ERROR_SHIFT; croak sprintf "Child %s exited with value %d\n", $command, $error; @@ -294,11 +293,13 @@ sub _save_results { my $attr_name = $RNASEQC_METRICS_FIELDS_MAPPING{$key}; if ($value eq q[?]) { carp qq[Field $attr_name is set to '?', skipping...]; - } else { - $self->result->$attr_name($value); + } else { + $self->result->$attr_name($value); } } + delete $results->{$key}; } + $self->result->other_metrics($results); return; } __PACKAGE__->meta->make_immutable(); From c3b3b07644d1d01de4e61b8d02d09fa194af23cb Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Tue, 25 Oct 2016 10:08:09 +0100 Subject: [PATCH 03/24] Use of npg_qc::autoqc::autoqc->checks_list is deprecated - Use npg_qc::autoqc::results::collection instead to iterate through a list of available results --- scripts/npgqc_dbix_schema_loader.pl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/npgqc_dbix_schema_loader.pl b/scripts/npgqc_dbix_schema_loader.pl index ad3bf5128..b3df1e67b 100755 --- a/scripts/npgqc_dbix_schema_loader.pl +++ b/scripts/npgqc_dbix_schema_loader.pl @@ -8,6 +8,7 @@ use npg_qc::autoqc::autoqc; use npg_qc::autoqc::role::result; +use npg_qc::autoqc::results::collection; our $VERSION = '0'; @@ -26,8 +27,9 @@ my $generic_role = $role_base . 
'result'; my $component = 'InflateColumn::Serializer'; my $flator = 'npg_qc::Schema::Flators'; +my $results = npg_qc::autoqc::results::collection->new(); -foreach my $check (@{npg_qc::autoqc::autoqc->checks_list}) { +foreach my $check (@{$results->checks_list}) { my ($result_name, $dbix_result_name ) = $generic_role->class_names($check); my @roles = ($flator, $generic_role); From c843875c7b8b74ce6edfc407742f618996600e64 Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Wed, 2 Nov 2016 14:27:34 +0000 Subject: [PATCH 04/24] Simplify rna_seqc result object by grouping similar attributes using Readonly::Array - Move all POD at the end of module --- lib/npg_qc/autoqc/results/rna_seqc.pm | 122 ++++++++++++-------------- 1 file changed, 54 insertions(+), 68 deletions(-) diff --git a/lib/npg_qc/autoqc/results/rna_seqc.pm b/lib/npg_qc/autoqc/results/rna_seqc.pm index efa1e7691..5f1e8fec0 100644 --- a/lib/npg_qc/autoqc/results/rna_seqc.pm +++ b/lib/npg_qc/autoqc/results/rna_seqc.pm @@ -1,17 +1,46 @@ -######### -# Author: Ruben Bautista -# Created: 2015-08-13 -# - package npg_qc::autoqc::results::rna_seqc; use Moose; use namespace::autoclean; +use Readonly; -extends qw(npg_qc::autoqc::results::result); +extends 'npg_qc::autoqc::results::result'; our $VERSION = '0'; -## no critic (Documentation::RequirePodAtEnd) + +Readonly::Array my @ATTRIBUTES => qw/ rrna + rrna_rate + exonic_rate + expression_profiling_efficiency + genes_detected + end_1_sense + end_1_antisense + end_2_sense + end_2_antisense + end_1_pct_sense + end_2_pct_sense + mean_per_base_cov + mean_cv + end_5_norm + end_3_norm + /; + +has [ @ATTRIBUTES ] => ( + is => 'rw', + isa => 'Maybe[Num]', + required => 0, +); + +has 'other_metrics' => (isa => 'HashRef[Str]', + is => 'rw', + default => sub { {} }, + required => 0,); + +__PACKAGE__->meta->make_immutable; + +1; + +__END__ =head1 NAME @@ -21,157 +50,112 @@ npg_qc::autoqc::results::rna_seqc =head1 DESCRIPTION +A class for wrapping some of the metrics 
generated by RNA-SeQC. + =head1 SUBROUTINES/METHODS =head2 rrna -rRNA reads are non-duplicate and duplicate reads aligning to rRNA regions as defined in the transcript model definition. +rRNA reads are non-duplicate and duplicate reads aligning to rRNA +regions as defined in the transcript model definition. =cut -has 'rrna' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 rrna_rate Rate of rRNA per total reads. =cut -has 'rrna_rate' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 exonic_rate Fraction mapping within exons. =cut -has 'exonic_rate' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 expression_profiling_efficiency Ratio of exon reads to total reads. =cut -has 'expression_profiling_efficiency' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 genes_detected Number of Genes with at least 5 reads. =cut -has 'genes_detected' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_1_sense Number of End 1 reads that were sequenced in the sense direction. =cut -has 'end_1_sense' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_1_antisense Number of End 1 reads that were sequenced in the antisense direction. =cut -has 'end_1_antisense' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_2_sense Number of End 1 reads that were sequenced in the sense direction. =cut -has 'end_2_sense' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_2_antisense Number of End 2 reads that were sequenced in the antisense direction. =cut -has 'end_2_antisense' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_1_pct_sense -Percentage of intragenic End 1 reads that were sequenced in the sense direction. +Percentage of intragenic End 1 reads that were sequenced in the sense +direction. 
=cut -has 'end_1_pct_sense' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_2_pct_sense -Percentage of intragenic End 2 reads that were sequenced in the sense direction. +Percentage of intragenic End 2 reads that were sequenced in the sense +direction. =cut -has 'end_2_pct_sense' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 mean_per_base_cov -Mean Per Base Coverage of the middle 1000 expressed transcripts determined to have the highest expression levels. +Mean Per Base Coverage of the middle 1000 expressed transcripts +determined to have the highest expression levels. =cut -has 'mean_per_base_cov' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 mean_cv -Mean Coverage of the middle 1000 expressed transcripts determined to have the highest expression levels. +Mean Coverage of the middle 1000 expressed transcripts determined to +have the highest expression levels. =cut -has 'mean_cv' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_5_norm -Norm denotes that the end coverage is divided by the mean coverage for that transcript. +Norm denotes that the end coverage is divided by the mean coverage +for that transcript. =cut -has 'end_5_norm' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_3_norm -Norm denotes that the end coverage is divided by the mean coverage for that transcript. +Norm denotes that the end coverage is divided by the mean coverage +for that transcript. 
=cut -has 'end_3_norm' => (isa => 'Maybe[Num]', - is => 'rw', - required => 0,); =head2 end_5_norm All remaining RNA-SeQC metrics as a key-values pairs =cut -has 'other_metrics' => (isa => 'HashRef', - is => 'rw', - required => 0,); - -__PACKAGE__->meta->make_immutable; - -1; - -__END__ =head1 DIAGNOSTICS @@ -183,6 +167,8 @@ __END__ =item Moose +=item MooseX::StrictConstructor + =item namespace::autoclean =item npg_qc::autoqc::results::result From 1875a32bffd407c6fb486f1ce1ae3b22fd231ed4 Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Tue, 8 Nov 2016 11:56:24 +0000 Subject: [PATCH 05/24] Store RNA-SeQC check JSON in npq_qc database - Update data/schema.txt: added new table to npg_qc database to store QC check: rna_seqc - New file: lib/npg_qc/Schema/Result/RnaSeqc.pm: added new object for storage using DBIx - Schema::Result::SeqComposition was updated with a new ER relationship - rna_seqc is no longer a non-storable qc check and does not have to be skiped by qc_store any more. --- data/schema.txt | 77 ++-- lib/npg_qc/Schema/Result/RnaSeqc.pm | 399 +++++++++++++++++++++ lib/npg_qc/Schema/Result/SeqComposition.pm | 19 +- lib/npg_qc/autoqc/qc_store.pm | 3 - 4 files changed, 473 insertions(+), 25 deletions(-) create mode 100644 lib/npg_qc/Schema/Result/RnaSeqc.pm diff --git a/data/schema.txt b/data/schema.txt index 7b29e697f..638dd813c 100644 --- a/data/schema.txt +++ b/data/schema.txt @@ -1,8 +1,8 @@ -- MySQL dump 10.13 Distrib 5.5.41, for debian-linux-gnu (x86_64) -- --- Host: npgtest-db Database: npgtest_mg8qc +-- Host: npgtest-db Database: npgtest_rb11 -- ------------------------------------------------------ --- Server version 5.5.31-log +-- Server version 5.7.13-log /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; @@ -43,7 +43,7 @@ CREATE TABLE `adapter` ( `tag_index` bigint(20) NOT NULL DEFAULT '-1', PRIMARY KEY (`id_adapter`), UNIQUE KEY `unq_run_lane_adapter` 
(`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -64,7 +64,7 @@ CREATE TABLE `alignment_filter_metrics` ( `all_metrics` text, PRIMARY KEY (`id_alignment_filter_metrics`), UNIQUE KEY `unq_run_lane_afmetrics` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -203,7 +203,7 @@ CREATE TABLE `bam_flagstats` ( `read_pairs_examined` bigint(20) unsigned DEFAULT NULL, PRIMARY KEY (`id_bam_flagstats`), UNIQUE KEY `unq_run_lane_index_sp_flag` (`id_run`,`position`,`human_split`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -560,7 +560,7 @@ CREATE TABLE `gc_bias` ( `tag_index` bigint(20) NOT NULL DEFAULT '-1', PRIMARY KEY (`id_gc_bias`), UNIQUE KEY `unq_run_lane_gc_bias` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -588,7 +588,7 @@ CREATE TABLE `gc_fraction` ( `tag_index` bigint(20) NOT NULL DEFAULT '-1', PRIMARY KEY (`id_gc_fraction`), UNIQUE KEY `unq_run_lane_gc_fraction` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -709,7 +709,7 @@ CREATE TABLE `insert_size` ( `norm_fit_modes` text, PRIMARY KEY (`id_insert_size`), UNIQUE KEY `unq_run_lane_insert_size` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1035,7 +1035,7 @@ CREATE TABLE `pulldown_metrics` ( `other_metrics` text, PRIMARY KEY (`id_pulldown_metrics`), UNIQUE KEY `unq_run_lane_pdmetrics` 
(`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1063,7 +1063,7 @@ CREATE TABLE `qx_yield` ( `tag_index` bigint(20) NOT NULL DEFAULT '-1', PRIMARY KEY (`id_qx_yield`), UNIQUE KEY `unq_run_lane_qx_yield` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1106,7 +1106,7 @@ CREATE TABLE `ref_match` ( `info` text, PRIMARY KEY (`id_ref_match`), UNIQUE KEY `unq_run_lane_ref_match` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1130,6 +1130,43 @@ CREATE TABLE `ref_snp_info` ( ) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; +-- +-- Table structure for table `rna_seqc` +-- + +DROP TABLE IF EXISTS `rna_seqc`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `rna_seqc` ( + `id_rna_seqc` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT 'Auto-generated primary key', + `id_seq_composition` bigint(20) unsigned NOT NULL COMMENT 'A foreign key referencing the id_seq_composition column of the seq_composition table', + `path` varchar(256) DEFAULT NULL, + `info` text, + `rrna` bigint(20) unsigned DEFAULT NULL, + `rrna_rate` bigint(20) unsigned DEFAULT NULL, + `exonic_rate` bigint(20) unsigned DEFAULT NULL, + `expression_profiling_efficiency` bigint(20) unsigned DEFAULT NULL, + `genes_detected` bigint(20) unsigned DEFAULT NULL, + `end_1_sense` bigint(20) unsigned DEFAULT NULL, + `end_1_antisense` bigint(20) unsigned DEFAULT NULL, + `end_2_sense` bigint(20) unsigned DEFAULT NULL, + `end_2_antisense` bigint(20) unsigned DEFAULT NULL, + `end_1_pct_sense` bigint(20) unsigned DEFAULT NULL, + 
`end_2_pct_sense` bigint(20) unsigned DEFAULT NULL, + `mean_per_base_cov` bigint(20) unsigned DEFAULT NULL, + `mean_cv` bigint(20) unsigned DEFAULT NULL, + `end_5_norm` bigint(20) unsigned DEFAULT NULL, + `end_3_norm` bigint(20) unsigned DEFAULT NULL, + `other_metrics` text, + `metrics` mediumblob COMMENT 'Compressed metrics.tsv metrics file content', + `date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Date the record was created', + `iscurrent` tinyint(1) unsigned NOT NULL DEFAULT '1' COMMENT 'Boolean flag to indicate that the record is current, defaults to 1', + PRIMARY KEY (`id_rna_seqc`), + KEY `rna_seqc_compos` (`id_seq_composition`), + CONSTRAINT `rna_seqc_compos` FOREIGN KEY (`id_seq_composition`) REFERENCES `seq_composition` (`id_seq_composition`) ON DELETE NO ACTION ON UPDATE NO ACTION +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; + -- -- Table structure for table `run_and_pair` -- @@ -1281,7 +1318,7 @@ CREATE TABLE `samtools_stats` ( PRIMARY KEY (`id_samtools_stats`), UNIQUE KEY `unq_seqstats` (`id_seq_composition`,`filter`), CONSTRAINT `seq_sum_compos_ss` FOREIGN KEY (`id_seq_composition`) REFERENCES `seq_composition` (`id_seq_composition`) ON DELETE NO ACTION ON UPDATE NO ACTION -) ENGINE=InnoDB DEFAULT CHARSET=latin1; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1301,7 +1338,7 @@ CREATE TABLE `seq_component` ( PRIMARY KEY (`id_seq_component`), UNIQUE KEY `unq_seq_compon_d` (`digest`), KEY `unq_seq_compos_rp` (`id_run`,`position`,`tag_index`,`subset`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1321,7 +1358,7 @@ CREATE TABLE `seq_component_composition` ( KEY `seq_comcom_cmpos` (`id_seq_composition`,`size`), CONSTRAINT `seq_comcom_cmpon` FOREIGN KEY (`id_seq_component`) REFERENCES `seq_component` 
(`id_seq_component`) ON DELETE NO ACTION ON UPDATE NO ACTION, CONSTRAINT `seq_comcom_cmpos` FOREIGN KEY (`id_seq_composition`, `size`) REFERENCES `seq_composition` (`id_seq_composition`, `size`) ON DELETE NO ACTION ON UPDATE NO ACTION -) ENGINE=InnoDB DEFAULT CHARSET=latin1; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1338,7 +1375,7 @@ CREATE TABLE `seq_composition` ( PRIMARY KEY (`id_seq_composition`), UNIQUE KEY `unq_seq_compos_d` (`digest`), UNIQUE KEY `unq_seq_compos_ps` (`id_seq_composition`,`size`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1379,7 +1416,7 @@ CREATE TABLE `sequence_error` ( `sequence_type` varchar(25) NOT NULL DEFAULT 'default', PRIMARY KEY (`id_sequence_error`), UNIQUE KEY `unq_rlts_sequence_error` (`id_run`,`position`,`tag_index`,`sequence_type`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1402,7 +1439,7 @@ CREATE TABLE `sequence_summary` ( PRIMARY KEY (`id_sequence_summary`), KEY `seq_sum_compos` (`id_seq_composition`), CONSTRAINT `seq_sum_compos` FOREIGN KEY (`id_seq_composition`) REFERENCES `seq_composition` (`id_seq_composition`) ON DELETE NO ACTION ON UPDATE NO ACTION -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1565,7 +1602,7 @@ CREATE TABLE `tag_metrics` ( `tag_index` bigint(20) NOT NULL DEFAULT '-1', PRIMARY KEY (`id_tag_metrics`), UNIQUE KEY `unq_run_lane_tagmetrics` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1658,7 +1695,7 @@ CREATE TABLE `upstream_tags` ( `tag_index` bigint(20) NOT NULL DEFAULT 
'-1', PRIMARY KEY (`id_upstream_tags`), UNIQUE KEY `unq_run_lane_upstreamtags` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1697,4 +1734,4 @@ CREATE TABLE `verify_bam_id` ( /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; --- Dump completed on 2016-02-25 13:46:17 +-- Dump completed on 2016-11-08 10:42:54 diff --git a/lib/npg_qc/Schema/Result/RnaSeqc.pm b/lib/npg_qc/Schema/Result/RnaSeqc.pm new file mode 100644 index 000000000..dccdb027b --- /dev/null +++ b/lib/npg_qc/Schema/Result/RnaSeqc.pm @@ -0,0 +1,399 @@ + +package npg_qc::Schema::Result::RnaSeqc; + +# Created by DBIx::Class::Schema::Loader +# DO NOT MODIFY THE FIRST PART OF THIS FILE + +##no critic(RequirePodAtEnd RequirePodLinksIncludeText ProhibitMagicNumbers ProhibitEmptyQuotes) + +=head1 NAME + +npg_qc::Schema::Result::RnaSeqc + +=cut + +use strict; +use warnings; + +use Moose; +use MooseX::NonMoose; +use MooseX::MarkAsMethods autoclean => 1; +extends 'DBIx::Class::Core'; + +=head1 ADDITIONAL CLASSES USED + +=over 4 + +=item * L + +=back + +=cut + +use namespace::autoclean; + +=head1 COMPONENTS LOADED + +=over 4 + +=item * L + +=item * L + +=back + +=cut + +__PACKAGE__->load_components('InflateColumn::DateTime', 'InflateColumn::Serializer'); + +=head1 TABLE: C + +=cut + +__PACKAGE__->table('rna_seqc'); + +=head1 ACCESSORS + +=head2 id_rna_seqc + + data_type: 'bigint' + extra: {unsigned => 1} + is_auto_increment: 1 + is_nullable: 0 + +Auto-generated primary key + +=head2 id_seq_composition + + data_type: 'bigint' + extra: {unsigned => 1} + is_foreign_key: 1 + is_nullable: 0 + +A foreign key referencing the id_seq_composition column of the seq_composition table + +=head2 path + + data_type: 'varchar' + is_nullable: 1 + size: 256 + +=head2 info + + data_type: 'text' + is_nullable: 1 + +=head2 rrna + + data_type: 
'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 rrna_rate + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 exonic_rate + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 expression_profiling_efficiency + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 genes_detected + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_1_sense + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_1_antisense + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_2_sense + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_2_antisense + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_1_pct_sense + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_2_pct_sense + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 mean_per_base_cov + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 mean_cv + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_5_norm + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 end_3_norm + + data_type: 'bigint' + extra: {unsigned => 1} + is_nullable: 1 + +=head2 other_metrics + + data_type: 'text' + is_nullable: 1 + +=head2 metrics + + data_type: 'mediumblob' + is_nullable: 1 + +Compressed metrics.tsv metrics file content + +=head2 date + + data_type: 'timestamp' + datetime_undef_if_invalid: 1 + default_value: current_timestamp + is_nullable: 0 + +Date the record was created + +=head2 iscurrent + + data_type: 'tinyint' + default_value: 1 + extra: {unsigned => 1} + is_nullable: 0 + +Boolean flag to indicate that the record is current, defaults to 1 + +=cut + +__PACKAGE__->add_columns( + 'id_rna_seqc', + { + data_type => 'bigint', + extra => { unsigned => 1 }, + is_auto_increment => 1, + is_nullable => 0, + }, + 
'id_seq_composition', + { + data_type => 'bigint', + extra => { unsigned => 1 }, + is_foreign_key => 1, + is_nullable => 0, + }, + 'path', + { data_type => 'varchar', is_nullable => 1, size => 256 }, + 'info', + { data_type => 'text', is_nullable => 1 }, + 'rrna', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'rrna_rate', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'exonic_rate', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'expression_profiling_efficiency', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'genes_detected', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_1_sense', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_1_antisense', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_2_sense', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_2_antisense', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_1_pct_sense', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_2_pct_sense', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'mean_per_base_cov', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'mean_cv', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_5_norm', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'end_3_norm', + { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + 'other_metrics', + { data_type => 'text', is_nullable => 1 }, + 'metrics', + { data_type => 'mediumblob', is_nullable => 1 }, + 'date', + { + data_type => 'timestamp', + datetime_undef_if_invalid => 1, + default_value => \'current_timestamp', + is_nullable => 0, + }, + 'iscurrent', + { + data_type => 'tinyint', + default_value => 
1, + extra => { unsigned => 1 }, + is_nullable => 0, + }, +); + +=head1 PRIMARY KEY + +=over 4 + +=item * L + +=back + +=cut + +__PACKAGE__->set_primary_key('id_rna_seqc'); + +=head1 RELATIONS + +=head2 seq_composition + +Type: belongs_to + +Related object: L + +=cut + +__PACKAGE__->belongs_to( + 'seq_composition', + 'npg_qc::Schema::Result::SeqComposition', + { id_seq_composition => 'id_seq_composition' }, + { is_deferrable => 1, on_delete => 'NO ACTION', on_update => 'NO ACTION' }, +); + +=head1 L ROLES APPLIED + +=over 4 + +=item * L + +=item * L + +=back + +=cut + + +with 'npg_qc::Schema::Flators', 'npg_qc::autoqc::role::result'; + + +# Created by DBIx::Class::Schema::Loader v0.07045 @ 2016-10-25 14:50:10 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:O7WLo51sSmOzKhK3AQW5LQ + +__PACKAGE__->set_flators4non_scalar(qw( other_metrics info )); + + +our $VERSION = '0'; + +# You can replace this text with custom code or comments, and it will be preserved on regeneration +__PACKAGE__->meta->make_immutable; +1; +__END__ + +=head1 SYNOPSIS + +=head1 DESCRIPTION + +Result class definition in DBIx binding for npg-qc database. + +=head1 DIAGNOSTICS + +=head1 CONFIGURATION AND ENVIRONMENT + +=head1 SUBROUTINES/METHODS + +=head1 DEPENDENCIES + +=over + +=item strict + +=item warnings + +=item Moose + +=item namespace::autoclean + +=item MooseX::NonMoose + +=item MooseX::MarkAsMethods + +=item DBIx::Class::Core + +=item DBIx::Class::InflateColumn::DateTime + +=item DBIx::Class::InflateColumn::Serializer + +=back + +=head1 INCOMPATIBILITIES + +=head1 BUGS AND LIMITATIONS + +=head1 AUTHOR + +Ruben Bautista Erb11@sanger.ac.ukE + +=head1 LICENSE AND COPYRIGHT + +Copyright (C) 2016 GRL + +This file is part of NPG. + +NPG is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +=cut + diff --git a/lib/npg_qc/Schema/Result/SeqComposition.pm b/lib/npg_qc/Schema/Result/SeqComposition.pm index 20c5d1617..bbe71c2ea 100644 --- a/lib/npg_qc/Schema/Result/SeqComposition.pm +++ b/lib/npg_qc/Schema/Result/SeqComposition.pm @@ -135,6 +135,21 @@ __PACKAGE__->add_unique_constraint('unq_seq_compos_ps', ['id_seq_composition', ' =head1 RELATIONS +=head2 rna_seqcs + +Type: has_many + +Related object: L + +=cut + +__PACKAGE__->has_many( + 'rna_seqcs', + 'npg_qc::Schema::Result::RnaSeqc', + { 'foreign.id_seq_composition' => 'self.id_seq_composition' }, + { cascade_copy => 0, cascade_delete => 0 }, +); + =head2 samtools_stats Type: has_many @@ -184,8 +199,8 @@ __PACKAGE__->has_many( ); -# Created by DBIx::Class::Schema::Loader v0.07043 @ 2015-09-09 17:35:44 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:zzx4SOq+Lh95G58mYLuWWw +# Created by DBIx::Class::Schema::Loader v0.07045 @ 2016-10-25 14:50:10 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! 
md5sum:8NLCyZyGS5x1emB/RF7omg # You can replace this text with custom code or comments, and it will be preserved on regeneration diff --git a/lib/npg_qc/autoqc/qc_store.pm b/lib/npg_qc/autoqc/qc_store.pm index 6a0d4c201..41527856d 100644 --- a/lib/npg_qc/autoqc/qc_store.pm +++ b/lib/npg_qc/autoqc/qc_store.pm @@ -14,8 +14,6 @@ use npg_qc::autoqc::results::collection; our $VERSION = '0'; -Readonly::Scalar my $NON_STORABLE_CHECK => qr/rna_seqc/sm; - ## no critic (Documentation::RequirePodAtEnd Subroutines::ProhibitManyArgs) =head1 NAME @@ -191,7 +189,6 @@ sub run_from_db { return $c; } foreach my $check_name (@{$c->checks_list()}) { - next if ($check_name =~ $NON_STORABLE_CHECK); my $dbix_query = { 'id_run' => $query->id_run}; if (@{$query->positions}) { $dbix_query->{'position'} = $query->positions; From a8fc23bf4ac7622b16901caec92b8e01f335760f Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Tue, 8 Nov 2016 14:44:24 +0000 Subject: [PATCH 06/24] Add RNA-SeQC check template to npg_qc_viewer - Create new template file for RNA-SeQC check: npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 - about.tt2: Rearrange list of available checks alphabetically and - add rna_seqc to the list of available checks - about_qc_checks.tt2: rearrange information about qc checks alphabetically and - add "about" information for rna_seqc metrics --- npg_qc_viewer/root/src/about.tt2 | 17 +++--- npg_qc_viewer/root/src/about_qc_checks.tt2 | 59 +++++++++++++------ npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 | 29 +++++++++ 3 files changed, 78 insertions(+), 27 deletions(-) create mode 100644 npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 diff --git a/npg_qc_viewer/root/src/about.tt2 b/npg_qc_viewer/root/src/about.tt2 index 6c864fa19..a80cb4587 100644 --- a/npg_qc_viewer/root/src/about.tt2 +++ b/npg_qc_viewer/root/src/about.tt2 @@ -13,18 +13,19 @@

Currently implemented checksLink to information about qc checks

    -
  • qX yield (threshold quality 20)
  • -
  • insert size
  • adapter
  • -
  • ref_match
  • -
  • split statistics
  • -
  • sequence mismatch
  • -
  • gc fraction
  • -
  • qc bias
  • -
  • tag metrics (formely tag decode stats)
  • bam flag stats
  • fastqcheck files rendering as heatmaps
  • +
  • gc fraction
  • genotype - sample ID check (Sequenom genotypes)
  • +
  • insert size
  • +
  • gc bias
  • +
  • qX yield (threshold quality 20)
  • +
  • ref_match
  • +
  • rna seqc (subset)
  • +
  • sequence mismatch
  • +
  • split statistics
  • +
  • tag metrics (formerly tag decode stats)
  • upstream tags - check for contamination of tag#0 BAM file
  • verify BAM ID - check for sample contamination
diff --git a/npg_qc_viewer/root/src/about_qc_checks.tt2 b/npg_qc_viewer/root/src/about_qc_checks.tt2 index cd9441d75..305b85d7a 100644 --- a/npg_qc_viewer/root/src/about_qc_checks.tt2 +++ b/npg_qc_viewer/root/src/about_qc_checks.tt2 @@ -10,25 +10,6 @@
- -

Tag metrics

-

Description

-

This check displays statistics about decoding of the index read. The semantics of the summary table data differs for pools and individual libraries.

-

For a pool the overall decoding percent is displayed, followed by the coefficient of variance that characterises the uniformity of tag distribution in a pool. The background of the table cell is blue if the overall decoding rate is over 80%, for lesser values the background is red.

-

For an individual library (tag), the decoding percent for this library within a pool is displayed. If the number of reads for this tag is at least one tenth the average number of reads per tag for this pool, the background is grey; otherwise, the background is red.

-
-

Coefficient of variance calculation:

-

mean = Σ(xi) ⁄ N
- rms = √( Σ(xi-mean)² ⁄ (N-1))
- coef_of_var = rms/mean * 100

-
- - -

Upstream Tags Check

-

Description

-Check for contamination of tag#0 BAM file by reads from upstream runs -

-

Genotype check

@@ -138,6 +119,46 @@ Any close genotype matches found under a different sample name, allowing homozyg
+ +

RNA-SeQC metrics

+

Description

+

Subset of metrics obtained from Borad Institute's RNA-SeQC package. The following summary statistics are calculated by counting the number of reads that have the given characteristics.

+
+

Mapped Reads

+

rRNA reads are non-duplicate and duplicate reads aligning to rRNA regions as defined in the transcript model definition. rRNA Rate is per total reads.

+
+

Transcript-associated Reads

+

Rates are per mapped read. Exonic Rate is the fraction mapping within exons. Expression Profiling Efficiency is the ratio of exon reads to total reads. Transcripts/Genes Detected is the number of transcripts/genes with at least 5 reads.

+
+

Strand Specificity

+

End 1/2 Sense are the number of End 1 or 2 reads that were sequenced in the sense direction. Similarly, End 1/2 Antisense are the number of End 1 or 2 reads that were sequenced in the antisense direction. End 1/2 Sense % are percentages of intragenic End 1/2 reads that were sequenced in the sense direction.

+
+

Coverage Metrics Highest 1000 Expressed Transcripts

+

The metrics in this table are calculated across the transcripts that were determined to have the highest expression levels. 5' and 3' values are per-base coverage averaged across all top transcripts. 5' and 3' ends are 200 base pairs. Gap % is the total cumulative gap length divided by the total cumulative transcript lengths.

+
+
+ + +

Tag metrics

+

Description

+

This check displays statistics about decoding of the index read. The semantics of the summary table data differs for pools and individual libraries.

+

For a pool the overall decoding percent is displayed, followed by the coefficient of variance that characterises the uniformity of tag distribution in a pool. The background of the table cell is blue if the overall decoding rate is over 80%, for lesser values the background is red.

+

For an individual library (tag), the decoding percent for this library within a pool is displayed. If the number of reads for this tag is at least one tenth the average number of reads per tag for this pool, the background is grey; otherwise, the background is red.

+
+

Coefficient of variance calculation:

+

mean = Σ(xi) ⁄ N
+ rms = √( Σ(xi-mean)² ⁄ (N-1))
+ coef_of_var = rms/mean * 100

+
+ + +

Upstream Tags Check

+

Description

+Check for contamination of tag#0 BAM file by reads from upstream runs +

+ +
+

VerifyBamID contamination check



diff --git a/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 b/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 new file mode 100644 index 000000000..065db4945 --- /dev/null +++ b/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 @@ -0,0 +1,29 @@ +[% USE Number.Format(THOUSANDS_SEP=',') %] + +
+ + + + + + + + + + + + + + + + + + + [%- IF check.criterion.defined -%] + + [%- END -%] + +
3' Norm [% check.end_3_norm %]
5' Norm [% check.end_5_norm %]
End 1 % Sense [% check.end_1_pct_sense %]
End 1 Antisense [% check.end_1_antisense %]
End 1 Sense [% check.end_1_sense %]
End 2 % Sense [% check.end_2_pct_sense %]
End 2 Antisense [% check.end_2_antisense %]
End 2 Sense [% check.end_2_sense %]
Exonic Rate [% check.exonic_rate %]
Expression Profiling Efficiency [% check.expression_profiling_efficiency %]
Genes Detected [% check.genes_detected %]
Mean CV [% check.mean_cv %]
Mean Per Base Cov. [% check.mean_per_base_cov %]
rRNA [% check.rrna %]
rRNA rate [% check.rrna_rate %]
Pass criterion [% check.criterion FILTER html %]
+ +
+ From b0287ba81415432d46c48fb8a02ec5a867fe857c Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Tue, 8 Nov 2016 14:56:02 +0000 Subject: [PATCH 07/24] Update npg_qc_viewer/Changes --- npg_qc_viewer/Changes | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/npg_qc_viewer/Changes b/npg_qc_viewer/Changes index 04eb21463..1ecb48c5d 100644 --- a/npg_qc_viewer/Changes +++ b/npg_qc_viewer/Changes @@ -1,6 +1,7 @@ Changes for NPG SeqQC - add composite alternate matches to genotype check display + - add metrics for RNA-SeQC qc check: new template and an "about" entry with information explaining metrics release 20.6 - added norm_fit information @@ -232,7 +233,7 @@ Links within the SeqQC pages are shown as blue arrows - the focus icon (4 outward arrows) to the left of the pool/library name takes you to the page of all lanes/plexes of this library - the focus icon to the left of the run number takes you to the main run page -New ± icon on the headers for lane results and an individual result: +New ± icon on the headers for lane results and an individual result: - collapse/expand relevant part of the page. release 8.4 From f030757cfec087a15ca0d4966a70336fb1357b8a Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Tue, 15 Nov 2016 11:11:04 +0000 Subject: [PATCH 08/24] Change type of RNA-SeQC table columns and other bug fixes - Bug fix: mega bug in which the value of metrics were being rounded up because the type of the columns was integer instead of floating point. - Bug fix: Value of "End 2 % Sense" is not being stored because of a trailing \n (being the last column). Use chomp on keys to fix. 
--- data/schema.txt | 66 ++++++++++----------- lib/npg_qc/Schema/Result/RnaSeqc.pm | 86 +++++++++++++++++----------- lib/npg_qc/autoqc/checks/rna_seqc.pm | 1 + 3 files changed, 86 insertions(+), 67 deletions(-) diff --git a/data/schema.txt b/data/schema.txt index 638dd813c..b6230ad79 100644 --- a/data/schema.txt +++ b/data/schema.txt @@ -43,7 +43,7 @@ CREATE TABLE `adapter` ( `tag_index` bigint(20) NOT NULL DEFAULT '-1', PRIMARY KEY (`id_adapter`), UNIQUE KEY `unq_run_lane_adapter` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -64,7 +64,7 @@ CREATE TABLE `alignment_filter_metrics` ( `all_metrics` text, PRIMARY KEY (`id_alignment_filter_metrics`), UNIQUE KEY `unq_run_lane_afmetrics` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -203,7 +203,7 @@ CREATE TABLE `bam_flagstats` ( `read_pairs_examined` bigint(20) unsigned DEFAULT NULL, PRIMARY KEY (`id_bam_flagstats`), UNIQUE KEY `unq_run_lane_index_sp_flag` (`id_run`,`position`,`human_split`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -560,7 +560,7 @@ CREATE TABLE `gc_bias` ( `tag_index` bigint(20) NOT NULL DEFAULT '-1', PRIMARY KEY (`id_gc_bias`), UNIQUE KEY `unq_run_lane_gc_bias` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -588,7 +588,7 @@ CREATE TABLE `gc_fraction` ( `tag_index` bigint(20) NOT NULL DEFAULT '-1', PRIMARY KEY (`id_gc_fraction`), UNIQUE KEY `unq_run_lane_gc_fraction` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = 
@saved_cs_client */; -- @@ -709,7 +709,7 @@ CREATE TABLE `insert_size` ( `norm_fit_modes` text, PRIMARY KEY (`id_insert_size`), UNIQUE KEY `unq_run_lane_insert_size` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1035,7 +1035,7 @@ CREATE TABLE `pulldown_metrics` ( `other_metrics` text, PRIMARY KEY (`id_pulldown_metrics`), UNIQUE KEY `unq_run_lane_pdmetrics` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1063,7 +1063,7 @@ CREATE TABLE `qx_yield` ( `tag_index` bigint(20) NOT NULL DEFAULT '-1', PRIMARY KEY (`id_qx_yield`), UNIQUE KEY `unq_run_lane_qx_yield` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1106,7 +1106,7 @@ CREATE TABLE `ref_match` ( `info` text, PRIMARY KEY (`id_ref_match`), UNIQUE KEY `unq_run_lane_ref_match` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1142,21 +1142,21 @@ CREATE TABLE `rna_seqc` ( `id_seq_composition` bigint(20) unsigned NOT NULL COMMENT 'A foreign key referencing the id_seq_composition column of the seq_composition table', `path` varchar(256) DEFAULT NULL, `info` text, - `rrna` bigint(20) unsigned DEFAULT NULL, - `rrna_rate` bigint(20) unsigned DEFAULT NULL, - `exonic_rate` bigint(20) unsigned DEFAULT NULL, - `expression_profiling_efficiency` bigint(20) unsigned DEFAULT NULL, - `genes_detected` bigint(20) unsigned DEFAULT NULL, - `end_1_sense` bigint(20) unsigned DEFAULT NULL, - `end_1_antisense` bigint(20) unsigned DEFAULT NULL, - `end_2_sense` bigint(20) unsigned DEFAULT NULL, - `end_2_antisense` bigint(20) unsigned DEFAULT 
NULL, - `end_1_pct_sense` bigint(20) unsigned DEFAULT NULL, - `end_2_pct_sense` bigint(20) unsigned DEFAULT NULL, - `mean_per_base_cov` bigint(20) unsigned DEFAULT NULL, - `mean_cv` bigint(20) unsigned DEFAULT NULL, - `end_5_norm` bigint(20) unsigned DEFAULT NULL, - `end_3_norm` bigint(20) unsigned DEFAULT NULL, + `rrna` float unsigned DEFAULT NULL, + `rrna_rate` float unsigned DEFAULT NULL, + `exonic_rate` float unsigned DEFAULT NULL, + `expression_profiling_efficiency` float unsigned DEFAULT NULL, + `genes_detected` float unsigned DEFAULT NULL, + `end_1_sense` float unsigned DEFAULT NULL, + `end_1_antisense` float unsigned DEFAULT NULL, + `end_2_sense` float unsigned DEFAULT NULL, + `end_2_antisense` float unsigned DEFAULT NULL, + `end_1_pct_sense` float unsigned DEFAULT NULL, + `end_2_pct_sense` float unsigned DEFAULT NULL, + `mean_per_base_cov` float unsigned DEFAULT NULL, + `mean_cv` float unsigned DEFAULT NULL, + `end_5_norm` float unsigned DEFAULT NULL, + `end_3_norm` float unsigned DEFAULT NULL, `other_metrics` text, `metrics` mediumblob COMMENT 'Compressed metrics.tsv metrics file content', `date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Date the record was created', @@ -1318,7 +1318,7 @@ CREATE TABLE `samtools_stats` ( PRIMARY KEY (`id_samtools_stats`), UNIQUE KEY `unq_seqstats` (`id_seq_composition`,`filter`), CONSTRAINT `seq_sum_compos_ss` FOREIGN KEY (`id_seq_composition`) REFERENCES `seq_composition` (`id_seq_composition`) ON DELETE NO ACTION ON UPDATE NO ACTION -) ENGINE=InnoDB DEFAULT CHARSET=latin1; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1338,7 +1338,7 @@ CREATE TABLE `seq_component` ( PRIMARY KEY (`id_seq_component`), UNIQUE KEY `unq_seq_compon_d` (`digest`), KEY `unq_seq_compos_rp` (`id_run`,`position`,`tag_index`,`subset`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = 
@saved_cs_client */; -- @@ -1358,7 +1358,7 @@ CREATE TABLE `seq_component_composition` ( KEY `seq_comcom_cmpos` (`id_seq_composition`,`size`), CONSTRAINT `seq_comcom_cmpon` FOREIGN KEY (`id_seq_component`) REFERENCES `seq_component` (`id_seq_component`) ON DELETE NO ACTION ON UPDATE NO ACTION, CONSTRAINT `seq_comcom_cmpos` FOREIGN KEY (`id_seq_composition`, `size`) REFERENCES `seq_composition` (`id_seq_composition`, `size`) ON DELETE NO ACTION ON UPDATE NO ACTION -) ENGINE=InnoDB DEFAULT CHARSET=latin1; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1375,7 +1375,7 @@ CREATE TABLE `seq_composition` ( PRIMARY KEY (`id_seq_composition`), UNIQUE KEY `unq_seq_compos_d` (`digest`), UNIQUE KEY `unq_seq_compos_ps` (`id_seq_composition`,`size`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1416,7 +1416,7 @@ CREATE TABLE `sequence_error` ( `sequence_type` varchar(25) NOT NULL DEFAULT 'default', PRIMARY KEY (`id_sequence_error`), UNIQUE KEY `unq_rlts_sequence_error` (`id_run`,`position`,`tag_index`,`sequence_type`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1439,7 +1439,7 @@ CREATE TABLE `sequence_summary` ( PRIMARY KEY (`id_sequence_summary`), KEY `seq_sum_compos` (`id_seq_composition`), CONSTRAINT `seq_sum_compos` FOREIGN KEY (`id_seq_composition`) REFERENCES `seq_composition` (`id_seq_composition`) ON DELETE NO ACTION ON UPDATE NO ACTION -) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1602,7 +1602,7 @@ CREATE TABLE `tag_metrics` ( `tag_index` bigint(20) NOT NULL DEFAULT '-1', PRIMARY KEY (`id_tag_metrics`), UNIQUE KEY `unq_run_lane_tagmetrics` (`id_run`,`position`,`tag_index`) -) 
ENGINE=InnoDB DEFAULT CHARSET=latin1; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1695,7 +1695,7 @@ CREATE TABLE `upstream_tags` ( `tag_index` bigint(20) NOT NULL DEFAULT '-1', PRIMARY KEY (`id_upstream_tags`), UNIQUE KEY `unq_run_lane_upstreamtags` (`id_run`,`position`,`tag_index`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -1734,4 +1734,4 @@ CREATE TABLE `verify_bam_id` ( /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; --- Dump completed on 2016-11-08 10:42:54 +-- Dump completed on 2016-11-11 14:08:21 diff --git a/lib/npg_qc/Schema/Result/RnaSeqc.pm b/lib/npg_qc/Schema/Result/RnaSeqc.pm index dccdb027b..0c7bdaf5b 100644 --- a/lib/npg_qc/Schema/Result/RnaSeqc.pm +++ b/lib/npg_qc/Schema/Result/RnaSeqc.pm @@ -85,91 +85,91 @@ A foreign key referencing the id_seq_composition column of the seq_composition t =head2 rrna - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 rrna_rate - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 exonic_rate - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 expression_profiling_efficiency - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 genes_detected - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 end_1_sense - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 end_1_antisense - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 end_2_sense - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 end_2_antisense - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 end_1_pct_sense - data_type: 
'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 end_2_pct_sense - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 mean_per_base_cov - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 mean_cv - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 end_5_norm - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 =head2 end_3_norm - data_type: 'bigint' + data_type: 'float' extra: {unsigned => 1} is_nullable: 1 @@ -225,35 +225,35 @@ __PACKAGE__->add_columns( 'info', { data_type => 'text', is_nullable => 1 }, 'rrna', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'rrna_rate', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'exonic_rate', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'expression_profiling_efficiency', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'genes_detected', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'end_1_sense', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'end_1_antisense', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'end_2_sense', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 
}, 'end_2_antisense', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'end_1_pct_sense', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'end_2_pct_sense', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'mean_per_base_cov', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'mean_cv', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'end_5_norm', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'end_3_norm', - { data_type => 'bigint', extra => { unsigned => 1 }, is_nullable => 1 }, + { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'other_metrics', { data_type => 'text', is_nullable => 1 }, 'metrics', @@ -319,15 +319,33 @@ __PACKAGE__->belongs_to( with 'npg_qc::Schema::Flators', 'npg_qc::autoqc::role::result'; -# Created by DBIx::Class::Schema::Loader v0.07045 @ 2016-10-25 14:50:10 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:O7WLo51sSmOzKhK3AQW5LQ +# Created by DBIx::Class::Schema::Loader v0.07045 @ 2016-11-11 14:10:26 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! 
md5sum:/acbhXi0BTkTRboZWtfs8g -__PACKAGE__->set_flators4non_scalar(qw( other_metrics info )); +# You can replace this text with custom code or comments, and it will be preserved on regeneration +__PACKAGE__->set_flators4non_scalar(qw( other_metrics info )); our $VERSION = '0'; -# You can replace this text with custom code or comments, and it will be preserved on regeneration +=head2 seq_component_compositions + +Type: has_many + +Related object: L<npg_qc::Schema::Result::SeqComponentComposition> + +To simplify queries, skip SeqComposition and link directly to the linking table. + +=cut + +__PACKAGE__->has_many( + 'seq_component_compositions', + 'npg_qc::Schema::Result::SeqComponentComposition', + { 'foreign.id_seq_composition' => 'self.id_seq_composition' }, + { cascade_copy => 0, cascade_delete => 0 }, +); + + __PACKAGE__->meta->make_immutable; 1; __END__ diff --git a/lib/npg_qc/autoqc/checks/rna_seqc.pm b/lib/npg_qc/autoqc/checks/rna_seqc.pm index 5a34bee14..b7ed194e4 100644 --- a/lib/npg_qc/autoqc/checks/rna_seqc.pm +++ b/lib/npg_qc/autoqc/checks/rna_seqc.pm @@ -279,6 +279,7 @@ sub _parse_metrics { my $results = {}; foreach(@keys){ chomp $values[$i]; + chomp $_; $results->{$_} = $values[$i]; $i++; } From 600aa4f86213c3f7b17fb5a77bcfc751a95162af Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Tue, 22 Nov 2016 16:55:45 +0000 Subject: [PATCH 09/24] Display RNA-SeQC results in summary table and minor fixes - rna_seqc check template shows nicely formatted results and includes a link to the qc/rna_seqc directory where all of the reports are stored - reorganize keys in hash of labels alphabetically in the summary table's template(what's wrong with that?) 
- add rna_seqc label and its 3 chosen metrics to said list - add selected rna_seqc metrics to summary table in lanes template - fix misspelled name for the Broad in info page --- npg_qc_viewer/root/src/about_qc_checks.tt2 | 2 +- npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 | 106 ++++++++++++++---- .../root/src/ui_lanes/checks_header.tt2 | 27 ++--- npg_qc_viewer/root/src/ui_lanes/lane.tt2 | 19 ++++ 4 files changed, 116 insertions(+), 38 deletions(-) diff --git a/npg_qc_viewer/root/src/about_qc_checks.tt2 b/npg_qc_viewer/root/src/about_qc_checks.tt2 index 305b85d7a..6a2641267 100644 --- a/npg_qc_viewer/root/src/about_qc_checks.tt2 +++ b/npg_qc_viewer/root/src/about_qc_checks.tt2 @@ -122,7 +122,7 @@ Any close genotype matches found under a different sample name, allowing homozyg

RNA-SeQC metrics

Description

-

Subset of metrics obtained from Borad Institute's RNA-SeQC package. The following summary statistics are calculated by counting the number of reads that have the given characteristics.

+

Subset of metrics obtained from Broad Institute's RNA-SeQC package. The following summary statistics are calculated by counting the number of reads that have the given characteristics.


Mapped Reads

rRNA reads are non-duplicate and duplicate reads aligning to rRNA regions as defined in the transcript model definition. rRNA Rate is per total reads.

diff --git a/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 b/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 index 065db4945..cbf9c923d 100644 --- a/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 +++ b/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 @@ -1,29 +1,87 @@ [% USE Number.Format(THOUSANDS_SEP=',') %] +[% IF !check.genes_detected.defined; %]
- - - - - - - - - - - - - - - - - - - [%- IF check.criterion.defined -%] - - [%- END -%] - +
3' Norm [% check.end_3_norm %]
5' Norm [% check.end_5_norm %]
End 1 % Sense [% check.end_1_pct_sense %]
End 1 Antisense [% check.end_1_antisense %]
End 1 Sense [% check.end_1_sense %]
End 2 % Sense [% check.end_2_pct_sense %]
End 2 Antisense [% check.end_2_antisense %]
End 2 Sense [% check.end_2_sense %]
Exonic Rate [% check.exonic_rate %]
Expression Profiling Efficiency [% check.expression_profiling_efficiency %]
Genes Detected [% check.genes_detected %]
Mean CV [% check.mean_cv %]
Mean Per Base Cov. [% check.mean_per_base_cov %]
rRNA [% check.rrna %]
rRNA rate [% check.rrna_rate %]
Pass criterion [% check.criterion FILTER html %]
+
File is not RNA alignment
-
- +[% ELSE %] +
mapped reads
+
+ + + + + + + +
rRNArRNA rate
[% IF check.rrna.defined; check.rrna | format_number; END %][% IF check.rrna_rate.defined; check.rrna_rate | format_number(3,1); END %]
+
+
transcript-associated reads
+
+ + + + + + + + + + +
exonic rateexpression profiling efficiencytranscripts/genes detected
[% IF check.exonic_rate.defined; check.exonic_rate | format_number(3,1); END %][% IF check.expression_profiling_efficiency.defined; check.expression_profiling_efficiency | format_number(3,1); END %][% IF check.genes_detected.defined; check.genes_detected | format_number; END %]
+
+
strand specificity
+
+ + + + + + + + + + + + + + + + +
end 1 senseend 1 antisenseend 2 senseend 2 antisenseend 1 % senseend 2 % sense
[% IF check.end_1_sense.defined; check.end_1_sense | format_number; END %][% IF check.end_1_antisense.defined; check.end_1_antisense | format_number; END %][% IF check.end_2_sense.defined; check.end_2_sense | format_number; END %][% IF check.end_2_antisense.defined; check.end_2_antisense | format_number; END %][% IF check.end_1_pct_sense.defined; check.end_1_pct_sense | format_number(3,1); END %][% IF check.end_2_pct_sense.defined; check.end_2_pct_sense | format_number(3,1); END %]
+
+
coverage metrics
+
+ + + + + + + + + + + + + +
mean per base cov.mean CVno. covered 3' (norm)no. covered 5' (norm)
[% IF check.mean_per_base_cov.defined; check.mean_per_base_cov | format_number(2,1); END %][% IF check.mean_cv.defined; check.mean_cv | format_number(2,1); END %][% IF check.end_3_norm.defined; check.end_3_norm | format_number(3,1); END %][% IF check.end_5_norm.defined; check.end_5_norm | format_number(3,1); END %]
+
+
+[% IF c.model('NpgDB').resultset('Run').find(id_run).is_tag_set('staging'); + base = base_url _ '/cgi-bin/locate_runfolder'; + run_folder_glob = id_run; + npg_run_row = c.model('NpgDB').resultset('Run').find(id_run); + IF npg_run_row; + rfglob = npg_run_row.folder_path_glob; + rfname = npg_run_row.folder_name; + IF rfglob && rfname; + run_folder_glob = rfglob _ rfname; + END; + END; +-%]

Other RNA-SeQC metrics.

+[% END; %] +
+[% END; %] diff --git a/npg_qc_viewer/root/src/ui_lanes/checks_header.tt2 b/npg_qc_viewer/root/src/ui_lanes/checks_header.tt2 index aba5deb33..bca40eaea 100644 --- a/npg_qc_viewer/root/src/ui_lanes/checks_header.tt2 +++ b/npg_qc_viewer/root/src/ui_lanes/checks_header.tt2 @@ -1,24 +1,25 @@ [%- labels = { - qX_yield = 'yield,
Kb' - insert_size = 'quartiles,
bases' adapter = 'adapters,
%' - split_stats = 'aligned
reads,
%' + alignment_filter_metrics = 'target, %' + bam_flagstats = 'mapped %
duplicates %' contamination = 'top two' + gc_bias = 'plot created
' + gc_fraction = 'fraction,
%' + genotype = 'match
mean cvg.
depth
' + insert_size = 'quartiles,
bases' + pulldown_metrics = 'coverage at 20X, %
mean depth per Gb
on bait bases, %
on target bases, %' + qX_yield = 'yield,
Kb' + rna_seqc = 'exonic rate,
rRNA Rate,
mean CV' ref_match = 'top two' sequence_error = 'average
mismatch,
%' + spatial_filter = 'filter fail,
total' + split_stats = 'aligned
reads,
%' tag_decode_stats = 'decode rate %
CV %' - gc_fraction = 'fraction,
%' - gc_bias = 'plot created
' - bam_flagstats = 'mapped %
duplicates %' - genotype = 'match
mean cvg.
depth
' tag_metrics = 'decode rate, %
CV %' - pulldown_metrics = 'coverage at 20X, %
mean depth per Gb
on bait bases, %
on target bases, %' - alignment_filter_metrics = 'target, %' - spatial_filter = 'filter fail,
total' - upstream_tags = 'tag0 reads (%)
perf.match reads (%)' - varify_bam_id = 'pass' - tags_reporters = '' + tags_reporters = '' + upstream_tags = 'tag0 reads (%)
perf.match reads (%)' + varify_bam_id = 'pass' } %] Lane
No diff --git a/npg_qc_viewer/root/src/ui_lanes/lane.tt2 b/npg_qc_viewer/root/src/ui_lanes/lane.tt2 index 66a248921..6e40550b1 100644 --- a/npg_qc_viewer/root/src/ui_lanes/lane.tt2 +++ b/npg_qc_viewer/root/src/ui_lanes/lane.tt2 @@ -428,4 +428,23 @@ END [% result.freemix %]
[%- END -%] +[% BLOCK rna_seqc %] +[% IF result.exonic_rate.defined %] +[% result.exonic_rate | format_number(3,1) %] +[% ELSE %] + na +[% END %] +
+[% IF result.rrna_rate.defined %] + [% result.rrna_rate | format_number(3,1) %] +[% ELSE %] + na +[% END %] +
+[% IF result.mean_cv.defined %] +[% result.mean_cv | format_number(2,1) %] +[% ELSE %] + na +[% END %] +[% END %] From 05773eb4a9f4c92f100ba24c1253a9b41ed3aae2 Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Mon, 28 Nov 2016 15:21:04 +0000 Subject: [PATCH 10/24] Fixed link to RNA-SeQC report --- npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 b/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 index cbf9c923d..7355999bb 100644 --- a/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 +++ b/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 @@ -2,9 +2,7 @@ [% IF !check.genes_detected.defined; %]
- - -
File is not RNA alignment
+

Reason: not an RNA alignment

[% ELSE %]
mapped reads
@@ -81,7 +79,12 @@ run_folder_glob = rfglob _ rfname; END; END; --%]

Other RNA-SeQC metrics.

+ run_position_dir = check.id_run _ '_' _ check.position; + rna_seqc_report_dir = run_position_dir; + IF check.tag_index.defined; + rna_seqc_report_dir = run_position_dir _ '/' _ run_position_dir _ '%23' _ check.tag_index; + END; +-%]

Other RNA-SeQC metrics.

[% END; %]
[% END; %] From d9e4bafab60ec33d744c643d5a53e1becc8ecd6d Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Wed, 30 Nov 2016 16:23:53 +0000 Subject: [PATCH 11/24] Add composition attribute to rna_seqc schema; don't hide its template anymore - New attribute allows schema object to play nice with qc_store new capabilities --- lib/npg_qc/Schema/Result/RnaSeqc.pm | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/lib/npg_qc/Schema/Result/RnaSeqc.pm b/lib/npg_qc/Schema/Result/RnaSeqc.pm index 0c7bdaf5b..69b9cf96a 100644 --- a/lib/npg_qc/Schema/Result/RnaSeqc.pm +++ b/lib/npg_qc/Schema/Result/RnaSeqc.pm @@ -324,10 +324,10 @@ with 'npg_qc::Schema::Flators', 'npg_qc::autoqc::role::result'; # You can replace this text with custom code or comments, and it will be preserved on regeneration -__PACKAGE__->set_flators4non_scalar(qw( other_metrics info )); - our $VERSION = '0'; +__PACKAGE__->set_flators4non_scalar(qw( other_metrics info )); + =head2 seq_component_compositions Type: has_many @@ -345,11 +345,6 @@ __PACKAGE__->has_many( { cascade_copy => 0, cascade_delete => 0 }, ); - -__PACKAGE__->meta->make_immutable; -1; -__END__ - =head1 SYNOPSIS =head1 DESCRIPTION @@ -362,6 +357,23 @@ Result class definition in DBIx binding for npg-qc database. =head1 SUBROUTINES/METHODS +=cut + +=head2 composition + +An lazy-build attribute representing a composition this result +corresponds to. + +=cut + +__PACKAGE__->create_composition_attribute(); + +__PACKAGE__->meta->make_immutable; + +1; + +__END__ + =head1 DEPENDENCIES =over From d45a50ac38be7d49e9c00be77b5455960d7f5377 Mon Sep 17 00:00:00 2001 From: "Ruben E. 
Bautista" Date: Mon, 5 Dec 2016 14:52:33 +0000 Subject: [PATCH 12/24] Template for rna_seqc exists now, render its metrics - Reverse previous change that prevented rendering metrics for rna_seqc check because there was no rna_seqc template --- lib/npg_qc/autoqc/results/collection.pm | 1 - npg_qc_viewer/root/src/ui_checks/checks_in_full.tt2 | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/npg_qc/autoqc/results/collection.pm b/lib/npg_qc/autoqc/results/collection.pm index 62d4a107f..cec2a3dc9 100644 --- a/lib/npg_qc/autoqc/results/collection.pm +++ b/lib/npg_qc/autoqc/results/collection.pm @@ -556,7 +556,6 @@ sub check_names { my @check_names = (); my $map = {}; foreach my $check (@{$self->checks_list}) { - if ($check eq 'rna_seqc') { next; } push @check_names, @{$classes->{$check}}; foreach my $name (@{$classes->{$check}}) { $map->{$name} = $check; diff --git a/npg_qc_viewer/root/src/ui_checks/checks_in_full.tt2 b/npg_qc_viewer/root/src/ui_checks/checks_in_full.tt2 index cd82a3d83..37111efae 100644 --- a/npg_qc_viewer/root/src/ui_checks/checks_in_full.tt2 +++ b/npg_qc_viewer/root/src/ui_checks/checks_in_full.tt2 @@ -3,7 +3,7 @@ lane_collection = rl_map.${lane_key}; NEXT IF (!lane_collection || lane_collection.size == 0); temp = lane_collection.sort; - to_remove = ['split stats phix', 'spatial filter', 'split stats', 'rna seqc']; + to_remove = ['split stats phix', 'spatial filter', 'split stats']; IF !run_view; to_remove.push('qX yield', 'gc fraction'); END; From 721db9914d240fd66e704904d9ac160f6647893e Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Tue, 6 Dec 2016 11:04:45 +0000 Subject: [PATCH 13/24] Show link to report only when db_lookup=0; single-line summary metrics - Link to RNA-SeQC report can be displayed only if id_run is defined, and this is true only when check results are being loaded from disk. - Metrics in summary table are shown in a single line to reduce scrolling. 
--- npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 | 155 ++++++++---------- npg_qc_viewer/root/src/ui_lanes/lane.tt2 | 14 +- 2 files changed, 75 insertions(+), 94 deletions(-) diff --git a/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 b/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 index 7355999bb..f8d90a7bb 100644 --- a/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 +++ b/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 @@ -1,90 +1,73 @@ [% USE Number.Format(THOUSANDS_SEP=',') %] [% IF !check.genes_detected.defined; %] -
-

Reason: not an RNA alignment

-
+
+

Reason: not an RNA alignment

+
[% ELSE %] -
mapped reads
-
- - - - - - - -
rRNArRNA rate
[% IF check.rrna.defined; check.rrna | format_number; END %][% IF check.rrna_rate.defined; check.rrna_rate | format_number(3,1); END %]
-
-
transcript-associated reads
-
- - - - - - - - - - -
exonic rateexpression profiling efficiencytranscripts/genes detected
[% IF check.exonic_rate.defined; check.exonic_rate | format_number(3,1); END %][% IF check.expression_profiling_efficiency.defined; check.expression_profiling_efficiency | format_number(3,1); END %][% IF check.genes_detected.defined; check.genes_detected | format_number; END %]
-
-
strand specificity
-
- - - - - - - - - - - - - - - - -
end 1 senseend 1 antisenseend 2 senseend 2 antisenseend 1 % senseend 2 % sense
[% IF check.end_1_sense.defined; check.end_1_sense | format_number; END %][% IF check.end_1_antisense.defined; check.end_1_antisense | format_number; END %][% IF check.end_2_sense.defined; check.end_2_sense | format_number; END %][% IF check.end_2_antisense.defined; check.end_2_antisense | format_number; END %][% IF check.end_1_pct_sense.defined; check.end_1_pct_sense | format_number(3,1); END %][% IF check.end_2_pct_sense.defined; check.end_2_pct_sense | format_number(3,1); END %]
-
-
coverage metrics
-
- - - - - - - - - - - - - -
mean per base cov.mean CVno. covered 3' (norm)no. covered 5' (norm)
[% IF check.mean_per_base_cov.defined; check.mean_per_base_cov | format_number(2,1); END %][% IF check.mean_cv.defined; check.mean_cv | format_number(2,1); END %][% IF check.end_3_norm.defined; check.end_3_norm | format_number(3,1); END %][% IF check.end_5_norm.defined; check.end_5_norm | format_number(3,1); END %]
-
-
-[% IF c.model('NpgDB').resultset('Run').find(id_run).is_tag_set('staging'); - base = base_url _ '/cgi-bin/locate_runfolder'; - run_folder_glob = id_run; - npg_run_row = c.model('NpgDB').resultset('Run').find(id_run); - IF npg_run_row; - rfglob = npg_run_row.folder_path_glob; - rfname = npg_run_row.folder_name; - IF rfglob && rfname; - run_folder_glob = rfglob _ rfname; - END; - END; - run_position_dir = check.id_run _ '_' _ check.position; - rna_seqc_report_dir = run_position_dir; - IF check.tag_index.defined; - rna_seqc_report_dir = run_position_dir _ '/' _ run_position_dir _ '%23' _ check.tag_index; - END; --%]

Other RNA-SeQC metrics.

-[% END; %] -
-[% END; %] +
mapped reads
+
+ + + + + +
rRNArRNA rate
[% IF check.rrna.defined; check.rrna | format_number; END %][% IF check.rrna_rate.defined; check.rrna_rate | format_number(3,1); END %]
+
+
transcript-associated reads
+
+ + + + + + +
exonic rateexpression profiling efficiencytranscripts/genes detected
[% IF check.exonic_rate.defined; check.exonic_rate | format_number(3,1); END %][% IF check.expression_profiling_efficiency.defined; check.expression_profiling_efficiency | format_number(3,1); END %][% IF check.genes_detected.defined; check.genes_detected | format_number; END %]
+
+
strand specificity
+
+ + + + + + + + + +
end 1 senseend 1 antisenseend 2 senseend 2 antisenseend 1 % senseend 2 % sense
[% IF check.end_1_sense.defined; check.end_1_sense | format_number; END %][% IF check.end_1_antisense.defined; check.end_1_antisense | format_number; END %][% IF check.end_2_sense.defined; check.end_2_sense | format_number; END %][% IF check.end_2_antisense.defined; check.end_2_antisense | format_number; END %][% IF check.end_1_pct_sense.defined; check.end_1_pct_sense | format_number(3,1); END %][% IF check.end_2_pct_sense.defined; check.end_2_pct_sense | format_number(3,1); END %]
+
+
coverage metrics
+
+ + + + + + + +
mean per base cov.mean CVno. covered 3' (norm)no. covered 5' (norm)
[% IF check.mean_per_base_cov.defined; check.mean_per_base_cov | format_number(2,1); END %][% IF check.mean_cv.defined; check.mean_cv | format_number(2,1); END %][% IF check.end_3_norm.defined; check.end_3_norm | format_number(3,1); END %][% IF check.end_5_norm.defined; check.end_5_norm | format_number(3,1); END %]
+
+
+[% IF check.id_run.defined; + IF c.model('NpgDB').resultset('Run').find(id_run).is_tag_set('staging'); + base = base_url _ '/cgi-bin/locate_runfolder'; + run_folder_glob = id_run; + npg_run_row = c.model('NpgDB').resultset('Run').find(id_run); + IF npg_run_row; + rfglob = npg_run_row.folder_path_glob; + rfname = npg_run_row.folder_name; + IF rfglob && rfname; + run_folder_glob = rfglob _ rfname; + END; + END; + run_position_dir = check.id_run _ '_' _ check.position; + rna_seqc_report_dir = run_position_dir; + IF check.tag_index.defined; + rna_seqc_report_dir = run_position_dir _ '/' _ run_position_dir _ '%23' _ check.tag_index; + END %] +

Other RNA-SeQC metrics.

+[% END; + END %] +
+[% END %] diff --git a/npg_qc_viewer/root/src/ui_lanes/lane.tt2 b/npg_qc_viewer/root/src/ui_lanes/lane.tt2 index 6e40550b1..8fcb8ef38 100644 --- a/npg_qc_viewer/root/src/ui_lanes/lane.tt2 +++ b/npg_qc_viewer/root/src/ui_lanes/lane.tt2 @@ -430,21 +430,19 @@ END [% BLOCK rna_seqc %] [% IF result.exonic_rate.defined %] -[% result.exonic_rate | format_number(3,1) %] + [% result.exonic_rate | format_number(3,1) %] [% ELSE %] - na + na [% END %] -
[% IF result.rrna_rate.defined %] - [% result.rrna_rate | format_number(3,1) %] + [% result.rrna_rate | format_number(3,1) %] [% ELSE %] - na + na [% END %] -
[% IF result.mean_cv.defined %] -[% result.mean_cv | format_number(2,1) %] + [% result.mean_cv | format_number(2,1) %] [% ELSE %] - na + na [% END %] [% END %] From 4293e13e84c576eb6d465db148f527534d31e14b Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Tue, 6 Dec 2016 11:44:09 +0000 Subject: [PATCH 14/24] Drop columns that aren't necessary from rna_seqc --- data/schema.txt | 5 +--- lib/npg_qc/Schema/Result/RnaSeqc.pm | 45 ++--------------------------- 2 files changed, 3 insertions(+), 47 deletions(-) diff --git a/data/schema.txt b/data/schema.txt index b6230ad79..7e46aaf8a 100644 --- a/data/schema.txt +++ b/data/schema.txt @@ -1158,9 +1158,6 @@ CREATE TABLE `rna_seqc` ( `end_5_norm` float unsigned DEFAULT NULL, `end_3_norm` float unsigned DEFAULT NULL, `other_metrics` text, - `metrics` mediumblob COMMENT 'Compressed metrics.tsv metrics file content', - `date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Date the record was created', - `iscurrent` tinyint(1) unsigned NOT NULL DEFAULT '1' COMMENT 'Boolean flag to indicate that the record is current, defaults to 1', PRIMARY KEY (`id_rna_seqc`), KEY `rna_seqc_compos` (`id_seq_composition`), CONSTRAINT `rna_seqc_compos` FOREIGN KEY (`id_seq_composition`) REFERENCES `seq_composition` (`id_seq_composition`) ON DELETE NO ACTION ON UPDATE NO ACTION @@ -1734,4 +1731,4 @@ CREATE TABLE `verify_bam_id` ( /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; --- Dump completed on 2016-11-11 14:08:21 +-- Dump completed on 2016-12-06 11:53:49 diff --git a/lib/npg_qc/Schema/Result/RnaSeqc.pm b/lib/npg_qc/Schema/Result/RnaSeqc.pm index 69b9cf96a..12f6cc46a 100644 --- a/lib/npg_qc/Schema/Result/RnaSeqc.pm +++ b/lib/npg_qc/Schema/Result/RnaSeqc.pm @@ -178,31 +178,6 @@ A foreign key referencing the id_seq_composition column of the seq_composition t data_type: 'text' is_nullable: 1 -=head2 metrics - - data_type: 'mediumblob' - is_nullable: 
1 - -Compressed metrics.tsv metrics file content - -=head2 date - - data_type: 'timestamp' - datetime_undef_if_invalid: 1 - default_value: current_timestamp - is_nullable: 0 - -Date the record was created - -=head2 iscurrent - - data_type: 'tinyint' - default_value: 1 - extra: {unsigned => 1} - is_nullable: 0 - -Boolean flag to indicate that the record is current, defaults to 1 - =cut __PACKAGE__->add_columns( @@ -256,22 +231,6 @@ __PACKAGE__->add_columns( { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 }, 'other_metrics', { data_type => 'text', is_nullable => 1 }, - 'metrics', - { data_type => 'mediumblob', is_nullable => 1 }, - 'date', - { - data_type => 'timestamp', - datetime_undef_if_invalid => 1, - default_value => \'current_timestamp', - is_nullable => 0, - }, - 'iscurrent', - { - data_type => 'tinyint', - default_value => 1, - extra => { unsigned => 1 }, - is_nullable => 0, - }, ); =head1 PRIMARY KEY @@ -319,8 +278,8 @@ __PACKAGE__->belongs_to( with 'npg_qc::Schema::Flators', 'npg_qc::autoqc::role::result'; -# Created by DBIx::Class::Schema::Loader v0.07045 @ 2016-11-11 14:10:26 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:/acbhXi0BTkTRboZWtfs8g +# Created by DBIx::Class::Schema::Loader v0.07046 @ 2016-12-06 11:41:23 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:T+W05fIQCWmHH4bJUw2MYw # You can replace this text with custom code or comments, and it will be preserved on regeneration From 8af9a9db9759d96b82de193bc834b8395a79e9b2 Mon Sep 17 00:00:00 2001 From: "Ruben E. 
Bautista" Date: Wed, 7 Dec 2016 13:19:22 +0000 Subject: [PATCH 15/24] Updated Changes; Minor HTML fixes - Travis complains about unexpected closing tags --- Changes | 9 ++++++++- npg_qc_viewer/root/src/about_qc_checks.tt2 | 10 +++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Changes b/Changes index fad298d70..d85c634c1 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,13 @@ LIST OF CHANGES FOR NPG-QC PACKAGE - + - npg_qc::autoqc::qc_store - load rna_seqc results into npg_qc db + (requires rna_seqc table exists in database). + - Updated data/schema.txt including definition for rna_seqc table. + - SeQC: + - Added template for rna_seqc check with selected metrics shown + in summary + - Include a link to original RNA-SeQC report only visible + when db_lookup=0. - translation from a database composition representation to the npg_tracking::glossary::composition type object - db query for compisition-based tables should include a condition diff --git a/npg_qc_viewer/root/src/about_qc_checks.tt2 b/npg_qc_viewer/root/src/about_qc_checks.tt2 index 6a2641267..58e33d05c 100644 --- a/npg_qc_viewer/root/src/about_qc_checks.tt2 +++ b/npg_qc_viewer/root/src/about_qc_checks.tt2 @@ -126,17 +126,17 @@ Any close genotype matches found under a different sample name, allowing homozyg

Mapped Reads

rRNA reads are non-duplicate and duplicate reads aligning to rRNA regions as defined in the transcript model definition. rRNA Rate is per total reads.

-
+

Transcript-associated Reads

Rates are per mapped read. Exonic Rate is the fraction mapping within exons. Expression Profile Efficiency is the ratio of exon reads to total reads. Transcripts/Genes Detected is the number of transcripts/Genes with at least 5 reads.


Strand Specificity

End 1/2 Sense are the number of End 1 or 2 reads that were sequenced in the sense direction. Similarly, End 1/2 Antisense are the number of End 1 or 2 reads that were sequenced in the antisense direction.End 1/2 Sense % are percentages of intragenic End 1/2 reads that were sequenced in the sense direction.

-
+

Coverage Metrics Highest 1000 Expressed Transcripts

The metrics in this table are calculated across the transcripts that were determined to have the highest expression levels. 5' and 3' values are per-base coverage averaged across all top transcripts. 5' and 3' ends are 200 base pairs. Gap % is the total cumulative gap length divided by the total cumulative transcript lengths.

-
-
+
+

Tag metrics

@@ -157,7 +157,7 @@ Any close genotype matches found under a different sample name, allowing homozyg Check for contamination of tag#0 BAM file by reads from upstream runs

-
+

VerifyBamID contamination check

From 5037f61fcd03e5d390f6e1f18367e073a8df88e7 Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Thu, 8 Dec 2016 12:01:31 +0000 Subject: [PATCH 16/24] Keep Perl Critic happy-changes --- lib/npg_qc/autoqc/checks/rna_seqc.pm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/npg_qc/autoqc/checks/rna_seqc.pm b/lib/npg_qc/autoqc/checks/rna_seqc.pm index b7ed194e4..c4dfd91aa 100644 --- a/lib/npg_qc/autoqc/checks/rna_seqc.pm +++ b/lib/npg_qc/autoqc/checks/rna_seqc.pm @@ -277,10 +277,10 @@ sub _parse_metrics { } my $i = 0; my $results = {}; - foreach(@keys){ + foreach my $key (@keys){ chomp $values[$i]; - chomp $_; - $results->{$_} = $values[$i]; + chomp $key; + $results->{$key} = $values[$i]; $i++; } return $results; From 5afa49f6ca23177539f9b63ac839975ef10604f5 Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Mon, 12 Dec 2016 13:46:45 +0000 Subject: [PATCH 17/24] Remove column `path` from table rna_seqc; remove Maybe[] from data types where appropriate - Change data type of rna_seqc result attributes from Maybe[Num] to Num. - Update rna_seqc Schema to reflect removed column. - Add file upgrade_schema-60.x to create table rna_seqc. - Update Changes file --- Changes | 1 + lib/npg_qc/Schema/Result/RnaSeqc.pm | 16 ++--------- lib/npg_qc/autoqc/results/rna_seqc.pm | 2 +- scripts/upgrade_schema/upgrade_schema-60.x | 33 ++++++++++++++++++++++ 4 files changed, 37 insertions(+), 15 deletions(-) create mode 100644 scripts/upgrade_schema/upgrade_schema-60.x diff --git a/Changes b/Changes index 5055c03fc..29fbb290b 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,6 @@ LIST OF CHANGES FOR NPG-QC PACKAGE + - upgrade_schema-60.x to create rna_seqc table has been included. - npg_qc::autoqc::qc_store - load rna_seqc results into npg_qc db (requires rna_seqc table exists in database). - Updated data/schema.txt including definition for rna_seqc table. 
diff --git a/lib/npg_qc/Schema/Result/RnaSeqc.pm b/lib/npg_qc/Schema/Result/RnaSeqc.pm index 12f6cc46a..bbd456322 100644 --- a/lib/npg_qc/Schema/Result/RnaSeqc.pm +++ b/lib/npg_qc/Schema/Result/RnaSeqc.pm @@ -61,8 +61,6 @@ __PACKAGE__->table('rna_seqc'); is_auto_increment: 1 is_nullable: 0 -Auto-generated primary key - =head2 id_seq_composition data_type: 'bigint' @@ -70,14 +68,6 @@ Auto-generated primary key is_foreign_key: 1 is_nullable: 0 -A foreign key referencing the id_seq_composition column of the seq_composition table - -=head2 path - - data_type: 'varchar' - is_nullable: 1 - size: 256 - =head2 info data_type: 'text' @@ -195,8 +185,6 @@ __PACKAGE__->add_columns( is_foreign_key => 1, is_nullable => 0, }, - 'path', - { data_type => 'varchar', is_nullable => 1, size => 256 }, 'info', { data_type => 'text', is_nullable => 1 }, 'rrna', @@ -278,8 +266,8 @@ __PACKAGE__->belongs_to( with 'npg_qc::Schema::Flators', 'npg_qc::autoqc::role::result'; -# Created by DBIx::Class::Schema::Loader v0.07046 @ 2016-12-06 11:41:23 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:T+W05fIQCWmHH4bJUw2MYw +# Created by DBIx::Class::Schema::Loader v0.07046 @ 2016-12-12 11:36:35 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! 
md5sum:t/Y5qHFkstoZgjKVXoIxLw # You can replace this text with custom code or comments, and it will be preserved on regeneration diff --git a/lib/npg_qc/autoqc/results/rna_seqc.pm b/lib/npg_qc/autoqc/results/rna_seqc.pm index 5f1e8fec0..16a85ee4a 100644 --- a/lib/npg_qc/autoqc/results/rna_seqc.pm +++ b/lib/npg_qc/autoqc/results/rna_seqc.pm @@ -27,7 +27,7 @@ Readonly::Array my @ATTRIBUTES => qw/ rrna has [ @ATTRIBUTES ] => ( is => 'rw', - isa => 'Maybe[Num]', + isa => 'Num', required => 0, ); diff --git a/scripts/upgrade_schema/upgrade_schema-60.x b/scripts/upgrade_schema/upgrade_schema-60.x new file mode 100644 index 000000000..814485485 --- /dev/null +++ b/scripts/upgrade_schema/upgrade_schema-60.x @@ -0,0 +1,33 @@ +-- +-- Table structure for table `rna_seqc` +-- + +CREATE TABLE IF NOT EXISTS `rna_seqc` ( + `id_rna_seqc` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT 'Auto-generated primary key', + `id_seq_composition` bigint(20) unsigned NOT NULL COMMENT 'A foreign key referencing the id_seq_composition column of the seq_composition table', + `info` text, + `rrna` float unsigned DEFAULT NULL, + `rrna_rate` float unsigned DEFAULT NULL, + `exonic_rate` float unsigned DEFAULT NULL, + `expression_profiling_efficiency` float unsigned DEFAULT NULL, + `genes_detected` float unsigned DEFAULT NULL, + `end_1_sense` float unsigned DEFAULT NULL, + `end_1_antisense` float unsigned DEFAULT NULL, + `end_2_sense` float unsigned DEFAULT NULL, + `end_2_antisense` float unsigned DEFAULT NULL, + `end_1_pct_sense` float unsigned DEFAULT NULL, + `end_2_pct_sense` float unsigned DEFAULT NULL, + `mean_per_base_cov` float unsigned DEFAULT NULL, + `mean_cv` float unsigned DEFAULT NULL, + `end_5_norm` float unsigned DEFAULT NULL, + `end_3_norm` float unsigned DEFAULT NULL, + `other_metrics` text, + PRIMARY KEY (`id_rna_seqc`), + KEY `rna_seqc_compos` (`id_seq_composition`), + CONSTRAINT `rna_seqc_compos` FOREIGN KEY (`id_seq_composition`) REFERENCES `seq_composition` 
(`id_seq_composition`) ON DELETE NO ACTION ON UPDATE NO ACTION +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +-- +-- +-- +GRANT SELECT ON `rna_seqc` TO nqcro; + From 6afcfdde0c753140f4944057b29936533dcf823e Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Mon, 12 Dec 2016 14:06:00 +0000 Subject: [PATCH 18/24] Update MANIFEST --- MANIFEST | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST b/MANIFEST index a79558926..5b79d02d3 100644 --- a/MANIFEST +++ b/MANIFEST @@ -264,6 +264,7 @@ lib/npg_qc/Schema/Result/QXYield.pm lib/npg_qc/Schema/Result/RecipeFile.pm lib/npg_qc/Schema/Result/RefMatch.pm lib/npg_qc/Schema/Result/RefSnpInfo.pm +lib/npg_qc/Schema/Result/RnaSeqc.pm lib/npg_qc/Schema/Result/RunAndPair.pm lib/npg_qc/Schema/Result/RunConfig.pm lib/npg_qc/Schema/Result/RunGraph.pm From 86e21126a0d3e8e055971fe520fd773a1b0baebd Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Thu, 5 Jan 2017 17:18:58 +0000 Subject: [PATCH 19/24] Create RNA-SeQC output directory by default; improved tests Check: - Make attributes lazy+build instead of lazy_build - Add new attribute to create a directory by default into which RNA-SeQC's report will be stored (the directory itself is created by RNA-SeQC) - Fix the parsing code to reflect this change - Set to undef fields with value of NaN when parsing Result: - Add new output_dir directory to RNA-SeQC result object Tests: - Create subtests for categories of tests - Add tests for parsing the metrics file - Improve testing by adding more units - Add auxiliary files for testing --- lib/npg_qc/autoqc/checks/rna_seqc.pm | 104 ++++++++++++++++---------- lib/npg_qc/autoqc/results/rna_seqc.pm | 7 +- t/60-autoqc-checks-rna_seqc.t | 74 +++++++++++------- t/60-autoqc-results-rna_seqc.t | 17 ++++- 4 files changed, 129 insertions(+), 73 deletions(-) diff --git a/lib/npg_qc/autoqc/checks/rna_seqc.pm b/lib/npg_qc/autoqc/checks/rna_seqc.pm index c4dfd91aa..9b149c1d2 100644 --- a/lib/npg_qc/autoqc/checks/rna_seqc.pm +++ 
b/lib/npg_qc/autoqc/checks/rna_seqc.pm @@ -55,9 +55,19 @@ has '+aligner' => (default => 'fasta', is => 'ro', writer => '_set_aligner',); -has 'qc_report_dir' => (is => 'ro', - isa => 'NpgTrackingDirectory', - required => 1,); +has 'output_dir' => (is => 'ro', + isa => 'Str', + required => 0, + lazy => 1, + builder => '_build_output_dir',); + +sub _build_output_dir { + my ($self) = @_; + my $rpt_dir; + my $qc_out_path = $self->qc_out; + my $output_dir = File::Spec->catdir($qc_out_path, $self->result->filename_root . q[_rna_seqc]); + return $output_dir; +} has '_java_jar_path' => (is => 'ro', isa => 'NpgCommonResolvedPathJarFile', @@ -69,11 +79,12 @@ has '_ttype_gtf_column' => (is => 'ro', isa => 'Int', default => 2,); -has '_alignments_in_bam' => (is => 'ro', - isa => 'Maybe[Bool]', - lazy_build => 1,); +has '_alignments_in_bam' => (is => 'ro', + isa => 'Bool', + lazy => 1, + builder => '_build_alignments_in_bam',); -sub _build__alignments_in_bam { +sub _build_alignments_in_bam { my $self = shift; my $aligned = 0; my $command = $self->samtools_irods_cmd . ' view -H ' . $self->_bam_file . ' |'; @@ -87,11 +98,12 @@ sub _build__alignments_in_bam { return $aligned; } -has '_is_paired_end' => (is => 'ro', - isa => 'Maybe[Bool]', - lazy_build => 1,); +has '_is_paired_end' => (is => 'ro', + isa => 'Bool', + lazy => 1, + builder => '_build_is_paired_end',); -sub _build__is_paired_end { +sub _build_is_paired_end { my ($self) = @_; my $paired = 0; my $flag; @@ -114,11 +126,12 @@ sub _build__is_paired_end { return $paired; } -has '_is_rna_alignment' => (is => 'ro', - isa => 'Maybe[Bool]', - lazy_build => 1,); +has '_is_rna_alignment' => (is => 'ro', + isa => 'Bool', + lazy => 1, + builder => '_build_is_rna_alignment',); -sub _build__is_rna_alignment { +sub _build_is_rna_alignment { my ($self) = @_; my $rna_alignment = 0; my $command = $self->samtools_irods_cmd . ' view -H ' . $self->_bam_file . 
' |'; @@ -133,12 +146,13 @@ sub _build__is_rna_alignment { } -has '_input_str' => (is => 'ro', - isa => 'Str', - lazy_build => 1, - init_arg => undef,); +has '_input_str' => (is => 'ro', + isa => 'Str', + lazy => 1, + builder => '_build_input_str', + init_arg => undef,); -sub _build__input_str { +sub _build_input_str { my ($self) = @_; my $sample_id = $self->lims->sample_id; my $library_name = $self->lims->library_name // $sample_id; @@ -147,40 +161,47 @@ sub _build__input_str { return qq["$library_names[0]|$input_file|$sample_id"]; } -has '_ref_genome' => (is => 'ro', - isa => 'Maybe[Str]', - lazy_build => 1,); +has '_ref_genome' => (is => 'ro', + isa => 'Str', + required => 0, + lazy => 1, + builder => '_build_ref_genome',); -sub _build__ref_genome { +sub _build_ref_genome { my ($self) = @_; my $reference_fasta = $self->refs->[0] // q[]; return $reference_fasta; } -has '_bam_file' => (is => 'ro', - isa => 'NpgTrackingReadableFile', - lazy_build => 1,); +has '_bam_file' => (is => 'ro', + isa => 'NpgTrackingReadableFile', + lazy => 1, + builder => '_build_bam_file',); -sub _build__bam_file { +sub _build_bam_file { my $self = shift; return $self->input_files->[0]; } -has '_annotation_gtf' => (is => 'ro', - isa => 'Maybe[Str]', - lazy_build => 1,); +has '_annotation_gtf' => (is => 'ro', + isa => 'Str', + required => 0, + lazy => 1, + builder => '_build_annotation_gtf',); -sub _build__annotation_gtf { +sub _build_annotation_gtf { my $self = shift; my $trans_gtf = $self->rnaseqc_gtf_file // q[]; return $trans_gtf; } -has '_ref_rrna' => (is => 'ro', - isa => 'Maybe[Str]', - lazy_build => 1,); +has '_ref_rrna' => (is => 'ro', + isa => 'Str', + required => 0, + lazy => 1, + builder => '_build_ref_rrna',); -sub _build__ref_rrna { +sub _build_ref_rrna { my $self = shift; my ($organism, $strain, $transcriptome) = $self->parse_reference_genome($self->lims->reference_genome); $self->_set_aligner($RRNA_ALIGNER); @@ -203,7 +224,7 @@ sub _command { my $command = $self->java_cmd. 
sprintf q[ -Xmx4000m -XX:+UseSerialGC -XX:-UsePerfData -jar %s -s %s -o %s -r %s -t %s -ttype %d %s %s], $self->_java_jar_path, $self->_input_str, - $self->qc_report_dir, + $self->output_dir, $self->_ref_genome, $self->_annotation_gtf, $self->_ttype_gtf_column, @@ -260,9 +281,9 @@ override 'execute' => sub { sub _parse_metrics { my ($self) = @_; - my $filename = File::Spec->catfile($self->qc_report_dir, $METRICS_FILE_NAME); + my $filename = File::Spec->catfile($self->output_dir, $METRICS_FILE_NAME); if (! -e $filename) { - croak q[Metrics file is not available, cannot parse RNA-SeQC metrics]; + croak qq[No such file $filename: cannot parse RNA-SeQC metrics]; } my $fh = IO::File->new($filename, 'r'); my @lines; @@ -292,8 +313,8 @@ sub _save_results { my $value = $results->{$key}; if (defined $value) { my $attr_name = $RNASEQC_METRICS_FIELDS_MAPPING{$key}; - if ($value eq q[?]) { - carp qq[Field $attr_name is set to '?', skipping...]; + if ($value eq q[NaN]) { + carp qq[Value of $attr_name is 'NaN', skipping...]; } else { $self->result->$attr_name($value); } @@ -301,6 +322,7 @@ sub _save_results { delete $results->{$key}; } $self->result->other_metrics($results); + $self->result->output_dir($self->output_dir); return; } __PACKAGE__->meta->make_immutable(); diff --git a/lib/npg_qc/autoqc/results/rna_seqc.pm b/lib/npg_qc/autoqc/results/rna_seqc.pm index 16a85ee4a..3185cb2b0 100644 --- a/lib/npg_qc/autoqc/results/rna_seqc.pm +++ b/lib/npg_qc/autoqc/results/rna_seqc.pm @@ -28,14 +28,17 @@ Readonly::Array my @ATTRIBUTES => qw/ rrna has [ @ATTRIBUTES ] => ( is => 'rw', isa => 'Num', - required => 0, -); + required => 0,); has 'other_metrics' => (isa => 'HashRef[Str]', is => 'rw', default => sub { {} }, required => 0,); +has 'output_dir' => (is => 'rw', + isa => 'Str', + required => 0,); + __PACKAGE__->meta->make_immutable; 1; diff --git a/t/60-autoqc-checks-rna_seqc.t b/t/60-autoqc-checks-rna_seqc.t index 917f00998..58f03a13e 100644 --- a/t/60-autoqc-checks-rna_seqc.t +++ 
b/t/60-autoqc-checks-rna_seqc.t @@ -1,8 +1,9 @@ use strict; use warnings; use Cwd qw/getcwd abs_path/; -use Test::More tests => 17; +use Test::More tests => 5; use Test::Exception; +use Test::Warn; use File::Temp qw/ tempdir /; use_ok ('npg_qc::autoqc::checks::rna_seqc'); @@ -19,47 +20,73 @@ my $repos = getcwd . '/t/data/autoqc/rna_seqc'; `touch $dir/RNA-SeQC.jar`; -{ +subtest 'Find CLASSPATH' => sub { + plan tests => 3; my $rnaseqc = npg_qc::autoqc::checks::rna_seqc->new( id_run => 17550, position => 3, tag_index => 8, path => 't/data/autoqc/rna_seqc/data', - repository => $repos, - qc_report_dir => q[t/data],); + repository => $repos,); isa_ok ($rnaseqc, 'npg_qc::autoqc::checks::rna_seqc'); lives_ok { $rnaseqc->result; } 'result object created'; local $ENV{CLASSPATH} = q[]; throws_ok {npg_qc::autoqc::checks::rna_seqc->new(id_run => 2, path => q[mypath], position => 1, qc_report_dir => q[t/data])} qr/Can\'t find \'RNA-SeQC\.jar\' because CLASSPATH is not set/, q[Fails to create object when RNA-SeQC.jar not found]; -} +}; -{ +subtest 'Input and output paths' => sub { + plan tests => 3; throws_ok { my $qc = npg_qc::autoqc::checks::rna_seqc->new( id_run => 17550, position => 3, tag_index => 8, path => q[nonexisting], - repository => $repos, - qc_report_dir => q[t/data],); + repository => $repos,); $qc->execute() } qr/directory nonexisting does not exist/, 'execute: error on nonexisting path'; -} - -{ + my $run = 17550; + my $pos = 3; + my $tag = 13; my $check = npg_qc::autoqc::checks::rna_seqc->new( - id_run => 17550, - position => 3, - tag_index => 13, + id_run => $run, + position => $pos, + tag_index => $tag, path => 't/data/autoqc/rna_seqc/data', - repository => $repos, - qc_report_dir => q[t/data],); + repository => $repos); lives_ok { $check->execute } 'no error when input not found'; -} + my $filename_root = $check->result->filename_root; + my $output_dir_shouldbe = join q[/], $check->path, $filename_root.q[_rna_seqc]; + is($check->output_dir, 
$output_dir_shouldbe, q[output directory is formed correctly]); +}; -{ +subtest 'Parse metrics' => sub { + plan tests => 4; + my $rnaseqc = npg_qc::autoqc::checks::rna_seqc->new( + id_run => 17550, + position => 3, + tag_index => 8, + path => 't/data/autoqc/rna_seqc/data', + repository => $repos,); + my $metrics_hash; + my $results_hash; + throws_ok {$rnaseqc->_parse_metrics()} qr/No\ such\ file\ t\/data\/autoqc\/rna_seqc\/data\/17550\_3\#8\_rna\_seqc\/metrics\.tsv\:\ cannot\ parse\ RNA-SeQC\ metrics/, + 'error if metrics file is not found where expected'; + $rnaseqc = npg_qc::autoqc::checks::rna_seqc->new( + id_run => 18407, + position => 1, + tag_index => 7, + path => 't/data/autoqc/rna_seqc/data', + repository => $repos,); + lives_ok {$metrics_hash = $rnaseqc->_parse_metrics()} q[parsing RNA-SeQC metrics.tsv ok]; + warning_like {$results_hash = $rnaseqc->_save_results($metrics_hash)} {carped => qr/Value of .* is 'NaN'/}, q[saving results ok - a NaN carp was caught]; + is ($results_hash->{'end_3_norm'}, undef, q[fields with value NaN are skipped]); +}; + +subtest 'Argument input files' => sub { + plan tests => 11; my $ref_repos_dir = join q[/],$dir,'references'; my $ref_dir = join q[/], $ref_repos_dir,'Mus_musculus','GRCm38','all'; `mkdir -p $ref_dir/fasta`; @@ -85,8 +112,7 @@ my $repos = getcwd . '/t/data/autoqc/rna_seqc'; repository => $repos, ref_repository => $ref_repos_dir, transcriptome_repository => $trans_repos_dir, - _alignments_in_bam => 0, - qc_report_dir => q[t/data],); + _alignments_in_bam => 0); is($check->_bam_file, 't/data/autoqc/rna_seqc/data/17550_3#8.bam', 'bam file path for id run 17550 lane 3 tag 8'); lives_ok { $check->execute } 'execution ok for no alignments in BAM'; like ($check->result->comments, qr/BAM file is not aligned/, 'comment when bam file is not aligned'); @@ -97,7 +123,6 @@ my $repos = getcwd . 
'/t/data/autoqc/rna_seqc'; tag_index => 8, path => 't/data/autoqc/rna_seqc/data', repository => $repos, - qc_report_dir => q[t/data], _ref_genome => q[], transcriptome_repository => $trans_repos_dir,); lives_ok { $check->execute } 'execution ok for no reference genome file'; @@ -109,7 +134,6 @@ my $repos = getcwd . '/t/data/autoqc/rna_seqc'; tag_index => 8, path => 't/data/autoqc/rna_seqc/data', repository => $repos, - qc_report_dir => q[t/data], _annotation_gtf => q[], ref_repository => $ref_repos_dir,); lives_ok { $check->execute } 'execution ok for no annotation file'; @@ -125,7 +149,6 @@ my $repos = getcwd . '/t/data/autoqc/rna_seqc'; tag_index => 1, path => 't/data/autoqc/rna_seqc/data', repository => $repos, - qc_report_dir => q[t/data], ref_repository => $ref_repos_dir, transcriptome_repository => $trans_repos_dir,); throws_ok { $check->execute } qr/Binary fasta reference for Danio_rerio, zv9, all does not exist/, @@ -142,11 +165,10 @@ my $repos = getcwd . '/t/data/autoqc/rna_seqc'; path => 't/data/autoqc/rna_seqc/data', repository => $repos, ref_repository => $ref_repos_dir, - transcriptome_repository => $trans_repos_dir, - qc_report_dir => q[t/data],); + transcriptome_repository => $trans_repos_dir,); is($check->_bam_file, 't/data/autoqc/rna_seqc/data/17550_1#1.bam', 'bam file path for id run 17550 lane 1 tag 1'); lives_ok { $check->execute } 'execution ok for no RNA alignment'; like ($check->result->comments, qr/BAM file is not RNA alignment/, 'comment when bam file is not RNA alignment'); -} +}; 1; diff --git a/t/60-autoqc-results-rna_seqc.t b/t/60-autoqc-results-rna_seqc.t index 0e29f9d92..985dc6822 100644 --- a/t/60-autoqc-results-rna_seqc.t +++ b/t/60-autoqc-results-rna_seqc.t @@ -1,15 +1,24 @@ use strict; use warnings; -use Test::More tests => 4; +use Test::More tests => 3; use Test::Exception; use_ok ('npg_qc::autoqc::results::rna_seqc'); -{ - my $r = npg_qc::autoqc::results::rna_seqc->new(id_run => 12, position => 3, path => q[mypath]); +subtest 
'Loading check' => sub { + plan tests => 4; + my $r = npg_qc::autoqc::results::rna_seqc->new(id_run => 18407, position => 1, tag_index => 7, path => q[mypath]); isa_ok ($r, 'npg_qc::autoqc::results::rna_seqc'); is($r->check_name(), 'rna seqc', 'check name'); is($r->class_name(), 'rna_seqc', 'class name'); -} + is ($r->filename4serialization(), '18407_1#7.rna_seqc.json', 'default file name'); +}; + +subtest 'Testing utility methods' => sub { + plan tests => 2; + my $r; + lives_ok {$r = npg_qc::autoqc::results::rna_seqc->load('t/data/autoqc/rna_seqc/data/15911_1#1.rna_seqc.json');} 'load serialised empty result'; + lives_ok {$r = npg_qc::autoqc::results::rna_seqc->load('t/data/autoqc/rna_seqc/data/18407_1#7.rna_seqc.json');} 'load serialised valid result'; +}; 1; \ No newline at end of file From a1d816154054c46c1d3f2d89cc77331dae0e829d Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Fri, 6 Jan 2017 10:34:26 +0000 Subject: [PATCH 20/24] Fix Critic criticisms - Add missing auxiliary tests files --- lib/npg_qc/autoqc/checks/rna_seqc.pm | 1 - t/data/autoqc/rna_seqc/data/15911_1#1.rna_seqc.json | 1 + t/data/autoqc/rna_seqc/data/18407_1#7.rna_seqc.json | 1 + t/data/autoqc/rna_seqc/data/18407_1#7_rna_seqc/metrics.tsv | 2 ++ 4 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 t/data/autoqc/rna_seqc/data/15911_1#1.rna_seqc.json create mode 100644 t/data/autoqc/rna_seqc/data/18407_1#7.rna_seqc.json create mode 100644 t/data/autoqc/rna_seqc/data/18407_1#7_rna_seqc/metrics.tsv diff --git a/lib/npg_qc/autoqc/checks/rna_seqc.pm b/lib/npg_qc/autoqc/checks/rna_seqc.pm index 9b149c1d2..902b6e300 100644 --- a/lib/npg_qc/autoqc/checks/rna_seqc.pm +++ b/lib/npg_qc/autoqc/checks/rna_seqc.pm @@ -63,7 +63,6 @@ has 'output_dir' => (is => 'ro', sub _build_output_dir { my ($self) = @_; - my $rpt_dir; my $qc_out_path = $self->qc_out; my $output_dir = File::Spec->catdir($qc_out_path, $self->result->filename_root . 
q[_rna_seqc]); return $output_dir; diff --git a/t/data/autoqc/rna_seqc/data/15911_1#1.rna_seqc.json b/t/data/autoqc/rna_seqc/data/15911_1#1.rna_seqc.json new file mode 100644 index 000000000..fdcbad542 --- /dev/null +++ b/t/data/autoqc/rna_seqc/data/15911_1#1.rna_seqc.json @@ -0,0 +1 @@ +{"__CLASS__":"npg_qc::autoqc::results::rna_seqc","comments":"BAM file is not RNA alignment","composition":{"__CLASS__":"npg_tracking::glossary::composition-85.5","components":[{"__CLASS__":"npg_tracking::glossary::composition::component::illumina-85.5","id_run":15911,"position":1,"subset":"all","tag_index":1}]},"id_run":15911,"info":{"Check":"npg_qc::autoqc::checks::rna_seqc","Check_version":"0","Jar":"RNA-SeqQC RNA-SeQC.jar"},"other_metrics":{},"path":"data","position":1,"tag_index":1} \ No newline at end of file diff --git a/t/data/autoqc/rna_seqc/data/18407_1#7.rna_seqc.json b/t/data/autoqc/rna_seqc/data/18407_1#7.rna_seqc.json new file mode 100644 index 000000000..a57a854db --- /dev/null +++ b/t/data/autoqc/rna_seqc/data/18407_1#7.rna_seqc.json @@ -0,0 +1 @@ +{"__CLASS__":"npg_qc::autoqc::results::rna_seqc","composition":{"__CLASS__":"npg_tracking::glossary::composition-85.4","components":[{"__CLASS__":"npg_tracking::glossary::composition::component::illumina-85.4","id_run":18407,"position":1,"tag_index":7}]},"end_1_antisense":1530,"end_1_pct_sense":6.9908814,"end_1_sense":115,"end_2_antisense":95,"end_2_pct_sense":93.716934,"end_2_sense":1417,"end_3_norm":1.0583231,"end_5_norm":0.7867761,"exonic_rate":0.001790527,"expression_profiling_efficiency":0.0014548181,"genes_detected":12,"id_run":18407,"info":{"Check":"npg_qc::autoqc::checks::rna_seqc","Check_version":"0","Jar":"RNA-SeqQC RNA-SeQC.jar"},"mean_cv":1.4219271,"mean_per_base_cov":2.9021204,"other_metrics":{"Alternative Aligments":"23685","Base Mismatch Rate":"0.001572254","Chimeric Pairs":"9098","Cumul. 
Gap Length":"62246","Duplication Rate of Mapped":"0.041722357","End 1 Mapping Rate":"0.84337074","End 1 Mismatch Rate":"0.0015389597","End 2 Mapping Rate":"0.78164583","End 2 Mismatch Rate":"0.0016081773","Estimated Library Size":"58717226","Failed Vendor QC Check":"0","Fragment Length Mean":"147","Fragment Length StdDev":"70","Gap %":"0.4247801","Intergenic Rate":"0.99788785","Intragenic Rate":"0.0021107893","Intronic Rate":"3.202623E-4","Mapped":"1495649","Mapped Pairs":"660763","Mapped Unique":"1433247","Mapped Unique Rate of Total":"0.7786085","Mapping Rate":"0.8125083","No. Covered 5'":"21","Note":"2477423","Num. Gaps":"227","Read Length":"150","Sample":"15358794","Split Reads":"50","Total Purity Filtered Reads Sequenced":"1840780","Transcripts Detected":"28","Unique Rate of Mapped":"0.95827764","Unpaired Reads":"0"},"path":"data","position":1,"rrna":4257,"rrna_rate":0.0023126067,"tag_index":7} diff --git a/t/data/autoqc/rna_seqc/data/18407_1#7_rna_seqc/metrics.tsv b/t/data/autoqc/rna_seqc/data/18407_1#7_rna_seqc/metrics.tsv new file mode 100644 index 000000000..02d53d585 --- /dev/null +++ b/t/data/autoqc/rna_seqc/data/18407_1#7_rna_seqc/metrics.tsv @@ -0,0 +1,2 @@ +Sample Note End 2 Mapping Rate Chimeric Pairs Intragenic Rate Num. Gaps Exonic Rate Mapping Rate 5' Norm Genes Detected Unique Rate of Mapped 3' Norm Read Length Mean Per Base Cov. End 1 Mismatch Rate Fragment Length StdDev Estimated Library Size Mapped Intergenic Rate Total Purity Filtered Reads Sequenced rRNA Failed Vendor QC Check Mean CV Transcripts Detected Mapped Pairs Cumul. Gap Length Gap % Unpaired Reads Intronic Rate Mapped Unique Rate of Total Expression Profiling Efficiency Mapped Unique End 2 Mismatch Rate End 2 Antisense Alternative Aligments End 2 Sense Fragment Length Mean End 1 Antisense Split Reads Base Mismatch Rate End 1 Sense End 1 % Sense rRNA rate End 1 Mapping Rate No. 
Covered 5' Duplication Rate of Mapped End 2 % Sense +15358794 2477423 0.78164583 9098 NaN 227 0.001790527 0.8125083 0.7867761 12 0.95827764 NaN 150 2.9021204 0.0015389597 70 58717226 1495649 NaN 1840780 4257 0 1.4219271 28 660763 62246 0.4247801 0 3.202623E-4 0.7786085 0.0014548181 1433247 0.0016081773 95 23685 1417 147 1530 50 0.001572254 115 6.9908814 0.0023126067 0.84337074 21 0.041722357 93.716934 From 5196baa5996dc0a990322cde5af4750dd2d10749 Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Wed, 18 Jan 2017 14:37:20 +0000 Subject: [PATCH 21/24] Change the path for the RNA-SeQC report accessible from the template in npg_qc_viewer - This is to reflect the changes that happened in the check, i.e. the creation of the directory inside the archive/lane/qc directory by default. --- npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 b/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 index f8d90a7bb..842d3730e 100644 --- a/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 +++ b/npg_qc_viewer/root/src/ui_checks/rna_seqc.tt2 @@ -61,12 +61,12 @@ run_folder_glob = rfglob _ rfname; END; END; - run_position_dir = check.id_run _ '_' _ check.position; - rna_seqc_report_dir = run_position_dir; + rna_seqc_output_dir = check.id_run _ '_' _ check.position; IF check.tag_index.defined; - rna_seqc_report_dir = run_position_dir _ '/' _ run_position_dir _ '%23' _ check.tag_index; - END %] -

Other RNA-SeQC metrics.

+ rna_seqc_output_dir = rna_seqc_output_dir _ '%23' _ check.tag_index; + END; + rna_seqc_output_dir = rna_seqc_output_dir _ '_rna_seqc' %] +

Other RNA-SeQC metrics.

[% END; END %]
From 7108641c631116ac78f561aad0bb888f5cd77257 Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Wed, 18 Jan 2017 14:40:43 +0000 Subject: [PATCH 22/24] Add tests for autoqc::Schema::Result::RnaSeqc --- t/50-schema-result-RnaSeqc.t | 56 ++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 t/50-schema-result-RnaSeqc.t diff --git a/t/50-schema-result-RnaSeqc.t b/t/50-schema-result-RnaSeqc.t new file mode 100644 index 000000000..2a5c9aee1 --- /dev/null +++ b/t/50-schema-result-RnaSeqc.t @@ -0,0 +1,56 @@ +use strict; +use warnings; +use Test::More tests => 2; +use Test::Exception; +use File::Temp qw/ tempdir /; +use Cwd qw/getcwd abs_path/; +use Archive::Extract; +use Perl6::Slurp; +use JSON; +use npg_testing::db; + +use_ok('npg_qc::Schema::Result::RnaSeqc'); + + +my $schema = Moose::Meta::Class->create_anon_class( + roles => [qw/npg_testing::db/]) + ->new_object({})->create_test_db(q[npg_qc::Schema]); + +my $tempdir = tempdir( CLEANUP => 1); +my $repos = getcwd . 
q[/t/data/autoqc/rna_seqc]; +my $archive = join q[/], $repos, q[data]; + +my $rs = $schema->resultset('RnaSeqc'); +my $rc = $rs->result_class; + +sub _get_data { + my $file_name = shift; + my $json = slurp join(q[/], $archive, $file_name); + my $values = from_json($json); + foreach my $key (keys %{$values}) { + if (!$rc->has_column($key)) { + delete $values->{$key}; + } + } + return $values; +} + +subtest 'load results with a composition fk' => sub { + plan tests => 4; + + my $values = _get_data('18407_1#7.rna_seqc.json'); + my $fk_row = $schema->resultset('SeqComposition')->create({digest => '45678', size => 2}); + + my $object = $rs->new_result($values); + isa_ok($object, 'npg_qc::Schema::Result::RnaSeqc'); + throws_ok {$object->insert()} + qr/NOT NULL constraint failed: rna_seqc.id_seq_composition/, + 'foreign key referencing the composition table absent - error'; + + $object->id_seq_composition($fk_row->id_seq_composition); + lives_ok { $object->insert() } 'insert with fk is ok'; + my $a_rs = $rs->search({}); + is ($a_rs->count, 1, q[one row created in the table]); +}; + +1; \ No newline at end of file From 62ffb52954036d66c4485b6c1c6452e8452ba603 Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Thu, 19 Jan 2017 13:53:01 +0000 Subject: [PATCH 23/24] Add unique constraint to RnaSeqc DBix schema - This is necessary for the npg_autoqc_data.pl script to work for this check. 
--- lib/npg_qc/Schema/Result/RnaSeqc.pm | 22 ++++++++++++++++++++-- lib/npg_qc/Schema/Result/SeqComposition.pm | 12 ++++++------ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/lib/npg_qc/Schema/Result/RnaSeqc.pm b/lib/npg_qc/Schema/Result/RnaSeqc.pm index bbd456322..a2ac66045 100644 --- a/lib/npg_qc/Schema/Result/RnaSeqc.pm +++ b/lib/npg_qc/Schema/Result/RnaSeqc.pm @@ -61,6 +61,8 @@ __PACKAGE__->table('rna_seqc'); is_auto_increment: 1 is_nullable: 0 +Auto-generated primary key + =head2 id_seq_composition data_type: 'bigint' @@ -68,6 +70,8 @@ __PACKAGE__->table('rna_seqc'); is_foreign_key: 1 is_nullable: 0 +A foreign key referencing the id_seq_composition column of the seq_composition table + =head2 info data_type: 'text' @@ -233,6 +237,20 @@ __PACKAGE__->add_columns( __PACKAGE__->set_primary_key('id_rna_seqc'); +=head1 UNIQUE CONSTRAINTS + +=head2 C + +=over 4 + +=item * L + +=back + +=cut + +__PACKAGE__->add_unique_constraint('rna_seqc_id_compos_unq', ['id_seq_composition']); + =head1 RELATIONS =head2 seq_composition @@ -266,8 +284,8 @@ __PACKAGE__->belongs_to( with 'npg_qc::Schema::Flators', 'npg_qc::autoqc::role::result'; -# Created by DBIx::Class::Schema::Loader v0.07046 @ 2016-12-12 11:36:35 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:t/Y5qHFkstoZgjKVXoIxLw +# Created by DBIx::Class::Schema::Loader v0.07046 @ 2017-01-19 11:15:22 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! 
md5sum:2mZZ7aTIqW6xAZ63EQaX1g # You can replace this text with custom code or comments, and it will be preserved on regeneration diff --git a/lib/npg_qc/Schema/Result/SeqComposition.pm b/lib/npg_qc/Schema/Result/SeqComposition.pm index 1a543a963..2bfb655d6 100644 --- a/lib/npg_qc/Schema/Result/SeqComposition.pm +++ b/lib/npg_qc/Schema/Result/SeqComposition.pm @@ -135,16 +135,16 @@ __PACKAGE__->add_unique_constraint('unq_seq_compos_ps', ['id_seq_composition', ' =head1 RELATIONS -=head2 rna_seqcs +=head2 rna_seqc -Type: has_many +Type: might_have Related object: L =cut -__PACKAGE__->has_many( - 'rna_seqcs', +__PACKAGE__->might_have( + 'rna_seqc', 'npg_qc::Schema::Result::RnaSeqc', { 'foreign.id_seq_composition' => 'self.id_seq_composition' }, { cascade_copy => 0, cascade_delete => 0 }, @@ -199,8 +199,8 @@ __PACKAGE__->has_many( ); -# Created by DBIx::Class::Schema::Loader v0.07045 @ 2016-10-25 14:50:10 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:8NLCyZyGS5x1emB/RF7omg +# Created by DBIx::Class::Schema::Loader v0.07046 @ 2017-01-19 11:15:22 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:sUtyrkK1I4aaxjciaI6mLQ # You can replace this text with custom code or comments, and it will be preserved on regeneration From ecff38d6f6821d10d9bba8427682697b315a3da2 Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Thu, 19 Jan 2017 14:34:02 +0000 Subject: [PATCH 24/24] Update SQL scripts, Changes and tidy up code - Modify SQL commands to reflect changes in rna_seqc table. - Update upgrade-schema-60.x SQL script as well as data/schema.txt. - Change rna_seqc check POD 'DESCRIPTION' to be more accurate. - Delete instances of the qc_report_dir attribute which is no longer used. - Update Changes file with latest er... changes. 
--- Changes | 13 +++++++------ data/schema.txt | 4 ++-- lib/npg_qc/autoqc/checks/rna_seqc.pm | 9 +++++---- scripts/upgrade_schema/upgrade_schema-60.x | 5 +++-- t/60-autoqc-checks-rna_seqc.t | 2 +- 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/Changes b/Changes index 29fbb290b..290c33485 100644 --- a/Changes +++ b/Changes @@ -1,14 +1,15 @@ LIST OF CHANGES FOR NPG-QC PACKAGE + - remove attribute qc_report_dir from check object: the output directory + is created by default using the sample's filename root - upgrade_schema-60.x to create rna_seqc table has been included. - - npg_qc::autoqc::qc_store - load rna_seqc results into npg_qc db - (requires rna_seqc table exists in database). - - Updated data/schema.txt including definition for rna_seqc table. + - npg_qc::autoqc::qc_store - load rna_seqc results into npg_qc db: + requires rna_seqc table exists in database. + - updated data/schema.txt including definition for rna_seqc table. - SeQC: - - Added template for rna_seqc check with selected metrics shown + - added template for rna_seqc check with selected metrics shown in summary - - Include a link to original RNA-SeQC report only visible - when db_lookup=0. 
+ - include a link to original RNA-SeQC report in check's template - translation from a database composition representation to the npg_tracking::glossary::composition type object - db query for compisition-based tables should include a condition diff --git a/data/schema.txt b/data/schema.txt index 7e46aaf8a..3e4071ce7 100644 --- a/data/schema.txt +++ b/data/schema.txt @@ -1140,7 +1140,6 @@ DROP TABLE IF EXISTS `rna_seqc`; CREATE TABLE `rna_seqc` ( `id_rna_seqc` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT 'Auto-generated primary key', `id_seq_composition` bigint(20) unsigned NOT NULL COMMENT 'A foreign key referencing the id_seq_composition column of the seq_composition table', - `path` varchar(256) DEFAULT NULL, `info` text, `rrna` float unsigned DEFAULT NULL, `rrna_rate` float unsigned DEFAULT NULL, @@ -1159,9 +1158,10 @@ CREATE TABLE `rna_seqc` ( `end_3_norm` float unsigned DEFAULT NULL, `other_metrics` text, PRIMARY KEY (`id_rna_seqc`), + UNIQUE KEY `rna_seqc_id_compos_unq` (`id_seq_composition`), KEY `rna_seqc_compos` (`id_seq_composition`), CONSTRAINT `rna_seqc_compos` FOREIGN KEY (`id_seq_composition`) REFERENCES `seq_composition` (`id_seq_composition`) ON DELETE NO ACTION ON UPDATE NO ACTION -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; /*!40101 SET character_set_client = @saved_cs_client */; -- diff --git a/lib/npg_qc/autoqc/checks/rna_seqc.pm b/lib/npg_qc/autoqc/checks/rna_seqc.pm index 902b6e300..2e25e741f 100644 --- a/lib/npg_qc/autoqc/checks/rna_seqc.pm +++ b/lib/npg_qc/autoqc/checks/rna_seqc.pm @@ -339,10 +339,11 @@ npg_qc::autoqc::checks::rna_seqc =head1 DESCRIPTION -QC check that runs Broad Institute's RNA-SeQC software over an RNA sample. -Files generated by RNA-SeQC are overwriten everytime it is executed and except -for the directory where the metrics are stored (named after Sample ID) all use -the same names. The user must consider this when passing the value of qc_report_dir. 
+QC check that runs Broad Institute's RNA-SeQC; a java program which computes a +series of quality control metrics for RNA-seq data. The output consists of +HTML reports and tab delimited files of metrics data from which a selection of +them are extracted to generate an autoqc result. The output directory is +created by default using the sample's filename root. =head1 SUBROUTINES/METHODS diff --git a/scripts/upgrade_schema/upgrade_schema-60.x b/scripts/upgrade_schema/upgrade_schema-60.x index 814485485..d594287b2 100644 --- a/scripts/upgrade_schema/upgrade_schema-60.x +++ b/scripts/upgrade_schema/upgrade_schema-60.x @@ -2,7 +2,7 @@ -- Table structure for table `rna_seqc` -- -CREATE TABLE IF NOT EXISTS `rna_seqc` ( +CREATE TABLE `rna_seqc` ( `id_rna_seqc` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT 'Auto-generated primary key', `id_seq_composition` bigint(20) unsigned NOT NULL COMMENT 'A foreign key referencing the id_seq_composition column of the seq_composition table', `info` text, @@ -23,9 +23,10 @@ CREATE TABLE IF NOT EXISTS `rna_seqc` ( `end_3_norm` float unsigned DEFAULT NULL, `other_metrics` text, PRIMARY KEY (`id_rna_seqc`), + UNIQUE KEY `rna_seqc_id_compos_unq` (`id_seq_composition`), KEY `rna_seqc_compos` (`id_seq_composition`), CONSTRAINT `rna_seqc_compos` FOREIGN KEY (`id_seq_composition`) REFERENCES `seq_composition` (`id_seq_composition`) ON DELETE NO ACTION ON UPDATE NO ACTION -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=latin1; -- -- -- diff --git a/t/60-autoqc-checks-rna_seqc.t b/t/60-autoqc-checks-rna_seqc.t index 58f03a13e..a5c491d79 100644 --- a/t/60-autoqc-checks-rna_seqc.t +++ b/t/60-autoqc-checks-rna_seqc.t @@ -31,7 +31,7 @@ subtest 'Find CLASSPATH' => sub { isa_ok ($rnaseqc, 'npg_qc::autoqc::checks::rna_seqc'); lives_ok { $rnaseqc->result; } 'result object created'; local $ENV{CLASSPATH} = q[]; - throws_ok {npg_qc::autoqc::checks::rna_seqc->new(id_run => 2, path => q[mypath], position => 1, qc_report_dir => 
q[t/data])} + throws_ok {npg_qc::autoqc::checks::rna_seqc->new(id_run => 2, path => q[mypath], position => 1,)} qr/Can\'t find \'RNA-SeQC\.jar\' because CLASSPATH is not set/, q[Fails to create object when RNA-SeQC.jar not found]; };