diff --git a/doc/CHANGELOG.md b/doc/CHANGELOG.md index 222816cdc..ed46cd2a3 100644 --- a/doc/CHANGELOG.md +++ b/doc/CHANGELOG.md @@ -4,6 +4,18 @@ Change log for WTSI genotyping pipeline Latest version is hosted at: https://github.com/wtsi-npg/genotyping +Release 1.14.1: 2017-03-28 +-------------------------- + +Added: +- Fluidigm QC: + - Compute QC metrics and write in CSV format + - Can update an existing CSV file + - Command-line script qc_fluidigm.pl +- Fluidigm archiver: + - Compress Fluidigm data into .tar.gz archive files + - Script archive_fluidigm_genotypes.pl + Release 1.14.0: 2017-02-07 -------------------------- @@ -70,76 +82,6 @@ Fixed: - Support repeat scans from Infinium database -Release 1.13.2: 2016-11-10 --------------------------- - -Changed: -- Hotfix: Add cgp and ddd references to WTSI::NPG::Genotyping::QC::Identity - - -Release 1.13.1: 2016-07-28 --------------------------- - -Changed: -- Made install.sh more transparent and portable. -- Updated WTSI-DNAP-Utilities and perl-irods-wrap versions in install.sh. - -Fixed: -- Default LSF queue for Ruby workflows - - -Release 1.13.0: 2016-06-20 --------------------------- - -Added: -- install.sh script to install pipeline and its Perl dependencies -- Documentation for Bayesian identity check - -Changed: -- Modified ready_workflow.pl to better align with user SOP -- Use try/catch to handle unexpected errors in retrieving QC plex results -from iRODS -- Updated reference genome for Sequenom iRODS query -- Update perl-irods-wrap dependency to 2.4.0; removes unhelpful warning -messages to STDERR. This in turn requires baton version >= 0.16.4. 
- -Removed: -- Script publish_infinium_file_list.pl; superseded by other publish scripts - - -Release 1.12.1: 2016-05-13 --------------------------- - -Fixed: -- Support repeat scans from Infinium database - - -Release 1.13.0: 2016-06-20 --------------------------- - -Added: -- install.sh script to install pipeline and its Perl dependencies -- Documentation for Bayesian identity check - -Changed: -- Modified ready_workflow.pl to better align with user SOP -- Use try/catch to handle unexpected errors in retrieving QC plex results -from iRODS -- Updated reference genome for Sequenom iRODS query -- Update perl-irods-wrap dependency to 2.4.0; removes unhelpful warning -messages to STDERR. This in turn requires baton version >= 0.16.4. - -Removed: -- Script publish_infinium_file_list.pl; superseded by other publish scripts - - -Release 1.12.1: 2016-05-13 --------------------------- - -Fixed: -- Support repeat scans from Infinium database - - Release 1.12.0: 2016-04-07 -------------------------- diff --git a/src/perl/MANIFEST b/src/perl/MANIFEST index 34b59fb08..781a32737 100644 --- a/src/perl/MANIFEST +++ b/src/perl/MANIFEST @@ -21,6 +21,7 @@ bin/publish_infinium_analysis.pl bin/publish_infinium_genotypes.pl bin/publish_sequenom_genotypes.pl bin/publish_snpset.pl +bin/qc_fluidigm.pl bin/query_project_samples.pl bin/ready_external.pl bin/ready_infinium.pl @@ -113,6 +114,7 @@ lib/WTSI/NPG/Genotyping/Fluidigm/AssayResultSet.pm lib/WTSI/NPG/Genotyping/Fluidigm/Collector.pm lib/WTSI/NPG/Genotyping/Fluidigm/ExportFile.pm lib/WTSI/NPG/Genotyping/Fluidigm/Publisher.pm +lib/WTSI/NPG/Genotyping/Fluidigm/QC.pm lib/WTSI/NPG/Genotyping/Fluidigm/ResultSet.pm lib/WTSI/NPG/Genotyping/Fluidigm/Subscriber.pm lib/WTSI/NPG/Genotyping/GenderMarker.pm @@ -354,6 +356,11 @@ t/fluidigm_publisher/repub/0123456789/0123456789.csv t/fluidigm_publisher/repub/0123456789/Data/aramis.tif t/fluidigm_publisher/repub/0123456789/Data/athos.tif t/fluidigm_publisher/repub/0123456789/Data/porthos.tif 
+t/fluidigm_qc.t
+t/fluidigm_qc/1381735059/S01_1381735059.csv
+t/fluidigm_qc/1381735059/S02_1381735059.csv
+t/fluidigm_qc/fluidigm_qc.csv
+t/fluidigm_qc/fluidigm_qc_outdated_md5.csv
 t/fluidigm_resultset.t
 t/fluidigm_resultset/complete/0123456789/0123456789.csv
 t/fluidigm_resultset/complete/0123456789/Data/aramis.tif
@@ -708,6 +715,7 @@ t/WTSI/NPG/Genotyping/Fluidigm/AssayResultSetTest.pm
 t/WTSI/NPG/Genotyping/Fluidigm/AssayResultTest.pm
 t/WTSI/NPG/Genotyping/Fluidigm/ExportFileTest.pm
 t/WTSI/NPG/Genotyping/Fluidigm/PublisherTest.pm
+t/WTSI/NPG/Genotyping/Fluidigm/QCTest.pm
 t/WTSI/NPG/Genotyping/Fluidigm/ResultSetTest.pm
 t/WTSI/NPG/Genotyping/Fluidigm/SubscriberTest.pm
 t/WTSI/NPG/Genotyping/GenderMarkerCallTest.pm
diff --git a/src/perl/bin/qc_fluidigm.pl b/src/perl/bin/qc_fluidigm.pl
new file mode 100755
index 000000000..c54e638ea
--- /dev/null
+++ b/src/perl/bin/qc_fluidigm.pl
@@ -0,0 +1,301 @@
+#!/usr/bin/env perl
+
+package main;
+
+use strict;
+use warnings;
+use Cwd qw(abs_path);
+use File::Copy qw(cp);
+use File::Temp qw(tempfile);
+use File::Spec::Functions qw(tmpdir);
+use Getopt::Long;
+use Log::Log4perl qw(:levels);
+use Pod::Usage;
+
+use WTSI::DNAP::Utilities::ConfigureLogger qw(log_init);
+use WTSI::NPG::Genotyping::Fluidigm::AssayDataObject;
+use WTSI::NPG::Genotyping::Fluidigm::QC;
+use WTSI::NPG::iRODS;
+use WTSI::NPG::Utilities qw(user_session_log);
+
+my $uid = `whoami`;
+chomp($uid);
+my $session_log = user_session_log($uid, 'qc_fluidigm');
+
+our $VERSION = '';
+
+run() unless caller();
+
+sub run {
+    my $debug;
+    my $in_place;
+    my $log4perl_config;
+    my $new_csv;
+    my $old_csv;
+    my $query_path;
+    my $verbose;
+    my @filter_key;
+    my @filter_value;
+    my $stdio;
+
+    GetOptions('debug' => \$debug,
+               'filter-key=s' => \@filter_key,
+               'filter-value=s' => \@filter_value,
+               'help' => sub { pod2usage(-verbose => 2,
+                                         -exitval => 0) },
+               'in-place' => \$in_place,
+               'logconf=s' => \$log4perl_config,
+               'new-csv=s' => \$new_csv,
+               'old-csv=s' => \$old_csv,
+               'query-path=s' => \$query_path,
+               'verbose' => \$verbose,
+               '' => \$stdio); # Permits trailing '-' for STDIN
+
+    # validate command line options and populate filter
+    my @filter;
+    if ($stdio) {
+        if ($query_path or @filter_key) {
+            pod2usage(-msg => "The --query-path and --filter-key options ".
+                          "are incompatible with reading from STDIN\n",
+                      -exitval => 2);
+        }
+    } else {
+        if (! defined $query_path) {
+            pod2usage(-msg => "If inputs are not supplied on STDIN, must ".
+                          "specify --query-path\n",
+                      -exitval => 2);
+        }
+        if (scalar @filter_key != scalar @filter_value) {
+            pod2usage(-msg => "There must be equal numbers of filter keys " .
+                          "and values\n",
+                      -exitval => 2);
+        }
+        while (@filter_key) {
+            push @filter, [pop @filter_key, pop @filter_value];
+        }
+    }
+    if ($in_place) {
+        if (defined $new_csv) {
+            pod2usage(-msg => "The --new-csv and --in-place options ".
+                          "are incompatible\n",
+                      -exitval => 2);
+        } elsif (! defined $old_csv) {
+            pod2usage(-msg => "The --old-csv option is required for ".
+                          "--in-place\n",
+                      -exitval => 2);
+        } else {
+            $new_csv = $old_csv;
+        }
+    }
+    # set up logging
+    my @log_levels;
+    if ($debug) { push @log_levels, $DEBUG; }
+    if ($verbose) { push @log_levels, $INFO; }
+    log_init(config => $log4perl_config,
+             file => $session_log,
+             levels => \@log_levels);
+    my $log = Log::Log4perl->get_logger('main');
+
+    # Get input Fluidigm paths from STDIN or iRODS
+    my $irods = WTSI::NPG::iRODS->new();
+    my @fluidigm_data;
+    if ($stdio) {
+        while (my $line = <>) {
+            chomp $line;
+            push @fluidigm_data, $line;
+        }
+    } else {
+        @fluidigm_data =
+            $irods->find_objects_by_meta($query_path,
+                                         [fluidigm_plate => '%', 'like'],
+                                         [fluidigm_well => '%', 'like'],
+                                         [type => 'csv'],
+                                         @filter);
+    }
+    $log->info("Received ", scalar @fluidigm_data,
+               " Fluidigm data object paths");
+
+    # Find output filehandle (a temp file when the old CSV is rewritten in place)
+    my $fh;
+    my $temp;
+    if (defined $new_csv) {
+        if (defined $old_csv && abs_path($old_csv) eq abs_path($new_csv)) {
+            ($fh, $temp) = tempfile('qc_fluidigm_XXXXXX',
+                                    DIR => tmpdir(),
+                                    UNLINK => 1, # delete on script exit
+                                );
+            $log->debug("Created temporary file $temp for CSV output");
+        } else {
+            open($fh, ">", $new_csv) or # 'or', not '||': test open() itself
+                $log->logcroak("Cannot open output '", $new_csv, "'");
+            $log->debug("Opened path $new_csv for CSV output");
+        }
+    } else {
+        $log->debug("Writing output to STDOUT");
+        $fh = *STDOUT;
+    }
+    # Write updated QC results
+    my %args = (
+        data_object_paths => \@fluidigm_data,
+    );
+    if (defined $old_csv) { $args{'csv_path'} = $old_csv; }
+    my $qc = WTSI::NPG::Genotyping::Fluidigm::QC->new(\%args);
+    $qc->write_csv($fh);
+    if (defined $new_csv) {
+        close($fh) or $log->logcroak("Cannot close CSV output");
+    }
+    if (defined $temp) {
+        my $cp_ok = cp($temp, $new_csv);
+        if ($cp_ok) {
+            $log->debug('Copied temporary file ', $temp, ' to ', $new_csv);
+        } else {
+            $log->logcroak('Failed to copy temporary file ', $temp,
+                           ' to ', $new_csv);
+        }
+    }
+}
+
+__END__
+
+=head1 NAME
+
+qc_fluidigm
+
+=head1 SYNOPSIS
+
+Options:
+
+  --filter-key
Additional filter to limit set of dataObjs acted on. + --filter-value + --help Display help. + --in-place If given, write QC results to the file specified by + --old-csv. Raises an error if --old-csv is not supplied. + Incompatible with --new-csv. + --logconf A log4perl configuration file. Optional. + --new-csv Path of a CSV file for QC output. Optional; if --new-csv + and --in-place are not given, output will be written + to STDOUT. + --old-csv Path of a CSV file from which to read existing QC records. + Optional. + --query-path An iRODS path to query for Fluidigm DataObjects. Required, + unless iRODS paths are supplied on STDIN using the '-' + option. + --verbose Print messages while processing. Optional. + +=head1 DESCRIPTION + +Read published Fluidigm genotyping data from iRODS; cross-reference +with existing QC records, if any; and write updated QC records in CSV +format. + +=head2 Input + +=head3 Fluidigm data + +Input data from iRODS may be found via a metadata query, or from paths +given on STDIN. Note that if data on iRODS is changed while the script +is running, the changes will not necessarily appear in the output. + +=over + +=item * + +To query iRODS metadata, specify a search path using the --query-path option. +The default query is for data objects with 'fluidigm_plate' and +'fluidigm_well' attributes, and a 'type' attribute with value 'csv'. +Additional query keys and values may be specified with the --filter-key +and --filter-value options. + +=item * + +To read from STDIN, terminate the command line with the '-' option. In +this mode, the --query-path, --filter-key and --filter-value options +are invalid. + +=back + +=head3 Existing QC results + +If desired, specify existing QC results using the --old-csv option. + +CSV records are identified by their plate and well. 
If a record in the +existing CSV has the same plate and well as one of the input iRODS data +objects, and a different md5 checksum, it will be replaced with a record +generated from the data object. Otherwise, the original record is output +unchanged. Order of the existing CSV results is preserved. + +If an input data object does not appear in the existing CSV, a record for +it will be appended to the output. If no existing CSV path is given, the +script will simply write CSV output for all the inputs. + +=head2 Output + +=head3 CSV fields + +=over + +=item 1. Sample identifier + +=item 2. Call rate: Defined as field (9) / field (8), if field (8) is non-zero; zero otherwise. + +=item 3. Size of resultset (total assay results) + +=item 4. Total calls + +=item 5. Total controls + +=item 6. Total empty + +=item 7. Total valid + +=item 8. Total template assays: Defined as assays which are not empty and not controls. + +=item 9. Total template calls: Defined as template assays in (8) which are calls. + +=item 10. Fluidigm plate + +=item 11. Fluidigm well + +=item 12. md5 checksum + +=back + +Items 1 through 9 are taken from the fluidigm assay result file, while 10 +through 12 are from iRODS metadata. + +=head3 Output location + +=over + +=item * If the --in-place option is given, the script will replace the existing CSV file given by --old-csv. + +=item * If the --new-csv option is supplied, the script will write to the given file. + +=item * If neither option is given, results will be written to STDOUT. + +=back + + +=head1 METHODS + +None + +=head1 AUTHOR + +Iain Bancarz + +=head1 COPYRIGHT AND DISCLAIMER + +Copyright (c) 2017 Genome Research Limited. All Rights Reserved. + +This program is free software: you can redistribute it and/or modify +it under the terms of the Perl Artistic License or the GNU General +Public License as published by the Free Software Foundation, either +version 3 of the License, or (at your option) any later version. 
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+=cut
diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/AssayResult.pm b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/AssayResult.pm
index 325a6a92f..a081b1c02 100644
--- a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/AssayResult.pm
+++ b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/AssayResult.pm
@@ -119,6 +119,42 @@ sub is_call {
           $self->converted_call ne $NO_CALL);
 }
 
+=head2 is_template_assay
+
+  Arg [1]    : None
+
+  Example    : $result->is_template_assay
+  Description: Return True if is_empty and is_control are both false, ie.
+               the assay contains an experimental sample (also known as a
+               template).
+
+  Returntype : Bool
+
+=cut
+
+sub is_template_assay {
+  my ($self) = @_;
+
+  return !($self->is_empty() || $self->is_control());
+}
+
+=head2 is_template_call
+
+  Arg [1]    : None
+
+  Example    : $result->is_template_call
+  Description: Return True if is_template_assay and is_call are both true.
+
+  Returntype : Bool
+
+=cut
+
+sub is_template_call {
+  my ($self) = @_;
+
+  return $self->is_template_assay() && $self->is_call();
+}
+
 =head2 is_valid
 
   Arg [1]    : None
diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/AssayResultSet.pm b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/AssayResultSet.pm
index 155c756d7..b294dd847 100644
--- a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/AssayResultSet.pm
+++ b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/AssayResultSet.pm
@@ -24,6 +24,73 @@ has 'assay_results' =>
    builder => '_build_assay_results',
    lazy => 1);
 
+has 'call_rate' =>
+  (is => 'ro',
+   isa => 'Num',
+   init_arg => undef,
+   lazy => 1,
+   builder => '_build_call_rate',
+   documentation => 'QC metric, defined as total_template_calls / '.
+       'total_template_assays. Measures the call rate excluding control '.
+       'and empty assays.',
+);
+
+has 'total_calls' =>
+  (is => 'ro',
+   isa => 'Int',
+   init_arg => undef,
+   lazy => 1,
+   builder => '_build_total_calls',
+   documentation => 'Number of assay results for which is_call is True',
+);
+
+has 'total_controls' =>
+  (is => 'ro',
+   isa => 'Int',
+   init_arg => undef,
+   lazy => 1,
+   builder => '_build_total_controls',
+   documentation => 'Number of assay results for which is_control is True',
+);
+
+has 'total_empty' =>
+  (is => 'ro',
+   isa => 'Int',
+   init_arg => undef,
+   lazy => 1,
+   builder => '_build_total_empty',
+   documentation => 'Number of assay results for which is_empty is True',
+);
+
+has 'total_template_assay_calls' =>
+  (is => 'ro',
+   isa => 'Int',
+   init_arg => undef,
+   lazy => 1,
+   builder => '_build_total_template_assay_calls',
+   documentation => 'Number of assay results for which '.
+       'is_template_call is True',
+);
+
+has 'total_template_assays' =>
+  (is => 'ro',
+   isa => 'Int',
+   init_arg => undef,
+   lazy => 1,
+   builder => '_build_total_template_assays',
+   documentation => 'Number of assay results for which '.
+       'is_template_assay is True',
+);
+
+has 'total_valid' =>
+  (is => 'ro',
+   isa => 'Int',
+   init_arg => undef,
+   lazy => 1,
+   builder => '_build_total_valid',
+   documentation => 'Number of assay results for which is_valid is True',
+);
+
 around BUILDARGS => sub {
   my ($orig, $class, @args) = @_;
 
@@ -185,6 +252,62 @@ sub filter_on_confidence {
   return \@filtered_results;
 }
 
+
+=head2 summary_fields
+
+  Arg [1]    : None
+
+  Example    : $summary_fields = $result->summary_fields();
+  Description: Return an ArrayRef containing summary values. Call rate is
+               rounded to 4 decimal places for subsequent output. The
+               string 'NA' denotes an empty sample ID, which may occur
+               for an empty well.
+  Returntype : ArrayRef
+
+=cut
+
+sub summary_fields {
+    my ($self) = @_;
+
+    my $id_string = $self->canonical_sample_id() || 'NA';
+
+    my @fields = (
+        $id_string,
+        sprintf("%.4f", $self->call_rate),
+        $self->size(),
+        $self->total_calls,
+        $self->total_controls,
+        $self->total_empty,
+        $self->total_valid,
+        $self->total_template_assays,
+        $self->total_template_assay_calls,
+    );
+    return \@fields;
+}
+
+=head2 summary_string
+
+  Arg [1]    : None
+
+  Example    : $summary_string = $result->summary_string();
+  Description: Return a comma-separated string containing summary values,
+               same as those returned by summary_fields().
+  Returntype : Str
+
+=cut
+
+sub summary_string {
+    my ($self) = @_;
+    my $csv = Text::CSV->new ({ binary => 1 });
+    $csv->combine(@{$self->summary_fields()});
+    my $string = $csv->string();
+    if (! defined $string) {
+        $self->logconfess("Unable to generate CSV string from input '",
+                          $csv->error_input, "'");
+    }
+    return $string;
+}
+
 sub _build_assay_results {
   my ($self) = @_;
 
@@ -208,6 +331,57 @@ sub _build_assay_results {
   return $records;
 }
 
+sub _build_call_rate {
+    my ($self,) = @_;
+    my $call_rate = 0;
+    if ($self->total_template_assays != 0) {
+        $call_rate =
+            $self->total_template_assay_calls / $self->total_template_assays;
+    }
+    return $call_rate;
+}
+
+sub _build_total_calls {
+    my ($self,) = @_;
+    return $self->_count_matching_assays('is_call');
+}
+
+sub _build_total_controls {
+    my ($self,) = @_;
+    return $self->_count_matching_assays('is_control');
+}
+
+sub _build_total_empty {
+    my ($self,) = @_;
+    return $self->_count_matching_assays('is_empty');
+}
+
+sub _build_total_template_assay_calls {
+    my ($self,) = @_;
+    return $self->_count_matching_assays('is_template_call');
+}
+
+sub _build_total_template_assays {
+    my ($self,) = @_;
+    return $self->_count_matching_assays('is_template_assay');
+}
+
+sub _build_total_valid {
+    my ($self,) = @_;
+    return $self->_count_matching_assays('is_valid');
+}
+
+sub _count_matching_assays {
+    # method to count AssayResults which return True for a given object method
+    # eg. count instances of is_empty, is_call, is_valid
+    my ($self, $sub_boolean) = @_;
+    my $count = 0;
+    foreach my $ar (@{$self->assay_results}) {
+        if ($ar->$sub_boolean()) { $count++; }
+    }
+    return $count;
+}
+
 sub _parse_assay_results {
   my ($self, $fh) = @_;
 
@@ -275,6 +449,7 @@ sub _parse_assay_results {
   return \@records;
 }
 
+
 __PACKAGE__->meta->make_immutable;
 
 no Moose;
@@ -294,11 +469,11 @@ for a number of SNPs.
 
 =head1 AUTHOR
 
-Keith James
+Keith James , Iain Bancarz
 
 =head1 COPYRIGHT AND DISCLAIMER
 
-Copyright (C) 2014, 2015 Genome Research Limited. All Rights Reserved.
+Copyright (C) 2014, 2015, 2017 Genome Research Limited. All Rights Reserved.
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the Perl Artistic License or the GNU General
diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/Collector.pm b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/Collector.pm
index d840177ad..1aca76d8e 100644
--- a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/Collector.pm
+++ b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/Collector.pm
@@ -1,9 +1,13 @@
 use utf8;
+use sigtrap qw/die untrapped normal-signals
+               stack-trace any error-signals/;
+# sigtrap ensures tempdir is deleted after control-C or similar
 
 package WTSI::NPG::Genotyping::Fluidigm::Collector;
 
 use DateTime;
-use File::Temp qw/tempdir/;
+use File::Temp;
+use File::Spec;
 
 use WTSI::NPG::iRODS;
 use WTSI::NPG::Utilities qw/md5sum/;
@@ -162,7 +166,8 @@ sub irods_publication_ok {
 sub _get_md5_by_address {
   my ($self, $export_file) = @_;
 
-  my $tmpdir = tempdir("fluidigm_samples_XXXXXX", CLEANUP => 1);
+  my $tmpdir = File::Temp->newdir("fluidigm_samples_XXXXXX",
+                                  dir => File::Spec->tmpdir() );
   my %md5_by_address;
   foreach my $address (@{$export_file->addresses}) {
     my $filename = $export_file->fluidigm_filename($address);
@@ -170,8 +175,9 @@ sub _get_md5_by_address {
     my $records = $export_file->write_assay_result_data($address, $file);
     my $md5 = md5sum($file);
     $md5_by_address{$address} = $md5;
-    $self->debug("Wrote $records records for address $address into ",
-                 "temp file '", $file, "', with md5 '", $md5, "'");
+    $self->debug("Wrote ", $records, " records for address ",
+                 $address, " into temp file ", $file,
+                 ", with md5 ", $md5);
   }
   return %md5_by_address;
 }
diff --git a/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/QC.pm b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/QC.pm
new file mode 100644
index 000000000..88ab02b8f
--- /dev/null
+++ b/src/perl/lib/WTSI/NPG/Genotyping/Fluidigm/QC.pm
@@ -0,0 +1,427 @@
+
+package WTSI::NPG::Genotyping::Fluidigm::QC;
+
+use Moose;
+
+use Set::Scalar;
+use Text::CSV;
+use Try::Tiny;
+
+use WTSI::NPG::iRODS;
+use WTSI::NPG::iRODS::Metadata;
+use WTSI::NPG::Genotyping::Fluidigm::AssayDataObject;
+
+our $VERSION = '';
+
+our $PLATE_INDEX = 9;
+our $WELL_INDEX = 10;
+our $MD5_INDEX = 11;
+our $EXPECTED_FIELDS_TOTAL = 12;
+
+our $REPORTING_BLOCK_SIZE = 1000;
+
+with 'WTSI::DNAP::Utilities::Loggable';
+
+has 'checksums_by_path' =>
+    (is => 'ro',
+     isa => 'HashRef[Str]',
+     init_arg => undef,
+     lazy => 1,
+     builder => '_build_checksums_by_path',
+     documentation => 'The md5 checksum for each input iRODS path. ',
+);
+
+has 'csv' =>
+    (is => 'ro',
+     isa => 'Text::CSV',
+     init_arg => undef,
+     lazy => 1,
+     default => sub { return Text::CSV->new ({ binary => 1, }); },
+     documentation => 'Object for processing data in CSV format',
+);
+
+has 'csv_path' =>
+    (is => 'ro',
+     isa => 'Maybe[Str]',
+     documentation => 'Path for input of existing QC results. Optional; '.
+         'if not defined, omit CSV input.',
+);
+
+has 'data_object_paths' =>
+    (is => 'ro',
+     isa => 'ArrayRef[Str]',
+     required => 1,
+     documentation => 'iRODS paths for results which may be added to QC',
+);
+
+has 'irods' =>
+    (is => 'ro',
+     isa => 'WTSI::NPG::iRODS',
+     lazy => 1,
+     builder => '_build_irods',
+);
+
+has 'paths_by_plate_well' =>
+    (is => 'ro',
+     isa => 'HashRef[HashRef[Str]]',
+     init_arg => undef,
+     lazy => 1,
+     builder => '_build_paths_by_plate_well',
+     documentation => 'Input iRODS paths, indexed by plate and well. ',
+);
+
+
+=head2 csv_fields
+
+  Arg [1]    : WTSI::NPG::Genotyping::Fluidigm::AssayDataObject
+
+  Example    : my $fields = $qc->csv_fields($assay_data_object);
+
+  Description: Find QC data for the given AssayDataObject, for CSV output.
+
+               CSV format consists of the fields returned by the
+               summary_fields() method of
+               WTSI::NPG::Genotyping::Fluidigm::AssayResultSet;
+               and three additional fields, denoting the Fluidigm
+               plate, Fluidigm well, and md5 checksum.
+
+  Returntype : [ArrayRef] CSV fields for update
+
+=cut
+
+sub csv_fields {
+    my ($self, $obj) = @_;
+    my @fields = @{$obj->assay_resultset->summary_fields};
+    # Find Fluidigm plate/well from object metadata
+    my ($plate, $well);
+    my $plate_avu = $obj->get_avu($FLUIDIGM_PLATE_NAME);
+    my $well_avu = $obj->get_avu($FLUIDIGM_PLATE_WELL);
+    if ($plate_avu) {
+        $plate = $plate_avu->{'value'};
+    } else {
+        $self->logcroak("$FLUIDIGM_PLATE_NAME AVU not found for data ",
+                        "object '", $obj->str, "'");
+    }
+    if ($well_avu) {
+        $well = $well_avu->{'value'};
+    } else {
+        $self->logcroak("$FLUIDIGM_PLATE_WELL AVU not found for data ",
+                        "object '", $obj->str, "'");
+    }
+    # Append plate, well, and md5 checksum
+    push @fields, $plate, $well, $obj->checksum;
+    return \@fields;
+}
+
+=head2 csv_string
+
+  Arg [1]    : WTSI::NPG::Genotyping::Fluidigm::AssayDataObject
+
+  Example    : my $str = $qc->csv_string($assay_data_object);
+  Description: Find updated QC data for the given AssayDataObject.
+               Return string for CSV output.
+  Returntype : Str
+
+=cut
+
+sub csv_string {
+    my ($self, $assay_data_object) = @_;
+    my $fields = $self->csv_fields($assay_data_object);
+    my $status = $self->csv->combine(@{$fields});
+    if (! $status) {
+        $self->logcroak("Error combining CSV inputs: '",
+                        $self->csv->error_input, "'");
+    }
+    return $self->csv->string();
+}
+
+=head2 rewrite_existing_csv
+
+  Arg [1]    : Filehandle
+
+  Example    : my $data_object_paths = $qc->rewrite_existing_csv($fh);
+
+  Description: Read the existing CSV file, and write an updated version to the
+               given filehandle. Records will be updated if there is a
+               matching data object with the same plate and well, and a
+               different checksum; otherwise the original record is output
+               unchanged.
+
+               Returns the set of data object paths which match
+               the plate and well of an existing CSV record.
+
+  Returntype : Set::Scalar
+
+=cut
+
+sub rewrite_existing_csv {
+    my ($self, $out) = @_;
+    my $existing_paths = Set::Scalar->new();
+    if (! defined $self->csv_path) {
+        $self->logwarn('Existing CSV path is not defined; cannot rewrite ',
+                       'previous results');
+        return $existing_paths;
+    }
+    my $matched = 0;
+    my $updated = 0;
+    my $total = 0;
+    open(my $in, "<", $self->csv_path) or # 'or', not '||': test open() itself
+        $self->logcroak("Cannot open CSV path '", $self->csv_path, "': $!");
+    while (my $original_csv_line = <$in>) {
+        my $record = $original_csv_line; # keep the raw line for verbatim output
+        chomp $record;
+        my @fields = $self->_parse_csv_fields($record);
+        my $plate = $fields[$PLATE_INDEX];
+        my $well = $fields[$WELL_INDEX];
+        my $update_path = $self->paths_by_plate_well->{$plate}{$well};
+        if (defined $update_path) {
+            $existing_paths->insert($update_path);
+            $matched++;
+            my $md5 = $fields[$MD5_INDEX];
+            if ($md5 eq $self->checksums_by_path->{$update_path}) {
+                $self->debug('No update for plate ', $plate, ', well ',
+                             $well, '; md5 checksum is unchanged');
+                print $out $original_csv_line;
+            } else {
+                $self->debug('Updating plate ', $plate, ', well ',
+                             $well, ' from data object path ',
+                             $update_path);
+                my $update_obj = $self->_get_fluidigm_data_obj($update_path);
+                print $out $self->csv_string($update_obj)."\n";
+                $updated++;
+            }
+        } else {
+            $self->debug('No update for plate ', $plate, ', well ',
+                         $well, '; no corresponding data object was found');
+            print $out $original_csv_line;
+        }
+        $total++;
+    }
+    close($in) or
+        $self->logcroak("Cannot close CSV path '", $self->csv_path, "': $!");
+    $self->info('Rewrote ', $total, ' existing CSV records for Fluidigm ',
+                'QC; matched ', $matched, ' data objects; updated ',
+                $updated, ' records');
+    return $existing_paths;
+}
+
+
+=head2 write_csv
+
+  Arg [1]    : Filehandle for output
+
+  Example    : $qc->write_csv($fh);
+
+  Description: Write an updated CSV to the given filehandle. Output
+               consists of records in the existing CSV file,
+               updated as appropriate; and records for any new data
+               objects which do not appear in the existing file. (If the
+               existing CSV file is not defined, this method simply writes
+               CSV records for all data objects.)
+
+               Output for new data objects is sorted in (plate, well) order.
+
+  Returntype : Returns True on completion
+
+=cut
+
+sub write_csv {
+    my ($self, $out) = @_;
+    my $existing_paths; # data object paths which match existing CSV records
+    if (defined $self->csv_path) {
+        $existing_paths = $self->rewrite_existing_csv($out);
+    }
+    my $total = 0;
+    my @update_lines;
+    foreach my $obj_path (@{$self->data_object_paths}) {
+        if (defined $existing_paths && $existing_paths->has($obj_path)) {
+            $self->debug('Object ', $obj_path, ' already exists in CSV');
+        } else {
+            $self->debug('Finding new CSV output for object ', $obj_path);
+            my $data_obj = $self->_get_fluidigm_data_obj($obj_path);
+            push @update_lines, $self->csv_string($data_obj)."\n";
+            $total++;
+        }
+    }
+    $self->info('Found ', $total, ' new CSV records for Fluidigm QC');
+    my $sort_ref = $self->_by_plate_well();
+    my @sorted_lines = sort $sort_ref @update_lines;
+    $self->debug('Sorted ', $total, ' new records in (plate, well) order');
+    foreach my $line (@sorted_lines) { print $out $line; }
+    $self->debug('Wrote ', $total, ' new records to output');
+    return 1;
+}
+
+{
+    # %meta_by_path is a file-scoped cache shared by the two builders below
+    # (and by every QC instance), so iRODS is read in one pass per instance.
+    # A builder repopulates it when any of its own paths is not yet cached.
+
+    my %meta_by_path;
+
+    sub _populate_meta_by_path {
+        my ($self,) = @_;
+        my $total = scalar @{$self->data_object_paths};
+        $self->info('Finding (checksum, plate, well) metadata for ', $total,
+                    ' data object paths');
+        my $count = 0;
+        foreach my $obj_path (@{$self->data_object_paths}) {
+            my $data_obj = $self->_get_fluidigm_data_obj($obj_path);
+            my $checksum = $data_obj->checksum;
+            my $plate = $data_obj->get_avu($FLUIDIGM_PLATE_NAME)->{'value'};
+            my $well = $data_obj->get_avu($FLUIDIGM_PLATE_WELL)->{'value'};
+            $meta_by_path{$obj_path} = [$checksum, $plate, $well];
+            $count++;
+            if ($count % $REPORTING_BLOCK_SIZE == 0) {
+                $self->debug('Found (plate, well) index and checksum for ',
+                             $count, ' of ', $total, ' data object paths');
+            }
+        }
+        $self->info('Finished processing ', $total, ' data object paths');
+        return 1;
+    }
+
+    sub _build_checksums_by_path {
+        my ($self,) = @_;
+        if (grep { ! exists $meta_by_path{$_} } @{$self->data_object_paths}) {
+            $self->debug('No metadata found for checksums; ',
+                         'populating from iRODS');
+            $self->_populate_meta_by_path();
+        }
+        my %checksums_by_path;
+        foreach my $obj_path (@{$self->data_object_paths}) {
+            my $values = $meta_by_path{$obj_path};
+            my ($checksum, $plate, $well) = @{$values};
+            if (defined $checksums_by_path{$obj_path}) {
+                $self->logcroak('iRODS data object path ', $obj_path,
+                                ' appears more than once in inputs');
+            }
+            $checksums_by_path{$obj_path} = $checksum;
+        }
+        return \%checksums_by_path;
+    }
+
+    sub _build_paths_by_plate_well {
+        my ($self,) = @_;
+        if (grep { ! exists $meta_by_path{$_} } @{$self->data_object_paths}) {
+            $self->debug('No metadata found for plate/well; ',
+                         'populating from iRODS');
+            $self->_populate_meta_by_path();
+        }
+        my %paths_by_plate_well;
+        foreach my $obj_path (@{$self->data_object_paths}) {
+            my $values = $meta_by_path{$obj_path};
+            my ($checksum, $plate, $well) = @{$values};
+            if (defined $paths_by_plate_well{$plate}{$well}) {
+                $self->logcroak('Duplicate plate ', $plate, ' and well ',
+                                $well, ' for data objects: ', $obj_path, ', ',
+                                $paths_by_plate_well{$plate}{$well}
+                            );
+            }
+            $paths_by_plate_well{$plate}{$well} = $obj_path;
+        }
+        return \%paths_by_plate_well;
+    }
+}
+
+sub _build_irods {
+    # use instead of a default, to allow irods attribute to be lazy
+    my ($self,) = @_;
+    return WTSI::NPG::iRODS->new;
+}
+
+sub _by_plate_well {
+    # return a coderef used to sort CSV lines in (plate, well) order
+    my ($self,) = @_;
+
+    return sub {
+        my @fields_a = $self->_parse_csv_fields($a);
+        my @fields_b = $self->_parse_csv_fields($b);
+        my $plate_a = $fields_a[$PLATE_INDEX];
+        my $plate_b = $fields_b[$PLATE_INDEX];
+        my $well_a = $fields_a[$WELL_INDEX];
+        my $well_b = $fields_b[$WELL_INDEX];
+        my @well_fields_a = split(/S[0]*/msx, $well_a);
+        my $well_num_a = pop @well_fields_a;
+        my @well_fields_b = split(/S[0]*/msx, $well_b);
+        my $well_num_b = pop @well_fields_b;
+
+        return $plate_a <=> $plate_b || $well_num_a <=> $well_num_b;
+    };
+}
+
+sub _get_fluidigm_data_obj {
+    # safely create a Fluidigm AssayDataObject from path
+    my ($self, $obj_path) = @_;
+    my $data_obj;
+    try {
+        $data_obj = WTSI::NPG::Genotyping::Fluidigm::AssayDataObject->new
+            ($self->irods, $obj_path);
+    } catch {
+        $self->logcroak("Unable to create Fluidigm DataObject from ",
+                        "iRODS path '", $obj_path, "': $_");
+    };
+    return $data_obj;
+}
+
+sub _parse_csv_fields {
+    my ($self, $input) = @_;
+    # parse input; croak on a parse failure or a wrong number of fields
+    my $parsed = $self->csv->parse($input);
+    my @fields = $self->csv->fields();
+    if (! $parsed) {
+        $self->logcroak("Unable to parse CSV input: '",
+                        $self->csv->error_input(), "'");
+    } elsif (scalar @fields != $EXPECTED_FIELDS_TOTAL) {
+        $self->logcroak("Expected ", $EXPECTED_FIELDS_TOTAL,
+                        " fields, found ", scalar @fields,
+                        " from input: ", $input);
+    }
+    return @fields;
+}
+
+
+__PACKAGE__->meta->make_immutable;
+
+no Moose;
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+WTSI::NPG::Genotyping::Fluidigm::QC
+
+=head1 DESCRIPTION
+
+A class to process quality control metrics for Fluidigm results.
+
+Find QC metric values from iRODS for CSV output. Optionally, can supply a
+CSV file with existing QC records, which will be updated if the checksum of
+the corresponding iRODS data object has changed.
+
+Output consists of any existing records in their original order, followed
+by new records in (plate, well) order. Each (plate, well) pair will have
+exactly one record in the output.
+
+=head1 AUTHOR
+
+Iain Bancarz
+
+=head1 COPYRIGHT AND DISCLAIMER
+
+Copyright (C) 2017 Genome Research Limited. All Rights Reserved.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the Perl Artistic License or the GNU General
+Public License as published by the Free Software Foundation, either
+version 3 of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+=cut
diff --git a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultSetTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultSetTest.pm
index 09564aec1..2d23ee472 100644
--- a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultSetTest.pm
+++ b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultSetTest.pm
@@ -8,7 +8,7 @@ use warnings;
 
 use base qw(WTSI::NPG::Test);
 use File::Spec;
-use Test::More tests => 68;
+use Test::More tests => 70;
 use Test::Exception;
 
 Log::Log4perl::init('./etc/log4perl_tests.conf');
@@ -96,6 +96,19 @@ sub size : Test(1) {
   cmp_ok($resultset->size, '==', 96, 'Expected size');
 }
 
+sub string : Test(1) {    # summary_string() should give the result set as one comma-separated record
+  cmp_ok($resultset->summary_string(),
+         'eq',
+         'ABC0123456789,1.0000,96,96,70,70,96,26,26',
+         'Expected CSV string');
+}
+
+sub summary_fields : Test(1) {    # summary_fields should give the same nine values as an array reference
+  my $expected_fields =
+    ['ABC0123456789', '1.0000', 96, 96, 70, 70, 96, 26, 26];
+  is_deeply($resultset->summary_fields, $expected_fields, 'Expected summary');
+}
+
 sub assay_results : Test(3) {
   cmp_ok(scalar @{$resultset->assay_results}, '==', 96,
          'Contains expected number of assay results');
diff --git a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultTest.pm
index 7927f7e2e..635d2a3a1 100644
--- a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultTest.pm
+++ b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/AssayResultTest.pm
@@ -8,7 +8,7 @@ use warnings;
 
 use base qw(WTSI::NPG::Test);
 use File::Spec;
-use Test::More tests => 402;
+use Test::More tests => 407;
 use Test::Exception;
 
 Log::Log4perl::init('./etc/log4perl_tests.conf');
@@ -244,6 +244,91 @@ sub is_call : Test(5) {
      str            => '')->is_call, 'Is not call 3');
 }
 
+sub is_template : Test(5) {    # three checks of is_template_assay, then two of is_template_call
+
+  # AssayResult is a template assay iff not empty and not control
+  ok(WTSI::NPG::Genotyping::Fluidigm::AssayResult->new
+     (assay          => 'S01-A01',
+      snp_assayed    => 'rs0123456',
+      x_allele       => 'G',
+      y_allele       => 'T',
+      sample_name    => 'ABC0123456789',
+      type           => 'Unknown',
+      auto           => 'XY',
+      confidence     => 0.1,
+      final          => 'XY',
+      converted_call => 'G:T',
+      x_intensity    => 0.1,
+      y_intensity    => 0.1,
+      str            => '')->is_template_assay, 'Is template assay 1');
+
+  # Empty AssayResult is not a template assay
+  ok(!WTSI::NPG::Genotyping::Fluidigm::AssayResult->new
+     (assay          => 'S01-A01',
+      snp_assayed    => 'rs0123456',
+      x_allele       => 'G',
+      y_allele       => 'T',
+      sample_name    => '[ Empty ]',    # the only field that differs from case 1
+      type           => 'Unknown',
+      auto           => 'XY',
+      confidence     => 0.1,
+      final          => 'XY',
+      converted_call => 'G:T',
+      x_intensity    => 0.1,
+      y_intensity    => 0.1,
+      str            => '')->is_template_assay, 'Is template assay 2');
+
+  # Control AssayResult is not a template assay
+  ok(!WTSI::NPG::Genotyping::Fluidigm::AssayResult->new
+     (assay          => 'S01-A01',
+      snp_assayed    => '',
+      x_allele       => 'G',
+      y_allele       => 'T',
+      sample_name    => 'ABC0123456789',
+      type           => 'NTC',
+      auto           => 'XY',
+      confidence     => 0.1,
+      final          => 'XY',
+      converted_call => 'G:T',
+      x_intensity    => 0.1,
+      y_intensity    => 0.1,
+      str            => '')->is_template_assay, 'Is template assay 3');
+
+  # AssayResult is a template assay *and* a call
+  ok(WTSI::NPG::Genotyping::Fluidigm::AssayResult->new
+     (assay          => 'S01-A01',
+      snp_assayed    => 'rs0123456',
+      x_allele       => 'G',
+      y_allele       => 'T',
+      sample_name    => 'ABC0123456789',
+      type           => 'Unknown',
+      auto           => 'XY',
+      confidence     => 0.1,
+      final          => 'XY',
+      converted_call => 'G:T',
+      x_intensity    => 0.1,
+      y_intensity    => 0.1,
+      str            => '')->is_template_call, 'Is template call 1');
+
+  # AssayResult is a template assay *and not* a call
+  ok(!WTSI::NPG::Genotyping::Fluidigm::AssayResult->new
+     (assay          => 'S01-A01',
+      snp_assayed    => 'rs0123456',
+      x_allele       => 'G',
+      y_allele       => 'T',
+      sample_name    => 'ABC0123456789',
+      type           => 'Unknown',
+      auto           => 'XY',
+      confidence     => 0.1,
+      final          => 'No Call',
+      converted_call => 'No Call',
+      x_intensity    => 0.1,
+      y_intensity    => 0.1,
+      str            => '')->is_template_call, 'Is template call 2');
+
+}
+
+
 sub is_valid : Test(4) {
   # Evaluate whether a result is valid. See is_call.
'No Call' and 'invalid'
   # are distinct and represent different experimental outcomes.
diff --git a/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/QCTest.pm b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/QCTest.pm
new file mode 100644
index 000000000..92be8a445
--- /dev/null
+++ b/src/perl/t/WTSI/NPG/Genotyping/Fluidigm/QCTest.pm
@@ -0,0 +1,227 @@
+use utf8;
+
+package WTSI::NPG::Genotyping::Fluidigm::QCTest;
+
+use strict;
+use warnings;
+
+use base qw(WTSI::NPG::Test);
+use File::Copy qw/copy/;
+use File::Slurp qw/read_file/;
+use File::Temp qw/tempdir/;
+use Set::Scalar;
+use Test::More tests => 17;
+use Test::Exception;
+use Text::CSV;
+
+our $logconf = './etc/log4perl_tests.conf';
+Log::Log4perl::init($logconf);
+our $log = Log::Log4perl->get_logger();
+
+BEGIN { use_ok('WTSI::NPG::Genotyping::Fluidigm::QC'); }
+
+use WTSI::NPG::Genotyping::Fluidigm::AssayDataObject;
+use WTSI::NPG::Genotyping::Fluidigm::QC;
+use WTSI::NPG::iRODS;
+use WTSI::NPG::iRODS::Metadata; # has attribute name constants
+use WTSI::NPG::Utilities qw(md5sum);
+
+my $script = 'qc_fluidigm.pl';
+my $plate = '1381735059';
+my $data_path = "./t/fluidigm_qc/$plate";
+my $irods_tmp_coll;
+my @irods_paths;
+my $pid = $$;
+my $tmp;
+my $csv_name = 'fluidigm_qc.csv';
+my $csv_name_outdated = 'fluidigm_qc_outdated_md5.csv';
+
+sub make_fixture : Test(setup) {    # setup: temp dir holding both CSV fixtures, plus an iRODS collection with annotated S01 and S02 objects
+    $tmp = tempdir('Fluidigm_QC_test_XXXXXX', CLEANUP => 1 );
+    copy("./t/fluidigm_qc/$csv_name", $tmp);
+    copy("./t/fluidigm_qc/$csv_name_outdated", $tmp);
+    my $irods = WTSI::NPG::iRODS->new;
+    $irods_tmp_coll = $irods->add_collection("FluidigmQCTest.$pid");
+    $irods->put_collection($data_path, $irods_tmp_coll);
+    foreach my $well (qw/S01 S02/) {
+        my $name = $well.'_'.$plate.'.csv';
+        my $irods_path = $irods_tmp_coll.'/'.$plate.'/'.$name;
+        $irods->add_object_avu($irods_path, 'type', 'csv');
+        $irods->add_object_avu($irods_path, $FLUIDIGM_PLATE_NAME, $plate);
+        $irods->add_object_avu($irods_path, $FLUIDIGM_PLATE_WELL, $well);
+        push @irods_paths, $irods_path;
+    }
+}
+
+sub teardown : Test(teardown) {    # teardown: forget the object paths and remove the iRODS collection
+    @irods_paths = ();
+    my $irods = WTSI::NPG::iRODS->new;
+    $irods->remove_collection($irods_tmp_coll);
+}
+
+sub require : Test(1) {
+    require_ok('WTSI::NPG::Genotyping::Fluidigm::QC');
+}
+
+sub csv_output : Test(5) {    # csv_fields and csv_string for well S02, then failure once the S01 well AVU is removed
+    my $qc = _create_qc_object();
+    my $irods = WTSI::NPG::iRODS->new;
+    my @data_objects;
+    foreach my $obj_path (@irods_paths) {
+        my $obj = WTSI::NPG::Genotyping::Fluidigm::AssayDataObject->new
+            ($irods, $obj_path);
+        push @data_objects, $obj;
+    }
+    my $fields;
+    lives_ok(sub {$fields = $qc->csv_fields($data_objects[1]); },
+             'CSV fields found OK');
+    my $expected_fields = [
+        'XYZ0987654321',
+        '0.9231',
+        96,
+        94,
+        70,
+        70,
+        96,
+        26,
+        24,
+        '1381735059',
+        'S02',
+        '73ca301a0a9e1b9cf87d4daf59eb2815',
+    ];
+    is_deeply($fields, $expected_fields,
+              'Field contents match expected values');
+    my $string;
+    lives_ok(sub {$string = $qc->csv_string($data_objects[1]); },
+             'CSV string found OK');
+    my $expected_string = 'XYZ0987654321,0.9231,96,94,70,70,96,26,24,'.
+        '1381735059,S02,73ca301a0a9e1b9cf87d4daf59eb2815';
+    is($string, $expected_string,
+       'CSV string contents match expected values');
+
+    $data_objects[0]->remove_avu($FLUIDIGM_PLATE_WELL, 'S01');
+    dies_ok(sub { $qc->csv_fields($data_objects[0]); },
+            'Dies without required metadata');
+}
+
+sub rewrite_existing : Test(2) {    # rewrite_existing_csv should return the already-recorded path and reproduce the existing record
+    my $qc = _create_qc_object();
+    my $output_path = "$tmp/rewritten.csv";
+    open my $fh, ">", $output_path or
+        $log->logcroak("Cannot open CSV output ", $output_path);
+    my $got_paths = $qc->rewrite_existing_csv($fh);
+    close $fh or $log->logcroak("Cannot close CSV output ", $output_path);
+    my $expected_paths = Set::Scalar->new();
+    my $path = $irods_tmp_coll.'/1381735059/S01_1381735059.csv';
+    $expected_paths->insert($path);
+    is_deeply($got_paths, $expected_paths, 'Data object paths match');
+    my $contents = read_file($output_path);
+    my $md5 = '11413e77cde2a8dcca89705fe5b25a2d';
+    my $expected = 'ABC0123456789,1.0000,96,96,70,70,96,26,26,1381735059'.
+        ",S01,$md5\n";
+    cmp_ok($contents, 'eq', $expected, 'Rewritten CSV file contents OK');
+}
+
+sub script_metaquery : Test(2) {    # run the script with --query-path and --in-place on the up-to-date CSV
+    my $cmd = "$script --query-path $irods_tmp_coll ".
+        "--old-csv $tmp/$csv_name --in-place --logconf $logconf --debug";
+    $log->info("Running command '$cmd'");
+    ok(system($cmd)==0, "Script with --in-place and metaquery exits OK");
+    my $msg = 'Script in-place CSV output matches expected values';
+    _validate_csv_output("$tmp/$csv_name", $msg);
+}
+
+sub script_update : Test(2) {
+    # ensure an entry with outdated md5 checksum is replaced
+    my $cmd = "$script --query-path $irods_tmp_coll ".
+        "--old-csv $tmp/$csv_name_outdated ".
+        "--in-place --logconf $logconf --debug";
+    $log->info("Running command '$cmd'");
+    ok(system($cmd)==0, "Script with outdated input exits OK");
+    _validate_csv_output("$tmp/$csv_name_outdated",
+                         'Script updated md5 checksum in CSV');
+}
+
+sub script_stdin : Test(2) {    # run the script with data object paths on STDIN and output to a new CSV file
+    my $fh;
+    my $input_path = $tmp."/test_inputs.txt";
+    open $fh, ">", $input_path or
+        $log->logcroak("Cannot open '$input_path'");
+    foreach my $path (@irods_paths) {
+        print $fh $path."\n";
+    }
+    close $fh or $log->logcroak("Cannot close '$input_path'");
+    my $new_csv = "$tmp/fluidigm_qc_output.csv";
+    my $cmd = "$script --new-csv $new_csv --old-csv $tmp/$csv_name ".
+        "--logconf $logconf - < $input_path";
+    $log->info("Running command '$cmd'");
+    ok(system($cmd)==0, "Script with STDIN and new CSV file exits OK");
+    _validate_csv_output($new_csv, 'Script CSV output OK, input from STDIN');
+}
+
+sub write_all : Test(2) {    # write_csv on a QC object should produce both expected records
+    my $qc = _create_qc_object();
+    my $output_path = "$tmp/qc_output.csv";
+    open my $fh, ">", $output_path or
+        $log->logcroak("Cannot open CSV output ", $output_path);
+    ok($qc->write_csv($fh), 'write_csv method returns OK');
+    close $fh or $log->logcroak("Cannot close CSV output ", $output_path);
+    _validate_csv_output($output_path, 'CSV output from QC object OK');
+}
+
+sub _create_qc_object {    # helper: QC object over the fixture CSV and both iRODS paths
+    my $irods = WTSI::NPG::iRODS->new;
+    # 1 of the 2 AssayDataObjects is already present in fluidigm_qc.csv
+    # updated contents will contain QC results for the other AssayDataObject
+    my $csv_path = "$tmp/$csv_name";
+    my $qc = WTSI::NPG::Genotyping::Fluidigm::QC->new(
+        csv_path          => $csv_path,
+        data_object_paths => \@irods_paths,
+    );
+    return $qc;
+}
+
+sub _validate_csv_output {
+    # check that CSV output matches the expected values
+    # run an is_deeply test with the given message
+    my ($csv_path, $message, ) = @_;
+    # check the CSV output
+    my $csv = Text::CSV->new ( { binary => 1 } );
+    open my $fh, "<", "$csv_path" or
+        $log->logcroak("Cannot open input '$csv_path'");
+    my $contents = $csv->getline_all($fh);
+    close $fh or $log->logcroak("Cannot close input '$csv_path'");
+    my $expected_contents = [
+        [
+            'ABC0123456789',
+            '1.0000',
+            96,
+            96,
+            70,
+            70,
+            96,
+            26,
+            26,
+            '1381735059',
+            'S01',
+            '11413e77cde2a8dcca89705fe5b25a2d',
+        ], [
+            'XYZ0987654321',
+            '0.9231',
+            96,
+            94,
+            70,
+            70,
+            96,
+            26,
+            24,
+            '1381735059',
+            'S02',
+            '73ca301a0a9e1b9cf87d4daf59eb2815',
+        ],
+    ];
+    is_deeply($contents, $expected_contents, $message);
+}
+
+
+1;
diff --git a/src/perl/t/fluidigm_qc.t b/src/perl/t/fluidigm_qc.t
new file mode 100644
index 000000000..70178d21e
--- /dev/null
+++ b/src/perl/t/fluidigm_qc.t
@@ -0,0 +1,9 @@
+
+use utf8;
+
+use strict;
+use warnings;
+
+use WTSI::NPG::Genotyping::Fluidigm::QCTest;
+
+WTSI::NPG::Genotyping::Fluidigm::QCTest->runtests;
diff --git a/src/perl/t/fluidigm_qc/1381735059/S01_1381735059.csv b/src/perl/t/fluidigm_qc/1381735059/S01_1381735059.csv new file mode 100644 index 000000000..2ccdf591c --- /dev/null +++ b/src/perl/t/fluidigm_qc/1381735059/S01_1381735059.csv @@ -0,0 +1,96 @@ +S01-A01 GS34251 T C ABC0123456789 Unknown XY 100 XY T:C 0.1 0.1 +S01-A02 GS34251 T C ABC0123456789 Unknown XY 99 XY T:C 0.1 0.1 +S01-A03 GS35220 C T ABC0123456789 Unknown XY 98 XY C:T 0.1 0.1 +S01-A04 GS35220 C T ABC0123456789 Unknown XY 97 XY C:T 0.1 0.1 +S01-A05 rs11096957 T G ABC0123456789 Unknown XY 100 XY T:G 0.1 0.1 +S01-A06 rs12828016 G T ABC0123456789 Unknown XY 100 XY G:T 0.1 0.1 +S01-A07 rs156697 A G ABC0123456789 Unknown XY 100 XY A:G 0.1 0.1 +S01-A08 rs1801262 T C ABC0123456789 Unknown XY 100 XY T:C 0.1 0.1 +S01-A09 rs1805034 C T ABC0123456789 Unknown XY 100 XY C:T 0.1 0.1 +S01-A10 rs1805087 A G ABC0123456789 Unknown XY 100 XY A:G 0.1 0.1 +S01-A11 rs2247870 G A ABC0123456789 Unknown XY 100 XY G:A 0.1 0.1 +S01-A12 rs2286963 T G ABC0123456789 Unknown XY 100 XY T:G 0.1 0.1 +S01-A13 rs3742207 T G ABC0123456789 Unknown XY 100 XY T:G 0.1 0.1 +S01-A14 rs3795677 G A ABC0123456789 Unknown XY 100 XY G:A 0.1 0.1 +S01-A15
rs4075254 G A ABC0123456789 Unknown XY 100 XY G:A 0.1 0.1 +S01-A16 rs4619 A G ABC0123456789 Unknown XY 100 XY A:G 0.1 0.1 +S01-A17 rs4843075 G A ABC0123456789 Unknown XY 100 XY G:A 0.1 0.1 +S01-A18 rs5215 C T ABC0123456789 Unknown XY 100 XY C:T 0.1 0.1 +S01-A19 rs6166 C T ABC0123456789 Unknown XY 100 XY C:T 0.1 0.1 +S01-A20 rs649058 G A ABC0123456789 Unknown XY 100 XY G:A 0.1 0.1 +S01-A21 rs6557634 T C ABC0123456789 Unknown XY 100 XY T:C 0.1 0.1 +S01-A22 rs6759892 T G ABC0123456789 Unknown XY 100 XY T:G 0.1 0.1 +S01-A23 rs7298565 G A ABC0123456789 Unknown XY 100 XY G:A 0.1 0.1 +S01-A24 rs753381 T C ABC0123456789 Unknown XY 100 XY T:C 0.1 0.1 +S01-A25 rs7627615 G A ABC0123456789 Unknown XY 100 XY G:A 0.1 0.1 +S01-A26 rs8065080 T C ABC0123456789 Unknown XY 100 XY T:C 0.1 0.1 +S01-A27 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A28 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A29 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A30 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A31 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A32 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A33 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A34 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A35 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A36 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A37 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A38 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A39 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A40 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A41 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A42 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A43 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A44 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A45 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A46 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A47 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A48 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A49 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A50 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A51 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A52 [ Empty ] NTC NTC 100 NTC NTC 
0.1 0.1 +S01-A53 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A54 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A55 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A56 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A57 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A58 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A59 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A60 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A61 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A62 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A63 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A64 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A65 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A66 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A67 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A68 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A69 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A70 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A71 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A72 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A73 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A74 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A75 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A76 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A77 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A78 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A79 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A80 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A81 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A82 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A83 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A84 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A85 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A86 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A87 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A88 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A89 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A90 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A91 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A92 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A93 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A94 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A95 [ Empty 
] NTC NTC 100 NTC NTC 0.1 0.1 +S01-A96 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 diff --git a/src/perl/t/fluidigm_qc/1381735059/S02_1381735059.csv b/src/perl/t/fluidigm_qc/1381735059/S02_1381735059.csv new file mode 100644 index 000000000..4738b4532 --- /dev/null +++ b/src/perl/t/fluidigm_qc/1381735059/S02_1381735059.csv @@ -0,0 +1,96 @@ +S02-A01 GS34251 T C XYZ0987654321 Unknown XY 100 XY T:C 0.1 0.1 +S02-A02 GS34251 T C XYZ0987654321 Unknown XY 99 XY T:C 0.1 0.1 +S02-A03 GS35220 C T XYZ0987654321 Unknown XY 98 XY C:T 0.1 0.1 +S02-A04 GS35220 C T XYZ0987654321 Unknown XY 97 XY C:T 0.1 0.1 +S02-A05 rs11096957 T G XYZ0987654321 Unknown XY 100 No Call No Call 0.1 0.1 +S02-A06 rs12828016 G T XYZ0987654321 Unknown XY 100 No Call No Call 0.1 0.1 +S02-A07 rs156697 A G XYZ0987654321 Unknown XY 100 XY A:G 0.1 0.1 +S02-A08 rs1801262 T C XYZ0987654321 Unknown XY 100 XY T:C 0.1 0.1 +S02-A09 rs1805034 C T XYZ0987654321 Unknown XY 100 XY C:T 0.1 0.1 +S02-A10 rs1805087 A G XYZ0987654321 Unknown XY 100 XY A:G 0.1 0.1 +S02-A11 rs2247870 G A XYZ0987654321 Unknown XY 100 XY G:A 0.1 0.1 +S02-A12 rs2286963 T G XYZ0987654321 Unknown XY 100 XY T:G 0.1 0.1 +S02-A13 rs3742207 T G XYZ0987654321 Unknown XY 100 XY T:G 0.1 0.1 +S02-A14 rs3795677 G A XYZ0987654321 Unknown XY 100 XY G:A 0.1 0.1 +S02-A15 rs4075254 G A XYZ0987654321 Unknown XY 100 XY G:A 0.1 0.1 +S02-A16 rs4619 A G XYZ0987654321 Unknown XY 100 XY A:G 0.1 0.1 +S02-A17 rs4843075 G A XYZ0987654321 Unknown XY 100 XY G:A 0.1 0.1 +S02-A18 rs5215 C T XYZ0987654321 Unknown XY 100 XY C:T 0.1 0.1 +S02-A19 rs6166 C T XYZ0987654321 Unknown XY 100 XY C:T 0.1 0.1 +S02-A20 rs649058 G A XYZ0987654321 Unknown XY 100 XY G:A 0.1 0.1 +S02-A21 rs6557634 T C XYZ0987654321 Unknown XY 100 XY T:C 0.1 0.1 +S02-A22 rs6759892 T G XYZ0987654321 Unknown XY 100 XY T:G 0.1 0.1 +S02-A23 rs7298565 G A XYZ0987654321 Unknown XY 100 XY G:A 0.1 0.1 +S02-A24 rs753381 T C XYZ0987654321 Unknown XY 100 XY T:C 0.1 0.1 +S02-A25 rs7627615 G A XYZ0987654321 Unknown XY 100 XY G:A 
0.1 0.1 +S02-A26 rs8065080 T C XYZ0987654321 Unknown XY 100 XY T:C 0.1 0.1 +S02-A27 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A28 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A29 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A30 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A31 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A32 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A33 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A34 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A35 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A36 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A37 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A38 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A39 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A40 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A41 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A42 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A43 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A44 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A45 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A46 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A47 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A48 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A49 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A50 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A51 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A52 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A53 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A54 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A55 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A56 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A57 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A58 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A59 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A60 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A61 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A62 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A63 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A64 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A65 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A66 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A67 [ Empty ] NTC NTC 100 NTC NTC 0.1 
0.1 +S02-A68 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A69 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A70 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A71 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A72 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A73 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A74 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A75 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A76 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A77 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A78 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A79 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A80 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A81 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A82 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A83 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A84 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A85 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A86 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A87 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A88 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A89 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A90 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A91 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A92 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A93 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A94 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A95 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 +S02-A96 [ Empty ] NTC NTC 100 NTC NTC 0.1 0.1 diff --git a/src/perl/t/fluidigm_qc/fluidigm_qc.csv b/src/perl/t/fluidigm_qc/fluidigm_qc.csv new file mode 100644 index 000000000..add6dff97 --- /dev/null +++ b/src/perl/t/fluidigm_qc/fluidigm_qc.csv @@ -0,0 +1 @@ +ABC0123456789,1.0000,96,96,70,70,96,26,26,1381735059,S01,11413e77cde2a8dcca89705fe5b25a2d diff --git a/src/perl/t/fluidigm_qc/fluidigm_qc_outdated_md5.csv b/src/perl/t/fluidigm_qc/fluidigm_qc_outdated_md5.csv new file mode 100644 index 000000000..8302e6cf5 --- /dev/null +++ b/src/perl/t/fluidigm_qc/fluidigm_qc_outdated_md5.csv @@ -0,0 +1 @@ 
+ABC0123456789,1.0000,96,96,70,70,96,26,26,1381735059,S01,734b53d21b748751d06c342e5ee526ec