Skip to content

Commit

Permalink
PacBio iRODS data - set QC state metadata
Browse files Browse the repository at this point in the history
... when it is safe to do so.
  • Loading branch information
mgcam committed Mar 21, 2024
1 parent 2ee59ad commit a5f3ea0
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 3 deletions.
45 changes: 45 additions & 0 deletions lib/WTSI/NPG/HTS/PacBio/Annotator.pm
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,51 @@ sub make_tag_metadata {
return $self->_make_multi_value_metadata(\@run_records, $method_attr);
}

=head2 make_qc_metadata

  Arg [n]      PacBio run database records,
               List[WTSI::DNAP::Warehouse::Schema::Result::PacBioRun].

  Example    : my @avus = $ann->make_qc_metadata(@run_records);
  Description: Return QC outcome AVU metadata for a single product.
               An empty list is returned if the input list contains
               no records or multiple records, or if the only record
               is not linked to exactly one record in the
               pac_bio_product_metrics table, or if the QC outcome of
               the linked product record is undefined.
               This method should be called in the context of a single
               iRODS object. If, according to a record in the pac_bio_run
               table, a well contains multiple samples, but in practice
               no deplexing was done, we might get multiple pac_bio_run
               table rows when trying to establish data provenance.
               Opting out of assigning a QC outcome in this case is
               a conscious, conservative decision that was made at the
               time of writing (March 2024).
  Returntype : List[HashRef]

=cut

sub make_qc_metadata {
  my ($self, @run_records) = @_;

  # Accumulator for the result; it stays empty unless a QC outcome can be
  # attributed unambiguously to a single product.
  my @qc_avus;

  # Anything other than exactly one run record means no QC state is assigned.
  if (scalar(@run_records) != 1) {
    return @qc_avus;
  }

  my $run_record      = $run_records[0];
  my @linked_products = $run_record->pac_bio_product_metrics()->all();

  # Having no linked product record can legitimately happen, exactly one
  # linked product record is the normal case, and more than one is, most
  # likely, an error. Only the normal case yields metadata.
  if (scalar(@linked_products) != 1) {
    return @qc_avus;
  }

  my $outcome = $linked_products[0]->qc();
  # An undefined outcome means there is nothing to report, so no AVU is added.
  if (defined $outcome) {
    push @qc_avus, $self->make_avu($QC_STATE, $outcome);
  }

  return @qc_avus;
}

sub _make_multi_value_metadata {
my ($self, $objs, $method_attr) = @_;
# The method_attr argument is a map of method name to attribute name
Expand Down
3 changes: 2 additions & 1 deletion lib/WTSI/NPG/HTS/PacBio/MetaUpdater.pm
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ sub update_secondary_metadata {
try {
my @run_records = $self->find_pacbio_runs(
$id_run, $well, $tag_id, $plate_number);
my @secondary_avus = $self->make_secondary_metadata(@run_records);
my @secondary_avus = map { $self->$_(@run_records) }
qw/make_secondary_metadata make_qc_metadata/;
$obj->update_secondary_metadata(@secondary_avus);

$self->info("Updated metadata on '$path' ",
Expand Down
36 changes: 34 additions & 2 deletions t/lib/WTSI/NPG/HTS/PacBio/MetaUpdaterTest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ sub require : Test(1) {
require_ok('WTSI::NPG::HTS::PacBio::MetaUpdater');
}

sub update_secondary_metadata : Test(7) {
sub update_secondary_metadata : Test(8) {
my $irods = WTSI::NPG::iRODS->new(environment => \%ENV,
strict_baton_version => 0);
my $updater = WTSI::NPG::HTS::PacBio::MetaUpdater->new
Expand Down Expand Up @@ -135,7 +135,20 @@ sub update_secondary_metadata : Test(7) {
'plate_number', $plate_number); # Update primary metadata
my $new_study_name = 'Updated for the test';
$study_row->update({name => $new_study_name}); # Update study name in mlwh

# Create a linked product row with QC outcome defined.
my $rw_row = $wh_schema->resultset('PacBioRunWellMetric')->create({
pac_bio_run_name => $run_name,
well_label => $well_label,
plate_number => $plate_number,
id_pac_bio_product => 'A' x 64,
instrument_type => 'SomeType'
});
my $p_row = $wh_schema->resultset('PacBioProductMetric')->create({
id_pac_bio_rw_metrics_tmp => $rw_row->id_pac_bio_rw_metrics_tmp,
id_pac_bio_tmp => $row->id_pac_bio_tmp,
id_pac_bio_product => $rw_row->id_pac_bio_product
});

# Call the updater again.
$updater->update_secondary_metadata(\@paths_to_update);

Expand Down Expand Up @@ -165,6 +178,25 @@ sub update_secondary_metadata : Test(7) {

is_deeply($updated_as_dict, $expected_as_dict,
'Updated metadata is correct after the update');

$p_row->update({qc => 1});
$expected_as_dict->{$QC_STATE} = 1;
for my $key ((map { $_ . '_history'} ($STUDY_NAME, $PACBIO_STUDY_NAME))) {
delete $expected_as_dict->{$key};
}
# Call the updater again.
$updater->update_secondary_metadata(\@paths_to_update);
# Inspect the metadata after the update.
$updated = WTSI::NPG::HTS::DataObject->new
(collection => $irods_tmp_coll,
data_object => $data_file,
irods => $irods)->metadata;

$updated_as_dict = {};
for my $meta (@{$updated}) {
$updated_as_dict->{$meta->{'attribute'}} = $meta->{'value'};
}
is($expected_as_dict->{$QC_STATE}, 1, 'Updated metadata contains qc outcome');
}

1;

0 comments on commit a5f3ea0

Please sign in to comment.