Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First commit of MAPD analysis support in npg_tracking #500

Open
wants to merge 2 commits into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 91 additions & 0 deletions lib/npg_tracking/data/mapd.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package npg_tracking::data::mapd;

use Moose;

our $VERSION = '0';

extends 'npg_tracking::data::reference';

# Optional numeric parameters. The npg_tracking::data::mapd::find role
# reads both of them when it composes the name of a mappability file.
has [qw(bin_size read_length)] => (
  is       => 'ro',
  isa      => 'Num',
  required => 0,
);

# The role lists read_length and bin_size in its 'requires' clause,
# so it is composed only after the attributes above are declared.
with 'npg_tracking::data::mapd::find';

__PACKAGE__->meta->make_immutable;

no Moose;

1;
__END__

=head1 NAME

npg_tracking::data::mapd

=head1 VERSION

=head1 SYNOPSIS

=head1 DESCRIPTION

A wrapper class for finding the location of MAPD files.

=head1 SUBROUTINES/METHODS

=head2 id_run

=head2 position

=head2 tag_index

=head2 rpt_list

=head1 DIAGNOSTICS

=head1 CONFIGURATION AND ENVIRONMENT

=head1 DEPENDENCIES

=over

=item Moose

=back

=head1 INCOMPATIBILITIES

=head1 BUGS AND LIMITATIONS

=head1 AUTHOR

Ruben Bautista E<lt>[email protected]E<gt>

=head1 LICENSE AND COPYRIGHT

Copyright (C) 2018 GRL

This file is part of NPG.

NPG is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.

=cut
192 changes: 192 additions & 0 deletions lib/npg_tracking/data/mapd/find.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
package npg_tracking::data::mapd::find;

use Moose::Role;
use Carp;
use Readonly;
use File::Spec;
use npg_tracking::util::abs_path qw(abs_path);

# This role builds on the reference finder role.
with qw/npg_tracking::data::reference::find/;

# The consuming class has to provide these two methods; their values are
# substituted into the mappability file names below.
requires qw/read_length bin_size/;

our $VERSION = '0';

# sprintf templates for mappability file names. _find_mappability_file fills
# the placeholders with organism, strain, bin size and read length, in this
# order. The first template is used for the 'txt' file type, the second one
# for the 'bed' file type.
Readonly::Scalar my $MAPPABILITY_FILE => 'Combined_%s_%s_%s_%dbases_mappable_bins_GCperc_INPUT.txt';
Readonly::Scalar my $MAPPABILITY_BED_FILE => 'Combined_%s_%s_%s_%dbases_mappable_bins.bed';

# Lazily built locations of the MAPD directories and files. Every attribute
# is read-only, may be undefined and gets its value from the matching
# _build_<attribute name> method of this role.
has [qw(
  mappablebins_path
  mappability_file
  mappability_bed_file
  chromosomes_path
  chromosomes_file
)] => (
  is         => 'ro',
  isa        => 'Maybe[Str]',
  lazy_build => 1,
);

# Builder for mappablebins_path: delegates to _find_path for the
# MappableBINS directory.
sub _build_mappablebins_path {
  my ($self) = @_;
  my $dir_name = q[MappableBINS];
  return $self->_find_path($dir_name);
}

# Builder for chromosomes_path: delegates to _find_path for the
# chromosomes directory.
sub _build_chromosomes_path {
  my ($self) = @_;
  my $dir_name = q[chromosomes];
  return $self->_find_path($dir_name);
}

# Builder for mappability_file: asks _find_mappability_file for the
# file of type txt.
sub _build_mappability_file {
  my ($self) = @_;
  my $file_type = q[txt];
  return $self->_find_mappability_file($file_type);
}

# Builder for mappability_bed_file: asks _find_mappability_file for the
# file of type bed.
sub _build_mappability_bed_file {
  my ($self) = @_;
  my $file_type = q[bed];
  return $self->_find_mappability_file($file_type);
}

# Builder for chromosomes_file: asks _find_file for the txt file in the
# chromosomes directory.
sub _build_chromosomes_file {
  my ($self) = @_;
  my @lookup = (q[chromosomes], q[txt]);
  return $self->_find_file(@lookup);
}

# Resolves the directory
#   <custom analysis repository>/mapd/<organism>/<strain>/<dir_name>
# via abs_path. Organism and strain come from parsing the LIMS reference
# genome string. Returns an undefined value when either of them is missing.
sub _find_path {
  my ($self, $dir_name) = @_;

  my ($organism, $strain) =
    $self->parse_reference_genome($self->lims->reference_genome);

  my $resolved;
  if ($organism && $strain) {
    # The repository accessor is consulted only once both parts are known.
    my $repository = $self->custom_analysis_repository;
    $resolved = abs_path(
      join q[/], $repository, q[mapd], $organism, $strain, $dir_name);
  }

  return $resolved;
}

# Finds the single file with the extension $file_type in the MAPD
# sub-directory $subfolder (resolved with _find_path).
#
# Returns the path of the file. Returns nothing if the directory cannot be
# resolved or contains no such file; in the latter case, when the resolved
# directory exists, a diagnostic is added to the messages.
# Croaks if more than one matching file is present.
sub _find_file {
  my ($self, $subfolder, $file_type) = @_;

  my $path = $self->_find_path($subfolder);
  if (!$path) {
    return; # directory could not be resolved, nothing to look at
  }

  my @files = glob $path . q[/*.] . $file_type;
  if (scalar @files > 1) {
    croak qq[More than one $file_type file in $path];
  }

  if (scalar @files == 0) {
    # Test the resolved directory; the bare sub-folder name would be
    # checked relative to the current working directory.
    if (-d $path) {
      $self->messages->push(qq[Directory $subfolder exists, but no such *.$file_type file exist]);
    }
    return;
  }

  return $files[0];
}

# Finds the mappability file of the given type in the MappableBINS directory.
#
# $file_type is either 'txt' or 'bed'; any other value is a programming
# error and makes the method croak. The file name is composed from the
# template for the type, the organism and strain parsed from the LIMS
# reference genome, and the bin_size and read_length attributes.
#
# Returns the full path of the file if it exists. Otherwise a diagnostic
# is added to the messages and nothing is returned.
sub _find_mappability_file {
  my ($self, $file_type) = @_;

  my %template_for = (
    bed => $MAPPABILITY_BED_FILE,
    txt => $MAPPABILITY_FILE,
  );
  if (!defined $file_type || !exists $template_for{$file_type}) {
    croak q[Unsupported mappability file type: ] .
          (defined $file_type ? $file_type : q[undefined]);
  }

  my $mappablebins_path = $self->mappablebins_path;
  if (!$mappablebins_path) {
    # Do not claim that an unresolved directory exists.
    $self->messages->push(q[Directory with mappability files is not available]);
    return;
  }

  my @files = glob $mappablebins_path . q[/*.] . $file_type;
  if (scalar @files == 0) {
    $self->messages->push(q[Directory ]. $mappablebins_path.
                          q[ exists, but no such *.]. $file_type.
                          q[ file(s) exist]);
    return;
  }

  my ($organism, $strain) = $self->parse_reference_genome($self->lims->reference_genome);
  if (!($organism && $strain)) {
    # Without both parts only the raw template is known, which is not a path.
    $self->messages->push(q[Organism and strain are needed to find a mappability file in ].
                          $mappablebins_path);
    return;
  }

  my $bin_size    = $self->bin_size;
  my $read_length = $self->read_length;
  if (!defined $bin_size || !defined $read_length) {
    # Both attributes are optional, but the file name depends on them.
    $self->messages->push(q[Both bin_size and read_length are needed to find a mappability file in ].
                          $mappablebins_path);
    return;
  }

  my $file_name = sprintf $template_for{$file_type},
                          $organism, $strain, $bin_size, $read_length;
  my $mappability_file = File::Spec->catfile($mappablebins_path, $file_name);
  if (! -e $mappability_file) {
    $self->messages->push(q[Mappability file ]. $mappability_file.
                          q[ not found in ]. $mappablebins_path);
    return;
  }

  return $mappability_file;
}

1;
__END__

=head1 NAME

npg_tracking::data::mapd::find

=head1 SYNOPSIS

package MyPackage;
use Moose;
with qw{npg_tracking::data::mapd::find};


=head1 DESCRIPTION

A Moose role for finding the location of MAPD files.

=head1 SUBROUTINES/METHODS

=head1 DIAGNOSTICS

=head1 CONFIGURATION AND ENVIRONMENT

=head1 DEPENDENCIES

=over

=item Moose::Role

=item Carp

=back

=head1 INCOMPATIBILITIES

=head1 BUGS AND LIMITATIONS

=head1 AUTHOR

Ruben Bautista

=head1 LICENSE AND COPYRIGHT

Copyright (C) 2018 Genome Research Limited

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
17 changes: 17 additions & 0 deletions lib/npg_tracking/data/reference/list.pm
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ Readonly::Scalar our $TAG_SETS_DIR => q[tag_sets];
Readonly::Scalar our $TAXON_IDS_DIR => q[taxon_ids];
Readonly::Scalar our $BIN_DIR => q[bin];
Readonly::Scalar our $ORG_NAME_DELIM => q[_];
# Directory name that _build_custom_analysis_repository appends to the
# repository root.
Readonly::Scalar our $CUSTOM_ANALYSIS_DIR=> q[custom_analysis];

Readonly::Scalar our $LAST => -1;
Readonly::Scalar our $SECOND_FROM_END => -2;
Expand Down Expand Up @@ -597,6 +598,22 @@ sub ref_file_prefix {
croak qq[Reference file with .fa or .fasta or .fna extension not found in $fasta_dir];
}

=head2 custom_analysis_repository

An absolute path to the custom_analysis repository.

=cut
# Optional, read-only, lazily built attribute; see the builder below
# for the default value.
has 'custom_analysis_repository' => (
  is         => 'ro',
  isa        => 'NPG_TRACKING_REFERENCE_REPOSITORY',
  required   => 0,
  lazy_build => 1,
);

# Default value: the custom analysis directory directly under the
# repository root.
sub _build_custom_analysis_repository {
  my ($self) = @_;
  my $root = $self->repository;
  return catdir($root, $CUSTOM_ANALYSIS_DIR);
}


no Moose::Role;

1;
Expand Down
70 changes: 70 additions & 0 deletions t/10-mapd-find.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
use strict;
use warnings;
use Test::More tests => 2;
use Test::Exception;
use File::Basename;
use File::Copy;
use File::Find;
use File::Path qw(make_path);
use File::Spec::Functions qw(catfile catdir);
use File::Spec qw(splitpath);
use File::Temp qw(tempdir);

# A throw-away repository with the directory layout the finder expects:
#   <repository>/custom_analysis/mapd/<organism>/<strain>/{MappableBINS,chromosomes}
my $tmp_repos = tempdir(CLEANUP => 1);
local $ENV{NPG_WEBSERVICE_CACHE_DIR} = $tmp_repos;

my $ref_dir = catdir($tmp_repos,'custom_analysis','mapd','Homo_sapiens','1000Genomes_hs37d5');
my $bins_dir = catdir($ref_dir, 'MappableBINS');
my $chromosomes_dir = catdir($ref_dir, 'chromosomes');
make_path($bins_dir, $chromosomes_dir);

# Empty fixture files for two bin sizes and a chromosomes list. They are
# created with a checked open/close so that a failure stops the test
# straight away and no shell is involved.
my @fixtures = (
  catfile($bins_dir, 'Combined_Homo_sapiens_1000Genomes_hs37d5_100000_151bases_mappable_bins_GCperc_INPUT.txt'),
  catfile($bins_dir, 'Combined_Homo_sapiens_1000Genomes_hs37d5_100000_151bases_mappable_bins.bed'),
  catfile($bins_dir, 'Combined_Homo_sapiens_1000Genomes_hs37d5_500000_151bases_mappable_bins_GCperc_INPUT.txt'),
  catfile($bins_dir, 'Combined_Homo_sapiens_1000Genomes_hs37d5_500000_151bases_mappable_bins.bed'),
  catfile($chromosomes_dir, 'chr_list.txt'),
);
for my $fixture (@fixtures) {
  open my $fh, '>', $fixture or die "Cannot create $fixture: $!";
  close $fh or die "Cannot close $fixture: $!";
}

my $central = 't/data/mapd/';

use_ok('npg_tracking::data::mapd');

# LIMS data for the test come from this cached samplesheet.
local $ENV{'NPG_CACHED_SAMPLESHEET_FILE'} = catfile($central, 'metadata_cache', 'samplesheet_27128.csv');

# Checks that directories and files are found for a 151 base read length
# and a bin size of 100000 in the temporary repository created above.
subtest 'find mapd files 1' => sub {
  plan tests => 8;

  my $test = npg_tracking::data::mapd->new(
    id_run      => 27128,
    position    => 1,
    tag_index   => 1,
    repository  => $tmp_repos,
    read_length => 151,
    bin_size    => 100000,);

  isa_ok($test, 'npg_tracking::data::mapd');

  is($test->lims->reference_genome, 'Homo_sapiens (1000Genomes_hs37d5 + ensembl_75_transcriptome)',
    'reference genome ok');

  # The temporary directory path is data, not a pattern: quote it so that
  # regex metacharacters in it (and whitespace under /x) are matched literally.
  like($test->custom_analysis_repository, qr{\Q$tmp_repos\E/custom_analysis}smx,
    'custom analysis repository path is correct');

  my ($organism, $strain) = $test->parse_reference_genome($test->lims->reference_genome);
  my $mappablebins_path = catdir($tmp_repos, 'custom_analysis', 'mapd', $organism, $strain, 'MappableBINS');
  my $chromosomes_path = catdir($tmp_repos, 'custom_analysis', 'mapd', $organism, $strain, 'chromosomes');

  is($test->mappablebins_path, $mappablebins_path,
    'mappablebins path is correct');

  is($test->chromosomes_path, $chromosomes_path,
    'chromosomes path is correct');

  is(basename($test->mappability_file), 'Combined_Homo_sapiens_1000Genomes_hs37d5_100000_151bases_mappable_bins_GCperc_INPUT.txt',
    'finds mappability file');

  is(basename($test->mappability_bed_file), 'Combined_Homo_sapiens_1000Genomes_hs37d5_100000_151bases_mappable_bins.bed',
    'finds mappability bed file');

  is(basename($test->chromosomes_file), 'chr_list.txt',
    'finds chromosomes list file');
};


1;
Loading