Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deleted aggregate_xlanes method from st::api::lims #799

Merged
merged 1 commit into from
Feb 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 2 additions & 135 deletions lib/st/api/lims.pm
Original file line number Diff line number Diff line change
@@ -801,140 +801,6 @@ sub is_composition {
return $self->rpt_list ? 1 : 0;
}

=head2 aggregate_xlanes

For a run-level st::api::lims object returns a list of st::api::lims
objects representing aggregated entities.

Aggregation is performed across all lanes of the lims object, unless
an explicit list of positions is given as an argument.

If all lanes are pools, aggregation is performed per tag index (plex)
and the list contains one or more objects, each one representing a tag
index (plex). An entry for tag index zero is added as well.

If lanes are libraries, aggregation of these libraries
is performed and the list contains one object.

It is possible to aggregate one lane, though practically it does not
make much sense.

List members represent compositions and have rpt_list attribute set.

my $l = st::api::lims->new(id_run => 44);
my @aggregated = $l->aggregate_xlanes();
@aggregated = $l->aggregate_xlanes(qw/2 3/);

Assuming run id 44, for two lanes representing the same pool of four tag
indexes (1, 2, 3, 4), the list members will have the following values
of the rpt_list attribute:

44:1:0;44:2:0
44:1:1;44:2:1
44:1:2;44:2:2
44:1:3;44:2:3
44:1:4;44:2:4

The new objects have the same driver settings as the original object.

=cut

#####
# Aggregates the lanes of a run-level object into composition objects
# of this class.
#
# Arguments: an optional list of positions; when given, only these lanes
#            are aggregated.
# Returns:   a list of new objects, each created with an rpt_list
#            argument. If no lane is a pool, the list has one member.
#            Otherwise it has one member per set of plexes collated
#            across the lanes, followed by a member for tag zero.
# Croaks:    if the object is a composition or has a position, if a
#            requested position is not among the children, if the lanes
#            are a mixture of pools and libraries, if the pooled lanes
#            have different numbers of plexes, or if sample_id,
#            library_id (and tag_index for plexes) are undefined or
#            differ between the entities being merged.
#
sub aggregate_xlanes {
    my ($self, @positions) = @_;

    if ($self->is_composition || $self->position) {
        croak 'Not run-level object';
    }

    my $lanes_ia = $self->children_ia;

    #####
    # If a list of positions is given, restrict the operation to
    # this set of positions.
    #
    if (@positions) {
        my $reduced = {};
        foreach my $p (@positions) {
            if (!exists $lanes_ia->{$p}) {
                croak sprintf 'Requested position %i does not exists in %s',
                    $p,
                    $self->to_string();
            }
            $reduced->{$p} = $lanes_ia->{$p};
        }
        $lanes_ia = $reduced;
    }

    my @lanes = sort { $a->position <=> $b->position } values %{$lanes_ia};
    #####
    # Hash keys are returned in no particular order. Sort them so that
    # the positions listed in the error message below are reported in
    # the same, ascending, order on every run.
    #
    @positions = sort { $a <=> $b } keys %{$lanes_ia};

    #####
    # We cannot have a mixture of pools and libraries.
    # Only the number of pooled lanes is used below.
    #
    my @pools = grep { $_->is_pool } @lanes;
    if (@pools != 0 && @pools != @lanes) {
        croak sprintf 'Both pools and libraries in lanes %s in %s',
            join(q[, ], @positions),
            $self->to_string();
    }

    #####
    # Test function. Certain attributes should be defined and be the
    # same across all objects of the lims array (first arg.).
    #
    my $can_merge = sub {
        my ($lims, @attrs) = @_;
        for my $attr_name (@attrs) {
            my @values = grep { defined $_ } map { $_->$attr_name } @{$lims};
            if (@values != @{$lims}) {
                croak qq[$attr_name is not defined for one of lims objects];
            }
            @values = uniq(@values);
            if (@values != 1) {
                croak qq[$attr_name is not the same across lims objects list];
            }
        }
        return;
    }; # End of test function

    #####
    # The new objects are created from a copy of this object's init
    # arguments, with id_run removed and rpt_list supplied instead.
    #
    my $init = $self->copy_init_args();
    delete $init->{'id_run'};

    my @test_attrs = qw/sample_id library_id/;
    my $lanes_rpt_list = npg_tracking::glossary::rpt->deflate_rpts(\@lanes);
    my @aggregated = ();

    if (!@pools) {
        $can_merge->(\@lanes, @test_attrs); # Test consistency
        push @aggregated, __PACKAGE__->new(%{$init}, rpt_list => $lanes_rpt_list);
    } else {
        my @sizes = uniq(map { $_->num_children } @lanes);
        if (@sizes != 1) { # Test consistency
            croak 'Different number of plexes in lanes';
        }

        #####
        # The each_arrayref function is given a list of arrays of plex-level
        # st::api::lims objects, each array represents all plexes in a lane.
        # The arrays of plexes are ordered by tag index. The each_arrayref
        # function returns an iterator, which on each invocation collates and
        # returns a list of first, second, etc, array members in the first,
        # second, etc, invocation respectively.
        #
        my $ea = each_arrayref(map { [$_->children()] } @lanes);
        while ( my @plexes = $ea->() ) {
            $can_merge->(\@plexes, @test_attrs, 'tag_index'); # Test consistency
            push @aggregated, __PACKAGE__->new(%{$init},
                rpt_list => npg_tracking::glossary::rpt->deflate_rpts(\@plexes));
        }
        # Add object for tag zero
        push @aggregated, __PACKAGE__->new(%{$init},
            rpt_list => npg_tracking::glossary::rpt->tag_zero_rpt_list($lanes_rpt_list));
    }

    return @aggregated;
}

=head2 aggregate_libraries

Given a list of lane-level C<st::api::lims> objects, finds their children,
@@ -1391,7 +1257,8 @@ Marina Gourtovaia E<lt>[email protected]<gt>

=head1 LICENSE AND COPYRIGHT

Copyright (C) 2013,2014,2015,2016,2017,2018,2019,2020,2021,2023 Genome Research Ltd.
Copyright (C) 2013,2014,2015,2016,2017,2018,2019,2020,2021,2023,2024
Genome Research Ltd.

This file is part of NPG.

181 changes: 1 addition & 180 deletions t/40-st-lims-merge.t
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use strict;
use warnings;
use Test::More tests => 11;
use Test::More tests => 9;
use Test::Exception;
use List::MoreUtils qw/all none/;
use File::Slurp;
@@ -129,157 +129,6 @@ subtest 'Create tag zero object' => sub {
}
};

# Exercises aggregate_xlanes() for runs where every lane is a pool:
# error conditions first, then aggregation over all lanes, over a subset
# of lanes and over a single lane using a cached samplesheet, and finally
# aggregation using the ml_warehouse driver.
subtest 'Aggregation across lanes for pools' => sub {
    plan tests => 89;

    local $ENV{NPG_CACHED_SAMPLESHEET_FILE} =
        't/data/test40_lims/samplesheet_novaseq4lanes.csv';

    # The method is only valid for run-level objects.
    my $l = st::api::lims->new(rpt_list => '25846:1:3');
    throws_ok { $l->aggregate_xlanes() } qr/Not run-level object/,
        'method cannot be run for a composition';
    $l = st::api::lims->new(id_run => 25846, position => 1);
    throws_ok { $l->aggregate_xlanes() } qr/Not run-level object/,
        'method cannot be run for a lane-level object';
    $l = st::api::lims->new(id_run => 25846, position => 1, tag_index => 4);
    throws_ok { $l->aggregate_xlanes() } qr/Not run-level object/,
        'method cannot be run for a plex-level object';

    $l = st::api::lims->new(id_run => 25846);

    throws_ok { $l->aggregate_xlanes(qw/2 10/) }
        qr/Requested position 10 does not exists in /,
        'error if requested position does not exist';

    # All lanes. The tag zero object is the last member of the returned
    # list and the spiked-in tag (888) object is the one before it.
    my @merged = $l->aggregate_xlanes();
    is (scalar @merged, 23, 'number of aggregates is number of tags plus two');
    my $tag_zero = pop @merged;
    my $tag_spiked = pop @merged;
    my $tag_last = pop @merged;
    my $tag_first = shift @merged;
    is ($tag_zero->rpt_list, '25846:1:0;25846:2:0;25846:3:0;25846:4:0',
        'rpt list for tag zero object');
    is ($tag_spiked->rpt_list, '25846:1:888;25846:2:888;25846:3:888;25846:4:888',
        'rpt list for spiked in tag object');
    is ($tag_last->rpt_list, '25846:1:21;25846:2:21;25846:3:21;25846:4:21',
        'rpt list for tag 21 object');
    is ($tag_first->rpt_list, '25846:1:1;25846:2:1;25846:3:1;25846:4:1',
        'rpt list for tag 1 object');

    # A subset of two lanes.
    @merged = $l->aggregate_xlanes(qw/1 4/);
    is (scalar @merged, 23, 'number of aggregates is number of tags plus two');
    $tag_zero = pop @merged;
    $tag_spiked = pop @merged;
    $tag_last = pop @merged;
    $tag_first = shift @merged;
    is ($tag_zero->rpt_list, '25846:1:0;25846:4:0',
        'rpt list for tag zero object');
    is ($tag_spiked->rpt_list, '25846:1:888;25846:4:888',
        'rpt list for spiked in tag object');
    is ($tag_last->rpt_list, '25846:1:21;25846:4:21',
        'rpt list for tag 21 object');
    is ($tag_first->rpt_list, '25846:1:1;25846:4:1',
        'rpt list for tag 1 object');

    # A single lane.
    @merged = $l->aggregate_xlanes(qw/1/);
    is (scalar @merged, 23, 'number of aggregates is number of tags plus two');
    $tag_zero = pop @merged;
    $tag_spiked = pop @merged;
    $tag_last = pop @merged;
    $tag_first = shift @merged;
    is ($tag_zero->rpt_list, '25846:1:0', 'rpt list for tag zero object');
    is ($tag_spiked->rpt_list, '25846:1:888', 'rpt list for spiked in tag object');
    is ($tag_last->rpt_list, '25846:1:21', 'rpt list for tag 21 object');
    is ($tag_first->rpt_list, '25846:1:1', 'rpt list for tag 1 object');

    # All lanes again; these objects are used for the property checks below.
    @merged = $l->aggregate_xlanes();
    is (scalar @merged, 23, 'number of aggregates is number of tags plus two');
    $tag_zero = pop @merged;
    $tag_spiked = pop @merged;
    $tag_last = pop @merged;
    $tag_first = shift @merged;
    is ($tag_zero->rpt_list, '25846:1:0;25846:2:0;25846:3:0;25846:4:0',
        'rpt list for tag zero object');
    is ($tag_spiked->rpt_list, '25846:1:888;25846:2:888;25846:3:888;25846:4:888',
        'rpt list for spiked in tag object');
    is ($tag_last->rpt_list, '25846:1:21;25846:2:21;25846:3:21;25846:4:21',
        'rpt list for tag 21 object');
    is ($tag_first->rpt_list, '25846:1:1;25846:2:1;25846:3:1;25846:4:1',
        'rpt list for tag 1 object');
    ok ((none {defined $_->id_run} ($tag_zero, $tag_spiked, $tag_first, $tag_last)),
        "id_run not defined");

    _compare_properties([$tag_first, $tag_last, $tag_zero, $tag_spiked]);

    ok ($tag_spiked->is_phix_spike, 'is phix spike');
    ok (!$tag_first->is_phix_spike, 'is not phix spike');
    ok (!$tag_zero->is_phix_spike, 'is not phix spike');

    is (join(q[:], $tag_zero->study_names), 'Illumina Controls:NovaSeq testing',
        'study names including spiked phix');
    is (join(q[:], $tag_zero->study_names(1)), 'Illumina Controls:NovaSeq testing',
        'study names including spiked phix');
    is (join(q[:], $tag_zero->study_names(0)), 'NovaSeq testing',
        'study names excluding spiked phix');

    my @sample_names = qw/
        5318STDY7462457 5318STDY7462458 5318STDY7462459 5318STDY7462460 5318STDY7462461
        5318STDY7462462 5318STDY7462463 5318STDY7462464 5318STDY7462465 5318STDY7462466
        5318STDY7462467 5318STDY7462468 5318STDY7462469 5318STDY7462470 5318STDY7462471
        5318STDY7462472 5318STDY7462473 5318STDY7462474 5318STDY7462475 5318STDY7462476
        5318STDY7462477 /;

    is (join(q[:], $tag_zero->sample_names(0)), join(q[:], @sample_names),
        'sample names excluding spiked phix');
    push @sample_names, 'phiX_for_spiked_buffers';
    is (join(q[:], $tag_zero->sample_names()), join(q[:], @sample_names),
        'sample names including spiked phix');
    is (join(q[:], $tag_zero->sample_names(1)), join(q[:], @sample_names),
        'sample names including spiked phix');

    # Blank the cached samplesheet setting for the remainder of the
    # subtest; the object below is given the ml_warehouse driver type
    # explicitly.
    local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = q[];

    my $id_run = 47995;
    $l = st::api::lims->new(
        id_run => $id_run,
        id_flowcell_lims => 98292,
        driver_type => 'ml_warehouse',
        mlwh_schema => $schema_wh,
    );

    @merged = $l->aggregate_xlanes(qw/1 2/);
    is (scalar @merged, 19, 'number of aggregates is number of tags plus two');
    $tag_zero = pop @merged;
    $tag_spiked = pop @merged;
    $tag_last = pop @merged;
    $tag_first = shift @merged;
    is ($tag_zero->rpt_list, "$id_run:1:0;$id_run:2:0",
        'rpt list for tag zero object');
    my @tag_zero_sample_names = $tag_zero->sample_names();
    is (@tag_zero_sample_names, 18, '18 sample names are retrieved');
    is ($tag_zero_sample_names[0], '6751STDY13219539',
        'first sample name is correct');
    is ($tag_spiked->rpt_list, "$id_run:1:888;$id_run:2:888",
        'rpt list for spiked in tag object');
    # The last non-control tag of this run is 17, not 21.
    is ($tag_last->rpt_list, "$id_run:1:17;$id_run:2:17",
        'rpt list for tag 17 object');
    is ($tag_first->rpt_list, "$id_run:1:1;$id_run:2:1",
        'rpt list for tag 1 object');
};

# Exercises aggregate_xlanes() for a run loaded from a samplesheet whose
# name indicates no pooled lanes: a single object is expected, whose
# rpt_list covers both lanes.
subtest 'Aggregation across lanes for non-pools' => sub {
    plan tests => 14;

    local $ENV{NPG_CACHED_SAMPLESHEET_FILE} =
        't/data/test40_lims/samplesheet_rapidrun_nopool.csv';

    my $run_lims   = st::api::lims->new(id_run => 22672);
    my @aggregates = $run_lims->aggregate_xlanes();
    is (scalar @aggregates, 1, 'one object returned');

    # Examine the sole aggregate.
    my ($aggregate) = @aggregates;
    is ($aggregate->rpt_list, '22672:1;22672:2', 'correct rpt_list');
    ok (!defined $aggregate->id_run, "id_run not defined");
    ok (!$aggregate->is_phix_spike, 'is not phix spike');
    _compare_properties_2($aggregate);
};

subtest 'Error conditions in aggregation by library' => sub {
plan tests => 4;

@@ -562,34 +411,6 @@ sub _compare_properties {
'default_tag_sequence' => 'TCGAGCGT',
'study_alignments_in_bam' => 1,
'study_contains_nonconsented_human' => 0
},
{
'sample_id' => undef,
'sample_name' => undef,
'sample_common_name' => 'Homo sapiens',
'study_id' => 5318,
'study_name' => 'NovaSeq testing',
'reference_genome' => 'Homo_sapiens (1000Genomes_hs37d5 + ensembl_75_transcriptome)',
'library_id' => undef,
'library_name' => undef,
'library_type' => 'Standard',
'default_tag_sequence' => undef,
'study_alignments_in_bam' => 1,
'study_contains_nonconsented_human' => 0
},
{
'sample_id' => '1255141',
'sample_name' => 'phiX_for_spiked_buffers',
'sample_common_name' => undef,
'study_id' => 198,
'study_name' => 'Illumina Controls',
'reference_genome' => undef,
'library_id' => '17883061',
'library_name' => '17883061',
'library_type' => undef,
'default_tag_sequence' => 'ACAACGCAATC',
'study_alignments_in_bam' => 1,
'study_contains_nonconsented_human' => 0
}
];