Skip to content

Commit

Permalink
Added tests for merging by library.
Browse files Browse the repository at this point in the history
Also refactored method itself.
  • Loading branch information
mgcam committed Nov 14, 2023
1 parent 00ba90e commit db64035
Show file tree
Hide file tree
Showing 3 changed files with 284 additions and 123 deletions.
145 changes: 98 additions & 47 deletions lib/st/api/lims.pm
Original file line number Diff line number Diff line change
Expand Up @@ -912,84 +912,135 @@ sub aggregate_xlanes {

=head2 aggregate_libraries
Given a list of lane-level st::api::lims objects, finds their children, which
can be merged and analysed together across all or some of the lanes.
All argument lane objects should belong to the same run. It is assumed that
they all use the same st::api::lims driver type.
Returns two lists of objects, one for merged entities and one for singletons,
which are wrapped into a dictionary. Either of these lists can be empty. Both
of the lists are guaranteed not to be empty at the same time.
Tag zero objects are neither added nor explicitly removed. Objects for
spiked-in controls (if present) are added to the list of singletons.
The lists of singletons and merges are sorted. For a given set of input lane
st::api::lims objects the same lists are always returned.
Criteria for entities to be eligible for a merge:
* they are not controls,
* they belong to the same library,
* they share the same tag index, therefore belong to different lanes,
* they belong to the same study.
This method can be used both as instance and as a class method.
my $all_lims = st::api::lims->aggregate_libraries($run_lims->children());
for my $l (@{$all_lims->{'singles'}}) {
print 'No merge for ' . $l->to_string;
}
for my $l (@{$all_lims->{'merges'}}) {
print 'Merged entity ' . $l->to_string;
}
=cut

sub aggregate_libraries() {
my ($self, $lane_lims_array) = @_;

my @id_runs = uniq map { $_->id_run } @{$lane_lims_array};
if (@id_runs != 1) {
croak 'Multiple run IDs in a potential library merge';
}
# This restriction might be lifted in future.
_check_value_is_unique('id_run', 'run IDs', $lane_lims_array);

my $all_lims_objects = {};
my $lims_objects_by_library = {};

my @singles = ();
my @all_single_lims_objs = map { $_->is_pool ? $_->children() : $_ }
@{$lane_lims_array};

foreach my $obj (@all_single_lims_objs) {
if ($obj->is_control()) { # Do not merge spiked PhiX libraries.
push @{$all_lims_objects->{'single'}}, $obj;
if ($obj->is_control()) {
push @singles, $obj;
} else {
push @{$lims_objects_by_library->{$obj->library_id}}, $obj;
}
}

my $merge_set = {};

# Do not use $self for this to retain ability to use this method as a class
# method.
# Get the common st::api::lims driver arguments, which will be used
# to create objects for merged entities.
# Do not use $self for copying the driver arguments in order to retain
# ability to use this method as a class method.
my %init = %{$lane_lims_array->[0]->_driver_arguments()};
delete $init{position};
delete $init{id_run};

my $merges = {};
my $lane_set_delim = q[,];
foreach my $library_id (keys %{$lims_objects_by_library}) {
my @lib_lims = @{$lims_objects_by_library->{$library_id}};
if (@lib_lims == 1) {
push @{$all_lims_objects->{'single'}}, $lib_lims[0];
push @singles, @lib_lims;
} else {
my @study_ids = uniq map { $_->study_id } @lib_lims;
if (@study_ids != 1) {
croak 'Multiple studies in a potential merge';
}
my @tag_indexes =
uniq
map { defined $_->tag_index ? $_->tag_index : 'undefined' }
@lib_lims;
if (@tag_indexes != 1) {
croak 'Inconsistent tag indexes in a potential merge';
}
my @lanes = uniq map {$_->position} @lib_lims;
if (@lanes != @lib_lims) {
croak 'Intra-lane merge is detected in a potential merge';
}
my $lane_set = join q[,], @lanes;
$all_lims_objects->{'merges'}->{$lane_set}->{$tag_indexes[0]} =
__PACKAGE__->new(
%init,
rpt_list => npg_tracking::glossary::rpt->deflate_rpts(\@lib_lims)
);
_check_merge_correctness(\@lib_lims);
my $lane_set = join $lane_set_delim,
sort { $a <=> $b } map { $_->position } @lib_lims;
my $tag_index = $lib_lims[0]->tag_index;
$merges->{$lane_set}->{$tag_index} = __PACKAGE__->new(
%init,
rpt_list => npg_tracking::glossary::rpt->deflate_rpts(\@lib_lims)
);
}
}

# Lane sets should not intersect.
my @positions =
map { (split /,/smx, $_) }
keys %{$all_lims_objects->{'merges'}};
if (@positions != uniq @positions) {
# Lane sets should not intersect. Error if a lane belongs to multiple sets.
my @all_lanes_in_merged_sets = map { (split /$lane_set_delim/smx, $_) }
keys %{$merges};
if (@all_lanes_in_merged_sets != uniq @all_lanes_in_merged_sets) {
croak 'No clean split between lanes in potential merges';
}

# Within each set sort LIMS objects by tag index and keep
# the sorted list.
foreach my $lane_set ( keys %{$all_lims_objects->{'merges'}} ) {
my @sorted_lims_onjects =
map {$all_lims_objects->{'merges'}->{$lane_set}->{$_}}
(sort { $a <=> $b } keys %{$all_lims_objects->{'merges'}->{$lane_set}});
$all_lims_objects->{'merges'}->{$lane_set} = \@sorted_lims_onjects;
my $all_lims_objects = {'singles' => [], 'merges' => []};
# Arrange in a predictable consistent orger.
foreach my $lane_set ( sort keys %{$merges} ) {
my @tag_indexes = sort { $a <=> $b } keys %{$merges->{$lane_set}};
push @{$all_lims_objects->{'merges'}},
(map { $merges->{$lane_set}->{$_} } @tag_indexes);
}
$all_lims_objects->{'singles'} = [
sort { $a->position <=> $b->position || $a->tag_index <=> $b->tag_index}
@singles
];

# Sanity check.
my $num_objects = @{$all_lims_objects->{'merges'}} +
@{$all_lims_objects->{'singles'}};
if ($num_objects == 0) {
croak 'Invalid aggregation by library, no objects returned';
}

return $all_lims_objects;
}

sub _check_merge_correctness{
my $lib_lims = shift;
_check_value_is_unique('study_id', 'studies', $lib_lims);
_check_value_is_unique('tag_index', 'tag indexes', $lib_lims);
my @lanes = uniq map {$_->position} @{$lib_lims};
if (@lanes != @{$lib_lims}) {
croak 'Intra-lane merge is detected in a potential merge';
}
return;
}

sub _check_value_is_unique {
my ($method_name, $property_name, $objects) = @_;
my @values = uniq
map { defined $_->$method_name ? $_->$method_name : 'undefined' }
@{$objects};
if (@values != 1) {
croak "Multiple $property_name in a potential library merge";
}
return;
}

=head2 create_tag_zero_object
Using id_run and position values of this object, creates and returns
Expand Down
Loading

0 comments on commit db64035

Please sign in to comment.