diff --git a/Changes b/Changes index d5d26eaf..b45ce77f 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,11 @@ LIST OF CHANGES +release 101.4.0 (2024-08-30) + - Added species_from_reference_genome method to st::api::lims. If the + reference_genome value is defined, this new method returns the name of the + species. parse_reference_genome from npg_tracking::data::reference::find + is used for parsing the reference genome string. + release 101.3.0 (2024-08-05) - Provided a direct access to the default section of the study configuration file. diff --git a/lib/st/api/lims.pm b/lib/st/api/lims.pm index b169e783..281ff7ab 100644 --- a/lib/st/api/lims.pm +++ b/lib/st/api/lims.pm @@ -12,6 +12,7 @@ use Class::Load qw/load_class/; use npg_tracking::util::types; use npg_tracking::glossary::rpt; use npg_tracking::glossary::composition::factory::rpt_list; +use npg_tracking::data::reference::find; our $VERSION = '0'; @@ -602,6 +603,34 @@ sub _build_reference_genome { return $rg; } +=head2 species_from_reference_genome + +Extracts the species name from the value of the C<reference_genome> attribute +and returns it. Returns an undefined value if the value of the C<reference_genome> +attribute is not defined or if the C<reference_genome> string does not match +the expected pattern.
+ +Examples: + + reference_genome: 'Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla)' + species: 'Homo_sapiens' + + reference_genome: 'Mus_musculus (GRCm38 + ensembl_84_transcriptome)' + species: 'Mus_musculus' +=cut +sub species_from_reference_genome { + my ($self) = @_; + + # Nothing to parse when the reference genome value is not set. + my $ref_genome = $self->reference_genome; + $ref_genome or return; + + # A non-empty parse result carries the species name as its first element. + my @parsed = npg_tracking::data::reference::find + ->parse_reference_genome($ref_genome); + return @parsed ? $parsed[0] : (); +} + sub _trim_value { my ($self, $value) = @_; if ($value) { @@ -1311,6 +1340,8 @@ __END__ =item npg_tracking::glossary::composition::component::illumina +=item npg_tracking::data::reference::find + =back =head1 INCOMPATIBILITIES diff --git a/t/40-st-lims.t b/t/40-st-lims.t index de57e25c..02ec3ac5 100644 --- a/t/40-st-lims.t +++ b/t/40-st-lims.t @@ -284,7 +284,7 @@ subtest 'Samplesheet driver for a one-component composition' => sub { }; subtest 'Samplesheet driver for arbitrary compositions' => sub { - plan tests => 69; + plan tests => 75; my $path = 't/data/samplesheet/novaseq_multirun.csv'; local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = $path; @@ -309,6 +309,8 @@ subtest 'Samplesheet driver for arbitrary compositions' => sub { is($o->library_name, '22802061', 'library name'); is($o->reference_genome, 'Homo_sapiens (GRCh38_15_plus_hs38d1) [minimap2]', 'reference genome'); + is($o->species_from_reference_genome, 'Homo_sapiens', + 'species reference genome'); } $ss = $children[0]; @@ -422,7 +424,7 @@ subtest 'Insert size' => sub { }; subtest 'Study and sample properties' => sub { - plan tests => 75; + plan tests => 83; local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/samplesheet/4pool4libs_extended.csv'; @@ -442,6 +444,8 @@ subtest 'Study and sample properties' => sub { is( $lims->study_reference_genome, q[ ], 'study reference genome'); is( $lims->reference_genome, 'Haemonchus_contortus (V1_21June13)', 'reference genome'); + is( $lims->species_from_reference_genome, 
'Haemonchus_contortus', + 'species from reference genome'); # Individual plex. $lims = st::api::lims->new(id_run => 9999, position => 7, tag_index=> 76); @@ -461,6 +465,8 @@ subtest 'Study and sample properties' => sub { is( $lims->study_reference_genome, 'Mus_musculus (GRCm38)', 'study reference genome'); is( $lims->reference_genome, 'Mus_musculus (GRCm38)', 'reference genome'); + is( $lims->species_from_reference_genome, 'Mus_musculus', + 'species from reference genome'); # Indexed lane and tag zero for the same lane. for my $l ( @@ -475,6 +481,8 @@ subtest 'Study and sample properties' => sub { is( $l->study_reference_genome, 'Mus_musculus (GRCm38)', 'study reference genome'); is( $l->reference_genome, 'Mus_musculus (GRCm38)', 'reference genome'); + is( $l->species_from_reference_genome, 'Mus_musculus', + 'species from reference genome'); } local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = @@ -494,6 +502,8 @@ subtest 'Study and sample properties' => sub { is( $l->sample_reference_genome, undef, 'sample reference genome undefined'); is( $l->study_reference_genome, $ref, 'study reference genome'); is( $l->reference_genome, undef, 'no fallback to study'); + is( $l->species_from_reference_genome, undef, + 'species from reference genome is undefined'); } my $ref2 = 'Homo_sapiens (GRCh38_full_analysis_set_plus_decoy_hla)';