broadinstitute · jlchang · Jul 31, 2024 · Jul 22, 2024 · Jul 23, 2024 · Jul 23, 2024
diff --git a/Gemfile.lock b/Gemfile.lock
@@ -426,8 +426,8 @@ GEM
       mime-types (>= 1.16, < 4.0)
       netrc (~> 0.8)
     retriable (3.1.2)
-    rexml (3.2.8)
-      strscan (>= 3.0.9)
+    rexml (3.3.2)
+      strscan
     rubocop (1.36.0)
       json (~> 2.3)
       parallel (~> 1.10)

diff --git a/app/javascript/components/upload/upload-utils.js b/app/javascript/components/upload/upload-utils.js
@@ -42,7 +42,7 @@ const PROPERTIES_NOT_TO_SEND = [
 ]
 
 const PROPERTIES_AS_JSON = ['custom_color_updates']
-const DEEPLY_NESTED_PROPS = ['data_fragments']
+const DEEPLY_NESTED_PROPS = ['custom_color_updates', 'data_fragments']
 
 /** gets an object representing a new, empty study file.  Does not communicate to server */
 export function newStudyFileObj(studyId) {

diff --git a/app/lib/cluster_viz_service.rb b/app/lib/cluster_viz_service.rb
@@ -67,7 +67,7 @@ def self.load_spatial_options(study)
   # only options allowed are 1000, 10000, 20000, and 100000
   # will only provide options if subsampling has completed for a cluster
   def self.subsampling_options(cluster)
-    return [] if cluster.nil? || cluster.is_subsampling?
+    return [] if cluster.nil? || cluster.is_subsampling? || !cluster.subsampled
 
     ClusterGroup::SUBSAMPLE_THRESHOLDS.select { |sample| sample < cluster.points }
   end

diff --git a/app/models/ann_data_ingest_parameters.rb b/app/models/ann_data_ingest_parameters.rb
@@ -49,13 +49,16 @@ class AnnDataIngestParameters
   NON_ATTRIBUTE_PARAMS = %i[file_size machine_type].freeze
 
   # GCE machine types and file size ranges for handling fragment extraction
-  # produces a hash with entries like { 'n2-highmem-4' => 0..4.gigabytes }
-  EXTRACT_MACHINE_TYPES = [4, 8, 16, 32].map.with_index do |cores, index|
-    floor = index == 0 ? 0 : (cores / 2).gigabytes
-    limit = (cores * 8).gigabytes
+  # produces a hash with entries like { 'n2d-highmem-4' => 0...24.gigabytes }
+  # adjust (core * n) to n=4 for faster scaling (i.e. n2d-highmem-4 for 0 to 16G)
+  NUM_CORES = [4, 8, 16, 32, 48, 64, 80, 96].freeze
+  RAM_PER_CORE = NUM_CORES.map { |core| (core * 6).gigabytes }.freeze
+  EXTRACT_MACHINE_TYPES = NUM_CORES.map.with_index do |cores, index|
+    floor = index == 0 ? 0 : RAM_PER_CORE[index - 1]
+    limit = index == NUM_CORES.count - 1 ? RAM_PER_CORE[index] * 2 : RAM_PER_CORE[index]
     # ranges that use '...' exclude the given end value.
     { "n2d-highmem-#{cores}" => floor...limit }
-  end.reduce({}, :merge)
+  end.reduce({}, :merge).freeze
 
   attr_accessor(*PARAM_DEFAULTS.keys)
 
@@ -70,7 +73,9 @@ def initialize(attributes = nil)
     # machine_type default is declared here to allow for autoscaling with optional override
     # see https://ruby-doc.org/core-3.1.0/Range.html#method-i-3D-3D-3D for range detection doc
     if @machine_type.nil?
-      self.machine_type = EXTRACT_MACHINE_TYPES.detect { |_, mem_range| mem_range === file_size }&.first || 'n2d-highmem-4'
+      self.machine_type = EXTRACT_MACHINE_TYPES.detect do |_, mem_range|
+                            mem_range === file_size
+                          end&.first || 'n2d-highmem-4'
     end
   end
 

diff --git a/app/models/concerns/parameterizable.rb b/app/models/concerns/parameterizable.rb
@@ -18,7 +18,7 @@ module Parameterizable
   # https://cloud.google.com/compute/docs/general-purpose-machines
   GCE_MACHINE_TYPES = %w[n2 n2d].map do |family|
     %w[standard highmem highcpu].map do |series|
-      [2, 4, 8, 16, 32, 64, 96].map do |cores|
+      [2, 4, 8, 16, 32, 48, 64, 80, 96].map do |cores|
         [family, series, cores].join('-')
       end
     end

diff --git a/lib/label_sorter.rb b/lib/label_sorter.rb
@@ -5,8 +5,8 @@ class LabelSorter
   attr_reader :lowercase, :natural_types, :type_order
 
   def initialize(str)
-    @str = str
-    @lowercase = str.downcase
+    @str = str.to_s # safeguard against nil or non-string values
+    @lowercase = @str.downcase
     @natural_types = @lowercase.scan(/\d+|\D+/).map { |s| s =~ /\d/ ? s.to_i : s }
     @type_order = @natural_types.map { |el| el.is_a?(Integer) ? :i : :s }.join
   end
@@ -24,6 +24,18 @@ def to_s
   end
 
+  # Sort labels in natural order and return them as plain strings.
+  # Blank and AnnotationVizService::MISSING_VALUE_LABEL entries are moved to the end so
+  # that the first positions (and therefore the first colors) go to actual labels.
+  #
+  # @param values [Array] labels to sort; each one is wrapped in a LabelSorter
+  # @return [Array<String>] sorted labels; an empty input yields an empty array
   def self.natural_sort(values)
-    values.map { |v| new(v) }.sort.map { |el| el.to_s }
+    sorted = values.map { |v| new(v) }.sort.map(&:to_s)
+    # partition (rather than shifting only the first element) relocates every missing entry,
+    # keeps the sorted order within each group, and never appends nil when the list is empty
+    missing, labeled = sorted.partition do |label|
+      label.blank? || label == AnnotationVizService::MISSING_VALUE_LABEL
+    end
+    labeled + missing
   end
 end
diff --git a/test/api/visualization/annotations_controller_test.rb b/test/api/visualization/annotations_controller_test.rb
@@ -191,14 +191,13 @@ class AnnotationsControllerTest < ActionDispatch::IntegrationTest
       {
         name: 'cell_type', type: 'group', scope: 'study', values: %w(big --Unspecified--),
         identifier: 'cell_type--group--study',
-        color_map: { '--Unspecified--' => '#e41a1c', big: '#377eb8' }.with_indifferent_access
+        color_map: { big: '#e41a1c', '--Unspecified--' => '#377eb8' }.with_indifferent_access
       },
       {
         name: 'nCount_RNA', type: 'numeric', scope: 'study', values: [],
         identifier: 'nCount_RNA--numeric--study'
       }
     ]
-    expected_annotations[2][:values][1] = '--Unspecified--'
     assert_equal expected_annotations, annotations
     assert_empty Api::V1::Visualization::AnnotationsController.get_facet_annotations(
       @basic_study, cluster, 'does-not-exist--group--study'

diff --git a/test/lib/label_sorter_test.rb b/test/lib/label_sorter_test.rb
@@ -28,4 +28,14 @@ class LabelSorterTest < ActiveSupport::TestCase
     assert_equal 1, last <=> first
     assert_equal 1, last <=> middle
   end
+
+  test 'should move blank or unspecified to the end' do
+    random = @labels.take(10).shuffle
+    blank_label = [''] + random
+    sorted = LabelSorter.natural_sort(blank_label)
+    assert sorted.last.blank?
+    unspecified = [AnnotationVizService::MISSING_VALUE_LABEL] + random
+    sorted = LabelSorter.natural_sort(unspecified)
+    assert_equal AnnotationVizService::MISSING_VALUE_LABEL, sorted.last
+  end
 end
diff --git a/test/models/ann_data_ingest_parameters_test.rb b/test/models/ann_data_ingest_parameters_test.rb
@@ -58,7 +58,7 @@ class AnnDataIngestParametersTest < ActiveSupport::TestCase
     cmd = '--ingest-anndata --anndata-file gs://bucket_id/test.h5ad --obsm-keys ["X_umap", "X_tsne"] --extract ' \
           '["cluster", "metadata", "processed_expression"]'
     assert_equal cmd, extraction.to_options_array.join(' ')
-    assert_equal 'n2d-highmem-16', extraction.machine_type
+    assert_equal 'n2d-highmem-32', extraction.machine_type
   end
 
   test 'should validate cluster params' do
@@ -98,8 +98,8 @@ class AnnDataIngestParametersTest < ActiveSupport::TestCase
 
   test 'should set default machine type and allow override' do
     params = AnnDataIngestParameters.new(@extract_params)
-    assert_equal 'n2d-highmem-16', params.machine_type
-    new_machine = 'n2d-highmem-32'
+    assert_equal 'n2d-highmem-32', params.machine_type
+    new_machine = 'n2d-highmem-80'
     params.machine_type = new_machine
     assert_equal new_machine, params.machine_type
     assert params.valid?

diff --git a/test/services/cluster_viz_service_test.rb b/test/services/cluster_viz_service_test.rb
@@ -122,6 +122,7 @@ class ClusterVizServiceTest < ActiveSupport::TestCase
                           cells: cells
                       })
     cluster = @study.cluster_groups.by_name(cluster_name)
+    cluster.update(subsampled: true)
     options = ClusterVizService.subsampling_options(cluster)
     assert_equal [1000], options
   end

diff --git a/test/services/expression_viz_service_test.rb b/test/services/expression_viz_service_test.rb
@@ -241,9 +241,9 @@ def load_all_genes(study)
     # cells A & B belong to 'bar', and cell C belongs to the blank label
     expected_output = {
       bar: {
-        y: [0.0, 3.0], cells: %w(A B), annotations: [], name: 'bar', color: @color_list[1]
+        y: [0.0, 3.0], cells: %w(A B), annotations: [], name: 'bar', color: @color_list[0]
       }, "#{AnnotationVizService::MISSING_VALUE_LABEL}": {
-        y: [1.5], cells: %w(C), annotations: [], name: AnnotationVizService::MISSING_VALUE_LABEL, color: @color_list[0]
+        y: [1.5], cells: %w(C), annotations: [], name: AnnotationVizService::MISSING_VALUE_LABEL, color: @color_list[1]
       }
     }
     assert_equal expected_output.with_indifferent_access, violin_data.with_indifferent_access