Skip to content

Commit

Permalink
Merge pull request #2007 from broadinstitute/development
Browse files Browse the repository at this point in the history
Release 1.69.0
  • Loading branch information
bistline authored Apr 8, 2024
2 parents ed57599 + 7e8fa00 commit ff3ddb4
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 130 deletions.
8 changes: 5 additions & 3 deletions app/javascript/components/bookmarks/BookmarkManager.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,10 @@ export default function BookmarkManager({bookmarks, studyAccession, clearExplore

/** close and reopen bookmark form to fix positioning issues */
function reopenBookmarkForm() {
  // guard: the popover ref may not be attached yet (e.g. before first render)
  if (formRef && formRef.current) {
    // toggling twice closes and immediately reopens the form in the corrected position
    formRef.current.handleToggle()
    formRef.current.handleToggle()
  }
}

/* keep track of opened top-level tab */
Expand Down Expand Up @@ -205,7 +207,7 @@ export default function BookmarkManager({bookmarks, studyAccession, clearExplore
enableSaveButton(true)
setShowError(false)
updateBookmarkList(bookmark)
resetForm()
setShowError(false)
log(logEvent)
} catch (error) {
enableSaveButton(true)
Expand Down
6 changes: 1 addition & 5 deletions app/javascript/components/search/results/ResultsPanel.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,7 @@ const FacetResultsFooter = ({ studySearchState }) => {
<div className="">
<p>Our advanced search is metadata-powered.
By selecting filters, your search <b>targets only studies that use ontology terms</b> in their metadata file.
Currently, almost 25% of public studies supply that metadata.</p>
{/*
84 of 353 studies as of 2021-06-22,
per https://docs.google.com/spreadsheets/d/1FSpP2XTrG9FqAqD9X-BHxkCZae9vxZA3cQLow8mn-bk
*/}
Many, but not all, public studies supply that metadata.</p>
Learn more about our search capability on our{' '}
<a className= "link-darker-blue" href="https://singlecell.zendesk.com/hc/en-us/articles/360061006431-Search-Studies"
target="_blank" rel="noreferrer">documentation
Expand Down
56 changes: 23 additions & 33 deletions app/lib/azul_search_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -236,39 +236,29 @@ def self.is_analysis_file(file_summary)
# retrieve file summary information from Azul for a list of HCA project accessions
#
# * *params*
# - +accessions+ (Array<String>) => list of HCA project short names/accessions
#
# * *returns*
# - (Array<Hash>) => one result hash per matching project, each seeded with a synthetic
#   'Project Manifest' file entry plus any file info extracted from the Azul response
def self.get_file_summary_info(accessions)
  client = ApplicationController.hca_azul_client
  # query is sent as a POST body, so it no longer needs to be split to avoid HTTP 413
  file_query = { 'project' => { 'is' => accessions } }
  results = client.projects(query: file_query)
  results['hits'].map do |entry|
    entry_hash = entry.with_indifferent_access
    project_hash = entry_hash[:projects].first # there will only ever be one project here
    accession = project_hash[:projectShortname]
    result = {
      study_source: 'HCA',
      name: project_hash[:projectTitle],
      accession:,
      description: project_hash[:projectDescription],
      studyFiles: [
        {
          project_id: project_hash[:projectId],
          file_type: 'Project Manifest',
          count: 1,
          upload_file_size: 1.megabyte, # placeholder filesize as we don't know until manifest is downloaded
          name: "#{accession}.tsv"
        }
      ]
    }.with_indifferent_access
    project_file_info = extract_file_information(entry_hash)
    result[:studyFiles] += project_file_info if project_file_info.any?
    result
  end
end
end
54 changes: 19 additions & 35 deletions app/models/hca_azul_client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def process_api_request(http_method, path, payload: nil, retry_count: 0)
# * *raises*
# - (RestClient::Exception) => if HTTP request fails for any reason
# execute an HTTP request against the Azul API
#
# * *params*
# - +http_method+ (Symbol) => HTTP verb (:get, :post, :put, etc.)
# - +path+ (String) => fully-qualified request URL
# - +payload+ (String, nil) => request body; nil for body-less requests
#
# * *raises*
# - (RestClient::Exception) => if HTTP request fails for any reason
def execute_http_request(http_method, path, payload = nil)
  # Ruby 3.1+ shorthand hash value omission for payload:
  response = RestClient::Request.execute(method: http_method, url: path, payload:, headers: DEFAULT_HEADERS)
  # handle response using helper
  handle_response(response)
end
Expand Down Expand Up @@ -163,7 +163,7 @@ def catalogs
#
# * *params*
# - +catalog+ (String) => HCA catalog name (optional)
# - +query+ (Hash) => query object from :format_query_object
# - +query+ (Hash) => query object
# - +size+ (Integer) => number of results to return (default is 200)
#
# * *returns*
Expand All @@ -172,18 +172,16 @@ def catalogs
# * *raises*
# - (ArgumentError) => if catalog is not in self.all_catalogs
def projects(catalog: nil, query: {}, size: MAX_RESULTS)
  # only 'size' goes in the query string; the filter object is POSTed as the
  # request body to avoid HTTP 413 (Payload Too Large) on long accession lists
  base_path = "#{api_root}/index/projects?size=#{size}"
  path = append_catalog(base_path, catalog)
  process_api_request(:post, path, payload: create_query_filters(query))
end

# simulate OR logic by splitting project queries on facet and joining results
#
# * *params*
# - +catalog+ (String) => HCA catalog name (optional)
# - +query+ (Hash) => query object from :format_query_object
# - +query+ (Hash) => query object
# - +size+ (Integer) => number of results to return (default is 200)
#
# * *returns*
Expand All @@ -196,7 +194,7 @@ def projects_by_facet(catalog: nil, query: {}, size: MAX_RESULTS)
isolated_queries = query.each_pair.map { |facet, filters| { facet => filters } }
Rails.logger.info "Splitting above query into #{isolated_queries.size} requests and joining results"
Parallel.map(isolated_queries, in_threads: isolated_queries.size) do |project_query|
results = projects(catalog: catalog, query: project_query, size: size)
results = projects(catalog: catalog, query: project_query, size:)
results['hits'].each do |result|
project_id = result['projects'].first['projectId']
unless all_results['project_ids'].include?(project_id)
Expand Down Expand Up @@ -242,21 +240,20 @@ def project_manifest_link(project_id, catalog: nil, format: 'compact')

base_path = "#{api_root}/fetch/manifest/files"
project_filter = { 'projectId' => { 'is' => [project_id] } }
filter_query = format_hash_as_query_string(project_filter)
base_path += "?filters=#{filter_query}&format=#{format}"
path = append_catalog(base_path, catalog)
payload = create_query_filters(project_filter)
# since manifest files are generated on-demand, keep making requests until the Status code is 302 (Found)
# Status 301 means that the manifest is still being generated; if no manifest is ready after 30s, return anyway
time_slept = 0
manifest_info = process_api_request(:get, path)
manifest_info = process_api_request(:put, path, payload:)
while manifest_info['Status'] == 301
break if time_slept >= MAX_MANIFEST_TIMEOUT

interval = manifest_info['Retry-After']
Rails.logger.info "Manifest still generating for #{project_id} (#{format}), retrying in #{interval}"
sleep interval
time_slept += interval
manifest_info = process_api_request(:get, path)
manifest_info = process_api_request(:put, path, payload:)
end
manifest_info
end
Expand All @@ -271,12 +268,11 @@ def project_manifest_link(project_id, catalog: nil, format: 'compact')
# * *returns*
# - (Hash) => List of files matching query
def files(catalog: nil, query: {}, size: MAX_RESULTS)
base_path = "#{api_root}/index/files"
query_string = format_hash_as_query_string(query)
base_path += "?filters=#{query_string}&size=#{size}"
base_path = "#{api_root}/index/files?size=#{size}"
payload = create_query_filters(query)
path = append_catalog(base_path, catalog)
# make API request, but fold in project information to each result so that this is preserved for later use
raw_results = process_api_request(:get, path)['hits']
raw_results = process_api_request(:post, path, payload:)['hits']
results = []
raw_results.each do |result|
files = result['files']
Expand Down Expand Up @@ -397,29 +393,17 @@ def merge_query_objects(*query_objects)
merged_query
end

# take a Hash/JSON object and format as a query string parameter
#
# * *params*
# - +query_params+ (Hash) => Hash of query parameters
#
# * *returns*
# - (String) => URL-encoded string version of query parameters
def format_hash_as_query_string(query_params)
  # swap Ruby hash-rocket notation for JSON-style colons, then URL-encode
  json_like = query_params.to_s.gsub('=>', ':')
  CGI.escape(json_like)
end

# create a query filter object to use in a request body
#
# * *params*
# - +query+ (Hash) => Hash of query parameters
#
# * *returns*
# - (String) => JSON query object, with double-encoded 'filters' parameter
#   (Azul expects the 'filters' value to itself be a JSON-encoded string)
def create_query_filters(query)
  {
    filters: query.to_json
  }.to_json
end

# append the HCA catalog name, if passed to a method
Expand Down
2 changes: 1 addition & 1 deletion config/application.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Application < Rails::Application
config.middleware.use Rack::Brotli

# Docker image for file parsing via scp-ingest-pipeline
config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.29.0'
config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.30.0'

# Docker image for image pipeline jobs
config.image_pipeline_docker_image = 'gcr.io/broad-singlecellportal-staging/image-pipeline:0.1.0_c2b090043'
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@
"prop-types": "^15.7.2",
"react-select-event": "^5.3.0",
"stylelint-prettier": "^1.1.2",
"vite": "^2.9.17",
"vite": "^2.9.18",
"vite-plugin-ruby": "^3.0.5"
}
}
55 changes: 23 additions & 32 deletions test/integration/external/hca_azul_client_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -166,31 +166,21 @@ def get_entries_from_response(response, key)
# TODO: SCP-5564
# - Fix "Global bulk download breaks in default use"
# - Re-enable this test
# test 'should get HCA metadata tsv link' do
# skip_if_api_down
# manifest_info = @hca_azul_client.project_manifest_link(@project_id)
# assert manifest_info.present?
# assert_equal 302, manifest_info['Status']
# # make GET on manifest URL and assert contents are HCA metadata
# manifest_response = RestClient.get manifest_info['Location']
# assert_equal 200, manifest_response.code
# raw_manifest = manifest_response.body.split("\r\n")
# headers = raw_manifest.first.split("\t")
# project_id_header = 'project.provenance.document_id'
# assert headers.include? project_id_header
# project_idx = headers.index(project_id_header)
# data_row = raw_manifest.sample.split("\t")
# assert_equal @project_id, data_row[project_idx]
# end

test 'should format object for query string' do
query_object = { 'foo' => { 'bar' => 'baz' } }
expected_response = '%7B%22foo%22%3A%7B%22bar%22%3A%22baz%22%7D%7D'
formatted_query = @hca_azul_client.format_hash_as_query_string(query_object)
assert_equal expected_response, formatted_query
# assert we can get back to original object, but as JSON
query_as_json = CGI.unescape(formatted_query)
assert_equal query_object.to_json, query_as_json
# external integration test: fetches a real manifest from the HCA Azul API
test 'should get HCA metadata tsv link' do
skip_if_api_down
manifest_info = @hca_azul_client.project_manifest_link(@project_id)
assert manifest_info.present?
assert_equal 302, manifest_info['Status']
# make GET on manifest URL and assert contents are HCA metadata
manifest_response = RestClient.get manifest_info['Location']
assert_equal 200, manifest_response.code
raw_manifest = manifest_response.body.split("\r\n")
headers = raw_manifest.first.split("\t")
project_id_header = 'project.provenance.document_id'
assert headers.include? project_id_header
project_idx = headers.index(project_id_header)
# NOTE(review): raw_manifest.sample can pick the header row, which would make the
# final assertion flaky — consider raw_manifest.drop(1).sample; TODO confirm
data_row = raw_manifest.sample.split("\t")
assert_equal @project_id, data_row[project_idx]
end

test 'should format query object from search facets' do
Expand All @@ -202,6 +192,14 @@ def get_entries_from_response(response, key)
assert_equal expected_query, query
end

# verify create_query_filters double-encodes the query: the 'filters' value is
# itself a JSON string inside the outer JSON object
test 'should create query filters object' do
project_id = SecureRandom.uuid
query = { 'projectId' => { 'is' => [project_id] } }
query_object = @hca_azul_client.create_query_filters(query)
expected_query = "{\"filters\":\"{\\\"projectId\\\":{\\\"is\\\":[\\\"#{project_id}\\\"]}}\"}"
assert_equal expected_query, query_object
end

test 'should format numeric facet query' do
age_facet = [
{
Expand Down Expand Up @@ -306,13 +304,6 @@ def get_entries_from_response(response, key)
end
end

# verify query_too_large? flags a 500-accession query but not a 10-accession one
test 'should determine if query is too large' do
accession_list = 1.upto(500).map { |n| "FakeHCAProject#{n}" }
query = { project: { is: accession_list } }
assert @hca_azul_client.query_too_large?(query)
assert_not @hca_azul_client.query_too_large?({ project: { is: accession_list.take(10) } })
end

test 'should not retry any error status code' do
ApiHelpers::RETRY_STATUS_CODES.each do |code|
assert_not @hca_azul_client.should_retry?(code)
Expand Down
16 changes: 0 additions & 16 deletions test/services/azul_search_service_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -242,20 +242,4 @@ class AzulSearchServiceTest < ActiveSupport::TestCase
expected_term_match = { total: 2, terms: { lung: 2 } }.with_indifferent_access
assert_equal expected_term_match, AzulSearchService.get_search_term_weights(result, %w[lung])
end

# verify an oversized accession list is split into two halves (two :projects calls)
# and the joined results cover all 500 projects; the client is fully mocked
test 'should split large queries into two requests' do
accession_list = 1.upto(500).map { |n| "FakeHCAProject#{n}" }
query = { 'project' => { 'is' => accession_list } }
project_result = @human_tcell_response['hits'].first
mock_query_result = { 'hits' => Array.new(250, project_result) }
mock = Minitest::Mock.new
mock.expect :query_too_large?, true, [query]
mock.expect :projects, mock_query_result, [Hash]
mock.expect :projects, mock_query_result, [Hash]
ApplicationController.stub :hca_azul_client, mock do
results = AzulSearchService.get_file_summary_info(accession_list)
mock.verify
assert results.count == 500
end
end
end
8 changes: 4 additions & 4 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -9587,10 +9587,10 @@ vite-plugin-ruby@^3.0.5:
debug "^4.3.4"
fast-glob "^3.2.12"

vite@^2.9.17:
version "2.9.17"
resolved "https://registry.yarnpkg.com/vite/-/vite-2.9.17.tgz#6b770525e12fa2a2e3a0fa0d028d304f4f7dc7d4"
integrity sha512-XxcRzra6d7xrKXH66jZUgb+srThoPu+TLJc06GifUyKq9JmjHkc1Numc8ra0h56rju2jfVWw3B3fs5l3OFMvUw==
vite@^2.9.18:
version "2.9.18"
resolved "https://registry.yarnpkg.com/vite/-/vite-2.9.18.tgz#74e2a83b29da81e602dac4c293312cc575f091c7"
integrity sha512-sAOqI5wNM9QvSEE70W3UGMdT8cyEn0+PmJMTFvTB8wB0YbYUWw3gUbY62AOyrXosGieF2htmeLATvNxpv/zNyQ==
dependencies:
esbuild "^0.14.27"
postcss "^8.4.13"
Expand Down

0 comments on commit ff3ddb4

Please sign in to comment.