diff --git a/app/javascript/components/bookmarks/BookmarkManager.jsx b/app/javascript/components/bookmarks/BookmarkManager.jsx
index 1f930ef116..87ad17fdf3 100644
--- a/app/javascript/components/bookmarks/BookmarkManager.jsx
+++ b/app/javascript/components/bookmarks/BookmarkManager.jsx
@@ -111,8 +111,10 @@ export default function BookmarkManager({bookmarks, studyAccession, clearExplore
/** close and reopen bookmark form to fix positioning issues */
function reopenBookmarkForm() {
- formRef.current.handleToggle()
- formRef.current.handleToggle()
+ if (formRef && formRef.current) {
+ formRef.current.handleToggle()
+ formRef.current.handleToggle()
+ }
}
/* keep track of opened top-level tab */
@@ -205,7 +207,7 @@ export default function BookmarkManager({bookmarks, studyAccession, clearExplore
enableSaveButton(true)
setShowError(false)
updateBookmarkList(bookmark)
- resetForm()
+ setShowError(false)
log(logEvent)
} catch (error) {
enableSaveButton(true)
diff --git a/app/javascript/components/search/results/ResultsPanel.jsx b/app/javascript/components/search/results/ResultsPanel.jsx
index ae6db986e2..60aaed7827 100644
--- a/app/javascript/components/search/results/ResultsPanel.jsx
+++ b/app/javascript/components/search/results/ResultsPanel.jsx
@@ -78,11 +78,7 @@ const FacetResultsFooter = ({ studySearchState }) => {
Our advanced search is metadata-powered.
By selecting filters, your search targets only studies that use ontology terms in their metadata file.
- Currently, almost 25% of public studies supply that metadata.
- {/*
- 84 of 353 studies as of 2021-06-22,
- per https://docs.google.com/spreadsheets/d/1FSpP2XTrG9FqAqD9X-BHxkCZae9vxZA3cQLow8mn-bk
- */}
+ Many, but not all, public studies supply that metadata.
Learn more about our search capability on our{' '}
documentation
diff --git a/app/lib/azul_search_service.rb b/app/lib/azul_search_service.rb
index 9aea17c482..09c8a2336e 100644
--- a/app/lib/azul_search_service.rb
+++ b/app/lib/azul_search_service.rb
@@ -236,39 +236,29 @@ def self.is_analysis_file(file_summary)
def self.get_file_summary_info(accessions)
client = ApplicationController.hca_azul_client
file_query = { 'project' => { 'is' => accessions } }
- if client.query_too_large?(file_query)
- # cut query in half to prevent HTTP 413
- queries = accessions.each_slice((accessions.size / 2.0).round).map { |list| { 'project' => { 'is' => list } } }
- else
- queries = [{ 'project' => { 'is' => accessions } }]
- end
- all_results = []
- Parallel.map(queries, in_threads: queries.size) do |query|
- results = client.projects(query: query)
- results['hits'].map do |entry|
- entry_hash = entry.with_indifferent_access
- project_hash = entry_hash[:projects].first # there will only ever be one project here
- accession = project_hash[:projectShortname]
- result = {
- study_source: 'HCA',
- name: project_hash[:projectTitle],
- accession: accession,
- description: project_hash[:projectDescription],
- studyFiles: [
- {
- project_id: project_hash[:projectId],
- file_type: 'Project Manifest',
- count: 1,
- upload_file_size: 1.megabyte, # placeholder filesize as we don't know until manifest is downloaded
- name: "#{accession}.tsv"
- }
- ]
- }.with_indifferent_access
- project_file_info = extract_file_information(entry_hash)
- result[:studyFiles] += project_file_info if project_file_info.any?
- all_results << result
- end
+ results = client.projects(query: file_query)
+ results['hits'].map do |entry|
+ entry_hash = entry.with_indifferent_access
+ project_hash = entry_hash[:projects].first # there will only ever be one project here
+ accession = project_hash[:projectShortname]
+ result = {
+ study_source: 'HCA',
+ name: project_hash[:projectTitle],
+ accession:,
+ description: project_hash[:projectDescription],
+ studyFiles: [
+ {
+ project_id: project_hash[:projectId],
+ file_type: 'Project Manifest',
+ count: 1,
+ upload_file_size: 1.megabyte, # placeholder filesize as we don't know until manifest is downloaded
+ name: "#{accession}.tsv"
+ }
+ ]
+ }.with_indifferent_access
+ project_file_info = extract_file_information(entry_hash)
+ result[:studyFiles] += project_file_info if project_file_info.any?
+ result
end
- all_results
end
end
diff --git a/app/models/hca_azul_client.rb b/app/models/hca_azul_client.rb
index d92f53e10a..b7cd7f9368 100644
--- a/app/models/hca_azul_client.rb
+++ b/app/models/hca_azul_client.rb
@@ -104,7 +104,7 @@ def process_api_request(http_method, path, payload: nil, retry_count: 0)
# * *raises*
# - (RestClient::Exception) => if HTTP request fails for any reason
def execute_http_request(http_method, path, payload = nil)
- response = RestClient::Request.execute(method: http_method, url: path, payload: payload, headers: DEFAULT_HEADERS)
+ response = RestClient::Request.execute(method: http_method, url: path, payload:, headers: DEFAULT_HEADERS)
# handle response using helper
handle_response(response)
end
@@ -163,7 +163,7 @@ def catalogs
#
# * *params*
# - +catalog+ (String) => HCA catalog name (optional)
- # - +query+ (Hash) => query object from :format_query_object
+ # - +query+ (Hash) => query object
# - +size+ (Integer) => number of results to return (default is 200)
#
# * *returns*
@@ -172,18 +172,16 @@ def catalogs
# * *raises*
# - (ArgumentError) => if catalog is not in self.all_catalogs
def projects(catalog: nil, query: {}, size: MAX_RESULTS)
- base_path = "#{api_root}/index/projects"
- base_path += "?filters=#{format_hash_as_query_string(query)}"
- base_path += "&size=#{size}"
+ base_path = "#{api_root}/index/projects?size=#{size}"
path = append_catalog(base_path, catalog)
- process_api_request(:get, path)
+ process_api_request(:post, path, payload: create_query_filters(query))
end
# simulate OR logic by splitting project queries on facet and joining results
#
# * *params*
# - +catalog+ (String) => HCA catalog name (optional)
- # - +query+ (Hash) => query object from :format_query_object
+ # - +query+ (Hash) => query object
# - +size+ (Integer) => number of results to return (default is 200)
#
# * *returns*
@@ -196,7 +194,7 @@ def projects_by_facet(catalog: nil, query: {}, size: MAX_RESULTS)
isolated_queries = query.each_pair.map { |facet, filters| { facet => filters } }
Rails.logger.info "Splitting above query into #{isolated_queries.size} requests and joining results"
Parallel.map(isolated_queries, in_threads: isolated_queries.size) do |project_query|
- results = projects(catalog: catalog, query: project_query, size: size)
+ results = projects(catalog: catalog, query: project_query, size:)
results['hits'].each do |result|
project_id = result['projects'].first['projectId']
unless all_results['project_ids'].include?(project_id)
@@ -242,13 +240,12 @@ def project_manifest_link(project_id, catalog: nil, format: 'compact')
base_path = "#{api_root}/fetch/manifest/files"
project_filter = { 'projectId' => { 'is' => [project_id] } }
- filter_query = format_hash_as_query_string(project_filter)
- base_path += "?filters=#{filter_query}&format=#{format}"
path = append_catalog(base_path, catalog)
+ payload = create_query_filters(project_filter)
# since manifest files are generated on-demand, keep making requests until the Status code is 302 (Found)
# Status 301 means that the manifest is still being generated; if no manifest is ready after 30s, return anyway
time_slept = 0
- manifest_info = process_api_request(:get, path)
+ manifest_info = process_api_request(:put, path, payload:)
while manifest_info['Status'] == 301
break if time_slept >= MAX_MANIFEST_TIMEOUT
@@ -256,7 +253,7 @@ def project_manifest_link(project_id, catalog: nil, format: 'compact')
Rails.logger.info "Manifest still generating for #{project_id} (#{format}), retrying in #{interval}"
sleep interval
time_slept += interval
- manifest_info = process_api_request(:get, path)
+ manifest_info = process_api_request(:put, path, payload:)
end
manifest_info
end
@@ -271,12 +268,11 @@ def project_manifest_link(project_id, catalog: nil, format: 'compact')
# * *returns*
# - (Hash) => List of files matching query
def files(catalog: nil, query: {}, size: MAX_RESULTS)
- base_path = "#{api_root}/index/files"
- query_string = format_hash_as_query_string(query)
- base_path += "?filters=#{query_string}&size=#{size}"
+ base_path = "#{api_root}/index/files?size=#{size}"
+ payload = create_query_filters(query)
path = append_catalog(base_path, catalog)
# make API request, but fold in project information to each result so that this is preserved for later use
- raw_results = process_api_request(:get, path)['hits']
+ raw_results = process_api_request(:post, path, payload:)['hits']
results = []
raw_results.each do |result|
files = result['files']
@@ -397,29 +393,17 @@ def merge_query_objects(*query_objects)
merged_query
end
- # take a Hash/JSON object and format as a query string parameter
- #
- # * *params*
- # - +query_params+ (Hash) => Hash of query parameters
- #
- # * *returns*
- # - (String) => URL-encoded string version of query parameters
- def format_hash_as_query_string(query_params)
- # replace Ruby => assignment operators with JSON standard colons (:)
- sanitized_params = query_params.to_s.gsub(/=>/, ':')
- CGI.escape(sanitized_params)
- end
-
- # determine if a requested query is too long and would result in an HTTP 413 Payload Too Large exception
+ # create a query filter object to use in a request body
#
# * *params*
- # - +query_params+ (Hash) => Hash of query parameters
+ # - +query+ (Hash) => Hash of query parameters
#
# * *returns*
- # - (Boolean) => T/F if query would be too large
- def query_too_large?(query_params)
- formatted_query = format_hash_as_query_string(query_params)
- formatted_query.size >= MAX_QUERY_LENGTH
+ # - (String) => JSON query object, with double-encoded 'filters' parameter
+ def create_query_filters(query)
+ {
+ filters: query.to_json
+ }.to_json
end
# append the HCA catalog name, if passed to a method
diff --git a/config/application.rb b/config/application.rb
index f1963afd33..96285eb48e 100644
--- a/config/application.rb
+++ b/config/application.rb
@@ -29,7 +29,7 @@ class Application < Rails::Application
config.middleware.use Rack::Brotli
# Docker image for file parsing via scp-ingest-pipeline
- config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.29.0'
+ config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.30.0'
# Docker image for image pipeline jobs
config.image_pipeline_docker_image = 'gcr.io/broad-singlecellportal-staging/image-pipeline:0.1.0_c2b090043'
diff --git a/package.json b/package.json
index 41ef15c9fc..8fce1eca10 100644
--- a/package.json
+++ b/package.json
@@ -105,7 +105,7 @@
"prop-types": "^15.7.2",
"react-select-event": "^5.3.0",
"stylelint-prettier": "^1.1.2",
- "vite": "^2.9.17",
+ "vite": "^2.9.18",
"vite-plugin-ruby": "^3.0.5"
}
}
diff --git a/test/integration/external/hca_azul_client_test.rb b/test/integration/external/hca_azul_client_test.rb
index be032d3e25..dcf6bd40bf 100644
--- a/test/integration/external/hca_azul_client_test.rb
+++ b/test/integration/external/hca_azul_client_test.rb
@@ -166,31 +166,21 @@ def get_entries_from_response(response, key)
# TODO: SCP-5564
# - Fix "Global bulk download breaks in default use"
# - Re-enable this test
- # test 'should get HCA metadata tsv link' do
- # skip_if_api_down
- # manifest_info = @hca_azul_client.project_manifest_link(@project_id)
- # assert manifest_info.present?
- # assert_equal 302, manifest_info['Status']
- # # make GET on manifest URL and assert contents are HCA metadata
- # manifest_response = RestClient.get manifest_info['Location']
- # assert_equal 200, manifest_response.code
- # raw_manifest = manifest_response.body.split("\r\n")
- # headers = raw_manifest.first.split("\t")
- # project_id_header = 'project.provenance.document_id'
- # assert headers.include? project_id_header
- # project_idx = headers.index(project_id_header)
- # data_row = raw_manifest.sample.split("\t")
- # assert_equal @project_id, data_row[project_idx]
- # end
-
- test 'should format object for query string' do
- query_object = { 'foo' => { 'bar' => 'baz' } }
- expected_response = '%7B%22foo%22%3A%7B%22bar%22%3A%22baz%22%7D%7D'
- formatted_query = @hca_azul_client.format_hash_as_query_string(query_object)
- assert_equal expected_response, formatted_query
- # assert we can get back to original object, but as JSON
- query_as_json = CGI.unescape(formatted_query)
- assert_equal query_object.to_json, query_as_json
+ test 'should get HCA metadata tsv link' do
+ skip_if_api_down
+ manifest_info = @hca_azul_client.project_manifest_link(@project_id)
+ assert manifest_info.present?
+ assert_equal 302, manifest_info['Status']
+ # make GET on manifest URL and assert contents are HCA metadata
+ manifest_response = RestClient.get manifest_info['Location']
+ assert_equal 200, manifest_response.code
+ raw_manifest = manifest_response.body.split("\r\n")
+ headers = raw_manifest.first.split("\t")
+ project_id_header = 'project.provenance.document_id'
+ assert headers.include? project_id_header
+ project_idx = headers.index(project_id_header)
+ data_row = raw_manifest.sample.split("\t")
+ assert_equal @project_id, data_row[project_idx]
end
test 'should format query object from search facets' do
@@ -202,6 +192,14 @@ def get_entries_from_response(response, key)
assert_equal expected_query, query
end
+ test 'should create query filters object' do
+ project_id = SecureRandom.uuid
+ query = { 'projectId' => { 'is' => [project_id] } }
+ query_object = @hca_azul_client.create_query_filters(query)
+ expected_query = "{\"filters\":\"{\\\"projectId\\\":{\\\"is\\\":[\\\"#{project_id}\\\"]}}\"}"
+ assert_equal expected_query, query_object
+ end
+
test 'should format numeric facet query' do
age_facet = [
{
@@ -306,13 +304,6 @@ def get_entries_from_response(response, key)
end
end
- test 'should determine if query is too large' do
- accession_list = 1.upto(500).map { |n| "FakeHCAProject#{n}" }
- query = { project: { is: accession_list } }
- assert @hca_azul_client.query_too_large?(query)
- assert_not @hca_azul_client.query_too_large?({ project: { is: accession_list.take(10) } })
- end
-
test 'should not retry any error status code' do
ApiHelpers::RETRY_STATUS_CODES.each do |code|
assert_not @hca_azul_client.should_retry?(code)
diff --git a/test/services/azul_search_service_test.rb b/test/services/azul_search_service_test.rb
index 2f17dbf377..34cbc7128c 100644
--- a/test/services/azul_search_service_test.rb
+++ b/test/services/azul_search_service_test.rb
@@ -242,20 +242,4 @@ class AzulSearchServiceTest < ActiveSupport::TestCase
expected_term_match = { total: 2, terms: { lung: 2 } }.with_indifferent_access
assert_equal expected_term_match, AzulSearchService.get_search_term_weights(result, %w[lung])
end
-
- test 'should split large queries into two requests' do
- accession_list = 1.upto(500).map { |n| "FakeHCAProject#{n}" }
- query = { 'project' => { 'is' => accession_list } }
- project_result = @human_tcell_response['hits'].first
- mock_query_result = { 'hits' => Array.new(250, project_result) }
- mock = Minitest::Mock.new
- mock.expect :query_too_large?, true, [query]
- mock.expect :projects, mock_query_result, [Hash]
- mock.expect :projects, mock_query_result, [Hash]
- ApplicationController.stub :hca_azul_client, mock do
- results = AzulSearchService.get_file_summary_info(accession_list)
- mock.verify
- assert results.count == 500
- end
- end
end
diff --git a/yarn.lock b/yarn.lock
index c04b0b2f30..ce65eb885c 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -9587,10 +9587,10 @@ vite-plugin-ruby@^3.0.5:
debug "^4.3.4"
fast-glob "^3.2.12"
-vite@^2.9.17:
- version "2.9.17"
- resolved "https://registry.yarnpkg.com/vite/-/vite-2.9.17.tgz#6b770525e12fa2a2e3a0fa0d028d304f4f7dc7d4"
- integrity sha512-XxcRzra6d7xrKXH66jZUgb+srThoPu+TLJc06GifUyKq9JmjHkc1Numc8ra0h56rju2jfVWw3B3fs5l3OFMvUw==
+vite@^2.9.18:
+ version "2.9.18"
+ resolved "https://registry.yarnpkg.com/vite/-/vite-2.9.18.tgz#74e2a83b29da81e602dac4c293312cc575f091c7"
+ integrity sha512-sAOqI5wNM9QvSEE70W3UGMdT8cyEn0+PmJMTFvTB8wB0YbYUWw3gUbY62AOyrXosGieF2htmeLATvNxpv/zNyQ==
dependencies:
esbuild "^0.14.27"
postcss "^8.4.13"