diff --git a/app/javascript/components/bookmarks/BookmarkManager.jsx b/app/javascript/components/bookmarks/BookmarkManager.jsx index 1f930ef116..87ad17fdf3 100644 --- a/app/javascript/components/bookmarks/BookmarkManager.jsx +++ b/app/javascript/components/bookmarks/BookmarkManager.jsx @@ -111,8 +111,10 @@ export default function BookmarkManager({bookmarks, studyAccession, clearExplore /** close and reopen bookmark form to fix positioning issues */ function reopenBookmarkForm() { - formRef.current.handleToggle() - formRef.current.handleToggle() + if (formRef && formRef.current) { + formRef.current.handleToggle() + formRef.current.handleToggle() + } } /* keep track of opened top-level tab */ @@ -205,7 +207,7 @@ export default function BookmarkManager({bookmarks, studyAccession, clearExplore enableSaveButton(true) setShowError(false) updateBookmarkList(bookmark) - resetForm() + setShowError(false) log(logEvent) } catch (error) { enableSaveButton(true) diff --git a/app/javascript/components/search/results/ResultsPanel.jsx b/app/javascript/components/search/results/ResultsPanel.jsx index ae6db986e2..60aaed7827 100644 --- a/app/javascript/components/search/results/ResultsPanel.jsx +++ b/app/javascript/components/search/results/ResultsPanel.jsx @@ -78,11 +78,7 @@ const FacetResultsFooter = ({ studySearchState }) => {

Our advanced search is metadata-powered. By selecting filters, your search targets only studies that use ontology terms in their metadata file. - Currently, almost 25% of public studies supply that metadata.

- {/* - 84 of 353 studies as of 2021-06-22, - per https://docs.google.com/spreadsheets/d/1FSpP2XTrG9FqAqD9X-BHxkCZae9vxZA3cQLow8mn-bk - */} + Many, but not all, public studies supply that metadata.

Learn more about our search capability on our{' '} documentation diff --git a/app/lib/azul_search_service.rb b/app/lib/azul_search_service.rb index 9aea17c482..09c8a2336e 100644 --- a/app/lib/azul_search_service.rb +++ b/app/lib/azul_search_service.rb @@ -236,39 +236,29 @@ def self.is_analysis_file(file_summary) def self.get_file_summary_info(accessions) client = ApplicationController.hca_azul_client file_query = { 'project' => { 'is' => accessions } } - if client.query_too_large?(file_query) - # cut query in half to prevent HTTP 413 - queries = accessions.each_slice((accessions.size / 2.0).round).map { |list| { 'project' => { 'is' => list } } } - else - queries = [{ 'project' => { 'is' => accessions } }] - end - all_results = [] - Parallel.map(queries, in_threads: queries.size) do |query| - results = client.projects(query: query) - results['hits'].map do |entry| - entry_hash = entry.with_indifferent_access - project_hash = entry_hash[:projects].first # there will only ever be one project here - accession = project_hash[:projectShortname] - result = { - study_source: 'HCA', - name: project_hash[:projectTitle], - accession: accession, - description: project_hash[:projectDescription], - studyFiles: [ - { - project_id: project_hash[:projectId], - file_type: 'Project Manifest', - count: 1, - upload_file_size: 1.megabyte, # placeholder filesize as we don't know until manifest is downloaded - name: "#{accession}.tsv" - } - ] - }.with_indifferent_access - project_file_info = extract_file_information(entry_hash) - result[:studyFiles] += project_file_info if project_file_info.any? - all_results << result - end + results = client.projects(query: file_query) + results['hits'].map do |entry| + entry_hash = entry.with_indifferent_access + project_hash = entry_hash[:projects].first # there will only ever be one project here + accession = project_hash[:projectShortname] + result = { + study_source: 'HCA', + name: project_hash[:projectTitle], + accession:, + description: project_hash[:projectDescription], + studyFiles: [ + { + project_id: project_hash[:projectId], + file_type: 'Project Manifest', + count: 1, + upload_file_size: 1.megabyte, # placeholder filesize as we don't know until manifest is downloaded + name: "#{accession}.tsv" + } + ] + }.with_indifferent_access + project_file_info = extract_file_information(entry_hash) + result[:studyFiles] += project_file_info if project_file_info.any? + result end - all_results end end diff --git a/app/models/hca_azul_client.rb b/app/models/hca_azul_client.rb index d92f53e10a..b7cd7f9368 100644 --- a/app/models/hca_azul_client.rb +++ b/app/models/hca_azul_client.rb @@ -104,7 +104,7 @@ def process_api_request(http_method, path, payload: nil, retry_count: 0) # * *raises* # - (RestClient::Exception) => if HTTP request fails for any reason def execute_http_request(http_method, path, payload = nil) - response = RestClient::Request.execute(method: http_method, url: path, payload: payload, headers: DEFAULT_HEADERS) + response = RestClient::Request.execute(method: http_method, url: path, payload:, headers: DEFAULT_HEADERS) # handle response using helper handle_response(response) end @@ -163,7 +163,7 @@ def catalogs # # * *params* # - +catalog+ (String) => HCA catalog name (optional) - # - +query+ (Hash) => query object from :format_query_object + # - +query+ (Hash) => query object # - +size+ (Integer) => number of results to return (default is 200) # # * *returns* @@ -172,18 +172,16 @@ def catalogs # * *raises* # - (ArgumentError) => if catalog is not in self.all_catalogs def projects(catalog: nil, query: {}, size: MAX_RESULTS) - base_path = "#{api_root}/index/projects" - base_path += "?filters=#{format_hash_as_query_string(query)}" - base_path += "&size=#{size}" + base_path = "#{api_root}/index/projects?size=#{size}" path = append_catalog(base_path, catalog) - process_api_request(:get, path) + process_api_request(:post, path, payload: create_query_filters(query)) end # simulate OR logic by splitting project queries on facet and joining results # # * *params* # - +catalog+ (String) => HCA catalog name (optional) - # - +query+ (Hash) => query object from :format_query_object + # - +query+ (Hash) => query object # - +size+ (Integer) => number of results to return (default is 200) # # * *returns* @@ -196,7 +194,7 @@ def projects_by_facet(catalog: nil, query: {}, size: MAX_RESULTS) isolated_queries = query.each_pair.map { |facet, filters| { facet => filters } } Rails.logger.info "Splitting above query into #{isolated_queries.size} requests and joining results" Parallel.map(isolated_queries, in_threads: isolated_queries.size) do |project_query| - results = projects(catalog: catalog, query: project_query, size: size) + results = projects(catalog: catalog, query: project_query, size:) results['hits'].each do |result| project_id = result['projects'].first['projectId'] unless all_results['project_ids'].include?(project_id) @@ -242,13 +240,12 @@ def project_manifest_link(project_id, catalog: nil, format: 'compact') base_path = "#{api_root}/fetch/manifest/files" project_filter = { 'projectId' => { 'is' => [project_id] } } - filter_query = format_hash_as_query_string(project_filter) - base_path += "?filters=#{filter_query}&format=#{format}" path = append_catalog(base_path, catalog) + payload = create_query_filters(project_filter) # since manifest files are generated on-demand, keep making requests until the Status code is 302 (Found) # Status 301 means that the manifest is still being generated; if no manifest is ready after 30s, return anyway time_slept = 0 - manifest_info = process_api_request(:get, path) + manifest_info = process_api_request(:put, path, payload:) while manifest_info['Status'] == 301 break if time_slept >= MAX_MANIFEST_TIMEOUT @@ -256,7 +253,7 @@ def project_manifest_link(project_id, catalog: nil, format: 'compact') Rails.logger.info "Manifest still generating for #{project_id} (#{format}), retrying in #{interval}" sleep interval time_slept += interval - manifest_info = process_api_request(:get, path) + manifest_info = process_api_request(:put, path, payload:) end manifest_info end @@ -271,12 +268,11 @@ def project_manifest_link(project_id, catalog: nil, format: 'compact') # * *returns* # - (Hash) => List of files matching query def files(catalog: nil, query: {}, size: MAX_RESULTS) - base_path = "#{api_root}/index/files" - query_string = format_hash_as_query_string(query) - base_path += "?filters=#{query_string}&size=#{size}" + base_path = "#{api_root}/index/files?size=#{size}" + payload = create_query_filters(query) path = append_catalog(base_path, catalog) # make API request, but fold in project information to each result so that this is preserved for later use - raw_results = process_api_request(:get, path)['hits'] + raw_results = process_api_request(:post, path, payload:)['hits'] results = [] raw_results.each do |result| files = result['files'] @@ -397,29 +393,17 @@ def merge_query_objects(*query_objects) merged_query end - # take a Hash/JSON object and format as a query string parameter - # - # * *params* - # - +query_params+ (Hash) => Hash of query parameters - # - # * *returns* - # - (String) => URL-encoded string version of query parameters - def format_hash_as_query_string(query_params) - # replace Ruby => assignment operators with JSON standard colons (:) - sanitized_params = query_params.to_s.gsub(/=>/, ':') - CGI.escape(sanitized_params) - end - - # determine if a requested query is too long and would result in an HTTP 413 Payload Too Large exception + # create a query filter object to use in a request body # # * *params* - # - +query_params+ (Hash) => Hash of query parameters + # - +query+ (Hash) => Hash of query parameters # # * *returns* - # - (Boolean) => T/F if query would be too large - def query_too_large?(query_params) - formatted_query = format_hash_as_query_string(query_params) - formatted_query.size >= MAX_QUERY_LENGTH + # - (String) => JSON query object, with double-encoded 'filters' parameter + def create_query_filters(query) + { + filters: query.to_json + }.to_json end # append the HCA catalog name, if passed to a method diff --git a/config/application.rb b/config/application.rb index f1963afd33..96285eb48e 100644 --- a/config/application.rb +++ b/config/application.rb @@ -29,7 +29,7 @@ class Application < Rails::Application config.middleware.use Rack::Brotli # Docker image for file parsing via scp-ingest-pipeline - config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.29.0' + config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.30.0' # Docker image for image pipeline jobs config.image_pipeline_docker_image = 'gcr.io/broad-singlecellportal-staging/image-pipeline:0.1.0_c2b090043' diff --git a/package.json b/package.json index 41ef15c9fc..8fce1eca10 100644 --- a/package.json +++ b/package.json @@ -105,7 +105,7 @@ "prop-types": "^15.7.2", "react-select-event": "^5.3.0", "stylelint-prettier": "^1.1.2", - "vite": "^2.9.17", + "vite": "^2.9.18", "vite-plugin-ruby": "^3.0.5" } } diff --git a/test/integration/external/hca_azul_client_test.rb b/test/integration/external/hca_azul_client_test.rb index be032d3e25..dcf6bd40bf 100644 --- a/test/integration/external/hca_azul_client_test.rb +++ b/test/integration/external/hca_azul_client_test.rb @@ -166,31 +166,21 @@ def get_entries_from_response(response, key) # TODO: SCP-5564 # - Fix "Global bulk download breaks in default use" # - Re-enable this test - # test 'should get HCA metadata tsv link' do - # skip_if_api_down - # manifest_info = @hca_azul_client.project_manifest_link(@project_id) - # assert manifest_info.present? - # assert_equal 302, manifest_info['Status'] - # # make GET on manifest URL and assert contents are HCA metadata - # manifest_response = RestClient.get manifest_info['Location'] - # assert_equal 200, manifest_response.code - # raw_manifest = manifest_response.body.split("\r\n") - # headers = raw_manifest.first.split("\t") - # project_id_header = 'project.provenance.document_id' - # assert headers.include? project_id_header - # project_idx = headers.index(project_id_header) - # data_row = raw_manifest.sample.split("\t") - # assert_equal @project_id, data_row[project_idx] - # end - - test 'should format object for query string' do - query_object = { 'foo' => { 'bar' => 'baz' } } - expected_response = '%7B%22foo%22%3A%7B%22bar%22%3A%22baz%22%7D%7D' - formatted_query = @hca_azul_client.format_hash_as_query_string(query_object) - assert_equal expected_response, formatted_query - # assert we can get back to original object, but as JSON - query_as_json = CGI.unescape(formatted_query) - assert_equal query_object.to_json, query_as_json + test 'should get HCA metadata tsv link' do + skip_if_api_down + manifest_info = @hca_azul_client.project_manifest_link(@project_id) + assert manifest_info.present? + assert_equal 302, manifest_info['Status'] + # make GET on manifest URL and assert contents are HCA metadata + manifest_response = RestClient.get manifest_info['Location'] + assert_equal 200, manifest_response.code + raw_manifest = manifest_response.body.split("\r\n") + headers = raw_manifest.first.split("\t") + project_id_header = 'project.provenance.document_id' + assert headers.include? project_id_header + project_idx = headers.index(project_id_header) + data_row = raw_manifest.sample.split("\t") + assert_equal @project_id, data_row[project_idx] end test 'should format query object from search facets' do @@ -202,6 +192,14 @@ def get_entries_from_response(response, key) assert_equal expected_query, query end + test 'should create query filters object' do + project_id = SecureRandom.uuid + query = { 'projectId' => { 'is' => [project_id] } } + query_object = @hca_azul_client.create_query_filters(query) + expected_query = "{\"filters\":\"{\\\"projectId\\\":{\\\"is\\\":[\\\"#{project_id}\\\"]}}\"}" + assert_equal expected_query, query_object + end + test 'should format numeric facet query' do age_facet = [ { @@ -306,13 +304,6 @@ def get_entries_from_response(response, key) end end - test 'should determine if query is too large' do - accession_list = 1.upto(500).map { |n| "FakeHCAProject#{n}" } - query = { project: { is: accession_list } } - assert @hca_azul_client.query_too_large?(query) - assert_not @hca_azul_client.query_too_large?({ project: { is: accession_list.take(10) } }) - end - test 'should not retry any error status code' do ApiHelpers::RETRY_STATUS_CODES.each do |code| assert_not @hca_azul_client.should_retry?(code) diff --git a/test/services/azul_search_service_test.rb b/test/services/azul_search_service_test.rb index 2f17dbf377..34cbc7128c 100644 --- a/test/services/azul_search_service_test.rb +++ b/test/services/azul_search_service_test.rb @@ -242,20 +242,4 @@ class AzulSearchServiceTest < ActiveSupport::TestCase expected_term_match = { total: 2, terms: { lung: 2 } }.with_indifferent_access assert_equal expected_term_match, AzulSearchService.get_search_term_weights(result, %w[lung]) end - - test 'should split large queries into two requests' do - accession_list = 1.upto(500).map { |n| "FakeHCAProject#{n}" } - query = { 'project' => { 'is' => accession_list } } - project_result = @human_tcell_response['hits'].first - mock_query_result = { 'hits' => Array.new(250, project_result) } - mock = Minitest::Mock.new - mock.expect :query_too_large?, true, [query] - mock.expect :projects, mock_query_result, [Hash] - mock.expect :projects, mock_query_result, [Hash] - ApplicationController.stub :hca_azul_client, mock do - results = AzulSearchService.get_file_summary_info(accession_list) - mock.verify - assert results.count == 500 - end - end end diff --git a/yarn.lock b/yarn.lock index c04b0b2f30..ce65eb885c 100644 --- a/yarn.lock +++ b/yarn.lock @@ -9587,10 +9587,10 @@ vite-plugin-ruby@^3.0.5: debug "^4.3.4" fast-glob "^3.2.12" -vite@^2.9.17: - version "2.9.17" - resolved "https://registry.yarnpkg.com/vite/-/vite-2.9.17.tgz#6b770525e12fa2a2e3a0fa0d028d304f4f7dc7d4" - integrity sha512-XxcRzra6d7xrKXH66jZUgb+srThoPu+TLJc06GifUyKq9JmjHkc1Numc8ra0h56rju2jfVWw3B3fs5l3OFMvUw== +vite@^2.9.18: + version "2.9.18" + resolved "https://registry.yarnpkg.com/vite/-/vite-2.9.18.tgz#74e2a83b29da81e602dac4c293312cc575f091c7" + integrity sha512-sAOqI5wNM9QvSEE70W3UGMdT8cyEn0+PmJMTFvTB8wB0YbYUWw3gUbY62AOyrXosGieF2htmeLATvNxpv/zNyQ== dependencies: esbuild "^0.14.27" postcss "^8.4.13"