Skip to content

Commit

Permalink
Merge pull request #2007 from broadinstitute/development
Browse files Browse the repository at this point in the history
Release 1.69.0
  • Loading branch information
bistline authored Apr 8, 2024
2 parents ed57599 + 7e8fa00 commit ff3ddb4
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 130 deletions.
8 changes: 5 additions & 3 deletions app/javascript/components/bookmarks/BookmarkManager.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,10 @@ export default function BookmarkManager({bookmarks, studyAccession, clearExplore

/** close and reopen bookmark form to fix positioning issues */
function reopenBookmarkForm() {
  // guard: the popover ref may not be attached yet (e.g. before first render)
  if (formRef && formRef.current) {
    // toggling twice closes and immediately reopens the form in the corrected position
    formRef.current.handleToggle()
    formRef.current.handleToggle()
  }
}

/* keep track of opened top-level tab */
Expand Down Expand Up @@ -205,7 +207,7 @@ export default function BookmarkManager({bookmarks, studyAccession, clearExplore
enableSaveButton(true)
setShowError(false)
updateBookmarkList(bookmark)
resetForm()
setShowError(false)
log(logEvent)
} catch (error) {
enableSaveButton(true)
Expand Down
6 changes: 1 addition & 5 deletions app/javascript/components/search/results/ResultsPanel.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,7 @@ const FacetResultsFooter = ({ studySearchState }) => {
<div className="">
<p>Our advanced search is metadata-powered.
By selecting filters, your search <b>targets only studies that use ontology terms</b> in their metadata file.
Currently, almost 25% of public studies supply that metadata.</p>
{/*
84 of 353 studies as of 2021-06-22,
per https://docs.google.com/spreadsheets/d/1FSpP2XTrG9FqAqD9X-BHxkCZae9vxZA3cQLow8mn-bk
*/}
Many, but not all, public studies supply that metadata.</p>
Learn more about our search capability on our{' '}
<a className= "link-darker-blue" href="https://singlecell.zendesk.com/hc/en-us/articles/360061006431-Search-Studies"
target="_blank" rel="noreferrer">documentation
Expand Down
56 changes: 23 additions & 33 deletions app/lib/azul_search_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -236,39 +236,29 @@ def self.is_analysis_file(file_summary)
# retrieve file summary information from Azul for a list of HCA project accessions
#
# * *params*
# - +accessions+ (Array<String>) => list of HCA project short names/accessions
#
# * *returns*
# - (Array<Hash>) => one result hash per matching project, each seeded with a synthetic
#   'Project Manifest' file entry plus any file info extracted from the Azul response
def self.get_file_summary_info(accessions)
  client = ApplicationController.hca_azul_client
  # query is sent as a POST body, so it no longer needs to be split to avoid HTTP 413
  file_query = { 'project' => { 'is' => accessions } }
  results = client.projects(query: file_query)
  results['hits'].map do |entry|
    entry_hash = entry.with_indifferent_access
    project_hash = entry_hash[:projects].first # there will only ever be one project here
    accession = project_hash[:projectShortname]
    result = {
      study_source: 'HCA',
      name: project_hash[:projectTitle],
      accession:,
      description: project_hash[:projectDescription],
      studyFiles: [
        {
          project_id: project_hash[:projectId],
          file_type: 'Project Manifest',
          count: 1,
          upload_file_size: 1.megabyte, # placeholder filesize as we don't know until manifest is downloaded
          name: "#{accession}.tsv"
        }
      ]
    }.with_indifferent_access
    project_file_info = extract_file_information(entry_hash)
    result[:studyFiles] += project_file_info if project_file_info.any?
    result
  end
end
end
54 changes: 19 additions & 35 deletions app/models/hca_azul_client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def process_api_request(http_method, path, payload: nil, retry_count: 0)
# * *raises*
# - (RestClient::Exception) => if HTTP request fails for any reason
# execute an HTTP request against the Azul API
#
# * *params*
# - +http_method+ (Symbol) => HTTP verb (:get, :post, :put, etc.)
# - +path+ (String) => fully-qualified request URL
# - +payload+ (String, nil) => request body; nil for body-less requests
#
# * *raises*
# - (RestClient::Exception) => if HTTP request fails for any reason
def execute_http_request(http_method, path, payload = nil)
  # Ruby 3.1+ shorthand hash value omission for payload:
  response = RestClient::Request.execute(method: http_method, url: path, payload:, headers: DEFAULT_HEADERS)
  # handle response using helper
  handle_response(response)
end
Expand Down Expand Up @@ -163,7 +163,7 @@ def catalogs
#
# * *params*
# - +catalog+ (String) => HCA catalog name (optional)
# - +query+ (Hash) => query object from :format_query_object
# - +query+ (Hash) => query object
# - +size+ (Integer) => number of results to return (default is 200)
#
# * *returns*
Expand All @@ -172,18 +172,16 @@ def catalogs
# * *raises*
# - (ArgumentError) => if catalog is not in self.all_catalogs
def projects(catalog: nil, query: {}, size: MAX_RESULTS)
  # only 'size' goes in the query string; the filter object is POSTed as the
  # request body to avoid HTTP 413 (Payload Too Large) on long accession lists
  base_path = "#{api_root}/index/projects?size=#{size}"
  path = append_catalog(base_path, catalog)
  process_api_request(:post, path, payload: create_query_filters(query))
end

# simulate OR logic by splitting project queries on facet and joining results
#
# * *params*
# - +catalog+ (String) => HCA catalog name (optional)
# - +query+ (Hash) => query object from :format_query_object
# - +query+ (Hash) => query object
# - +size+ (Integer) => number of results to return (default is 200)
#
# * *returns*
Expand All @@ -196,7 +194,7 @@ def projects_by_facet(catalog: nil, query: {}, size: MAX_RESULTS)
isolated_queries = query.each_pair.map { |facet, filters| { facet => filters } }
Rails.logger.info "Splitting above query into #{isolated_queries.size} requests and joining results"
Parallel.map(isolated_queries, in_threads: isolated_queries.size) do |project_query|
results = projects(catalog: catalog, query: project_query, size: size)
results = projects(catalog: catalog, query: project_query, size:)
results['hits'].each do |result|
project_id = result['projects'].first['projectId']
unless all_results['project_ids'].include?(project_id)
Expand Down Expand Up @@ -242,21 +240,20 @@ def project_manifest_link(project_id, catalog: nil, format: 'compact')

base_path = "#{api_root}/fetch/manifest/files"
project_filter = { 'projectId' => { 'is' => [project_id] } }
filter_query = format_hash_as_query_string(project_filter)
base_path += "?filters=#{filter_query}&format=#{format}"
path = append_catalog(base_path, catalog)
payload = create_query_filters(project_filter)
# since manifest files are generated on-demand, keep making requests until the Status code is 302 (Found)
# Status 301 means that the manifest is still being generated; if no manifest is ready after 30s, return anyway
time_slept = 0
manifest_info = process_api_request(:get, path)
manifest_info = process_api_request(:put, path, payload:)
while manifest_info['Status'] == 301
break if time_slept >= MAX_MANIFEST_TIMEOUT

interval = manifest_info['Retry-After']
Rails.logger.info "Manifest still generating for #{project_id} (#{format}), retrying in #{interval}"
sleep interval
time_slept += interval
manifest_info = process_api_request(:get, path)
manifest_info = process_api_request(:put, path, payload:)
end
manifest_info
end
Expand All @@ -271,12 +268,11 @@ def project_manifest_link(project_id, catalog: nil, format: 'compact')
# * *returns*
# - (Hash) => List of files matching query
def files(catalog: nil, query: {}, size: MAX_RESULTS)
base_path = "#{api_root}/index/files"
query_string = format_hash_as_query_string(query)
base_path += "?filters=#{query_string}&size=#{size}"
base_path = "#{api_root}/index/files?size=#{size}"
payload = create_query_filters(query)
path = append_catalog(base_path, catalog)
# make API request, but fold in project information to each result so that this is preserved for later use
raw_results = process_api_request(:get, path)['hits']
raw_results = process_api_request(:post, path, payload:)['hits']
results = []
raw_results.each do |result|
files = result['files']
Expand Down Expand Up @@ -397,29 +393,17 @@ def merge_query_objects(*query_objects)
merged_query
end

# take a Hash/JSON object and format as a query string parameter
#
# * *params*
# - +query_params+ (Hash) => Hash of query parameters
#
# * *returns*
# - (String) => URL-encoded string version of query parameters
def format_hash_as_query_string(query_params)
  # swap Ruby hash-rocket notation for JSON-style colons, then URL-encode
  json_like = query_params.to_s.gsub('=>', ':')
  CGI.escape(json_like)
end

# create a query filter object to use in a request body
#
# * *params*
# - +query+ (Hash) => Hash of query parameters
#
# * *returns*
# - (String) => JSON query object, with double-encoded 'filters' parameter
#   (Azul expects the 'filters' value to itself be a JSON-encoded string)
def create_query_filters(query)
  {
    filters: query.to_json
  }.to_json
end

# append the HCA catalog name, if passed to a method
Expand Down
2 changes: 1 addition & 1 deletion config/application.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Application < Rails::Application
config.middleware.use Rack::Brotli

# Docker image for file parsing via scp-ingest-pipeline
config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.29.0'
config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.30.0'

# Docker image for image pipeline jobs
config.image_pipeline_docker_image = 'gcr.io/broad-singlecellportal-staging/image-pipeline:0.1.0_c2b090043'
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@
"prop-types": "^15.7.2",
"react-select-event": "^5.3.0",
"stylelint-prettier": "^1.1.2",
"vite": "^2.9.17",
"vite": "^2.9.18",
"vite-plugin-ruby": "^3.0.5"
}
}
55 changes: 23 additions & 32 deletions test/integration/external/hca_azul_client_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -166,31 +166,21 @@ def get_entries_from_response(response, key)
# TODO: SCP-5564
# - Fix "Global bulk download breaks in default use"
# - Re-enable this test
# test 'should get HCA metadata tsv link' do
# skip_if_api_down
# manifest_info = @hca_azul_client.project_manifest_link(@project_id)
# assert manifest_info.present?
# assert_equal 302, manifest_info['Status']
# # make GET on manifest URL and assert contents are HCA metadata
# manifest_response = RestClient.get manifest_info['Location']
# assert_equal 200, manifest_response.code
# raw_manifest = manifest_response.body.split("\r\n")
# headers = raw_manifest.first.split("\t")
# project_id_header = 'project.provenance.document_id'
# assert headers.include? project_id_header
# project_idx = headers.index(project_id_header)
# data_row = raw_manifest.sample.split("\t")
# assert_equal @project_id, data_row[project_idx]
# end

test 'should format object for query string' do
query_object = { 'foo' => { 'bar' => 'baz' } }
expected_response = '%7B%22foo%22%3A%7B%22bar%22%3A%22baz%22%7D%7D'
formatted_query = @hca_azul_client.format_hash_as_query_string(query_object)
assert_equal expected_response, formatted_query
# assert we can get back to original object, but as JSON
query_as_json = CGI.unescape(formatted_query)
assert_equal query_object.to_json, query_as_json
# external integration test: fetches a real manifest from the HCA Azul API
test 'should get HCA metadata tsv link' do
skip_if_api_down
manifest_info = @hca_azul_client.project_manifest_link(@project_id)
assert manifest_info.present?
assert_equal 302, manifest_info['Status']
# make GET on manifest URL and assert contents are HCA metadata
manifest_response = RestClient.get manifest_info['Location']
assert_equal 200, manifest_response.code
raw_manifest = manifest_response.body.split("\r\n")
headers = raw_manifest.first.split("\t")
project_id_header = 'project.provenance.document_id'
assert headers.include? project_id_header
project_idx = headers.index(project_id_header)
# NOTE(review): raw_manifest.sample can pick the header row, which would make the
# final assertion flaky — consider raw_manifest.drop(1).sample; TODO confirm
data_row = raw_manifest.sample.split("\t")
assert_equal @project_id, data_row[project_idx]
end

test 'should format query object from search facets' do
Expand All @@ -202,6 +192,14 @@ def get_entries_from_response(response, key)
assert_equal expected_query, query
end

# verify create_query_filters double-encodes the query: the 'filters' value is
# itself a JSON string inside the outer JSON object
test 'should create query filters object' do
project_id = SecureRandom.uuid
query = { 'projectId' => { 'is' => [project_id] } }
query_object = @hca_azul_client.create_query_filters(query)
expected_query = "{\"filters\":\"{\\\"projectId\\\":{\\\"is\\\":[\\\"#{project_id}\\\"]}}\"}"
assert_equal expected_query, query_object
end

test 'should format numeric facet query' do
age_facet = [
{
Expand Down Expand Up @@ -306,13 +304,6 @@ def get_entries_from_response(response, key)
end
end

# verify query_too_large? flags a 500-accession query but not a 10-accession one
test 'should determine if query is too large' do
accession_list = 1.upto(500).map { |n| "FakeHCAProject#{n}" }
query = { project: { is: accession_list } }
assert @hca_azul_client.query_too_large?(query)
assert_not @hca_azul_client.query_too_large?({ project: { is: accession_list.take(10) } })
end

test 'should not retry any error status code' do
ApiHelpers::RETRY_STATUS_CODES.each do |code|
assert_not @hca_azul_client.should_retry?(code)
Expand Down
16 changes: 0 additions & 16 deletions test/services/azul_search_service_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -242,20 +242,4 @@ class AzulSearchServiceTest < ActiveSupport::TestCase
expected_term_match = { total: 2, terms: { lung: 2 } }.with_indifferent_access
assert_equal expected_term_match, AzulSearchService.get_search_term_weights(result, %w[lung])
end

# verify an oversized accession list is split into two halves (two :projects calls)
# and the joined results cover all 500 projects; the client is fully mocked
test 'should split large queries into two requests' do
accession_list = 1.upto(500).map { |n| "FakeHCAProject#{n}" }
query = { 'project' => { 'is' => accession_list } }
project_result = @human_tcell_response['hits'].first
mock_query_result = { 'hits' => Array.new(250, project_result) }
mock = Minitest::Mock.new
mock.expect :query_too_large?, true, [query]
mock.expect :projects, mock_query_result, [Hash]
mock.expect :projects, mock_query_result, [Hash]
ApplicationController.stub :hca_azul_client, mock do
results = AzulSearchService.get_file_summary_info(accession_list)
mock.verify
assert results.count == 500
end
end
end
8 changes: 4 additions & 4 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -9587,10 +9587,10 @@ vite-plugin-ruby@^3.0.5:
debug "^4.3.4"
fast-glob "^3.2.12"

vite@^2.9.17:
version "2.9.17"
resolved "https://registry.yarnpkg.com/vite/-/vite-2.9.17.tgz#6b770525e12fa2a2e3a0fa0d028d304f4f7dc7d4"
integrity sha512-XxcRzra6d7xrKXH66jZUgb+srThoPu+TLJc06GifUyKq9JmjHkc1Numc8ra0h56rju2jfVWw3B3fs5l3OFMvUw==
vite@^2.9.18:
version "2.9.18"
resolved "https://registry.yarnpkg.com/vite/-/vite-2.9.18.tgz#74e2a83b29da81e602dac4c293312cc575f091c7"
integrity sha512-sAOqI5wNM9QvSEE70W3UGMdT8cyEn0+PmJMTFvTB8wB0YbYUWw3gUbY62AOyrXosGieF2htmeLATvNxpv/zNyQ==
dependencies:
esbuild "^0.14.27"
postcss "^8.4.13"
Expand Down

0 comments on commit ff3ddb4

Please sign in to comment.