From 98301086d4e1716e5d680ecd8c31ae8f670eeaaa Mon Sep 17 00:00:00 2001 From: Joel Thibault <2701406+jmthibault79@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:38:26 -0500 Subject: [PATCH] [risk=low][RW-14230] Add a genomics extraction testing tool (#9034) * alt version of submitGenomicExtractionJob() --- api/build.gradle | 10 + api/libproject/devstart.rb | 104 +++++++- .../genomics/GenomicExtractionService.java | 56 +++-- .../api/CohortReviewControllerTest.java | 1 - .../workbench/api/CohortsControllerTest.java | 1 - .../api/ConceptSetsControllerTest.java | 1 - .../workbench/api/DataSetControllerTest.java | 1 - .../GenomicExtractionServiceTest.java | 23 +- .../workbench/profile/ProfileServiceTest.java | 1 - .../workspaces/WorkspaceServiceTest.java | 1 - .../tools/RunGenomicExtractionWorkflow.java | 236 ++++++++++++++++++ 11 files changed, 403 insertions(+), 32 deletions(-) create mode 100644 api/tools/src/main/java/org/pmiops/workbench/tools/RunGenomicExtractionWorkflow.java diff --git a/api/build.gradle b/api/build.gradle index 8f01fcd7c2c..86fd19eb079 100644 --- a/api/build.gradle +++ b/api/build.gradle @@ -945,6 +945,7 @@ task listDisks(type: JavaExec) { } } +// ./project.rb send-email task sendEmail(type: JavaExec) { classpath = sourceSets.__tools__.runtimeClasspath mainClass = "org.pmiops.workbench.tools.SendEmail" @@ -953,6 +954,15 @@ task sendEmail(type: JavaExec) { } } +// ./project.rb run-extraction +task runGenomicExtraction(type: JavaExec) { + classpath = sourceSets.__tools__.runtimeClasspath + mainClass = "org.pmiops.workbench.tools.RunGenomicExtractionWorkflow" + if (project.hasProperty("appArgs")) { + args Eval.me(appArgs) + } +} + dependencies { testImplementation 'au.com.dius:pact-jvm-consumer-java8:4.0.10' testImplementation 'au.com.dius:pact-jvm-consumer-junit5:4.0.10' diff --git a/api/libproject/devstart.rb b/api/libproject/devstart.rb index 2185de88504..86f233b23ca 100644 --- a/api/libproject/devstart.rb +++ b/api/libproject/devstart.rb @@ 
-3171,7 +3171,7 @@ def send_email(cmd_name, *args) '--given_name [given name]', String, ->(opts, v) { opts.given_name = v }, - 'User name.') + 'User given (first) name.') op.add_typed_option( '--contact [contact email]', @@ -3186,7 +3186,7 @@ def send_email(cmd_name, *args) 'If specified, sends the DISABLE_USER egress email. Defaults to the SUSPEND_COMPUTE egress email.') op.opts.disable = false - op.add_validator ->(opts) { raise ArgumentError unless opts.username and opts.contact and opts.email == VALID_EMAIL_OPTION } + op.add_validator ->(opts) { raise ArgumentError unless opts.username and opts.given_name and opts.contact and opts.email == VALID_EMAIL_OPTION } op.parse.validate @@ -3222,3 +3222,103 @@ def send_email(cmd_name, *args) :description => "Sends a system email. Currently limited to egress emails.", :fn => ->(*args) {send_email(SEND_EMAIL_CMD, *args)} }) + +def run_genomic_extraction(cmd_name, *args) + common = Common.new + + op = WbOptionsParser.new(cmd_name, args) + + op.add_typed_option( + '--project [project]', + String, + ->(opts, v) { opts.project = v }, + 'AoU environment GCP project full name. 
Used to pick MySQL instance & credentials.') + op.opts.project = TEST_PROJECT + + op.add_typed_option( + '--namespace [workspace namespace]', + String, + ->(opts, v) { opts.namespace = v }, + 'The workspace namespace to run the extraction from.') + + op.add_typed_option( + '--dataset_id [dataset id]', + String, + ->(opts, v) { opts.dataset_id = v }, + 'The dataset to record in the DB as associated with this extraction (arbitrary but must exist).') + + op.add_typed_option( + '--person_ids ["id1, id2, id3"]', + String, + ->(opts, v) { opts.person_ids = v }, + 'The person IDs to be used in the extraction.') + + op.add_typed_option( + '--legacy [true/false]', + String, + ->(opts, v) { opts.legacy = v }, + 'Use legacy (v7 and earlier) workflow (true) or v8+ workflow (false).') + + op.add_typed_option( + '--filter_set [filter set]', + String, + ->(opts, v) { opts.filter_set = v }, + 'Filter set name.') + + op.add_typed_option( + '--cdr_bq_project [project]', + String, + ->(opts, v) { opts.cdr_bq_project = v }, + "The CDR's BigQuery project.") + + op.add_typed_option( + '--wgs_bq_dataset [dataset]', + String, + ->(opts, v) { opts.wgs_bq_dataset = v }, + "The CDR's WGS BigQuery dataset") + + op.add_validator ->(opts) { + raise ArgumentError unless opts.namespace and opts.dataset_id and opts.person_ids and opts.legacy and opts.filter_set and opts.cdr_bq_project and opts.wgs_bq_dataset + } + + op.parse.validate + + gradle_args = ([ + ["--namespace", op.opts.namespace], + ["--dataset_id", op.opts.dataset_id], + ["--person_ids", op.opts.person_ids], + ["--legacy", op.opts.legacy], + ["--filter_set", op.opts.filter_set], + ["--cdr_bq_project", op.opts.cdr_bq_project], + ["--wgs_bq_dataset", op.opts.wgs_bq_dataset], + ]).map { |kv| "#{kv[0]}=#{kv[1]}" } + # Gradle args need to be single-quote wrapped. + gradle_args.map! { |f| "'#{f}'" } + + # Create a cloud context and apply the DB connection variables to the environment. 
+ # These will be read by Gradle and passed as Spring Boot properties to the command-line. + gcc = GcloudContextV2.new(op) + gcc.validate() + ENV.update(read_db_vars(gcc)) + CloudSqlProxyContext.new(gcc.project).run do + common.run_inline %W{./gradlew runGenomicExtraction -PappArgs=[#{gradle_args.join(',')}]} + end + +end + +GENOMIC_EXTRACTION_CMD = "run-extraction" + +# example usage: +# ./project.rb run-extraction \ +# --namespace aou-rw-test-0bead07c \ +# --dataset_id 65204 \ +# --person_ids "20201244" \ +# --legacy false \ +# --filter_set echo-controls \ +# --cdr_bq_project fc-aou-cdr-synth-test-2 \ +# --wgs_bq_dataset echo_controls +Common.register_command({ + :invocation => GENOMIC_EXTRACTION_CMD, + :description => "Runs a genomic extraction workflow. Requires a workspace in the Controlled Tier but can vary from what it specifies in its CDR Configuration.", + :fn => ->(*args) {run_genomic_extraction(GENOMIC_EXTRACTION_CMD, *args)} +}) \ No newline at end of file diff --git a/api/src/main/java/org/pmiops/workbench/genomics/GenomicExtractionService.java b/api/src/main/java/org/pmiops/workbench/genomics/GenomicExtractionService.java index 214ac8d734f..f415609dd39 100644 --- a/api/src/main/java/org/pmiops/workbench/genomics/GenomicExtractionService.java +++ b/api/src/main/java/org/pmiops/workbench/genomics/GenomicExtractionService.java @@ -315,13 +315,16 @@ private List getFailureCauses(FirecloudSubmission firecloudSubmission) { } private Map getWorkflowInputs( - DbWorkspace workspace, WgsCohortExtractionConfig cohortExtractionConfig, String extractionUuid, List personIds, String extractionFolder, String outputDir, - boolean useLegacyWorkflow) { + boolean useLegacyWorkflow, + String filterSetName, + String bigQueryProject, + String wgsBigqueryDataset, + String workspaceGoogleProject) { String[] destinationParts = cohortExtractionConfig.extractionDestinationDataset.split("\\."); if (destinationParts.length != 2) { @@ -333,7 +336,6 @@ private Map getWorkflowInputs(
Map maybeInputs = new HashMap<>(); - String filterSetName = workspace.getCdrVersion().getWgsFilterSetName(); if (!Strings.isNullOrEmpty(filterSetName)) { // If set, apply a joint callset filter during the extraction. There may be multiple such // filters defined within a GVS BigQuery dataset (see the filter_set table to view options). @@ -390,13 +392,9 @@ private Map getWorkflowInputs( + "\"") .put(EXTRACT_WORKFLOW_NAME + ".destination_project_id", "\"" + destinationParts[0] + "\"") .put(EXTRACT_WORKFLOW_NAME + ".destination_dataset_name", "\"" + destinationParts[1] + "\"") - .put( - EXTRACT_WORKFLOW_NAME + ".gvs_project", - "\"" + workspace.getCdrVersion().getBigqueryProject() + "\"") - .put( - EXTRACT_WORKFLOW_NAME + ".gvs_dataset", - "\"" + workspace.getCdrVersion().getWgsBigqueryDataset() + "\"") - .put(EXTRACT_WORKFLOW_NAME + ".query_project", "\"" + workspace.getGoogleProject() + "\"") + .put(EXTRACT_WORKFLOW_NAME + ".gvs_project", "\"" + bigQueryProject + "\"") + .put(EXTRACT_WORKFLOW_NAME + ".gvs_dataset", "\"" + wgsBigqueryDataset + "\"") + .put(EXTRACT_WORKFLOW_NAME + ".query_project", "\"" + workspaceGoogleProject + "\"") // Will produce files named "interval_1.vcf.gz", "interval_32.vcf.gz", // etc .put(EXTRACT_WORKFLOW_NAME + ".output_file_base_name", "\"interval\"") @@ -408,11 +406,15 @@ private Map getWorkflowInputs( public GenomicExtractionJob submitGenomicExtractionJob( DbWorkspace workspace, DbDataset dataSet, TanagraGenomicDataRequest tanagraGenomicDataRequest) throws ApiException { + var cdrVersion = workspace.getCdrVersion(); - boolean isTanagraEnabled = workspace.isCDRAndWorkspaceTanagraEnabled(); + // we use different workflows based on the CDR version: + // one version for v7 or earlier, and one for v8 or later + boolean useLegacyWorkflow = + !Boolean.TRUE.equals(cdrVersion.getNeedsV8GenomicExtractionWorkflow()); List personIds = - isTanagraEnabled + workspace.isCDRAndWorkspaceTanagraEnabled() ? 
genomicDatasetService.getTanagraPersonIdsWithWholeGenome( workspace, tanagraGenomicDataRequest) : genomicDatasetService.getPersonIdsWithWholeGenome(dataSet); @@ -428,10 +430,25 @@ public GenomicExtractionJob submitGenomicExtractionJob( personIds.size(), MAX_EXTRACTION_SAMPLE_COUNT)); } - // we use different workflows based on the CDR version: - // one version for v7 or earlier, and one for v8 or later - boolean useLegacyWorkflow = - !Boolean.TRUE.equals(workspace.getCdrVersion().getNeedsV8GenomicExtractionWorkflow()); + return submitGenomicExtractionJob( + workspace, + dataSet, + personIds, + useLegacyWorkflow, + cdrVersion.getWgsFilterSetName(), + cdrVersion.getBigqueryProject(), + cdrVersion.getWgsBigqueryDataset()); + } + + public GenomicExtractionJob submitGenomicExtractionJob( + DbWorkspace workspace, + DbDataset dataSet, + List personIds, + boolean useLegacyWorkflow, + String filterSetName, + String bigQueryProject, + String wgsBigQueryDataset) + throws ApiException { WgsCohortExtractionConfig cohortExtractionConfig = workbenchConfigProvider.get().wgsCohortExtraction; @@ -465,13 +482,16 @@ public GenomicExtractionJob submitGenomicExtractionJob( new FirecloudMethodConfiguration() .inputs( getWorkflowInputs( - workspace, cohortExtractionConfig, extractionUuid, personIds, extractionFolder, outputDir, - useLegacyWorkflow)) + useLegacyWorkflow, + filterSetName, + bigQueryProject, + wgsBigQueryDataset, + workspace.getGoogleProject())) .methodConfigVersion(versionedConfig.methodRepoVersion) .methodRepoMethod(createRepoMethodParameter(versionedConfig)) .name(extractionUuid) diff --git a/api/src/test/java/org/pmiops/workbench/api/CohortReviewControllerTest.java b/api/src/test/java/org/pmiops/workbench/api/CohortReviewControllerTest.java index cd16c9c8a35..8e4b528d540 100644 --- a/api/src/test/java/org/pmiops/workbench/api/CohortReviewControllerTest.java +++ b/api/src/test/java/org/pmiops/workbench/api/CohortReviewControllerTest.java @@ -300,7 +300,6 @@ public static 
Table asMap() { DataSetService.class, DirectoryService.class, FireCloudService.class, - InitialCreditsService.class, IamService.class, InitialCreditsService.class, LeonardoApiClient.class, diff --git a/api/src/test/java/org/pmiops/workbench/api/CohortsControllerTest.java b/api/src/test/java/org/pmiops/workbench/api/CohortsControllerTest.java index c6fc5c3d6ad..89d2f1cef49 100644 --- a/api/src/test/java/org/pmiops/workbench/api/CohortsControllerTest.java +++ b/api/src/test/java/org/pmiops/workbench/api/CohortsControllerTest.java @@ -210,7 +210,6 @@ public class CohortsControllerTest { DirectoryService.class, FeaturedWorkspaceMapper.class, FireCloudService.class, - InitialCreditsService.class, LeonardoApiClient.class, IamService.class, InitialCreditsService.class, diff --git a/api/src/test/java/org/pmiops/workbench/api/ConceptSetsControllerTest.java b/api/src/test/java/org/pmiops/workbench/api/ConceptSetsControllerTest.java index d3c7218ce60..75a53be3477 100644 --- a/api/src/test/java/org/pmiops/workbench/api/ConceptSetsControllerTest.java +++ b/api/src/test/java/org/pmiops/workbench/api/ConceptSetsControllerTest.java @@ -253,7 +253,6 @@ public class ConceptSetsControllerTest { FeaturedWorkspaceMapper.class, FireCloudService.class, FirecloudMapperImpl.class, - InitialCreditsService.class, IamService.class, InitialCreditsService.class, MailService.class, diff --git a/api/src/test/java/org/pmiops/workbench/api/DataSetControllerTest.java b/api/src/test/java/org/pmiops/workbench/api/DataSetControllerTest.java index 3ad03815f33..e7e74b748de 100644 --- a/api/src/test/java/org/pmiops/workbench/api/DataSetControllerTest.java +++ b/api/src/test/java/org/pmiops/workbench/api/DataSetControllerTest.java @@ -272,7 +272,6 @@ public class DataSetControllerTest { ConceptBigQueryService.class, DirectoryService.class, FeaturedWorkspaceMapper.class, - InitialCreditsService.class, IamService.class, InitialCreditsService.class, MailService.class, diff --git 
a/api/src/test/java/org/pmiops/workbench/genomics/GenomicExtractionServiceTest.java b/api/src/test/java/org/pmiops/workbench/genomics/GenomicExtractionServiceTest.java index 890f18f2f5e..eeca92db56f 100644 --- a/api/src/test/java/org/pmiops/workbench/genomics/GenomicExtractionServiceTest.java +++ b/api/src/test/java/org/pmiops/workbench/genomics/GenomicExtractionServiceTest.java @@ -66,6 +66,7 @@ import org.pmiops.workbench.jira.JiraService; import org.pmiops.workbench.jira.model.CreatedIssue; import org.pmiops.workbench.model.GenomicExtractionJob; +import org.pmiops.workbench.model.TanagraGenomicDataRequest; import org.pmiops.workbench.model.TerraJobStatus; import org.pmiops.workbench.rawls.model.RawlsWorkspaceAccessLevel; import org.pmiops.workbench.rawls.model.RawlsWorkspaceDetails; @@ -459,7 +460,8 @@ private DbWgsExtractCromwellSubmission createSubmissionAndMockMonitorCall( public void submitExtractionJob() throws ApiException { when(mockGenomicDatasetService.getPersonIdsWithWholeGenome(any())) .thenReturn(List.of("1", "2", "3")); - genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, null); + TanagraGenomicDataRequest tanagraRequest = null; + genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, tanagraRequest); ArgumentCaptor argument = ArgumentCaptor.forClass(FirecloudMethodConfiguration.class); @@ -488,7 +490,8 @@ public void submitExtractionJob() throws ApiException { @Test public void submitExtractionJob_outputVcfsInCorrectBucket() throws ApiException { when(mockGenomicDatasetService.getPersonIdsWithWholeGenome(any())).thenReturn(List.of("1")); - genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, null); + TanagraGenomicDataRequest tanagraRequest = null; + genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, tanagraRequest); ArgumentCaptor argument = ArgumentCaptor.forClass(FirecloudMethodConfiguration.class); @@ -508,7 +511,8 @@ public void 
submitExtractionJob_many() throws ApiException { LongStream.range(1, 376).boxed().map(Object::toString).toList(); when(mockGenomicDatasetService.getPersonIdsWithWholeGenome(any())) .thenReturn(largePersonIdList); - genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, null); + TanagraGenomicDataRequest tanagraRequest = null; + genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, tanagraRequest); ArgumentCaptor argument = ArgumentCaptor.forClass(FirecloudMethodConfiguration.class); @@ -534,7 +538,8 @@ public void submitExtractionJob_v8() throws ApiException { .setNeedsV8GenomicExtractionWorkflow(true)); targetWorkspace = workspaceDao.save(targetWorkspace.setCdrVersion(cdrV8)); - genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, null); + TanagraGenomicDataRequest tanagraRequest = null; + genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, tanagraRequest); ArgumentCaptor argument = ArgumentCaptor.forClass(FirecloudMethodConfiguration.class); @@ -552,9 +557,12 @@ public void submitExtractionJob_noWgsData() { when(mockGenomicDatasetService.getPersonIdsWithWholeGenome(any())) .thenReturn(Collections.emptyList()); + TanagraGenomicDataRequest tanagraRequest = null; assertThrows( FailedPreconditionException.class, - () -> genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, null)); + () -> + genomicExtractionService.submitGenomicExtractionJob( + targetWorkspace, dataset, tanagraRequest)); } @Test @@ -564,9 +572,12 @@ public void submitExtractionJob_tooManySamples() { when(mockGenomicDatasetService.getPersonIdsWithWholeGenome(any())) .thenReturn(largePersonIdList); + TanagraGenomicDataRequest tanagraRequest = null; assertThrows( FailedPreconditionException.class, - () -> genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, null)); + () -> + genomicExtractionService.submitGenomicExtractionJob( + targetWorkspace, dataset, 
tanagraRequest)); } @Test diff --git a/api/src/test/java/org/pmiops/workbench/profile/ProfileServiceTest.java b/api/src/test/java/org/pmiops/workbench/profile/ProfileServiceTest.java index 516b4e40b40..6c25de26fb3 100644 --- a/api/src/test/java/org/pmiops/workbench/profile/ProfileServiceTest.java +++ b/api/src/test/java/org/pmiops/workbench/profile/ProfileServiceTest.java @@ -143,7 +143,6 @@ private static Profile createValidProfile() { AccessModuleService.class, AccessTierService.class, InitialCreditsService.class, - InitialCreditsService.class, NewUserSatisfactionSurveyService.class, ProfileAuditor.class, VerifiedInstitutionalAffiliationDao.class, diff --git a/api/src/test/java/org/pmiops/workbench/workspaces/WorkspaceServiceTest.java b/api/src/test/java/org/pmiops/workbench/workspaces/WorkspaceServiceTest.java index 3dee962a2b9..1e5e6a07649 100644 --- a/api/src/test/java/org/pmiops/workbench/workspaces/WorkspaceServiceTest.java +++ b/api/src/test/java/org/pmiops/workbench/workspaces/WorkspaceServiceTest.java @@ -115,7 +115,6 @@ public class WorkspaceServiceTest { ConceptSetService.class, DataSetService.class, FeaturedWorkspaceMapper.class, - InitialCreditsService.class, IamService.class, InitialCreditsService.class, ProfileMapper.class, diff --git a/api/tools/src/main/java/org/pmiops/workbench/tools/RunGenomicExtractionWorkflow.java b/api/tools/src/main/java/org/pmiops/workbench/tools/RunGenomicExtractionWorkflow.java new file mode 100644 index 00000000000..7fc2a10bdcb --- /dev/null +++ b/api/tools/src/main/java/org/pmiops/workbench/tools/RunGenomicExtractionWorkflow.java @@ -0,0 +1,236 @@ +package org.pmiops.workbench.tools; + +import com.google.api.services.oauth2.model.Userinfo; +import jakarta.inject.Provider; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.logging.Logger; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import 
org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.pmiops.workbench.access.AccessTierServiceImpl; +import org.pmiops.workbench.api.BigQueryService; +import org.pmiops.workbench.auth.UserAuthentication; +import org.pmiops.workbench.cdr.CdrVersionMapperImpl; +import org.pmiops.workbench.cdr.CdrVersionService; +import org.pmiops.workbench.cohortbuilder.CohortQueryBuilder; +import org.pmiops.workbench.cohorts.CohortMapperImpl; +import org.pmiops.workbench.cohorts.CohortService; +import org.pmiops.workbench.config.BigQueryConfig; +import org.pmiops.workbench.dataset.GenomicDatasetServiceImpl; +import org.pmiops.workbench.db.dao.DataSetDao; +import org.pmiops.workbench.db.dao.WorkspaceDao; +import org.pmiops.workbench.db.model.DbDataset; +import org.pmiops.workbench.db.model.DbUser; +import org.pmiops.workbench.db.model.DbWorkspace; +import org.pmiops.workbench.firecloud.ApiException; +import org.pmiops.workbench.firecloud.FireCloudCacheConfig; +import org.pmiops.workbench.firecloud.FireCloudConfig; +import org.pmiops.workbench.firecloud.FireCloudServiceImpl; +import org.pmiops.workbench.firecloud.FirecloudApiClientFactory; +import org.pmiops.workbench.genomics.GenomicExtractionMapperImpl; +import org.pmiops.workbench.genomics.GenomicExtractionService; +import org.pmiops.workbench.google.GoogleConfig; +import org.pmiops.workbench.jira.JiraService; +import org.pmiops.workbench.rawls.RawlsApiClientFactory; +import org.pmiops.workbench.rawls.RawlsConfig; +import org.pmiops.workbench.sam.SamApiClientFactory; +import org.pmiops.workbench.sam.SamConfig; +import org.pmiops.workbench.sam.SamRetryHandler; +import org.pmiops.workbench.utils.mappers.CommonMappers; +import org.pmiops.workbench.utils.mappers.FirecloudMapperImpl; +import org.pmiops.workbench.workspaces.WorkspaceAuthService; +import org.springframework.beans.factory.config.ConfigurableBeanFactory; +import org.springframework.boot.CommandLineRunner; +import 
org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.Scope; + +@Import({ + AccessTierServiceImpl.class, + BigQueryConfig.class, // injects com.google.cloud.bigquery.BigQuery + BigQueryService.class, + CdrVersionMapperImpl.class, + CdrVersionService.class, + CohortMapperImpl.class, + CohortQueryBuilder.class, + CohortService.class, + CommonMappers.class, + FireCloudCacheConfig.class, + FireCloudConfig.class, + FireCloudServiceImpl.class, + FirecloudApiClientFactory.class, + FirecloudMapperImpl.class, + GenomicDatasetServiceImpl.class, + GenomicExtractionMapperImpl.class, + GenomicExtractionService.class, + GoogleConfig.class, // injects com.google.cloud.iam.credentials.v1.IamCredentialsClient + JiraService.class, + RawlsApiClientFactory.class, + RawlsConfig.class, + SamApiClientFactory.class, + SamConfig.class, + SamRetryHandler.class, + WorkspaceAuthService.class, +}) +@Configuration +public class RunGenomicExtractionWorkflow extends Tool { + private static final Logger log = Logger.getLogger(RunGenomicExtractionWorkflow.class.getName()); + + private static DbUser dbUser; + + @Bean + @Scope(value = ConfigurableBeanFactory.SCOPE_PROTOTYPE) + static DbUser user() { + // initialized below, from workspace creator + return dbUser; + } + + @Bean + @Scope(value = ConfigurableBeanFactory.SCOPE_PROTOTYPE) + static UserAuthentication userAuthentication( + Provider userProvider, FirecloudApiClientFactory factory) throws IOException { + + DbUser user = userProvider.get(); + Userinfo info = new Userinfo(); // unclear if this is used, so leaving empty + String impersonatedBearerToken = + factory.getDelegatedUserCredentials(user.getUsername()).getAccessToken().getTokenValue(); + + return new UserAuthentication( + user, info, impersonatedBearerToken, UserAuthentication.UserType.RESEARCHER); + } + + private static final Option 
workspaceNamespaceOpt = + Option.builder() + .longOpt("namespace") + .desc("The workspace namespace to run the extraction from") + .required() + .hasArg() + .build(); + private static final Option datasetOpt = + Option.builder() + .longOpt("dataset_id") + .desc("The dataset ID to record in the DB (but not actually use) for the extraction") + .required() + .hasArg() + .build(); + private static final Option personIdsOpt = + Option.builder() + .longOpt("person_ids") + .desc("The person IDs to use in the extraction") + .required() + .hasArg() + .build(); + private static final Option legacyOpt = + Option.builder() + .longOpt("legacy") + .desc("Use legacy (v7) workflow (true/false)") + .required() + .hasArg() + .build(); + private static final Option filterSetOpt = + Option.builder().longOpt("filter_set").desc("Filter set name").required().hasArg().build(); + private static final Option bqProjOpt = + Option.builder() + .longOpt("cdr_bq_project") + .desc("The CDR's BigQuery project") + .required() + .hasArg() + .build(); + private static final Option wgsDatasetOpt = + Option.builder() + .longOpt("wgs_bq_dataset") + .desc("The CDR's WGS BigQuery dataset") + .required() + .hasArg() + .build(); + + private static final Options options = + new Options() + .addOption(workspaceNamespaceOpt) + .addOption(datasetOpt) + .addOption(personIdsOpt) + .addOption(legacyOpt) + .addOption(filterSetOpt) + .addOption(bqProjOpt) + .addOption(wgsDatasetOpt); + + private static void extract( + String[] args, + GenomicExtractionService service, + CdrVersionService cdrVersionService, + DataSetDao dataSetDao, + WorkspaceDao workspaceDao) + throws ParseException, ApiException { + CommandLine opts = new DefaultParser().parse(options, args); + + String namespace = opts.getOptionValue(workspaceNamespaceOpt.getLongOpt()); + DbWorkspace workspace = + workspaceDao + .getByNamespace(namespace) + .orElseThrow( + () -> + new IllegalArgumentException( + String.format("Workspace namespace %s not found", 
namespace))); + dbUser = workspace.getCreator(); + + // We need to set this to build the dataset. + // Therefore, the Workspace still needs to have a valid CDR in the Controlled Tier. + cdrVersionService.setCdrVersion(workspace.getCdrVersion()); + + long datasetId = Long.parseLong(opts.getOptionValue(datasetOpt.getLongOpt())); + DbDataset dataSet = + dataSetDao + .findByDataSetIdAndWorkspaceId(datasetId, workspace.getWorkspaceId()) + .orElseThrow( + () -> + new IllegalArgumentException( + String.format( + "Dataset %d not found in workspace %s", datasetId, namespace))); + + List personIds = + Arrays.stream(opts.getOptionValue(personIdsOpt.getLongOpt()).split(",")) + .map(String::trim) + .toList(); + + boolean useLegacyWorkflow = + Boolean.parseBoolean(opts.getOptionValue(legacyOpt.getLongOpt(), "false")); + String filterSetName = opts.getOptionValue(filterSetOpt.getLongOpt()); + String bigQueryProject = opts.getOptionValue(bqProjOpt.getLongOpt()); + String wgsBigQueryDataset = opts.getOptionValue(wgsDatasetOpt.getLongOpt()); + + service.submitGenomicExtractionJob( + workspace, + dataSet, + personIds, + useLegacyWorkflow, + filterSetName, + bigQueryProject, + wgsBigQueryDataset); + } + + @Bean + public CommandLineRunner run( + GenomicExtractionService service, + CdrVersionService cdrVersionService, + DataSetDao dataSetDao, + WorkspaceDao workspaceDao) { + return args -> { + // project.rb swallows exceptions, so we need to catch and log them here + try { + extract(args, service, cdrVersionService, dataSetDao, workspaceDao); + } catch (Exception e) { + log.severe("Error: " + e.getMessage()); + e.printStackTrace(); + } + }; + } + + public static void main(String[] args) { + CommandLineToolConfig.runCommandLine(RunGenomicExtractionWorkflow.class, args); + } +}