[risk=low][RW-14230] Add a genomics extraction testing tool (#9034)
* alt version of submitGenomicExtractionJob()
jmthibault79 authored Jan 14, 2025
1 parent bbb7217 commit 9830108
Showing 11 changed files with 403 additions and 32 deletions.
10 changes: 10 additions & 0 deletions api/build.gradle
@@ -945,6 +945,7 @@ task listDisks(type: JavaExec) {
}
}

// ./project.rb send-email
task sendEmail(type: JavaExec) {
classpath = sourceSets.__tools__.runtimeClasspath
mainClass = "org.pmiops.workbench.tools.SendEmail"
@@ -953,6 +954,15 @@ task sendEmail(type: JavaExec) {
}
}

// ./project.rb run-extraction
task runGenomicExtraction(type: JavaExec) {
classpath = sourceSets.__tools__.runtimeClasspath
mainClass = "org.pmiops.workbench.tools.RunGenomicExtractionWorkflow"
if (project.hasProperty("appArgs")) {
args Eval.me(appArgs)
}
}

dependencies {
testImplementation 'au.com.dius:pact-jvm-consumer-java8:4.0.10'
testImplementation 'au.com.dius:pact-jvm-consumer-junit5:4.0.10'
104 changes: 102 additions & 2 deletions api/libproject/devstart.rb
@@ -3171,7 +3171,7 @@ def send_email(cmd_name, *args)
'--given_name [given name]',
String,
->(opts, v) { opts.given_name = v },
'User name.')
'User given (first) name.')

op.add_typed_option(
'--contact [contact email]',
@@ -3186,7 +3186,7 @@ def send_email(cmd_name, *args)
'If specified, sends the DISABLE_USER egress email. Defaults to the SUSPEND_COMPUTE egress email.')
op.opts.disable = false

op.add_validator ->(opts) { raise ArgumentError unless opts.username and opts.contact and opts.email == VALID_EMAIL_OPTION }
op.add_validator ->(opts) { raise ArgumentError unless opts.username and opts.given_name and opts.contact and opts.email == VALID_EMAIL_OPTION }

op.parse.validate

@@ -3222,3 +3222,103 @@ def send_email(cmd_name, *args)
:description => "Sends a system email. Currently limited to egress emails.",
:fn => ->(*args) {send_email(SEND_EMAIL_CMD, *args)}
})

def run_genomic_extraction(cmd_name, *args)
common = Common.new

op = WbOptionsParser.new(cmd_name, args)

op.add_typed_option(
'--project [project]',
String,
->(opts, v) { opts.project = v },
'AoU environment GCP project full name. Used to pick MySQL instance & credentials.')
op.opts.project = TEST_PROJECT

op.add_typed_option(
'--namespace [workspace namespace]',
String,
->(opts, v) { opts.namespace = v },
'The workspace namespace to run the extraction from.')

op.add_typed_option(
'--dataset_id [dataset id]',
String,
->(opts, v) { opts.dataset_id = v },
'The dataset to record in the DB as associated with this extraction (arbitrary but must exist).')

op.add_typed_option(
'--person_ids ["id1, id2, id3"]',
String,
->(opts, v) { opts.person_ids = v },
'The person IDs to be used in the extraction.')

op.add_typed_option(
'--legacy [true/false]',
String,
->(opts, v) { opts.legacy = v },
'Use legacy (v7 and earlier) workflow (true) or v8+ workflow (false).')

op.add_typed_option(
'--filter_set [filter set]',
String,
->(opts, v) { opts.filter_set = v },
'Filter set name.')

op.add_typed_option(
'--cdr_bq_project [project]',
String,
->(opts, v) { opts.cdr_bq_project = v },
"The CDR's BigQuery project.")

op.add_typed_option(
'--wgs_bq_dataset [dataset]',
String,
->(opts, v) { opts.wgs_bq_dataset = v },
"The CDR's WGS BigQuery dataset")

op.add_validator ->(opts) {
raise ArgumentError unless opts.namespace and opts.dataset_id and opts.person_ids and opts.legacy and opts.filter_set and opts.cdr_bq_project and opts.wgs_bq_dataset
}

op.parse.validate

gradle_args = ([
["--namespace", op.opts.namespace],
["--dataset_id", op.opts.dataset_id],
["--person_ids", op.opts.person_ids],
["--legacy", op.opts.legacy],
["--filter_set", op.opts.filter_set],
["--cdr_bq_project", op.opts.cdr_bq_project],
["--wgs_bq_dataset", op.opts.wgs_bq_dataset],
]).map { |kv| "#{kv[0]}=#{kv[1]}" }
# Gradle args need to be single-quote wrapped.
gradle_args.map! { |f| "'#{f}'" }

# Create a cloud context and apply the DB connection variables to the environment.
# These will be read by Gradle and passed as Spring Boot properties on the command line.
gcc = GcloudContextV2.new(op)
gcc.validate()
ENV.update(read_db_vars(gcc))
CloudSqlProxyContext.new(gcc.project).run do
common.run_inline %W{./gradlew runGenomicExtraction -PappArgs=[#{gradle_args.join(',')}]}
end

end

GENOMIC_EXTRACTION_CMD = "run-extraction"

# example usage:
# ./project.rb run-extraction \
# --namespace aou-rw-test-0bead07c \
# --dataset_id 65204 \
# --person_ids "20201244" \
# --legacy false \
# --filter_set echo-controls \
# --cdr_bq_project fc-aou-cdr-synth-test-2 \
# --wgs_bq_dataset echo_controls
Common.register_command({
:invocation => GENOMIC_EXTRACTION_CMD,
:description => "Runs a genomic extraction workflow. Requires a workspace in the Controlled Tier but can vary from what it specifies in its CDR Configuration.",
:fn => ->(*args) {run_genomic_extraction(GENOMIC_EXTRACTION_CMD, *args)}
})
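
For reference, with the example flags above, the Gradle invocation assembled inside the CloudSqlProxyContext block comes out roughly like this (a worked illustration of the gradle_args mapping above, not output captured from a real run):

./gradlew runGenomicExtraction -PappArgs=['--namespace=aou-rw-test-0bead07c','--dataset_id=65204','--person_ids=20201244','--legacy=false','--filter_set=echo-controls','--cdr_bq_project=fc-aou-cdr-synth-test-2','--wgs_bq_dataset=echo_controls']

The runGenomicExtraction task in build.gradle evaluates the -PappArgs string with Eval.me, producing a Groovy list whose entries become program arguments for RunGenomicExtractionWorkflow, while the database connection settings reach the tool as Spring Boot properties via the environment variables applied by read_db_vars.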
(next changed file; name not rendered)
@@ -315,13 +315,16 @@ private List<String> getFailureCauses(FirecloudSubmission firecloudSubmission) {
}

private Map<String, String> getWorkflowInputs(
DbWorkspace workspace,
WgsCohortExtractionConfig cohortExtractionConfig,
String extractionUuid,
List<String> personIds,
String extractionFolder,
String outputDir,
boolean useLegacyWorkflow) {
boolean useLegacyWorkflow,
String filterSetName,
String bigQueryProject,
String wgsBigqueryDataset,
String workspaceGoogleProject) {

String[] destinationParts = cohortExtractionConfig.extractionDestinationDataset.split("\\.");
if (destinationParts.length != 2) {
@@ -333,7 +336,6 @@ private Map<String, String> getWorkflowInputs(

Map<String, String> maybeInputs = new HashMap<>();

String filterSetName = workspace.getCdrVersion().getWgsFilterSetName();
if (!Strings.isNullOrEmpty(filterSetName)) {
// If set, apply a joint callset filter during the extraction. There may be multiple such
// filters defined within a GVS BigQuery dataset (see the filter_set table to view options).
@@ -390,13 +392,9 @@ private Map<String, String> getWorkflowInputs(
+ "\"")
.put(EXTRACT_WORKFLOW_NAME + ".destination_project_id", "\"" + destinationParts[0] + "\"")
.put(EXTRACT_WORKFLOW_NAME + ".destination_dataset_name", "\"" + destinationParts[1] + "\"")
.put(
EXTRACT_WORKFLOW_NAME + ".gvs_project",
"\"" + workspace.getCdrVersion().getBigqueryProject() + "\"")
.put(
EXTRACT_WORKFLOW_NAME + ".gvs_dataset",
"\"" + workspace.getCdrVersion().getWgsBigqueryDataset() + "\"")
.put(EXTRACT_WORKFLOW_NAME + ".query_project", "\"" + workspace.getGoogleProject() + "\"")
.put(EXTRACT_WORKFLOW_NAME + ".gvs_project", "\"" + bigQueryProject + "\"")
.put(EXTRACT_WORKFLOW_NAME + ".gvs_dataset", "\"" + wgsBigqueryDataset + "\"")
.put(EXTRACT_WORKFLOW_NAME + ".query_project", "\"" + workspaceGoogleProject + "\"")
// Will produce files named "interval_1.vcf.gz", "interval_32.vcf.gz",
// etc
.put(EXTRACT_WORKFLOW_NAME + ".output_file_base_name", "\"interval\"")
Expand All @@ -408,11 +406,15 @@ private Map<String, String> getWorkflowInputs(
public GenomicExtractionJob submitGenomicExtractionJob(
DbWorkspace workspace, DbDataset dataSet, TanagraGenomicDataRequest tanagraGenomicDataRequest)
throws ApiException {
var cdrVersion = workspace.getCdrVersion();

boolean isTanagraEnabled = workspace.isCDRAndWorkspaceTanagraEnabled();
// we use different workflows based on the CDR version:
// one version for v7 or earlier, and one for v8 or later
boolean useLegacyWorkflow =
!Boolean.TRUE.equals(cdrVersion.getNeedsV8GenomicExtractionWorkflow());

List<String> personIds =
isTanagraEnabled
workspace.isCDRAndWorkspaceTanagraEnabled()
? genomicDatasetService.getTanagraPersonIdsWithWholeGenome(
workspace, tanagraGenomicDataRequest)
: genomicDatasetService.getPersonIdsWithWholeGenome(dataSet);
Expand All @@ -428,10 +430,25 @@ public GenomicExtractionJob submitGenomicExtractionJob(
personIds.size(), MAX_EXTRACTION_SAMPLE_COUNT));
}

// we use different workflows based on the CDR version:
// one version for v7 or earlier, and one for v8 or later
boolean useLegacyWorkflow =
!Boolean.TRUE.equals(workspace.getCdrVersion().getNeedsV8GenomicExtractionWorkflow());
return submitGenomicExtractionJob(
workspace,
dataSet,
personIds,
useLegacyWorkflow,
cdrVersion.getWgsFilterSetName(),
cdrVersion.getBigqueryProject(),
cdrVersion.getWgsBigqueryDataset());
}

public GenomicExtractionJob submitGenomicExtractionJob(
DbWorkspace workspace,
DbDataset dataSet,
List<String> personIds,
boolean useLegacyWorkflow,
String filterSetName,
String bigQueryProject,
String wgsBigQueryDataset)
throws ApiException {

WgsCohortExtractionConfig cohortExtractionConfig =
workbenchConfigProvider.get().wgsCohortExtraction;
@@ -465,13 +482,16 @@ public GenomicExtractionJob submitGenomicExtractionJob(
new FirecloudMethodConfiguration()
.inputs(
getWorkflowInputs(
workspace,
cohortExtractionConfig,
extractionUuid,
personIds,
extractionFolder,
outputDir,
useLegacyWorkflow))
useLegacyWorkflow,
filterSetName,
bigQueryProject,
wgsBigQueryDataset,
workspace.getGoogleProject()))
.methodConfigVersion(versionedConfig.methodRepoVersion)
.methodRepoMethod(createRepoMethodParameter(versionedConfig))
.name(extractionUuid)
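
To make the new entry point concrete, here is a minimal sketch of how a caller such as the command-line tool added by this commit might drive the seven-argument overload directly, supplying the BigQuery source explicitly instead of deriving it from the workspace's CDR version. The wiring around the call is assumed (the tool's own source is not among the hunks shown here), the service type name is inferred from the genomicExtractionService field in the tests below, imports are omitted, and the literal values simply mirror the example usage in devstart.rb.

  // Sketch only: assumes the service, workspace, and dataset have already been
  // resolved (for example from the --namespace and --dataset_id flags).
  void runExtraction(
      GenomicExtractionService service, DbWorkspace workspace, DbDataset dataset)
      throws ApiException {
    service.submitGenomicExtractionJob(
        workspace,
        dataset,
        List.of("20201244"),        // --person_ids
        false,                      // --legacy          -> useLegacyWorkflow
        "echo-controls",            // --filter_set      -> filterSetName
        "fc-aou-cdr-synth-test-2",  // --cdr_bq_project  -> bigQueryProject
        "echo_controls");           // --wgs_bq_dataset  -> wgsBigQueryDataset
  }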
(next changed file; name not rendered)
@@ -300,7 +300,6 @@ public static Table<Long, CriteriaType, String> asMap() {
DataSetService.class,
DirectoryService.class,
FireCloudService.class,
InitialCreditsService.class,
IamService.class,
InitialCreditsService.class,
LeonardoApiClient.class,
(next changed file; name not rendered)
@@ -210,7 +210,6 @@ public class CohortsControllerTest {
DirectoryService.class,
FeaturedWorkspaceMapper.class,
FireCloudService.class,
InitialCreditsService.class,
LeonardoApiClient.class,
IamService.class,
InitialCreditsService.class,
(next changed file; name not rendered)
@@ -253,7 +253,6 @@ public class ConceptSetsControllerTest {
FeaturedWorkspaceMapper.class,
FireCloudService.class,
FirecloudMapperImpl.class,
InitialCreditsService.class,
IamService.class,
InitialCreditsService.class,
MailService.class,
(next changed file; name not rendered)
@@ -272,7 +272,6 @@ public class DataSetControllerTest {
ConceptBigQueryService.class,
DirectoryService.class,
FeaturedWorkspaceMapper.class,
InitialCreditsService.class,
IamService.class,
InitialCreditsService.class,
MailService.class,
(next changed file; name not rendered)
@@ -66,6 +66,7 @@
import org.pmiops.workbench.jira.JiraService;
import org.pmiops.workbench.jira.model.CreatedIssue;
import org.pmiops.workbench.model.GenomicExtractionJob;
import org.pmiops.workbench.model.TanagraGenomicDataRequest;
import org.pmiops.workbench.model.TerraJobStatus;
import org.pmiops.workbench.rawls.model.RawlsWorkspaceAccessLevel;
import org.pmiops.workbench.rawls.model.RawlsWorkspaceDetails;
@@ -459,7 +460,8 @@ private DbWgsExtractCromwellSubmission createSubmissionAndMockMonitorCall(
public void submitExtractionJob() throws ApiException {
when(mockGenomicDatasetService.getPersonIdsWithWholeGenome(any()))
.thenReturn(List.of("1", "2", "3"));
genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, null);
TanagraGenomicDataRequest tanagraRequest = null;
genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, tanagraRequest);

ArgumentCaptor<FirecloudMethodConfiguration> argument =
ArgumentCaptor.forClass(FirecloudMethodConfiguration.class);
@@ -488,7 +490,8 @@ public void submitExtractionJob() throws ApiException {
@Test
public void submitExtractionJob_outputVcfsInCorrectBucket() throws ApiException {
when(mockGenomicDatasetService.getPersonIdsWithWholeGenome(any())).thenReturn(List.of("1"));
genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, null);
TanagraGenomicDataRequest tanagraRequest = null;
genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, tanagraRequest);

ArgumentCaptor<FirecloudMethodConfiguration> argument =
ArgumentCaptor.forClass(FirecloudMethodConfiguration.class);
@@ -508,7 +511,8 @@ public void submitExtractionJob_many() throws ApiException {
LongStream.range(1, 376).boxed().map(Object::toString).toList();
when(mockGenomicDatasetService.getPersonIdsWithWholeGenome(any()))
.thenReturn(largePersonIdList);
genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, null);
TanagraGenomicDataRequest tanagraRequest = null;
genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, tanagraRequest);

ArgumentCaptor<FirecloudMethodConfiguration> argument =
ArgumentCaptor.forClass(FirecloudMethodConfiguration.class);
@@ -534,7 +538,8 @@ public void submitExtractionJob_v8() throws ApiException {
.setNeedsV8GenomicExtractionWorkflow(true));
targetWorkspace = workspaceDao.save(targetWorkspace.setCdrVersion(cdrV8));

genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, null);
TanagraGenomicDataRequest tanagraRequest = null;
genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, tanagraRequest);

ArgumentCaptor<FirecloudMethodConfiguration> argument =
ArgumentCaptor.forClass(FirecloudMethodConfiguration.class);
@@ -552,9 +557,12 @@ public void submitExtractionJob_noWgsData() {
when(mockGenomicDatasetService.getPersonIdsWithWholeGenome(any()))
.thenReturn(Collections.emptyList());

TanagraGenomicDataRequest tanagraRequest = null;
assertThrows(
FailedPreconditionException.class,
() -> genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, null));
() ->
genomicExtractionService.submitGenomicExtractionJob(
targetWorkspace, dataset, tanagraRequest));
}

@Test
@@ -564,9 +572,12 @@ public void submitExtractionJob_tooManySamples() {
when(mockGenomicDatasetService.getPersonIdsWithWholeGenome(any()))
.thenReturn(largePersonIdList);

TanagraGenomicDataRequest tanagraRequest = null;
assertThrows(
FailedPreconditionException.class,
() -> genomicExtractionService.submitGenomicExtractionJob(targetWorkspace, dataset, null));
() ->
genomicExtractionService.submitGenomicExtractionJob(
targetWorkspace, dataset, tanagraRequest));
}

@Test
(next changed file; name not rendered)
@@ -143,7 +143,6 @@ private static Profile createValidProfile() {
AccessModuleService.class,
AccessTierService.class,
InitialCreditsService.class,
InitialCreditsService.class,
NewUserSatisfactionSurveyService.class,
ProfileAuditor.class,
VerifiedInstitutionalAffiliationDao.class,
(next changed file; name not rendered)
@@ -115,7 +115,6 @@ public class WorkspaceServiceTest {
ConceptSetService.class,
DataSetService.class,
FeaturedWorkspaceMapper.class,
InitialCreditsService.class,
IamService.class,
InitialCreditsService.class,
ProfileMapper.class,
(remaining changed files not shown)
