Skip to content

Commit

Permalink
Apply a fix that Miguel made here: #8785
Browse files Browse the repository at this point in the history
This fixes a bug in the partitioning logic used in creating Hail VAT inputs.
  • Loading branch information
gbggrant committed Jul 11, 2024
1 parent f6d964b commit 4810131
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion scripts/variantstore/wdl/GvsUtils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ task GetToolVersions {
# GVS generally uses the smallest `alpine` version of the Google Cloud SDK as it suffices for most tasks, but
# there are a handful of tasks that require the larger GNU libc-based `slim`.
String cloud_sdk_slim_docker = "gcr.io/google.com/cloudsdktool/cloud-sdk:435.0.0-slim"
String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2024-06-06-alpine-b96f6b13570b"
String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2024-07-11-alpine-f5b116022c7b"
String gatk_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/gatk:2024-06-18-gatkbase-f997ff1369ab"
String variants_nirvana_docker = "us.gcr.io/broad-dsde-methods/variantstore:nirvana_2022_10_19"
String real_time_genomics_docker = "docker.io/realtimegenomics/rtg-tools:latest"
Expand Down
10 changes: 5 additions & 5 deletions scripts/variantstore/wdl/extract/hail_create_vat_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,12 @@ def main(vds, ancestry_file_location, sites_only_vcf_path, dry_run_n_parts=None)
sites_only_vcf_path = sites_only_vcf_path.replace(r".vcf.bgz", f'_dryrun.vcf.bgz')
else:
n_rounds = 5
parts_per_round = n_parts // n_rounds
# Add in 'n_rounds - 1' to include all of the partitions in the set of groups, otherwise we would omit the final
# n_parts % n_rounds partitions.
parts_per_round = (n_parts + n_rounds - 1) // n_rounds
ht_paths = [sites_only_vcf_path.replace(r".sites-only.vcf.bgz", f'_{i}.ht') for i in range(n_rounds)]
# HACK for cluster stalled on the final group: redo only the final group, the preceding groups have completed.
for i in []:
# HACK for parts_per_round bug on line 193: use n_parts as the top end of the range.
part_range = range(i*parts_per_round, n_parts)
for i in range(n_rounds):
part_range = range(i*parts_per_round, min((i+1)*parts_per_round, n_parts))
vds_part = hl.vds.VariantDataset(
vds.reference_data._filter_partitions(part_range),
vds.variant_data._filter_partitions(part_range),
Expand Down

0 comments on commit 4810131

Please sign in to comment.