From 014f69fea1424f08e5878a7c1dfb7ca7a118bbf3 Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Thu, 10 Aug 2023 14:51:25 -0400
Subject: [PATCH 1/3] bugfix output location of fasta in download_annotations;
 pass NCBI API key to tasks: download_fasta, download_annotations

bugfix output location of fasta in download_annotations; pass NCBI API key to tasks: download_fasta, download_annotations
---
 pipes/WDL/tasks/tasks_ncbi.wdl | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/pipes/WDL/tasks/tasks_ncbi.wdl b/pipes/WDL/tasks/tasks_ncbi.wdl
index bcd46bc5c..ba7f9c25f 100644
--- a/pipes/WDL/tasks/tasks_ncbi.wdl
+++ b/pipes/WDL/tasks/tasks_ncbi.wdl
@@ -5,17 +5,25 @@ task download_fasta {
     String         out_prefix
     Array[String]+ accessions
     String         emailAddress
+    String         apiKeyNCBI
 
     String         docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2"
   }
 
+  parameter_meta {
+    out_prefix:   { description: "basename of the output fasta file. Will contain multiple sequences if multiple accessions are specified" }
+    accessions:   { description: "accessions of sequences to download" }
+    apiKeyNCBI:   { description: "NCBI API key for more frequent requests; see: https://support.nlm.nih.gov/knowledgebase/article/KA-05317/en-us" }
+  }
+
   command {
     ncbi.py --version | tee VERSION
     ncbi.py fetch_fastas \
+        --combinedFilePrefix ${out_prefix} \
+        ~{'--api_key ' + apiKeyNCBI} \
         ${emailAddress} \
         . \
-        ${sep=' ' accessions} \
-        --combinedFilePrefix ${out_prefix} \
+        ${sep=' ' accessions}
   }
 
   output {
@@ -36,31 +44,42 @@ task download_annotations {
   input {
     Array[String]+ accessions
     String         emailAddress
+    String         apiKey
     String         combined_out_prefix
 
     String         docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2"
   }
 
+  parameter_meta {
+    combined_out_prefix: { description: "basename of the output fasta file. Will contain multiple sequences if multiple accessions are specified" }
+    accessions:          { description: "accessions for which sequences and feature tables will be downloaded" }
+    apiKeyNCBI:          { description: "NCBI API key for more frequent requests; see: https://support.nlm.nih.gov/knowledgebase/article/KA-05317/en-us" }
+  }
+
   command <<<
     set -ex -o pipefail
     ncbi.py --version | tee VERSION
     ncbi.py fetch_feature_tables \
+        ~{'--api_key ' + apiKeyNCBI} \
         ~{emailAddress} \
         ./ \
         ~{sep=' ' accessions} \
         --loglevel DEBUG
     mkdir -p combined
+    pushd combined
     ncbi.py fetch_fastas \
+        --combinedFilePrefix "~{combined_out_prefix}" \
+        ~{'--api_key ' + apiKeyNCBI} \
+        --forceOverwrite \
         ~{emailAddress} \
         ./ \
         ~{sep=' ' accessions} \
-        --combinedFilePrefix "combined/~{combined_out_prefix}" \
-        --forceOverwrite \
         --loglevel DEBUG
+    popd
   >>>
 
   output {
-    File        combined_fasta   = "~{combined_out_prefix}.fasta"
+    File        combined_fasta   = "combined/~{combined_out_prefix}.fasta"
     Array[File] genomes_fasta    = glob("*.fasta")
     Array[File] features_tbl     = glob("*.tbl")
     String      viralngs_version = read_string("VERSION")

From 81c17a5772890c3b00819d5b512463dc8012a362 Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Thu, 10 Aug 2023 14:56:17 -0400
Subject: [PATCH 2/3] write combined fasta as temp file, then move to final
 output path in subdir

write combined fasta as temp file, then move to final output path in subdir to avoid potential collision in the case where the output prefix matches one of the accessions
---
 pipes/WDL/tasks/tasks_ncbi.wdl | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/pipes/WDL/tasks/tasks_ncbi.wdl b/pipes/WDL/tasks/tasks_ncbi.wdl
index ba7f9c25f..66fbf7770 100644
--- a/pipes/WDL/tasks/tasks_ncbi.wdl
+++ b/pipes/WDL/tasks/tasks_ncbi.wdl
@@ -18,16 +18,18 @@ task download_fasta {
 
   command {
     ncbi.py --version | tee VERSION
+    mkdir -p combined
     ncbi.py fetch_fastas \
-        --combinedFilePrefix ${out_prefix} \
+        --combinedFilePrefix "tmp.${out_prefix}" \
         ~{'--api_key ' + apiKeyNCBI} \
         ${emailAddress} \
         . \
         ${sep=' ' accessions}
+    mv "tmp.${out_prefix}.fasta" "combined/${out_prefix}.fasta"
   }
 
   output {
-    File   sequences_fasta  = "${out_prefix}.fasta"
+    File   sequences_fasta  = "combined/${out_prefix}.fasta"
     String viralngs_version = read_string("VERSION")
   }
 
@@ -66,16 +68,15 @@ task download_annotations {
         ~{sep=' ' accessions} \
         --loglevel DEBUG
     mkdir -p combined
-    pushd combined
     ncbi.py fetch_fastas \
-        --combinedFilePrefix "~{combined_out_prefix}" \
+        --combinedFilePrefix "temp.~{combined_out_prefix}" \
         ~{'--api_key ' + apiKeyNCBI} \
         --forceOverwrite \
         ~{emailAddress} \
         ./ \
         ~{sep=' ' accessions} \
         --loglevel DEBUG
-    popd
+    mv "temp.~{combined_out_prefix}.fasta" "combined/~{combined_out_prefix}.fasta"
   >>>
 
   output {

From fe0e8601ebf11fde7ab70b35e34f4dbcc949988c Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Thu, 10 Aug 2023 16:39:56 -0400
Subject: [PATCH 3/3] s/apiKey/apiKeyNCBI/

---
 pipes/WDL/tasks/tasks_ncbi.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipes/WDL/tasks/tasks_ncbi.wdl b/pipes/WDL/tasks/tasks_ncbi.wdl
index 66fbf7770..c192f60de 100644
--- a/pipes/WDL/tasks/tasks_ncbi.wdl
+++ b/pipes/WDL/tasks/tasks_ncbi.wdl
@@ -46,7 +46,7 @@ task download_annotations {
   input {
     Array[String]+ accessions
     String         emailAddress
-    String         apiKey
+    String         apiKeyNCBI
     String         combined_out_prefix
 
     String         docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2"