Skip to content

Commit

Permalink
BuildIndices tweaks from a test run
Browse files (browse the repository at this point in the history)
  • Loading branch information
mr-c committed Apr 17, 2024
1 parent: 5525624; commit: 1ae25d8
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 17 deletions.
17 changes: 9 additions & 8 deletions wdl2cwl/tests/cwl_files/BuildIndices.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ steps:
- entryname: script.bash
entry: |4

set -eo pipefail
set -exo pipefail

## download fasta
wget $("ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_" + inputs.organism + "/release_" + inputs.gtf_version)/$("GRC" + inputs.organism_prefix + "38.primary_assembly.genome.fa").gz
Expand Down Expand Up @@ -230,7 +230,7 @@ steps:
- entryname: script.bash
entry: |4+

set -eo pipefail
set -exo pipefail


# index the fasta file
Expand Down Expand Up @@ -305,7 +305,7 @@ steps:
- entryname: script.bash
entry: |4

set -eo pipefail
set -exo pipefail

mkdir star
STAR --runMode genomeGenerate \
Expand Down Expand Up @@ -394,9 +394,9 @@ steps:
- entryname: script.bash
entry: |4

set -eo pipefail
set -exo pipefail

/script/modify_gtf_$(inputs.organism).sh $(inputs.references.genome_fa.path) $(inputs.references.annotation_gtf.path)
bash -ex /script/modify_gtf_$(inputs.organism).sh $(inputs.references.genome_fa.path) $(inputs.references.annotation_gtf.path)

mkdir star
STAR --runMode genomeGenerate \
Expand Down Expand Up @@ -629,24 +629,25 @@ steps:
entry: |4+


set -ex
HISAT2_DIR=/opt/tools/hisat2-2.1.0

# Compressed fasta required here
gzip $(inputs.references.genome_fa.path)
gzip $(inputs.references.genome_fa.path) -c > $(inputs.references.genome_fa.basename).gz

# download snp file
wget http://hgdownload.cse.ucsc.edu/goldenPath/$(inputs.genome_short_string)/database/$("snp" + inputs.dbsnp_version + "Common.txt").gz
gunzip $("snp" + inputs.dbsnp_version + "Common.txt").gz

# extract snps, splice sites, and exon information
$HISAT2_DIR/hisat2_extract_snps_UCSC.py $(inputs.references.genome_fa.path).gz $("snp" + inputs.dbsnp_version + "Common.txt") genome
$HISAT2_DIR/hisat2_extract_snps_haplotypes_UCSC.py $(inputs.references.genome_fa.basename).gz $("snp" + inputs.dbsnp_version + "Common.txt") genome
$HISAT2_DIR/hisat2_extract_splice_sites.py $(inputs.references.annotation_gtf.path) > genome.ss
$HISAT2_DIR/hisat2_extract_exons.py $(inputs.references.annotation_gtf.path) > genome.exon

# build the hisat2 reference
$HISAT2_DIR/hisat2-build \
-p 8 \
genome.fa \
$(inputs.references.genome_fa.path) \
--snp genome.snp \
--haplotype genome.haplotype \
--ss genome.ss \
Expand Down
2 changes: 1 addition & 1 deletion wdl2cwl/tests/inputs/BuildIndices_mouse.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ gtf_version: "M21"
organism: "mouse"
organism_prefix: "m"
genome_short_string: "mm10"
dbsnp_version: "150"
dbsnp_version: "142"
17 changes: 9 additions & 8 deletions wdl2cwl/tests/wdl_files/BuildIndices.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ task GetReferences {
String annotation_gtf = "gencode.v~{gtf_version}.primary_assembly.annotation.gtf"

command <<<
set -eo pipefail
set -exo pipefail

## download fasta
wget ~{ftp_path}/~{genome_fa}.gz
Expand Down Expand Up @@ -62,7 +62,7 @@ task BuildStar {
String star_index_name = "~{ref_name}.tar"

command <<<
set -eo pipefail
set -exo pipefail

mkdir star
STAR --runMode genomeGenerate \
Expand Down Expand Up @@ -105,9 +105,9 @@ task BuildStarSingleNucleus {
String annotation_gtf_introns = "introns_modified_gencode.v~{gtf_version}.primary_assembly.annotation.gtf"

command <<<
set -eo pipefail
set -exo pipefail

/script/modify_gtf_~{organism}.sh ~{references.genome_fa} ~{references.annotation_gtf}
bash -ex /script/modify_gtf_~{organism}.sh ~{references.genome_fa} ~{references.annotation_gtf}

mkdir star
STAR --runMode genomeGenerate \
Expand Down Expand Up @@ -254,24 +254,25 @@ task BuildHisat2SnpHaplotypeSplicing {

command <<<

set -ex
HISAT2_DIR=/opt/tools/hisat2-2.1.0

# Compressed fasta required here
gzip ~{references.genome_fa}
gzip ~{references.genome_fa} -c > ~{basename(references.genome_fa)}.gz

# download snp file
wget http://hgdownload.cse.ucsc.edu/goldenPath/~{genome_short_string}/database/~{snp_file}.gz
gunzip ~{snp_file}.gz

# extract snps, splice sites, and exon information
$HISAT2_DIR/hisat2_extract_snps_UCSC.py ~{references.genome_fa}.gz ~{snp_file} genome
$HISAT2_DIR/hisat2_extract_snps_haplotypes_UCSC.py ~{basename(references.genome_fa)}.gz ~{snp_file} genome
$HISAT2_DIR/hisat2_extract_splice_sites.py ~{references.annotation_gtf} > genome.ss
$HISAT2_DIR/hisat2_extract_exons.py ~{references.annotation_gtf} > genome.exon

# build the hisat2 reference
$HISAT2_DIR/hisat2-build \
-p 8 \
genome.fa \
~{references.genome_fa} \
--snp genome.snp \
--haplotype genome.haplotype \
--ss genome.ss \
Expand Down Expand Up @@ -331,7 +332,7 @@ task BuildIntervalList {
String interval_list_name = basename(references.annotation_gtf, ".gtf") + ".interval_list"

command <<<
set -eo pipefail
set -exo pipefail


# index the fasta file
Expand Down

0 comments on commit 1ae25d8

Please sign in to comment.