Skip to content

Commit

Permalink
Fixes download_reference
Browse files Browse the repository at this point in the history
jzieve committed Apr 4, 2020
1 parent fc4f38c commit 2a369c0
Showing 1 changed file with 15 additions and 7 deletions.
22 changes: 15 additions & 7 deletions scripts/download_reference.sh
Original file line number Diff line number Diff line change
@@ -18,10 +18,13 @@ fi
# Directory portion of the requested output file path.
output_dir=`dirname ${output_file}`
# Genome build comes from the second positional argument; defaults to "37".
genome_build=${2:-"37"}

# The mitochondrial RefSeq sequence lives in a different folder (non-nuclear), hence the separate "mt_remote" URL.
if [ ${genome_build} = "37" ]; then
remote="ftp://ftp.ncbi.nlm.nih.gov/genomes/Homo_sapiens/ARCHIVE/BUILD.37.3/Assembled_chromosomes/seq"
remote="ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.25_GRCh37.p13/GCF_000001405.25_GRCh37.p13_assembly_structure/Primary_Assembly/assembled_chromosomes/FASTA/"
mt_remote="ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.25_GRCh37.p13/GCF_000001405.25_GRCh37.p13_assembly_structure/non-nuclear/assembled_chromosomes/FASTA/"
elif [ ${genome_build} = "38" ]; then
remote="ftp://ftp.ncbi.nlm.nih.gov/genomes/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.109/Assembled_chromosomes/seq/"
remote="ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_structure/Primary_Assembly/assembled_chromosomes/FASTA/"
mt_remote="ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_structure/non-nuclear/assembled_chromosomes/FASTA/"
else
echo "Error: Unsupported genome build ${genome_build}, valid values are 37,38"
exit 1
@@ -30,16 +33,21 @@ fi
# Create a scratch directory: try a plain `mktemp -d` first (its stderr is
# discarded) and, if that fails, retry with `-t` using output_dir as the
# template argument.
temp_dir=`mktemp -d 2>/dev/null || mktemp -d -t ${output_dir}`
# Work inside the scratch directory; the directory stack remembers where we
# came from.
pushd ${temp_dir}

for chrom in `seq 1 22` X Y MT
for chrom in `seq 1 22` X Y
do
wget ${remote}/*_ref_*chr${chrom}.fa.gz
wget ${remote}/chr${chrom}.fna.gz
done
wget ${mt_remote}/chrMT.fna.gz

for chrom in `seq 1 22` X Y MT
# Append one chromosome to a FASTA file under a simplified header.
# Arguments:
#   $1 - chromosome label (e.g. 1, X, MT); written as the ">label" header and
#        used to locate chr<label>.fna.gz in the current directory
#   $2 - path of the FASTA file to append to
# Returns: exit status of the final grep in the pipeline.
build_fa() {
  local chrom=$1
  local fasta=$2
  # The redirect target must be quoted: an unquoted path containing
  # whitespace makes bash abort the redirection ("ambiguous redirect").
  printf '>%s\n' "${chrom}" >> "${fasta}"
  # Drop every line containing ">" from the downloaded file so that only the
  # simplified header written above remains for this chromosome.
  gunzip -c "chr${chrom}.fna.gz" | grep -v ">" >> "${fasta}"
}
for chrom in `seq 1 22` X Y
do
echo ">${chrom}" >> ${output_file}
gunzip -c *_ref_*chr${chrom}.fa.gz | grep -v ">" >> "${output_file}"
build_fa $chrom $output_file
done
build_fa "MT" $output_file

if hash samtools 2>/dev/null; then
samtools faidx ${output_file}

0 comments on commit 2a369c0

Please sign in to comment.