From aa104b9ba5341c7be4fb4f0a78e8d84902e4029a Mon Sep 17 00:00:00 2001 From: Xin Kong <357787252@qq.com> Date: Wed, 15 Jul 2020 20:00:12 +0800 Subject: [PATCH] implement re-download script for double check --- re-download-dataset.sh | 74 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 re-download-dataset.sh diff --git a/re-download-dataset.sh b/re-download-dataset.sh new file mode 100644 index 0000000..7bb12fe --- /dev/null +++ b/re-download-dataset.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# Copyright 2019 David Bishai. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Number of processes to run in parallel. +NUM_PROC=6 + +# Dataset split to download. +# Options: train, test, index. +SPLIT=$1 + +# Inclusive upper limit for file downloads. Should be set according to split: +# train --> 499. +# test --> 19. +# index --> 99. 
# Inclusive index of the last shard to process (second positional argument).
N=$2

#######################################
# Print the MD5 digest (hex only, no file name) of a local file.
# The tool is chosen by probing for it rather than by $OSTYPE, so Linux
# variants that do not report exactly "linux-gnu" still work.
# Arguments: $1 - path of the file to hash
# Outputs:   the digest on stdout
# Returns:   non-zero when the file is missing or no MD5 tool is installed
#######################################
_local_md5() {
  local file=$1 out
  [[ -f "$file" ]] || return 1
  if command -v md5sum >/dev/null 2>&1; then
    out=$(md5sum "$file") || return 1
  elif command -v md5 >/dev/null 2>&1; then
    out=$(md5 -r "$file") || return 1
  else
    return 1
  fi
  # Both tools print "<digest> <name>"; keep only the first field.
  printf '%s\n' "${out%% *}"
}

#######################################
# Print the digest recorded in a downloaded md5 checksum file.
# Arguments: $1 - path of the checksum file
# Outputs:   first space-delimited field of its first line
# Returns:   non-zero when the file is unreadable or empty
#######################################
_expected_md5() {
  local file=$1 line=""
  [[ -r "$file" ]] || return 1
  IFS= read -r line <"$file" || [[ -n "$line" ]] || return 1
  printf '%s\n' "${line%% *}"
}

#######################################
# Double-check one image shard and re-download it when its checksum is wrong.
#
# NOTE(review): the middle of this function was lost in the source this was
# recovered from. The visible fragments show: hash the local tar, compare,
# two curl downloads (tar + md5), hash again, compare again. The branch
# bodies (skip when already valid; extract after a successful re-download,
# as the function name suggests) are a reconstruction -- confirm against the
# original script.
#
# Globals:   SPLIT (read) - dataset split used to build the URLs
# Arguments: $1 - zero-padded shard index, e.g. 007
# Outputs:   progress on stdout, failures on stderr
# Returns:   0 when the shard is valid (already, or after re-download),
#            1 when the download fails or the checksum still does not match
#######################################
download_check_and_extract() {
  local i=$1
  local images_file_name="images_${i}.tar"
  local images_md5_file_name="md5.images_${i}.txt"
  local base_url="https://s3.amazonaws.com/google-landmark"
  local images_tar_url="${base_url}/${SPLIT}/${images_file_name}"
  local images_md5_url="${base_url}/md5sum/${SPLIT}/${images_md5_file_name}"
  local md5_1 md5_2

  # First pass: is the copy already on disk intact?
  md5_1=$(_local_md5 "$images_file_name")
  md5_2=$(_expected_md5 "$images_md5_file_name")
  if [[ -n "$md5_1" && "$md5_1" == "$md5_2" ]]; then
    echo "$images_file_name already matches its checksum, skipping."
    return 0
  fi

  echo "Re-downloading $images_file_name and its md5sum..."
  # -f: treat HTTP errors as failures instead of saving the error page as
  # the archive; -sS: quiet, but still report errors.
  if ! curl -fsS -O "$images_tar_url" || ! curl -fsS -O "$images_md5_url"; then
    echo "Download failed for $images_file_name" >&2
    return 1
  fi

  # Second pass: verify the fresh copy before unpacking it.
  md5_1=$(_local_md5 "$images_file_name")
  md5_2=$(_expected_md5 "$images_md5_file_name")
  if [[ -n "$md5_1" && "$md5_1" == "$md5_2" ]]; then
    tar -xf "./$images_file_name" || return 1
    echo "$images_file_name extracted!"
  else
    echo "MD5 checksum for $images_file_name did not match checksum in $images_md5_file_name" >&2
    return 1
  fi
}

#######################################
# Process shards 000..N in batches, each batch running in parallel.
# Globals:   SPLIT (read), N (read), NUM_PROC (read; batch size, default 6)
# Returns:   0 when every shard succeeded, 1 when any worker failed,
#            2 on missing or invalid arguments
#######################################
main() {
  local -r procs=${NUM_PROC:-6}
  if [[ -z "$SPLIT" || ! "$N" =~ ^[0-9]+$ || ! "$procs" =~ ^[1-9][0-9]*$ ]]; then
    printf 'Usage: %s <train|test|index> <last-shard-index>\n' "${0##*/}" >&2
    return 2
  fi

  # Force base 10 so an argument such as 099 is not parsed as octal.
  local -r last=$((10#$N))
  local i j limit shard pid failed=0
  local -a pids

  for ((i = 0; i <= last; i += procs)); do
    # Clamp the final batch to the inclusive upper bound.
    limit=$((i + procs - 1))
    ((limit > last)) && limit=$last

    pids=()
    for ((j = i; j <= limit; j++)); do
      printf -v shard '%03d' "$j"
      download_check_and_extract "$shard" &
      pids+=("$!")
    done

    # Wait per PID so a failing worker is reflected in the exit status.
    for pid in "${pids[@]}"; do
      wait "$pid" || failed=1
    done
  done
  return "$failed"
}

main