From ee3d90d95d9b5b31ca7d5122ebad84d643165ce8 Mon Sep 17 00:00:00 2001 From: Edwin Sutanto Date: Wed, 25 Dec 2019 19:31:12 +0700 Subject: [PATCH 1/2] Change FTP downloader to wget wget can recognize the FTP proxy given. If there are any failures when downloading, the program will warn the users later in the ungzipping part. --- scripts/rsync_from_ncbi.pl | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/scripts/rsync_from_ncbi.pl b/scripts/rsync_from_ncbi.pl index ffacce5..0ce621d 100755 --- a/scripts/rsync_from_ncbi.pl +++ b/scripts/rsync_from_ncbi.pl @@ -80,30 +80,7 @@ if ($use_ftp) { print STDERR "Step 1/2: Performing ftp file transfer of requested files\n"; - my $ftp = Net::FTP->new($SERVER, Passive => 1) - or die "$PROG: FTP connection error: $@\n"; - $ftp->login($FTP_USER, $FTP_PASS) - or die "$PROG: FTP login error: " . $ftp->message() . "\n"; - $ftp->binary() - or die "$PROG: FTP binary mode error: " . $ftp->message() . "\n"; - $ftp->cwd($SERVER_PATH) - or die "$PROG: FTP CD error: " . $ftp->message() . "\n"; - open MANIFEST, "<", "manifest.txt" - or die "$PROG: can't open manifest: $!\n"; - mkdir "all" or die "$PROG: can't create 'all' directory: $!\n"; - chdir "all" or die "$PROG: can't chdir into 'all' directory: $!\n"; - while (<MANIFEST>) { - chomp; - $ftp->get($_) - or do { - my $msg = $ftp->message(); - if ($msg !~ /: No such file or directory$/) { - warn "$PROG: unable to download $_: $msg\n"; - } - }; - } - close MANIFEST; - chdir ".." 
or die "$PROG: can't return to correct directory: $!\n"; + system("sed 's|^|ftp://${SERVER}${SERVER_PATH}/|' < manifest.txt | xargs -P 8 wget -q --backups=1 -t 2 --ftp-user $FTP_USER --ftp-password $FTP_PASS -P all"); } else { print STDERR "Step 1/2: Performing rsync file transfer of requested files\n"; From 9973dd263bcb788c1ea586f542ae7f6823ee1ba0 Mon Sep 17 00:00:00 2001 From: Edwin Sutanto Date: Thu, 26 Dec 2019 08:40:09 +0700 Subject: [PATCH 2/2] Use HTTPS instead of FTP, less likely to drop connections --- scripts/download_genomic_library.sh | 2 +- scripts/download_taxonomy.sh | 2 +- scripts/rsync_from_ncbi.pl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/download_genomic_library.sh b/scripts/download_genomic_library.sh index c890d4e..d99c07b 100755 --- a/scripts/download_genomic_library.sh +++ b/scripts/download_genomic_library.sh @@ -14,7 +14,7 @@ set -e # Stop on error LIBRARY_DIR="$KRAKEN2_DB_NAME/library" NCBI_SERVER="ftp.ncbi.nlm.nih.gov" -FTP_SERVER="ftp://$NCBI_SERVER" +FTP_SERVER="https://$NCBI_SERVER" RSYNC_SERVER="rsync://$NCBI_SERVER" THIS_DIR=$PWD diff --git a/scripts/download_taxonomy.sh b/scripts/download_taxonomy.sh index 6fbae7b..f2f42fd 100755 --- a/scripts/download_taxonomy.sh +++ b/scripts/download_taxonomy.sh @@ -13,7 +13,7 @@ set -e # Stop on error TAXONOMY_DIR="$KRAKEN2_DB_NAME/taxonomy" NCBI_SERVER="ftp.ncbi.nlm.nih.gov" RSYNC_SERVER="rsync://$NCBI_SERVER" -FTP_SERVER="ftp://$NCBI_SERVER" +FTP_SERVER="https://$NCBI_SERVER" mkdir -p "$TAXONOMY_DIR" cd "$TAXONOMY_DIR" diff --git a/scripts/rsync_from_ncbi.pl b/scripts/rsync_from_ncbi.pl index 0ce621d..d418697 100755 --- a/scripts/rsync_from_ncbi.pl +++ b/scripts/rsync_from_ncbi.pl @@ -80,7 +80,7 @@ if ($use_ftp) { print STDERR "Step 1/2: Performing ftp file transfer of requested files\n"; - system("sed 's|^|ftp://${SERVER}${SERVER_PATH}/|' < manifest.txt | xargs -P 8 wget -q --backups=1 -t 2 --ftp-user $FTP_USER --ftp-password $FTP_PASS -P all"); + 
system("sed 's|^|https://${SERVER}${SERVER_PATH}/|' < manifest.txt | xargs -n 20 -P 8 wget -q --backups=1 -t 2 --ftp-user $FTP_USER --ftp-password $FTP_PASS -P all"); } else { print STDERR "Step 1/2: Performing rsync file transfer of requested files\n";