From 9a8822ffd1e24278ccd15b2996a89cad7427da3f Mon Sep 17 00:00:00 2001 From: teodorpopescuQB <83948905+teodorpopescuQB@users.noreply.github.com> Date: Tue, 13 Feb 2024 18:03:55 +0000 Subject: [PATCH] Replace mmCIF rsync FTP download with AWS CLI download from Amazon S3 PDB Snapshots (#566) * Adjust configuration to use PDB100 instead of PDB70 database * revert config json change * Replace rsync ftp mmcif database download with aws cli download from s3 * Adjust code to check for awscli and only download via AWS if variable is set to true * Keep calling rsync after aws download to get latest changes --------- Co-authored-by: Milot Mirdita --- setup_databases.sh | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/setup_databases.sh b/setup_databases.sh index 7c4e8256..64ae1449 100755 --- a/setup_databases.sh +++ b/setup_databases.sh @@ -6,9 +6,13 @@ WORKDIR="${1:-$(pwd)}" PDB_SERVER="${2:-"rsync.wwpdb.org::ftp"}" PDB_PORT="${3:-"33444"}" +# do initial download of the PDB through aws? +# still syncs latest structures through rsync +PDB_AWS_DOWNLOAD="${4:-}" +PDB_AWS_SNAPSHOT="20240101" + UNIREF30DB="uniref30_2302" MMSEQS_NO_INDEX=${MMSEQS_NO_INDEX:-} - cd "${WORKDIR}" hasCommand () { @@ -23,6 +27,12 @@ if [ "$STRATEGY" = "" ]; then fail "No download tool found in PATH. Please install aria2c, curl or wget." fi +if [ -n "${PDB_AWS_DOWNLOAD}" ]; then + if ! hasCommand aws; then + fail "aws command not found in PATH. Please install the aws command line tool." + fi +fi + downloadFile() { URL="$1" OUTPUT="$2" @@ -95,6 +105,10 @@ fi if [ ! 
-f PDB_MMCIF_READY ]; then mkdir -p pdb/divided mkdir -p pdb/obsolete + if [ -n "${PDB_AWS_DOWNLOAD}" ]; then + aws s3 cp --no-sign-request --recursive s3://pdbsnapshots/${PDB_AWS_SNAPSHOT}/pub/pdb/data/structures/divided/mmCIF/ pdb/divided/ + aws s3 cp --no-sign-request --recursive s3://pdbsnapshots/${PDB_AWS_SNAPSHOT}/pub/pdb/data/structures/obsolete/mmCIF/ pdb/obsolete/ + fi rsync -rlpt -v -z --delete --port=${PDB_PORT} ${PDB_SERVER}/data/structures/divided/mmCIF/ pdb/divided rsync -rlpt -v -z --delete --port=${PDB_PORT} ${PDB_SERVER}/data/structures/obsolete/mmCIF/ pdb/obsolete touch PDB_MMCIF_READY