-
Notifications
You must be signed in to change notification settings - Fork 32
/
copy_s3_content.sh
executable file
·35 lines (32 loc) · 1.13 KB
/
copy_s3_content.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash
# This script creates a local directory for running FMBench
# without any S3 dependency and copies all relevant files
# for public FMBench content.
#
# Usage: copy_s3_content.sh [BASE_DIR]
#   BASE_DIR  parent directory under which fmbench-read is created
#             (default: /tmp)
BASE_DIR=${1:-/tmp}
FMBENCH_READ_DIR="${BASE_DIR}/fmbench-read"
# Only the path is defined here; this script does not create the write dir.
FMBENCH_WRITE_DIR="${BASE_DIR}/fmbench-write"
BUCKET=aws-blogs-artifacts-public

# Create the read directory and the tokenizer sub-directories up front.
# Paths are quoted so a BASE_DIR containing spaces or glob characters works.
mkdir -p -- \
  "${FMBENCH_READ_DIR}" \
  "${FMBENCH_READ_DIR}/tokenizer" \
  "${FMBENCH_READ_DIR}/llama2_tokenizer" \
  "${FMBENCH_READ_DIR}/llama3_tokenizer" \
  "${FMBENCH_READ_DIR}/llama3_1_tokenizer" \
  "${FMBENCH_READ_DIR}/llama3_2_tokenizer" \
  "${FMBENCH_READ_DIR}/mistral_tokenizer" \
  || {
    printf 'error: cannot create directories under %s\n' "${FMBENCH_READ_DIR}" >&2
    exit 1
  }

# Fetch the manifest with -O so a re-run overwrites the previous copy.
# With -P, wget would save the new download as manifest.txt.1 and the rest
# of the script would keep reading the stale manifest.txt.
if ! wget -O "${FMBENCH_READ_DIR}/manifest.txt" \
  "https://${BUCKET}.s3.amazonaws.com/artifacts/ML-FMBT/manifest.txt"; then
  printf 'error: failed to download manifest.txt from bucket %s\n' "${BUCKET}" >&2
  # -O leaves an empty/partial file behind on failure; do not let it be used.
  rm -f -- "${FMBENCH_READ_DIR}/manifest.txt"
  exit 1
fi
# First create all directories from manifest

#######################################
# Create the parent directory of every manifest entry under a root dir.
# Entries are read one per line, so paths containing spaces or glob
# characters are handled literally instead of being split or expanded.
# NOTE(review): assumes the manifest lists one path per line - confirm.
# Arguments: $1 - path to the manifest file
#            $2 - root directory the entry directories are created under
# Outputs:   diagnostics on stderr
# Returns:   0 on success, 1 if the manifest is unreadable or a mkdir failed
#######################################
create_manifest_dirs() {
  local manifest=$1
  local root=$2
  local entry
  local status=0

  if [[ ! -r "$manifest" ]]; then
    printf 'error: cannot read manifest: %s\n' "$manifest" >&2
    return 1
  fi

  # Default IFS makes read trim surrounding whitespace; the extra -n test
  # keeps a final line that has no trailing newline.
  while read -r entry || [[ -n "$entry" ]]; do
    [[ -n "$entry" ]] || continue
    mkdir -p -- "${root}/$(dirname -- "$entry")" || status=1
  done < "$manifest"

  return "$status"
}

create_manifest_dirs "${FMBENCH_READ_DIR}/manifest.txt" "${FMBENCH_READ_DIR}"
# Then download all non-.keep files

#######################################
# Tell whether a manifest entry is a ".keep" placeholder.
# Uses a literal substring match; the previous grep pattern ".keep" treated
# "." as "any character" and therefore also skipped names such as
# "bookkeeping.txt".
# Arguments: $1 - manifest entry (relative path)
# Returns:   0 if the entry contains the literal text ".keep", 1 otherwise
#######################################
is_keep_file() {
  [[ "$1" == *.keep* ]]
}

# Read the manifest line by line so entries are never word-split or
# glob-expanded; the extra -n test keeps a final line without a newline.
while read -r entry || [[ -n "$entry" ]]; do
  [[ -n "$entry" ]] || continue

  # Skip if filename contains ".keep" in it
  if is_keep_file "$entry"; then
    continue
  fi

  # Best effort: report a failed download and carry on with the next entry.
  wget "https://${BUCKET}.s3.amazonaws.com/artifacts/ML-FMBT/${entry}" \
    -P "${FMBENCH_READ_DIR}/$(dirname -- "$entry")" \
    || printf 'warning: failed to download %s\n' "$entry" >&2
done < "${FMBENCH_READ_DIR}/manifest.txt"