-
Notifications
You must be signed in to change notification settings - Fork 17
/
_upload_data.py
88 lines (73 loc) · 3.11 KB
/
_upload_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import click
from osfclient.api import OSF
from pathlib import Path
import tarfile
# Command-line equivalent of what this script does:
#   osf -r -p your_password -u your_username upload local_path remote_path

# OSF project codes. To find a project's code, open the project on the OSF
# website and look at its URL: for https://osf.io/t4uf8/ the code is t4uf8.
PROJECT_CODE_PRIVATE = 'vw8sh'
PROJECT_CODE_PUBLIC = 't4uf8'

# Remote path on OSF under which the data is stored. Data that already
# exists at this path will be overwritten.
REMOTE_PATH = 'meg'
def _compress_directory(source_dir, archive_path):
    """Pack every regular file below *source_dir* into a gzip-compressed tar.

    Archive members are named by their path relative to *source_dir*.
    Entries that are not regular files (e.g. directories) are skipped, so
    empty directories do not appear in the archive.
    """
    with tarfile.open(archive_path, "w:gz") as tar_handle:
        for next_file in source_dir.rglob('*'):
            if not next_file.is_file():
                continue
            print(next_file)
            remote_name = next_file.relative_to(source_dir)
            tar_handle.add(next_file, arcname=remote_name)


@click.command()
@click.option(
    "--username", required=True,
    help="Your username to the private repository"
)
@click.option(
    "--password", required=True,
    help="Your password to the private repository"
)
@click.option(
    "--local_path", required=True,
    help="path where you store all the data"
)
def upload_to_osf(username, password, local_path):
    """Compress the public and private data and upload them to OSF.

    The directory given by --local_path must already contain a 'public' and
    a 'private' subdirectory. Each one is packed into a tar.gz archive
    (written next to the subdirectories) and uploaded to the matching OSF
    project, replacing any file of the same name that is already there.

    Fails with a RuntimeError if the local path or one of the two
    subdirectories is not a directory.
    """
    local_path = Path(local_path)
    remote_path = Path(REMOTE_PATH)

    if not local_path.is_dir():
        raise RuntimeError(f"Expected source ({local_path}) "
                           "to be a directory")

    # (OSF project code, name of the data subdirectory) for each repository
    projects = (
        (PROJECT_CODE_PUBLIC, 'public'),
        (PROJECT_CODE_PRIVATE, 'private'),
    )

    # ########################################################
    # TODO: make the split to public and private data directories
    # to have a path:
    # local_path
    #    |---public
    #    |---private
    # all the data in the public directory will be added to the
    # public repo, and from private directory to the private repo
    #
    # here the split has already been done beforehand, so make sure both
    # subdirectories exist before anything is compressed or uploaded.
    # Raised explicitly (not asserted) so the check survives `python -O`.
    for _, project_type in projects:
        subdir = local_path / project_type
        if not subdir.is_dir():
            raise RuntimeError(f"Expected {subdir} to be a directory")

    osf = OSF(username=username, password=password)

    for project_code, project_type in projects:
        print(f'compressing {project_type} data')
        tar_name = local_path / (project_type + '.tar.gz')
        # the archive lives in local_path itself, outside the directory
        # being packed, so it is never added to its own contents
        _compress_directory(local_path / project_type, tar_name)

        print(f'uploading {project_type} data')
        # establish the connection with the correct repo on osf
        project = osf.project(project_code)
        store = project.storage('osfstorage')
        fname = remote_path / (project_type + '.tar.gz')
        with open(tar_name, 'rb') as fp:
            # force=True: replace the remote file if it already exists
            store.create_file(fname, fp, force=True)
        print(f'successfully uploaded {fname} to {REMOTE_PATH}')
# Invoke the click command only when this file is executed as a script;
# click reads the options from the command line.
if __name__ == "__main__":
    upload_to_osf()