-
Notifications
You must be signed in to change notification settings - Fork 4
/
fetch_tz.py
149 lines (104 loc) · 4.28 KB
/
fetch_tz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import ftplib
from os import path
from os import remove
import sys
from tzdata_files import get_tzdata_files, get_sig_files
from tzdata_files import data_key as dkey
from tzdata_files import sig_key as skey
import yaml
class InvalidSizeError(OSError):
    """Raised when a downloaded file's size differs from the expected size."""
def get_file_size(ftp_conn, fname, old_type='A'):
    """Ask the FTP server for the size of ``fname``.

    The connection is switched to ``TYPE I`` (binary/image mode) for the
    ``SIZE`` request and switched back to ``old_type`` afterwards.

    :param ftp_conn:
        An object exposing ``sendcmd(cmd)`` and ``size(fname)``, such as a
        logged-in :class:`ftplib.FTP` instance.

    :param fname:
        Name of the remote file, relative to the current remote directory.

    :param old_type:
        The FTP transfer type to restore once the size has been queried.
        Defaults to ``'A'`` (ASCII).

    :return:
        Whatever ``ftp_conn.size(fname)`` returns.

    :raises:
        Any exception raised by ``ftp_conn.size`` or ``ftp_conn.sendcmd``
        is propagated to the caller.
    """
    ftp_conn.sendcmd('TYPE I')
    try:
        return ftp_conn.size(fname)
    finally:
        # Restore the transfer type even when SIZE fails, so that a caller
        # which handles the error does not keep using a connection that was
        # silently left in binary mode.
        ftp_conn.sendcmd('TYPE ' + old_type)
def download_file(ftp_conn, fpath, fname, exp_size=None):
    """Download ``fname`` over FTP into the local file ``fpath``.

    :param ftp_conn:
        An object exposing ``retrbinary(cmd, callback)``, such as a
        logged-in :class:`ftplib.FTP` instance.

    :param fpath:
        Local path the file is written to (opened in ``'wb'`` mode, so an
        existing file is overwritten).

    :param fname:
        Name of the remote file, relative to the current remote directory.

    :param exp_size:
        Expected size of the downloaded file in bytes. When ``None`` (the
        default) no size check is performed.

    :raises InvalidSizeError:
        If ``exp_size`` is given and the size of the downloaded file on
        disk differs from it. The local file is removed before raising.
    """
    try:
        with open(fpath, 'wb') as cf:
            # Use the connection that was passed in rather than relying on a
            # module-level ``f`` that only exists when run as a script.
            ftp_conn.retrbinary('RETR ' + fname, cf.write)
    except BaseException:
        # A truncated file left on disk would be indistinguishable from a
        # completed download to callers that only test for existence.
        if path.exists(fpath):
            remove(fpath)
        raise

    # The size is checked only after the file has been closed, so that all
    # buffered data has reached the disk.
    if exp_size is not None and path.getsize(fpath) != exp_size:
        remove(fpath)
        raise InvalidSizeError('Downloaded file size does not match expectation.')
class InvalidStatusError(OSError):
    """Raised when an FTP response does not start with the expected status."""
def fail_on_invalid_status(f, retr, status_code):
    """Abort the FTP session unless ``retr`` begins with ``status_code``.

    :param f:
        The connection object; its ``quit()`` method is called when the
        response is rejected.

    :param retr:
        The response string returned by the server.

    :param status_code:
        The expected status; it is converted with ``str()`` and compared
        against the start of ``retr``.

    :raises InvalidStatusError:
        If ``retr`` does not start with the expected status. The exception
        carries ``retr`` as its argument.
    """
    expected_prefix = str(status_code)
    if retr.startswith(expected_prefix):
        return

    # Close the session before reporting the unexpected response.
    f.quit()
    raise InvalidStatusError(retr)
if __name__ == '__main__':
    # Script entry point: log in to the configured FTP server, work out which
    # tzdata archives and signature files are not yet present locally,
    # download them, and write a YAML report of any downloads whose size did
    # not match what the server advertised.

    # Load the configuration
    from config import load_config

    keys = ('rate_limit', 'fetch_errors_out', 'ftp_server',
            'dir_loc', 'tzdata_loc', 'iana_sig_loc')

    # NOTE: rate_limit is loaded but not used anywhere below.
    (rate_limit, errors_out, ftp_server,
     dir_loc, data_loc, sig_loc) = load_config(keys)

    dir_loc = dir_loc.split('/')

    errors = []

    # Initialize the connection
    f = ftplib.FTP(ftp_server)
    try:
        retr = f.login()
        fail_on_invalid_status(f, retr, '230')

        # Browse to the correct directory, one path component at a time
        for cdir in dir_loc:
            retr = f.cwd(cdir)
            fail_on_invalid_status(f, retr, '250')

        # Get a list of all data files and signatures
        flist = f.nlst()
        data_files = get_tzdata_files(flist)
        data_files = get_sig_files(flist, c_dict=data_files)

        # Download any missing files and generate preliminary metadata files
        tzdata_to_download = []
        signatures_to_download = []

        # Figure out what's missing
        for subdict in data_files.values():
            dfname = subdict.get(dkey, None)
            if dfname is None:
                continue

            sfname = subdict.get(skey, None)

            data_fpath = path.join(data_loc, dfname)
            sig_fpath = path.join(sig_loc, sfname) if sfname is not None else None

            for fname, fpath, lapp in ((dfname, data_fpath, tzdata_to_download),
                                       (sfname, sig_fpath, signatures_to_download)):
                if fname is None:
                    continue

                if not path.exists(fpath):
                    exp_size = get_file_size(f, fname)
                    lapp.append((fpath, fname, exp_size))

        # Go through and download everything. Each batch reports progress
        # against its own length, counting from 1.
        batches = (
            ("Downloading tzdata files.", tzdata_to_download, "\r"),
            ("Downloading signature files.", signatures_to_download, "\r\n"),
        )
        for banner, queue, terminator in batches:
            print(banner)
            total = len(queue)
            for ii, (fpath, fname, exp_size) in enumerate(queue, 1):
                label = dict(fname=fname, ii=ii, total=total)
                sys.stdout.write("Downloading file {ii} of {total}: {fname}\r".format(**label))
                try:
                    download_file(f, fpath, fname, exp_size)
                except InvalidSizeError:
                    # download_file has already removed the bad file; the
                    # pass below retries it once and records the failure if
                    # it happens again, so one bad transfer does not abort
                    # the whole run.
                    continue
            sys.stdout.write(terminator)

        # Retry pass: anything still missing on disk is fetched once more,
        # and a repeated size mismatch is recorded in ``errors``.
        for subdict in data_files.values():
            dfname = subdict.get(dkey, None)
            if dfname is None:
                continue

            sfname = subdict.get(skey, None)

            data_fpath = path.join(data_loc, dfname)
            sig_fpath = path.join(sig_loc, sfname) if sfname is not None else None

            # If it doesn't exist, download it
            for fname, fpath in ((dfname, data_fpath), (sfname, sig_fpath)):
                if fname is None:
                    continue

                if not path.exists(fpath):
                    exp_size = get_file_size(f, fname)
                    try:
                        download_file(f, fpath, fname, exp_size)
                    except InvalidSizeError as e:
                        errors.append((fname, fpath, e))
                        # Skip the signature when the data file itself failed
                        break

        f.quit()
    finally:
        # close() is a no-op on a connection that quit() has already closed,
        # and it releases the socket when an exception cut the session short.
        f.close()

    if errors:
        # Store plain strings so the report is ordinary, readable YAML rather
        # than a serialised exception object.
        error_records = [dict(fname=fname, fpath=fpath, error=str(err))
                         for fname, fpath, err in errors]

        with open(errors_out, 'w') as yf:
            yaml.dump(error_records, yf, default_flow_style=False)

        print("Errors occurred during the download - " +
              " see {err_out} for details".format(err_out=errors_out))