-
Notifications
You must be signed in to change notification settings - Fork 400
/
tzparse.py
executable file
·186 lines (147 loc) · 7.03 KB
/
tzparse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# Copyright The IETF Trust 2012-2019, All Rights Reserved
#!/usr/bin/env python
"""
NAME
tzparse
SYNOPSIS
>>> tzparse("2008-09-08 14:40:35 +0200", "%Y-%m-%d %H:%M:%S %Z")
datetime.datetime(2008, 9, 8, 14, 40, 35, tzinfo=pytz.FixedOffset(120))
>>> print(tzparse("14:40:35 CEST, 08 Sep 2008", "%H:%M:%S %Z, %d %b %Y"))
2008-09-08 14:40:35+02:00
DESCRIPTION
This describes the python 'tzparse' module. It exports only one function: tzparse().
tzparse() parses a string according to a specified format, exactly as time.strptime()
does, but with the added capability to parse most common timezone specifications,
such as 'UTC', the standard timezones ('NST', 'EST', 'CST', 'MST', 'PST', 'HNY'
[North America], 'WET', 'CET', 'EET', 'MSK' [Europe], and more), the summer timezones
('CEST', 'EEST', 'EDT', 'PDT' etc.), military timezones ('A' .. 'Z') and numeric
timezone indications ('+0200', '-0700', '-03:30' etc.).
The time zone specification may be placed anywhere, not only at the end.
tzparse() calls time.strptime() to parse everything except the timezone. To parse
the timezone, it first tries to use the pytz module, but if that doesn't give
any joy, it falls back to a hardcoded list of common time zone abbreviations and
their offset from UTC.
BUGS
* tzparse() cannot parse all valid RFC 3339 formats: it doesn't extract
fractional seconds, and the underlying time.strptime() doesn't parse fractional
seconds.
* Parsing according to format specifications using the generic %c, %x and %X
specifiers will only succeed if there are explicit delimiting characters
between the %Z specifier and the %c, %x or %X part.
COPYRIGHT
Copyright 2009 Henrik Levkowetz
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import re
import time
from datetime import datetime as Datetime, timedelta as Timedelta # For re-export. pyflakes:ignore
import pytz
# Fallback table mapping time zone abbreviations to their numeric UTC offset
# ("+HHMM" / "-HHMM").  tzparse() consults it only when pytz does not know the
# zone name.
#
# Several abbreviations are ambiguous between regions.  A dict can hold only
# one value per key, so each ambiguous abbreviation is listed exactly once, with
# the North American meaning for CDT, CST, EDT and EST, and "+0800" for WST.
# Single letters are the military zone designators.
tzdef = {
    "A": "+0100", "ACDT": "+1030", "ACST": "+0930", "ADT": "-0300",
    "AEDT": "+1100", "AEST": "+1000", "AKDT": "-0800", "AKST": "-0900",
    "AST": "-0400", "AWDT": "+0900", "AWST": "+0800", "B": "+0200",
    "BST": "+0100", "C": "+0300", "CDT": "-0500", "CEDT": "+0200",
    "CEST": "+0200", "CET": "+0100", "CST": "-0600", "CXT": "+0700",
    "D": "+0400", "E": "+0500", "EDT": "-0400", "EEDT": "+0300",
    "EEST": "+0300", "EET": "+0200", "EST": "-0500", "F": "+0600",
    "G": "+0700", "GMT": "+0000", "H": "+0800", "HAA": "-0300",
    "HAC": "-0500", "HADT": "-0900", "HAE": "-0400", "HAP": "-0700",
    "HAR": "-0600", "HAST": "-1000", "HAT": "-0230", "HAY": "-0800",
    "HNA": "-0400", "HNC": "-0600", "HNE": "-0500", "HNP": "-0800",
    "HNR": "-0700", "HNT": "-0330", "HNY": "-0900", "I": "+0900",
    "IST": "+0100", "K": "+1000", "L": "+1100", "M": "+1200",
    "MDT": "-0600", "MESZ": "+0200", "MEZ": "+0100", "MSD": "+0400",
    "MSK": "+0300", "MST": "-0700", "N": "-0100", "NDT": "-0230",
    "NFT": "+1130", "NST": "-0330", "O": "-0200", "P": "-0300",
    "PDT": "-0700", "PST": "-0800", "Q": "-0400", "R": "-0500",
    "S": "-0600", "T": "-0700", "U": "-0800", "UTC": "+0000",
    "V": "-0900", "W": "-1000", "WDT": "+0900", "WEDT": "+0100",
    "WEST": "+0100", "WET": "+0000", "WST": "+0800", "X": "-1100",
    "Y": "-1200", "Z": "+0000",
}
def _format_to_pattern(fmt):
    """
    Translate a fragment of a strptime() format into regular expression source.

    Numeric directives become "one or more digits", name directives become
    "one or more word characters", and the locale-dependent %c, %x and %X
    become "one or more of anything".  '%%' stands for a literal percent sign.
    Everything else is literal text and is regex-escaped, so characters such
    as '(', '.' or '+' in the format match themselves.
    """
    numeric = "dHIjmMSUwWyY"
    wordy = "aAbBp"
    generic = "cxX"
    pieces = []
    # Splitting on a capturing group keeps each two-character '%x' directive as
    # its own token, interleaved with the literal text between directives.
    for token in re.split("(%.)", fmt):
        code = token[1] if len(token) == 2 and token[0] == "%" else None
        if code == "%":
            pieces.append("%")
        elif code is not None and code in numeric:
            pieces.append(r"\d+")
        elif code is not None and code in wordy:
            pieces.append(r"\w+")
        elif code is not None and code in generic:
            pieces.append(".+")
        else:
            pieces.append(re.escape(token))
    return "".join(pieces)


def tzparse(string, format):
    # It's surprising that there's no tz parsing capability in the python standard
    # library...
    """
    Given a time specification string and a format, tzparse() returns a localized
    datetime.datetime.

    The format is a time.strptime() format in which %Z marks the position of
    the time zone.  The text found at that position may be a zone name known
    to pytz, an abbreviation listed in tzdef, or a numeric offset such as
    '+0200' or '-03:30'.  If the format has no %Z, or no text is found at its
    position, the local standard zone name time.tzname[0] is used instead.

    Raises ValueError if the format contains more than one %Z, if the
    date/time part cannot be parsed by time.strptime(), or if the time zone
    is not recognized.

    >>> print(tzparse("9 Oct 2009 CEST 13:58", "%d %b %Y %Z %H:%M"))
    2009-10-09 13:58:00+02:00

    >>> print(tzparse("9 Oct 2009 13:58:00 Europe/Stockholm", "%d %b %Y %H:%M:%S %Z"))
    2009-10-09 13:58:00+02:00

    >>> print(tzparse("9 Oct 2009 13:58:00 +0200", "%d %b %Y %H:%M:%S %Z"))
    2009-10-09 13:58:00+02:00

    >>> print(tzparse("Fri, 9 Oct 2009 13:58:00 +0200", "%a, %d %b %Y %H:%M:%S %Z"))
    2009-10-09 13:58:00+02:00

    >>> print(tzparse("2009-10-09 13:58:00 EST", '%Y-%m-%d %H:%M:%S %Z'))
    2009-10-09 13:58:00-05:00

    >>> print(tzparse("2009-10-09 13:58:00+02:00", "%Y-%m-%d %H:%M:%S%Z"))
    2009-10-09 13:58:00+02:00

    >>> print(tzparse("1985-04-12T23:20:50Z", "%Y-%m-%dT%H:%M:%S%Z"))
    1985-04-12 23:20:50+00:00

    >>> print(tzparse("1996-12-19T16:39:57-08:00", "%Y-%m-%dT%H:%M:%S%Z"))
    1996-12-19 16:39:57-08:00

    The date/time text may reappear inside the zone text, and the format may
    contain characters that are special in regular expressions:

    >>> print(tzparse("10:30+10:30", "%H:%M%Z"))
    1900-01-01 10:30:00+10:30

    >>> print(tzparse("9 Oct 2009 13:58 (CEST)", "%d %b %Y %H:%M (%Z)"))
    2009-10-09 13:58:00+02:00

    Without %Z the result depends on the local zone of the machine; this
    example assumes time.tzname[0] is 'CET':

    >>> print(tzparse("1996-12-19T16:39:57", "%Y-%m-%dT%H:%M:%S"))
    1996-12-19 16:39:57+01:00

    """
    if "%Z" not in format:
        timetuple = time.strptime(string, format)
        tzstr = time.tzname[0]
    else:
        fragments = format.split("%Z")
        if len(fragments) != 2:
            raise ValueError("Format '%s' contains more than one %%Z specifier." % format)
        frontfmt, backfmt = fragments
        # Locate the date/time text before and after the zone; whatever lies
        # between the two matches is the zone specification.
        front = re.search("^" + _format_to_pattern(frontfmt), string)
        back = re.search(_format_to_pattern(backfmt) + "$", string)
        frontstr = front.group(0) if front else ""
        backstr = back.group(0) if back else ""
        # Slice by position rather than str.replace(): replace() would also
        # strip any repetition of the date/time text inside the zone itself.
        tz_start = len(frontstr)
        tz_end = back.start() if back else len(string)
        tzstr = string[tz_start:tz_end]
        timetuple = time.strptime(frontstr + backstr, frontfmt + backfmt)

    dt = Datetime(*timetuple[:6])

    if not tzstr:
        # Nothing at the %Z position: assume the local standard zone.
        tzstr = time.tzname[0]

    try:
        tz = pytz.timezone(tzstr)
    except KeyError:
        # pytz does not know the name (its unknown-zone error is caught here
        # as a KeyError).  Fall back to the offset of a known abbreviation, or
        # treat the text itself as a numeric offset.
        offset = tzdef.get(tzstr, tzstr)
        numeric = re.search("^([+-])([0-9][0-9]):?([0-9][0-9])$", offset)
        if not numeric:
            raise ValueError("Unknown timezone '%s'" % tzstr)
        sign, hours, minutes = numeric.groups()
        # pytz.FixedOffset() takes the offset in minutes east of UTC.
        total_minutes = int(hours) * 60 + int(minutes)
        if sign == "-":
            total_minutes = -total_minutes
        tz = pytz.FixedOffset(total_minutes)

    return tz.localize(dt)
if __name__ == "__main__":
    # Command line use: with exactly two arguments (time string, format) parse
    # and print the result; with any other argument count run the doctests.
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Running module tests:\n")
        import doctest
        print(doctest.testmod())
    else:
        timestring, timeformat = cli_args
        print(tzparse(timestring, timeformat))