-
Notifications
You must be signed in to change notification settings - Fork 31
/
imap_upload.py
executable file
·828 lines (739 loc) · 34 KB
/
imap_upload.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
#!/usr/bin/python3
# coding=utf-8
import codecs
import email
import email.header
import getpass
import imaplib
import locale
import mailbox
import math
import optparse
import re
import socket
import sys
import time
import unicodedata
import urllib.request, urllib.parse, urllib.error
import os
import traceback
import io
import csv
from optparse import OptionParser
from urllib.parse import urlparse
from imapclient import imap_utf7
# Release identifier; shown in the start-up banner and in the --version text.
__version__ = "2.0.0"

# Refuse to run on interpreters older than the minimum supported release.
if sys.version_info[:2] < (3, 5):
    print("IMAP Upload requires Python 3.5 or later.")
    sys.exit(1)
# Work around "KeyError: 'content-transfer-encoding'" raised while flattening
# some messages.
# Inspired by https://github.com/python/cpython/issues/71508#issuecomment-1093718177
# This workaround might no longer be needed once Python 3.10 is the minimum
# supported version; msg.as_string() could then be used directly.
class ImapUploadMessage(email.message.Message):
    """Holder for a tolerant replacement of ``Message.as_string``."""

    @staticmethod
    def as_string(self):
        """Flatten the message ``self`` into text without raising.

        This is a static method: callers pass the message explicitly, e.g.
        ``ImapUploadMessage.as_string(msg)``.

        The regular string flattening is tried first. When it fails with a
        KeyError, LookupError or UnicodeEncodeError (see
        https://bugs.python.org/issue27321 and
        https://bugs.python.org/issue32330) the message is flattened to bytes
        instead and decoded as ASCII with replacement characters.
        """
        base = email.message.Message
        try:
            text = base.as_string(self)
        except (KeyError, LookupError, UnicodeEncodeError):
            raw = base.as_bytes(self)
            text = raw.decode('ascii', 'replace')
        # Make sure no unicode surrogates survive in the returned string.
        return email.utils._sanitize(text)
class MyOptionParser(OptionParser):
    """Command-line parser for IMAP Upload.

    Differences from a plain ``OptionParser``:

    * ``parse_args(args)`` returns only the options object (not a tuple) and
      stores the MBOX positional argument in ``options.src``.
    * An optional DEST URL positional argument overrides the individual
      connection options.
    * ``error()`` raises ``optparse.OptParseError`` instead of exiting.
    """

    def __init__(self):
        usage = "usage: python %prog [options] (MBOX|-r MBOX_FOLDER) [DEST]\n"\
                " MBOX UNIX style mbox file.\n"\
                " MBOX_FOLDER folder containing subfolder trees of mbox files\n"\
                " DEST is imap[s]://[USER[:PASSWORD]@]HOST[:PORT][/BOX]\n"\
                " DEST has a priority over the options."
        self.google_takeout_supported_languages = [ "en", "es", "ca", "de" ]
        OptionParser.__init__(self, usage,
                              version="IMAP Upload " + __version__)
        self.add_option("-r", action="store_true",
                        help="recursively search sub-folders")
        self.add_option("--gmail", action="callback", nargs=0,
                        callback=self.enable_gmail,
                        help="setup for Gmail. Equivalents to "
                             "--host=imap.gmail.com --port=993 "
                             "--ssl --retry=3")
        self.add_option("--office365", action="callback", nargs=0,
                        callback=self.enable_office365,
                        help="setup for Office365. Equivalents to "
                             "--host=outlook.office365.com --port=993 "
                             "--ssl --retry=3")
        self.add_option("--fastmail", action="callback", nargs=0,
                        callback=self.enable_fastmail,
                        help="setup for Fastmail hosted IMAP. Equivalent to "
                             "--host=imap.fastmail.com --port=993 "
                             "--ssl --retry=3")
        # The two literals used to be concatenated without any separator,
        # producing "...same folderonly works with -r" in the help output.
        self.add_option("--email-only-folders", action="store_true",
                        help="use for servers that do not allow storing emails and subfolders in the same folder; "
                             "only works with -r")
        self.add_option("--host",
                        help="destination hostname [default: %default]")
        self.add_option("--port", type="int",
                        help="destination port number [default: 143, 993 for SSL]")
        self.add_option("--ssl", action="store_true",
                        help="use SSL connection")
        self.add_option("--box",
                        help="destination mail box name [default: %default]")
        self.add_option("--user", help="login name [default: empty]")
        self.add_option("--password", help="login password")
        self.add_option("--retry", type="int", metavar="COUNT",
                        help="retry COUNT times on connection abort. "
                             "0 disables [default: %default]")
        self.add_option("--error", metavar="ERR_MBOX",
                        help="append failed messages to the file ERR_MBOX")
        self.add_option("--time-fields", metavar="LIST", type="string", nargs=1,
                        action="callback", callback=self.set_time_fields,
                        help="try to get delivery time of message from "
                             "the fields in the LIST. "
                             'Specify any of "from", "received" and '
                             '"date" separated with comma in order of '
                             'priority (e.g. "date,received"). '
                             '"from" is From_ line of mbox format. '
                             '"received" is "Received:" field and "date" '
                             'is "Date:" field in RFC 2822. '
                             '[default: from,received,date]')
        self.add_option("--list_boxes", action="store_true",
                        help="list all mail boxes in the IMAP server")
        self.add_option("--folder-separator", type="string",
                        help="change folder separator-character default")
        self.add_option("--google-takeout", action="store_true",
                        help="Import Google Takeout using labels as folders.")
        self.add_option("--google-takeout-box-as-base-folder", action="store_true",
                        help="Use given box as base folder.")
        self.add_option("--google-takeout-first-label", action="store_true",
                        help="Only import first label from the email.")
        self.add_option("--google-takeout-label-priority", type="string",
                        help="Priority of labels, if --google-takeout-first-label is used")
        self.add_option("--google-takeout-language",
                        help="[Use specific language. Supported languages: '%s'. " % (" ".join(self.google_takeout_supported_languages)) + "default: %default]" )
        self.add_option("--maximum-size-exceeded-are-warnings", action="store_true",
                        help="Treat 'maximum size exceeded messages' as warnings and not as errors.")
        self.add_option("--debug", action="store_true",
                        help="Debug: Make some error messages more verbose.")
        self.add_option("--dry-run", action="store_true",
                        help="Do not perform IMAP writing actions")
        self.set_defaults(host="localhost",
                          ssl=False,
                          r=False,
                          email_only_folders=False,
                          user="",
                          password="",
                          box="INBOX",
                          retry=0,
                          error=None,
                          time_fields=["from", "received", "date"],
                          folder_separator="/",
                          google_takeout=False,
                          google_takeout_box_as_base_folder=False,
                          google_takeout_first_label=False,
                          google_takeout_label_priority="",
                          google_takeout_language="en",
                          maximum_size_exceeded_are_warnings=False,
                          debug=False,
                          dry_run=False,
                          )

    def enable_gmail(self, option, opt_str, value, parser):
        """Option callback: preset the connection options for Gmail."""
        parser.values.ssl = True
        parser.values.host = "imap.gmail.com"
        parser.values.port = 993
        parser.values.retry = 3

    def enable_office365(self, option, opt_str, value, parser):
        """Option callback: preset the connection options for Office365."""
        parser.values.ssl = True
        parser.values.host = "outlook.office365.com"
        parser.values.port = 993
        parser.values.retry = 3

    def enable_fastmail(self, option, opt_str, value, parser):
        """Option callback: preset the connection options for Fastmail."""
        parser.values.ssl = True
        parser.values.host = "imap.fastmail.com"
        parser.values.port = 993
        parser.values.retry = 3

    def set_time_fields(self, option, opt_str, value, parser):
        """Option callback: validate and store the --time-fields list.

        An empty string selects no field at all. Any name other than
        "from", "received" or "date" is reported through ``error()``.
        """
        fields = value.split(",") if value != "" else []
        # Assert that list contains only valid fields
        if set(fields) - {"from", "received", "date"}:
            self.error("Invalid value '%s' for --time-fields" % value)
        parser.values.time_fields = fields

    def parse_args(self, args):
        """Parse ``args`` and return the validated options object.

        Raises ``optparse.OptParseError`` (via ``error()``) when MBOX is
        missing, when there are too many positional arguments, or when the
        Google Takeout options are combined inconsistently.
        """
        (options, args) = OptionParser.parse_args(self, args)
        if len(args) < 1 and not options.list_boxes:
            self.error("Missing MBOX")
        if len(args) > 2:
            self.error("Extra argument")
        if len(args) > 1:
            # DEST wins over the individual --host/--port/... options.
            dest = self.parse_dest(args[1])
            for (k, v) in vars(dest).items():
                setattr(options, k, v)
        if options.google_takeout_box_as_base_folder and not options.google_takeout:
            self.error("--google-takeout-box-as-base-folder needs --google-takeout option")
        if options.google_takeout_first_label and not options.google_takeout:
            self.error("--google-takeout-first-label needs --google-takeout option")
        if options.google_takeout_label_priority and not options.google_takeout_first_label:
            self.error("--google-takeout-label-priority needs --google-takeout-first-label option")
        if options.google_takeout_language not in self.google_takeout_supported_languages:
            self.error("--google-takeout-language: '%s' is not a supported language. Supported languages: '%s'." % (options.google_takeout_language, " ".join(self.google_takeout_supported_languages)))
        if options.port is None:
            options.port = 993 if options.ssl else 143
        if not options.list_boxes:
            options.src = args[0]
        return options

    def parse_dest(self, dest):
        """Turn a DEST URL into an ``optparse.Values`` of connection options.

        ``ssl``, ``host`` and ``port`` are always set; ``user``, ``password``
        and ``box`` only when the URL carries them. Any failure while
        parsing is reported as "Invalid DEST" through ``error()``.
        """
        try:
            # urlparse is given the "imap" scheme in both cases; whether a
            # substitution happened tells us if SSL was requested.
            dest, ssl = re.subn("^imaps:", "imap:", dest)
            dest = urlparse(dest)
            options = optparse.Values()
            options.ssl = bool(ssl)
            options.host = dest.hostname
            options.port = 993 if options.ssl else 143
            if dest.port:
                options.port = dest.port
            if dest.username:
                options.user = urllib.parse.unquote(dest.username)
            if dest.password:
                options.password = urllib.parse.unquote(dest.password)
            if len(dest.path):
                options.box = dest.path[1:] # trim the first `/'
            return options
        except Exception:
            # Was a bare "except:", which also swallowed KeyboardInterrupt
            # and SystemExit.
            self.error("Invalid DEST")

    def error(self, msg):
        """Raise ``optparse.OptParseError`` carrying the usage text and msg."""
        raise optparse.OptParseError(self.get_usage() + "\n" + msg)
def si_prefix(n, prefixes=("", "k", "M", "G", "T", "P", "E", "Z", "Y"),
              block=1024, threshold=1):
    """Get SI prefix and reduced number.

    ``n`` is divided by ``block`` until it drops below ``block * threshold``
    or the last entry of ``prefixes`` is reached. Returns the tuple
    ``(reduced_n, prefix)``.

    ``block`` and ``threshold`` apply to every step. The previous recursive
    version forwarded neither of them, so custom values only affected the
    first division.
    """
    index = 0
    last = len(prefixes) - 1
    # "not (n < ...)" rather than ">=" keeps the original comparison exactly.
    while index < last and not (n < block * threshold):
        n = n / block
        index += 1
    return (n, prefixes[index])
def decode_header_to_string(header):
    """Decodes an email message header (possibly RFC2047-encoded)
    into a string, while working around https://bugs.python.org/issue22833

    Undecodable bytes are replaced rather than raising. Parts whose declared
    charset is missing, 'unknown-8bit' or not known to Python are decoded
    as ASCII.
    """
    def _decode(value, encoding):
        if isinstance(value, str):
            return value
        if (not encoding) or (encoding == 'unknown-8bit'):
            encoding = 'ascii'
        try:
            return value.decode(encoding, 'replace')
        except LookupError:
            # A bogus charset name in the header must not abort the whole
            # message; fall back to a lossy ASCII decode.
            return value.decode('ascii', 'replace')

    return "".join(
        _decode(bytestr, encoding)
        for bytestr, encoding in email.header.decode_header(header))
# Unicode general categories stripped by remove_control_chars():
# Cc (control), Cf (format), Cs (surrogate), Co (private use), Cn (unassigned).
categories = {'Cc', 'Cf', 'Cs', 'Co', 'Cn'}


def remove_control_chars(s):
    """Return ``s`` without characters whose category is in ``categories``.

    Each character is classified on demand. The earlier implementation
    scanned the whole Unicode range at import time (twice, because the
    section was duplicated) to build one huge regular-expression character
    class, and its ``range(sys.maxunicode)`` missed the last code point.
    """
    return ''.join(c for c in s if unicodedata.category(c) not in categories)
class Progress():
    """Store and output progress information."""

    # Names (or regular expressions, for "category" and "imap") of Gmail's
    # built-in labels as they appear in the X-Gmail-Labels header, keyed by
    # the --google-takeout-language code.
    _GMAIL_LABEL_NAMES = {
        "en": {
            "inbox": r"Inbox",
            "sent": r"Sent",
            "draft": "Draft",
            "important": u'Important',
            "open": u'Open',
            "unseen": u"Unread",
            "category": r"^Category_",
            "imap": r'^IMAP_',
            "trash": "Trash",
        },
        "es": {
            "inbox": r"Recibidos",
            "sent": r"Enviados",
            "draft": "Borradores",
            "important": u'Importante',
            "open": u'Abierto',
            "unseen": u"No leídos",
            "category": r"^Categor.a:",
            "imap": r'^IMAP_',
            "trash": "Papelera",
        },
        "ca": {
            "inbox": r"Safata d'entrada",
            "sent": r"Enviats",
            "draft": "Esborranys",
            "important": u'Importants',
            "open": u'Oberts',
            "unseen": u"No llegits",
            "category": r"^Categor.a",
            "imap": r'^IMAP_',
            "trash": "Paperera",
        },
        "de": {
            "inbox": r"Posteingang",
            "sent": r"Gesendet",
            "draft": "Entwürfe",
            "important": u'Wichtig',
            "open": u'Geöffnet',
            "unseen": u"Ungelesen",
            "category": r"^Kategorie_",
            "imap": r'^IMAP_',
            "trash": "Papierkorb",
        },
    }

    def __init__(self, total_count, google_takeout=False, google_takeout_first_label=False,
                 google_takeout_label_priority=None, google_takeout_language="en"):
        self.total_count = total_count
        self.ok_count = 0
        self.warning_count = 0
        # Number of messages finished so far, whatever the outcome.
        self.count = 0
        self.format = "%" + str(len(str(total_count))) + "d/" + \
                      str(total_count) + " %5.1f %-2s %s "
        self.google_takeout = google_takeout
        self.google_takeout_first_label = google_takeout_first_label
        self.google_takeout_label_priority = google_takeout_label_priority
        self.google_takeout_language = google_takeout_language

    def begin(self, msg):
        """Called when start processing of a new message.

        Prints the position, size and subject of ``msg``. In Google Takeout
        mode it also sets ``msg.flags`` and ``msg.boxes`` from the
        X-Gmail-Labels header and prints the destination boxes.
        """
        self.time_began = time.time()
        size, prefix = si_prefix(float(len(ImapUploadMessage.as_string(msg))), threshold=0.8)
        sbj = decode_header_to_string(msg["subject"] or "")
        if self.google_takeout:
            self._assign_takeout_destination(msg)
            print(self.format % \
                  (self.count + 1, size, prefix + "B", '{:30.30}'.format(remove_control_chars(sbj))),
                  "to [%s]" % (",".join(x[0] for x in msg.boxes)), end=' ')
        else:
            print(self.format % \
                  (self.count + 1, size, prefix + "B", '{:30.30}'.format(remove_control_chars(sbj))), end=' ')

    def _assign_takeout_destination(self, msg):
        """Derive ``msg.flags`` and ``msg.boxes`` from X-Gmail-Labels."""
        names = self._GMAIL_LABEL_NAMES[self.google_takeout_language]

        label = decode_header_to_string(msg["x-gmail-labels"] or "")
        # Undo line breaks inside the header value.
        label = re.sub(r"\n\r", "", label)
        label = re.sub(r"\r\n", "", label)
        label = re.sub(r"\r", " ", label)
        label = re.sub(r"\n", "", label)
        # Normalise the localized inbox/sent names before splitting.
        label = re.sub(names["inbox"], "INBOX", label)
        label = re.sub(names["sent"], "Sent", label)

        # The header is a comma separated list with optional double quotes.
        reader = csv.reader(io.StringIO(label), delimiter=',', quotechar='"')
        labels = [item for row in reader for item in row]
        # Drop Gmail categories and the special IMAP_* labels.
        labels = [item for item in labels
                  if not re.match(names["category"], item)
                  and not re.match(names["imap"], item)]
        labels = [re.sub(r":", "_", item) for item in labels]

        def discard(name):
            """Remove the first occurrence of name; report whether found."""
            if name in labels:
                labels.remove(name)
                return True
            return False

        discard(names["open"])
        discard(u'INBOX')

        flags = []
        if not discard(names["unseen"]):
            flags.append('\\Seen')
        if discard(names["important"]):
            flags.append('\\Flagged')
        # The sent label was rewritten to "Sent" above, so compare against
        # that. Comparing against the localized name never matched for
        # languages other than English.
        if 'Sent' in labels and len(labels) > 1:
            labels.remove('Sent')
        if discard(names["trash"]):
            labels.append('Trash')

        # Always keep the flags as a string. They used to be dropped (and
        # replaced by a list) when no label was left, which lost \Seen on
        # messages that only carried the inbox label.
        msg.flags = " ".join(flags)

        msg.boxes = []
        if labels:
            if names["draft"] in labels:
                msg.boxes.append(['Drafts'])
            elif 'Spam' in labels:
                msg.boxes.append(['Junk'])
            else:
                for item in labels:
                    box = re.sub(r"\?", "", item)
                    msg.boxes.append(box.split("/"))
        if not msg.boxes:
            msg.boxes.append(["INBOX"])
        if self.google_takeout_first_label:
            msg.boxes = [self.get_label_by_prio(msg.boxes)]

    def get_label_by_prio(self, labels):
        """Pick one box path (as a one-element list) out of ``labels``.

        ``labels`` is a non-empty list of box paths; only the first
        component of each path is considered.
        """
        labels = [label[0] for label in labels]
        # The priority list defaults to None in __init__.
        for label in (self.google_takeout_label_priority or []):
            if label in labels:
                return [label]
        # prevent using label Archive, if others are available
        if labels[0] == "Archived" and len(labels) > 1:
            return [labels[1]]
        # return first label if we do not have other hints
        return [labels[0]]

    def endOk(self):
        """Called when a message was processed successfully."""
        self.count += 1
        self.ok_count += 1
        print("OK (%d sec)" % \
              math.ceil(time.time() - self.time_began))

    def endError(self, err):
        """Called when an error has occurred while processing a message."""
        # Advance the position so the next message is not printed with the
        # same number as the failed one.
        self.count += 1
        print("ERROR (%s)" % err)

    def endWarning(self, err):
        """Called when a warning has occurred while processing a message."""
        self.count += 1
        self.warning_count += 1
        print("WARNING (%s)" % err)

    def endAll(self):
        """Called when all message was processed."""
        print("Done. (OK: %d, WARNING: %d, ERROR: %d)" % \
              (self.ok_count, self.warning_count, (self.total_count - self.ok_count - self.warning_count)))
def upload(imap, box, src, err, time_fields, google_takeout=False, google_takeout_first_label=False,
           google_takeout_label_priority=None, google_takeout_box_as_base_folder=False, google_takeout_language="en",
           debug=False, maximum_size_exceeded_are_warnings=False):
    """Upload every message of the mailbox ``src`` through ``imap``.

    imap        -- object whose ``upload()`` method performs the IMAP append.
    box         -- destination mail box (base folder in Takeout mode when
                   google_takeout_box_as_base_folder is set).
    src         -- source mailbox; iterated with ``iteritems()``.
    err         -- mailbox that receives failed messages, or None.
    time_fields -- passed to ``msg.get_delivery_time()``.

    In Google Takeout mode each message is appended once per box listed in
    ``msg.boxes`` (filled in by ``Progress.begin``). Failures are reported
    per message and do not stop the run. Messages that only triggered a
    "maximum message size exceeded" warning are not added to ``err``.
    """
    print("Uploading to {}...".format(box))
    print("Counting the mailbox (it could take a while for the large one).")
    p = Progress(len(src), google_takeout=google_takeout, google_takeout_first_label=google_takeout_first_label,
                 google_takeout_label_priority=google_takeout_label_priority,
                 google_takeout_language=google_takeout_language)
    try:
        for _key, msg in src.iteritems():
            size_warning = False
            try:
                p.begin(msg)
                if google_takeout:
                    if google_takeout_box_as_base_folder:
                        msg_boxes = [[box] + list(path) for path in msg.boxes]
                    else:
                        msg_boxes = msg.boxes
                    for box_path in msg_boxes:
                        # NOTE(review): the retry count is fixed to 3 here,
                        # which overrides the uploader's own retry setting.
                        r, r2 = imap.upload(box, msg.get_delivery_time(time_fields),
                                            ImapUploadMessage.as_string(msg), msg.flags, box_path, 3)
                        if r != "OK":
                            raise Exception(r2[0]) # FIXME: Should use custom class
                else:
                    r, r2 = imap.upload(box, msg.get_delivery_time(time_fields),
                                        ImapUploadMessage.as_string(msg), None, None, 3)
                    if r != "OK":
                        raise Exception(r2[0]) # FIXME: Should use custom class
                p.endOk()
                continue
            except socket.error as e:
                p.endError("Socket error: " + str(e))
            except Exception as e:
                size_warning = bool(maximum_size_exceeded_are_warnings and
                                    re.search(r'maximum message size exceeded', repr(e)))
                report = p.endWarning if size_warning else p.endError
                report(traceback.format_exc() if debug else e)
            # Only reached when the message failed (success "continue"s).
            if (err is not None) and (not size_warning):
                err.add(msg)
    finally:
        # Persist the failed messages even when the run is interrupted;
        # previously the error mailbox was never flushed explicitly.
        flush = getattr(err, "flush", None)
        if callable(flush):
            flush()
    p.endAll()
def recursive_upload(imap, box, src, err, time_fields, email_only_folders, separator, debug=False):
    """Walk the directory ``src`` and upload every mailbox file found.

    Sub-directories are visited recursively; their name (without extension)
    is appended to ``box`` using ``separator``. Files ending in "mbox" are
    uploaded directly. For files ending in ".msf" the file of the same name
    without that suffix is uploaded. Anything else is skipped with a notice.

    ``err`` may be None, the path of the error mbox, or an already opened
    mailbox object.
    """
    usrc = str(src)
    if debug: print("Visiting directory %s" % (usrc))
    for file in os.listdir(usrc):
        path = usrc + os.sep + file
        if os.path.isdir(path):
            fileName, fileExtension = os.path.splitext(file)
            subbox = fileName if not box else box + separator + fileName
            recursive_upload(imap, subbox, path, err, time_fields, email_only_folders, separator, debug)
            continue
        if file.endswith("mbox"):
            print("Found mailbox at {}...".format(path))
            mbox_path = path
        elif file.endswith(".msf"):
            print("Found Thunderbird mailbox at {}...".format(path))
            # Strip only the trailing suffix; str.replace() would also
            # remove ".msf" occurring earlier in the path.
            mbox_path = path[:-len(".msf")]
        else:
            print("Skipping unknown file (no mbox ending): %s" % (file))
            continue
        mbox = mailbox.mbox(mbox_path, create=False)
        if (email_only_folders and has_mixed_content(src)):
            target_box = box + separator + src.split(os.sep)[-1]
        else:
            target_box = file.split('.')[0] if (box is None or box == "") else box
        # Open the error mailbox once and reuse it for the following files.
        # Re-wrapping an already opened mailbox raised TypeError as soon as
        # it contained a message.
        if isinstance(err, str) and err:
            err = mailbox.mbox(err)
        upload(imap, target_box, mbox, err, time_fields)
def has_mixed_content(src):
    """Tell whether directory ``src`` holds both a sub-directory and an mbox.

    An entry counts as an mbox when it is not a directory and its name ends
    with "mbox".
    """
    entries = [(name, os.path.isdir(src + os.sep + name))
               for name in os.listdir(src)]
    contains_dir = any(is_dir for _name, is_dir in entries)
    contains_mbox = any(not is_dir and name.endswith("mbox")
                        for name, is_dir in entries)
    return contains_dir and contains_mbox
def pretty_print_mailboxes(boxes):
    """Print one line per entry of an IMAP LIST response.

    ``boxes`` is the list returned by ``IMAPUploader.list_boxes()``; None or
    an empty list prints nothing. Lines that do not look like
    ``(\\Flag ...) "SEP" NAME`` are reported as unparsable.
    """
    # Raw string: the previous non-raw literal relied on invalid escape
    # sequences. Compared with it, an empty flag list "()" is accepted and
    # the closing quote of a quoted mailbox name is no longer captured as
    # part of the name.
    pattern = re.compile(r'\(((?:\\[A-Za-z]+\s*)*)\) "([^"]*)" "?(.*?)"?$')
    for box in boxes or ():
        box = imap_utf7.decode(box)
        x = pattern.search(box)
        if not x:
            print("Could not parse: {}".format(box))
            continue
        raw_flags = x.group(1)
        sep = x.group(2)
        raw_name = x.group(3)
        print("{:40s}{}".format(pretty_mailboxes_name(raw_name, sep), pretty_flags(raw_flags)))
def pretty_mailboxes_name(name, sep):
    """Format a mailbox path as an indented, quoted tree entry.

    The entry is indented by one space per occurrence of ``sep`` in
    ``name``. Nested boxes get a "+- " marker, and only the text after the
    last separator is shown.
    """
    level = name.count(sep)
    cut = name.rfind(sep)
    leaf = name[cut + 1:] if cut != -1 else name
    marker = "+- " if level > 0 else ""
    return " " * level + marker + '"' + leaf + '"'
def pretty_flags(raw_flags):
    """Render IMAP mailbox flags as a tab separated list.

    The \\HasChildren and \\HasNoChildren flags are left out and the
    leading backslash of the remaining flags is shown as "#".
    """
    remaining = raw_flags
    for hidden in ("\\HasChildren", "\\HasNoChildren"):
        remaining = remaining.replace(hidden, "")
    return "\t".join(remaining.replace("\\", "#").split())
def get_delivery_time(self, fields):
    """Extract delivery time from message.

    Try to extract the time data from given fields of message.
    The fields is a list and can consist of any of the following:
      * "from"      From_ line of mbox format.
      * "received"  The first "Received:" field in RFC 2822.
      * "date"      "Date:" field in RFC 2822.
    Unknown field names are ignored.
    Return the current time if the fields is empty or no field
    had valid value.
    """
    def get_from_time(self):
        """Extract the time from From_ line."""
        time_str = self.get_from().split(" ", 1)[1]
        t = time_str.replace(",", " ").lower()
        t = re.sub(" (sun|mon|tue|wed|thu|fri|sat) ", " ",
                   " " + t + " ")
        if t.find(":") == -1:
            t += " 00:00:00"
        return t

    def get_received_time(self):
        """Extract the time from the first "Received:" field."""
        t = self["received"]
        t = t.split(";", 1)[1]
        t = t.lstrip()
        return t

    def get_date_time(self):
        """Extract the time from "Date:" field."""
        return self["date"]

    # Explicit dispatch table instead of looking the helpers up by a
    # name built at run time.
    extractors = {
        "from": get_from_time,
        "received": get_received_time,
        "date": get_date_time,
    }
    for field in fields:
        try:
            t = extractors[field](self)
            t = email.utils.parsedate_tz(t)
            t = email.utils.mktime_tz(t)
        except Exception:
            # Best effort: a missing, malformed or unknown field simply
            # means the next candidate is tried.
            continue
        # Do not allow the time before 1970-01-01 because
        # some IMAP server (i.e. Gmail) ignore it, and
        # some MUA (Outlook Express?) set From_ date to
        # 1965-01-01 for all messages.
        if t < 0:
            continue
        return t
    # All failed. Return current time.
    return time.time()


# Attach get_delivery_time() directly to mailbox.mboxMessage as a method so
# that every message read from an mbox provides it.
mailbox.mboxMessage.get_delivery_time = get_delivery_time
class IMAPUploader:
    """Lazily connected IMAP client used to create boxes and append mail."""

    def __init__(self, host, port, ssl, box, user, password, retry, folder_separator, dry_run):
        self.imap = None  # connection object; created by open()
        self.host = host
        self.port = port
        self.ssl = ssl
        self.user = user
        self.password = password
        self.retry = retry
        self.box = box
        # Mailbox arguments for which CREATE was already sent on the
        # current connection.
        self.created_directories_cache = []
        self.separator = folder_separator
        self.dry_run = dry_run

    @staticmethod
    def _mailbox_arg(name):
        """Return ``name`` double-quoted and encoded in modified UTF-7."""
        return imap_utf7.encode('"' + name + '"')

    def upload(self, box, delivery_time, message, flags = None, google_takeout_box_path = None, retry = None):
        """Append ``message`` to a mail box and return the server response.

        The destination is ``google_takeout_box_path`` (a list of folder
        names, created level by level) when given, otherwise ``box``.
        On a connection abort or socket error the connection is closed and
        the upload is retried up to ``retry`` times (default: the value
        given to the constructor); the error is re-raised afterwards.
        """
        if retry is None:
            retry = self.retry
        if flags is None:
            flags = []
        try:
            self.open()
            if isinstance(message, str):
                # Encode once. The former encode/decode/encode round trip
                # produced the same bytes but raised on escaped raw bytes.
                message = message.encode('utf-8', 'surrogateescape')
            if google_takeout_box_path is not None: # Google Takeout
                self.create_folder(google_takeout_box_path)
                target = self._mailbox_arg(self.separator.join(google_takeout_box_path))
            else: # Default behaviour
                target = self._mailbox_arg(box)
                self.imap_create(target)
            return self.imap.append(target, flags, delivery_time, message)
        except (imaplib.IMAP4.abort, socket.error):
            self.close()
            if retry == 0:
                raise
            print("(Reconnect)", end=' ')
            time.sleep(5)
            return self.upload(box, delivery_time, message, flags, google_takeout_box_path, retry - 1)

    def create_folder(self, google_takeout_box_path):
        """Create every level of the folder path, except a box named INBOX.

        A failing CREATE is reported and does not stop the remaining levels.
        """
        for depth in range(1, len(google_takeout_box_path) + 1):
            google_takeout_box = self.separator.join(google_takeout_box_path[0:depth])
            if google_takeout_box == "INBOX":
                continue
            try:
                self.imap_create(self._mailbox_arg(google_takeout_box))
            except Exception:
                # Was a bare "except:", which also swallowed
                # KeyboardInterrupt and SystemExit.
                print ("Cannot create box %s" % google_takeout_box)

    def imap_create(self, box):
        """Send CREATE for ``box`` unless already done on this connection."""
        if box not in self.created_directories_cache:
            self.imap.create(box)
            self.created_directories_cache.append(box)

    def enable_dry_run(self):
        """Replace the connection's create/append with printing stand-ins."""
        def dummy_create(a):
            print(f"Called create with {a}")
            return True

        def dummy_append(a, b, c, d):
            print(f"Called append with '{a}'")
            return ("OK", "")

        self.imap.create = dummy_create
        self.imap.append = dummy_append

    def open(self):
        """Connect, log in and create the base box; no-op when connected."""
        if self.imap:
            return
        imap_class = imaplib.IMAP4_SSL if self.ssl else imaplib.IMAP4
        self.imap = imap_class(self.host, self.port)
        if self.dry_run:
            self.enable_dry_run()
        self.imap.socket().settimeout(60)
        self.imap.login(self.user, self.password)
        self.created_directories_cache = []
        try:
            # Quote and encode the name like every other mailbox argument;
            # it used to be sent raw, unlike the calls in upload().
            self.imap_create(self._mailbox_arg(self.box))
        except Exception as e:
            print("(create error: )" + str(e))

    def close(self):
        """Drop the connection, if any."""
        if not self.imap:
            return
        self.imap.shutdown()
        self.imap = None

    def list_boxes(self):
        """Return the server's LIST response lines.

        Returns an empty list (previously an implicit None) when the
        connection aborts.
        """
        try:
            self.open()
            status, mailboxes = self.imap.list()
            return mailboxes
        except (imaplib.IMAP4.abort, socket.error):
            self.close()
            return []
def main(args=None):
    """Run IMAP Upload and return the process exit status.

    ``args`` defaults to ``sys.argv[1:]``. Returns 0 on success, 2 for
    command-line errors, 130 when interrupted and 1 for any other failure.
    """
    try:
        # Setup locale
        # Set LC_TIME to "C" so that imaplib.Time2Internaldate()
        # uses English month name.
        locale.setlocale(locale.LC_ALL, "")
        locale.setlocale(locale.LC_TIME, "C")
        # Never fail on characters stderr cannot encode. The previous code
        # wrapped the text stream in a codecs StreamWriter, which hands
        # bytes to a str-only stream and so broke every later stderr write.
        reconfigure = getattr(sys.stderr, "reconfigure", None)
        if reconfigure is not None:
            reconfigure(errors="ignore")

        # Parse arguments
        if args is None:
            args = sys.argv[1:]
        parser = MyOptionParser()
        options = parser.parse_args(args)
        if len(str(options.user)) == 0:
            print("User name: ", end=' ', flush=True)
            options.user = sys.stdin.readline().rstrip("\n")
        if len(str(options.password)) == 0:
            options.password = getpass.getpass()
        # Everything popped below is consumed here; what is left in the
        # dict are exactly the IMAPUploader constructor arguments.
        options = options.__dict__
        list_boxes = options.pop("list_boxes")
        err = options.pop("error")
        time_fields = options.pop("time_fields")
        recurse = options.pop("r")
        email_only_folders = options.pop("email_only_folders")
        separator = options["folder_separator"]
        google_takeout = options.pop("google_takeout")
        maximum_size_exceeded_are_warnings = options.pop("maximum_size_exceeded_are_warnings")
        google_takeout_box_as_base_folder = options.pop("google_takeout_box_as_base_folder")
        google_takeout_first_label = options.pop("google_takeout_first_label")
        google_takeout_label_priority = options.pop("google_takeout_label_priority").split(",")
        google_takeout_language = options.pop("google_takeout_language")
        debug = options.pop("debug")

        # Connect to the server and login
        print("Connecting to %s:%s." % (options["host"], options["port"]))
        if (list_boxes):
            print("Just list mail boxes!")
            uploader = IMAPUploader(**options)
            uploader.open()
            if debug: print("Connection successful")
            pretty_print_mailboxes(uploader.list_boxes())
        else:
            src = options.pop("src")
            uploader = IMAPUploader(**options)
            uploader.open()
            if debug: print("Connection successful")
            if(not recurse):
                # Prepare source and error mbox
                src = mailbox.mbox(src, create=False)
                if err:
                    err = mailbox.mbox(err)
                upload(uploader, options["box"], src, err, time_fields, google_takeout, google_takeout_first_label,
                       google_takeout_label_priority, google_takeout_box_as_base_folder, google_takeout_language, debug, maximum_size_exceeded_are_warnings)
            else:
                recursive_upload(uploader, "", src, err, time_fields, email_only_folders, separator, debug)
        return 0
    except optparse.OptParseError as e:
        print(e)
        return 2
    except mailbox.NoSuchMailboxError as e:
        print("No such mailbox:", e)
        return 1
    except socket.timeout as e:
        print("Timed out")
        return 1
    except imaplib.IMAP4.error as e:
        print("IMAP4 error:", e)
        return 1
    except KeyboardInterrupt as e:
        print("Interrupted")
        return 130
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        # Both values belong inside format(); with the parenthesis closed
        # after the line number this handler itself raised IndexError.
        print("An unknown error has occurred [{}]: {}".format(exc_tb.tb_lineno, e))
        return 1
# Script entry point: show the banner, run main() and hand its return value
# to the shell as the exit status.
if __name__ == "__main__":
    print("IMAP Upload (v{})".format(__version__))
    exit_status = main()
    # Push out any buffered progress output before the interpreter exits.
    sys.stdout.flush()
    sys.exit(exit_status)