forked from xot/ssst
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ssst-comment
executable file
·284 lines (247 loc) · 8.9 KB
/
ssst-comment
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
#!/usr/local/bin/python3
#
# SSST-COMMENT
#
# Add a new comment to a blogpost
#
# Part of SSST:
# A simple static site tool to maintain websites based on markdown and pandoc
#
# Author: Jaap-henk Hoepman ([email protected])
import argparse, sys, os, re, email, datetime
from pathlib import Path
from email.policy import SMTP
# Path to the root of the source files tree (overridden by -s/--source)
src_root = Path('./src')
#---
# WARNINGS AND ERRORS
#---
# Open log file object that error(), warning() and log() also write to;
# None until the script opens the file named by -l/--logfile
logfile = None
# When True, warning() exits with status 1 after reporting (-p/--pedantic)
pedantic = False
# Verbosity: log() prints a message to stdout only when its level is <= this
# value, so the default of -1 is silent (-v/--verbosity)
loglevel = -1
# Output the error message, and log it when required. Then exit
# - msg: the error message
def error(msg: str):
    """Report a fatal error and terminate the program with exit status 1.

    The formatted message goes to stderr and, when a log file is open, to
    that log file as well.

    - msg: the error message
    """
    text = 'ssst-comment: error: %s\n' % str(msg)
    sinks = [sys.stderr]
    if logfile is not None:
        sinks.append(logfile)
    for sink in sinks:
        sink.write(text)
    sys.exit(1)
# Output the warning message, and write it to the log file if it was specified.
# Exit when pedantic processing was specified.
# - msg: the warning message
def warning(msg: str):
    """Report a warning; terminate with exit status 1 in pedantic mode.

    The formatted message goes to stderr and, when a log file is open, to
    that log file as well.

    - msg: the warning message
    """
    text = 'ssst-comment: warning: %s\n' % str(msg)
    sys.stderr.write(text)
    if logfile is not None:
        logfile.write(text)
    if not pedantic:
        return
    sys.exit(1)
# Output a log message when its level does not exceed the verbosity specified
# on the command line. Always write the message to the log file if it was
# specified
# - level: the log-level for this message
# - msg: the log message
def log(level: int, msg: str):
    """Emit a log line, prefixed according to its level.

    Level 0 lines start with '# '; other levels start with `level` dashes
    followed by a space. The line is printed to stdout only when
    level <= loglevel, but is always written to the log file when one is open.

    - level: the log-level for this message
    - msg: the log message
    """
    prefix = '# ' if level == 0 else '-' * level + ' '
    line = prefix + str(msg) + '\n'
    if level <= loglevel:
        sys.stdout.write(line)
    if logfile is not None:
        logfile.write(line)
# ---
# FOLDER FUNCTIONS
# ---
# Remove the trailing slash from folder names
def remove_trailing_slash(s):
    """Return s without its final character when that character is '/'.

    At most one slash is removed; an empty string is returned unchanged.
    """
    ends_with_slash = bool(s) and s[-1] == '/'
    return s[:-1] if ends_with_slash else s
# ---
# PROCESSING MAIL
# ---
# Matches the subject of a comment mail; group 1 captures the stem, i.e.
# everything after 'COMMENT: ' up to the end of the line
subject_re = re.compile(r'COMMENT: (.*)')
# Parse the subject string to obtain the pathname to store the comment in;
# finds the first empty comment file slot and returns this as a full path
# - subjectstr: subject, which looks like this: 'COMMENT: <stem>'
# - returns: full path to store comment in
def get_fname(subjectstr: str) -> Path:
    """Map the subject of a comment mail to the file the comment goes in.

    The subject must match subject_re ('COMMENT: <stem>'). The stem is
    interpreted relative to src_root, and the first path '<stem><n>.mdc'
    (n = 1, 2, 3, ...) that does not exist yet is returned.

    - subjectstr: the value of the Subject header
    - returns: full path to store the comment in

    Exits through error() when the subject cannot be parsed, when the stem
    is empty or would point outside the source tree, or when the folder that
    should hold the comment does not exist.
    """
    log(1,'Parsing subject ' + subjectstr)
    match_obj = subject_re.match(subjectstr)
    if not match_obj:
        error('Could not parse subject')
    stem = match_obj.group(1)
    # The subject comes from an incoming mail and is therefore untrusted:
    # an absolute stem would replace src_root in the join below, and '..'
    # components would climb out of it. An empty stem (or '.') would yield
    # '<src_root>1.mdc', i.e. a file next to the source tree.
    stem_path = Path(stem)
    if not stem_path.parts:
        error('Subject does not name a post')
    if stem_path.is_absolute() or '..' in stem_path.parts:
        error('Comment path escapes the source tree ' + stem)
    # prepend the src_root to get the full path for the stem
    fullstem = src_root / stem
    # check if parent exists
    parent = fullstem.parent
    if not parent.exists():
        error('Folder to store comment in does not exist ' + str(parent))
    # try <stem>1.mdc, <stem>2.mdc, ... until a non existing filename is found
    # NOTE(review): an earlier comment here spoke of '.1.mdc', '.2.mdc', but
    # no dot is inserted between stem and counter - confirm which naming the
    # rest of SSST expects (the stem may already end in a dot).
    count = 1
    fullpath = Path(str(fullstem) + str(count) + '.mdc')
    while fullpath.exists():
        count = count + 1
        fullpath = Path(str(fullstem) + str(count) + '.mdc')
    return fullpath
# regular expression matching a basic email address: a run of nonspace
# characters containing at least one @ (group 1 captures the whole run)
basic_addr_re = re.compile(r'([^ ]*@[^ ]*)')
# regular expression matching a complex email address: a display name
# (group 1: everything before the first '<') followed by a nonspace
# address containing @ enclosed in angular brackets
complex_addr_re = re.compile(r'([^<]*)<[^ ]*@[^ ]*>')
# Parse From: header according to RFC 5322:
# https://tools.ietf.org/html/rfc5322
# from = "From:" mailbox-list CRLF
# sender = "Sender:" mailbox CRLF
# mailbox = name-addr / addr-spec
# name-addr = [display-name] angle-addr
# angle-addr = [CFWS] "<" addr-spec ">" [CFWS] /
# obs-angle-addr
# addr-spec = local-part "@" domain
#
# - fromstr: the string containing the value of the From header
# (i.e without From:)
def get_author(fromstr: str) -> str:
    """Extract the author to credit for a comment from a From header value.

    A single mailbox is assumed, and the following rule of thumb is used:
    - if the value starts with a string without spaces containing @, that
      string (the bare email address) is returned;
    - otherwise, if the value is a display name (possibly enclosed in quotes)
      followed by an address in angular brackets, the display name is
      returned, with the whitespace separating it from '<' removed.

    - fromstr: the string containing the value of the From header
      (i.e without From:)
    - returns: the bare address or the display name

    Exits through error() when neither form matches.
    """
    log(1,'Parsing author ' + fromstr)
    # surrounding whitespace carries no meaning and would defeat the
    # anchored match of the basic form
    candidate = fromstr.strip()
    matchobj = basic_addr_re.match(candidate)
    if matchobj:
        log(2,'Found a basic address')
        return matchobj.group(1)
    matchobj = complex_addr_re.match(candidate)
    if matchobj:
        log(2,'Found a complex address')
        # group 1 runs up to the '<', so it still includes the blank(s)
        # between the display name and the bracketed address
        return matchobj.group(1).strip()
    error('Could not parse author ' + fromstr)
# Parse an SMTP date string like "Sun, 19 Apr 2020 08:42:00 +0000"
# - datestr: string containing date
def get_date(datestr: str) -> str:
    """Convert the value of a Date header into an ISO date (YYYY-MM-DD).

    The value is parsed with the standard library's RFC 5322 date parser,
    so the result does not depend on the process locale, and optional parts
    such as the day of the week may be absent. The date is the calendar date
    as written in the header (no time zone conversion is applied).

    - datestr: string containing the date, e.g.
      "Sun, 19 Apr 2020 08:42:00 +0000"
    - returns: the date as 'YYYY-MM-DD'

    Exits through error() when the value cannot be parsed.
    """
    # local import: keeps this function self-contained
    from email.utils import parsedate_to_datetime
    log(1,'Parsing date ' + datestr)
    try:
        parsed = parsedate_to_datetime(datestr)
    except (TypeError, ValueError):
        # older Python versions raise TypeError, newer ones ValueError
        error('Could not parse date ' + datestr)
    return str(parsed.date())
from html.parser import HTMLParser
# https://gist.github.com/lingnet/e3da99c1a580552bfbf7f0550453d989
class HTMLtoText(HTMLParser):
    """Minimal HTML to plain text converter.

    Feed HTML with feed(), finish with close(), then call get_text().
    Paragraph boundaries and <br> become newlines, runs of whitespace in
    text are collapsed to a single space, and everything inside <script>
    and <style> elements is dropped.
    """

    def __init__(self):
        super().__init__()
        # collected output fragments, joined by get_text()
        self._buf = []
        # True while inside a <script> or <style> element
        self.hide_output = False

    def handle_starttag(self, tag, attrs):
        if tag in ('p', 'br') and not self.hide_output:
            self._buf.append('\n')
        elif tag in ('script', 'style'):
            self.hide_output = True

    def handle_startendtag(self, tag, attrs):
        # self-closing form, e.g. <br/>
        if tag == 'br' and not self.hide_output:
            self._buf.append('\n')

    def handle_endtag(self, tag):
        if tag == 'p':
            self._buf.append('\n')
        elif tag in ('script', 'style'):
            self.hide_output = False

    def handle_data(self, text):
        if text and not self.hide_output:
            self._buf.append(re.sub(r'\s+', ' ', text))

    def handle_entityref(self, name):
        # Only called when the parser runs with convert_charrefs disabled.
        # Imported here because the table is not imported at module level.
        from html.entities import name2codepoint
        if name in name2codepoint and not self.hide_output:
            self._buf.append(chr(name2codepoint[name]))

    def handle_charref(self, name):
        # Only called when the parser runs with convert_charrefs disabled.
        if self.hide_output:
            return
        try:
            # both &#x41; and &#X41; denote a hexadecimal reference
            if name.startswith(('x', 'X')):
                n = int(name[1:], 16)
            else:
                n = int(name)
            self._buf.append(chr(n))
        except (ValueError, OverflowError):
            # malformed number or not a valid code point: drop the reference
            pass

    def get_text(self):
        """Return the text collected so far, with runs of spaces collapsed."""
        return re.sub(r' +', ' ', ''.join(self._buf))
# Return the first text part in the message; convert to plain text if html
def get_content(msg: email.message.EmailMessage) -> str:
    """Return the text of the comment carried by a mail message.

    The first text/plain part is preferred. When there is none, the first
    text/html part is converted to plain text with HTMLtoText. Parts that
    are themselves marked as attachments (Content-Disposition: attachment)
    are not considered, so an attached text file is not mistaken for the
    comment.

    - msg: the parsed message
    - returns: the comment text

    Exits through error() when the message has no usable text part.
    """
    log(1,'Getting content')
    html_part = None
    for part in msg.walk():
        if part.get_content_disposition() == 'attachment':
            continue
        content_type = part.get_content_type()
        # plain text wins as soon as it is seen
        if content_type == 'text/plain':
            return part.get_content()
        # remember the first html part as fallback
        if content_type == 'text/html' and html_part is None:
            html_part = part
    if html_part is not None:
        parser = HTMLtoText()
        parser.feed(html_part.get_content())
        parser.close()
        return parser.get_text()
    error('Message does not contain text part')
# Add a comment as received by email into the appropriate position in the source
# tree
# - mail: filename containing mail containing the comment
def add(mail: str):
    """Store the comment contained in a mail file in the source tree.

    The mail is parsed; its Subject determines the target file (get_fname),
    its From the author (get_author), its Date the date (get_date) and its
    first text part the comment body (get_content). The comment is written
    as a file with a front matter block holding author and date, followed
    by the body.

    - mail: name of the file containing the mail that carries the comment

    Exits through error() when a required header is missing or any of the
    parsing steps fails.
    """
    log(1,'Processing ' + mail)
    with open(mail, 'rb') as fp:
        # parse the message
        msg = email.message_from_binary_file(fp, policy=SMTP)
    # get the filename to store the comment in
    if 'Subject' not in msg:
        error('Subject missing')
    fname = get_fname(msg['Subject'])
    # get the author
    if 'From' not in msg:
        error('From missing')
    author = get_author(msg['From'])
    # get the date
    if 'Date' not in msg:
        error('Date missing')
    date = get_date(msg['Date'])
    # get the content
    content = get_content(msg)
    log(2,content)
    log(0,'Writing comment from ' + author + ' dated ' + date + ' to ' + str(fname))
    # Inside a single-quoted scalar a quote must be doubled; without this an
    # author such as O'Brien would produce invalid front matter.
    quoted_author = author.replace("'", "''")
    # write UTF-8 regardless of the locale, so non-ASCII comments cannot fail
    with open(fname, 'w', encoding='utf-8') as outf:
        outf.write('---\n')
        outf.write("author: '" + quoted_author + "'\n")
        outf.write("date: '" + date + "'\n")
        outf.write('---\n')
        outf.write(content)
# Set up arguments parsing
# Set up arguments parsing
parser = argparse.ArgumentParser(description='SSST-comment. Add a new comment to a blogpost.')
parser.add_argument('mail', help='Mail containing comment to add.')
parser.add_argument('-s', '--source', default=src_root,
                    help='Path to the root of the source files tree')
# type=int lets argparse reject a non-numeric verbosity with a usage message
# instead of a traceback further down
parser.add_argument('-v', '--verbosity', default=loglevel, type=int,
                    help='Verbosity (-1, the default, is silent)')
parser.add_argument('-l', '--logfile', default='ssst-comment.log',
                    help='Name of file to store all log messages in')
parser.add_argument('-p', '--pedantic', default = False, action='store_true',
                    help='Abort after warning')
args = parser.parse_args()
# Apply arguments
src_root = Path(args.source)
if not src_root.exists():
    error('Source tree %s does not exist.' % src_root)
mail = args.mail
if not os.path.exists(mail):
    error('Mail %s does not exist.' % mail)
pedantic = args.pedantic
loglevel = int(args.verbosity)
logname = args.logfile
# open logfile (an empty name disables logging to file); UTF-8 so that
# logged comment text cannot fail to encode
if logname != '':
    logfile = open(logname, 'w', encoding='utf-8')
# do it; error() leaves through sys.exit(), so close the logfile in a
# finally clause to make sure it is flushed and closed on every path
try:
    add(mail)
finally:
    # close the logfile if necessary
    if logfile is not None:
        logfile.close()