forked from Dieterbe/dieterblog
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pyblosxom-to-hugo.py
executable file
·132 lines (113 loc) · 3.93 KB
/
pyblosxom-to-hugo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python3
# note this script is not super clean or failsafe.
# but it works for me
# presumably the pyblosxom source directory; not referenced by the code below
py="."
# root of the Hugo site: converted files are written below <hugo>/content/
hugo="hugotest"
import glob
import re
import pathlib
import html
import sys
def fixheader(lines, guid, draft=False, tz="-04:00"):
    """Replace a pyblosxom header with a Hugo TOML front matter block.

    The first element of *lines* is the title.  The following lines are
    scanned for ``pubdate``, ``pubtime``, ``tags`` and ``guid`` directives;
    empty lines are skipped, and the first line that is none of these marks
    the start of the body.

    Args:
        lines: the lines of the file, newline-terminated as returned by
            ``readlines()``.
        guid: fallback guid, used unless a ``guid`` directive overrides it.
        draft: when true, ``draft = true`` is added to the front matter.
        tz: UTC offset appended to the ``date`` value.

    Returns:
        A new list: the front matter lines followed by the body lines.

    Raises:
        ValueError: on an unknown ``#`` directive, or when the pubdate or
            pubtime directive is missing.
    """
    date = ""
    time = ""
    tags = []
    title = ""
    # If the scan never hits a body line (header-only file) the body is empty,
    # rather than starting at the last header line.
    body_start = len(lines)
    for idx, raw in enumerate(lines):
        line = raw.strip()
        if idx == 0:
            # Replace entity refs like &amp; with & first, and only then
            # escape backslashes and double quotes for the TOML string, so
            # that a decoded &quot; ends up escaped as well.
            title = html.unescape(line).replace("\\", "\\\\").replace('"', '\\"')
            continue
        if "pubdate " in line:
            date = re.sub(r".*pubdate *", "", line)
        elif "pubtime " in line:
            time = re.sub(r".*pubtime *", "", line)
        elif "tags " in line:
            raw_tags = re.sub(r".*tags *", "", line).split(",")
            # drop the whitespace around each tag and skip empty entries
            tags = [tag.strip() for tag in raw_tags if tag.strip()]
        elif "guid " in line:
            guid = re.sub(r".*guid *", "", line)
        elif line.startswith("#"):
            raise ValueError("unknown header directive: %s" % line)
        elif line == "":
            # ignore empty line
            continue
        else:
            body_start = idx
            break
    if not date or not time:
        raise ValueError("incomplete header")
    newheader = [
        "+++\n",
        "title = \"%s\"\n" % title,
        "date = \"%sT%s%s\"\n" % (date, time, tz),
        "tags = [%s]\n" % ', '.join('"%s"' % tag for tag in tags),
        "guid = \"%s\"\n" % guid,
    ]
    if draft:
        newheader.append("draft = true\n")
    newheader.append("+++\n")
    return newheader + lines[body_start:]
def fixreadmore(lines):
    """Lazily yield *lines* with ``<!--break-->`` replaced by ``<!--more-->``."""
    return (text.replace("<!--break-->", "<!--more-->") for text in lines)
# note, not robust. e.g. will fail if you have </code> in your code block
def fixhighlight(lines):
    """Rewrite ``<code>`` markup into Hugo ``highlight`` shortcodes.

    Args:
        lines: an iterable of text lines.

    Returns:
        A list with one converted line per input line.
    """
    out = []
    for line in lines:
        # due to how processing works in hugo, CDATA sections would show up
        # literally. we of course don't want that
        line = line.replace('<![CDATA[', '').replace(']]>', '')
        # quote lang arg because lang can be like html+php
        line = re.sub(
            r'<code lang="([a-zA-Z+]+)">',
            r'{{< highlight "\1" "style=default" >}}',
            line,
        )
        # pygments invoked via hugo needs a lang specified, so a bare <code>
        # falls back to "c"
        line = line.replace('<code>', '{{< highlight "c" "style=default" >}}')
        out.append(line.replace('</code>', '{{< /highlight >}}'))
    return out
def process_entry(entry):
    """Convert one pyblosxom entry into a Hugo post.

    The result is written to ``<hugo>/content/post/<name>.html``.  Only a
    plain ``.txt`` file is treated as published; any other extension
    (``.txt.draft``, ``.draft``, ``.unpub``, ...) marks the post as a draft.

    Args:
        entry: path of the entry, starting with ``entries/``.
    """
    base = entry[8:]  # strip the leading "entries/"
    draft = False
    if base.endswith(".txt"):
        stem = base[:-4]
    elif ".txt" in base:  # .txt.draft, .txt.unpub, etc
        draft = True
        stem = re.sub(r"\.txt.*", "", base)
    else:  # .draft, .unpub
        draft = True
        stem = pathlib.PurePath(entry).stem
    new = hugo + "/content/post/" + stem + ".html"
    print(base, "---->", new)
    with open(entry, "r") as src:
        lines = src.readlines()
    # The extension-less name is the fallback guid, also for drafts; slicing
    # off a fixed four characters would leave part of the draft extension in.
    lines = fixhighlight(fixreadmore(fixheader(lines, stem, draft)))
    with open(new, "w") as dst:
        dst.write(''.join(lines))
def process_page(page):
    """Convert one pyblosxom page into a Hugo content page.

    The result is written to ``<hugo>/content/<name>.html``.  Only the header
    is rewritten; the body is copied unchanged.

    Args:
        page: path of the page, starting with ``pages/`` and ending in a
            four-character extension such as ``.txt``.
    """
    base = page[6:]  # strip the leading "pages/"
    # for my case we can assume .txt extension
    stem = base[:-4]
    new = hugo + "/content/" + stem + ".html"
    print(base, "---->", new)
    with open(page, "r") as src:
        lines = src.readlines()
    lines = fixheader(lines, stem)
    with open(new, "w") as dst:
        # every line already ends with its own newline, so join without a
        # separator to avoid doubling the line breaks
        dst.write(''.join(lines))
# Convert only the files named on the command line; without arguments,
# convert everything found under entries/ and then pages/.
targets = sys.argv[1:]
if targets:
    for target in targets:
        if target.startswith('entries/'):
            process_entry(target)
        elif target.startswith('pages/'):
            process_page(target)
        else:
            print("Don't know how to process", target)
else:
    for found in glob.glob('entries/*'):
        process_entry(found)
    for found in glob.glob('pages/*'):
        process_page(found)