-
Notifications
You must be signed in to change notification settings - Fork 24
/
org2opml.py
157 lines (131 loc) · 4.88 KB
/
org2opml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#! /usr/bin/env python
#
# Converts Emacs Org files to OPML so that Mindmaps can be generated
# using both Freemind and Mindnode
#
# @author: Sreejith K <[email protected]>
# Created on 1 Aug 2013
import re
import os
import sys
import codecs
import xml.etree.ElementTree as ET
from xml.dom import minidom
class Node(object):
    """One entry of the outline tree.

    Holds the ``level`` and ``text`` it was created with, plus an ordered
    list of the Node instances that were added beneath it.
    """

    def __init__(self, level, text):
        # Children start out empty and are filled in through add_child().
        self.children = []
        self.text = text
        self.level = level

    def add_child(self, node):
        """Attach ``node`` as the last child of this Node."""
        kids = self.children
        kids.append(node)
class OrgParser(object):
    """Parse an Emacs Org file into a tree of Nodes and export it as OPML.

    Typical use::

        parser = OrgParser('notes.org')
        parser.parse()
        opml_path = parser.to_opml()

    Attributes:
        org_file: path of the Org file given to the constructor.
        content: list of the lines read from ``org_file`` (decoded as UTF-8).
        title, author, root_name: values of the ``#+TITLE``, ``#+AUTHOR``
            and ``#+ROOT`` metadata lines ('' when absent).
        nodes: list of lists; ``nodes[d]`` holds, in document order, every
            Node sitting at tree depth ``d + 1``. ``nodes[0]`` therefore
            holds the root nodes.
    """

    # Regular expressions for parsing headings and metadata.  Raw strings
    # keep ``\s`` from being treated as an (invalid) string escape.
    NODE_RE = re.compile(r'(?P<level>[*]+)\s+(?P<text>.*)')
    TITLE_RE = re.compile(r'TITLE\s*:\s+(?P<title>.*)')
    AUTHOR_RE = re.compile(r'AUTHOR\s*:\s+(?P<author>.*)')
    ROOT_RE = re.compile(r'ROOT\s*:\s+(?P<root>.*)')

    def __init__(self, org_file):
        """Read ``org_file`` (UTF-8) into memory; parsing happens in parse().

        Raises whatever ``codecs.open`` / ``readlines`` raise when the file
        is missing, unreadable or not valid UTF-8.
        """
        self.org_file = org_file
        self.title = ''
        self.author = ''
        self.root_name = ''
        self.nodes = []
        self.prev_node = None
        # Chain of headings from a root down to the most recently added
        # node; used by add_node() to find the parent of the next heading.
        self._open_nodes = []
        with codecs.open(org_file, 'r', encoding='UTF-8') as f:
            self.content = f.readlines()

    def parse(self):
        """Parse the content line by line.

        Each line is stripped first.  Lines starting with ``#+`` are handed
        to handle_meta() without that prefix, lines starting with ``*`` are
        handed to add_node(), and every other line is ignored.
        """
        for line in self.content:
            line = line.strip()
            if line.startswith('#+'):
                self.handle_meta(line[2:])
            elif line.startswith('*'):
                self.add_node(line)

    def handle_meta(self, line):
        """Store the value of a TITLE, AUTHOR or ROOT metadata line.

        ``line`` is the metadata line with the leading ``#+`` already
        removed.  Unknown keys and lines that do not match the expected
        ``KEY: value`` shape are ignored.
        """
        if line.startswith('TITLE'):
            match = self.TITLE_RE.search(line)
            if match:
                self.title = match.group('title')
        elif line.startswith('AUTHOR'):
            match = self.AUTHOR_RE.search(line)
            if match:
                self.author = match.group('author')
        elif line.startswith('ROOT'):
            match = self.ROOT_RE.search(line)
            if match:
                self.root_name = match.group('root')

    def add_node(self, line):
        """Create a Node for a heading line and hook it into the tree.

        The number of leading ``*`` characters becomes the Node's level and
        the rest of the line its text.  The parent is the closest preceding
        heading with a smaller level.  A heading that has no such
        predecessor (for example a file starting with ``**``) becomes a
        root node instead of raising IndexError, and a heading that skips
        levels (``*`` followed directly by ``***``) is attached to the
        nearest shallower heading.  Lines that do not look like a heading
        are ignored.
        """
        match = self.NODE_RE.match(line)
        if not match:
            return
        level = len(match.group('level'))
        newnode = Node(level=level, text=match.group('text'))

        # Close every open heading that is not an ancestor of the new one.
        open_nodes = self._open_nodes
        while open_nodes and open_nodes[-1].level >= level:
            open_nodes.pop()
        if open_nodes:
            open_nodes[-1].add_child(newnode)
        open_nodes.append(newnode)

        # Register the node under its depth in the tree.  For files that
        # never skip a level this is the same as the star count.
        depth = len(open_nodes)
        while len(self.nodes) < depth:
            self.nodes.append([])
        self.nodes[depth - 1].append(newnode)

    def to_opml(self):
        """Export the parsed Node information to OPML format.

        The output is written next to the Org file, with the extension
        replaced by ``.opml``.  When the document has exactly one root
        heading, that heading becomes the root outline of the OPML;
        otherwise a root outline named ``root_name`` wraps all root
        headings.  A document without headings yields an OPML file with
        only the (empty) root outline.

        Returns:
            The path of the OPML file that was written.
        """
        roots = self.nodes[0] if self.nodes else []
        # If there is only one root node, make it the root node in OPML.
        skip_root = len(roots) == 1
        root_name = roots[0].text if skip_root else self.root_name

        root = ET.Element('opml', attrib={'version': '1.0'})
        head = ET.SubElement(root, 'head')
        title = ET.SubElement(head, 'title')
        title.text = self.title
        author = ET.SubElement(head, 'ownername')
        author.text = self.author
        body = ET.SubElement(root, 'body')
        outline = ET.SubElement(body, 'outline', attrib={'text': root_name})

        # Recursively iterate the Node and construct the XML ElementTree
        def iterate_children(node, ol):
            for child in node.children:
                element = ET.SubElement(
                    ol, 'outline', attrib={'text': child.text})
                iterate_children(child, element)

        # Iterate through the root nodes
        for root_node in roots:
            if skip_root:
                iterate_children(root_node, outline)
            else:
                ol = ET.SubElement(outline, 'outline', attrib={
                    'text': root_node.text})
                iterate_children(root_node, ol)

        opml_file = os.path.splitext(self.org_file)[0] + '.opml'

        # ElementTree's own writer produces everything on one line, so the
        # tree is round-tripped through minidom to pretty print it.
        xmlstr = minidom.parseString(
            ET.tostring(root)).toprettyxml(encoding='UTF-8')
        # toprettyxml(encoding=...) returns encoded bytes, hence binary mode.
        with open(opml_file, 'wb') as f:
            f.write(xmlstr)
        return opml_file
if __name__ == '__main__':
    # Command-line entry point: convert the Org file named by the first
    # argument and report where the OPML file was written.
    if len(sys.argv) < 2:
        # A parenthesised single-argument print works the same way on
        # Python 2 and Python 3, unlike the bare print statement.
        print('Usage: org2opml.py <input-org-file>')
        sys.exit(-2)
    p = OrgParser(sys.argv[1])
    p.parse()
    print('Exporting to OPML: %s' % p.to_opml())