-
Notifications
You must be signed in to change notification settings - Fork 1
/
GraphVizualizer.py
executable file
·146 lines (128 loc) · 4.89 KB
/
GraphVizualizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
import sys
from cStringIO import StringIO
from rdflib import Literal, BNode, Namespace, URIRef, RDF
from rdflib.Graph import Graph
try:
import pydot
import networkx as nx
except ImportError:
pass
from SesameStore import SesameStore
TRIPLESTORE = "http://localhost:8080/openrdf-sesame"
REPOSITORY = "lagen.nu"
DCT = Namespace('http://purl.org/dc/terms/')
RINFO = Namespace('http://rinfo.lagrummet.se/taxo/2007/09/rinfo/pub#')
EURLEX = Namespace('http://lagen.nu/eurlex#')
def sfs_label_transform(label):
    """Return a short display label for an SFS statute title.

    Titles containing "(1960:729)" or "(1915:218)" are replaced by a
    fixed short name. For any other title, every occurrence of a space
    followed by a parenthesised "digits:digits" number is removed.
    """
    # Imported locally because the module's import section does not
    # import ``re``; without this the fallback branch raises NameError.
    import re

    if "(1960:729)" in label:
        return "Upphovsrättslagen"
    if "(1915:218)" in label:
        return "Avtalslagen"
    return re.sub(r' \(\d+:\d+\)', '', label)
def sfs_type_transform(rdftype):
    """Return the node shape name to use for the given rdf:type value.

    The RINFO ``KonsolideradGrundforfattning`` type maps to "box";
    every other type maps to "ellipse".
    """
    is_consolidated = rdftype == RINFO['KonsolideradGrundforfattning']
    return "box" if is_consolidated else "ellipse"
# TODO: implement parametrization (once we learn in what way we wish
# it to work)
def parametrize_query(q, args):
    """Return the query *q* prepared for the given *args*.

    Parametrization is not implemented yet: *args* is ignored and the
    query is handed back unchanged.
    """
    query = q
    return query
def get_rdf_graph(store, queries):
    """Load triples from *store* into a new ``Graph`` and return it.

    When *queries* is non-empty, each query is run through
    ``store.construct`` and its N-Triples result is parsed into the
    graph. When *queries* is empty, the store's full N-Triples
    serialization is fetched with ``store.get_serialized`` instead.
    """
    graph = Graph()
    if queries:
        for query in queries:
            result = store.construct(query, format="nt")
            graph.parse(StringIO(result), format="nt")
        return graph

    # No queries given: get every single triple in the store.
    print("getting serialized")
    serialized = store.get_serialized("nt")
    print("parsing graph")
    graph.parse(StringIO(serialized), format="nt")
    return graph
def rdf_to_dot(rdfgraph, label, link, labeltransform, typetransform):
    """Convert an RDF graph into a ``pydot.Dot`` graph and return it.

    For every triple a ``pydot.Node`` is created for the subject and
    added to the dot graph unless ``get_node`` already reports it.
    Depending on the predicate the node is then decorated:

    * *label* predicate: ``node.label`` is set to ``labeltransform(o)``
    * ``RDF.type``: ``node.shape`` is set to ``typetransform(o)``
    * *link* predicate with a ``URIRef`` object: an edge from the
      subject node to a node for the object is added
    """
    # NOTE(review): get_node is called with a Node object and add_edge
    # with two Node objects -- confirm that this matches the API of the
    # pydot version in use.
    dotgraph = pydot.Dot()
    for subject, predicate, obj in rdfgraph:
        # possibly unify S
        source = pydot.Node(subject)
        already_present = dotgraph.get_node(source)
        if not already_present:
            dotgraph.add_node(source)

        if predicate == label:
            source.label = labeltransform(obj)
        if predicate == RDF.type:
            source.shape = typetransform(obj)
        if predicate == link and type(obj) == URIRef:
            dotgraph.add_edge(source, pydot.Node(obj))
    return dotgraph
def rdf_to_nx(rdfgraph):
    """Build a networkx ``DiGraph`` from the ``eurlex:cites`` triples.

    Only triples whose predicate equals ``EURLEX["cites"]`` are used.
    Subject and object are reduced to the text after their last "/"
    and added as a directed edge; each added edge is printed.
    """
    cites = EURLEX["cites"]
    nxgraph = nx.DiGraph()
    for subject, predicate, obj in rdfgraph:
        if predicate == cites:
            source = subject.rsplit("/", 1)[-1]
            target = obj.rsplit("/", 1)[-1]
            print("Adding %s -> %s" % (source, target))
            nxgraph.add_edge(source, target)
    return nxgraph
# Available visualization configurations, keyed by the name given as the
# first command line argument. Keys read by the script body below:
#   context         - passed to SesameStore as its third argument
#   queries         - optional list of SPARQL CONSTRUCT queries; when the
#                     key is absent the whole store is fetched
#   format          - 'dot' or 'networkx'; selects the conversion to run
#   label, link     - predicates handed to rdf_to_dot ('dot' format)
#   labeltransform, typetransform
#                   - callables handed to rdf_to_dot ('dot' format)
#   renderer        - optional; passed as prog= to write_png ('dot' format)
configs = {'sfs':{'context':'<urn:x-local:sfs>',
                  'label':DCT['title'],
                  'labeltransform':sfs_label_transform,
                  'typetransform':sfs_type_transform,
                  'link':DCT['references'],
                  'format':'dot', #maybe GEXF in the future
                  'renderer':'twopi'},
           # The query keeps eurlex:cites triples whose object has some
           # rdf:type, plus the eurlex:casenum of the citing subject.
           'ecj': {'queries':["""PREFIX eurlex: <http://lagen.nu/eurlex#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
CONSTRUCT { ?x eurlex:casenum ?z .
?x eurlex:cites ?y }
WHERE { ?x eurlex:cites ?y .
?y rdf:type ?w .
?x eurlex:casenum ?z }"""],
                   'label':EURLEX['casenum'],
                   'context':'<urn:x-local:ecj>',
                   'format':'networkx'}
           }
def main(argv):
    """Run the visualizer for the configuration named in *argv*.

    *argv* has the shape of ``sys.argv``: program name, configuration
    name, then optional arguments that are handed to
    ``parametrize_query``.

    With no configuration name, usage help is printed and the process
    exits. With a name that is not a key of ``configs``, the available
    names are printed and the process exits with status 1.

    Output depends on the configuration's 'format':

    * 'dot': writes ``out.dot`` and, if a 'renderer' is configured,
      ``out.png``
    * 'networkx': writes ``out.graphml`` and pretty-prints the result
      of ``nx.pagerank``
    """
    import pprint

    available = ", ".join(sorted(configs.keys()))
    if len(argv) < 2:
        print("Usage: %s configuration [arguments]" % argv[0])
        print("Available configurations: %s" % available)
        sys.exit()

    confid = argv[1]
    args = argv[2:]
    if confid not in configs:
        # Report the mistake instead of dying with a KeyError traceback.
        print("Unknown configuration %s" % confid)
        print("Available configurations: %s" % available)
        sys.exit(1)
    conf = configs[confid]

    store = SesameStore(TRIPLESTORE, REPOSITORY, conf['context'])
    # A configuration without 'queries' yields an empty list, which makes
    # get_rdf_graph fetch the whole store.
    queries = [parametrize_query(q, args) for q in conf.get('queries', [])]

    print("Getting graph from %d queries" % len(queries))
    rdfgraph = get_rdf_graph(store, queries)
    print("Graph contains %s triples" % len(rdfgraph))

    if conf['format'] == 'dot':
        print("converting rdf graph to dot graph")
        dotgraph = rdf_to_dot(rdfgraph,
                              conf['label'],
                              conf['link'],
                              conf['labeltransform'],
                              conf['typetransform'])
        print("serializing dot graph")
        dotgraph.write("out.dot")
        if 'renderer' in conf:
            print("rendering dot graph")
            dotgraph.write_png("out.png", prog=conf['renderer'])
    elif conf['format'] == 'networkx':
        print("Converting rdf graph to networkx graph")
        nxgraph = rdf_to_nx(rdfgraph)
        nx.write_graphml(nxgraph, "out.graphml")
        print("out.graphml created")
        ranked = nx.pagerank(nxgraph)
        pprint.pprint(ranked)
    else:
        print("Unknown graph format %s" % conf['format'])


if __name__ == "__main__":
    main(sys.argv)