-
Notifications
You must be signed in to change notification settings - Fork 1
/
regal_utils.py
107 lines (89 loc) · 4.75 KB
/
regal_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import numpy as np
import scipy.io as sio
import sklearn.metrics.pairwise
from scipy.sparse import csr_matrix, coo_matrix
from sklearn.neighbors import KDTree
import scipy.sparse as sp
from scipy.spatial.distance import cosine
def get_embedding_similarities(embed, embed2 = None, sim_measure = "euclidean", num_top = None):
n_nodes, dim = embed.shape
if embed2 is None:
embed2 = embed
if num_top is not None: #KD tree with only top similarities computed
kd_sim = kd_align(embed, embed2, distance_metric = sim_measure, num_top = num_top)
return kd_sim
#All pairwise distance computation
if sim_measure == "cosine":
similarity_matrix = sklearn.metrics.pairwise.cosine_similarity(embed, embed2)
else:
similarity_matrix = sklearn.metrics.pairwise.euclidean_distances(embed, embed2)
similarity_matrix = np.exp(-similarity_matrix)
return similarity_matrix
#Split embeddings in half
#Right now asssume graphs are same size (as done in paper's experiments)
#NOTE: to handle graphs of different sizes, pass in an arbitrary split index
#Similarly, to embed >2 graphs, change to pass in a list of splits and return list of embeddings
def get_embeddings(combined_embed, graph_split_idx = None):
if graph_split_idx is None:
graph_split_idx = int(combined_embed.shape[0] / 2)
dim = combined_embed.shape[1]
embed1 = combined_embed[:graph_split_idx]
embed2 = combined_embed[graph_split_idx:]
return embed1, embed2
#alignments are dictionary of the form node_in_graph 1 : node_in_graph2
#rows of alignment matrix are nodes in graph 1, columns are nodes in graph2
def score(alignment_matrix, true_alignments = None):
if true_alignments is None: #assume it's just identity permutation
return np.sum(np.diagonal(alignment_matrix))
else:
nodes_g1 = [int(node_g1) for node_g1 in true_alignments.keys()]
nodes_g2 = [int(true_alignments[node_g1]) for node_g1 in true_alignments.keys()]
return np.sum(alignment_matrix[nodes_g1, nodes_g2])
def score_embeddings_matrices(embed1, embed2, topk = None, similarity_threshold = None, normalize = False, true_alignments = None, sim="cosine"):
similarity_matrix = get_embedding_similarities(embed1, embed2, sim_measure = sim)
alignment_matrix = get_estimated_alignment_matrix(similarity_matrix, similarity_threshold, normalize)
score = score_alignment_matrix(alignment_matrix, topk = topk, true_alignments = true_alignments)
return score
def kd_align(emb1, emb2, normalize=False, distance_metric = "euclidean", num_top = 50):
kd_tree = KDTree(emb2, metric = distance_metric)
row = np.array([])
col = np.array([])
data = np.array([])
dist, ind = kd_tree.query(emb1, k = num_top)
print("queried alignments")
row = np.array([])
for i in range(emb1.shape[0]):
row = np.concatenate((row, np.ones(num_top)*i))
col = ind.flatten()
data = np.exp(-dist).flatten()
sparse_align_matrix = coo_matrix((data, (row, col)), shape=(emb1.shape[0], emb2.shape[0]))
return sparse_align_matrix.tocsr()
def score_alignment_matrix(alignment_matrix, topk = None, topk_score_weighted = False, true_alignments = None):
n_nodes = alignment_matrix.shape[0]
correct_nodes = []
if topk is None:
row_sums = alignment_matrix.sum(axis=1)
row_sums[row_sums == 0] = 1e-6 #shouldn't affect much since dividing 0 by anything is 0
alignment_matrix = alignment_matrix / row_sums[:, np.newaxis] #normalize
alignment_score = score(alignment_matrix, true_alignments = true_alignments)
else:
alignment_score = 0
if not sp.issparse(alignment_matrix):
sorted_indices = np.argsort(alignment_matrix)
for node_index in range(n_nodes):
target_alignment = node_index #default: assume identity mapping, and the node should be aligned to itself
if true_alignments is not None: #if we have true alignments (which we require), use those for each node
target_alignment = int(true_alignments[node_index])
if sp.issparse(alignment_matrix):
row, possible_alignments, possible_values = sp.find(alignment_matrix[node_index])
node_sorted_indices = possible_alignments[possible_values.argsort()]
else:
node_sorted_indices = sorted_indices[node_index]
if target_alignment in node_sorted_indices[-topk:]:
if topk_score_weighted:
alignment_score += 1.0 / (n_nodes - np.argwhere(sorted_indices[node_index] == target_alignment)[0])
else:
alignment_score += 1
correct_nodes.append(node_index)
alignment_score /= float(n_nodes)
return alignment_score, set(correct_nodes)