-
Notifications
You must be signed in to change notification settings - Fork 3
/
emde.py
41 lines (32 loc) · 1.43 KB
/
emde.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import numpy as np
from sklearn.random_projection import GaussianRandomProjection
class Emde:
    """Hash dense vectors into bucket codes via random projection and median thresholds.

    ``fit`` learns a Gaussian random projection onto ``16 * sketch_depth``
    dimensions and the per-dimension median of the projected training data.
    ``transform`` projects new vectors, turns every projected dimension into
    one bit (1 when strictly above its median), packs each run of 16 bits into
    an unsigned 16-bit integer and reduces it modulo ``sketch_width``.  Each
    input row therefore yields ``sketch_depth`` codes in ``[0, sketch_width)``.
    """

    # Bits packed into one code.  Fixed at 16 because ``discretize``
    # reinterprets every pair of packed bytes as a single uint16.
    _BITS_PER_CODE = 16

    def __init__(self, sketch_depth: int, sketch_width: int, random_state=None):
        """Configure the encoder.

        Args:
            sketch_depth: Number of codes produced per input row (>= 1).
            sketch_width: Number of buckets per code (>= 1); codes are taken
                modulo this value.
            random_state: Optional seed forwarded to
                ``GaussianRandomProjection`` so that the projection, and
                hence the codes, are reproducible.  ``None`` keeps the
                previous unseeded behaviour.

        Raises:
            ValueError: If ``sketch_depth`` or ``sketch_width`` is below 1.
        """
        # Validate first: a zero width would otherwise surface much later as a
        # modulo-by-zero warning and meaningless codes.
        if sketch_depth < 1:
            raise ValueError(f"sketch_depth must be >= 1, got {sketch_depth}")
        if sketch_width < 1:
            raise ValueError(f"sketch_width must be >= 1, got {sketch_width}")
        self.sketch_depth = sketch_depth
        self.sketch_width = sketch_width
        self.sp = GaussianRandomProjection(
            n_components=self._BITS_PER_CODE * sketch_depth,
            random_state=random_state,
        )

    def init_biases(self, v):
        """Store the per-column median of the 2-D array ``v`` in ``self.biases``."""
        # One vectorised call instead of a Python loop over the columns.
        self.biases = np.percentile(v, q=50.0, axis=0)

    def discretize(self, v):
        """Convert projected vectors into integer codes.

        Args:
            v: 2-D array whose column count is a multiple of 16 and matches
                the length of ``self.biases``.

        Returns:
            ``int64`` array with one column per group of 16 input columns,
            every value lying in ``[0, sketch_width)``.
        """
        # A bit is set only when the value is strictly above its threshold;
        # values equal to the threshold map to 0.
        bits = (np.asarray(v) > self.biases).astype(np.uint8)
        packed = np.ascontiguousarray(np.packbits(bits, axis=-1))
        # Reinterpret each pair of bytes as one uint16 (native byte order).
        raw_codes = packed.view(np.uint16)
        # Widen before the modulo: with NumPy >= 2 promotion rules,
        # ``uint16_array % width`` raises OverflowError once width > 65535.
        return raw_codes.astype(np.int64) % self.sketch_width

    def fit(self, v):
        """Fit the random projection on ``v`` and learn the median thresholds.

        Returns:
            The fitted instance, so that calls can be chained.
        """
        self.sp = self.sp.fit(v)
        self.init_biases(self.sp.transform(v))
        return self

    def transform(self, v):
        """Project ``v`` with the fitted projection and return its codes."""
        return self.discretize(self.sp.transform(v))

    def transform_to_absolute_codes(self, codes: np.ndarray):
        """Shift the codes of column ``i`` by ``i * sketch_width``.

        For codes in ``[0, sketch_width)`` the shifted values of different
        columns fall into disjoint ranges.

        Args:
            codes: Array whose last axis has length ``sketch_depth``.

        Returns:
            Array of the same shape holding the shifted codes.
        """
        offsets = np.arange(self.sketch_depth, dtype=np.int_) * self.sketch_width
        return codes + offsets
def calculate_absolute_emde_codes(sketch_depth: int, sketch_width: int, embeddings: np.array):
    """Fit an ``Emde`` encoder on ``embeddings`` and return their absolute codes.

    A new ``Emde`` is built from ``sketch_depth`` and ``sketch_width``, fitted
    on ``embeddings``, and then used to encode those same ``embeddings``.  The
    resulting codes are passed through ``Emde.transform_to_absolute_codes``
    and returned.
    """
    encoder = Emde(sketch_width=sketch_width, sketch_depth=sketch_depth)
    encoder.fit(v=embeddings)
    relative_codes = encoder.transform(v=embeddings)
    absolute_codes = encoder.transform_to_absolute_codes(codes=relative_codes)
    return absolute_codes