forked from Stonesjtu/Pytorch-NCE
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index_linear.py
59 lines (47 loc) · 2.11 KB
/
index_linear.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
"""An index linear class for generic NCE module"""
import torch
import torch.nn as nn
class IndexLinear(nn.Linear):
    r"""A linear decoder that only scores the rows selected by given indices.

    Instead of projecting onto every output unit, the layer gathers the
    weight rows and bias entries for one target index plus ``N_r`` noise
    indices per position and computes only those scores.

    Args:
        input_size: size of each input feature vector (``in_features``)
        output_size: number of output units (``out_features``)

    Shape (arguments of :meth:`forward`):
        - target_idx: :math:`(B, N)`, ``B`` is the batch size and ``N`` the
          sequence length
        - noise_idx: :math:`(B, N, N_r)`, ``N_r`` is the number of noise
          samples per position
        - input: :math:`(B, N, in\_features)`

    Return:
        - target_score: :math:`(B, N)`
        - noise_score: :math:`(B, N, N_r)`, the un-normalized scores

        When :attr:`nce` is ``False`` a single dense score tensor of shape
        :math:`(B * N, out\_features)` is returned instead.
    """

    # When False, forward() ignores the indices and scores every output unit.
    nce = True

    def __init__(self, input_size, output_size):
        super().__init__(input_size, output_size)
        # Explicit (re-)initialisation kept from the original implementation
        # so that the parameters drawn for a given seed do not change.
        self.reset_parameters()

    def forward(self, target_idx, noise_idx, input):
        """Score the target and noise indices for every input position.

        Args:
            target_idx: indices of the target words, shape ``(B, N)``
            noise_idx: indices of the noise words, shape ``(B, N, N_r)``
            input: input features, shape ``(B, N, in_features)``

        Returns:
            ``(target_score, noise_score)`` with shapes ``(B, N)`` and
            ``(B, N, N_r)``; or, when ``self.nce`` is false, the dense score
            matrix of shape ``(B * N, out_features)``.
        """
        # Flatten all leading dimensions: (B, N, E) -> (B * N, E).
        # reshape() also accepts non-contiguous tensors, unlike view().
        input = input.reshape(-1, input.size(-1))

        if not self.nce:
            # Dense fallback: score every output unit, (B * N, V).
            return super().forward(input)

        # Remembered so the flat result can be folded back to (B, N, ...).
        original_size = target_idx.size()

        target_idx = target_idx.reshape(-1)
        noise_idx = noise_idx.reshape(-1, noise_idx.size(-1))
        # Column 0 holds the target, the remaining columns the noise samples:
        # (B * N, 1 + N_r).
        indices = torch.cat([target_idx.unsqueeze(-1), noise_idx], dim=-1)
        flat_indices = indices.reshape(-1)

        # index_select is used rather than [] indexing because (per the
        # original author) [] could not back-propagate correctly through
        # redundant indices before PyTorch 0.2.0.
        input = input.unsqueeze(1)  # (B * N, 1, E)
        target_batch = (
            self.weight.index_select(0, flat_indices)
            .view(*indices.size(), -1)
            .transpose(1, 2)
        )  # (B * N, E, 1 + N_r)
        bias = (
            self.bias.index_select(0, flat_indices)
            .view_as(indices)
            .unsqueeze(1)
        )  # (B * N, 1, 1 + N_r)

        # out = bias + input @ target_batch, using the non-deprecated
        # baddbmm(input, batch1, batch2) signature (beta = alpha = 1).
        out = torch.baddbmm(bias, input, target_batch).view(*original_size, -1)

        target_score, noise_score = out[:, :, 0], out[:, :, 1:]
        return target_score, noise_score

    def reset_parameters(self):
        """Zero the bias and draw the weights uniformly from [-0.1, 0.1]."""
        init_range = 0.1
        nn.init.zeros_(self.bias)
        nn.init.uniform_(self.weight, -init_range, init_range)