Reverse the readme to the original one #6

Open · wants to merge 3 commits into master
1 change: 1 addition & 0 deletions .gitignore
@@ -87,3 +87,4 @@ ENV/

# Rope project settings
.ropeproject
/.idea/
33 changes: 24 additions & 9 deletions PointerLSTM.py
@@ -1,8 +1,8 @@
import keras.backend as K
from keras.activations import tanh, softmax
from keras.engine import InputSpec
from keras.layers import LSTM
import keras
import tensorflow.keras.backend as K
from tensorflow.keras.activations import tanh, softmax
from tensorflow.keras.layers import InputSpec
from tensorflow.keras.layers import LSTM
from tensorflow import keras


class Attention(keras.layers.Layer):
@@ -12,12 +12,12 @@ class Attention(keras.layers.Layer):

    def __init__(self, hidden_dimensions, name='attention'):
        super(Attention, self).__init__(name=name, trainable=True)
        self.hidden_dimensions = hidden_dimensions
        self.W1 = keras.layers.Dense(hidden_dimensions, use_bias=False)
        self.W2 = keras.layers.Dense(hidden_dimensions, use_bias=False)
        self.V = keras.layers.Dense(1, use_bias=False)

    def call(self, encoder_outputs, dec_output, mask=None):

        w1_e = self.W1(encoder_outputs)
        w2_d = self.W2(dec_output)
        tanh_output = tanh(w1_e + w2_d)
@@ -28,6 +28,11 @@ def call(self, encoder_outputs, dec_output, mask=None):
        att_shape = K.shape(attention_weights)
        return K.reshape(attention_weights, (att_shape[0], att_shape[1]))

    def get_config(self):
        return {
            "hidden_dimensions": self.hidden_dimensions,
        }


class Decoder(keras.layers.Layer):
    """
@@ -36,6 +41,7 @@ class Decoder(keras.layers.Layer):

    def __init__(self, hidden_dimensions):
        super(Decoder, self).__init__()
        self.hidden_dimensions = hidden_dimensions
        self.lstm = keras.layers.LSTM(
            hidden_dimensions, return_sequences=False, return_state=True)

@@ -50,15 +56,19 @@ def get_initial_state(self, inputs):
    def process_inputs(self, x_input, initial_states, constants):
        return self.lstm._process_inputs(x_input, initial_states, constants)

    def get_config(self):
        return {
            "hidden_dimensions": self.hidden_dimensions,
        }


class PointerLSTM(keras.layers.Layer):
    """
    PointerLSTM
    """

    def __init__(self, hidden_dimensions, name='pointer', **kwargs):
        super(PointerLSTM, self).__init__(
            hidden_dimensions, name=name, **kwargs)
        super(PointerLSTM, self).__init__(name=name, **kwargs)
        self.hidden_dimensions = hidden_dimensions
        self.attention = Attention(hidden_dimensions)
        self.decoder = Decoder(hidden_dimensions)
@@ -97,7 +107,7 @@ def call(self, x, training=None, mask=None, states=None):
        return outputs

    def step(self, x_input, states):
        x_input = K.expand_dims(x_input,1)
        x_input = K.expand_dims(x_input, 1)
        input_shape = self.input_spec[0].shape
        en_seq = states[-1]
        _, [h, c] = self.decoder(x_input, states[:-1])
@@ -111,3 +121,8 @@ def get_output_shape_for(self, input_shape):

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1], input_shape[1])

    def get_config(self):
        return {
            "hidden_dimensions": self.hidden_dimensions,
        }
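With `get_config` implemented on the custom layers, the upgraded code should be able to round-trip a saved model, provided the custom classes are passed as `custom_objects`. A minimal sketch, assuming `model` was built as in run.py and the classes are importable from `PointerLSTM.py`; the file name is illustrative, and whether the save works end-to-end also depends on how Keras serializes the extra `states` call argument:

```python
# Sketch: saving and reloading a model that uses the custom layers.
# Assumes `model` already exists; "pointer_model.h5" is an illustrative name.
from tensorflow import keras
from PointerLSTM import PointerLSTM, Attention, Decoder

model.save("pointer_model.h5")  # get_config() lets Keras serialize the layer arguments

restored = keras.models.load_model(
    "pointer_model.h5",
    custom_objects={"PointerLSTM": PointerLSTM,
                    "Attention": Attention,
                    "Decoder": Decoder},
)
```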
3 changes: 1 addition & 2 deletions README.md
@@ -1,2 +1 @@
## Code upgrade of [Pointer Networks](http://arxiv.org/pdf/1511.06391v4.pdf) to run on keras>=2.4.3 and tensorflow>=2.2.0
The original author code is at https://github.com/keon/pointer-networks.git.
# [Pointer Networks](http://arxiv.org/pdf/1511.06391v4.pdf) in keras
3 changes: 1 addition & 2 deletions requirement.txt
@@ -1,2 +1 @@
keras==2.4.3
tensorflow==2.2.0
tensorflow==2.8.0
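Dropping the standalone `keras` pin is consistent with TensorFlow 2.x shipping its own bundled Keras under `tensorflow.keras`. A quick sanity check of the installed versions (a hedged sketch; nothing in the repository performs this check):

```python
# Sketch: confirm the bundled Keras that ships with the pinned TensorFlow.
import tensorflow as tf

print(tf.__version__)        # expected to be 2.8.x with this requirement file
print(tf.keras.__version__)  # the Keras release bundled with that TensorFlow
```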
35 changes: 18 additions & 17 deletions run.py
@@ -1,25 +1,26 @@
from keras.models import Model
from keras.layers import LSTM, Input
from keras.callbacks import LearningRateScheduler
from keras.utils.np_utils import to_categorical
from pointer import PointerLSTM
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Input
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.utils import to_categorical
from PointerLSTM import PointerLSTM
import pickle
import tsp_data as tsp
import numpy as np
import keras
from tensorflow import keras


def scheduler(epoch):
    if epoch < nb_epochs/4:
    if epoch < nb_epochs / 4:
        return learning_rate
    elif epoch < nb_epochs/2:
        return learning_rate*0.5
    return learning_rate*0.1
    elif epoch < nb_epochs / 2:
        return learning_rate * 0.5
    return learning_rate * 0.1


print("preparing dataset...")
t = tsp.Tsp()
X, Y = t.next_batch(10000)
x_test, y_test = t.next_batch(1000)
X, Y = t.next_batch(1000)
x_test, y_test = t.next_batch(10)

YY = []
for y in Y:
@@ -29,23 +30,23 @@ def scheduler(epoch):
hidden_size = 128
seq_len = 10
nb_epochs = 10000
learning_rate = 0.1
learning_rate = 0.01

print("building model...")
main_input = Input(shape=(seq_len, 2), name='main_input')

encoder,state_h, state_c = LSTM(hidden_size,return_sequences = True, name="encoder",return_state=True)(main_input)
decoder = PointerLSTM(hidden_size, name="decoder")(encoder,states=[state_h, state_c])
encoder, state_h, state_c = LSTM(hidden_size, return_sequences=True, name="encoder", return_state=True)(main_input)
decoder = PointerLSTM(hidden_size, name="decoder")(encoder, states=[state_h, state_c])

model = Model(main_input, decoder)
print(model.summary())
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(X, YY, epochs=nb_epochs, batch_size=64,)
model.fit(X, YY, epochs=nb_epochs, batch_size=64, )
print(model.predict(x_test))
print('evaluate : ',model.evaluate(x_test,to_categorical(y_test)))
print('evaluate : ', model.evaluate(x_test, to_categorical(y_test)))
print("------")
print(to_categorical(y_test))
model.save_weights('model_weight_100.hdf5')
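After training, run.py only prints the raw softmax output; the argmax over the last axis gives the index pointed to at each decoding step. A hedged sketch of reloading the saved weights and decoding one test sequence, assuming the same model-building code as above has already been executed (variable names are illustrative):

```python
# Sketch: reload the trained weights and decode pointer indices for one sample.
import numpy as np

model.load_weights('model_weight_100.hdf5')

probs = model.predict(x_test[:1])    # shape: (1, seq_len, seq_len)
tour = np.argmax(probs, axis=-1)[0]  # index chosen at each decoding step
print("predicted visiting order:", tour)
```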