diff --git a/.gitignore b/.gitignore index 72364f9..01df5df 100644 --- a/.gitignore +++ b/.gitignore @@ -87,3 +87,4 @@ ENV/ # Rope project settings .ropeproject +/.idea/ diff --git a/PointerLSTM.py b/PointerLSTM.py index 4e16c78..bf52115 100644 --- a/PointerLSTM.py +++ b/PointerLSTM.py @@ -1,8 +1,8 @@ -import keras.backend as K -from keras.activations import tanh, softmax -from keras.engine import InputSpec -from keras.layers import LSTM -import keras +import tensorflow.keras.backend as K +from tensorflow.keras.activations import tanh, softmax +from tensorflow.keras.layers import InputSpec +from tensorflow.keras.layers import LSTM +from tensorflow import keras class Attention(keras.layers.Layer): @@ -12,12 +12,12 @@ class Attention(keras.layers.Layer): def __init__(self, hidden_dimensions, name='attention'): super(Attention, self).__init__(name=name, trainable=True) + self.hidden_dimensions = hidden_dimensions self.W1 = keras.layers.Dense(hidden_dimensions, use_bias=False) self.W2 = keras.layers.Dense(hidden_dimensions, use_bias=False) self.V = keras.layers.Dense(1, use_bias=False) def call(self, encoder_outputs, dec_output, mask=None): - w1_e = self.W1(encoder_outputs) w2_d = self.W2(dec_output) tanh_output = tanh(w1_e + w2_d) @@ -28,6 +28,11 @@ def call(self, encoder_outputs, dec_output, mask=None): att_shape = K.shape(attention_weights) return K.reshape(attention_weights, (att_shape[0], att_shape[1])) + def get_config(self): + return { + "hidden_dimensions": self.hidden_dimensions, + } + class Decoder(keras.layers.Layer): """ @@ -36,6 +41,7 @@ class Decoder(keras.layers.Layer): def __init__(self, hidden_dimensions): super(Decoder, self).__init__() + self.hidden_dimensions = hidden_dimensions self.lstm = keras.layers.LSTM( hidden_dimensions, return_sequences=False, return_state=True) @@ -50,6 +56,11 @@ def get_initial_state(self, inputs): def process_inputs(self, x_input, initial_states, constants): return self.lstm._process_inputs(x_input, initial_states, constants) + def get_config(self): + return { + "hidden_dimensions": self.hidden_dimensions, + } + class PointerLSTM(keras.layers.Layer): """ @@ -57,8 +68,7 @@ class PointerLSTM(keras.layers.Layer): """ def __init__(self, hidden_dimensions, name='pointer', **kwargs): - super(PointerLSTM, self).__init__( - hidden_dimensions, name=name, **kwargs) + super(PointerLSTM, self).__init__(name=name, **kwargs) self.hidden_dimensions = hidden_dimensions self.attention = Attention(hidden_dimensions) self.decoder = Decoder(hidden_dimensions) @@ -97,7 +107,7 @@ def call(self, x, training=None, mask=None, states=None): return outputs def step(self, x_input, states): - x_input = K.expand_dims(x_input,1) + x_input = K.expand_dims(x_input, 1) input_shape = self.input_spec[0].shape en_seq = states[-1] _, [h, c] = self.decoder(x_input, states[:-1]) @@ -111,3 +121,8 @@ def get_output_shape_for(self, input_shape): def compute_output_shape(self, input_shape): return (input_shape[0], input_shape[1], input_shape[1]) + + def get_config(self): + return { + "hidden_dimensions": self.hidden_dimensions, + } diff --git a/README.md b/README.md index aea042a..1d6f456 100644 --- a/README.md +++ b/README.md @@ -1,2 +1 @@ -## Code upgrade of [Pointer Networks](http://arxiv.org/pdf/1511.06391v4.pdf) to run on keras>=2.4.3 and tensorflow>=2.2.0 -The original author code is at https://github.com/keon/pointer-networks.git. +# [Pointer Networks](http://arxiv.org/pdf/1511.06391v4.pdf) in keras diff --git a/requirement.txt b/requirement.txt index 1969db4..7b694f0 100644 --- a/requirement.txt +++ b/requirement.txt @@ -1,2 +1 @@ -keras==2.4.3 -tensorflow==2.2.0 \ No newline at end of file +tensorflow==2.8.0 \ No newline at end of file diff --git a/run.py b/run.py index 5f2883f..9e56fdc 100644 --- a/run.py +++ b/run.py @@ -1,25 +1,26 @@ -from keras.models import Model -from keras.layers import LSTM, Input -from keras.callbacks import LearningRateScheduler -from keras.utils.np_utils import to_categorical -from pointer import PointerLSTM +from tensorflow.keras.models import Model +from tensorflow.keras.layers import LSTM, Input +from tensorflow.keras.callbacks import LearningRateScheduler +from tensorflow.keras.utils import to_categorical +from PointerLSTM import PointerLSTM import pickle import tsp_data as tsp import numpy as np -import keras +from tensorflow import keras def scheduler(epoch): - if epoch < nb_epochs/4: + if epoch < nb_epochs / 4: return learning_rate - elif epoch < nb_epochs/2: - return learning_rate*0.5 - return learning_rate*0.1 + elif epoch < nb_epochs / 2: + return learning_rate * 0.5 + return learning_rate * 0.1 + print("preparing dataset...") t = tsp.Tsp() -X, Y = t.next_batch(10000) -x_test, y_test = t.next_batch(1000) +X, Y = t.next_batch(1000) +x_test, y_test = t.next_batch(10) YY = [] for y in Y: @@ -29,13 +30,13 @@ def scheduler(epoch): hidden_size = 128 seq_len = 10 nb_epochs = 10000 -learning_rate = 0.1 +learning_rate = 0.01 print("building model...") main_input = Input(shape=(seq_len, 2), name='main_input') -encoder,state_h, state_c = LSTM(hidden_size,return_sequences = True, name="encoder",return_state=True)(main_input) -decoder = PointerLSTM(hidden_size, name="decoder")(encoder,states=[state_h, state_c]) +encoder, state_h, state_c = LSTM(hidden_size, return_sequences=True, name="encoder", return_state=True)(main_input) +decoder = PointerLSTM(hidden_size, name="decoder")(encoder, states=[state_h, state_c]) model = Model(main_input, decoder) print(model.summary()) @@ -43,9 +44,9 @@ def scheduler(epoch): loss='categorical_crossentropy', metrics=['accuracy']) -model.fit(X, YY, epochs=nb_epochs, batch_size=64,) +model.fit(X, YY, epochs=nb_epochs, batch_size=64, ) print(model.predict(x_test)) -print('evaluate : ',model.evaluate(x_test,to_categorical(y_test))) +print('evaluate : ', model.evaluate(x_test, to_categorical(y_test))) print("------") print(to_categorical(y_test)) model.save_weights('model_weight_100.hdf5')