Reverse the readme to the original one #6

Open · wants to merge 3 commits into master
1 change: 1 addition & 0 deletions .gitignore
@@ -87,3 +87,4 @@ ENV/

# Rope project settings
.ropeproject
/.idea/
33 changes: 24 additions & 9 deletions PointerLSTM.py
@@ -1,8 +1,8 @@
import keras.backend as K
from keras.activations import tanh, softmax
from keras.engine import InputSpec
from keras.layers import LSTM
import keras
import tensorflow.keras.backend as K
from tensorflow.keras.activations import tanh, softmax
from tensorflow.keras.layers import InputSpec
from tensorflow.keras.layers import LSTM
from tensorflow import keras


class Attention(keras.layers.Layer):
@@ -12,12 +12,12 @@ class Attention(keras.layers.Layer):

    def __init__(self, hidden_dimensions, name='attention'):
        super(Attention, self).__init__(name=name, trainable=True)
        self.hidden_dimensions = hidden_dimensions
        self.W1 = keras.layers.Dense(hidden_dimensions, use_bias=False)
        self.W2 = keras.layers.Dense(hidden_dimensions, use_bias=False)
        self.V = keras.layers.Dense(1, use_bias=False)

    def call(self, encoder_outputs, dec_output, mask=None):

        w1_e = self.W1(encoder_outputs)
        w2_d = self.W2(dec_output)
        tanh_output = tanh(w1_e + w2_d)
@@ -28,6 +28,11 @@ def call(self, encoder_outputs, dec_output, mask=None):
        att_shape = K.shape(attention_weights)
        return K.reshape(attention_weights, (att_shape[0], att_shape[1]))

    def get_config(self):
        return {
            "hidden_dimensions": self.hidden_dimensions,
        }


class Decoder(keras.layers.Layer):
    """
@@ -36,6 +41,7 @@ class Decoder(keras.layers.Layer):

    def __init__(self, hidden_dimensions):
        super(Decoder, self).__init__()
        self.hidden_dimensions = hidden_dimensions
        self.lstm = keras.layers.LSTM(
            hidden_dimensions, return_sequences=False, return_state=True)

@@ -50,15 +56,19 @@ def get_initial_state(self, inputs):
    def process_inputs(self, x_input, initial_states, constants):
        return self.lstm._process_inputs(x_input, initial_states, constants)

    def get_config(self):
        return {
            "hidden_dimensions": self.hidden_dimensions,
        }


class PointerLSTM(keras.layers.Layer):
    """
    PointerLSTM
    """

    def __init__(self, hidden_dimensions, name='pointer', **kwargs):
        super(PointerLSTM, self).__init__(
            hidden_dimensions, name=name, **kwargs)
        super(PointerLSTM, self).__init__(name=name, **kwargs)
        self.hidden_dimensions = hidden_dimensions
        self.attention = Attention(hidden_dimensions)
        self.decoder = Decoder(hidden_dimensions)
@@ -97,7 +107,7 @@ def call(self, x, training=None, mask=None, states=None):
        return outputs

    def step(self, x_input, states):
        x_input = K.expand_dims(x_input,1)
        x_input = K.expand_dims(x_input, 1)
        input_shape = self.input_spec[0].shape
        en_seq = states[-1]
        _, [h, c] = self.decoder(x_input, states[:-1])
@@ -111,3 +121,8 @@ def get_output_shape_for(self, input_shape):

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1], input_shape[1])

    def get_config(self):
        return {
            "hidden_dimensions": self.hidden_dimensions,
        }
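With `get_config` implemented on the custom layers, the upgraded code should be able to round-trip a saved model, provided the custom classes are passed as `custom_objects`. A minimal sketch, assuming `model` was built as in run.py and the classes are importable from `PointerLSTM.py`; the file name is illustrative, and whether the save works end-to-end also depends on how Keras serializes the extra `states` call argument:

```python
# Sketch: saving and reloading a model that uses the custom layers.
# Assumes `model` already exists; "pointer_model.h5" is an illustrative name.
from tensorflow import keras
from PointerLSTM import PointerLSTM, Attention, Decoder

model.save("pointer_model.h5")  # get_config() lets Keras serialize the layer arguments

restored = keras.models.load_model(
    "pointer_model.h5",
    custom_objects={"PointerLSTM": PointerLSTM,
                    "Attention": Attention,
                    "Decoder": Decoder},
)
```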
3 changes: 1 addition & 2 deletions README.md
@@ -1,2 +1 @@
## Code upgrade of [Pointer Networks](http://arxiv.org/pdf/1511.06391v4.pdf) to run on keras>=2.4.3 and tensorflow>=2.2.0
The original author code is at https://github.com/keon/pointer-networks.git.
# [Pointer Networks](http://arxiv.org/pdf/1511.06391v4.pdf) in keras
3 changes: 1 addition & 2 deletions requirement.txt
@@ -1,2 +1 @@
keras==2.4.3
tensorflow==2.2.0
tensorflow==2.8.0
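Dropping the standalone `keras` pin is consistent with TensorFlow 2.x shipping its own bundled Keras under `tensorflow.keras`. A quick sanity check of the installed versions (a hedged sketch; nothing in the repository performs this check):

```python
# Sketch: confirm the bundled Keras that ships with the pinned TensorFlow.
import tensorflow as tf

print(tf.__version__)        # expected to be 2.8.x with this requirement file
print(tf.keras.__version__)  # the Keras release bundled with that TensorFlow
```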
35 changes: 18 additions & 17 deletions run.py
@@ -1,25 +1,26 @@
from keras.models import Model
from keras.layers import LSTM, Input
from keras.callbacks import LearningRateScheduler
from keras.utils.np_utils import to_categorical
from pointer import PointerLSTM
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Input
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.utils import to_categorical
from PointerLSTM import PointerLSTM
import pickle
import tsp_data as tsp
import numpy as np
import keras
from tensorflow import keras


def scheduler(epoch):
    if epoch < nb_epochs/4:
    if epoch < nb_epochs / 4:
        return learning_rate
    elif epoch < nb_epochs/2:
        return learning_rate*0.5
    return learning_rate*0.1
    elif epoch < nb_epochs / 2:
        return learning_rate * 0.5
    return learning_rate * 0.1


print("preparing dataset...")
t = tsp.Tsp()
X, Y = t.next_batch(10000)
x_test, y_test = t.next_batch(1000)
X, Y = t.next_batch(1000)
x_test, y_test = t.next_batch(10)

YY = []
for y in Y:
@@ -29,23 +30,23 @@ def scheduler(epoch):
hidden_size = 128
seq_len = 10
nb_epochs = 10000
learning_rate = 0.1
learning_rate = 0.01

print("building model...")
main_input = Input(shape=(seq_len, 2), name='main_input')

encoder,state_h, state_c = LSTM(hidden_size,return_sequences = True, name="encoder",return_state=True)(main_input)
decoder = PointerLSTM(hidden_size, name="decoder")(encoder,states=[state_h, state_c])
encoder, state_h, state_c = LSTM(hidden_size, return_sequences=True, name="encoder", return_state=True)(main_input)
decoder = PointerLSTM(hidden_size, name="decoder")(encoder, states=[state_h, state_c])

model = Model(main_input, decoder)
print(model.summary())
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(X, YY, epochs=nb_epochs, batch_size=64,)
model.fit(X, YY, epochs=nb_epochs, batch_size=64, )
print(model.predict(x_test))
print('evaluate : ',model.evaluate(x_test,to_categorical(y_test)))
print('evaluate : ', model.evaluate(x_test, to_categorical(y_test)))
print("------")
print(to_categorical(y_test))
model.save_weights('model_weight_100.hdf5')
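After training, run.py only prints the raw softmax output; the argmax over the last axis gives the index pointed to at each decoding step. A hedged sketch of reloading the saved weights and decoding one test sequence, assuming the same model-building code as above has already been executed (variable names are illustrative):

```python
# Sketch: reload the trained weights and decode pointer indices for one sample.
import numpy as np

model.load_weights('model_weight_100.hdf5')

probs = model.predict(x_test[:1])    # shape: (1, seq_len, seq_len)
tour = np.argmax(probs, axis=-1)[0]  # index chosen at each decoding step
print("predicted visiting order:", tour)
```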