Skip to content

Commit

Permalink
Renamed and Updated Notebook on AutoEncoder
Browse files Browse the repository at this point in the history
  • Loading branch information
leriomaggio committed Apr 12, 2017
1 parent 0fc0146 commit e71185e
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 16 deletions.
32 changes: 18 additions & 14 deletions ...rning - AutoEncoders and Embeddings.ipynb → 3.1 AutoEncoders and Embeddings.ipynb
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@
},
"outputs": [],
"source": [
"DATA_DIRECTORY = os.path.join(os.path.abspath(os.path.curdir), 'data')"
"DATA_DIRECTORY = os.path.join(os.path.abspath(os.path.curdir), 'data', 'word_embeddings')"
]
},
{
Expand Down Expand Up @@ -522,7 +522,8 @@
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
"collapsed": false,
"scrolled": true
},
"outputs": [
{
Expand Down Expand Up @@ -1702,7 +1703,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Train convolutional network for sentiment analysis. Based on\n",
"Train convolutional network for sentiment analysis. \n",
"\n",
"Based on\n",
"\"Convolutional Neural Networks for Sentence Classification\" by Yoon Kim\n",
"http://arxiv.org/pdf/1408.5882v2.pdf\n",
"\n",
Expand Down Expand Up @@ -1760,13 +1763,14 @@
],
"source": [
"import numpy as np\n",
"import data_helpers\n",
"from w2v import train_word2vec\n",
"import word_embedding\n",
"from word2vec import train_word2vec\n",
"\n",
"from keras.models import Sequential, Model\n",
"from keras.layers import (Activation, Dense, Dropout, Embedding, \n",
" Flatten, Input, Merge, \n",
" Convolution1D, MaxPooling1D)\n",
" Flatten, Input, \n",
" Conv1D, MaxPooling1D)\n",
"from keras.layers.merge import Concatenate\n",
"\n",
"np.random.seed(2)"
]
Expand Down Expand Up @@ -1870,7 +1874,7 @@
"source": [
"# Load data\n",
"print(\"Loading data...\")\n",
"x, y, vocabulary, vocabulary_inv = data_helpers.load_data()\n",
"x, y, vocabulary, vocabulary_inv = word_embedding.load_data()\n",
"\n",
"if model_variation=='CNN-non-static' or model_variation=='CNN-static':\n",
" embedding_weights = train_word2vec(x, vocabulary_inv, \n",
Expand Down Expand Up @@ -1935,17 +1939,17 @@
"graph_in = Input(shape=(sequence_length, embedding_dim))\n",
"convs = []\n",
"for fsz in filter_sizes:\n",
" conv = Convolution1D(nb_filter=num_filters,\n",
" filter_length=fsz,\n",
" border_mode='valid',\n",
" activation='relu',\n",
" subsample_length=1)(graph_in)\n",
" conv = Conv1D(filters=num_filters,\n",
"                  kernel_size=fsz,\n",
" padding='valid',\n",
" activation='relu',\n",
" strides=1)(graph_in)\n",
"    pool = MaxPooling1D(pool_size=2)(conv)\n",
" flatten = Flatten()(pool)\n",
" convs.append(flatten)\n",
" \n",
"if len(filter_sizes)>1:\n",
" out = Merge(mode='concat')(convs)\n",
" out = Concatenate()(convs)\n",
"else:\n",
" out = convs[0]\n",
"\n",
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions data_helpers.py → word_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ def load_data_and_labels():
Returns split sentences and labels.
"""
# Load data from files
positive_examples = list(open("./data/rt-polarity.pos", encoding='ISO-8859-1').readlines())
positive_examples = list(open("./data/word_embeddings/rt-polarity.pos", encoding='ISO-8859-1').readlines())
positive_examples = [s.strip() for s in positive_examples]
negative_examples = list(open("./data/rt-polarity.neg", encoding='ISO-8859-1').readlines())
negative_examples = list(open("./data/word_embeddings/rt-polarity.neg", encoding='ISO-8859-1').readlines())
negative_examples = [s.strip() for s in negative_examples]
# Split by words
x_text = positive_examples + negative_examples
Expand Down

0 comments on commit e71185e

Please sign in to comment.