diff --git a/3.1 Unsupervised Learning - AutoEncoders and Embeddings.ipynb b/3.1 AutoEncoders and Embeddings.ipynb old mode 100755 new mode 100644 similarity index 99% rename from 3.1 Unsupervised Learning - AutoEncoders and Embeddings.ipynb rename to 3.1 AutoEncoders and Embeddings.ipynb index 4d44a60..9071d84 --- a/3.1 Unsupervised Learning - AutoEncoders and Embeddings.ipynb +++ b/3.1 AutoEncoders and Embeddings.ipynb @@ -416,7 +416,7 @@ }, "outputs": [], "source": [ - "DATA_DIRECTORY = os.path.join(os.path.abspath(os.path.curdir), 'data')" + "DATA_DIRECTORY = os.path.join(os.path.abspath(os.path.curdir), 'data', 'word_embeddings')" ] }, { @@ -522,7 +522,8 @@ "cell_type": "code", "execution_count": 10, "metadata": { - "collapsed": true + "collapsed": false, + "scrolled": true }, "outputs": [ { @@ -1702,7 +1703,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Train convolutional network for sentiment analysis. Based on\n", + "Train convolutional network for sentiment analysis. \n", + "\n", + "Based on\n", "\"Convolutional Neural Networks for Sentence Classification\" by Yoon Kim\n", "http://arxiv.org/pdf/1408.5882v2.pdf\n", "\n", @@ -1760,13 +1763,14 @@ ], "source": [ "import numpy as np\n", - "import data_helpers\n", - "from w2v import train_word2vec\n", + "import word_embedding\n", + "from word2vec import train_word2vec\n", "\n", "from keras.models import Sequential, Model\n", "from keras.layers import (Activation, Dense, Dropout, Embedding, \n", - " Flatten, Input, Merge, \n", - " Convolution1D, MaxPooling1D)\n", + " Flatten, Input, \n", + " Conv1D, MaxPooling1D)\n", + "from keras.layers.merge import Concatenate\n", "\n", "np.random.seed(2)" ] @@ -1870,7 +1874,7 @@ "source": [ "# Load data\n", "print(\"Loading data...\")\n", - "x, y, vocabulary, vocabulary_inv = data_helpers.load_data()\n", + "x, y, vocabulary, vocabulary_inv = word_embedding.load_data()\n", "\n", "if model_variation=='CNN-non-static' or model_variation=='CNN-static':\n", " embedding_weights = train_word2vec(x, vocabulary_inv, \n", @@ -1935,17 +1939,17 @@ "graph_in = Input(shape=(sequence_length, embedding_dim))\n", "convs = []\n", "for fsz in filter_sizes:\n", - " conv = Convolution1D(nb_filter=num_filters,\n", - " filter_length=fsz,\n", - " border_mode='valid',\n", - " activation='relu',\n", - " subsample_length=1)(graph_in)\n", + " conv = Conv1D(filters=num_filters,\n", + " filter_length=fsz,\n", + " padding='valid',\n", + " activation='relu',\n", + " strides=1)(graph_in)\n", " pool = MaxPooling1D(pool_length=2)(conv)\n", " flatten = Flatten()(pool)\n", " convs.append(flatten)\n", " \n", "if len(filter_sizes)>1:\n", - " out = Merge(mode='concat')(convs)\n", + " out = Concatenate()(convs)\n", "else:\n", " out = convs[0]\n", "\n", diff --git a/w2v.py b/word2vec.py similarity index 100% rename from w2v.py rename to word2vec.py diff --git a/data_helpers.py b/word_embedding.py similarity index 94% rename from data_helpers.py rename to word_embedding.py index ee65a82..9b351d0 100644 --- a/data_helpers.py +++ b/word_embedding.py @@ -33,9 +33,9 @@ def load_data_and_labels(): Returns split sentences and labels. """ # Load data from files - positive_examples = list(open("./data/rt-polarity.pos", encoding='ISO-8859-1').readlines()) + positive_examples = list(open("./data/word_embeddings/rt-polarity.pos", encoding='ISO-8859-1').readlines()) positive_examples = [s.strip() for s in positive_examples] - negative_examples = list(open("./data/rt-polarity.neg", encoding='ISO-8859-1').readlines()) + negative_examples = list(open("./data/word_embeddings/rt-polarity.neg", encoding='ISO-8859-1').readlines()) negative_examples = [s.strip() for s in negative_examples] # Split by words x_text = positive_examples + negative_examples