Skip to content

Commit

Permalink
Renamed and Updated Notebook on AutoEncoder
Browse files Browse the repository at this point in the history
  • Loading branch information
leriomaggio committed Apr 12, 2017
1 parent 0fc0146 commit e71185e
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 16 deletions.
32 changes: 18 additions & 14 deletions ...rning - AutoEncoders and Embeddings.ipynb → 3.1 AutoEncoders and Embeddings.ipynb
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@
},
"outputs": [],
"source": [
"DATA_DIRECTORY = os.path.join(os.path.abspath(os.path.curdir), 'data')"
"DATA_DIRECTORY = os.path.join(os.path.abspath(os.path.curdir), 'data', 'word_embeddings')"
]
},
{
Expand Down Expand Up @@ -522,7 +522,8 @@
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
"collapsed": false,
"scrolled": true
},
"outputs": [
{
Expand Down Expand Up @@ -1702,7 +1703,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Train convolutional network for sentiment analysis. Based on\n",
"Train convolutional network for sentiment analysis. \n",
"\n",
"Based on\n",
"\"Convolutional Neural Networks for Sentence Classification\" by Yoon Kim\n",
"http://arxiv.org/pdf/1408.5882v2.pdf\n",
"\n",
Expand Down Expand Up @@ -1760,13 +1763,14 @@
],
"source": [
"import numpy as np\n",
"import data_helpers\n",
"from w2v import train_word2vec\n",
"import word_embedding\n",
"from word2vec import train_word2vec\n",
"\n",
"from keras.models import Sequential, Model\n",
"from keras.layers import (Activation, Dense, Dropout, Embedding, \n",
" Flatten, Input, Merge, \n",
" Convolution1D, MaxPooling1D)\n",
" Flatten, Input, \n",
" Conv1D, MaxPooling1D)\n",
"from keras.layers.merge import Concatenate\n",
"\n",
"np.random.seed(2)"
]
Expand Down Expand Up @@ -1870,7 +1874,7 @@
"source": [
"# Load data\n",
"print(\"Loading data...\")\n",
"x, y, vocabulary, vocabulary_inv = data_helpers.load_data()\n",
"x, y, vocabulary, vocabulary_inv = word_embedding.load_data()\n",
"\n",
"if model_variation=='CNN-non-static' or model_variation=='CNN-static':\n",
" embedding_weights = train_word2vec(x, vocabulary_inv, \n",
Expand Down Expand Up @@ -1935,17 +1939,17 @@
"graph_in = Input(shape=(sequence_length, embedding_dim))\n",
"convs = []\n",
"for fsz in filter_sizes:\n",
" conv = Convolution1D(nb_filter=num_filters,\n",
" filter_length=fsz,\n",
" border_mode='valid',\n",
" activation='relu',\n",
" subsample_length=1)(graph_in)\n",
" conv = Conv1D(filters=num_filters,\n",
"                  kernel_size=fsz,\n",
" padding='valid',\n",
" activation='relu',\n",
" strides=1)(graph_in)\n",
"    pool = MaxPooling1D(pool_size=2)(conv)\n",
" flatten = Flatten()(pool)\n",
" convs.append(flatten)\n",
" \n",
"if len(filter_sizes)>1:\n",
" out = Merge(mode='concat')(convs)\n",
" out = Concatenate()(convs)\n",
"else:\n",
" out = convs[0]\n",
"\n",
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions data_helpers.py → word_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ def load_data_and_labels():
Returns split sentences and labels.
"""
# Load data from files
positive_examples = list(open("./data/rt-polarity.pos", encoding='ISO-8859-1').readlines())
positive_examples = list(open("./data/word_embeddings/rt-polarity.pos", encoding='ISO-8859-1').readlines())
positive_examples = [s.strip() for s in positive_examples]
negative_examples = list(open("./data/rt-polarity.neg", encoding='ISO-8859-1').readlines())
negative_examples = list(open("./data/word_embeddings/rt-polarity.neg", encoding='ISO-8859-1').readlines())
negative_examples = [s.strip() for s in negative_examples]
# Split by words
x_text = positive_examples + negative_examples
Expand Down

0 comments on commit e71185e

Please sign in to comment.