-
Notifications
You must be signed in to change notification settings - Fork 0
/
kerasCNN.py
executable file
·98 lines (85 loc) · 3.14 KB
/
kerasCNN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/python3 -W all
'''
source: https://github.com/keras-team/keras/blob/master/examples/imdb_cnn.py
This example demonstrates the use of Convolution1D for text classification.
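The figures below are quoted from the upstream IMDB example, not measured
for this adaptation (which reads its own data and uses a different maxlen):
Gets to 0.89 test accuracy after 2 epochs.
90s/epoch on Intel i5 2.4 GHz CPU.
10s/epoch on Tesla K40 GPU.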
'''
from __future__ import print_function
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import Conv1D, GlobalMaxPooling1D
from keras.datasets import imdb
import kerasRun
import numpy
import sys
def singleRun(x_train, y_train, x_test, y_test,
              max_features=5000, maxlen=49526, batch_size=32,
              embedding_dims=50, filters=250, kernel_size=3,
              hidden_dims=250, epochs=2):
    '''
    Build, compile and fit a 1D-convolutional binary classifier.

    The network is: Embedding -> Dropout -> Conv1D(relu) ->
    GlobalMaxPooling1D -> Dense -> Dropout -> relu -> Dense(1) -> sigmoid,
    trained with binary cross-entropy and the Adam optimizer, reporting
    accuracy. The test set is passed to fit() as validation data, so it is
    evaluated after every epoch.

    Arguments:
    x_train, y_train -- training inputs and labels, handed to model.fit()
        unchanged.
    x_test, y_test -- held-out inputs and labels, used as validation_data.
        NOTE(review): inputs are presumably rows of integer token indices,
        each of length maxlen with values below max_features -- confirm
        against kerasRun.makeNumeric; nothing here pads or truncates.
    max_features -- first argument of the Embedding layer (vocabulary size).
    maxlen -- input_length of the Embedding layer. The default 49526
        replaces the 400 used by the upstream example.
    batch_size -- batch size used by fit().
    embedding_dims -- second argument of the Embedding layer.
    filters -- number of Conv1D filters.
    kernel_size -- width of each Conv1D filter.
    hidden_dims -- units in the hidden Dense layer.
    epochs -- number of training epochs.

    Every hyperparameter defaults to the value that used to be hard-coded,
    so existing four-argument calls behave exactly as before.

    Returns an empty tuple; the fitted model is not handed back to the
    caller (kept that way for backward compatibility).
    '''
    print('Build model...')
    model = Sequential()

    # Start with an embedding layer that maps vocabulary indices
    # into embedding_dims dimensions.
    model.add(Embedding(max_features,
                        embedding_dims,
                        input_length=maxlen))
    model.add(Dropout(0.2))

    # Convolution over the sequence axis: learns `filters` filters,
    # each spanning kernel_size consecutive positions.
    model.add(Conv1D(filters,
                     kernel_size,
                     padding='valid',
                     activation='relu',
                     strides=1))
    # Max pooling over the whole sequence:
    model.add(GlobalMaxPooling1D())

    # Vanilla hidden layer; dropout is applied before the relu,
    # as in the upstream example.
    model.add(Dense(hidden_dims))
    model.add(Dropout(0.2))
    model.add(Activation('relu'))

    # Project onto a single output unit and squash it with a sigmoid:
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test))
    return ()
def main(argv):
    '''
    Read the train and test files, encode them numerically and train.

    argv is handed to kerasRun.processOpts, which yields the train and test
    file; each is loaded with kerasRun.readData and its "text" and "classes"
    entries are used. Train and test items are encoded together in a single
    kerasRun.makeNumeric call and then separated again at the original
    train length. The shape of the encoded training text is printed before
    singleRun is started.

    Note: the pad_sequences step of the upstream example is not applied
    here; the encoded data is passed to singleRun as makeNumeric returns it.

    Returns None.
    '''
    def encodeJointly(trainPart, testPart):
        # One makeNumeric call over train followed by test, then cut the
        # result back into its two halves at the train length.
        cut = len(trainPart)
        merged = list(trainPart)
        merged.extend(testPart)
        encoded = kerasRun.makeNumeric(merged)
        return encoded[:cut], encoded[cut:]

    trainFile, testFile = kerasRun.processOpts(argv)

    trainSet = kerasRun.readData(trainFile)
    rawTrainText = trainSet["text"]
    rawTrainClasses = trainSet["classes"]
    testSet = kerasRun.readData(testFile)

    xTrain, xTest = encodeJointly(rawTrainText, testSet["text"])
    yTrain, yTest = encodeJointly(rawTrainClasses, testSet["classes"])

    # Report the dimensions of the encoded training text.
    print("{0}".format(numpy.array(xTrain).shape))

    singleRun(xTrain, yTrain, xTest, yTest)
# Script entry point: run main on the command line and exit with its result.
if __name__ == "__main__":
    exitStatus = main(sys.argv)
    sys.exit(exitStatus)