-
Notifications
You must be signed in to change notification settings - Fork 0
/
ProjectANN.py
479 lines (426 loc) · 24.1 KB
/
ProjectANN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
'''
updated on 03/04/2018
v1.0
@author: Stephen Rawlings
Final Year Project
'''
import winreg # to use windows registry to ID guids given by netifaces
import netifaces # used to identify netwrok interafces on a system and returning the ocrresponding guid
import pickle # used to save the model for further testing and use
import csv #python standard for csv work
import pyshark # tshark wrapper used to capture and parse packets
import time #allows for time be used
import datetime #allows for dates to be used
import pandas # data handler for input into Aritificial neural network
from timeit import default_timer as timer
from sklearn.preprocessing import LabelEncoder
from DrawAnnv2 import DrawNN
def main():
    """Entry point: print the module docstring, set up shared state, run the menu.

    All feature functions are defined inside main() and close over the
    variables below (interface list, iteration counter, IP whitelist).
    """
    print(__doc__)
    # NOTE(review): 'int' shadows the builtin; it holds the interface GUID list
    # returned by netifaces and is read by int_choice()/int_names().
    int = netifaces.interfaces()
    # counter threaded through MLP_Live_predict for the live-detection loop
    mlp_live_iteration = 0
    # whitelist of trusted source addresses; any other source is labelled hostile
    allowed_IP = ['192.168.1.1', '192.168.1.2', '192.168.1.3', '192.168.1.4']
    #cap = pyshark.FileCapture('test.pcap') # For training
def get_ip_layer_name(pkt):
    """Return 4 for an IPv4 packet, 6 for IPv6, or None when neither is present.

    pyshark exposes address fields under pkt.ip for IPv4 but pkt.ipv6 for
    IPv6, so callers need this to pick the right attribute before parsing.
    """
    version_by_layer = {'ip': 4, 'ipv6': 6}
    for layer in pkt.layers:
        version = version_by_layer.get(layer._layer_name)
        if version is not None:
            return version
    return None  # non-IP traffic (e.g. pure ARP)
def packet_info(cap):
    """Print a summary of every packet in *cap* (a pyshark capture or any
    iterable of packets): highest layer, transport layer, timestamp, IP
    version, addresses, length, ports and the running packets/second rate.

    Fixes vs. original: the counter started at 1 and was incremented before
    the first print, so the first packet was labelled "Packet 2" and the
    packets/second figure overcounted by one; get_ip_layer_name was also
    called twice per packet.  Stops cleanly on Ctrl-C.
    """
    start_time = time.time()
    try:
        i = 0
        for pkt in cap:
            i += 1
            try:
                if pkt.highest_layer != 'ARP':
                    ip_layer = get_ip_layer_name(pkt)  # hoisted: was computed twice
                    if ip_layer == 4:
                        ip = pkt.ip
                    elif ip_layer == 6:
                        ip = pkt.ipv6
                    else:
                        ip = None  # ip.src below raises AttributeError -> skipped
                    print('Packet %d' % i)
                    print(pkt.highest_layer)
                    print(pkt.transport_layer)
                    print('Time', time.strftime("%Y-%m-%d %H:%M:%S"))
                    print('Layer: ipv%d' % ip_layer)
                    print('Source IP:', ip.src)
                    print('Destination IP:', ip.dst)
                    print('Length: ', pkt.length)
                    try:
                        # transport-less packets (e.g. ICMP) have no ports
                        print('Source Port', pkt[pkt.transport_layer].srcport)
                        print('Destination Port', pkt[pkt.transport_layer].dstport)
                    except AttributeError:
                        print('Source Port: ', 0)
                        print('Destination Port: ', 0)
                    print(i / (time.time() - start_time))  # packets per second
                    print('')
                else:
                    # ARP carries no IP/transport layer; report its proto fields
                    arp = pkt.arp
                    print(pkt.highest_layer)
                    print(pkt.transport_layer)
                    print('Layer: ipv4')
                    print('Time', time.strftime("%Y-%m-%d %H:%M:%S"))
                    print('Source IP: ', arp.src_proto_ipv4)
                    print('Destination IP: ', arp.dst_proto_ipv4)
                    print('Length: ', pkt.length)
                    print('Source Port: ', 0)
                    print('Destination Port: ', 0)
                    print(i / (time.time() - start_time))
                    print()
            except (AttributeError, UnboundLocalError, TypeError):
                # malformed/unexpected packets are skipped, not fatal
                pass
        return
    except KeyboardInterrupt:
        pass
def csvgather(cap):
    """Write one labelled row per packet in *cap* to 'Data.csv' for training.

    Columns: highest layer, transport layer, whitelisted-source flag (0/1),
    destination IP, ports (0 when absent), length, running packets/second,
    and the target label (1 = source not in allowed_IP, i.e. hostile).

    Fixes vs. original: transport_layer was only assigned on the IPv4 path,
    so every IPv6 row (and any ARP row before the first IPv4 packet) raised
    UnboundLocalError and was silently dropped; it is now bound for all
    branches.  Unused ipv/start/end locals removed.
    """
    start_time = time.time()
    with open('Data.csv', 'w', newline='') as csvfile:
        filewriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        filewriter.writerow(
            ['Highest Layer', 'Transport Layer', 'Source IP', 'Dest IP', 'Source Port', 'Dest Port',
             'Packet Length', 'Packets/Time', 'target'])
        i = 0
        for pkt in cap:
            try:
                if pkt.highest_layer != 'ARP':
                    ip_layer = get_ip_layer_name(pkt)
                    # bound on every path now (fix for silent IPv6 drop)
                    transport_layer = 'None' if pkt.transport_layer is None else pkt.transport_layer
                    if ip_layer == 4:
                        ip = pkt.ip
                    elif ip_layer == 6:
                        ip = pkt.ipv6
                    # label: sources outside the whitelist are treated as hostile
                    ipcat = 1 if ip.src not in allowed_IP else 0
                    target = ipcat
                    try:
                        filewriter.writerow([pkt.highest_layer, transport_layer, ipcat, ip.dst,
                                             pkt[pkt.transport_layer].srcport,
                                             pkt[pkt.transport_layer].dstport,
                                             pkt.length, i / (time.time() - start_time), target])
                        i += 1
                    except AttributeError:
                        # no transport layer -> record ports as 0
                        filewriter.writerow(
                            [pkt.highest_layer, transport_layer, ipcat, ip.dst, 0, 0,
                             pkt.length, i / (time.time() - start_time), target])
                        print("Time: ", time.time() - start_time)
                        print("Packets Collected:", i)
                        i += 1
                else:
                    arp = pkt.arp
                    ipcat = 1 if arp.src_proto_ipv4 not in allowed_IP else 0
                    target = ipcat
                    # ARP has no transport layer; was a stale value from the
                    # previous packet in the original
                    filewriter.writerow(
                        [pkt.highest_layer, 'None', ipcat, arp.dst_proto_ipv4, 0, 0,
                         pkt.length, i / (time.time() - start_time), target])
                    print("Time: ", time.time() - start_time)
                    print("Packets Collected:", i)
                    i += 1
            except (UnboundLocalError, AttributeError):
                # unparseable packet (e.g. non-IP, malformed) -> skip
                pass
def int_names(int_guids):
    """Map network-interface GUIDs to human-readable connection names.

    Looks each GUID up under the Windows registry's network class key;
    entries without a Connection\\Name value stay '(unknown)'.
    Fix vs. original: removed the duplicated `int_names = int_names = ...`
    assignment and the local that shadowed the function's own name.
    """
    names = ['(unknown)'] * len(int_guids)
    reg = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
    reg_key = winreg.OpenKey(reg, r'SYSTEM\CurrentControlSet\Control\Network\{4d36e972-e325-11ce-bfc1-08002be10318}')
    for i, guid in enumerate(int_guids):
        try:
            reg_subkey = winreg.OpenKey(reg_key, guid + r'\Connection')
            names[i] = winreg.QueryValueEx(reg_subkey, 'Name')[0]
        except FileNotFoundError:
            # GUID present in netifaces but not in the registry -> leave '(unknown)'
            pass
    return names
def LabelEncoding(data):
    """Label-encode every categorical column of *data* and return the frame.

    Fix vs. original: the function ignored its argument and re-read the
    hard-coded 'TestingData.csv', so MLP() silently trained on a different
    file than the one the user named.  It now encodes the DataFrame it is
    given.  The bare except is narrowed to Exception.
    """
    columnsToEncode = list(data.select_dtypes(include=['category', 'object']))
    #print(data.dtypes) #Prints each columns d_type
    #print(columnsToEncode) #Prints categorical features
    le = LabelEncoder()
    for feature in columnsToEncode:
        try:
            data[feature] = le.fit_transform(data[feature])
        except Exception:
            # keep going: one bad column should not abort the whole encode
            print('error' + feature)
    return data
def csv_data_check():
    """Interactively display a CSV: all columns, numeric only, or categorical only."""
    csv_name = input("Name of csv file: ")
    frame = pandas.read_csv(csv_name, delimiter=',')
    view_mode = input("""How would you like to view the data?
All (a)
Numerical Only (n)
Categorical Only (c)
""")
    if view_mode == "a":
        print(frame)                                   # everything
    elif view_mode == "n":
        print(frame._get_numeric_data())               # numeric columns only
    elif view_mode == "c":
        print(frame.select_dtypes(include='object'))   # categorical columns only
def Load_model():
    """Prompt for a filename, unpickle a saved model and return it.

    Prints the model's weights and final loss as a sanity check.
    Fix vs. original: the file handle from open() was never closed; it is
    now managed by a with-block.
    SECURITY: pickle.load executes arbitrary code from the file — only
    load model files you created yourself.
    """
    filename = input("Model to load?")
    with open(filename, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    print(loaded_model.coefs_)
    print(loaded_model.loss_)
    return loaded_model
def int_choice():
    """List the machine's network interfaces and return a pyshark live capture
    on the one the user selects.

    'int' is the GUID list captured from the enclosing main(); int_names()
    resolves those GUIDs to readable names via the Windows registry.
    """
    for i, value in enumerate(int_names(int)):
        print(i, value)
    print('\n')
    iface = input("Please select interface: ")
    cap = pyshark.LiveCapture(interface= iface)
    # NOTE(review): sniff_continuously returns a generator that is discarded
    # here; callers iterate 'cap' directly — confirm this call is needed.
    cap.sniff_continuously(packet_count=None)
    return cap
def MLP():
    """Train (or continue training) an MLP classifier on a packet-feature CSV.

    Interactive flow: ask for the dataset CSV; optionally load a previously
    pickled model (answer 'y') instead of building a new MLPClassifier;
    encode categoricals, split train/test, fit, then report prediction
    counts, confusion matrix, classification report and (optionally) the
    learned weights.  Finally offers to pickle the trained model to disk.
    """
    l_data = input("Name of CSV file? ") # User inputs name of dataset CSV file
    load = input("Load model?") # 'y' -> continue training a saved model; anything else -> new model
    if load == 'y':
        mlp = Load_model()
    else:
        from sklearn.neural_network import MLPClassifier # imports the neural network class from scikit-learn
        mlp = MLPClassifier(hidden_layer_sizes=(100,100),activation='logistic', max_iter=1000, verbose=True, tol=0.00000001, early_stopping = True, shuffle = True) # model settings before training
        # hidden_layer_sizes = hidden-layer topology: (5) = one layer of 5 nodes, (5,5) = two layers of 5
        # activation = activation function; 'logistic' is the sigmoid
        # max_iter = maximum number of training iterations
        # verbose = print iteration number and loss during training
        # tol = loss-improvement threshold at which training stops
    data = pandas.read_csv(l_data, delimiter=',') # reads CSV
    data = LabelEncoding(data) # encodes the categorical data into int inputs the model can use
    #print("Encoded Data: ", "\n", data) # entire encoded block for testing and checking values
    #print(data.keys())
    X = data[['Highest Layer', 'Transport Layer', 'Source IP', 'Dest IP', 'Source Port', 'Dest Port','Packet Length', 'Packets/Time']] # feature columns used to train
    #print ("Features: ", "\n", X)
    y = data['target'] # target labels for the MLP (1 = hostile)
    # print ("Targets: ", "\n", y)
    from sklearn.model_selection import train_test_split # needed to split the data into training and testing sets
    from sklearn.preprocessing import StandardScaler # required so that all the inputs are in a comparable range
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    #scaler = StandardScaler()
    #scaler.fit(X_train)
    #X_train = scaler.transform(X_train)
    #X_test = scaler.transform(X_test)
    start_time = timer()
    mlp.fit(X_train, y_train) # fit() performs the actual training
    end_time = timer()
    time_taken = end_time - start_time
    predictions = mlp.predict(X_test)
    print()
    #print("First 50 Predictions: ", "\n" ,mlp.predict(X_test)[0:50]) # first 50 predictions
    print()
    #print("First 50 Probabilities: ", "\n",mlp.predict_proba(X_test)[0:50]) # first 50 probabilities
    print()
    print("Number of Iterations: ", mlp.n_iter_)
    print()
    # tally predicted classes: 1 = hostile, 0 = safe
    hostile = 0
    safe = 0
    for check in predictions:
        if check == 1:
            hostile += 1
        else:
            safe += 1
    print("Safe Packets: ", safe)
    print("Hostile Packets: ", hostile)
    print("Time Taken:", time_taken)
    from sklearn.metrics import classification_report,confusion_matrix
    print("Confusion Matrix: ", "\n", confusion_matrix(y_test,predictions))
    print()
    print ("Classification Report: ", "\n", classification_report(y_test,predictions))
    print()
    ci = input("do you want to see weights and intercepts? " )
    if ci == 'y':
        print("Model Coefficients (Weights): ", "\n", mlp.coefs_)
        print()
        print("Model Intercepts (Nodes): ", "\n", mlp.intercepts_)
    else:
        pass
    save = input("Save model? ")
    if save == 'y':
        filename = input("Filename for saving?: ")
        # NOTE(review): the open() handle is not explicitly closed here
        pickle.dump(mlp, open(filename, 'wb'))
def MLP_Live_predict(cap, modelname, mlp_live_iteration):
    """Classify the packets recorded in 'LiveAnn.csv' with a saved model and
    log the verdict to 'log.txt'.

    Returns the string "Attack" when at least half of the packets are
    predicted hostile, otherwise the incremented iteration counter.
    Note: the increment is local only — ints are immutable, so the caller's
    counter is unchanged unless it uses the return value.

    Fixes vs. original: the pickled-model file handle and both 'log.txt'
    handles were never closed; all three are now managed by with-blocks.
    Removed the unused StandardScaler import.
    SECURITY: pickle.load executes arbitrary code — only load trusted models.
    """
    data = pandas.read_csv('LiveAnn.csv', delimiter=',')
    data = LiveLabelEncoding(data)
    print("Processing Data", "\n")
    print(data)
    # same feature columns the model was trained on (order matters)
    X = data[['Highest Layer', 'Transport Layer', 'Source IP', 'Dest IP', 'Source Port', 'Dest Port','Packet Length', 'Packets/Time' ]]
    with open(modelname, 'rb') as model_file:  # fix: handle was leaked before
        lmlp = pickle.load(model_file)
    predictions = lmlp.predict(X)
    # tally predicted classes: 1 = hostile, 0 = safe
    hostile = 0
    safe = 0
    for check in predictions:
        if check == 1: # change to 0 to force ddos attack
            hostile += 1
        else:
            safe += 1
    print("Safe Packets: ", safe)
    print("Possible Hostile Packets: ", hostile)
    print(100 * hostile/(safe + hostile))
    print ("\n")
    mlp_live_iteration += 1
    # verdict: majority-hostile window counts as an attack
    if hostile >= ((safe + hostile)/2):
        with open('log.txt', 'a+') as testwrite:  # fix: close deterministically
            testwrite.write('Attack Detected at: ')
            testwrite.write(datetime.datetime.now().strftime("%Y-%m-%d %H:%M"))
            testwrite.write('\n')
            testwrite.write('Packets collected: ')
            testwrite.write(str(safe + hostile))
            testwrite.write('\n')
        return ("Attack")
    else:
        with open('log.txt', 'a+') as testwrite:
            testwrite.write('Normal Activity Detected at: ')
            testwrite.write(datetime.datetime.now().strftime("%Y-%m-%d %H:%M"))
            testwrite.write('\n')
            testwrite.write('Packets collected: ')
            testwrite.write(str(safe + hostile))
            testwrite.write('\n \n')
        return mlp_live_iteration
def csv_interval_gather(cap):
    """Capture packets for a 30-second window into 'LiveAnn.csv' (same columns
    as the training CSV but without the 'target' label — this file feeds
    MLP_Live_predict, which supplies the predictions).

    Fixes vs. original: transport_layer was only bound on the IPv4 path, so
    IPv6 rows raised UnboundLocalError and were silently dropped, and ARP
    rows reused a stale value; removed the duplicated ARP check.
    """
    start_time = time.time()
    with open('LiveAnn.csv', 'w', newline='') as csvfile:
        filewriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        filewriter.writerow(['Highest Layer', 'Transport Layer', 'Source IP', 'Dest IP', 'Source Port', 'Dest Port','Packet Length', 'Packets/Time'])
        i = 0
        start = timer()
        for pkt in cap:
            if timer() - start >= 30:
                return  # 30-second collection window elapsed
            try:
                if pkt.highest_layer != 'ARP':
                    print("Packets Collected:", i)
                    ip_layer = get_ip_layer_name(pkt)
                    # bound on every path now (fix for silent IPv6 drop)
                    transport_layer = 'None' if pkt.transport_layer is None else pkt.transport_layer
                    if ip_layer == 4:
                        ip = pkt.ip
                    elif ip_layer == 6:
                        ip = pkt.ipv6
                    # flag sources outside the whitelist
                    ipcat = 1 if ip.src not in allowed_IP else 0
                    try:
                        filewriter.writerow([pkt.highest_layer, transport_layer, ipcat, ip.dst, pkt[pkt.transport_layer].srcport, pkt[pkt.transport_layer].dstport, pkt.length, i/(time.time() - start_time)])
                        print ("Time: ", time.time() - start_time)
                        i += 1
                    except AttributeError:
                        # no transport layer -> record ports as 0
                        filewriter.writerow([pkt.highest_layer, transport_layer, ipcat, ip.dst, 0, 0, pkt.length, i/(time.time() - start_time)])
                        print ("Time: ", time.time() - start_time)
                        i += 1
                else:
                    arp = pkt.arp
                    ipcat = 1 if arp.src_proto_ipv4 not in allowed_IP else 0
                    # ARP has no transport layer
                    filewriter.writerow([pkt.highest_layer, 'None', ipcat, arp.dst_proto_ipv4, 0, 0, pkt.length, i/(time.time() - start_time)])
                    print ("Time: ", time.time() - start_time)
                    i += 1
            except (UnboundLocalError, AttributeError):
                # unparseable packet -> skip
                pass
def LiveLabelEncoding(data):
    """Label-encode every categorical column of *data* and return the frame
    (real-time counterpart of LabelEncoding).

    Fix vs. original: the function ignored its argument and re-read
    'LiveAnn.csv' — redundant since the caller had just read that same
    file; it now encodes the DataFrame it is given.  Bare except narrowed.
    """
    columnsToEncode = list(data.select_dtypes(include=['category', 'object']))
    print(columnsToEncode)
    le = LabelEncoder()
    for feature in columnsToEncode:
        try:
            data[feature] = le.fit_transform(data[feature])
        except Exception:
            # keep going: one bad column should not abort the whole encode
            print ('error ' + feature)
    return data
def menu():
    """Interactive top-level menu dispatching to the tool's features.

    Option 5 loops: gather 30 seconds of traffic, classify it, and stop
    (or on Ctrl-C) once an attack window is detected.

    Fix vs. original: after an attack was detected, the stray line
    `MLP_Live_predict(cap, modelname, mlp_live_iteration) == 0` ran a whole
    extra prediction pass (duplicate log entries and work) only to discard
    the comparison result; it has been removed.
    """
    ans = True
    live = True
    while ans:
        print ("""
1. Visual Packet Sniffer
2. ANN Data gatherer
3. Neural Network Trainer
4. Data Check
5. Live Neural Network
6. Visual Model (Work In Progress)
7. Exit
""")
        ans = input("What would you like to do? ")
        if ans=="1":
            cap = int_choice()
            packet_info(cap)
        elif ans=="2":
            cap = int_choice()
            print("Now Gathering data....")
            csvgather(cap)
        elif ans=="3":
            MLP()
        elif ans =="4":
            csv_data_check()
        elif ans == "5":
            cap = int_choice()
            modelname = input("Please input model: ")
            try:
                while live:
                    csv_interval_gather(cap)
                    # stop the live loop once a window is classified as an attack
                    if MLP_Live_predict(cap, modelname, mlp_live_iteration) == "Attack":
                        live = False
                        print("DDoS ATTACK DETECTED! @ ", datetime.datetime.now().strftime("%Y-%m-%d %H:%M"))
            except KeyboardInterrupt:
                pass
        elif ans == "6":
            network = DrawNN([8,100,100,1])
            network.draw()
        elif ans == "7":
            break
menu()  # last statement of main(): start the interactive menu
main()  # module-level entry point: runs on import/execution