-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
106 lines (89 loc) · 3.4 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn import metrics
import matplotlib.pyplot as plt
# Load the dataset from "drug200.csv" (path is relative to the working
# directory). The 'BP', 'Cholesterol', 'Drug' and 'Sex' columns of this frame
# are re-encoded as integers further down in the script.
file = pd.read_csv("drug200.csv")
# Integer codes for the three-level columns handled by categories_map
# (the script applies it to 'BP' and 'Cholesterol').
categories = {level: code for code, level in enumerate(('HIGH', 'NORMAL', 'LOW'))}

# Integer codes for the target drug labels. Note the order is not
# alphabetical: 'drugY' is coded 3 and 'drugX' is coded 4.
categories_dg = {
    drug: code
    for code, drug in enumerate(('drugA', 'drugB', 'drugC', 'drugY', 'drugX'))
}

# Integer codes for the 'Sex' column.
categories_sex = {label: code for code, label in enumerate(('F', 'M'))}

# Accumulator lists filled in place by the *_map functions; each one ends up
# holding the encoded values of a single column.
list_BP, list_Ch, list_Dg, list_Sx = [], [], [], []
def categories_map(data, word, list_):
    """Encode column ``word`` of ``data`` with the ``categories`` codes.

    The column's values are appended to ``list_``. Every entry of ``list_``
    that is a key of the module-level ``categories`` dict ('HIGH', 'NORMAL',
    'LOW') is then replaced by its integer code, and the resulting list is
    stored back as column ``word`` of ``data``.

    Args:
        data: Object supporting ``data[word]`` reads and writes (the script
            passes a pandas DataFrame).
        word: Name of the column to encode.
        list_: List that receives the encoded values. It is mutated in place
            so the caller can keep using the same list afterwards.

    Returns:
        None. Results are delivered through ``list_`` and ``data[word]``.

    Entries that are not keys of ``categories`` are left unchanged.
    """
    list_.extend(data[word])
    # Slice assignment replaces the contents while keeping the caller's list
    # object, which callers rely on after this function returns.
    list_[:] = [categories.get(value, value) for value in list_]
    # Write to the frame that was passed in rather than the global ``file``,
    # so the function also works for frames other than the module-level one.
    data[word] = list_
def drugs_map(data, word, list_):
    """Encode column ``word`` of ``data`` with the ``categories_dg`` codes.

    The column's values are appended to ``list_``. Every entry of ``list_``
    that is a key of the module-level ``categories_dg`` dict ('drugA',
    'drugB', 'drugC', 'drugY', 'drugX') is then replaced by its integer code,
    and the resulting list is stored back as column ``word`` of ``data``.

    Args:
        data: Object supporting ``data[word]`` reads and writes (the script
            passes a pandas DataFrame).
        word: Name of the column to encode.
        list_: List that receives the encoded values. It is mutated in place;
            the script later uses this same list as the training target.

    Returns:
        None. Results are delivered through ``list_`` and ``data[word]``.

    Entries that are not keys of ``categories_dg`` are left unchanged.
    """
    list_.extend(data[word])
    # Slice assignment replaces the contents while keeping the caller's list
    # object, which callers rely on after this function returns.
    list_[:] = [categories_dg.get(value, value) for value in list_]
    # Write to the frame that was passed in rather than the global ``file``,
    # so the function also works for frames other than the module-level one.
    data[word] = list_
def sex_map(data, word, list_):
    """Encode column ``word`` of ``data`` with the ``categories_sex`` codes.

    The column's values are appended to ``list_``. Every entry of ``list_``
    that is a key of the module-level ``categories_sex`` dict ('F', 'M') is
    then replaced by its integer code, and the resulting list is stored back
    as column ``word`` of ``data``.

    Args:
        data: Object supporting ``data[word]`` reads and writes (the script
            passes a pandas DataFrame).
        word: Name of the column to encode.
        list_: List that receives the encoded values. It is mutated in place
            so the caller can keep using the same list afterwards.

    Returns:
        None. Results are delivered through ``list_`` and ``data[word]``.

    Entries that are not keys of ``categories_sex`` are left unchanged.
    """
    list_.extend(data[word])
    # Slice assignment replaces the contents while keeping the caller's list
    # object, which callers rely on after this function returns.
    list_[:] = [categories_sex.get(value, value) for value in list_]
    # Write to the frame that was passed in rather than the global ``file``,
    # so the function also works for frames other than the module-level one.
    data[word] = list_
# --- Encode the categorical columns -----------------------------------------
# Each call rewrites one column of ``file`` with integer codes and fills the
# matching accumulator list; list_Dg ends up holding the encoded target.
categories_map(file, 'BP', list_BP)
categories_map(file, 'Cholesterol', list_Ch)
drugs_map(file, 'Drug', list_Dg)
sex_map(file, 'Sex', list_Sx)

# --- Build features / target and split --------------------------------------
# Features are every column except the target 'Drug'.
X = file.drop('Drug', axis=1).to_numpy()
y = list_Dg
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only and reuse it for the test rows.
# Fitting on the full dataset before splitting would leak test-set mean and
# variance into training and bias the reported metrics.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# --- Train and predict ------------------------------------------------------
model = LogisticRegression(solver='lbfgs', max_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# --- Confusion matrix plot --------------------------------------------------
# Pin the label set so the matrix always has one row/column per drug, in the
# same order as the tick labels, even when a drug is missing from the test
# split or from the predictions.
drug_names = list(categories_dg)
drug_codes = [categories_dg[name] for name in drug_names]
cm = confusion_matrix(y_test, y_pred, labels=drug_codes)

fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
ax.figure.colorbar(im, ax=ax)
ax.set(
    xticks=np.arange(cm.shape[1]),
    yticks=np.arange(cm.shape[0]),
    xticklabels=drug_names,
    yticklabels=drug_names,
    title='Confusion Matrix',
    ylabel='True label',
    xlabel='Predicted label',
)
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

# Annotate every cell with its count; switch to white text on dark cells so
# the number stays readable.
fmt = 'd'
thresh = cm.max() / 2.
for i, j in np.ndindex(cm.shape):
    ax.text(
        j, i, format(cm[i, j], fmt),
        ha="center", va="center",
        color="white" if cm[i, j] > thresh else "black",
    )
fig.tight_layout()
plt.savefig('graph.png')

# --- Summary metrics (macro-averaged over the drug classes), in percent -----
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred, average='macro')
recall = metrics.recall_score(y_test, y_pred, average='macro')
print(f"Accuracy:{accuracy*100:.2f}")
print(f"Precision:{precision*100:.2f}")
print(f"Recall:{recall*100:.2f}")