separate for malware gan 🚧 #182

Open · wants to merge 51 commits into base: master

Changes from all commits · 51 commits
b90a0f8
separate for malware gan :construction:
trung-le-234 Jun 8, 2022
53569b4
update wandb to log data
walter-234 Jun 9, 2022
5b007a9
update wandb log image
walter-234 Jun 10, 2022
783488b
run for TPU info-GAN
walter-234 Jun 10, 2022
5fc4c4a
[update][IG] upload new dataset to config to run
walter-234 Jun 11, 2022
d856cce
[update][IG] upload new dataset to config to run
walter-234 Jun 13, 2022
6661550
[update][IG] update infogan to run with random config
walter-234 Jun 13, 2022
f92b86f
[fix] update wandb
trung-le-234 Jun 14, 2022
beba0a6
[update][IG] optimize and clean code
walter-234 Jun 14, 2022
9b75080
[update][IG] optimize code
walter-234 Jun 15, 2022
a39db86
[update][IG] run model and save model
walter-234 Jun 16, 2022
0dc7365
[update][IG] optimize code
walter-234 Jun 16, 2022
e72d6c5
[update][IG] optimize code
walter-234 Jun 18, 2022
83c9352
[update][IG] clean code #1
walter-234 Jun 19, 2022
956ba47
[Classifier] update classifier model and training model
walter-234 Jun 19, 2022
12638f7
[GAN] update new params to train with 64px and 128 px
walter-234 Jun 21, 2022
b6744a6
[GAN] update new config to train with 64px and 128 px
walter-234 Jun 21, 2022
a3b8e0e
[GAN] optimize code
walter-234 Jun 22, 2022
bd05b28
[GAN] update PE feature to training data
walter-234 Jun 23, 2022
8afb55b
[GAN] update params to train
walter-234 Jun 24, 2022
92a9f71
[GAN] update params to train
walter-234 Jun 24, 2022
2df9d1b
[GAN] update version training with new latent_code params
walter-234 Jun 24, 2022
ed46b8e
[GAN] update generate image for wandb
walter-234 Jun 24, 2022
61baa28
[GAN] update param epoch to test
walter-234 Jun 24, 2022
b4a4105
[GAN] fix bug to run on colab
walter-234 Jun 24, 2022
d78f71f
[GAN] update classes to train
walter-234 Jun 25, 2022
55a1aeb
[GAN] update streamlit_app_v3.py
walter-234 Jun 26, 2022
9674f2c
[GAN] update weights to deploy live
walter-234 Jun 26, 2022
4a7aef0
[Streamlit] update code for demo streamlit
walter-234 Jun 27, 2022
117ede5
[GAN] update config params and function for helper
walter-234 Jun 27, 2022
a63deaa
[GAN] update new data for training gan model
walter-234 Jun 27, 2022
fb0a9af
[GAN] update new data for training gan model
walter-234 Jun 29, 2022
662d132
[GAN] update final data and optimize code
walter-234 Jul 3, 2022
619c993
[GAN] optimize code
walter-234 Jul 5, 2022
8c37174
[GAN] optimize code
walter-234 Jul 19, 2022
b742093
[AC-GAN] Update more parameters for handling AC-GAN
walter-234 Sep 17, 2022
6404bbe
[AC-GAN] Update training AC-GAN models
walter-234 Sep 17, 2022
80f51cb
[Config] Update new training epochs
walter-234 Sep 18, 2022
a0696f3
[Config] Update min max value for checksum
walter-234 Sep 18, 2022
69f5d52
[Config] Update more param for training
walter-234 Sep 18, 2022
2ea9da2
[GAN] update sh file to download data and config training
walter-234 Sep 18, 2022
383d01e
[Config] optimize code
walter-234 Sep 22, 2022
bed62f7
[Config] fix image size and update batch size
walter-234 Sep 22, 2022
325c015
[WanDB] Re-run wandb
walter-234 Sep 22, 2022
7ef7e78
[GenerateSample] Config new way to generate sample while training
walter-234 Sep 22, 2022
b87add1
Update git ignore
walter-234 Sep 22, 2022
c2b4183
[ACGAN] update save model
walter-234 Sep 22, 2022
c381e8e
[ACGAN] fix model to push image
walter-234 Sep 22, 2022
b9e667c
[Categorical] Update Type Tensor and help wanDB
walter-234 Sep 22, 2022
16a825a
[AC-GAN] Fix AC-GAN
walter-234 Sep 22, 2022
c53d40c
Optimize code
walter-234 Sep 22, 2022
7 changes: 7 additions & 0 deletions .gitignore
@@ -10,3 +10,10 @@ implementations/*/images
implementations/*/saved_models

__pycache__
venv
.idea

/applications/gan_malware/local/
/applications/data/
/applications/gan_malware/images/
/applications/gan_malware/weights/
Empty file added applications/__init__.py
Empty file.
6 changes: 6 additions & 0 deletions applications/data/download_rans_dataset.sh
@@ -0,0 +1,6 @@
# Download and unpack the dataset archive (requires the gdown CLI).
FILE=dataset-v1
TAR_FILE=./$FILE.zip
URL="https://drive.google.com/uc?id=1Yt_jVhj-eBaGsb8fXhECcoKZtA9T70q9&export=download"
gdown "$URL" -O "$TAR_FILE"
unzip "$TAR_FILE"
rm "$TAR_FILE"
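Reviewer note: for environments without a shell, a rough Python equivalent of this download script might look like the sketch below. It assumes the gdown package's download() API and reuses the file ID from the URL above; the output filename is an assumption.

import os
import zipfile

import gdown  # pip install gdown

# Sketch of download_rans_dataset.sh in Python: fetch the archive from
# Google Drive, unpack it, then delete the zip.
url = "https://drive.google.com/uc?id=1Yt_jVhj-eBaGsb8fXhECcoKZtA9T70q9&export=download"
output = "dataset-v1.zip"
gdown.download(url, output, quiet=False)
with zipfile.ZipFile(output) as archive:
    archive.extractall(".")
os.remove(output)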
168 changes: 168 additions & 0 deletions applications/gan_malware/classifier_normal.py
@@ -0,0 +1,168 @@
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
from config.config_core import cuda, FloatTensor, LongTensor
from torch.autograd import Variable


def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 32
num_epoch = 20
# PATH_TO_GET_DATA = "../data/dataset-v1.1/fake_data"
PATH_TO_GET_DATA = "../data/dataset-v2.1/fake_data_32"
data = ImageFolder(PATH_TO_GET_DATA, transform)
n_test = int(np.floor(0.2 * len(data)))
n_train = len(data) - n_test
train_ds, test_ds = random_split(data, [n_train, n_test])
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_ds, batch_size=batch_size, shuffle=False)

classes = data.classes


class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        # self.fc3 = nn.Linear(84, 6)
        self.fc3 = nn.Linear(84, 16)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Classifier()
criterion = nn.CrossEntropyLoss()

if cuda:
    print("We are using GPU for training models")
    net.cuda()
    criterion.cuda()

optimizer = optim.Adam(net.parameters(), lr=0.001)
running_loss = []
running_accuracy = []
for epoch in range(num_epoch):  # loop over the dataset multiple times
    for i, batch_data in enumerate(train_loader):
        # get the inputs; batch_data is a list of [inputs, labels]
        inputs = Variable(batch_data[0].type(FloatTensor))
        labels = Variable(batch_data[1].type(LongTensor))

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # track statistics
        _, predictions = torch.max(outputs, 1)
        # count the correct predictions in this batch
        correct = 0
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct += 1
        # use the actual batch size so the last (possibly smaller) batch is counted correctly
        running_accuracy.append(correct / labels.size(0))
        running_loss.append(loss.item())

    print(f'[epoch {epoch + 1}, batch {i}] loss: {loss.item():.5f}')

print('Finished Training')
# running_loss/running_accuracy hold one value per batch, so plot them
# against the batch index instead of range(num_epoch)
plt.plot(running_loss, 'g', label='Training loss')
plt.title('Training loss')
plt.xlabel('Batch')
plt.ylabel('Loss')
plt.legend()
plt.show()

plt.plot(running_accuracy, 'g', label='Training accuracy')
plt.title('Training accuracy')
plt.xlabel('Batch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

PATH = 'weights/classifier_normal.pt'
torch.save(net.state_dict(), PATH)

dataiter = iter(test_loader)
images, labels = next(dataiter)  # .next() is not available on newer DataLoader iterators

# show a batch of test images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(16)))

net = Classifier()
net.load_state_dict(torch.load(PATH))

outputs = net(images)

_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join(f'{classes[predicted[j]]:5s}' for j in range(16)))

correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on test images: {100 * correct // total} %')

# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1

# print accuracy for each class; skip classes absent from the test split
for classname, correct_count in correct_pred.items():
    if total_pred[classname] == 0:
        continue
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
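Reviewer note: the hard-coded 16 * 5 * 5 input size of fc1 only holds for 32x32 RGB inputs (each conv(k=5) + 2x2 pool stage maps 32 -> 14 -> 5). A quick shape check, assuming a dummy batch and the Classifier defined above, confirms this before training:

import torch

# Hypothetical sanity check, not part of the PR: pass a dummy 32x32 RGB
# batch through the conv/pool stack and verify the flattened size.
net = Classifier()
dummy = torch.zeros(1, 3, 32, 32)
out = net.pool(torch.relu(net.conv1(dummy)))  # -> (1, 6, 14, 14)
out = net.pool(torch.relu(net.conv2(out)))    # -> (1, 16, 5, 5)
print(torch.flatten(out, 1).shape)            # torch.Size([1, 400]) == 16 * 5 * 5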
187 changes: 187 additions & 0 deletions applications/gan_malware/classifier_normal_v2.py
@@ -0,0 +1,187 @@
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
from config.config_core import cuda, FloatTensor, LongTensor
from torch.autograd import Variable


def imshow(img):
    img = img / 2 + 0.5  # un-normalize
    np_img = img.numpy()
    plt.imshow(np.transpose(np_img, (1, 2, 0)))
    plt.show()


class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        # size 32
        # self.conv1 = nn.Conv2d(3, 6, 5)
        # self.pool = nn.MaxPool2d(2, 2)
        # self.conv2 = nn.Conv2d(6, 16, 5)
        # self.fc1 = nn.Linear(16 * 5 * 5, 120)
        # self.fc2 = nn.Linear(120, 84)
        # self.fc3 = nn.Linear(84, 16)
        # -----------
        # size 64
        # self.conv1 = nn.Conv2d(3, 16, 3)
        # self.pool = nn.MaxPool2d(2, 2)
        # self.conv2 = nn.Conv2d(16, 32, 3)
        # self.fc1 = nn.Linear(32 * 14 * 14, 120 * 2)
        # self.fc2 = nn.Linear(120 * 2, 84 * 2)
        # self.fc3 = nn.Linear(84 * 2, 16)
        # -----------
        # size 128
        self.conv1 = nn.Conv2d(3, 16, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3)
        self.fc1 = nn.Linear(32 * 30 * 30, 120 * 2)
        self.fc2 = nn.Linear(120 * 2, 84 * 2)
        self.fc3 = nn.Linear(84 * 2, 16)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


def main():
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    batch_size = 32
    num_epoch = 10
    PATH_TO_GET_DATA = "../data/dataset-v2/fake_data_128"
    data = ImageFolder(PATH_TO_GET_DATA, transform)
    n_test = int(np.floor(0.2 * len(data)))
    n_train = len(data) - n_test
    train_ds, test_ds = random_split(data, [n_train, n_test])
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

    classes = data.classes

    net = Classifier()
    criterion = nn.CrossEntropyLoss()

    if cuda:
        print("We are using GPU for training models")
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=0.001)
    running_loss = []
    running_accuracy = []
    for epoch in range(num_epoch):  # loop over the dataset multiple times
        for i, batch_data in enumerate(train_loader):
            # get the inputs; batch_data is a list of [inputs, labels]
            inputs = Variable(batch_data[0].type(FloatTensor))
            labels = Variable(batch_data[1].type(LongTensor))

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # track statistics
            _, predictions = torch.max(outputs, 1)
            # count the correct predictions in this batch
            correct = 0
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct += 1
            # use the actual batch size so the last (possibly smaller) batch is counted correctly
            running_accuracy.append(correct / labels.size(0))
            running_loss.append(loss.item())

        print(f'[epoch {epoch + 1}, batch {i}] loss: {loss.item():.5f}')

    print('Finished Training')
    # running_loss/running_accuracy hold one value per batch, so plot by batch index
    plt.plot(running_loss, 'g', label='Training loss')
    plt.title('Training loss')
    plt.xlabel('Batch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    plt.plot(running_accuracy, 'g', label='Training accuracy')
    plt.title('Training accuracy')
    plt.xlabel('Batch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

    PATH = 'weights/classifier_normal.pt'
    torch.save(net.state_dict(), PATH)

    dataiter = iter(test_loader)
    images, labels = next(dataiter)  # .next() is not available on newer DataLoader iterators

    # show a batch of test images
    imshow(torchvision.utils.make_grid(images))
    print('GroundTruth: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(8)))

    net = Classifier()
    net.load_state_dict(torch.load(PATH))

    outputs = net(images)

    _, predicted = torch.max(outputs, 1)
    print('Predicted: ', ' '.join(f'{classes[predicted[j]]:5s}' for j in range(8)))

    correct = 0
    total = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on test images: {100 * correct // total} % ({correct}/{total})')

    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}

    # again no gradients needed
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            outputs = net(images)
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1

    # print accuracy for each class; skip classes absent from the test split
    for classname, correct_count in correct_pred.items():
        if total_pred[classname] == 0:
            continue
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} % ({correct_count}/{total_pred[classname]})')


if __name__ == '__main__':
    main()
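Reviewer note: the three commented-out configurations in Classifier correspond to 32, 64, and 128 px inputs, and each fc1 in_features value follows from two conv + 2x2-pool stages. A small sketch (assuming stride-1 convolutions and the floor division that max-pooling applies) reproduces those numbers:

def flat_side(img_size, kernel):
    """Spatial side length after conv(kernel) -> pool(2) -> conv(kernel) -> pool(2)."""
    side = (img_size - kernel + 1) // 2  # first conv + pool
    side = (side - kernel + 1) // 2      # second conv + pool
    return side

print(flat_side(32, 5))   # 5  -> fc1 in_features = 16 * 5 * 5
print(flat_side(64, 3))   # 14 -> fc1 in_features = 32 * 14 * 14
print(flat_side(128, 3))  # 30 -> fc1 in_features = 32 * 30 * 30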