separate for malware gan 🚧 #182

Open · wants to merge 51 commits into base: master

Changes from all commits · 51 commits
b90a0f8
separate for malware gan :construction:
trung-le-234 Jun 8, 2022
53569b4
update wandb to log data
walter-234 Jun 9, 2022
5b007a9
update wandb log image
walter-234 Jun 10, 2022
783488b
run for TPU info-GAN
walter-234 Jun 10, 2022
5fc4c4a
[update][IG] upload new dataset to config to run
walter-234 Jun 11, 2022
d856cce
[update][IG] upload new dataset to config to run
walter-234 Jun 13, 2022
6661550
[update][IG] update infogan to run with random config
walter-234 Jun 13, 2022
f92b86f
[fix] update wandb
trung-le-234 Jun 14, 2022
beba0a6
[update][IG] optimize and clean code
walter-234 Jun 14, 2022
9b75080
[update][IG] optimize code
walter-234 Jun 15, 2022
a39db86
[update][IG] run model and save model
walter-234 Jun 16, 2022
0dc7365
[update][IG] optimize code
walter-234 Jun 16, 2022
e72d6c5
[update][IG] optimize code
walter-234 Jun 18, 2022
83c9352
[update][IG] clean code #1
walter-234 Jun 19, 2022
956ba47
[Classifier] update classifier model and training model
walter-234 Jun 19, 2022
12638f7
[GAN] update new params to train with 64px and 128 px
walter-234 Jun 21, 2022
b6744a6
[GAN] update new config to train with 64px and 128 px
walter-234 Jun 21, 2022
a3b8e0e
[GAN] optimize code
walter-234 Jun 22, 2022
bd05b28
[GAN] update PE feature to training data
walter-234 Jun 23, 2022
8afb55b
[GAN] update params to train
walter-234 Jun 24, 2022
92a9f71
[GAN] update params to train
walter-234 Jun 24, 2022
2df9d1b
[GAN] update version training with new latent_code params
walter-234 Jun 24, 2022
ed46b8e
[GAN] update generate image for wandb
walter-234 Jun 24, 2022
61baa28
[GAN] update param epoch to test
walter-234 Jun 24, 2022
b4a4105
[GAN] fix bug to run on colab
walter-234 Jun 24, 2022
d78f71f
[GAN] update classes to train
walter-234 Jun 25, 2022
55a1aeb
[GAN] update streamlit_app_v3.py
walter-234 Jun 26, 2022
9674f2c
[GAN] update weights to deploy live
walter-234 Jun 26, 2022
4a7aef0
[Streamlit] update code for demo streamlit
walter-234 Jun 27, 2022
117ede5
[GAN] update config params and function for helper
walter-234 Jun 27, 2022
a63deaa
[GAN] update new data for training gan model
walter-234 Jun 27, 2022
fb0a9af
[GAN] update new data for training gan model
walter-234 Jun 29, 2022
662d132
[GAN] update final data and optimize code
walter-234 Jul 3, 2022
619c993
[GAN] optimize code
walter-234 Jul 5, 2022
8c37174
[GAN] optimize code
walter-234 Jul 19, 2022
b742093
[AC-GAN] Update more parameters for handling AC-GAN
walter-234 Sep 17, 2022
6404bbe
[AC-GAN] Update training AC-GAN models
walter-234 Sep 17, 2022
80f51cb
[Config] Update new training epochs
walter-234 Sep 18, 2022
a0696f3
[Config] Update min max value for checksum
walter-234 Sep 18, 2022
69f5d52
[Config] Update more param for training
walter-234 Sep 18, 2022
2ea9da2
[GAN] update sh file to download data and config training
walter-234 Sep 18, 2022
383d01e
[Config] optimize code
walter-234 Sep 22, 2022
bed62f7
[Config] fix image size and update batch size
walter-234 Sep 22, 2022
325c015
[WanDB] Re-run wandb
walter-234 Sep 22, 2022
7ef7e78
[GenerateSample] Config new way to generate sample while training
walter-234 Sep 22, 2022
b87add1
Update git ignore
walter-234 Sep 22, 2022
c2b4183
[ACGAN] update save model
walter-234 Sep 22, 2022
c381e8e
[ACGAN] fix model to push image
walter-234 Sep 22, 2022
b9e667c
[Categorical] Update Type Tensor and help wanDB
walter-234 Sep 22, 2022
16a825a
[AC-GAN] Fix AC-GAN
walter-234 Sep 22, 2022
c53d40c
Optimize code
walter-234 Sep 22, 2022
7 changes: 7 additions & 0 deletions .gitignore
@@ -10,3 +10,10 @@ implementations/*/images
implementations/*/saved_models

__pycache__
venv
.idea

/applications/gan_malware/local/
/applications/data/
/applications/gan_malware/images/
/applications/gan_malware/weights/
Empty file added applications/__init__.py
Empty file.
6 changes: 6 additions & 0 deletions applications/data/download_rans_dataset.sh
@@ -0,0 +1,6 @@
# Download and unpack the dataset archive (requires the gdown CLI).
FILE=dataset-v1
TAR_FILE=./$FILE.zip
URL="https://drive.google.com/uc?id=1Yt_jVhj-eBaGsb8fXhECcoKZtA9T70q9&export=download"
gdown "$URL" -O "$TAR_FILE"
unzip "$TAR_FILE"
rm "$TAR_FILE"
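Reviewer note: for environments without a shell, a rough Python equivalent of this download script might look like the sketch below. It assumes the gdown package's download() API and reuses the file ID from the URL above; the output filename is an assumption.

import os
import zipfile

import gdown  # pip install gdown

# Sketch of download_rans_dataset.sh in Python: fetch the archive from
# Google Drive, unpack it, then delete the zip.
url = "https://drive.google.com/uc?id=1Yt_jVhj-eBaGsb8fXhECcoKZtA9T70q9&export=download"
output = "dataset-v1.zip"
gdown.download(url, output, quiet=False)
with zipfile.ZipFile(output) as archive:
    archive.extractall(".")
os.remove(output)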
168 changes: 168 additions & 0 deletions applications/gan_malware/classifier_normal.py
@@ -0,0 +1,168 @@
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
from config.config_core import cuda, FloatTensor, LongTensor
from torch.autograd import Variable


def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 32
num_epoch = 20
# PATH_TO_GET_DATA = "../data/dataset-v1.1/fake_data"
PATH_TO_GET_DATA = "../data/dataset-v2.1/fake_data_32"
data = ImageFolder(PATH_TO_GET_DATA, transform)
n_test = int(np.floor(0.2 * len(data)))
n_train = len(data) - n_test
train_ds, test_ds = random_split(data, [n_train, n_test])
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_ds, batch_size=batch_size, shuffle=False)

classes = data.classes


class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        # self.fc3 = nn.Linear(84, 6)
        self.fc3 = nn.Linear(84, 16)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Classifier()
criterion = nn.CrossEntropyLoss()

if cuda:
    print("We are using GPU for training models")
    net.cuda()
    criterion.cuda()

optimizer = optim.Adam(net.parameters(), lr=0.001)
running_loss = []
running_accuracy = []
for epoch in range(num_epoch):  # loop over the dataset multiple times
    for i, batch_data in enumerate(train_loader):
        # get the inputs; batch_data is a list of [inputs, labels]
        inputs = Variable(batch_data[0].type(FloatTensor))
        labels = Variable(batch_data[1].type(LongTensor))

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # track statistics
        _, predictions = torch.max(outputs, 1)
        # count the correct predictions in this batch
        correct = 0
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct += 1
        # use the actual batch size so the last (possibly smaller) batch is counted correctly
        running_accuracy.append(correct / labels.size(0))
        running_loss.append(loss.item())

    print(f'[epoch {epoch + 1}, batch {i}] loss: {loss.item():.5f}')

print('Finished Training')
# running_loss/running_accuracy hold one value per batch, so plot them
# against the batch index instead of range(num_epoch)
plt.plot(running_loss, 'g', label='Training loss')
plt.title('Training loss')
plt.xlabel('Batch')
plt.ylabel('Loss')
plt.legend()
plt.show()

plt.plot(running_accuracy, 'g', label='Training accuracy')
plt.title('Training accuracy')
plt.xlabel('Batch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

PATH = 'weights/classifier_normal.pt'
torch.save(net.state_dict(), PATH)

dataiter = iter(test_loader)
images, labels = next(dataiter)  # .next() is not available on newer DataLoader iterators

# show a batch of test images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(16)))

net = Classifier()
net.load_state_dict(torch.load(PATH))

outputs = net(images)

_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join(f'{classes[predicted[j]]:5s}' for j in range(16)))

correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on test images: {100 * correct // total} %')

# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1

# print accuracy for each class; skip classes absent from the test split
for classname, correct_count in correct_pred.items():
    if total_pred[classname] == 0:
        continue
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
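Reviewer note: the hard-coded 16 * 5 * 5 input size of fc1 only holds for 32x32 RGB inputs (each conv(k=5) + 2x2 pool stage maps 32 -> 14 -> 5). A quick shape check, assuming a dummy batch and the Classifier defined above, confirms this before training:

import torch

# Hypothetical sanity check, not part of the PR: pass a dummy 32x32 RGB
# batch through the conv/pool stack and verify the flattened size.
net = Classifier()
dummy = torch.zeros(1, 3, 32, 32)
out = net.pool(torch.relu(net.conv1(dummy)))  # -> (1, 6, 14, 14)
out = net.pool(torch.relu(net.conv2(out)))    # -> (1, 16, 5, 5)
print(torch.flatten(out, 1).shape)            # torch.Size([1, 400]) == 16 * 5 * 5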
187 changes: 187 additions & 0 deletions applications/gan_malware/classifier_normal_v2.py
@@ -0,0 +1,187 @@
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
from config.config_core import cuda, FloatTensor, LongTensor
from torch.autograd import Variable


def imshow(img):
    img = img / 2 + 0.5  # un-normalize
    np_img = img.numpy()
    plt.imshow(np.transpose(np_img, (1, 2, 0)))
    plt.show()


class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        # size 32
        # self.conv1 = nn.Conv2d(3, 6, 5)
        # self.pool = nn.MaxPool2d(2, 2)
        # self.conv2 = nn.Conv2d(6, 16, 5)
        # self.fc1 = nn.Linear(16 * 5 * 5, 120)
        # self.fc2 = nn.Linear(120, 84)
        # self.fc3 = nn.Linear(84, 16)
        # -----------
        # size 64
        # self.conv1 = nn.Conv2d(3, 16, 3)
        # self.pool = nn.MaxPool2d(2, 2)
        # self.conv2 = nn.Conv2d(16, 32, 3)
        # self.fc1 = nn.Linear(32 * 14 * 14, 120 * 2)
        # self.fc2 = nn.Linear(120 * 2, 84 * 2)
        # self.fc3 = nn.Linear(84 * 2, 16)
        # -----------
        # size 128
        self.conv1 = nn.Conv2d(3, 16, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3)
        self.fc1 = nn.Linear(32 * 30 * 30, 120 * 2)
        self.fc2 = nn.Linear(120 * 2, 84 * 2)
        self.fc3 = nn.Linear(84 * 2, 16)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


def main():
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    batch_size = 32
    num_epoch = 10
    PATH_TO_GET_DATA = "../data/dataset-v2/fake_data_128"
    data = ImageFolder(PATH_TO_GET_DATA, transform)
    n_test = int(np.floor(0.2 * len(data)))
    n_train = len(data) - n_test
    train_ds, test_ds = random_split(data, [n_train, n_test])
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

    classes = data.classes

    net = Classifier()
    criterion = nn.CrossEntropyLoss()

    if cuda:
        print("We are using GPU for training models")
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=0.001)
    running_loss = []
    running_accuracy = []
    for epoch in range(num_epoch):  # loop over the dataset multiple times
        for i, batch_data in enumerate(train_loader):
            # get the inputs; batch_data is a list of [inputs, labels]
            inputs = Variable(batch_data[0].type(FloatTensor))
            labels = Variable(batch_data[1].type(LongTensor))

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # track statistics
            _, predictions = torch.max(outputs, 1)
            # count the correct predictions in this batch
            correct = 0
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct += 1
            # use the actual batch size so the last (possibly smaller) batch is counted correctly
            running_accuracy.append(correct / labels.size(0))
            running_loss.append(loss.item())

        print(f'[epoch {epoch + 1}, batch {i}] loss: {loss.item():.5f}')

    print('Finished Training')
    # running_loss/running_accuracy hold one value per batch, so plot by batch index
    plt.plot(running_loss, 'g', label='Training loss')
    plt.title('Training loss')
    plt.xlabel('Batch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    plt.plot(running_accuracy, 'g', label='Training accuracy')
    plt.title('Training accuracy')
    plt.xlabel('Batch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

    PATH = 'weights/classifier_normal.pt'
    torch.save(net.state_dict(), PATH)

    dataiter = iter(test_loader)
    images, labels = next(dataiter)  # .next() is not available on newer DataLoader iterators

    # show a batch of test images
    imshow(torchvision.utils.make_grid(images))
    print('GroundTruth: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(8)))

    net = Classifier()
    net.load_state_dict(torch.load(PATH))

    outputs = net(images)

    _, predicted = torch.max(outputs, 1)
    print('Predicted: ', ' '.join(f'{classes[predicted[j]]:5s}' for j in range(8)))

    correct = 0
    total = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on test images: {100 * correct // total} % ({correct}/{total})')

    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}

    # again no gradients needed
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            outputs = net(images)
            _, predictions = torch.max(outputs, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1

    # print accuracy for each class; skip classes absent from the test split
    for classname, correct_count in correct_pred.items():
        if total_pred[classname] == 0:
            continue
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} % ({correct_count}/{total_pred[classname]})')


if __name__ == '__main__':
    main()
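Reviewer note: the three commented-out configurations in Classifier correspond to 32, 64, and 128 px inputs, and each fc1 in_features value follows from two conv + 2x2-pool stages. A small sketch (assuming stride-1 convolutions and the floor division that max-pooling applies) reproduces those numbers:

def flat_side(img_size, kernel):
    """Spatial side length after conv(kernel) -> pool(2) -> conv(kernel) -> pool(2)."""
    side = (img_size - kernel + 1) // 2  # first conv + pool
    side = (side - kernel + 1) // 2      # second conv + pool
    return side

print(flat_side(32, 5))   # 5  -> fc1 in_features = 16 * 5 * 5
print(flat_side(64, 3))   # 14 -> fc1 in_features = 32 * 14 * 14
print(flat_side(128, 3))  # 30 -> fc1 in_features = 32 * 30 * 30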