-
Notifications
You must be signed in to change notification settings - Fork 0
/
proj1_helpers.py
52 lines (42 loc) · 1.66 KB
/
proj1_helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# -*- coding: utf-8 -*-
"""some helper functions for project 1."""
import csv
import numpy as np
from implementations import sigmoid
def load_csv_data(data_path, sub_sample=False):
    """Load a labelled CSV file and return y (class labels), tX (features) and ids.

    The first line of the file is skipped as a header. Column 0 is read as
    the event id, column 1 as the class-label string, and every remaining
    column as a numeric feature.

    Args:
        data_path: Path of the CSV file to read.
        sub_sample: When true, keep only every 50th row of each output.

    Returns:
        A tuple ``(yb, input_data, ids)``: ``yb`` is a float array holding 0
        for rows labelled ``'b'`` and 1 for every other label, ``input_data``
        is the 2-D feature matrix (columns 2 onward), and ``ids`` is the
        integer array of event ids.
    """
    # np.atleast_* keeps a file with a single data row from collapsing to a
    # 0-d / 1-D array, which would break len(y) and the column slicing below.
    y = np.atleast_1d(
        np.genfromtxt(data_path, delimiter=",", skip_header=1, dtype=str, usecols=1)
    )
    x = np.atleast_2d(np.genfromtxt(data_path, delimiter=",", skip_header=1))

    # The builtin int replaces np.int, an alias that was removed in NumPy 1.24.
    ids = x[:, 0].astype(int)
    input_data = x[:, 2:]

    # Convert class labels from strings to binary (0, 1): 'b' -> 0, others -> 1.
    yb = np.ones(len(y))
    yb[y == "b"] = 0

    # Sub-sample: keep one row out of every 50.
    if sub_sample:
        yb = yb[::50]
        input_data = input_data[::50]
        ids = ids[::50]

    return yb, input_data, ids
def predict_labels(weights, data, method):
    """Turn model scores into 0/1 class predictions.

    The score of each row is ``np.dot(data, weights)``. When ``method`` is
    ``"logistic"`` the scores are first passed through ``sigmoid``; for any
    other value the raw scores are used. Scores at or below 0.5 become 0,
    scores above 0.5 become 1.

    Args:
        weights: Weight vector of the model.
        data: Test data matrix.
        method: ``"logistic"`` to apply the sigmoid, anything else to skip it.

    Returns:
        The score array, thresholded in place so that it holds 0s and 1s.
    """
    scores = np.dot(data, weights)
    if method == "logistic":
        scores = sigmoid(scores)

    # Both masks are taken before writing; they are disjoint, so the
    # assignment order does not matter.
    at_or_below = scores <= 0.5
    above = scores > 0.5
    scores[at_or_below] = 0
    scores[above] = 1
    return scores
def create_csv_submission(ids, y_pred, name):
    """
    Creates an output file in .csv format for submission to Kaggle or AIcrowd

    Arguments: ids (event ids associated with each prediction)
               y_pred (predicted class labels)
               name (string name of .csv output file to be created)

    The file has an ``Id,Prediction`` header followed by one row per
    (id, prediction) pair; both values are converted with ``int`` before
    being written.
    """
    # newline="" is required by the csv module: without it, platforms that
    # translate "\n" would turn the writer's "\r\n" into "\r\r\n" and the
    # output would contain a blank line after every row.
    with open(name, "w", newline="") as csvfile:
        fieldnames = ['Id', 'Prediction']
        writer = csv.DictWriter(csvfile, delimiter=",", fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(
            {'Id': int(event_id), 'Prediction': int(label)}
            for event_id, label in zip(ids, y_pred)
        )