-
Notifications
You must be signed in to change notification settings - Fork 0
/
MK_run.py
97 lines (77 loc) · 2.42 KB
/
MK_run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# -*- coding: utf-8 -*-
"""
Markovian machine-learning classifier for market-state prediction
Created by : BenMobility
Created on Sat Dec 12 15:29:25 2020
Modified on :
UPDATE :
1- keep only the Markov classifier
2- wrap it in a function
"""
#usual imports
import numpy as np
import pandas as pd
#random choices
from random import choices
# split function
def train_test_split(data, n_train):
    """
    Split a dataset row-wise into a leading train part and a trailing test
    part, without shuffling.

    Parameters
    ----------
    data : numpy array (1-D or n x d), pandas DataFrame or pandas Series
        The dataset to split. Rows keep their original order.
    n_train : int
        Number of leading rows that go to the train set.

    Returns
    -------
    train : same type as ``data``
        Rows from the first one up to (not including) row ``n_train``.
    test : same type as ``data``
        Rows from row ``n_train`` until the end of the original dataset.
    """
    # pandas objects have to be sliced positionally through ``.iloc``;
    # tuple indexing such as ``data[:n, :]`` is only valid on numpy arrays.
    if hasattr(data, "iloc"):
        return data.iloc[:n_train], data.iloc[n_train:]
    # Slicing the first axis only gives the same result as ``data[:n, :]``
    # on 2-D arrays and additionally accepts 1-D (univariate) input.
    return data[:n_train], data[n_train:]
#markovian prediction
def mk_predict(path="Data/preprocessed.csv", ratio=0.80):
    """
    First-order Markov prediction on an ordered train/test split.

    The transition weights are estimated on the train rows as the relative
    frequency of each value of column 1 among the rows that share the same
    value in column 0. For every test row a state is then drawn with
    ``random.choices`` using the weights of the row's column-0 state.

    NOTE(review): predictions are sampled from the distribution of column 1
    given column 0, while ``y_true`` is column 0 itself. Confirm against the
    layout of the preprocessed CSV that this is the intended direction.

    Parameters
    ----------
    path : str, optional
        CSV file to load. Its first row is the header and its first column
        the index. Defaults to the preprocessed dataset in the ``Data``
        folder (a forward slash is used so the path works on every OS and
        contains no invalid escape sequence).
    ratio : float, optional
        Fraction of leading rows used as the train set (default 0.80).

    Returns
    -------
    yhat_mark : numpy array of float, one entry per test row
        The sampled (predicted) state for each row of the test set.
    y_true : numpy array, one entry per test row
        Column 0 of the test set.

    Raises
    ------
    ValueError
        If the train split is empty, so no weights can be estimated.
    """
    # load the dataset
    data = pd.read_csv(path, header=0, index_col=0).to_numpy()

    # split
    n_train = int(len(data) * ratio)
    if n_train == 0:
        raise ValueError(
            "the train split is empty; cannot estimate transition weights"
        )
    train, test = train_test_split(data, n_train)

    # State space: every value appearing in the two columns the model reads.
    # Values are mapped to matrix positions by lookup, so labels need not be
    # the integers 0..k-1 and float-typed data can still be used as an index.
    states = np.unique(data[:, :2])
    n_states = len(states)
    from_idx = np.searchsorted(states, train[:, 0])
    to_idx = np.searchsorted(states, train[:, 1])

    # Count every observed (column 0 -> column 1) pair, for all states and
    # all targets (the previous loops stopped one short on both axes).
    counts = np.zeros((n_states, n_states))
    np.add.at(counts, (from_idx, to_idx), 1)

    # Normalise each row into weights. A state never seen in column 0 of the
    # train set has no row of its own, so it falls back to the overall train
    # distribution of column 1 instead of an all-zero row, which
    # random.choices rejects.
    row_totals = counts.sum(axis=1, keepdims=True)
    seen = row_totals[:, 0] > 0
    weights = np.empty_like(counts)
    weights[seen] = counts[seen] / row_totals[seen]
    weights[~seen] = counts.sum(axis=0) / counts.sum()

    # initialize the predicted labels array
    yhat_mark = np.zeros(len(test))

    # Draw one state per test row from the weights of its column-0 state.
    test_idx = np.searchsorted(states, test[:, 0])
    for i, row in enumerate(test_idx):
        yhat_mark[i] = choices(states, weights[row])[0]

    # get the true labels only
    y_true = test[:, 0]
    return yhat_mark, y_true