-
Notifications
You must be signed in to change notification settings - Fork 6
/
linear_regression.py
165 lines (144 loc) · 5.66 KB
/
linear_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
from random import random, sample, choice
from math import floor
from tqdm import tqdm
from numpy import array, dot, mean
from numpy.linalg import pinv
from sys import exit
def generate_data():
"""
We will generate data with a clear pattern.
This ensures we have an idea of the desired result.
This is only for demonstration purposes, real data is needed in practice.
"""
coeff = [0.4, -0.3, 0.2, -0.1]
x = [[random() for j in range(len(coeff))] for i in range(1000)]
y = [dot(i, coeff) for i in x]
return array(x), array(y)
def multiple_linear_regression(inputs, outputs):
"""
Get the best expected outcome.
This is expected to equal the coefficients in generate_data().
"""
X, Y = array(inputs), array(outputs)
X_t, Y_t = X.transpose(), Y.transpose()
coeff = dot((pinv((dot(X_t, X)))), (dot(X_t, Y)))
Y_p = dot(X, coeff)
Y_mean = mean(Y)
SST = array([(i - Y_mean) ** 2 for i in Y]).sum()
SSR = array([(i - j) ** 2 for i, j in zip(Y, Y_p)]).sum()
COD = (1 - (SSR / SST)) * 100.0
av_error = (SSR / len(Y))
return {'COD': COD, 'coeff': coeff, 'error': av_error}
def check_termination_condition(best_individual):
"""
Check if the current_best_individual is better of equal to the expected.
"""
if ((best_individual['COD'] >= 99.0)
or (generation_count == max_generations)):
return True
else:
return False
def create_individual(individual_size):
"""
Create an individual.
"""
return [random() for i in range(individual_size)]
def create_population(individual_size, population_size):
"""
Create an initial population.
"""
return [create_individual(individual_size) for i in range(population_size)]
def get_fitness(individual, inputs):
"""
Calculate the fitness of an individual.
Return the Coefficient of Determination, average error and weight.
We use the error to get the best individual.
"""
predicted_outputs = dot(array(inputs), array(individual))
output_mean = mean(outputs)
SST = array(
[(i - output_mean) ** 2 for i in outputs]).sum()
SSR = array(
[(i - j) ** 2 for i, j in zip(outputs, predicted_outputs)]).sum()
COD = (1 - (SSR / SST)) * 100.0
av_error = (SSR / len(outputs))
return {'COD': COD, 'error': av_error, 'coeff': individual}
def evaluate_population(population):
"""
Evaluate a population of individuals and return the best among them.
"""
fitness_list = [get_fitness(individual, inputs)
for individual in tqdm(population)]
error_list = sorted(fitness_list, key=lambda i: i['error'])
best_individuals = error_list[: selection_size]
best_individuals_stash.append(best_individuals[0]['coeff'])
print('Error: ', best_individuals[0]['error'],
'COD: ', best_individuals[0]['COD'])
return best_individuals
def crossover(parent_1, parent_2):
"""
Return offspring given two parents.
Unlike real scenarios, genes in the chromosomes aren't necessarily linked.
"""
child = {}
loci = [i for i in range(0, individual_size)]
loci_1 = sample(loci, floor(0.5*(individual_size)))
loci_2 = [i for i in loci if i not in loci_1]
chromosome_1 = [[i, parent_1['coeff'][i]] for i in loci_1]
chromosome_2 = [[i, parent_2['coeff'][i]] for i in loci_2]
child.update({key: value for (key, value) in chromosome_1})
child.update({key: value for (key, value) in chromosome_2})
return [child[i] for i in loci]
def mutate(individual):
"""
Mutate an individual.
The gene transform decides whether we'll add or deduct a random value.
"""
loci = [i for i in range(0, individual_size)]
no_of_genes_mutated = floor(probability_of_gene_mutating*individual_size)
loci_to_mutate = sample(loci, no_of_genes_mutated)
for locus in loci_to_mutate:
gene_transform = choice([-1, 1])
change = gene_transform*random()
individual[locus] = individual[locus] + change
return individual
def get_new_generation(selected_individuals):
"""
Given selected individuals, create a new population by mating them.
Here we also apply variation operations like mutation and crossover.
"""
parent_pairs = [sample(selected_individuals, 2)
for i in range(population_size)]
offspring = [crossover(pair[0], pair[1]) for pair in parent_pairs]
offspring_indices = [i for i in range(population_size)]
offspring_to_mutate = sample(
offspring_indices,
floor(probability_of_individual_mutating*population_size)
)
mutated_offspring = [[i, mutate(offspring[i])]
for i in offspring_to_mutate]
for child in mutated_offspring:
offspring[child[0]] = child[1]
return offspring
inputs, outputs = generate_data()
individual_size = len(inputs[0])
population_size = 1000
selection_size = floor(0.1*population_size)
max_generations = 50
probability_of_individual_mutating = 0.1
probability_of_gene_mutating = 0.25
best_possible = multiple_linear_regression(inputs, outputs)
best_individuals_stash = [create_individual(individual_size)]
initial_population = create_population(individual_size, 1000)
current_population = initial_population
termination = False
generation_count = 0
while termination is False:
current_best_individual = get_fitness(best_individuals_stash[-1], inputs)
print('Generation: ', generation_count)
best_individuals = evaluate_population(current_population)
current_population = get_new_generation(best_individuals)
termination = check_termination_condition(current_best_individual)
generation_count += 1
else:
print(get_fitness(best_individuals_stash[-1], inputs))