-
Notifications
You must be signed in to change notification settings - Fork 4
/
QModel.py
434 lines (304 loc) · 11.3 KB
/
QModel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
from DecisionMaking.Configuration import ConfigurationError
from DecisionMaking.Constants import *
from DecisionMaking.Exceptions import *
"""
Represents a Q-state in the Q-model
"""
class QState(object):
    """
    A single (action, Q-value) entry attached to a state of the Q-model.

    Besides the Q-value it keeps a counter of how many times the action has
    been taken and a human readable label that is used when printing.
    """

    def __init__(self, action, qvalue=0):
        """
        Create the q-state.

        action -- an (action_type, action_value) pair
        qvalue -- the initial Q-value of the action
        """
        self.action = action
        self.qvalue = qvalue
        self.num_taken = 0

        # Build the printable label once; it depends only on the action.
        kind, amount = action
        if kind == ADD_VMS:
            label = "Add %s VMs " % amount
        elif kind == REMOVE_VMS:
            label = "Remove %s VMs" % amount
        else:
            label = "no op "
        self.action_name = label

    def get_action(self):
        """Return the action that corresponds to this q-state."""
        return self.action

    def get_qvalue(self):
        """Return the Q-value of this q-state."""
        return self.qvalue

    def set_qvalue(self, qvalue):
        """Replace the Q-value of this q-state."""
        self.qvalue = qvalue

    def incr_taken(self):
        """Record that the action has been taken one more time."""
        self.num_taken = self.num_taken + 1

    def get_num_taken(self):
        """Return how many times the action has been taken."""
        return self.num_taken

    def __str__(self):
        """Return the label, Q-value and taken-counter on one line."""
        fields = (self.action_name, self.qvalue, self.num_taken)
        return "Action: %s \tQ-value: %2.3f \tTaken: %d" % fields

    def __repr__(self):
        """Use the same text as __str__."""
        return str(self)
"""
Represents a State in the Q-model
"""
class State:
    """
    A state of the Q-model.

    A state is described by a list of (name, values) parameter tuples and
    owns the q-states (one per action) that can be taken from it.
    """

    def __init__(self, parameters=[]):
        """
        Create a state from an iterable of (name, values) tuples.

        The iterable is copied, so the default list is never mutated and
        states built from one another do not share their parameter list.
        """
        self.parameters = list(parameters)
        self.qstates = []
        self.num_visited = 0

    def get_parameters(self):
        """Return the list of (name, values) tuples of this state."""
        return self.parameters

    def add_new_parameter(self, name, values):
        """Append a (name, values) tuple to the parameters of this state."""
        entry = (name, values)
        self.parameters.append(entry)

    def get_parameter(self, param):
        """
        Return the values stored for the parameter called param.

        The first matching entry wins; None is returned when there is none.
        """
        return next((vals for key, vals in self.parameters if key == param), None)

    def add_qstate(self, qstate):
        """Append a q-state to the q-states of this state."""
        self.qstates.append(qstate)

    def get_qstates(self):
        """Return the list holding all the q-states of this state."""
        return self.qstates

    def get_qstate(self, action):
        """
        Return the q-state whose action equals the given action.

        None is returned when this state has no such q-state.
        """
        return next((q for q in self.qstates if q.get_action() == action), None)

    def get_optimal_action(self):
        """
        Return the action with the highest Q-value.

        On ties the q-state that was added first is preferred. Raises
        IndexError when the state has no q-states.
        """
        leader = self.qstates[0]
        for challenger in self.qstates[1:]:
            # strictly greater, so an earlier q-state keeps the lead on ties
            if challenger.get_qvalue() > leader.get_qvalue():
                leader = challenger
        return leader.get_action()

    def get_max_qvalue(self):
        """Return the largest Q-value among the q-states of this state."""
        return max(q.get_qvalue() for q in self.qstates)

    def get_legal_actions(self):
        """Return the actions of all the q-states, in insertion order."""
        return [q.get_action() for q in self.qstates]

    def visit(self):
        """Increment the number of times this state has been visited."""
        self.num_visited = self.num_visited + 1

    def __str__(self):
        """Return the textual form of the parameter list."""
        return str(self.parameters)

    def __repr__(self):
        """Return the same text as __str__."""
        return str(self.parameters)

    def print_detailed(self):
        """Print the parameters and visit count, then one line per q-state."""
        header = "%s, visited: %d" % (str(self.parameters), self.num_visited)
        print(header)
        for q in self.get_qstates():
            print(q)
"""
Class that implements a Q-learning model for a Markov Decision Process.
Only models Q-values instead of transitions and rewards.
"""
class QModel:
    """
    Q-learning model for a Markov Decision Process.

    Only Q-values are modeled; transitions and rewards are not stored.
    """

    def __init__(self, conf):
        """
        Set up all the states and the parameters needed by the model.

        conf is a mapping that must provide the PARAMETERS, ACTIONS,
        DISCOUNT, INITIAL_QVALUES and LEARNING_RATE entries.

        Raises ConfigurationError when a required entry is missing or when
        the parameter definitions are not valid.
        """
        required_fields = [PARAMETERS, ACTIONS, DISCOUNT, INITIAL_QVALUES, LEARNING_RATE]
        for field in required_fields:
            if field not in conf:
                raise ConfigurationError("%s not provided in the configuration" % field)

        self.learning_rate = conf[LEARNING_RATE]
        self.discount = conf[DISCOUNT]
        # Start from a single parameterless state; every modeled parameter
        # then multiplies the set of states by its number of values.
        self.states = [State()]
        self.current_state = None

        self._assert_modeled_params(conf)

        # create all the states of the model
        params = self._get_parameters(conf[PARAMETERS])
        for name, values in params.items():
            self._update_states(name, values)

        self._set_maxima_minima(params, conf[ACTIONS])
        self._add_qstates(conf[ACTIONS], conf[INITIAL_QVALUES])

    def _assert_modeled_params(self, conf):
        """
        Assert that the parameters the actions depend on are being modeled.

        Raises ConfigurationError when add/remove VM actions are configured
        without the NUMBER_OF_VMS parameter.
        """
        if ADD_VMS in conf[ACTIONS] or REMOVE_VMS in conf[ACTIONS]:
            if NUMBER_OF_VMS not in conf[PARAMETERS]:
                raise ConfigurationError("Add/Remove VM actions require %s parameter" % NUMBER_OF_VMS)

        # TODO the rest of the actions

    def _get_parameters(self, parameters):
        """
        Convert the configured parameters to lists of (min, max) tuples.

        Distinct values become (v, v) tuples and a list of limits becomes the
        tuples of consecutive limits, so both kinds can be treated uniformly.
        Returns a dict that maps str(name) to the list of tuples.

        Raises ConfigurationError when a parameter provides neither values
        nor limits, when they are not in a list, or when there are too few.
        """
        new_params = {}
        for name, par in parameters.items():
            if VALUES in par:
                values = par[VALUES]
                if not isinstance(values, list):
                    raise ConfigurationError("Provided values for %s must be in a list" % name)
                if len(values) <= 1:
                    raise ConfigurationError("At least two values must be provided for " + name)

                new_params[str(name)] = [(v, v) for v in values]

            elif LIMITS in par:
                limits = par[LIMITS]
                if not isinstance(limits, list):
                    raise ConfigurationError("Provided limits for %s must be in a list" % name)
                if len(limits) <= 2:
                    raise ConfigurationError("At least three limits must be provided for " + name)

                # pair every limit with the one that follows it
                new_params[str(name)] = list(zip(limits[:-1], limits[1:]))

            else:
                raise ConfigurationError("Values or limits must be provided for parameter " + name)

        return new_params

    def set_state(self, measurements):
        """
        Initialize the current state from the given measurements.

        The current state becomes None when no modeled state matches.
        """
        self.current_state = self._get_state(measurements)

    def get_legal_actions(self):
        """
        Return all the possible actions from the current state.

        Raises StateNotSetError when there is no current state.
        """
        if self.current_state is None:
            raise StateNotSetError()

        return self.current_state.get_legal_actions()

    def suggest_action(self):
        """
        Return the optimal next action according to the Q-values of the
        current state.

        Raises StateNotSetError when there is no current state.
        """
        if self.current_state is None:
            raise StateNotSetError()

        return self.current_state.get_optimal_action()

    def _update_states(self, name, values):
        """
        Extend the states with all the possible values of the parameter.

        Every existing state is replaced by one copy per value, multiplying
        the number of states with the number of values of the parameter.
        """
        new_states = []
        for value in values:
            for s in self.states:
                new_state = State(s.get_parameters())
                new_state.add_new_parameter(name, value)
                new_states.append(new_state)

        self.states = new_states

    def _set_maxima_minima(self, parameters, actions):
        """
        Store the maxima and minima of the parameters whose actions need to
        be limited.
        """
        if ADD_VMS in actions or REMOVE_VMS in actions:
            vm_values = parameters[NUMBER_OF_VMS]
            self.max_VMs = max(max(x) for x in vm_values)
            self.min_VMs = min(min(x) for x in vm_values)

        # TODO the rest of the actions

    def _add_qstates(self, actions, qvalue):
        """
        Add a q-state with the given initial Q-value for every configured
        action to all the states from which the action is permissible.
        """
        for action_type, values in actions.items():
            for action_value in values:
                action = (action_type, action_value)
                for s in self.states:
                    if self._is_permissible(s, action):
                        s.add_qstate(QState(action, qvalue))

    def _is_permissible(self, state, action):
        """
        Return True if the action is allowed from the state.

        Adding or removing VMs must keep the number of VMs of the state
        within the stored [min_VMs, max_VMs] range.
        """
        action_type, action_value = action
        if action_type == ADD_VMS:
            param_values = state.get_parameter(NUMBER_OF_VMS)
            return max(param_values) + action_value <= self.max_VMs

        elif action_type == REMOVE_VMS:
            param_values = state.get_parameter(NUMBER_OF_VMS)
            return min(param_values) - action_value >= self.min_VMs

        # TODO the rest of the actions

        return True

    def _get_state(self, measurements):  # TODO this with indexing
        """
        Return the first state whose parameter ranges all contain the
        corresponding measurements, or None when no state matches.

        measurements is indexed by parameter name; a name that is missing
        from it propagates whatever error the lookup raises.
        """
        for s in self.states:
            for name, (min_v, max_v) in s.get_parameters():
                value = measurements[name]
                if value < min_v or value > max_v:
                    break
            else:
                # no parameter was out of range, so this state matches
                return s

        return None

    def update(self, action, measurements, reward):
        """
        Update the Q-value for a transition to the state deriving from the
        given measurements, after performing the given action and receiving
        the given reward, and make that state the current one.

        The call is a no-op (apart from counting the visit) when the action
        has no q-state in the current state.

        Raises StateNotSetError when there is no current state and
        ValueError when the measurements match no modeled state.
        """
        if self.current_state is None:
            raise StateNotSetError()

        self.current_state.visit()
        qstate = self.current_state.get_qstate(action)
        if qstate is None:
            # TODO log
            return

        new_state = self._get_state(measurements)
        if new_state is None:
            # Fail with a clear message before touching any Q-value instead
            # of crashing on an attribute access on None below.
            raise ValueError("No modeled state matches the measurements: %s" % (measurements,))

        a = self.learning_rate
        g = self.discount
        qvalue = (1 - a)*qstate.get_qvalue() + a*(reward + g*new_state.get_max_qvalue())
        qstate.set_qvalue(qvalue)
        qstate.incr_taken()
        self.current_state = new_state

    def print_model(self, detailed=False):
        """
        Print the states of the model.

        If detailed is True the q-states of every state are printed as well,
        followed by an empty line.
        """
        for s in self.states:
            if detailed:
                s.print_detailed()
                print("")
            else:
                print(s)