create_comite_dataset.py
import gym
import turtle_robot_gym
import numpy as np
import random
from keras import Sequential
from keras.layers import Input, Flatten, Dense
import rl
from rl.memory import SequentialMemory
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
import tensorflow as tf
import pickle
class create_dqn():
    def __init__(self, architecture, weights):
        self.architecture = architecture
        self.weights = weights

    def create_model(self):
        # Feed-forward neural network model for Deep Q-Learning (DQN)
        model = Sequential()
        # Input is one observation window containing the 5 observation features
        model.add(Input(shape=(1, 5)))
        model.add(Flatten())
        for i in range(len(self.architecture)):
            # Hidden layers
            model.add(Dense(self.architecture[i], activation='relu'))
        # Output layer: one Q-value per action
        model.add(Dense(4, activation='linear'))
        return model
    def create_agent(self):
        model = self.create_model()
        memory = SequentialMemory(limit=50000, window_length=1)
        # Linearly annealed policy with EpsGreedyQPolicy as the inner policy
        policy = LinearAnnealedPolicy(inner_policy=EpsGreedyQPolicy(),  # policy used to select actions
                                      attr='eps',       # attribute of the inner policy to anneal
                                      value_max=1.0,    # initial value of the annealed attribute
                                      value_min=0.1,    # final value of the annealed attribute
                                      value_test=0.05,  # value used during testing
                                      nb_steps=10000)   # steps over which the value is annealed
        dqn = DQNAgent(model=model,                     # Q-network model
                       nb_actions=env.action_space.n,   # number of actions (uses the module-level env)
                       memory=memory,                   # experience replay memory
                       nb_steps_warmup=25,              # steps collected before experience replay starts
                       target_model_update=1e-2,        # how often the target network is updated
                       policy=policy)
        dqn.compile(tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=['mae', 'accuracy'])
        dqn.load_weights(self.weights)
        return dqn
# 3x3 maze
setup = {'width': 3,
         'height': 3,
         'walls': [(1, 1), (0, 2)],
         'start': (0, 0),
         'goal': (1, 2),
         'theta': 0
         }
# 4x4 maze
'''setup = {'width': 4,
            'height': 4,
            'walls': [(1, 1), (2, 0), (2, 1), (3, 1), (3, 3)],
            'start': (0, 0),
            'goal': (2, 3),
            'theta': 0
            }'''
# 5x5 maze
'''setup = {'width': 5,
            'height': 5,
            'walls': [(1, 1), (3, 0), (2, 2), (2, 3), (3, 1), (4, 2)],
            'start': (0, 0),
            'goal': (3, 2),
            'theta': 0
            }'''
# 6x6 maze
'''setup = {'width': 6,
            'height': 6,
            'walls': [(1, 1), (0, 5), (1, 2), (1, 3), (3, 3), (2, 4), (2, 5), (5, 4)],
            'start': (0, 0),
            'goal': (5, 5),
            'theta': 0
            }'''
env = gym.make('TurtleRobotEnv-v1_2', **setup)
def get_prediction(state, list_dqn, list_qlear):
    # Tabular Q-learning agents index states by their string representation
    state_qlear = list(map(str, state))
    state_qlear = ''.join(state_qlear)
    final_prediction = [0, 0, 0, 0]
    # Accumulate the min-max normalized Q-values of every DQN agent
    for i in range(len(list_dqn)):
        dqn_prediction = list_dqn[i].compute_q_values(np.reshape(state, (1, 5)))
        normalized_dqn_predictions = [(x - min(dqn_prediction)) / (max(dqn_prediction) - min(dqn_prediction)) for x in dqn_prediction]
        for j in range(len(final_prediction)):
            final_prediction[j] += normalized_dqn_predictions[j]
    # Accumulate the min-max normalized Q-values of every tabular Q-learning agent
    for i in range(len(list_qlear)):
        Q = list_qlear[i][0]
        visited_states = list_qlear[i][1]
        if state_qlear not in visited_states:
            # Unvisited state: the tabular agent contributes nothing
            normalized_qlear_predictions = [0, 0, 0, 0]
        else:
            indice = visited_states.index(state_qlear)
            qlear_prediction = Q[indice]
            normalized_qlear_predictions = [(x - min(qlear_prediction)) / (max(qlear_prediction) - min(qlear_prediction)) for x in qlear_prediction]
        for j in range(len(final_prediction) - 1):
            final_prediction[j] += normalized_qlear_predictions[j]
    return final_prediction
list_qlear = []
list_dqn = []
# Create the DQN agents of the committee
modeldqn = create_dqn([64, 64, 64], 'models/3x3_turtle_weights64x3.h5').create_agent()
list_dqn.append(modeldqn)
modeldqn = create_dqn([12, 12], 'models/3x3distilled_weights_64x3_to_12x2.h5').create_agent()
list_dqn.append(modeldqn)
# Load the tabular Q-learning agent, pickled as (Q-table, visited states)
with open('models/Qlear3x3.pkl', 'rb') as f:
    qlear = pickle.load(f)
list_qlear.append(qlear)
f1 = open('data/estados.txt', 'w')
f2 = open('data/predicciones.txt', 'w')
contador = 0
for i in range(1):
    OldState = env.reset()
    done = False
    while not done:
        # Committee prediction for the current state
        predictions = get_prediction(OldState, list_dqn, list_qlear)
        # Uncomment to write the state/prediction pairs of the dataset
        #f1.write(str(OldState[0])+' '+str(OldState[1])+' '+str(OldState[2])+' '+str(OldState[3])+' '+str(OldState[4])+' '+'\n')
        #f2.write(str(predictions[0])+' '+str(predictions[1])+' '+str(predictions[2])+' '+str(predictions[3])+'\n')
        action = np.argmax(predictions)
        new_state, reward, done, info = env.step(action)
        OldState = new_state
        env.render(action=action, reward=reward)
        contador += 1
        if reward == 10:
            print('Goal reached')
print(contador)
f1.close()
f2.close()
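# Illustrative only (not part of the original script): the files written above
# are whitespace-separated, so the dataset could be loaded back with, e.g.:
#
#   states = np.loadtxt('data/estados.txt')        # one 5-value state per line
#   targets = np.loadtxt('data/predicciones.txt')  # one 4-value prediction per line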