mainDQN.py
import gym
import turtle_robot_gym  # registers the custom TurtleRobotEnv environments with gym
from keras import Sequential
from keras.layers import Input, Flatten, Dense
from rl.memory import SequentialMemory
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
import tensorflow as tf
# Select which maze to solve: '3x3', '4x4', '5x5', or '6x6'
laberinto = '3x3'
if laberinto == '3x3':
    # 3x3 maze
    setup = {'width': 3,
             'height': 3,
             'walls': [(1, 1), (0, 2)],
             'start': (0, 0),
             'goal': (1, 2),
             'theta': 0
             }
elif laberinto == '4x4':
    # 4x4 maze
    setup = {'width': 4,
             'height': 4,
             'walls': [(1, 1), (2, 0), (2, 1), (3, 1), (3, 3)],
             'start': (0, 0),
             'goal': (2, 3),
             'theta': 0
             }
elif laberinto == '5x5':
    # 5x5 maze
    setup = {'width': 5,
             'height': 5,
             'walls': [(1, 1), (0, 3), (2, 2), (2, 3), (3, 1), (4, 2)],
             'start': (0, 0),
             'goal': (3, 2),
             'theta': 0
             }
elif laberinto == '6x6':
    # 6x6 maze
    setup = {'width': 6,
             'height': 6,
             'walls': [(1, 1), (0, 5), (1, 2), (1, 3), (3, 3), (2, 4), (2, 5), (5, 4)],
             'start': (0, 0),
             'goal': (5, 5),
             'theta': 0
             }
ENV_NAME = 'TurtleRobotEnv-v1_2'
env = gym.make(ENV_NAME, **setup)
print('Observation space ', env.observation_space.shape[0])
print('Action space ', env.action_space.n)
# setup experience replay buffer
memory = SequentialMemory(limit=50000, window_length=1)
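# window_length=1 means each state fed to the network is a single
# observation (no frame stacking across timesteps)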
# Set up the linearly annealed policy with EpsGreedyQPolicy as the inner policy
policy = LinearAnnealedPolicy(inner_policy=EpsGreedyQPolicy(),  # policy used to select actions
                              attr='eps',       # attribute of the inner policy to anneal
                              value_max=1.0,    # starting value of eps
                              value_min=0.1,    # final value of eps after annealing
                              value_test=0.05,  # fixed eps used during testing
                              nb_steps=10000)   # steps over which eps is annealed from value_max to value_min
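# With these settings eps decays linearly from 1.0 to 0.1 over the first
# 10000 training steps, i.e. eps(t) = max(0.1, 1.0 - (1.0 - 0.1) * t / 10000),
# while dqn.test() uses the fixed value_test eps of 0.05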
if laberinto == '5x5' or laberinto == '6x6':
    # Feed-forward neural network model for deep Q-learning (DQN)
    model = Sequential()
    # Input is one observation vector of 5 features (window_length=1)
    model.add(Input(shape=(1, 5)))
    model.add(Flatten())
    # Hidden layers with 96, 192, and 96 nodes
    model.add(Dense(96, activation='relu'))
    model.add(Dense(192, activation='relu'))
    model.add(Dense(96, activation='relu'))
    # Linear output layer: one unbounded Q-value per action
    model.add(Dense(env.action_space.n, activation='linear'))
if laberinto == '3x3' or laberinto == '4x4':
    # Feed-forward neural network model for deep Q-learning (DQN)
    model = Sequential()
    # Input is one observation vector of 5 features (window_length=1)
    model.add(Input(shape=(1, 5)))
    model.add(Flatten())
    # Hidden layers with 64 nodes each
    model.add(Dense(64, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(64, activation='relu'))
    # Linear output layer: one unbounded Q-value per action
    model.add(Dense(env.action_space.n, activation='linear'))
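# Illustrative sanity check (assumption: the environment emits a
# 5-dimensional observation vector, matching the Input shape above):
#assert env.observation_space.shape[0] == 5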
# Print the network architecture summary
model.summary()
# Define the DQN agent for the Q-network model
dqn = DQNAgent(model=model,                    # Q-network model
               nb_actions=env.action_space.n,  # number of actions
               memory=memory,                  # experience replay memory
               nb_steps_warmup=25,             # steps collected before training starts
               target_model_update=1e-2,       # soft-update coefficient for the target network
               policy=policy)                  # action selection policy
# Configure and compile the agent with the built-in tf.keras Adam optimizer
dqn.compile(tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=['mae', 'accuracy'])
# Fit (train) the agent
history = dqn.fit(env, nb_steps=25000, visualize=False, verbose=1)
# Optional logging of steps per episode (left disabled; the file is only
# opened when the writes below are re-enabled)
#f = open('data/AccionesDQN.txt', 'a')
#f.write(str(history.history['nb_episode_steps']))
#f.write('\n')
#f.close()
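# A minimal plotting sketch (assumes matplotlib is available; keras-rl's
# fit() history records per-episode 'episode_reward' and 'nb_episode_steps'):
#import matplotlib.pyplot as plt
#plt.plot(history.history['nb_episode_steps'])
#plt.xlabel('Episode')
#plt.ylabel('Steps per episode')
#plt.show()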
# Evaluate the trained agent for 20 episodes
dqn.test(env, nb_episodes=20, visualize=False)
# Save weights
model.save_weights('models/'+laberinto+'_turtle_weights64x3_2.h5')
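# A minimal reload sketch (assumes the same architecture is rebuilt and the
# agent recompiled before loading the saved weights):
#model.load_weights('models/' + laberinto + '_turtle_weights64x3_2.h5')
#dqn.test(env, nb_episodes=20, visualize=False)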