-
Notifications
You must be signed in to change notification settings - Fork 0
/
replay_memory.py
29 lines (23 loc) · 995 Bytes
/
replay_memory.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import random
import collections
import numpy as np
class ReplayMemory(object):
    """Bounded buffer of experience records with uniform random sampling.

    Records are kept in a ``collections.deque`` created with
    ``maxlen=max_size``, so appending to a full buffer discards the record
    at the opposite (oldest) end.
    """

    def __init__(self, max_size):
        """Create an empty buffer that holds at most ``max_size`` records."""
        self.buffer = collections.deque(maxlen=max_size)

    def append(self, exp):
        """Store one experience record.

        ``sample`` unpacks every record into exactly five fields, so ``exp``
        is expected to be a 5-item sequence
        ``(obs, action, reward, next_obs, done)``.
        """
        self.buffer.append(exp)

    def sample(self, batch_size):
        """Draw ``batch_size`` records without replacement, batched by field.

        Returns:
            A 5-tuple ``(obs, action, reward, next_obs, done)`` of NumPy
            arrays, each converted to ``float32`` and each with one leading
            entry per sampled record.

        Raises:
            ValueError: propagated from ``random.sample`` when ``batch_size``
                is negative or larger than the number of stored records, or
                from unpacking when a stored record does not have exactly
                five fields.
        """
        mini_batch = random.sample(self.buffer, batch_size)

        # One accumulator per field. Starting from five empty lists (rather
        # than transposing with zip(*mini_batch)) keeps an empty batch
        # returning five empty arrays.
        columns = ([], [], [], [], [])
        for obs, action, reward, next_obs, done in mini_batch:
            fields = (obs, action, reward, next_obs, done)
            for column, value in zip(columns, fields):
                column.append(value)

        return tuple(np.array(column).astype('float32') for column in columns)

    def __len__(self):
        """Return the number of records currently stored."""
        return len(self.buffer)