# my_controller.py
# Forked from emilio-cartoni/REAL2021_starter_kit.

from real_robots.policy import BasePolicy  # interface the evaluator expects
from baseline.policy import Baseline  # baseline policy provided with the starter kit


class RandomPolicy(BasePolicy):
    """Example policy that samples a random action at every step."""

    def __init__(self, action_space, observation_space):
        self.action_space = action_space
        self.observation_space = observation_space
        self.render = False

    def start_intrinsic_phase(self):
        """
        The evaluator will call this function to signal the start of the
        Intrinsic Phase.
        The next observation will be from the Intrinsic Phase.
        """
        pass

    def end_intrinsic_phase(self, observation, reward, done):
        """
        The evaluator will call this function to signal the end of the
        Intrinsic Phase.
        It passes the observation, reward and done flag values computed at
        the end of the last step of the Intrinsic Phase.
        """
        pass

    def start_extrinsic_phase(self):
        """
        The evaluator will call this function to signal the start of the
        Extrinsic Phase.
        The next observation will be from the Extrinsic Phase.
        """
        pass

    def end_extrinsic_phase(self):
        """
        The evaluator will call this function to signal the end of the
        Extrinsic Phase.
        """
        pass

    def start_extrinsic_trial(self):
        """
        The evaluator will call this function to signal the start of each
        extrinsic trial.
        The next observation will have a (new) goal.
        """
        pass

    def end_extrinsic_trial(self, observation, reward, done):
        """
        The evaluator will call this function to signal the end of each
        extrinsic trial.
        It passes the observation, reward and done flag values computed at
        the end of the last step of the extrinsic trial.
        """
        pass

    def step(self, observation, reward, done):
        """
        Receive the observation, reward and done signals from the
        environment and compute the next action to take.

        Parameters
        ----------
        observation : dict
            The dictionary will contain the following entries:
            - "joint_positions"
                Values of the joint positions of the arm,
                including the gripper.
            - "touch_sensors"
                Values recorded by the touch sensors.
            - "retina"
                Image of the environment from the camera
                above the table.
            - "goal"
                Image of the goal, showing how the objects
                should be arranged in the environment.
            If the ENVIRONMENT_TYPE is "easy", these additional
            observations are also provided in the same dictionary:
            - "object_positions"
                A dictionary with a key for each object on the table,
                giving the position and orientation of that object.
            - "goal_positions"
                A dictionary with the goal position of each object.
            - "mask"
                A segmentation mask of the retina image where each
                pixel holds an integer index identifying which object
                is in that pixel (e.g. -1 is the background, 0 is the
                robot, 1 is the table, etc.).
            - "goal_mask"
                A segmentation mask of the goal image.
        reward : float
            This will always be zero.
        done : bool
            This will be True when:
            - the Intrinsic Phase ends
            - an extrinsic trial ends
            and False otherwise.

        Returns
        -------
        dict
            An action sampled from the action space, with the
            'render' flag attached.
        """
        action = self.action_space.sample()
        action['render'] = self.render
        return action
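

# The sketch below is not part of the starter kit: it shows one way a policy
# might read the observation fields documented in step() above. The key names
# come from that docstring; the class itself is purely illustrative.
class GoalAwareRandomPolicy(RandomPolicy):
    def step(self, observation, reward, done):
        joints = observation["joint_positions"]  # arm + gripper joint values
        retina = observation["retina"]           # camera image of the scene
        goal = observation["goal"]               # image of the target arrangement
        # Ground-truth object poses are only present in the "easy" environment.
        if "object_positions" in observation:
            for obj, pose in observation["object_positions"].items():
                target = observation["goal_positions"][obj]
                # A real policy would plan an action from (pose, target) here;
                # this sketch just falls back to random sampling below.
        return super().step(observation, reward, done)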


# Select the policy that the evaluator will run.
# SubmittedPolicy = RandomPolicy
SubmittedPolicy = Baseline
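

# For reference, a minimal sketch of the calling convention implied by the
# hooks above. The real loop lives in the real_robots evaluator; `env` here
# is a hypothetical Gym-style environment (reset() returning an observation,
# step() returning a 4-tuple) and `n_steps` is an assumed trial length.
def run_one_extrinsic_trial(policy, env, n_steps=1000):
    policy.start_extrinsic_trial()
    observation, reward, done = env.reset(), 0.0, False
    for _ in range(n_steps):
        action = policy.step(observation, reward, done)
        observation, reward, done, _info = env.step(action)
        if done:
            break
    policy.end_extrinsic_trial(observation, reward, done)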