-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample_ppo2.py
More file actions
45 lines (39 loc) · 1.1 KB
/
example_ppo2.py
File metadata and controls
45 lines (39 loc) · 1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
"""
Train an agent using Proximal Policy Optimization from OpenAI Baselines
"""
import retro
from baselines.common.vec_env import SubprocVecEnv
from baselines.common.retro_wrappers import make_retro, wrap_deepmind_retro
from train_ppo import ppo2
# --- Run configuration, read by main()/make_env() below ---
game = 'Pitfall-Atari2600'   # Gym Retro game id
state = retro.State.DEFAULT  # start from the game's default save state
scenario = 'scenario'        # scenario name (defines reward/done conditions)
record = False               # when True, retro records .bk2 replay movies
verbose = 1                  # NOTE(review): unused anywhere in this script
quiet = 0                    # NOTE(review): unused anywhere in this script
obs_type = 'image'           # NOTE(review): dead — make_env() shadows this with retro.Observations.IMAGE
players = 1                  # number of controlled players
def main(num_envs=8):
    """Train a PPO2 agent on Pitfall (Atari 2600) with OpenAI Baselines.

    Builds ``num_envs`` parallel Gym Retro emulator instances, applies the
    DeepMind-style observation wrappers from baselines, and runs PPO2 with
    the standard Atari hyperparameters.

    Args:
        num_envs: number of parallel subprocess environments (default 8,
            matching the original hard-coded value).
    """
    def make_env():
        # Build one emulator instance using the module-level config.
        # Observations are raw screen images; retro.Observations.RAM would
        # expose console memory instead.
        env = retro.make(
            game=game,
            state=state,
            scenario=scenario,
            record=record,
            players=players,
            obs_type=retro.Observations.IMAGE,
        )
        # DeepMind-style Atari preprocessing (see baselines retro_wrappers).
        env = wrap_deepmind_retro(env)
        return env

    venv = SubprocVecEnv([make_env] * num_envs)
    ppo2.learn(
        network='cnn',
        env=venv,
        total_timesteps=int(1e6),
        nsteps=128,                 # rollout length per environment
        nminibatches=4,
        lam=0.95,                   # GAE lambda
        gamma=0.99,                 # discount factor
        noptepochs=4,
        log_interval=1,
        ent_coef=.01,
        lr=lambda f: f * 2.5e-4,    # learning rate linearly annealed to 0
        cliprange=0.1,
    )
# Entry-point guard: mandatory here because SubprocVecEnv spawns worker
# processes that re-import this module — without the guard each worker
# would recursively re-launch training.
if __name__ == '__main__':
    main()