-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path1002.py
More file actions
79 lines (59 loc) · 1.76 KB
/
1002.py
File metadata and controls
79 lines (59 loc) · 1.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Follows a random search idea from:
# http://kvfrans.com/simple-algoritms-for-solving-cartpole/
# import gym
import gym.wrappers
import numpy as np
import time
#
def float_formatter(x):
    """Return ``x`` rendered with exactly six digits after the decimal point."""
    return "%.6f" % x


# Route every floating-point element numpy prints through float_formatter, so
# printed parameter vectors use a fixed six-decimal layout.
np.set_printoptions(formatter={'float_kind': float_formatter})
#
# https://github.com/openai/gym/wiki/CartPole-v0
#
# Environment id and the directory handed to the Monitor wrapper.
ENV_ID = 'CartPole-v0'
MONITOR_DIR = 'tmp/cartpole-experiment-1'
# Create the environment and wrap it in a Monitor in one step.
# NOTE(review): force=True presumably lets Monitor overwrite results already
# present in MONITOR_DIR -- confirm against the installed gym version.
env = gym.wrappers.Monitor(gym.make(ENV_ID), MONITOR_DIR, force=True)
#
# Search budget: the outer loop plays at most EP_MAX episodes.
EP_MAX = 1000
# Not referenced anywhere else in this script; presumably mirrors the
# 200-step episode cap of CartPole-v0 -- TODO confirm.
T_MAX = 200
#
# Solve criterion (quoted from the CartPole-v0 wiki): the task is considered
# solved when the average reward is greater than or equal to 195.0 over
# 100 consecutive trials.
T_TO_SOLVE = 195
CONSECUTIVE_EPS_TO_SOLVE = 100
#
# Mutable search state carried across episodes.
# Length of the current unbroken run of successful episodes.
consecutive_success = 0
# Longest episode, in timesteps, observed so far.
t_best = 0
# Four-element weight vector, zero-initialised; presumably meant to hold the
# parameters associated with t_best.
params_best = np.zeros(4)
def _run_episode(environment, weights):
    """Play one rendered episode with a fixed linear policy.

    At every step the action is 1 when the dot product of the current
    observation and ``weights`` is positive, and 0 otherwise.

    Args:
        environment: Environment exposing the classic gym API used here:
            ``reset()`` returning an observation, ``render()``, and
            ``step(action)`` returning ``(observation, reward, done, info)``.
        weights: Weight vector dotted with each observation.

    Returns:
        The number of timesteps taken before the environment reported ``done``.
    """
    observation = environment.reset()
    steps = 0
    done = False
    while not done:
        steps += 1
        environment.render()
        action = 1 if np.dot(observation, weights) > 0 else 0
        # Reward and info are not needed: the step count is the score.
        observation, _, done, _ = environment.step(action)
    print("Episode finished after {} timesteps".format(steps))
    # One-second pause after every episode (kept from the original script;
    # presumably so the rendered outcome can be looked at).
    time.sleep(1)
    return steps


# Random search over linear policies: keep the current weights for as long as
# they keep succeeding, and draw a fresh random candidate after any failure.
# The try/finally guarantees the Monitor-wrapped environment is closed even
# when the run is interrupted, so its recordings are not left half-written.
try:
    for ep in range(EP_MAX):
        if consecutive_success == 0:
            # New candidate: four weights drawn uniformly from [-1, 1).
            params = np.random.random(4) * 2 - 1
        print("ep: {}".format(ep))
        print("params: {}".format(params))

        t = _run_episode(env, params)

        if t > t_best:
            print("Good! Episode ran more than previous best ({}) timesteps".format(
                t_best))
            t_best = t
            # Remember the weights that achieved the new record; the original
            # updated t_best but left params_best at its initial zeros.
            params_best = params.copy()

        # The documented criterion is "greater than or equal to 195.0", so an
        # episode lasting exactly T_TO_SOLVE steps must count as a success.
        # Requiring every one of the consecutive episodes to reach the
        # threshold is stricter than the average-based wording.
        if t >= T_TO_SOLVE:
            consecutive_success += 1
        else:
            consecutive_success = 0
        print("Consecutive Successes: {}".format(consecutive_success))

        if consecutive_success == CONSECUTIVE_EPS_TO_SOLVE:
            print("WOW! {} consecutive success! problem solved".format(
                consecutive_success))
            break
finally:
    env.close()