-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathmainLoop.py
More file actions
62 lines (60 loc) · 2.49 KB
/
mainLoop.py
File metadata and controls
62 lines (60 loc) · 2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import gym
from model import DeepQNetwork, Agent
from utils import plotLearning
import numpy as np
from gym import wrappers
def preprocess(frame):
    """Crop the Atari frame and collapse RGB to a single grayscale channel.

    Crops to rows 15:200, cols 30:125 (drops the score bar and side
    borders), then averages over the color axis, returning a 2-D array.
    NOTE(review): the original script used np.sum for the frames passed to
    chooseAction but np.mean for stored transitions; mean is used everywhere
    now so the network sees the same input scale when acting and learning.
    """
    return np.mean(frame[15:200, 30:125], axis=2)


if __name__ == '__main__':
    env = gym.make('SpaceInvaders-v0')
    brain = Agent(gamma=0.95, epsilon=1.0,
                  alpha=0.003, maxMemorySize=5000,
                  replace=None)

    # Phase 1: fill the replay memory with random-policy transitions so the
    # first learning batches have something to sample from.
    while brain.memCntr < brain.memSize:
        observation = env.reset()
        done = False
        while not done:
            # 0 no action, 1 fire, 2 move right, 3 move left,
            # 4 move right fire, 5 move left fire
            action = env.action_space.sample()
            observation_, reward, done, info = env.step(action)
            if done and info['ale.lives'] == 0:
                # Heavy penalty for losing the final life so true terminal
                # states are strongly discouraged.
                reward = -100
            brain.storeTransition(preprocess(observation), action, reward,
                                  preprocess(observation_))
            observation = observation_
    print('done initializing memory')

    # Phase 2: epsilon-greedy training.
    scores = []
    epsHistory = []
    numGames = 50
    batch_size = 32
    # Records every episode to tmp/space-invaders-1; comment out to disable.
    env = wrappers.Monitor(env, "tmp/space-invaders-1",
                           video_callable=lambda episode_id: True, force=True)

    for i in range(numGames):
        print('starting game ', i+1, 'epsilon: %.4f' % brain.EPSILON)
        epsHistory.append(brain.EPSILON)
        done = False
        observation = env.reset()
        frames = [preprocess(observation)]
        score = 0
        lastAction = 0
        while not done:
            # Choose a new action once every 3 stacked frames; repeat the
            # previous action on the frames in between.
            if len(frames) == 3:
                action = brain.chooseAction(frames)
                frames = []
            else:
                action = lastAction
            observation_, reward, done, info = env.step(action)
            score += reward
            frames.append(preprocess(observation_))
            if done and info['ale.lives'] == 0:
                # Penalty applied to the stored transition only; the raw
                # reward was already added to the episode score above.
                reward = -100
            brain.storeTransition(preprocess(observation), action, reward,
                                  preprocess(observation_))
            observation = observation_
            brain.learn(batch_size)
            lastAction = action
            # env.render()
        scores.append(score)
        print('score:', score)

    # Plot score/epsilon history; filename encodes the hyperparameters.
    x = [i+1 for i in range(numGames)]
    fileName = str(numGames) + 'Games' + 'Gamma' + str(brain.GAMMA) + \
               'Alpha' + str(brain.ALPHA) + 'Memory' + str(brain.memSize) + '.png'
    plotLearning(x, scores, epsHistory, fileName)