
Commit c62cec4 (parent 4dab448)

Apply some style changes

4 files changed: +23 -21 lines

dqn_agent.py (+6 -6)

@@ -1,3 +1,5 @@
+from typing import ValuesView, List, Optional
+
 from keras import Model
 from keras.models import Sequential
 from keras.layers import Dense
@@ -76,7 +78,7 @@ def random_value(self):
         """Random score for a certain action"""
         return random.random()
 
-    def predict_value(self, state) -> float:
+    def predict_value(self, state: np.ndarray) -> float:
         """Predicts the score for a certain state"""
         return self.model.predict(state)[0]
 
@@ -88,21 +90,19 @@ def act(self, state):
         else:
             return self.predict_value(state)
 
-    def best_state(self, states):
+    def best_state(self, states: ValuesView[List[int]]) -> List[int]:
         """Returns the best state for a given collection of states"""
-        max_value = None
-        best_state = None
-
         if random.random() <= self.epsilon:
             return random.choice(list(states))
         else:
+            max_value: Optional[float] = None
+            best_state: Optional[List[int]] = None
             for state in states:
                 # ask the neural network about the best value
                 value = self.predict_value(np.reshape(state, [1, self.state_size]))
                 if not max_value or value > max_value:
                     max_value = value
                     best_state = state
-
         return best_state
 
     def train(self, batch_size=32, epochs=3):
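
Note on the new type hints: ValuesView[List[int]] matches how best_state is called in run_train.py below, where env.get_next_states() apparently yields a mapping from moves to board-property lists and its .values() view is passed straight in. A minimal, self-contained sketch of that calling shape (the stand-in agent, its scoring rule, and the example values are assumptions for illustration, not code from this repository):

    from typing import Dict, List, Optional, Tuple, ValuesView

    class StubAgent:
        """Stand-in with the same best_state signature as DQNAgent."""

        def predict_value(self, state: List[int]) -> float:
            # toy heuristic in place of a Keras model prediction
            return -float(sum(state))

        def best_state(self, states: ValuesView[List[int]]) -> List[int]:
            max_value: Optional[float] = None
            best_state: Optional[List[int]] = None
            for state in states:
                value = self.predict_value(state)
                if max_value is None or value > max_value:
                    max_value = value
                    best_state = state
            return best_state

    # hypothetical next_states: move -> resulting board properties
    next_states: Dict[Tuple[int, int], List[int]] = {
        (0, 0): [0, 3, 7, 2],
        (1, 90): [1, 1, 5, 2],
    }
    print(StubAgent().best_state(next_states.values()))   # -> [1, 1, 5, 2]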

run_eval.py (+1)

@@ -57,6 +57,7 @@ def enumerate_run_eval(episodes: int = 128, render: bool = False):
     # 'tetris-20190802-221032-ms25000-e1-ese2000-d0.99',
     # 'tetris-20190802-033219-ms20000-e1-ese2000-d0.95',
     # ]
+    dirs = ['tetris-20190802-221032-ms25000-e1-ese2000-d0.99']
     max_scores = []
     for d in dirs:
         print(f"Evaluating dir '{d}'")

run_train.py (+15 -14)

@@ -29,37 +29,38 @@ def __init__(self):
 
 
 # Run dqn with Tetris
-def dqn(ac: AgentConf):
+# noinspection PyShadowingNames
+def dqn(conf: AgentConf):
     env = Tetris()
 
     agent = DQNAgent(env.get_state_size(),
-                     n_neurons=ac.n_neurons, activations=ac.activations,
-                     epsilon=ac.epsilon, epsilon_min=ac.epsilon_min, epsilon_stop_episode=ac.epsilon_stop_episode,
-                     mem_size=ac.mem_size, discount=ac.discount, replay_start_size=ac.replay_start_size)
+                     n_neurons=conf.n_neurons, activations=conf.activations,
+                     epsilon=conf.epsilon, epsilon_min=conf.epsilon_min, epsilon_stop_episode=conf.epsilon_stop_episode,
+                     mem_size=conf.mem_size, discount=conf.discount, replay_start_size=conf.replay_start_size)
 
     timestamp_str = datetime.now().strftime("%Y%m%d-%H%M%S")
     # conf.mem_size = mem_size
     # conf.epochs = epochs
     # conf.epsilon_stop_episode = epsilon_stop_episode
     # conf.discount = discount
-    log_dir = f'logs/tetris-{timestamp_str}-ms{ac.mem_size}-e{ac.epochs}-ese{ac.epsilon_stop_episode}-d{ac.discount}'
+    log_dir = f'logs/tetris-{timestamp_str}-ms{conf.mem_size}-e{conf.epochs}-ese{conf.epsilon_stop_episode}-d{conf.discount}'
     log = CustomTensorBoard(log_dir=log_dir)
 
     print(f"AGENT_CONF = {log_dir}")
 
     scores = []
 
-    episodes_wrapped: Iterable[int] = tqdm(range(ac.episodes))
+    episodes_wrapped: Iterable[int] = tqdm(range(conf.episodes))
     for episode in episodes_wrapped:
         current_state = env.reset()
         done = False
         steps = 0
 
         # update render flag
-        render = True if ac.render_every and episode % ac.render_every == 0 else False
+        render = True if conf.render_every and episode % conf.render_every == 0 else False
 
         # game
-        while not done and (not ac.max_steps or steps < ac.max_steps):
+        while not done and (not conf.max_steps or steps < conf.max_steps):
             next_states = env.get_next_states()
             best_state = agent.best_state(next_states.values())
 
@@ -80,16 +81,16 @@ def dqn(ac: AgentConf):
         scores.append(env.get_game_score())
 
         # train
-        if episode % ac.train_every == 0:
+        if episode % conf.train_every == 0:
             # n = len(agent.memory)
             # print(f" agent.memory.len: {n}")
-            agent.train(batch_size=ac.batch_size, epochs=ac.epochs)
+            agent.train(batch_size=conf.batch_size, epochs=conf.epochs)
 
         # logs
-        if ac.log_every and episode and episode % ac.log_every == 0:
-            avg_score = mean(scores[-ac.log_every:])
-            min_score = min(scores[-ac.log_every:])
-            max_score = max(scores[-ac.log_every:])
+        if conf.log_every and episode and episode % conf.log_every == 0:
+            avg_score = mean(scores[-conf.log_every:])
+            min_score = min(scores[-conf.log_every:])
+            max_score = max(scores[-conf.log_every:])
             log.log(episode, avg_score=avg_score, min_score=min_score, max_score=max_score)
     # save_model
     save_model(agent.model, f'{log_dir}/model.hdf', overwrite=True, include_optimizer=True)
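
The rename from ac to conf, together with the added # noinspection PyShadowingNames comment and the commented-out conf.* assignments, suggests a module-level conf object that the parameter now shadows. The entry point itself is not part of this diff; a rough sketch of the presumed call pattern at the bottom of run_train.py (names assumed from context):

    # hypothetical usage, not code from this commit
    if __name__ == "__main__":
        conf = AgentConf()   # module-level conf shadows the dqn() parameter
        dqn(conf)            # hence the PyShadowingNames suppression on dqn()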

tetris.py (+1 -1)

@@ -227,7 +227,7 @@ def _height(self, board):
 
         return sum_height, max_height, min_height
 
-    def _get_board_props(self, board):
+    def _get_board_props(self, board) -> List[int]:
         """Get properties of the board"""
         lines, board = self._clear_lines(board)
         holes = self._number_of_holes(board)
