@@ -29,37 +29,38 @@ def __init__(self):
 
 
 # Run dqn with Tetris
-def dqn(ac: AgentConf):
+# noinspection PyShadowingNames
+def dqn(conf: AgentConf):
     env = Tetris()
 
     agent = DQNAgent(env.get_state_size(),
-                     n_neurons=ac.n_neurons, activations=ac.activations,
-                     epsilon=ac.epsilon, epsilon_min=ac.epsilon_min, epsilon_stop_episode=ac.epsilon_stop_episode,
-                     mem_size=ac.mem_size, discount=ac.discount, replay_start_size=ac.replay_start_size)
+                     n_neurons=conf.n_neurons, activations=conf.activations,
+                     epsilon=conf.epsilon, epsilon_min=conf.epsilon_min, epsilon_stop_episode=conf.epsilon_stop_episode,
+                     mem_size=conf.mem_size, discount=conf.discount, replay_start_size=conf.replay_start_size)
 
     timestamp_str = datetime.now().strftime("%Y%m%d-%H%M%S")
     # conf.mem_size = mem_size
     # conf.epochs = epochs
     # conf.epsilon_stop_episode = epsilon_stop_episode
     # conf.discount = discount
-    log_dir = f'logs/tetris-{timestamp_str}-ms{ac.mem_size}-e{ac.epochs}-ese{ac.epsilon_stop_episode}-d{ac.discount}'
+    log_dir = f'logs/tetris-{timestamp_str}-ms{conf.mem_size}-e{conf.epochs}-ese{conf.epsilon_stop_episode}-d{conf.discount}'
     log = CustomTensorBoard(log_dir=log_dir)
 
     print(f"AGENT_CONF = {log_dir}")
 
     scores = []
 
-    episodes_wrapped: Iterable[int] = tqdm(range(ac.episodes))
+    episodes_wrapped: Iterable[int] = tqdm(range(conf.episodes))
     for episode in episodes_wrapped:
         current_state = env.reset()
         done = False
         steps = 0
 
         # update render flag
-        render = True if ac.render_every and episode % ac.render_every == 0 else False
+        render = True if conf.render_every and episode % conf.render_every == 0 else False
 
         # game
-        while not done and (not ac.max_steps or steps < ac.max_steps):
+        while not done and (not conf.max_steps or steps < conf.max_steps):
             next_states = env.get_next_states()
             best_state = agent.best_state(next_states.values())
 
@@ -80,16 +81,16 @@ def dqn(ac: AgentConf):
         scores.append(env.get_game_score())
 
         # train
-        if episode % ac.train_every == 0:
+        if episode % conf.train_every == 0:
             # n = len(agent.memory)
             # print(f" agent.memory.len: {n}")
-            agent.train(batch_size=ac.batch_size, epochs=ac.epochs)
+            agent.train(batch_size=conf.batch_size, epochs=conf.epochs)
 
         # logs
-        if ac.log_every and episode and episode % ac.log_every == 0:
-            avg_score = mean(scores[-ac.log_every:])
-            min_score = min(scores[-ac.log_every:])
-            max_score = max(scores[-ac.log_every:])
+        if conf.log_every and episode and episode % conf.log_every == 0:
+            avg_score = mean(scores[-conf.log_every:])
+            min_score = min(scores[-conf.log_every:])
+            max_score = max(scores[-conf.log_every:])
             log.log(episode, avg_score=avg_score, min_score=min_score, max_score=max_score)
     # save_model
     save_model(agent.model, f'{log_dir}/model.hdf', overwrite=True, include_optimizer=True)
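Note (not part of the diff): every parameter that `dqn()` reads comes off the single `conf: AgentConf` object. The actual `AgentConf` class is defined elsewhere with a plain `__init__` (see the hunk context above), so the sketch below is only an illustration of the shape implied by the fields used here; the dataclass form and all default values are assumptions, not the project's real definition.

```python
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class AgentConf:
    """Illustrative config shape inferred from the fields dqn() accesses."""
    # Network shape, forwarded to DQNAgent (defaults are placeholders)
    n_neurons: List[int] = field(default_factory=lambda: [32, 32])
    activations: List[str] = field(default_factory=lambda: ["relu", "relu", "linear"])
    # Exploration schedule
    epsilon: float = 1.0
    epsilon_min: float = 0.0
    epsilon_stop_episode: int = 1500
    # Replay memory and discounting
    mem_size: int = 20000
    discount: float = 0.95
    replay_start_size: int = 2000
    # Training-loop knobs used by dqn()
    episodes: int = 2000
    max_steps: Optional[int] = None   # falsy value disables the step cap in the while condition
    train_every: int = 1
    batch_size: int = 512
    epochs: int = 1
    render_every: Optional[int] = None  # falsy value disables rendering
    log_every: int = 50


# Hypothetical usage, matching the renamed parameter:
# conf = AgentConf()
# dqn(conf)
```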