@@ -29,37 +29,38 @@ def __init__(self):
 
 
 # Run dqn with Tetris
-def dqn(ac: AgentConf):
+# noinspection PyShadowingNames
+def dqn(conf: AgentConf):
     env = Tetris()
 
     agent = DQNAgent(env.get_state_size(),
-                     n_neurons=ac.n_neurons, activations=ac.activations,
-                     epsilon=ac.epsilon, epsilon_min=ac.epsilon_min, epsilon_stop_episode=ac.epsilon_stop_episode,
-                     mem_size=ac.mem_size, discount=ac.discount, replay_start_size=ac.replay_start_size)
+                     n_neurons=conf.n_neurons, activations=conf.activations,
+                     epsilon=conf.epsilon, epsilon_min=conf.epsilon_min, epsilon_stop_episode=conf.epsilon_stop_episode,
+                     mem_size=conf.mem_size, discount=conf.discount, replay_start_size=conf.replay_start_size)
 
     timestamp_str = datetime.now().strftime("%Y%m%d-%H%M%S")
     # conf.mem_size = mem_size
     # conf.epochs = epochs
     # conf.epsilon_stop_episode = epsilon_stop_episode
     # conf.discount = discount
-    log_dir = f'logs/tetris-{timestamp_str}-ms{ac.mem_size}-e{ac.epochs}-ese{ac.epsilon_stop_episode}-d{ac.discount}'
+    log_dir = f'logs/tetris-{timestamp_str}-ms{conf.mem_size}-e{conf.epochs}-ese{conf.epsilon_stop_episode}-d{conf.discount}'
     log = CustomTensorBoard(log_dir=log_dir)
 
     print(f"AGENT_CONF = {log_dir}")
 
     scores = []
 
-    episodes_wrapped: Iterable[int] = tqdm(range(ac.episodes))
+    episodes_wrapped: Iterable[int] = tqdm(range(conf.episodes))
     for episode in episodes_wrapped:
         current_state = env.reset()
         done = False
         steps = 0
 
         # update render flag
-        render = True if ac.render_every and episode % ac.render_every == 0 else False
+        render = True if conf.render_every and episode % conf.render_every == 0 else False
 
         # game
-        while not done and (not ac.max_steps or steps < ac.max_steps):
+        while not done and (not conf.max_steps or steps < conf.max_steps):
             next_states = env.get_next_states()
             best_state = agent.best_state(next_states.values())
 
@@ -80,16 +81,16 @@ def dqn(ac: AgentConf):
         scores.append(env.get_game_score())
 
         # train
-        if episode % ac.train_every == 0:
+        if episode % conf.train_every == 0:
             # n = len(agent.memory)
             # print(f" agent.memory.len: {n}")
-            agent.train(batch_size=ac.batch_size, epochs=ac.epochs)
+            agent.train(batch_size=conf.batch_size, epochs=conf.epochs)
 
         # logs
-        if ac.log_every and episode and episode % ac.log_every == 0:
-            avg_score = mean(scores[-ac.log_every:])
-            min_score = min(scores[-ac.log_every:])
-            max_score = max(scores[-ac.log_every:])
+        if conf.log_every and episode and episode % conf.log_every == 0:
+            avg_score = mean(scores[-conf.log_every:])
+            min_score = min(scores[-conf.log_every:])
+            max_score = max(scores[-conf.log_every:])
             log.log(episode, avg_score=avg_score, min_score=min_score, max_score=max_score)
     # save_model
     save_model(agent.model, f'{log_dir}/model.hdf', overwrite=True, include_optimizer=True)
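Note (not part of the diff): every parameter that `dqn()` reads comes off the single `conf: AgentConf` object. The actual `AgentConf` class is defined elsewhere with a plain `__init__` (see the hunk context above), so the sketch below is only an illustration of the shape implied by the fields used here; the dataclass form and all default values are assumptions, not the project's real definition.

```python
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class AgentConf:
    """Illustrative config shape inferred from the fields dqn() accesses."""
    # Network shape, forwarded to DQNAgent (defaults are placeholders)
    n_neurons: List[int] = field(default_factory=lambda: [32, 32])
    activations: List[str] = field(default_factory=lambda: ["relu", "relu", "linear"])
    # Exploration schedule
    epsilon: float = 1.0
    epsilon_min: float = 0.0
    epsilon_stop_episode: int = 1500
    # Replay memory and discounting
    mem_size: int = 20000
    discount: float = 0.95
    replay_start_size: int = 2000
    # Training-loop knobs used by dqn()
    episodes: int = 2000
    max_steps: Optional[int] = None   # falsy value disables the step cap in the while condition
    train_every: int = 1
    batch_size: int = 512
    epochs: int = 1
    render_every: Optional[int] = None  # falsy value disables rendering
    log_every: int = 50


# Hypothetical usage, matching the renamed parameter:
# conf = AgentConf()
# dqn(conf)
```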