-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcartpole_dqn.py
71 lines (64 loc) · 2.28 KB
/
cartpole_dqn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import sys

from run_experiment_helper import *
from Agents.DQN import DQNAgent
class CardpoleDQN(RunExperiment):
    """DQN experiment runner for the CartPole-v1 environment.

    Wires a ``DQNAgent`` into the ``RunExperiment`` framework and supplies
    three configurations: fixed hyper-parameters for a plain training run,
    genome-driven hyper-parameters for evolutionary search, and the settings
    of the evolver itself.

    NOTE(review): the class name "Cardpole" is a typo for "CartPole"; it is
    kept as-is because callers may already import it under this name.
    """

    def __init__(self):
        # Bind this experiment to the CartPole-v1 Gym environment id.
        super().__init__("CartPole-v1")

    def get_agent(self, config):
        """Build and return the DQN agent for the given agent config."""
        return DQNAgent(config)

    def get_agent_train_config(self, env):
        """Return the fixed hyper-parameters used for a plain training run.

        target_average=475 matches the CartPole-v1 "solved" reward threshold.
        Double DQN, dueling networks, and prioritized experience replay (PER)
        are all enabled.
        """
        return AgentConfig(
            env=env,
            n_episodes=100,
            target_average=475,
            update_every=1,
            batch_size=64,
            gamma=0.95,
            learning_rate=1e-4,
            deepq_double_learning=True,
            deepq_dueling_networks=True,
            per_active=True)

    def get_agent_evolve_config(self, env, genome, episodes):
        """Return hyper-parameters drawn from *genome* for an evolution run.

        Runs on CPU: evolution evaluates many short candidates, where GPU
        transfer overhead tends to outweigh the speedup.
        """
        return AgentConfig(
            env=env,
            device="cpu",
            n_episodes=episodes,
            target_average=475,
            update_every=genome['updateEvery'],
            batch_size=genome['batchSize'],
            gamma=genome['gamma'],
            learning_rate=genome['learningRate'],
            fc1_units=genome['fc1_units'],
            fc2_units=genome['fc2_units'],
            tau=genome['tau'],
            memory_size=genome['memory_size'],
            deepq_double_learning=True,
            deepq_dueling_networks=True,
            per_active=True)

    def get_evolver_config(self):
        """Return the evolutionary-search settings and the gene pool.

        Each generation keeps the best ``retainSize`` genomes and refills the
        population via the listed mutation/crossover operators (counts per
        operator).
        """
        return {
            'episodes': 5,
            'populationSize': 25,
            'retainSize': 5,
            'mutateOneGeneRandom': 5,
            'mutateTwoGenesRandom': 5,
            'crossoverOneGene': 5,
            'crossOverTwoGenes': 5,
            'mutateOneGeneClose': 5,
            'generations': 2,
            'randomSeed': 1,
            'allPossibleGenesSimple': {
                'learningRate': [0.01, 0.001, 0.002, 0.003, 0.004, 0.0001, 0.0005],
                'batchSize': [32, 64, 128, 256, 512],
                # Fixed: original list contained 0.9, a duplicate of 0.90,
                # which doubled that gene's selection weight; 0.99 completes
                # the intended 0.90..0.995 progression.
                'gamma': [0.90, 0.92, 0.94, 0.96, 0.98, 0.99, 0.995],
                'updateEvery': [1, 2, 3, 4],
                'fc1_units': [64, 128, 256, 512],
                'fc2_units': [64, 128, 256, 512],
                'memory_size': [int(1e4), int(1e5), int(1e6)],
                'tau': [1e-2, 1e-3, 1e-4],
            },
        }

    def run(self, runner):
        """Delegate execution to the framework-provided runner."""
        return runner.run_agent()
if __name__ == "__main__":
    # Entry point: dispatch CLI arguments (train / evolve / replay, etc.)
    # to the experiment. `sys` is now imported explicitly at the top of the
    # file instead of arriving implicitly via the wildcard import.
    run = CardpoleDQN()
    run.command(sys.argv)