change the network architecture
koloskova committed Mar 14, 2018
1 parent a9dba53 commit c069185
Showing 3 changed files with 59 additions and 18 deletions.
29 changes: 25 additions & 4 deletions code/dqn/network.py
@@ -26,14 +26,35 @@ def __init__(self, num_actions):
         self.bn2 = nn.BatchNorm2d(10)
         self.conv3 = nn.Conv2d(10, 10, kernel_size=3)
         self.bn3 = nn.BatchNorm2d(10)
-        self.head = nn.Linear(280, num_actions)
+        self.head = nn.Linear(420, num_actions)
 
     def forward(self, x):
-        x = F.relu(self.bn1(self.conv1(x)))
-        x = F.relu(self.bn2(self.conv2(x)))
-        x = F.relu(self.bn3(self.conv3(x)))
+        x = F.leaky_relu(self.bn1(self.conv1(x)))
+        x = F.leaky_relu(self.bn2(self.conv2(x)))
+        x = F.leaky_relu(self.bn3(self.conv3(x)))
         return self.head(x.view(x.size(0), -1))
+
+# class DQN(nn.Module):
+#     """
+#     Deep neural network with represents an agent.
+#     """
+#     def __init__(self, num_actions):
+#         super(DQN, self).__init__()
+#         self.conv1 = nn.Conv2d(1, 5, kernel_size=2)
+#         self.max_pool = nn.MaxPool2d((2,2))
+#         self.bn1 = nn.BatchNorm2d(5)
+#         self.conv2 = nn.Conv2d(5, 20, kernel_size=3)
+#         self.bn2 = nn.BatchNorm2d(20)
+#         self.linear = nn.Linear(80, 20)
+#         # self.bn3 = nn.BatchNorm1d(50)
+#         self.head = nn.Linear(20, num_actions)
+
+#     def forward(self, x):
+#         x = F.leaky_relu(self.max_pool(self.bn1(self.conv1(x))))
+#         x = F.leaky_relu((self.bn2(self.conv2(x))))
+#         x = F.leaky_relu(self.linear(x.view(x.size(0), -1)))
+#         return self.head(x)
 
 def select_action(state, model, num_actions,
                   EPS_START, EPS_END, EPS_DECAY, steps_done):
     """
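A note on the new nn.Linear(420, num_actions) head: a valid, stride-1 convolution shrinks each spatial dimension by kernel_size - 1. If the first two conv layers of this file match the ones visible in code/sql/network.py below (kernels 2 and 3; conv3's kernel 3 is shown in this hunk), the stack removes 5 pixels per dimension and flattens to 10 * (H - 5) * (W - 5) features. A sketch of that arithmetic, with a 12x11 input chosen purely so the numbers come out to 420 (the actual screen size is not visible in the diff):

# Shape check for the new dqn/network.py head (editorial sketch).
# BatchNorm layers are omitted because they do not change shapes.
import torch
import torch.nn as nn

convs = nn.Sequential(
    nn.Conv2d(1, 5, kernel_size=2),    # (H, W)     -> (H-1, W-1)
    nn.Conv2d(5, 10, kernel_size=3),   # (H-1, W-1) -> (H-3, W-3)
    nn.Conv2d(10, 10, kernel_size=3),  # (H-3, W-3) -> (H-5, W-5)
)
x = torch.zeros(1, 1, 12, 11)          # assumed screen size
print(convs(x).view(1, -1).size(1))    # 10 * 7 * 6 = 420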
1 change: 0 additions & 1 deletion code/dqn/trainingDQN.py
@@ -26,7 +26,6 @@ def trainDQN(file_name="DQN", env=GridworldEnv(1), batch_size=128,
     DQN training routine. Retuns rewards and durations logs.
     Plot environment screen
     """
-
     if is_plot:
         env.reset()
         plt.ion()
47 changes: 34 additions & 13 deletions code/sql/network.py
@@ -14,26 +14,47 @@
 ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor
 Tensor = FloatTensor
 
+
 class DQN(nn.Module):
     """
     Deep neural network with represents an agent.
     """
     def __init__(self, num_actions):
         super(DQN, self).__init__()
-        self.conv1 = nn.Conv2d(1, 10, kernel_size=2)
-        self.max_pool = nn.MaxPool2d((2,2))
-        self.bn1 = nn.BatchNorm2d(10)
-        self.conv2 = nn.Conv2d(10, 20, kernel_size=3)
-        self.bn2 = nn.BatchNorm2d(20)
-        self.linear = nn.Linear(80, 20)
-        # self.bn3 = nn.BatchNorm1d(50)
-        self.head = nn.Linear(20, num_actions)
+        self.conv1 = nn.Conv2d(1, 5, kernel_size=2)
+        self.bn1 = nn.BatchNorm2d(5)
+        self.conv2 = nn.Conv2d(5, 10, kernel_size=3)
+        self.bn2 = nn.BatchNorm2d(10)
+        self.conv3 = nn.Conv2d(10, 10, kernel_size=3)
+        self.bn3 = nn.BatchNorm2d(10)
+        self.head = nn.Linear(200, num_actions)
 
     def forward(self, x):
-        x = F.leaky_relu(self.max_pool(self.bn1(self.conv1(x))))
+        x = F.leaky_relu(self.bn1(self.conv1(x)))
         x = F.leaky_relu(self.bn2(self.conv2(x)))
-        x = F.leaky_relu(self.linear(x.view(x.size(0), -1)))
-        return self.head(x)
+        x = F.leaky_relu(self.bn3(self.conv3(x)))
+        return self.head(x.view(x.size(0), -1))
+
+# class DQN(nn.Module):
+#     """
+#     Deep neural network with represents an agent.
+#     """
+#     def __init__(self, num_actions):
+#         super(DQN, self).__init__()
+#         self.conv1 = nn.Conv2d(1, 10, kernel_size=2)
+#         self.max_pool = nn.MaxPool2d((2,2))
+#         self.bn1 = nn.BatchNorm2d(10)
+#         self.conv2 = nn.Conv2d(10, 20, kernel_size=3)
+#         self.bn2 = nn.BatchNorm2d(20)
+#         self.linear = nn.Linear(80, 20)
+#         # self.bn3 = nn.BatchNorm1d(50)
+#         self.head = nn.Linear(20, num_actions)
+
+#     def forward(self, x):
+#         x = F.leaky_relu(self.max_pool(self.bn1(self.conv1(x))))
+#         x = F.leaky_relu(self.bn2(self.conv2(x)))
+#         x = F.leaky_relu(self.linear(x.view(x.size(0), -1)))
+#         return self.head(x)
 
 def select_action(state, model, num_actions,
                   EPS_START, EPS_END, EPS_DECAY, steps_done):
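The same arithmetic explains this file's nn.Linear(200, num_actions): 10 * (H - 5) * (W - 5) = 200 requires (H - 5) * (W - 5) = 20, e.g. a 10x9 screen. A dry run of the new layers as defined above (editorial sketch; the 10x9 input is an assumption, not stated in the diff):

# Shape check for the new code/sql/network.py head.
import torch
import torch.nn as nn
import torch.nn.functional as F

conv1, bn1 = nn.Conv2d(1, 5, kernel_size=2), nn.BatchNorm2d(5)
conv2, bn2 = nn.Conv2d(5, 10, kernel_size=3), nn.BatchNorm2d(10)
conv3, bn3 = nn.Conv2d(10, 10, kernel_size=3), nn.BatchNorm2d(10)

x = torch.zeros(2, 1, 10, 9)            # assumed 10x9 screen, batch of 2
x = F.leaky_relu(bn1(conv1(x)))         # -> (2, 5, 9, 8)
x = F.leaky_relu(bn2(conv2(x)))         # -> (2, 10, 7, 6)
x = F.leaky_relu(bn3(conv3(x)))         # -> (2, 10, 5, 4)
print(x.view(x.size(0), -1).size(1))    # 200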
@@ -93,6 +114,6 @@ def optimize_model(model, optimizer, memory, BATCH_SIZE, GAMMA, BETA):
     # Optimize the model
     optimizer.zero_grad()
     loss.backward()
-    # for param in model.parameters():
-    #     param.grad.data.clamp_(-1, 1)
+    for param in model.parameters():
+        param.grad.data.clamp_(-1, 1)
     optimizer.step()
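Un-commenting this loop turns on element-wise gradient clamping: after loss.backward(), every gradient entry is clamped in place to [-1, 1] before optimizer.step(), a common guard against exploding gradients in DQN implementations. A self-contained sketch of the effect (hypothetical toy model, not from this repo):

# Element-wise gradient clamping, as enabled in the commit.
import torch
import torch.nn as nn

model = nn.Linear(4, 2)                       # stand-in model
loss = (model(torch.ones(1, 4)) * 100).sum()  # loss with large gradients
loss.backward()                               # weight grads are all 100
for param in model.parameters():
    param.grad.data.clamp_(-1, 1)             # in-place clamp, as in the diff
print(model.weight.grad.abs().max())          # tensor(1.)

Note that this clamps each gradient element independently; norm-based clipping (torch.nn.utils.clip_grad_norm_) would be the alternative that preserves gradient direction.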
