change the network architecture
koloskova committed Mar 14, 2018
1 parent a9dba53 commit c069185
Showing 3 changed files with 59 additions and 18 deletions.
29 changes: 25 additions & 4 deletions code/dqn/network.py
@@ -26,14 +26,35 @@ def __init__(self, num_actions):
         self.bn2 = nn.BatchNorm2d(10)
         self.conv3 = nn.Conv2d(10, 10, kernel_size=3)
         self.bn3 = nn.BatchNorm2d(10)
-        self.head = nn.Linear(280, num_actions)
+        self.head = nn.Linear(420, num_actions)
 
     def forward(self, x):
-        x = F.relu(self.bn1(self.conv1(x)))
-        x = F.relu(self.bn2(self.conv2(x)))
-        x = F.relu(self.bn3(self.conv3(x)))
+        x = F.leaky_relu(self.bn1(self.conv1(x)))
+        x = F.leaky_relu(self.bn2(self.conv2(x)))
+        x = F.leaky_relu(self.bn3(self.conv3(x)))
         return self.head(x.view(x.size(0), -1))
+
+# class DQN(nn.Module):
+#     """
+#     Deep neural network with represents an agent.
+#     """
+#     def __init__(self, num_actions):
+#         super(DQN, self).__init__()
+#         self.conv1 = nn.Conv2d(1, 5, kernel_size=2)
+#         self.max_pool = nn.MaxPool2d((2,2))
+#         self.bn1 = nn.BatchNorm2d(5)
+#         self.conv2 = nn.Conv2d(5, 20, kernel_size=3)
+#         self.bn2 = nn.BatchNorm2d(20)
+#         self.linear = nn.Linear(80, 20)
+#         # self.bn3 = nn.BatchNorm1d(50)
+#         self.head = nn.Linear(20, num_actions)
+
+#     def forward(self, x):
+#         x = F.leaky_relu(self.max_pool(self.bn1(self.conv1(x))))
+#         x = F.leaky_relu((self.bn2(self.conv2(x))))
+#         x = F.leaky_relu(self.linear(x.view(x.size(0), -1)))
+#         return self.head(x)
 
 def select_action(state, model, num_actions,
                   EPS_START, EPS_END, EPS_DECAY, steps_done):
     """
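A note on the new nn.Linear(420, num_actions) head: a valid, stride-1 convolution shrinks each spatial dimension by kernel_size - 1. If the first two conv layers of this file match the ones visible in code/sql/network.py below (kernels 2 and 3; conv3's kernel 3 is shown in this hunk), the stack removes 5 pixels per dimension and flattens to 10 * (H - 5) * (W - 5) features. A sketch of that arithmetic, with a 12x11 input chosen purely so the numbers come out to 420 (the actual screen size is not visible in the diff):

# Shape check for the new dqn/network.py head (editorial sketch).
# BatchNorm layers are omitted because they do not change shapes.
import torch
import torch.nn as nn

convs = nn.Sequential(
    nn.Conv2d(1, 5, kernel_size=2),    # (H, W)     -> (H-1, W-1)
    nn.Conv2d(5, 10, kernel_size=3),   # (H-1, W-1) -> (H-3, W-3)
    nn.Conv2d(10, 10, kernel_size=3),  # (H-3, W-3) -> (H-5, W-5)
)
x = torch.zeros(1, 1, 12, 11)          # assumed screen size
print(convs(x).view(1, -1).size(1))    # 10 * 7 * 6 = 420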
1 change: 0 additions & 1 deletion code/dqn/trainingDQN.py
@@ -26,7 +26,6 @@ def trainDQN(file_name="DQN", env=GridworldEnv(1), batch_size=128,
     DQN training routine. Retuns rewards and durations logs.
     Plot environment screen
     """
-
     if is_plot:
         env.reset()
         plt.ion()
47 changes: 34 additions & 13 deletions code/sql/network.py
@@ -14,26 +14,47 @@
 ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor
 Tensor = FloatTensor
 
+
 class DQN(nn.Module):
     """
     Deep neural network with represents an agent.
     """
     def __init__(self, num_actions):
         super(DQN, self).__init__()
-        self.conv1 = nn.Conv2d(1, 10, kernel_size=2)
-        self.max_pool = nn.MaxPool2d((2,2))
-        self.bn1 = nn.BatchNorm2d(10)
-        self.conv2 = nn.Conv2d(10, 20, kernel_size=3)
-        self.bn2 = nn.BatchNorm2d(20)
-        self.linear = nn.Linear(80, 20)
-        # self.bn3 = nn.BatchNorm1d(50)
-        self.head = nn.Linear(20, num_actions)
+        self.conv1 = nn.Conv2d(1, 5, kernel_size=2)
+        self.bn1 = nn.BatchNorm2d(5)
+        self.conv2 = nn.Conv2d(5, 10, kernel_size=3)
+        self.bn2 = nn.BatchNorm2d(10)
+        self.conv3 = nn.Conv2d(10, 10, kernel_size=3)
+        self.bn3 = nn.BatchNorm2d(10)
+        self.head = nn.Linear(200, num_actions)
 
     def forward(self, x):
-        x = F.leaky_relu(self.max_pool(self.bn1(self.conv1(x))))
+        x = F.leaky_relu(self.bn1(self.conv1(x)))
         x = F.leaky_relu(self.bn2(self.conv2(x)))
-        x = F.leaky_relu(self.linear(x.view(x.size(0), -1)))
-        return self.head(x)
+        x = F.leaky_relu(self.bn3(self.conv3(x)))
+        return self.head(x.view(x.size(0), -1))
+
+# class DQN(nn.Module):
+#     """
+#     Deep neural network with represents an agent.
+#     """
+#     def __init__(self, num_actions):
+#         super(DQN, self).__init__()
+#         self.conv1 = nn.Conv2d(1, 10, kernel_size=2)
+#         self.max_pool = nn.MaxPool2d((2,2))
+#         self.bn1 = nn.BatchNorm2d(10)
+#         self.conv2 = nn.Conv2d(10, 20, kernel_size=3)
+#         self.bn2 = nn.BatchNorm2d(20)
+#         self.linear = nn.Linear(80, 20)
+#         # self.bn3 = nn.BatchNorm1d(50)
+#         self.head = nn.Linear(20, num_actions)
+
+#     def forward(self, x):
+#         x = F.leaky_relu(self.max_pool(self.bn1(self.conv1(x))))
+#         x = F.leaky_relu(self.bn2(self.conv2(x)))
+#         x = F.leaky_relu(self.linear(x.view(x.size(0), -1)))
+#         return self.head(x)
 
 def select_action(state, model, num_actions,
                   EPS_START, EPS_END, EPS_DECAY, steps_done):
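The same arithmetic explains this file's nn.Linear(200, num_actions): 10 * (H - 5) * (W - 5) = 200 requires (H - 5) * (W - 5) = 20, e.g. a 10x9 screen. A dry run of the new layers as defined above (editorial sketch; the 10x9 input is an assumption, not stated in the diff):

# Shape check for the new code/sql/network.py head.
import torch
import torch.nn as nn
import torch.nn.functional as F

conv1, bn1 = nn.Conv2d(1, 5, kernel_size=2), nn.BatchNorm2d(5)
conv2, bn2 = nn.Conv2d(5, 10, kernel_size=3), nn.BatchNorm2d(10)
conv3, bn3 = nn.Conv2d(10, 10, kernel_size=3), nn.BatchNorm2d(10)

x = torch.zeros(2, 1, 10, 9)            # assumed 10x9 screen, batch of 2
x = F.leaky_relu(bn1(conv1(x)))         # -> (2, 5, 9, 8)
x = F.leaky_relu(bn2(conv2(x)))         # -> (2, 10, 7, 6)
x = F.leaky_relu(bn3(conv3(x)))         # -> (2, 10, 5, 4)
print(x.view(x.size(0), -1).size(1))    # 200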
@@ -93,6 +114,6 @@ def optimize_model(model, optimizer, memory, BATCH_SIZE, GAMMA, BETA):
     # Optimize the model
     optimizer.zero_grad()
     loss.backward()
-    # for param in model.parameters():
-    #     param.grad.data.clamp_(-1, 1)
+    for param in model.parameters():
+        param.grad.data.clamp_(-1, 1)
     optimizer.step()
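Un-commenting this loop turns on element-wise gradient clamping: after loss.backward(), every gradient entry is clamped in place to [-1, 1] before optimizer.step(), a common guard against exploding gradients in DQN implementations. A self-contained sketch of the effect (hypothetical toy model, not from this repo):

# Element-wise gradient clamping, as enabled in the commit.
import torch
import torch.nn as nn

model = nn.Linear(4, 2)                       # stand-in model
loss = (model(torch.ones(1, 4)) * 100).sum()  # loss with large gradients
loss.backward()                               # weight grads are all 100
for param in model.parameters():
    param.grad.data.clamp_(-1, 1)             # in-place clamp, as in the diff
print(model.weight.grad.abs().max())          # tensor(1.)

Note that this clamps each gradient element independently; norm-based clipping (torch.nn.utils.clip_grad_norm_) would be the alternative that preserves gradient direction.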
