solved issue with None values
koloskova committed Mar 18, 2018
1 parent 80b49e3 commit 8e19cc5
Showing 2 changed files with 24 additions and 15 deletions.
37 changes: 23 additions & 14 deletions code/distral_2col0/network.py
@@ -24,23 +24,29 @@ class DQN(nn.Module):
"""
def __init__(self, input_size, num_actions):
super(DQN, self).__init__()
self.l1 = nn.Linear(input_size,100) ## To play with different NN architectures
self.l2 = nn.Linear(100, num_actions)
self.linear1 = nn.Linear(input_size, 50)
self.linear2 = nn.Linear(50, 50)
self.head = nn.Linear(50, num_actions)

def forward(self, x):
return self.l2(F.relu(self.l1(x)))
x = F.leaky_relu(self.linear1(x))
x = F.leaky_relu(self.linear2(x))
return self.head(x)

class PolicyNetwork(nn.Module):
"""
Deep neural network which represents policy network.
"""
def __init__(self, input_size, num_actions):
super(PolicyNetwork, self).__init__()
self.l1 = nn.Linear(input_size,100) ## To play with different NN architectures
self.l2 = nn.Linear(100, num_actions)
self.linear1 = nn.Linear(input_size, 50)
self.linear2 = nn.Linear(50, 50)
self.head = nn.Linear(50, num_actions)

def forward(self, x):
return F.softmax(self.l2(F.relu(self.l1(x))))
x = F.leaky_relu(self.linear1(x))
x = F.leaky_relu(self.linear2(x))
return F.softmax(self.head(x))

def select_action(state, policy, model, num_actions,
EPS_START, EPS_END, EPS_DECAY, steps_done, alpha, beta):
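A side note on the new PolicyNetwork.forward: on recent PyTorch releases, F.softmax without an explicit dim argument emits a deprecation warning and has to guess the dimension. A minimal, self-contained sketch of the same two-hidden-layer policy network with the softmax dimension spelled out (the class name is hypothetical and this variant is not part of the commit; it assumes inputs are (batch, features) tensors):

import torch.nn as nn
import torch.nn.functional as F

class PolicyNetworkExplicitDim(nn.Module):
    """Same 50-unit, leaky-ReLU policy network, with the softmax dim made explicit."""
    def __init__(self, input_size, num_actions):
        super(PolicyNetworkExplicitDim, self).__init__()
        self.linear1 = nn.Linear(input_size, 50)
        self.linear2 = nn.Linear(50, 50)
        self.head = nn.Linear(50, num_actions)

    def forward(self, x):
        x = F.leaky_relu(self.linear1(x))
        x = F.leaky_relu(self.linear2(x))
        # dim=1 normalises over the action dimension of a (batch, num_actions) output
        return F.softmax(self.head(x), dim=1)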
@@ -63,15 +69,10 @@ def select_action(state, policy, model, num_actions,

     #### FOUND ERROR: ( Q ) returns a tensor of nan at some point
     if np.isnan( Q.sum(1).data[0]) :
-        print(Q)
-        print(state)
+        print("Q = ", Q)
+        print("state = ", state)
 
     pi_i = torch.pow(pi0, alpha) * torch.exp(beta * (Q - V))
 
-    #if sum(pi_i.data.numpy()[0] < 0) > 0:
-    #    print("Warning!!!: pi_i has negative values: pi_i", pi_i.data.numpy()[0])
-    #    pi_i = torch.max(torch.zeros_like(pi_i), pi_i)
-    #    probabilities = pi_i.data.numpy()[0]
     m = Categorical(pi_i)
     action = m.sample().data.view(1, 1)
     return action
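For context, the distribution being sampled here is the Distral task policy, pi_i(a|s) proportional to pi_0(a|s)^alpha * exp(beta * (Q(s,a) - V(s))); a single NaN in Q makes Categorical fail, which is what the debug prints above catch. A hedged sketch of an equivalent computation done in log space, which sidesteps the underflow and overflow in the explicit product (the helper name is hypothetical; it assumes V is the soft value log(sum_a pi_0^alpha * exp(beta * Q)) / beta used elsewhere in this file, so the softmax normalisation reproduces the same pi_i):

import torch
import torch.nn.functional as F

def distral_task_policy(pi0, Q, alpha, beta):
    # Unnormalised log-probabilities: alpha * log pi_0 + beta * Q.
    # Softmax over the action dimension supplies the -beta * V constant,
    # so this equals pi_0**alpha * exp(beta * (Q - V)) without ever
    # forming the tiny or huge intermediate terms.
    logits = alpha * torch.log(pi0) + beta * Q
    return F.softmax(logits, dim=1)

Sampling then proceeds as in the original code: Categorical(distral_task_policy(pi0, Q, alpha, beta)).sample().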
@@ -134,7 +135,15 @@ def optimize_model(policy, model, optimizer, memory, batch_size,
     next_state_values = Variable(torch.zeros(batch_size).type(Tensor))
     next_state_values[non_final_mask] = torch.log(
         (torch.pow(policy(non_final_next_states), alpha)
-            * torch.exp(beta * model(non_final_next_states))).sum(1)) / beta
+            * (torch.exp(beta * model(non_final_next_states)) + 1e-16)).sum(1)) / beta
+    try:
+        np.isnan(next_state_values.sum().data[0])
+    except Exception:
+        print("next_state_values:", next_state_values)
+        print(policy(non_final_next_states))
+        print(torch.exp(beta * model(non_final_next_states)))
+        print(model(non_final_next_states))
+
     # Now, we don't want to mess up the loss with a volatile flag, so let's
     # clear it. After this, we'll just end up with a Variable that has
     # requires_grad=False
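The + 1e-16 added above keeps the sum inside torch.log strictly positive: when beta * model(...) is very negative, exp underflows to an all-zero row, the log returns -inf, and the loss turns into NaN, which matches the symptom this commit chases. A hedged sketch of an alternative that computes the same soft value with a max-shifted log-sum-exp, so exp neither overflows nor underflows (the helper name and signature are illustrative, not part of the repository; pi0 stands for policy(non_final_next_states) and Q for model(non_final_next_states)):

import torch

def soft_state_value(pi0, Q, alpha, beta):
    # V(s) = (1/beta) * log( sum_a pi_0(a|s)**alpha * exp(beta * Q(s,a)) ),
    # evaluated by shifting with the per-row maximum before exponentiating.
    logits = alpha * torch.log(pi0) + beta * Q
    max_logit, _ = logits.max(dim=1, keepdim=True)
    lse = max_logit.squeeze(1) + torch.log(torch.exp(logits - max_logit).sum(dim=1))
    return lse / beta

With such a helper, the masked assignment would read next_state_values[non_final_mask] = soft_state_value(policy(non_final_next_states), model(non_final_next_states), alpha, beta).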
2 changes: 1 addition & 1 deletion code/distral_2col0/trainingDistral2col0.py
@@ -12,7 +12,7 @@
 from envs.gridworld_env import GridworldEnv
 from utils import plot_rewards, plot_durations, plot_state, get_screen
 
-def trainD(file_name="Distral_2col", list_of_envs=[GridworldEnv(5),
+def trainD(file_name="Distral_1col", list_of_envs=[GridworldEnv(5),
         GridworldEnv(4), GridworldEnv(6)], batch_size=128, gamma=0.999, alpha=0.8,
         beta=5, eps_start=0.9, eps_end=0.05, eps_decay=5,
         is_plot=False, num_episodes=200,
