diff --git a/train.lua b/train.lua
index 083a30d5..85ab55de 100644
--- a/train.lua
+++ b/train.lua
@@ -263,7 +263,8 @@ function feval(x)
     end
     ------------------------ misc ----------------------
     -- transfer final state to initial state (BPTT)
-    init_state_global = rnn_state[#rnn_state] -- NOTE: I don't think this needs to be a clone, right?
+    -- NOTE: the line below actually needs a clone. Otherwise, at t=1 during backpropagation, rnn_state[0] would be equal to the init_state_global of the next batch, not the state that was used in this batch's forward pass.
+    init_state_global = clone_list(rnn_state[#rnn_state])
     -- clip gradient element-wise
     grad_params:clamp(-opt.grad_clip, opt.grad_clip)
     return loss, grad_params
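
For context, here is a minimal sketch of the aliasing the patch fixes (plain Lua/Torch, not the actual char-rnn code; `clone_list` below is a stand-in for the helper defined in util/model_utils.lua, and the `fill` calls stand in for the network clones overwriting their output buffers in place on the next forward pass):

```lua
-- Minimal sketch of the aliasing bug, simplified from char-rnn.
require 'torch'

-- stand-in for char-rnn's clone_list: deep-copies every tensor in a
-- list so the copy shares no storage with the original
local function clone_list(tensor_list)
    local out = {}
    for k, v in pairs(tensor_list) do out[k] = v:clone() end
    return out
end

-- final hidden state of the current batch (one tensor per layer in
-- char-rnn; a single 2-element tensor here for brevity)
local final_state = { torch.ones(2) }

-- without a clone, init_state_global merely aliases the same tensor:
local init_state_global = final_state
final_state[1]:fill(9)           -- next forward pass reuses the buffer in place
print(init_state_global[1][1])   -- 9: backprop at t=1 now reads the wrong rnn_state[0]

-- with the clone, the saved state survives the next forward pass:
final_state[1]:fill(1)
init_state_global = clone_list(final_state)
final_state[1]:fill(9)
print(init_state_global[1][1])   -- 1: still the state the forward actually used
```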