Replies: 1 comment 1 reply
-
Hello, the 4 s random termination makes me guess that you haven't recorded a custom reward function for your custom map? This is done by executing `python -m tmrl --record-reward` and driving to the finish line. Then you can run `python -m tmrl --check-environment` and drive manually to check that the printed rewards make sense.
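If that is what is happening, the timing would be consistent with the failure-countdown parameters in the config you posted below. A rough sketch of the arithmetic (this is my reading of those parameters, not a quote of the tmrl internals):

```python
# Back-of-the-envelope check (assumption: an episode in which the reward
# function never registers progress is cut off after roughly
# MIN_STEPS + FAILURE_COUNTDOWN environment steps).
MIN_STEPS = 70             # REWARD_CONFIG["MIN_STEPS"] in the posted config
FAILURE_COUNTDOWN = 10     # REWARD_CONFIG["FAILURE_COUNTDOWN"]
TIME_STEP_DURATION = 0.05  # RTGYM_CONFIG["time_step_duration"], in seconds

cutoff = (MIN_STEPS + FAILURE_COUNTDOWN) * TIME_STEP_DURATION
print(f"earliest failure cutoff: about {cutoff} s")  # 4.0 s, matching the reported terminations
```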
-
I am trying to use tmrl as a Gym environment and to train my model on a custom map, but after about 4 seconds the environment terminates itself. This is the code I have been using:
""import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tmrl import get_environment
from collections import deque
import random
import time
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
class DQN(nn.Module):
def init(self, input_dim, output_dim):
super(DQN, self).init()
self.fc1 = nn.Linear(input_dim, 64)
self.fc2 = nn.Linear(64, 64)
self.fc3 = nn.Linear(64, output_dim)
class DQNAgent:
def init(self, state_shape, action_space):
self.state_shape = state_shape
self.action_space = action_space
self.memory = deque(maxlen=10000)
self.gamma = 0.95
self.epsilon = 1.0
self.epsilon_min = 0.01
self.epsilon_decay = 0.995
self.model = DQN(state_shape[0], action_space).to(device)
self.target_model = DQN(state_shape[0], action_space).to(device)
self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
self.update_target_model()
def preprocess_obs(obs):
speed = obs[0]
lidar = obs[1].flatten()
prev_actions = obs[2]
return np.concatenate([speed, lidar, prev_actions])
def action_to_env_action(action):
if action == 0:
return np.array([1.0, 0.0, -1.0]) # Full throttle, turn left
elif action == 1:
return np.array([1.0, 0.0, 0.0]) # Full throttle, go straight
else:
return np.array([1.0, 0.0, 1.0]) # Full throttle, turn right
def calculate_speed_reward(speed):
return speed * 0.1 # Scale the reward to be smaller than the main rewards
env = get_environment()
state_shape = (1 + 4*19 + 3,) # speed + flattened LIDAR + previous actions
action_space = 3 # Left, Straight, Right
agent = DQNAgent(state_shape, action_space)
batch_size = 32
n_episodes = 1000
time_limit = 300 # 30 seconds time limit
for e in range(n_episodes):
obs, info = env.reset()
state = preprocess_obs(obs)
total_reward = 0
done = False
start_time = time.time()
step_count = 0
episode_speeds = []
torch.save(agent.model.state_dict(), 'tmrl_dqn_model.pth')
print("Training finished.")""
And here is the config file I used. I am not able to figure out why the environment terminates itself after a fixed time:
""{
"RUN_NAME": "test",
"RESET_TRAINING": true,
"BUFFERS_MAXLEN": 5000000,
"RW_MAX_SAMPLES_PER_EPISODE": 10000,
"CUDA_TRAINING": true,
"CUDA_INFERENCE": false,
"VIRTUAL_GAMEPAD": true,
"DCAC": false,
"LOCALHOST_WORKER": true,
"LOCALHOST_TRAINER": true,
"PUBLIC_IP_SERVER": "0.0.0.0",
"PASSWORD": "==>TMRL@UseASecurePasswordHere!<==",
"TLS": false,
"TLS_HOSTNAME": "default",
"TLS_CREDENTIALS_DIRECTORY": "",
"NB_WORKERS": -1,
"WANDB_PROJECT": "tmrl",
"WANDB_ENTITY": "tmrl",
"WANDB_KEY": "df28d4daa98d2df2557d74caf78e40c68adaf288",
"PORT": 55555,
"LOCAL_PORT_SERVER": 55556,
"LOCAL_PORT_TRAINER": 55557,
"LOCAL_PORT_WORKER": 55558,
"BUFFER_SIZE": 536870912,
"HEADER_SIZE": 12,
"SOCKET_TIMEOUT_CONNECT_TRAINER": 300.0,
"SOCKET_TIMEOUT_ACCEPT_TRAINER": 300.0,
"SOCKET_TIMEOUT_CONNECT_ROLLOUT": 300.0,
"SOCKET_TIMEOUT_ACCEPT_ROLLOUT": 300.0,
"SOCKET_TIMEOUT_COMMUNICATE": 30.0,
"SELECT_TIMEOUT_OUTBOUND": 30.0,
"ACK_TIMEOUT_WORKER_TO_SERVER": 300.0,
"ACK_TIMEOUT_TRAINER_TO_SERVER": 300.0,
"ACK_TIMEOUT_SERVER_TO_WORKER": 300.0,
"ACK_TIMEOUT_SERVER_TO_TRAINER": 7200.0,
"RECV_TIMEOUT_TRAINER_FROM_SERVER": 7200.0,
"RECV_TIMEOUT_WORKER_FROM_SERVER": 600.0,
"WAIT_BEFORE_RECONNECTION": 10.0,
"LOOP_SLEEP_TIME": 1.0,
"MAX_EPOCHS": 10000,
"ROUNDS_PER_EPOCH": 100,
"TRAINING_STEPS_PER_ROUND": 200,
"MAX_TRAINING_STEPS_PER_ENVIRONMENT_STEP": 4.0,
"ENVIRONMENT_STEPS_BEFORE_TRAINING": 1000,
"UPDATE_MODEL_INTERVAL": 200,
"UPDATE_BUFFER_INTERVAL": 200,
"SAVE_MODEL_EVERY": 0,
"MEMORY_SIZE": 1000000,
"BATCH_SIZE": 256,
"ALG": {
"ALGORITHM": "SAC",
"LEARN_ENTROPY_COEF":false,
"LR_ACTOR":0.00001,
"LR_CRITIC":0.00005,
"LR_ENTROPY":0.0003,
"GAMMA":0.995,
"POLYAK":0.995,
"TARGET_ENTROPY":-0.5,
"ALPHA":0.01,
"REDQ_N":10,
"REDQ_M":2,
"REDQ_Q_UPDATES_PER_POLICY_UPDATE":20,
"OPTIMIZER_ACTOR": "adam",
"OPTIMIZER_CRITIC": "adam",
"BETAS_ACTOR": [0.997, 0.997],
"BETAS_CRITIC": [0.997, 0.997],
"L2_ACTOR": 0.0,
"L2_CRITIC": 0.0
},
"ENV": {
"RTGYM_INTERFACE": "TM20LIDAR",
"WINDOW_WIDTH": 958,
"WINDOW_HEIGHT": 488,
"SLEEP_TIME_AT_RESET": 1.5,
"IMG_HIST_LEN": 4,
"RTGYM_CONFIG": {
"time_step_duration": 0.05,
"start_obs_capture": 0.04,
"time_step_timeout_factor": 10.0,
"act_buf_len": 2,
"benchmark": false,
"wait_on_done": true,
"ep_max_length": 1000
},
"REWARD_CONFIG": {
"END_OF_TRACK": 100.0,
"CONSTANT_PENALTY": 0.0,
"CHECK_FORWARD": 500,
"CHECK_BACKWARD": 10,
"FAILURE_COUNTDOWN": 10,
"MIN_STEPS": 70,
"MAX_STRAY": 100.0
}
},
"VERSION": "0.6.0"
}""