# ftr_envs/tasks/crossing/agents/ftr_algo_happo_cfg.yaml

# Root node: every setting in this file lives under `params`.
params:

  # Top-level seed; a relative interpolation that mirrors `config.seed`.
  seed: ${.config.seed}

  algo:
    name: happo

  # Both checkpoint entries are derived from `config.checkpoint`.
  # NOTE(review): `if` is presumably a custom resolver registered by the
  # launcher (it is not a YAML feature) — confirm where it is defined.
  load_checkpoint: ${if:${.config.checkpoint},True,False} # flag which sets whether to load the checkpoint
  load_path: ${.config.checkpoint} # path to the checkpoint to load

  # Primary knobs. Other entries in this file (`seed`, `load_checkpoint`,
  # `load_path`, and everything under `learn`) interpolate from these values.
  config:
    seed: 40
    name: ftr_algo_happo
    # The three empty strings below are presumably treated as "not set" by
    # the consumer — confirm against the launcher.
    experiment: ""
    device: cuda:0
    max_iterations: ""
    checkpoint: ""
    test: false

    num_agents: 2

  # Derived settings: every value here is an interpolation of a `config` entry.
  learn:
    agent_name: ${..config.name}
    # NOTE(review): `resolve_default` is presumably a custom resolver that
    # picks between `config.name` and `config.experiment` — confirm.
    full_experiment_name: logs/${resolve_default:${..config.name},${..config.experiment}}
    # `device` and `device_name` both resolve to the same `config.device` value.
    device: ${..config.device}
    device_name: ${..config.device}

    test: ${..config.test}


  # --- Run identification ---
  # `env_name` is computed from `learn.full_experiment_name` through the
  # `basename` resolver.
  env_name: ${basename:${.learn.full_experiment_name}}
  algorithm_name: happo
  experiment_name: check
  run_dir: ./logs

  # --- Rollout / network layout switches ---
  # NOTE(review): exact semantics are defined by the runner that consumes
  # these keys; verify there before changing values.
  use_centralized_V: true
  use_obs_instead_of_state: false
  num_env_steps: 3072000
  episode_length: 8
  n_rollout_threads: 80
  n_eval_rollout_threads: 1
  use_linear_lr_decay: false
  hidden_size: 512
  use_render: false
  recurrent_N: 1
  use_single_network: false

  # --- Saving, evaluation and logging cadence ---
  save_interval: 1
  use_eval: false
  eval_interval: 25
  log_interval: 25
  eval_episodes: 10000

  # --- Return / value-normalisation settings ---
  # NOTE(review): semantics are defined by the trainer that reads these keys.
  gamma: 0.96
  gae_lambda: 0.95
  use_gae: true
  use_popart: true
  use_valuenorm: true
  use_proper_time_limits: false

  # --- Policy-update settings ---
  kl_threshold: 0.016
  ls_step: 10
  accept_ratio: 0.5
  clip_param: 0.2
  ppo_epoch: 5
  num_mini_batch: 1
  # Explicit null. The value was previously left empty, which YAML already
  # parses as null; spelling it out makes clear it is intentionally unset.
  # NOTE(review): presumably only consulted when a recurrent policy is
  # enabled (both recurrent flags below are false) — confirm in the trainer.
  data_chunk_length: null
  value_loss_coef: 1
  entropy_coef: 0.0
  max_grad_norm: 10
  huber_delta: 10.0
  use_recurrent_policy: false
  use_naive_recurrent_policy: false
  use_max_grad_norm: true
  use_clipped_value_loss: true
  use_huber_loss: true
  use_value_active_masks: false
  use_policy_active_masks: false

  # --- Optimizer settings ---
  # Floats are written with an explicit fractional digit; the parsed values
  # are unchanged (5.0e-4 == 5.e-4).
  lr: 5.0e-4
  critic_lr: 5.0e-4
  opti_eps: 1.0e-5
  weight_decay: 0.0

  # --- Weight initialisation ---
  gain: 0.01
  actor_gain: 0.01
  use_orthogonal: true

  # --- Network structure ---
  use_feature_normalization: true
  use_ReLU: true
  stacked_frames: 1
  layer_N: 2

  # NOTE(review): the consumer of the two std coefficients is not visible
  # from this file — confirm their meaning before tuning.
  std_x_coef: 1
  std_y_coef: 0.5