Skip to content

Commit

Permalink
logging in aim after resuming works
Browse files: browse the repository at this point in the history
  • Loading branch information
FilyaGeikyan committed Aug 28, 2024
1 parent 5fce163 commit 9248e13
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions train_configs/debug_model.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ log_freq = 1
enable_color_printing = true
enable_aim = true
save_aim_folder = "aim"
#aim_hash = "c6b4d8b340f74287b82ef928"
aim_hash = "1d56ec7bed87438684a8da6b"
#aim_experiment_name = "hello"

[model]
Expand All @@ -38,7 +38,7 @@ gradient_accumulation_steps = 1
seq_len = 2048
warmup_steps = 2 # LR scheduler warm-up steps; normally 20% of the training steps
max_norm = 1.0 # grad norm clipping
steps = 20
steps = 30
data_parallel_degree = -1
tensor_parallel_degree = 1
compile = false
Expand All @@ -49,8 +49,10 @@ pipeline_parallel_degree = 1
enable_async_tensor_parallel = false

[checkpoint]
enable_checkpoint = false
folder = "checkpoint"
enable_checkpoint = true
save_folder = "checkpoint"
load_folder = "checkpoint"
create_seed_checkpoint = false
interval_type = "steps"
interval = 5
model_weights_only = false
Expand Down

0 comments on commit 9248e13

Please sign in to comment.