Skip to content

Commit

Permalink
reslove_conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
boss-chanon committed Feb 15, 2024
2 parents 50ea9ae + f4932b0 commit 292e0ba
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Runs scripts/prepare_hf_datasets.py with two positional arguments.
# Judging by the variable names, the first is the Hugging Face dataset to read
# and the second is where the JSONL output gets saved -- confirm against the
# Python script. Adjust both paths for your environment before running.
HUGGINGFACE_DATASET_SRC_PATH=/workspace/datset1
JSONL_DATASET_SAVE_PATH=/workspace/datset2

# Expansions are quoted so a path containing spaces or glob characters is
# still passed as exactly one argument.
python scripts/prepare_hf_datasets.py \
  "$HUGGINGFACE_DATASET_SRC_PATH" \
  "$JSONL_DATASET_SAVE_PATH"
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Prepares the train and eval splits by running
# scripts/prepare_openthaigpt.py once per split.

# Start from a clean module environment, restore the system lmod defaults,
# then load the toolchain modules this job uses.
module purge
source /opt/cray/pe/cpe/23.09/restore_lmod_system_defaults.sh
module load Miniconda3
module load cudatoolkit/23.3_11.8
module load PrgEnv-gnu
module load cpe-cuda

# Leave whatever conda environment is currently active before switching to
# the project environment.
conda deactivate
conda activate /project/lt200056-opgpth/new/TinyLlama_2024/.conda_new

# Paths handed to the preparation script; adjust for your environment.
SOURCE_DIR=/workspace/source
TOKENIZER_DIR=/workspace/data
OUTPUT_DIR=/workspace/output

# Train split. Expansions are quoted so each path stays a single argument
# even if it contains spaces or glob characters.
python scripts/prepare_openthaigpt.py \
  --source_path "$SOURCE_DIR" \
  --split train --percentage 1.0 \
  --tokenizer_path "$TOKENIZER_DIR" \
  --destination_path "$OUTPUT_DIR"

# Eval split, written to the same destination directory.
# NOTE(review): only this run passes --chunk_size; the train run above relies
# on the script's default -- confirm the asymmetry is intentional.
python scripts/prepare_openthaigpt.py \
  --source_path "$SOURCE_DIR" \
  --split eval --percentage 1.0 \
  --tokenizer_path "$TOKENIZER_DIR" \
  --destination_path "$OUTPUT_DIR" \
  --chunk_size 524544
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@


# Launches pretrain/tinyllama.py through srun.

# Module setup. The commented-out lines are alternative configurations kept
# for reference; they are not executed.
# source /opt/cray/pe/cpe/23.09/restore_lmod_system_defaults.sh
module purge
module load Miniconda3/22.11.1-1
# module load cpe-cuda/23.03
module load cudatoolkit/23.3_11.8
module load gcc/11.2.0
module load PrgEnv-nvidia
# module load gcc/11.2
# module load PrgEnv-gnu
# module load cpe-cuda
# module load cudatoolkit/22.7_11.7
# module load craype-accel-nvidia80
# module load aws-ofi-nccl

# Training and validation data locations; adjust for your environment.
TRAIN_DATA_DIR=/workspace/train
VAL_DATA_DIR=/workspace/val

# Exported so the processes started by srun inherit it.
export WANDB_MODE=offline

# NOTE(review): --devices / --num_nodes presumably have to match the Slurm
# allocation this runs under (GPUs per node / node count) -- confirm.
# The last argument deliberately has no trailing backslash: a dangling
# continuation would swallow whatever line is added after the command.
srun python pretrain/tinyllama.py \
  --train_data_dir "$TRAIN_DATA_DIR" \
  --val_data_dir "$VAL_DATA_DIR" \
  --devices 4 \
  --num_nodes 10

0 comments on commit 292e0ba

Please sign in to comment.