Commit 7463d47

verify

Signed-off-by: Jiafu Zhang <[email protected]>
jiafuzha committed Dec 20, 2023
1 parent f28c836 commit 7463d47
Showing 6 changed files with 243 additions and 5 deletions.
153 changes: 153 additions & 0 deletions .github/workflows/workflow_finetune.yml
@@ -0,0 +1,153 @@
name: Finetune

on:
  workflow_call:
    inputs:
      ci_type:
        type: string
        default: 'pr'

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-ft
  cancel-in-progress: true

jobs:
  inference:
    name: finetune test
    strategy:
      matrix:
        model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, huggyllama/llama-7b ]
        isPR:
          - ${{inputs.ci_type == 'pr'}}

      exclude:
        - { isPR: true }
      include:
        - { model: "EleutherAI/gpt-j-6b"}
        - { model: "meta-llama/Llama-2-7b-chat-hf"}

    runs-on: self-hosted

    defaults:
      run:
        shell: bash
    container:
      image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
      env:
        http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
        https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
      volumes:
        - /var/run/docker.sock:/var/run/docker.sock

    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Load environment variables
        run: cat ~/llm-ray-actions-runner/.env >> $GITHUB_ENV

      - name: Build Docker Image
        run: |
          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} -f dev/docker/Dockerfile.cpu_and_deepspeed -t finetune:latest && yes | docker container prune && yes |
          docker image prune -f
      - name: Start Docker Container
        run: |
          cid=$(docker ps -q --filter "name=finetune")
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
          docker run -tid -v ${{ vars.MODEL_CACHE_PATH }}:/root/.cache/huggingface/hub -v ${{ vars.CODE_CHECKOUT_PATH }}:/root/llm-on-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} --name="finetune" --hostname="finetune-container" finetune:latest
      - name: Run Finetune Test
        run: |
          docker exec "finetune" bash -c "source \$(python -c 'import oneccl_bindings_for_pytorch as torch_ccl;print(torch_ccl.cwd)')/env/setvars.sh; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --head --node-ip-address 127.0.0.1 --ray-debugger-external; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --address='127.0.0.1:6379' --ray-debugger-external"
          CMD=$(cat << EOF
          conf_path = "finetune/finetune.conf"
          with open(conf_path, encoding="utf-8") as reader:
              result = eval(reader.read())
          result['General']['base_model'] = "${{ matrix.model }}"
          if "${{ matrix.model }}" == "mosaicml/mpt-7b-chat":
              result['General']['config']['trust_remote_code'] = True
          else:
              result['General']['config']['trust_remote_code'] = False
          if "${{ matrix.model }}" == "EleutherAI/gpt-j-6b" or "${{ matrix.model }}" == "gpt2":
              result['General']["gpt_base_model"] = True
          else:
              result['General']["gpt_base_model"] = False
          if "${{ matrix.model }}" == "meta-llama/Llama-2-7b-chat-hf":
              result['General']["config"]["use_auth_token"] = "${{ env.HF_ACCESS_TOKEN }}"
          else:
              result['General']["config"]["use_auth_token"] = None
          result['Training']['epochs'] = 1
          if "${{ matrix.model }}" == "gpt2":
              # to verify oneccl
              result['Training']['num_training_workers'] = 2
          else:
              result['Training']['num_training_workers'] = 1
          result['General']['lora_config'] = None
          with open(conf_path, 'w') as output:
              print(result, file=output)
          EOF
          )
          docker exec "finetune" python -c "$CMD"
          docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
      - name: Run PEFT-LoRA Test
        run: |
          docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*"
          CMD=$(cat << EOF
          conf_path = "finetune/finetune.conf"
          with open(conf_path, encoding="utf-8") as reader:
              result = eval(reader.read())
          result['General']['lora_config'] = {
              "task_type": "CAUSAL_LM",
              "r": 8,
              "lora_alpha": 32,
              "lora_dropout": 0.1
          }
          with open(conf_path, 'w') as output:
              print(result, file=output)
          EOF
          )
          docker exec "finetune" python -c "$CMD"
          docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
      - name: Run Deltatuner Test on DENAS-LoRA Model
        run: |
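          # The chat/7b models matched below are skipped (presumably because no
          # DENAS best-structure file ships for them); for the remaining models,
          # the best_structure files bundled with the installed deltatuner
          # package are copied into examples/ and used.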
          if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b-chat|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf)$ ]]; then
            echo ${{ matrix.model }} is not supported!
          else
            docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*"
            CMD=$(cat << EOF
          import os
          os.system("cp -r $(python -m pip show deltatuner | grep Location | cut -d: -f2)/deltatuner/conf/best_structure examples/")
          conf_path = "finetune/finetune.conf"
          with open(conf_path, encoding="utf-8") as reader:
              result = eval(reader.read())
          result['General']['lora_config'] = {
              "task_type": "CAUSAL_LM",
              "r": 8,
              "lora_alpha": 32,
              "lora_dropout": 0.1
          }
          result['General']['deltatuner_config'] = {
              "algo": "lora",
              "denas": True,
              "best_model_structure": f"examples/best_structure/${{ matrix.model }}-best_structure.jsonl",
          }
          with open(conf_path, 'w') as output:
              print(result, file=output)
          EOF
          )
            docker exec "finetune" python -c "$CMD"
            docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
          fi
      - name: Stop Ray
        run: |
          cid=$(docker ps -q --filter "name=finetune")
          if [[ ! -z "$cid" ]]; then
            docker exec "finetune" bash -c "ray stop"
          fi
      - name: Stop Container
        if: success() || failure()
        run: |
          cid=$(docker ps -q --filter "name=finetune")
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
      - name: Test Summary
        run: echo "to be continued"
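
Note: the three test steps above rewrite finetune/finetune.conf by eval()-ing it as a Python dict literal, patching a few keys, and printing the dict back out. Below is a minimal standalone sketch of that read-patch-write round-trip, assuming the file really is a plain dict literal; it swaps the workflow's eval for ast.literal_eval, which accepts only literals. The path and keys mirror the workflow; everything else is illustrative.

import ast

conf_path = "finetune/finetune.conf"

# Parse the config; ast.literal_eval accepts only Python literals
# (dicts, lists, strings, numbers, True/False/None), unlike eval().
with open(conf_path, encoding="utf-8") as reader:
    result = ast.literal_eval(reader.read())

# Patch it the way the PEFT-LoRA step above does.
result["General"]["lora_config"] = {
    "task_type": "CAUSAL_LM",
    "r": 8,
    "lora_alpha": 32,
    "lora_dropout": 0.1,
}

# Write the dict back as a literal so the next read round-trips.
with open(conf_path, "w") as output:
    print(result, file=output)

The same pattern covers the base-model, PEFT-LoRA, and deltatuner patches above; only the mutated keys differ.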
36 changes: 36 additions & 0 deletions .github/workflows/workflow_finetune_gpu.yml
@@ -0,0 +1,36 @@
name: Finetune on Intel GPU

on:
  workflow_call:

jobs:
  finetune:
    name: finetune on gpu test
    strategy:
      matrix:
        model: [ pythia-6.9b, gpt-j-6b ]
    runs-on: self-hosted

    defaults:
      run:
        shell: bash
    container:
      image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
      env:
        http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
        https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
      volumes:
        - /var/run/docker.sock:/var/run/docker.sock

    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Running task on Intel GPU
        run: |
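          # Pack the current checkout and hand it to the borealis runner on the
          # host, which drives the actual finetune job on the Intel GPU machine.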
          rm ~/borealis-runner/llm-on-ray.tar.gz -f
          tar zcf ~/borealis-runner/llm-on-ray.tar.gz -C ~/actions-runner/_work/llm-on-ray .
          cd ~/borealis-runner/
          python3 finetune_on_pvc.py --base_model "${{ matrix.model }}"
      - name: Test Summary
        run: echo "to be continued"
13 changes: 8 additions & 5 deletions .github/workflows/workflow_inference.yml
@@ -32,16 +32,18 @@ jobs:
          model: mpt-7b

    runs-on: self-hosted

    defaults:
      run:
        shell: bash
    container:
-     image: 10.1.2.13:5000/llmray-build
+     image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
      env:
-       http_proxy: http://proxy-chain.intel.com:911
-       https_proxy: http://proxy-chain.intel.com:911
+       http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
+       https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
      volumes:
        - /var/run/docker.sock:/var/run/docker.sock

    steps:
      - name: Checkout
        uses: actions/checkout@v2
@@ -64,7 +66,7 @@ jobs:
            DF_SUFFIX=".cpu_and_deepspeed"
          fi
          PREFIX=${{steps.prefix.outputs.prefix}}
-         docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=http://proxy-chain.intel.com:911 --build-arg https_proxy=http://proxy-chain.intel.com:911 -f dev/docker/Dockerfile${DF_SUFFIX} -t ${PREFIX}:latest && yes | docker container prune && yes |
+         docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${PREFIX}:latest && yes | docker container prune && yes |
          docker image prune -f
      - name: Start Docker Container
@@ -73,7 +75,8 @@
          cid=$(docker ps -q --filter "name=${PREFIX}")
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
          echo "pass is ${GITHUB_WORKSPACE}"
-         docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v /home/ci/actions-runner/_work/llm-on-ray/llm-on-ray:/root/llm-on-ray -e http_proxy=http://proxy-chain.intel.com:911 -e https_proxy=http://proxy-chain.intel.com:911 --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest
+         docker run -tid -v ${{ vars.MODEL_CACHE_PATH }}:/root/.cache/huggingface/hub -v ${{ vars.CODE_CHECKOUT_PATH }}:/root/llm-on-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest
+         docker run -tid -v ${{ vars.MODEL_CACHE_PATH }}:/root/.cache/huggingface/hub -v ${{ vars.CODE_CHECKOUT_PATH }}:/root/llm-on-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest
      - name: Start Ray Cluster
        run: |
23 changes: 23 additions & 0 deletions .github/workflows/workflow_orders_on_merge.yml
@@ -0,0 +1,23 @@
name: llm-ray inference & finetune

on:
  push:
    branches:
      - main
    paths:
      - '.github/**'
      - 'docker/**'
      - 'common/**'
      - 'dev/docker/**'
      - 'finetune/**'
      - 'inference/**'
      - 'rlhf/**'
      - 'tools/**'

jobs:

  call-inference:
    uses: ./.github/workflows/workflow_inference.yml

  call-finetune:
    uses: ./.github/workflows/workflow_finetune.yml
20 changes: 20 additions & 0 deletions .github/workflows/workflow_orders_nightly.yml
@@ -0,0 +1,20 @@
name: llm-ray inference & finetune nightly

on:
  schedule:
    - cron: "0 16 * * *"
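    # GitHub Actions cron schedules run in UTC, so this fires daily at 16:00 UTC.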

jobs:

  call-inference:
    uses: ./.github/workflows/workflow_inference.yml
    with:
      ci_type: nightly

  call-finetune:
    uses: ./.github/workflows/workflow_finetune.yml
    with:
      ci_type: nightly

  call-finetune-on-intel-gpu:
    uses: ./.github/workflows/workflow_finetune_gpu.yml
@@ -18,3 +18,6 @@ jobs:

  call-inference:
    uses: ./.github/workflows/workflow_inference.yml
+
+  call-finetune:
+    uses: ./.github/workflows/workflow_finetune.yml
