From 7463d47bad9cad1ab417425fa2c3590cf4c217b8 Mon Sep 17 00:00:00 2001
From: Jiafu Zhang
Date: Wed, 20 Dec 2023 21:18:19 +0800
Subject: [PATCH] verify

Signed-off-by: Jiafu Zhang
---
 .github/workflows/workflow_finetune.yml        | 153 ++++++++++++++++++
 .github/workflows/workflow_finetune_gpu.yml    |  36 +++++
 .github/workflows/workflow_inference.yml       |  13 +-
 .github/workflows/workflow_orders on_merge.yml |  23 +++
 .github/workflows/workflow_orders_nightly.yml  |  20 +++
 ...w_orders.yml => workflow_orders_on_pr.yml}  |   3 +
 6 files changed, 243 insertions(+), 5 deletions(-)
 create mode 100644 .github/workflows/workflow_finetune.yml
 create mode 100644 .github/workflows/workflow_finetune_gpu.yml
 create mode 100644 .github/workflows/workflow_orders on_merge.yml
 create mode 100644 .github/workflows/workflow_orders_nightly.yml
 rename .github/workflows/{workflow_orders.yml => workflow_orders_on_pr.yml} (82%)

diff --git a/.github/workflows/workflow_finetune.yml b/.github/workflows/workflow_finetune.yml
new file mode 100644
index 000000000..80c9e715d
--- /dev/null
+++ b/.github/workflows/workflow_finetune.yml
@@ -0,0 +1,153 @@
+name: Finetune
+
+on:
+  workflow_call:
+    inputs:
+      ci_type:
+        type: string
+        default: 'pr'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-ft
+  cancel-in-progress: true
+
+jobs:
+  inference:
+    name: finetune test
+    strategy:
+      matrix:
+        model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, huggyllama/llama-7b ]
+        isPR:
+          - ${{inputs.ci_type == 'pr'}}
+
+        exclude:
+          - { isPR: true }
+        include:
+          - { model: "EleutherAI/gpt-j-6b"}
+          - { model: "meta-llama/Llama-2-7b-chat-hf"}
+
+    runs-on: self-hosted
+
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
+      env:
+        http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
+        https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
+      volumes:
+        - /var/run/docker.sock:/var/run/docker.sock
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Load environment variables
+        run: cat ~/llm-ray-actions-runner/.env >> $GITHUB_ENV
+
+      - name: Build Docker Image
+        run: |
+          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} -f dev/docker/Dockerfile.cpu_and_deepspeed -t finetune:latest && yes | docker container prune && yes
+          docker image prune -f
+
+      - name: Start Docker Container
+        run: |
+          cid=$(docker ps -q --filter "name=finetune")
+          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
+          docker run -tid -v ${{ vars.MODEL_CACHE_PATH }}:/root/.cache/huggingface/hub -v ${{ vars.CODE_CHECKOUT_PATH }}:/root/llm-on-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} --name="finetune" --hostname="finetune-container" finetune:latest
+      - name: Run Finetune Test
+        run: |
+          docker exec "finetune" bash -c "source \$(python -c 'import oneccl_bindings_for_pytorch as torch_ccl;print(torch_ccl.cwd)')/env/setvars.sh; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --head --node-ip-address 127.0.0.1 --ray-debugger-external; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --address='127.0.0.1:6379' --ray-debugger-external"
+          CMD=$(cat << EOF
+          conf_path = "finetune/finetune.conf"
+          with open(conf_path, encoding="utf-8") as reader:
+              result = eval(reader.read())
+          result['General']['base_model'] = "${{ matrix.model }}"
+          if "${{ matrix.model }}" == "mosaicml/mpt-7b-chat":
+              result['General']['config']['trust_remote_code'] = True
+          else:
+              result['General']['config']['trust_remote_code'] = False
+          if "${{ matrix.model }}" == "EleutherAI/gpt-j-6b" or "${{ matrix.model }}" == "gpt2":
+              result['General']["gpt_base_model"] = True
+          else:
+              result['General']["gpt_base_model"] = False
+          if "${{ matrix.model }}" == "meta-llama/Llama-2-7b-chat-hf":
+              result['General']["config"]["use_auth_token"] = "${{ env.HF_ACCESS_TOKEN }}"
+          else:
+              result['General']["config"]["use_auth_token"] = None
+          result['Training']['epochs'] = 1
+          if "${{ matrix.model }}" == "gpt2":
+              # to verify oneccl
+              result['Training']['num_training_workers'] = 2
+          else:
+              result['Training']['num_training_workers'] = 1
+          result['General']['lora_config'] = None
+          with open(conf_path, 'w') as output:
+              print(result, file=output)
+          EOF
+          )
+          docker exec "finetune" python -c "$CMD"
+          docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
+      - name: Run PEFT-LoRA Test
+        run: |
+          docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*"
+          CMD=$(cat << EOF
+          conf_path = "finetune/finetune.conf"
+          with open(conf_path, encoding="utf-8") as reader:
+              result = eval(reader.read())
+          result['General']['lora_config'] = {
+              "task_type": "CAUSAL_LM",
+              "r": 8,
+              "lora_alpha": 32,
+              "lora_dropout": 0.1
+          }
+          with open(conf_path, 'w') as output:
+              print(result, file=output)
+          EOF
+          )
+          docker exec "finetune" python -c "$CMD"
+          docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
+      - name: Run Deltatuner Test on DENAS-LoRA Model
+        run: |
+          if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b-chat|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf)$ ]]; then
+            echo ${{ matrix.model }} is not supported!
+          else
+            docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*"
+            CMD=$(cat << EOF
+            import os
+            os.system("cp -r $(python -m pip show deltatuner | grep Location | cut -d: -f2)/deltatuner/conf/best_structure examples/")
+            conf_path = "finetune/finetune.conf"
+            with open(conf_path, encoding="utf-8") as reader:
+                result = eval(reader.read())
+            result['General']['lora_config'] = {
+                "task_type": "CAUSAL_LM",
+                "r": 8,
+                "lora_alpha": 32,
+                "lora_dropout": 0.1
+            }
+            result['General']['deltatuner_config'] = {
+                "algo": "lora",
+                "denas": True,
+                "best_model_structure": f"examples/best_structure/${{ matrix.model }}-best_structure.jsonl",
+            }
+            with open(conf_path, 'w') as output:
+                print(result, file=output)
+            EOF)
+            docker exec "finetune" python -c "$CMD"
+            docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
+          fi
+      - name: Stop Ray
+        run: |
+          cid=$(docker ps -q --filter "name=finetune")
+          if [[ ! -z "$cid" ]]; then
+            docker exec "finetune" bash -c "ray stop"
+          fi
+
+      - name: Stop Container
+        if: success() || failure()
+        run: |
+          cid=$(docker ps -q --filter "name=finetune")
+          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
+      - name: Test Summary
+        run: echo "to be continued"
diff --git a/.github/workflows/workflow_finetune_gpu.yml b/.github/workflows/workflow_finetune_gpu.yml
new file mode 100644
index 000000000..f18e4eaf5
--- /dev/null
+++ b/.github/workflows/workflow_finetune_gpu.yml
@@ -0,0 +1,36 @@
+name: Finetune on Intel GPU
+
+on:
+  workflow_call:
+
+jobs:
+  finetune:
+    name: finetune on gpu test
+    strategy:
+      matrix:
+        model: [ pythia-6.9b, gpt-j-6b ]
+    runs-on: self-hosted
+
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
+      env:
+        http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
+        https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
+      volumes:
+        - /var/run/docker.sock:/var/run/docker.sock
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Running task on Intel GPU
+        run: |
+          rm ~/borealis-runner/llm-on-ray.tar.gz -f
+          tar zcf ~/borealis-runner/llm-on-ray.tar.gz -C ~/actions-runner/_work/llm-on-ray .
+          cd ~/borealis-runner/
+          python3 finetune_on_pvc.py --base_model "${{ matrix.model }}"
+      - name: Test Summary
+        run: echo "to be continued"
\ No newline at end of file
diff --git a/.github/workflows/workflow_inference.yml b/.github/workflows/workflow_inference.yml
index c34acbbd0..21c510a40 100644
--- a/.github/workflows/workflow_inference.yml
+++ b/.github/workflows/workflow_inference.yml
@@ -32,16 +32,18 @@ jobs:
            model: mpt-7b
 
     runs-on: self-hosted
+
     defaults:
       run:
         shell: bash
     container:
-      image: 10.1.2.13:5000/llmray-build
+      image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
       env:
-        http_proxy: http://proxy-chain.intel.com:911
-        https_proxy: http://proxy-chain.intel.com:911
+        http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
+        https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
       volumes:
         - /var/run/docker.sock:/var/run/docker.sock
+
     steps:
       - name: Checkout
         uses: actions/checkout@v2
@@ -64,7 +66,7 @@
             DF_SUFFIX=".cpu_and_deepspeed"
           fi
           PREFIX=${{steps.prefix.outputs.prefix}}
-          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=http://proxy-chain.intel.com:911 --build-arg https_proxy=http://proxy-chain.intel.com:911 -f dev/docker/Dockerfile${DF_SUFFIX} -t ${PREFIX}:latest && yes | docker container prune && yes
+          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${PREFIX}:latest && yes | docker container prune && yes
           docker image prune -f
 
       - name: Start Docker Container
@@ -73,7 +75,8 @@
           cid=$(docker ps -q --filter "name=${PREFIX}")
           if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
           echo "pass is ${GITHUB_WORKSPACE}"
-          docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v /home/ci/actions-runner/_work/llm-on-ray/llm-on-ray:/root/llm-on-ray -e http_proxy=http://proxy-chain.intel.com:911 -e https_proxy=http://proxy-chain.intel.com:911 --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest
+          docker run -tid -v ${{ vars.MODEL_CACHE_PATH }}:/root/.cache/huggingface/hub -v ${{ vars.CODE_CHECKOUT_PATH }}:/root/llm-on-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest
+          docker run -tid -v ${{ vars.MODEL_CACHE_PATH }}:/root/.cache/huggingface/hub -v ${{ vars.CODE_CHECKOUT_PATH }}:/root/llm-on-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest
 
       - name: Start Ray Cluster
         run: |
diff --git a/.github/workflows/workflow_orders on_merge.yml b/.github/workflows/workflow_orders on_merge.yml
new file mode 100644
index 000000000..e453f242b
--- /dev/null
+++ b/.github/workflows/workflow_orders on_merge.yml
@@ -0,0 +1,23 @@
+name: llm-ray inference & finetune
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '.github/**'
+      - 'docker/**'
+      - 'common/**'
+      - 'dev/docker/**'
+      - 'finetune/**'
+      - 'inference/**'
+      - 'rlhf/**'
+      - 'tools/**'
+
+jobs:
+
+  call-inference:
+    uses: ./.github/workflows/workflow_inference.yml
+
+  call-finetune:
+    uses: ./.github/workflows/workflow_finetune.yml
diff --git a/.github/workflows/workflow_orders_nightly.yml b/.github/workflows/workflow_orders_nightly.yml
new file mode 100644
index 000000000..9ee0fd202
--- /dev/null
+++ b/.github/workflows/workflow_orders_nightly.yml
@@ -0,0 +1,20 @@
+name: llm-ray inference & finetune nightly
+
+on:
+  schedule:
+    - cron: "0 16 * * *"
+
+jobs:
+
+  call-inference:
+    uses: ./.github/workflows/workflow_inference.yml
+    with:
+      ci_type: nightly
+
+  call-finetune:
+    uses: ./.github/workflows/workflow_finetune.yml
+    with:
+      ci_type: nightly
+
+  call-finetune-on-intel-gpu:
+    uses: ./.github/workflows/workflow_finetune_gpu.yml
\ No newline at end of file
diff --git a/.github/workflows/workflow_orders.yml b/.github/workflows/workflow_orders_on_pr.yml
similarity index 82%
rename from .github/workflows/workflow_orders.yml
rename to .github/workflows/workflow_orders_on_pr.yml
index 605b4cfc9..e13bccecf 100644
--- a/.github/workflows/workflow_orders.yml
+++ b/.github/workflows/workflow_orders_on_pr.yml
@@ -18,3 +18,6 @@ jobs:
 
   call-inference:
     uses: ./.github/workflows/workflow_inference.yml
+
+  call-finetune:
+    uses: ./.github/workflows/workflow_finetune.yml