Skip to content

Commit

Permalink
Merge pull request #1 from OpenThaiGPT/exp-4
Browse files Browse the repository at this point in the history
Exp 4
  • Loading branch information
pitikorn32 authored Feb 15, 2024
2 parents 4cf375d + cb5b939 commit 0d8e89e
Show file tree
Hide file tree
Showing 290 changed files with 52,801 additions and 0 deletions.
23 changes: 23 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
// Display name of this dev container.
"name": "Python 3",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
// The image is built from the repository's docker/Dockerfile (path is relative to this file).
"dockerFile": "../docker/Dockerfile",
// Extra arguments passed to `docker run`; `--gpus all` requests access to every host GPU.
// NOTE(review): this presumably requires a GPU-enabled Docker runtime on the host - confirm.
"runArgs": ["--gpus", "all"],

// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},

// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],

// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "pip3 install --user -r requirements.txt",

// Configure tool-specific properties.
// "customizations": {},

// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}
3 changes: 3 additions & 0 deletions .dvc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/config.local
/tmp
/cache
4 changes: 4 additions & 0 deletions .dvc/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# DVC repository configuration.
[core]
# Name of the remote used when a DVC command does not specify one.
remote = openthaigpt
['remote "openthaigpt"']
# Storage location of the remote (a gs:// URL, i.e. a Google Cloud Storage bucket).
url = gs://openthaigpt-pretraining
3 changes: 3 additions & 0 deletions .dvcignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore
3 changes: 3 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[flake8]
# 88 is Black's default line length, so flake8 and Black agree on line width.
max-line-length = 88
# E203 (whitespace before ':') and W503 (line break before binary operator)
# conflict with Black's formatting and are disabled as Black recommends.
# N812 is a pep8-naming check (lowercase name imported as non-lowercase);
# presumably ignored to allow aliases such as `import ... as F` - confirm.
ignore = N812, E203, W503
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* text=auto eol=lf
17 changes: 17 additions & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
## Why this PR
Why do we need this PR?

## Changes
- Write some changes here

## Related Issues
Close #

## Checklist
- [ ] PR should follow the [Naming convention](../../docs/PR_NAMING.md)
- [ ] Assign yourself to Assignees
- [ ] Tag related issues
- [ ] Constant names should be ALL_CAPITAL, function names should be snake_case, and class names should be CamelCase
- [ ] Complex functions/algorithms should have a [Docstring](https://peps.python.org/pep-0257/)
- [ ] One PR should not change more than 200 lines (test files are an exception). If it does, please open multiple PRs
- [ ] At least one PR reviewer must come from the task's team (model, eval, data)
31 changes: 31 additions & 0 deletions .github/workflows/build_docker.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Builds the image described by docker/Dockerfile and pushes it to the
# registry on every push to main.
name: Build docker

on:
  push:
    branches: ["main"]

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      # Registry credentials are read from repository secrets.
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      # Two tags are pushed: a moving `latest` and one named after the commit
      # SHA. The previously pushed `latest` image is used as the layer cache
      # source, and cache metadata is written inline into the pushed image.
      - name: Build and push
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ./docker/Dockerfile
          push: true
          tags: |
            ${{ secrets.DOCKER_USERNAME }}/${{ secrets.DOCKER_REPO_NAME }}:latest
            ${{ secrets.DOCKER_USERNAME }}/${{ secrets.DOCKER_REPO_NAME }}:${{ github.sha }}
          cache-from: type=registry,ref=${{ secrets.DOCKER_USERNAME }}/${{ secrets.DOCKER_REPO_NAME }}:latest
          cache-to: type=inline
45 changes: 45 additions & 0 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Checks formatting with Black and lints with Flake8 on pull requests
# targeting main and on pushes to main.
name: Linting and Formatting

on:
  pull_request:
    branches: ["main"]
  push:
    branches: ["main"]

# One run per pull request (or per ref for pushes); a newer run cancels the
# one still in progress.
concurrency:
  group: linter:${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  lint_and_format:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os:
          - ubuntu-latest
        python-version:
          - "3.9"  # requires python<=3.9

    steps:
      - name: Check out repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"  # caching pip dependencies

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run Black
        run: black --check .

      # always() makes Flake8 run even when the Black check above failed,
      # so both reports are produced in a single run.
      - name: Run Flake8
        if: always()
        run: flake8
65 changes: 65 additions & 0 deletions .github/workflows/test_core.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Type-checks (mypy) and tests (pytest with coverage) the core package, then
# uploads the XML coverage report to Codecov. Runs only when Python files
# under src/core or tests/core change, for pull requests targeting main and
# pushes to main.
name: pytest and coverage report for Core

on:
  pull_request:
    paths:
      - "src/core/**/*.py"
      - "tests/core/**/*.py"
    branches:
      - "main"
  push:
    paths:
      - "src/core/**/*.py"
      - "tests/core/**/*.py"
    branches:
      - "main"

# One run per pull request (or per ref for pushes); a newer run cancels the
# one still in progress.
concurrency:
  group: test_core:${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  pytest_and_coverage_core:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.9"]  # requires python<=3.9

    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"  # caching pip dependencies

      # actions/cache v1/v2 have been retired by GitHub and fail the job;
      # v4 accepts the same inputs.
      # NOTE(review): the key hashes src/model/pyproject.toml rather than a
      # manifest of this package - confirm that this is intended.
      - name: Cache pip
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('src/model/pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -e src/core

      - name: Run Mypy
        run: |
          mypy ./src/core/openthaigpt_pretraining

      # Writes coverage.xml to the working directory for the upload step.
      - name: Run pytest with coverage
        run: |
          pytest --cov-report xml --cov=src/core tests/core/

      - name: Upload coverage report to Codecov
        uses: codecov/codecov-action@v3
        with:
          flags: unittests
          files: ./coverage.xml
          token: ${{ secrets.CODECOV_TOKEN }}
66 changes: 66 additions & 0 deletions .github/workflows/test_data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Type-checks (mypy) and tests (pytest with coverage) the data package, then
# uploads the XML coverage report to Codecov.
name: pytest and coverage report for Data

on:
  pull_request:
    paths:
      - "src/data/**/*.py"
      - "tests/data/**/*.py"
    branches:
      - "main"
  push:
    paths:
      # The push filter previously listed only the core paths, so pushes that
      # touched the data package never triggered this workflow.
      - "src/data/**/*.py"
      - "tests/data/**/*.py"
      # Core paths are kept: this job installs src/core before src/data.
      - "src/core/**/*.py"
      - "tests/core/**/*.py"
    branches:
      - "main"

# One run per pull request (or per ref for pushes); a newer run cancels the
# one still in progress.
concurrency:
  group: test_data:${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  pytest_and_coverage_data:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.9"]  # requires python<=3.9

    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"  # caching pip dependencies

      # actions/cache v1/v2 have been retired by GitHub and fail the job;
      # v4 accepts the same inputs.
      # NOTE(review): the key hashes src/model/pyproject.toml rather than a
      # manifest of this package - confirm that this is intended.
      - name: Cache pip
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('src/model/pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -e src/core
          pip install -e src/data

      - name: Run Mypy
        run: |
          mypy ./src/data/openthaigpt_pretraining_data

      # The report is XML (coverage.xml), not HTML; the step is named to match.
      - name: Run pytest with coverage
        run: |
          pytest --cov-report xml --cov=src/data tests/data/

      - name: Upload coverage report to Codecov
        uses: codecov/codecov-action@v3
        with:
          flags: unittests
          files: ./coverage.xml
          token: ${{ secrets.CODECOV_TOKEN }}
74 changes: 74 additions & 0 deletions .github/workflows/test_evaluation.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Type-checks (mypy) and tests (pytest with coverage) the evaluation package,
# then uploads the XML coverage report to Codecov.
name: pytest and coverage report for Evaluation

on:
  pull_request:
    paths:
      - "src/evaluation/**/*.py"
      - "tests/evaluation/**/*.py"
    branches:
      - "main"
  push:
    paths:
      # The push filter previously listed only the core paths, so pushes that
      # touched the evaluation package never triggered this workflow.
      - "src/evaluation/**/*.py"
      - "tests/evaluation/**/*.py"
      # Core paths are kept: this job installs src/core before src/evaluation.
      - "src/core/**/*.py"
      - "tests/core/**/*.py"
    branches:
      - "main"

# One run per pull request (or per ref for pushes); a newer run cancels the
# one still in progress.
concurrency:
  group: test_evaluation:${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  pytest_and_coverage_evaluation:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.9"]  # requires python<=3.9

    # A duplicate "Cache pip" step used to sit between the matrix and `steps:`,
    # outside any step list; the single copy below is the one that runs.
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"  # caching pip dependencies

      # actions/cache v1/v2 have been retired by GitHub and fail the job;
      # v4 accepts the same inputs.
      # NOTE(review): the key hashes src/model/pyproject.toml rather than a
      # manifest of this package - confirm that this is intended.
      - name: Cache pip
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('src/model/pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -e src/core
          pip install -e src/evaluation

      - name: Run Mypy
        run: |
          mypy ./src/evaluation/openthaigpt_pretraining_evaluation

      # The report is XML (coverage.xml), not HTML; the step is named to match.
      - name: Run pytest with coverage
        run: |
          pytest --cov-report xml --cov=src/evaluation tests/evaluation/

      - name: Upload coverage report to Codecov
        uses: codecov/codecov-action@v3
        with:
          flags: unittests
          files: ./coverage.xml
          token: ${{ secrets.CODECOV_TOKEN }}
Loading

0 comments on commit 0d8e89e

Please sign in to comment.