Skip to content

Commit

Permalink
feat(train,docker): training script and docker env created
Browse files Browse the repository at this point in the history
  • Loading branch information
AdityaNG committed Feb 25, 2024
1 parent a115029 commit 8ec0bd1
Show file tree
Hide file tree
Showing 13 changed files with 394 additions and 10 deletions.
127 changes: 127 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# Docker build files themselves (not needed inside the build context).
# Both compose file spellings are listed: the repository's compose file is
# named docker-compose.yaml, which the .yml pattern alone does not match.
Dockerfile
.dockerignore
docker-compose.yml
docker-compose.yaml

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# Binary assets matched by extension (images, .pfm and .pt files) and
# macOS Finder metadata
*.png
*.pfm
*.jpg
*.jpeg
*.pt
.DS_Store

# Run artefacts and data directories
# NOTE(review): judging by the names these hold logs, experiment tracking
# output, model weights and inference inputs/outputs - confirm before pruning.
nohup.out
wandb/
checkpoints/
weights/
input/
output_monodepth/
output_semseg/
checkpoints_pretrained/

# Docker support directory (contains the Dockerfile and its README)
docker/
# NOTE(review): presumably core dump files - confirm.
core
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,5 @@ dmypy.json

# templates
.github/templates/*

checkpoints/*
3 changes: 2 additions & 1 deletion LLaVA/llava/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
from .language_model.llava_mpt import LlavaMptForCausalLM, LlavaMptConfig
from .language_model.llava_mistral import LlavaMistralForCausalLM, LlavaMistralConfig
except:
pass
import traceback
traceback.print_exc()
35 changes: 31 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,26 +33,53 @@ Read the [CONTRIBUTING.md](CONTRIBUTING.md) file.

## Running the scripts

```python3
```bash
python3 -m drivellava.scripts.generate_commavq_images
```

```python3
```bash
python3 -m drivellava.scripts.visualize_pose
```

```python3
```bash
python3 -m drivellava.scripts.generate_trajectory_templates
```

```python3
```bash
python3 -m drivellava.scripts.generate_sparse_llava_dataset
```

```bash
./scripts/extract_zips.sh ~/Datasets/commavq/ ~/Datasets/commavq
```

```bash
BNB_CUDA_VERSION=118 python3 -m drivellava.scripts.train
```

```bash
cd LLaVA

conda create -n llava python=3.10 -y
conda activate llava

pip install --upgrade pip # enable PEP 660 support


conda install pytorch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 pytorch-cuda=11.8 -c pytorch -c nvidia
conda install cudatoolkit=11.8 -c pytorch -c conda-forge

export BNB_CUDA_VERSION=118
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.10/dist-packages/nvidia/cuda_runtime/lib
conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib

pip install flash-attn --no-build-isolation --no-cache-dir

pip install .

```

## TODO

- Training script
Expand Down
24 changes: 24 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Compose definition of the GPU development container.
version: "3.9"

services:
  dev:
    # Image is built from ./docker/Dockerfile (GPU based development),
    # using the repository root as the build context.
    build:
      context: .
      dockerfile: ./docker/Dockerfile

    volumes:
      # Repository checkout, mounted over /app inside the container.
      - ./:/app
      # Host user caches, configuration and shell/network credentials,
      # exposed under /root in the container.
      - ~/.cache:/root/.cache
      - ~/.torch:/root/.torch
      - ~/.config:/root/.config
      - ~/.bash_history:/root/.bash_history
      - ~/.netrc:/root/.netrc
      # Host dataset directory.
      - ~/Datasets:/root/Datasets

    deploy:
      resources:
        reservations:
          devices:
            # Reserve NVIDIA device id "0" with the gpu capability.
            - driver: nvidia
              device_ids:
                - "0"
              capabilities:
                - gpu
69 changes: 69 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# GPU development image: CUDA 11.8 toolchain + Python 3.10 + LLaVA + the
# project's own dependencies. The application code itself is removed at the
# end of the build; docker-compose bind-mounts the repository at /app.
FROM nvidia/cuda:11.8.0-devel-ubuntu20.04

# Build-time only: stops apt from prompting during the build without leaving
# DEBIAN_FRONTEND set in the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Use bash with pipefail for every RUN so that a failure in any stage of a
# pipeline aborts the build instead of being masked by the last command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# System packages.
# - `apt-get update` and `apt-get install` share one layer, so a cached, stale
#   package index can never be paired with a newer install step.
# - The deadsnakes PPA provides Python 3.10 on Ubuntu 20.04.
# - No blanket `apt-get upgrade`: bump the base image tag to pick up updates.
# - The apt lists are removed in the same layer that created them.
RUN apt-get update && \
    apt-get install -y software-properties-common gcc && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y \
        curl \
        git \
        python3-apt \
        python3-pip \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
        python3.10-venv && \
    rm -rf /var/lib/apt/lists/*

# Bootstrap the latest pip for Python 3.10. The script is downloaded to a file
# first (with -f, so HTTP errors fail the build) rather than piped straight
# into the interpreter.
RUN curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py && \
    python3.10 /tmp/get-pip.py --no-cache-dir && \
    rm -f /tmp/get-pip.py

# Alias python3 to python3.10.
# /usr/bin/python3 is a symlink to the distro interpreter; `cp` would write
# through that symlink and overwrite the distro binary, so the link itself is
# replaced instead.
RUN ln -sf /usr/bin/python3.10 /usr/bin/python3

# Copy code in for installation
COPY ./ /app

# Install LLaVA from the copied sources
WORKDIR /app/LLaVA
RUN python3.10 -m pip install --no-cache-dir .

# Install the project through its Makefile target
WORKDIR /app
RUN make install

# flash-attn is installed without build isolation, i.e. against the packages
# already present in the environment; afterwards torch/torchvision/torchaudio
# are force-reinstalled from the CUDA 11.8 wheel index.
# NOTE(review): the order of these two steps is kept as-is - confirm it is
# intentional before swapping them.
RUN python3 -m pip install --no-cache-dir --no-build-isolation flash-attn
RUN python3 -m pip install --no-cache-dir --force-reinstall \
        --index-url https://download.pytorch.org/whl/cu118 \
        torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2

# Remove code, without removing env: everything under /app except ./.venv* is
# deleted. This empties the directory but does not shrink the image, because
# the files still exist in the earlier COPY layer.
RUN find ./ ! -path './.venv*' ! -path './' -delete

# Env vars for the nvidia-container-runtime.
ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=graphics,utility,compute

# NOTE(review): presumably consumed by bitsandbytes to select its CUDA 11.8
# binary - confirm.
ENV BNB_CUDA_VERSION=118

# Append the pip-installed CUDA runtime libraries to the loader search path.
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.10/dist-packages/nvidia/cuda_runtime/lib

# Prefix the root shell prompt with "(DLV)".
RUN echo 'PS1="(DLV) \[\]\[\e]0;\u@\h: \w\a\]${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ \[\]"' >> /root/.bashrc
15 changes: 15 additions & 0 deletions docker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# DriveLLaVA Docker Container

## Build

To build the docker container:
```bash
DOCKER_BUILDKIT=1 docker compose build
```

## Run

Once you have built the docker container use the following command to start it up:
```bash
docker compose run dev
```
8 changes: 8 additions & 0 deletions drivellava/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ def __getitem__(self, index):
# VAL_ENCODED_POSE = [x for x in VAL_ENCODED_POSE if os.path.isfile(x)]


# ENCODED_JSON_ALL = glob(os.path.join(COMMAVQ_DIR, "data_*", "*.json"))
# ENCODED_JSON_ALL += glob(os.path.join(COMMAVQ_DIR, "val", "*.json"))

ENCODED_JSON = glob(os.path.join(COMMAVQ_DIR, "data_*_to_*", "*.json"))
VAL_ENCODED_JSON = glob(os.path.join(COMMAVQ_DIR, "val", "*.json"))

ENCODED_JSON_ALL = ENCODED_JSON + VAL_ENCODED_JSON

def get_image_path(encoded_video_path: str, index: int) -> str:
return os.path.join(
encoded_video_path.replace("val", "img_val").replace(".npy", ""),
Expand Down
7 changes: 6 additions & 1 deletion drivellava/scripts/generate_sparse_llava_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,12 @@ def main():
"<image>\nYou are DriveLLaVA, a "
+ "self-driving car. You will select the "
+ "appropriate trrajectory token given the "
+ "above image as context"
+ "above image as context.\n"
+ "You may select one from the "
+ "following templates: "
+ ",".join(
trajectory_encoder.token2trajectory.keys()
)
),
},
{"from": "gpt", "value": trajectory_encoded},
Expand Down
Loading

0 comments on commit 8ec0bd1

Please sign in to comment.