feat(train,docker): training script and docker env created

AdityaNG · Feb 25, 2024 · 8ec0bd1 · 8ec0bd1
1 parent a115029
commit 8ec0bd1
Show file tree

Hide file tree

Showing 13 changed files with 394 additions and 10 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,127 @@
+Dockerfile
+.dockerignore
+docker-compose.yml
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+*.png
+*.pfm
+*.jpg
+*.jpeg
+*.pt
+.DS_Store
+
+nohup.out
+wandb/
+checkpoints/
+weights/
+input/
+output_monodepth/
+output_semseg/
+checkpoints_pretrained/
+
+docker/
+core
diff --git a/.gitignore b/.gitignore
@@ -130,3 +130,5 @@ dmypy.json
 
 # templates
 .github/templates/*
+
+checkpoints/*
diff --git a/LLaVA/llava/model/__init__.py b/LLaVA/llava/model/__init__.py
@@ -3,4 +3,5 @@
     from .language_model.llava_mpt import LlavaMptForCausalLM, LlavaMptConfig
     from .language_model.llava_mistral import LlavaMistralForCausalLM, LlavaMistralConfig
 except:
-    pass
+    import traceback
+    traceback.print_exc()
diff --git a/README.md b/README.md
@@ -33,26 +33,53 @@ Read the [CONTRIBUTING.md](CONTRIBUTING.md) file.
 
 ## Running the scripts
 
-```python3
+```bash
 python3 -m drivellava.scripts.generate_commavq_images
 ```
 
-```python3
+```bash
 python3 -m drivellava.scripts.visualize_pose
 ```
 
-```python3
+```bash
 python3 -m drivellava.scripts.generate_trajectory_templates
 ```
 
-```python3
+```bash
 python3 -m drivellava.scripts.generate_sparse_llava_dataset
 ```
 
 ```bash
 ./scripts/extract_zips.sh ~/Datasets/commavq/ ~/Datasets/commavq
 ```
 
+```bash
+BNB_CUDA_VERSION=118 python3 -m drivellava.scripts.train
+```
+
+```bash
+cd LLaVA
+
+conda create -n llava python=3.10 -y
+conda activate llava
+
+pip install --upgrade pip  # enable PEP 660 support
+
+
+conda install pytorch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 pytorch-cuda=11.8 -c pytorch -c nvidia
+conda install cudatoolkit=11.8 -c pytorch -c conda-forge
+
+export BNB_CUDA_VERSION=118
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/aditya/miniconda3/envs/llava/lib
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.10/dist-packages/nvidia/cuda_runtime/lib
+conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib
+
+pip install flash-attn --no-build-isolation --no-cache-dir
+
+pip install .
+
+```
+
 ## TODO
 
 - Training script

diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -0,0 +1,24 @@
+version: "3.9"
+services:
+  dev:
+    # Will build ./docker/Dockerfile
+    # This Dockerfile is for GPU based development
+
+    build:
+      context: .
+      dockerfile: ./docker/Dockerfile
+    volumes:
+      - ./:/app
+      - ~/.cache:/root/.cache
+      - ~/.torch:/root/.torch
+      - ~/.config:/root/.config
+      - ~/.bash_history:/root/.bash_history
+      - ~/.netrc:/root/.netrc
+      - ~/Datasets:/root/Datasets
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ['0']
+              capabilities: [gpu]
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -0,0 +1,69 @@
+FROM nvidia/cuda:11.8.0-devel-ubuntu20.04
+
+
+ENV DEBIAN_FRONTEND noninteractive
+
+# Install dependencies
+RUN apt-get update && apt-get install -y software-properties-common gcc && \
+    add-apt-repository -y ppa:deadsnakes/ppa
+RUN apt-get update
+RUN apt-get update && apt-get upgrade -y
+RUN apt-get install -y software-properties-common gcc
+RUN apt-get install -y python3.10 python3.10-dev python3.10-distutils python3-pip python3-apt python3.10-venv
+RUN apt-get update && apt-get install -y git curl
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
+
+RUN /usr/bin/python3.10 -m pip install --upgrade pip
+# RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 # install pip
+
+# Alias python3.10 to python3
+RUN cp /usr/bin/python3.10 /usr/bin/python3
+
+# Copy code in for installation
+COPY ./ /app
+WORKDIR /app
+
+# Use bash as the shell for the remaining RUN instructions
+SHELL ["/bin/bash", "-c"]
+
+# Install LLaVA
+WORKDIR /app/LLaVA
+
+RUN pip install .
+
+WORKDIR /app/
+
+# RUN \
+#     --mount=type=cache,target=/root/.cache/ \
+#     make install
+
+RUN make install
+
+RUN python3 -m pip install flash-attn --no-build-isolation --no-cache-dir
+RUN python3 -m pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118 --force-reinstall
+
+# Install dependencies
+# RUN /usr/bin/python3.10 -m pip install --upgrade pip
+# RUN /usr/bin/python3.10 -m pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1+cu117 --index-url https://download.pytorch.org/whl/cu117
+# RUN /usr/bin/python3.10 -m pip install tqdm wandb opencv-python-headless pandas matplotlib==3.6.2 timm==0.6.12 scipy==1.9.3
+
+# # Install requirements
+# RUN /usr/bin/python3.10 -m pip install -r requirements.txt
+# RUN /usr/bin/python3.10 -m pip install -r requirements-test.txt
+
+# Remove code, without removing env
+RUN find ./ ! -path './.venv*' ! -path './' -delete
+
+# RUN mkdir /app
+WORKDIR /app
+
+# Env vars for the nvidia-container-runtime.
+ENV NVIDIA_VISIBLE_DEVICES all
+ENV NVIDIA_DRIVER_CAPABILITIES graphics,utility,compute
+
+ENV BNB_CUDA_VERSION 118
+
+# LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.10/dist-packages/nvidia/cuda_runtime/lib
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/python3.10/dist-packages/nvidia/cuda_runtime/lib
+
+RUN echo 'PS1="(DLV) \[\]\[\e]0;\u@\h: \w\a\]${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ \[\]"' >> /root/.bashrc
diff --git a/docker/README.md b/docker/README.md
@@ -0,0 +1,15 @@
+# DriveLLaVA Docker Container
+
+## Build
+
+To build the Docker image:
+```bash
+DOCKER_BUILDKIT=1 docker compose build
+```
+
+## Run
+
+Once you have built the Docker image, use the following command to start the container:
+```bash
+docker compose run dev
+```
diff --git a/drivellava/constants.py b/drivellava/constants.py
@@ -44,6 +44,14 @@ def __getitem__(self, index):
 # VAL_ENCODED_POSE = [x for x in VAL_ENCODED_POSE if os.path.isfile(x)]
 
 
+# ENCODED_JSON_ALL = glob(os.path.join(COMMAVQ_DIR, "data_*", "*.json"))
+# ENCODED_JSON_ALL += glob(os.path.join(COMMAVQ_DIR, "val", "*.json"))
+
+ENCODED_JSON = glob(os.path.join(COMMAVQ_DIR, "data_*_to_*", "*.json"))
+VAL_ENCODED_JSON = glob(os.path.join(COMMAVQ_DIR, "val", "*.json"))
+
+ENCODED_JSON_ALL = ENCODED_JSON + VAL_ENCODED_JSON
+
 def get_image_path(encoded_video_path: str, index: int) -> str:
     return os.path.join(
         encoded_video_path.replace("val", "img_val").replace(".npy", ""),

diff --git a/drivellava/scripts/generate_sparse_llava_dataset.py b/drivellava/scripts/generate_sparse_llava_dataset.py
@@ -186,7 +186,12 @@ def main():
                                 "<image>\nYou are DriveLLaVA, a "
                                 + "self-driving car. You will select the "
                                 + "appropriate trrajectory token given the "
-                                + "above image as context"
+                                + "above image as context.\n"
+                                + "You may select one from the "
+                                + "following templates: "
+                                + ",".join(
+                                    trajectory_encoder.token2trajectory.keys()
+                                )
                             ),
                         },
                         {"from": "gpt", "value": trajectory_encoded},
Original file line number	Diff line number	Diff line change
Expand Up		@@ -130,3 +130,5 @@ dmypy.json

		# templates
		.github/templates/*

		checkpoints/*