From 613e1113ff578eeb18885d9999693a1b9c8cddce Mon Sep 17 00:00:00 2001 From: Matt Feinberg Date: Wed, 20 Nov 2024 16:29:23 +0000 Subject: [PATCH] docs and cleanup --- .gitignore | 2 +- .project/spec.yaml | 3 - apps/.gitkeep | 0 apps/functions | 341 ---------------------------------------- apps/manager.py | 206 ------------------------ compose.yaml | 10 +- docs/0_3_1_configure.md | 29 +--- docs/0_4_start.md | 32 ++-- 8 files changed, 31 insertions(+), 592 deletions(-) delete mode 100644 apps/.gitkeep delete mode 100644 apps/functions delete mode 100755 apps/manager.py diff --git a/.gitignore b/.gitignore index d13d72f..d030363 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,7 @@ # General ignores .DS_Store **/tmp -# code/config.yaml +code/config.yaml # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/.project/spec.yaml b/.project/spec.yaml index 353d937..acbfcbf 100644 --- a/.project/spec.yaml +++ b/.project/spec.yaml @@ -16,9 +16,6 @@ layout: - path: docs/ type: code storage: git -- path: apps/ - type: code - storage: git - path: data/ type: data storage: gitlfs diff --git a/apps/.gitkeep b/apps/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/apps/functions b/apps/functions deleted file mode 100644 index 0b51050..0000000 --- a/apps/functions +++ /dev/null @@ -1,341 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -BOOTUP=color -RES_COL=60 -MOVE_TO_COL="echo -en \\033[${RES_COL}G" -SETCOLOR_SUCCESS="echo -en \\033[1;32m" -SETCOLOR_FAILURE="echo -en \\033[1;31m" -SETCOLOR_WARNING="echo -en \\033[1;33m" -SETCOLOR_NORMAL="echo -en \\033[0;39m" -STOP_TIMEOUT=15 # seconds -APP_SRC=$(readlink -f $0) - -if [ "$1" == "config" ]; then _CONFIG=1; else _CONFIG=0; fi - -############################################################################### -# helper functions -############################################################################### - -# Infer if this is in Workbench by scanning the networks. -IN_NVWB=$(docker container inspect `hostname` | jq '.[0].NetworkSettings.Networks.workbench or false') -if [ "$IN_NVWB" == "true" ]; then - # Determine the network config for apps - DOCKER_NETWORK=" --network=workbench " - # Determine the best prefix for the app name -else - DOCKER_NETWORK=" " -fi - -# resolve a mount point to the host's directory -# usage: hostpath LOCAL_PATH -function hostpath() { - local localpath - localpath=$(/bin/bash -c "cd $1; pwd") - - docker inspect $(hostname) --format json | \ - jq -r '.[0].HostConfig.Mounts[] | select(.Target | startswith("'$localpath'")).Source' -} - -# extract settings from env variables -# usage: config_lkp VAR DEFAULT -function config_lkp() { - local varname default - varname="$1" - default="$2" - - if [[ $_CONFIG == 1 ]]; then - echo "$varname=$default" >&2 - fi - - echo ${!varname:-$2} -} - -############################################################################### -# lifecycle functions -############################################################################### - -# restart the contiainer -# usage: restart CONTAINER_NAME -function restart() { - stop "$1" - start "$1" -} - -# local wrapper for docker run and stop that include logging -function docker_run() { - exec "Starting the container" _docker_run $@ -} 
-function docker_stop() { - exec "Stopping the container" _docker_stop $@ -} - -# ensure the container is started and running -# usage: start CONTAINER_NAME -function start() { - local status_code name - name="$1" - { status "$name" > /dev/null; status_code=$?; } || true - case $status_code in - 0) success; echo "Container is already running." >&2 ;; - 1) echo "Container is stopped. Recreating." >&2; docker_stop; docker_run ;; - 2) echo "Container does not exist. Starting." >&2; docker_run ;; - esac -} - -# ensure the container is stopped -# usage: stop CONTAINER_NAME -function stop() { - local status_code name - name="$1" - { status "$name" > /dev/null; status_code=$?; } || true - case $status_code in - 0) echo "Container is running. Stopping." >&2; docker_stop;; - 1) echo "Container is stopped. Removing." >&2; docker_stop;; - 2) success; echo -n "Container does not exist.";; - *) ;; - esac -} - -# check the status of the associated container image -# usage: status CONTAINER_NAME -# status codes: -# 0: Container is running -# 1: Container exists, but is not running -# 2: Container does not exist -function status() { - local name - name="$1" - state=$( (docker inspect \ - --format '{{.State.Status}}' \ - $name 2> /dev/null \ - || echo -n "stopped") | xargs echo) - echo "Container status: $state" - - case "$state" in - stopped) - return 2 - ;; - running) - health "$name" - ;; - created | exited | paused | dead) - return 1 - ;; - *) - echo "Unrecognized container state: $state" >&2 - exit 1 - ;; - esac -} - -# check the health of the associated container image -# usage: health CONTAINER_NAME -# status codes: -# 0: Container is healthy or starting -# 1: Container is not healthy -function health() { - local name - name="$1" - state=$(docker inspect \ - --format '{{.State.Health.Status}}' \ - $name 2>&1 | head -n 1) - echo "Container health: $state" - - case "$state" in - healthy | starting | "") - return 0 - ;; - unhealthy) - return 1 - ;; - *) - echo "Unrecognized 
container health state: $state" >&2 - exit 1 - ;; - esac - -} - -# wait for the container to finish starting -# usage: wait_for CONTAINER_NAME -function wait_for() { - local name - name="$1" - echo "Waiting for the container to finish starting." - while true; do - health "$name" | grep "starting" || return 0 - sleep 5 - done -} - -# show the applications configuration parameters -# usage: config -function config() { - echo "$CONFIG_SCHEMA" -} - -# show the application's metadata -# uage: meta -function meta() { - _meta >&2 - echo "start_command: $APP_SRC start" >&2 - echo "stop_command: $APP_SRC stop" >&2 - echo "health_check_command: $APP_SRC status" >&2 -} - -# show the applications container image -# usage: image -function image() { - echo $IMAGE -} - -# show the applications default tag -# usage: tag -function tag() { - echo $TAG -} - -# the main entrypoint for most applications -# usage: main VERB CONTAINER_NAME -function main() { - local name verb - verb=$1 - name=$2 - case "$verb" in - status | start | stop | restart | wait_for | meta | image | tag) - $verb $name - ;; - config) - ;; - *) - echo "Usage: $0 {start|stop|restart|status|config}" - exit 1 - ;; - esac -} - -############################################################################### -# abstract functions -############################################################################### -function _docker_stop() { - echo "$0 function is not defined in the application definition." >&2 - exit 2 -} -function _docker_run() { - echo "$0 function is not defined in the application definition." 
>&2 - exit 2 -} -_meta() { - cat <<-EOM - name: New Application - type: custom - class: process - user_msg: |- - Milvus is now available at: - localhost:19530 - icon_url: "https://milvus.io/favicon-32x32.png" - EOM -} - - -############################################################################### -# logging functions -############################################################################### -echo_success() { - [ "$BOOTUP" = "color" ] && $MOVE_TO_COL - echo -n "[" - [ "$BOOTUP" = "color" ] && $SETCOLOR_SUCCESS - echo -n $" OK " - [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL - echo -n "]" - echo -ne "\r" - return 0 -} - -echo_failure() { - [ "$BOOTUP" = "color" ] && $MOVE_TO_COL - echo -n "[" - [ "$BOOTUP" = "color" ] && $SETCOLOR_FAILURE - echo -n $"FAILED" - [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL - echo -n "]" - echo -ne "\r" - return 1 -} - -echo_passed() { - [ "$BOOTUP" = "color" ] && $MOVE_TO_COL - echo -n "[" - [ "$BOOTUP" = "color" ] && $SETCOLOR_WARNING - echo -n $"PASSED" - [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL - echo -n "]" - echo -ne "\r" - return 1 -} - -echo_warning() { - [ "$BOOTUP" = "color" ] && $MOVE_TO_COL - echo -n "[" - [ "$BOOTUP" = "color" ] && $SETCOLOR_WARNING - echo -n $"WARNING" - [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL - echo -n "]" - echo -ne "\r" - return 1 -} - -# Log that something succeeded -success() { - echo_success - return 0 -} - -# Log that something failed -failure() { - local rc=$? - echo_failure - return $rc -} - -# Log that something passed, but may have had errors. Useful for fsck -passed() { - local rc=$? - echo_passed - return $rc -} - -# Log a warning -warning() { - local rc=$? - echo_warning - return $rc -} - -# run a command and log its result -# usage: exec "LOG_MESSAGE" COMMAND TO EXEC -function exec() { - local msg - msg="$1" - echo -n "$msg" >&2 - shift - - "$@" && success >&2 || failure >&2 - retcode=$? 
- - echo "$msg" >&2 - return $retcode -} diff --git a/apps/manager.py b/apps/manager.py deleted file mode 100755 index 876e78e..0000000 --- a/apps/manager.py +++ /dev/null @@ -1,206 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -from dataclasses import dataclass -from functools import cached_property -import logging -from pathlib import Path -import sys -import subprocess -from typing import Any, Optional, TypeVar -import yaml - - -logging.basicConfig() -_LOGGER = logging.getLogger("manager") -EMPTY = "__empty__" - - -def run_or_die(cmd: list[str], error_msg: str = "Error running command.") -> tuple[bytes, bytes]: - """Run a command and raise a RuntimeError if there is a problem.""" - proc = subprocess.run(cmd, capture_output=True) - if proc.returncode != 0: - raise RuntimeError(f"{error_msg}:\n{proc.stderr}") - return (proc.stdout, proc.stderr) - - -ValueT = TypeVar("ValueT") - - -def safe_index(needle: ValueT, haystack: list[ValueT]) -> Optional[int]: - """Find an item in a list, but return None if not found.""" - try: - return haystack.index(needle) - except ValueError: - return None - - -@dataclass -class RuntimeEnvironment: - """The possible runtime options.""" - - uninstall: bool = False - app: str = "" - verbose: bool = False - - @classmethod - def from_args(cls) -> 
"RuntimeEnvironment": - """Parse the CLI Arguments.""" - parser = argparse.ArgumentParser(prog="NVWB App Manager") - parser.add_argument("-u", "--uninstall", help="Uninstall the application from the menu.", action="store_true") - parser.add_argument("-v", "--verbose", help="Increase output verbosity.", action="store_true") - parser.add_argument("app", type=str, help="Path to the application to install/uninstall.") - args = parser.parse_args() - opts = RuntimeEnvironment(**vars(args)) - - if opts.verbose: - _LOGGER.setLevel(logging.DEBUG) - - _LOGGER.info("Using options: %s", opts) - return opts - - @cached_property - def root(self) -> str: - """Return the file path to the project root.""" - stdout, _ = run_or_die(["git", "rev-parse", "--show-toplevel"], "Error finding git repo's root.") - out = stdout.strip().decode("ascii") - return Path(out).absolute() - - @cached_property - def variables(self) -> str: - """Return the file path to the project's variables file.""" - return self.root.joinpath("variables.env") - - @cached_property - def spec(self) -> str: - """Return the file path to the project's variables file.""" - return self.root.joinpath(".project").joinpath("spec.yaml") - - -class App: - """A representation of an nvwb app.""" - - def __init__(self, path: str) -> None: - """Initialize the class.""" - self._path = Path(path).absolute() - - if not self._path.exists(): - raise RuntimeError(f"The specified application does not exist: {str(self._path)}") - - @cached_property - def config(self) -> dict[str, str]: - """Return the application's configuration values.""" - _LOGGER.info("Reading configuration for %s", str(self._path)) - _, stderr = run_or_die([(self._path), "config"], "Unable to read config from application.") - - return dict([list(map(lambda x: x.decode("ascii"), line.split(b"="))) for line in stderr.strip().split(b"\n")]) - - @cached_property - def meta(self) -> dict[str, Any]: - """Read the application's metadata entry.""" - _LOGGER.info("Reading 
metadata for %s", str(self._path)) - _, stderr = run_or_die([(self._path), "meta"], "Unable to read metadata from application.") - return yaml.safe_load(stderr) - - -def update_variables(path: Path, config: dict[str, str], uninstall=False) -> None: - """Update the variables file with the app's config.""" - existing_lines = open(path, "r").readlines() - existing_lines_index = [line.split("=")[0] for line in existing_lines] - if not existing_lines[-1].endswith("\n"): - existing_lines[-1] = existing_lines[-1] + "\n" - file_changed = False - - for new_var, new_value in config.items(): - exists_line_ind = safe_index(new_var, existing_lines_index) - exists = exists_line_ind is not None - - if exists and uninstall: - # remove the line from existing lines - _LOGGER.debug("Removing %s from variables.", new_var) - existing_lines.pop(exists_line_ind) - existing_lines_index.pop(exists_line_ind) - file_changed = True - - elif not exists and not uninstall: - # add the line to the existing lines - _LOGGER.debug("Adding %s to variables.", new_var) - new_line = f"{new_var}={new_value}\n" - existing_lines.append(new_line) - existing_lines_index.append(new_var) - file_changed = True - - # write the variables file - if file_changed: - _LOGGER.debug("Saving new variables file.") - open(path, "w").writelines(existing_lines) - return - _LOGGER.warning("App configuration is already installed.") - - -def update_spec(path: Path, app_meta: dict[str, str], uninstall=False) -> None: - """Update the projects specfile for this application.""" - with open(path, "r") as spec_file: - spec = yaml.load(spec_file, Loader=yaml.SafeLoader) - changed = False - - found = False - for app_ind, app in enumerate(spec.get("execution", {}).get("apps", [])): - if app.get("name", EMPTY) == app_meta.get("name", EMPTY): - found = True - break - - if found and uninstall: - _LOGGER.debug("Removing application from spec file.") - spec["execution"]["apps"].pop(app_ind) - changed = True - - elif not found and not 
uninstall: - _LOGGER.debug("Adding the application to the spec file.") - execution = spec.get("execution", {}) - execution["apps"] = execution.get("apps", []) - execution["apps"].append(app_meta) - changed = True - - if changed: - _LOGGER.debug("Saving updated project spec file.") - with open(path, "w") as spec_file: - yaml.safe_dump(spec, spec_file) - return - _LOGGER.warning("App is already loaded in spec file.") - - -def main() -> int: - """The main routine for the app manager.""" - _LOGGER.setLevel(logging.INFO) - env = RuntimeEnvironment.from_args() - app = App(env.app) - - _LOGGER.info("Updating project environment variables.") - update_variables(env.variables, app.config, env.uninstall) - - _LOGGER.info("Updating project spec file.") - update_spec(env.spec, app.meta, env.uninstall) - return 0 - - -if __name__ == "__main__": - try: - sys.exit(main()) - except RuntimeError as err: - _LOGGER.error(str(err)) - sys.exit(1) diff --git a/compose.yaml b/compose.yaml index 8f4a3b5..18e55d0 100644 --- a/compose.yaml +++ b/compose.yaml @@ -4,7 +4,7 @@ services: nv-embedqa-e5-v5: image: "nvcr.io/nim/nvidia/nv-embedqa-e5-v5:1.0.1" - profiles: ["2 GPUs", "3 GPUs"] + profiles: ["2 GPUs", "3+ GPUs"] deploy: resources: reservations: @@ -27,7 +27,7 @@ services: - default nv-rerankqa-mistral-4b-v3: image: "nvcr.io/nim/nvidia/nv-rerankqa-mistral-4b-v3:1.0.2" - profiles: ["3 GPUs"] + profiles: ["3+ GPUs"] runtime: "nvidia" deploy: resources: @@ -52,7 +52,7 @@ services: llm-nim: image: "nvcr.io/nim/meta/llama3-8b-instruct:1" - profiles: ["1 GPU", "2 GPUs", "3 GPUs"] + profiles: ["1 GPU", "2 GPUs", "3+ GPUs"] deploy: resources: reservations: @@ -76,7 +76,7 @@ services: milvus: image: "milvusdb/milvus:v2.4.6" - profiles: ["0 GPUs", "1 GPU", "2 GPUs", "3 GPUs"] + profiles: ["0 GPUs", "1 GPU", "2 GPUs", "3+ GPUs"] security_opt: - seccomp:unconfined environment: @@ -97,7 +97,7 @@ services: redis: image: "redis:7" - profiles: ["0 GPUs", "1 GPU", "2 GPUs", "3 GPUs"] + profiles: ["0 
GPUs", "1 GPU", "2 GPUs", "3+ GPUs"] volumes: - redis:/data healthcheck: diff --git a/docs/0_3_1_configure.md b/docs/0_3_1_configure.md index f68a5f7..fa14d60 100644 --- a/docs/0_3_1_configure.md +++ b/docs/0_3_1_configure.md @@ -1,38 +1,15 @@ ## Configure this project -The project must be configured to work with local machine resources. +The project must be configured to work with your NGC API key.
Expand this section for a details on configuring this project. -1. Before running for the first time, project specific configuration must be provided. Project configuration is done using the *Environment* tab from the left-hand panel. +1. Before running for the first time, your NGC personal key must be configured in Workbench. This is done using the *Environment* tab from the left-hand panel. ![AI Workbench Side Menu](_static/nvwb_left_menu.png) -1. Scroll down to the **Variables** section and find *NGC_HOME* entry. It should be set to something like `~/.cache/nvidia-nims`. The value here is used by workbench. This same location also appears in the **Mounts** section that mounts this directory into the container. - -1. Scroll down to the **Secrets** section and find the *NGC_API_KEY* entry. Press *Configure* and provide the personal key for NGC that as generated earlier. - -1. Scroll down to the **Mounts** section. Here, there are two mounts to configure. - - a. Find the mount for /var/host-run. This is used to allow the development environment to access the host’s Docker daemon in a pattern called Docker out of Docker. Press **Configure** and provide the directory `/var/run`. - - ![AI Workbench Mount Menu](_static/nvwb_mount_varrun.png) - - b. Find the mount for /home/workbench/.cache/nvidia-nims. This mount is used as a runtime cache for NIMs where they can cache model files. Sharing this cache with the host reduces disk usage and network bandwidth. - - ![AI Workbench Mount Menu](_static/nvwb_mount_nim.png) - - If you don't already have a nim cache, or you aren't sure, use the following commands to create one at `/home/USER/.cache/nvidia-nims`. - - ```bash - mkdir -p ~/.cache/nvidia-nims - chmod 2777 ~/.cache/nvidia-nims - ``` - -1. A rebuild will occur after these settings have been changed. - -1. Once the build completes with a *Build Ready* message, all applications will be made available to you. +1. 
Scroll down to the **Secrets** section and find the *NGC_API_KEY* entry. Press *Configure* and provide the personal key for NGC that was generated earlier.
diff --git a/docs/0_4_start.md b/docs/0_4_start.md index f3a7088..83ad7ef 100644 --- a/docs/0_4_start.md +++ b/docs/0_4_start.md @@ -7,25 +7,37 @@ Even the most basic of LLM Chains depend on a few additional microservices. Thes Expand this section for details on starting the demo application. -> **HINT:** For each application, the debug output can be monitored in the UI by clicking the Output link in the lower left corner, selecting the dropdown menu, and choosing the application of interest. +> **HINT:** For each application, the debug output can be monitored in the UI by clicking the Output link in the lower left corner, selecting the dropdown menu, and choosing the application of interest, or *Compose* for applications started via compose. -1. All applications bundled in this workspace can be controlled by navigating to **Environment** > **Applications**. +1. The applications bundled in this workspace can be controlled by navigating to two tabs: -1. First, toggle on *Milvus Vector DB* and *Redis*. Milvus is used as an unstructured knowledge base and Redis is used to store conversation histories. + - **Environment** > **Applications** + - **Environment** > **Compose**. -1. Once these services have been started, the *Chain Server* can safely be started. This contains the custom LangChain code for performing our reasoning chain. By default, it will use the local Milvus and Redis, but use *ai.nvidia.com* for LLM and Embedding model inferencing. +1. First, navigate to the **Environment** > **Compose** tab. Using the dropdown menu, select the option according to your GPU configuration. All options, even 0 GPUs, will be able to run this project successfully. Below is an outline of the available options as well as which services they will start up locally: -1. **[OPTIONAL]:** Next, start the *LLM NIM*. The first time the LLM NIM is started, it will take some time to download the image and the optimized models. + - 0 GPUs + - *Milvus Vector DB* and *Redis*.
Milvus is used as an unstructured knowledge base and Redis is used to store conversation histories. + - 1 GPU + - Everything from 0 GPUs + - *LLM NIM*. The first time the LLM NIM is started, it will take some time to download the image and the optimized models. + a. During a long start, to confirm the LLM NIM is starting, the progress can be observed by viewing the logs by using the *Output* pane on the bottom left of the UI. - a. During a long start, to confirm the LLM NIM is starting, the progress can be observed by viewing the logs by using the *Output* pane on the bottom left of the UI. + b. If the logs indicate an authentication error, that means the provided *NGC_API_KEY* does not have access to the NIMs. Please verify it was generated correctly and in an NGC organization that has NVIDIA AI Enterprise support or trial. - b. If the logs indicate an authentication error, that means the provided *NGC_API_KEY* does not have access to the NIMs. Please verify it was generated correctly and in an NGC organization that has NVIDIA AI Enterprise support or trial. + c. If the logs appear to be stuck on `..........: Pull complete`, `..........: Verifying complete`, or `..........: Download complete`; this is all normal output from Docker that the various layers of the container image have been downloaded. - c. If the logs appear to be stuck on `..........: Pull complete`. `..........: Verifying complete`, or `..........: Download complete`; this is all normal output from Docker that the various layers of the container image have been downloaded. + d. Any other failures here need to be addressed. + - 2 GPUs + - Everything from 0 and 1 GPUs + - *Embedding NIM* + - 3+ GPUs + - Everything from 0, 1, and 2 GPUs + - *Reranking NIM* - d. Any other failures here need to be addressed. +1. Once the compose services have been started, the *Chain Server* can safely be started. This contains the custom LangChain code for performing our reasoning chain. 
By default, it will use the local Milvus and Redis, but use *ai.nvidia.com* for LLM, Embedding, and Reranking model inferencing. -1. Once the *Chain Server* is up, the *Chat Interface* can be started. Starting the interface will automatically open it in a browser window. +1. Once the *Chain Server* is up, the *Chat Frontend* can be started. Starting the interface will automatically open it in a browser window. ![NIM Anywhere Frontend](_static/na_frontend.png)