#!/bin/bash

# File: config.properties_inferentia_tests

# This file contains all customizable configuration items for the project.
# It is pre-configured for the re:Invent 2023 builder session with the following basic settings:
# publicly shared Model container images for Inferentia 2 and publicly shared Test container images.
# Replace the original config.properties file with this version to run Tests on c5.4xlarge nodes.

######################################################################
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
# SPDX-License-Identifier: MIT-0                                     #
######################################################################
# ---- Project settings ----
verbose="true"

# ---- Model settings ----
# Hugging Face model name together with its tokenizer class and model class.
huggingface_model_name="bert-base-multilingual-cased"
huggingface_model_class="BertForQuestionAnswering"
huggingface_tokenizer_class="BertTokenizer"

# ---- Compiler settings ----
# processor - one of: cpu | gpu | inf1 | inf2 | graviton
processor="inf2"
batch_size="1"
sequence_length="128"
pipeline_cores="1"
test="True"

# region - AWS region used to log in when the registry is ecr
region="us-west-2"

# account - the current AWS user account. It is determined automatically by
# querying the AWS CLI whenever this file is evaluated; do not set it by hand.
account="$(aws sts get-caller-identity \
  --query Account \
  --output text)"

# ---- Container settings ----
# The default registry is the private ECR registry in the current AWS account.
# When registry is set explicitly, it must hold the registry URI up to (but not
# including) the image name and must end with "/".
# Private ECR example:
#   registry=${account}.dkr.ecr.${region}.amazonaws.com/
#   registry_type=ecr
registry_type="ecr"

# Base image
base_image_name="aws-do-inference-base"
base_image_tag=":v15-${processor}"

# Model image named after the Hugging Face model (currently disabled):
#   model_image_name=${huggingface_model_name}
#   model_image_tag=:v15-${processor}

# Pre-built model image from a public registry (may require authentication)
registry="public.ecr.aws/a2u7h5w3/"
model_image_name="bert-base-workshop"
model_image_tag=":v15-${processor}"

# ---- Trace settings ----
# There is one trace_opts_<processor> entry per supported processor. The entry
# matching your processor value is assigned automatically and is used by the
# docker run command in the trace.sh script.

# Processors that need no extra docker run options
trace_opts_cpu=''
trace_opts_graviton=''

# GPU
trace_opts_gpu='--gpus 0'

# Inferentia
trace_opts_inf1='-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged'
trace_opts_inf2='-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged'

# ---- Deployment settings ----
# Some of the settings below apply only when the runtime is kubernetes.

# runtime - docker | kubernetes
runtime="kubernetes"

# num_models - number of models per model server
num_models="6"

# quiet - False | True; sets whether the model server should print logs
quiet="False"

# postprocess - True | False; sets whether tensors returned from the model
# should be translated back to text or just returned
postprocess="True"

# service_port - port on which the model service will be exposed (e.g. 8080)
service_port="8080"

# ---- Kubernetes-specific deployment settings ----
# instance_type - c5.xxx | g4dn.xlarge | g4dn.12xlarge | inf1.xlarge |
#                 inf2.8xlarge | c7g.4xlarge ...
#   * a node group with the specified instance_type must exist in the cluster
#   * the instance type must have the processor configured above
#   * example: processor=graviton, instance_type=c7g.4xlarge
instance_type="inf2.xlarge"

# num_servers - number of model servers to deploy.
# More than one model server can run on a node with multiple
# cpu/gpu/inferentia chips; for example, 4 model servers fit on one
# inf1.6xlarge instance because it has 4 inferentia chips.
num_servers="2"

# namespace - Kubernetes namespace
namespace="mpi"

# app_name - Kubernetes app name, composed of the model name and the processor
app_name="${huggingface_model_name}-${processor}"
# app_dir - composed of the app name and the instance type
app_dir="app-${app_name}-${instance_type}"

# ---- Test image settings ----
# Locally built test image (currently disabled):
#   test_image_name=test-${huggingface_model_name}
#   test_image_tag=:v15-cpu

# Pre-built test image available in the public ECR registry (may require
# authentication): public.ecr.aws/a2u7h5w3/bert-base-workshop:test-v15-cpu
test_image_name="bert-base-workshop"
test_image_tag=":test-v15-cpu"

# ---- Test run settings ----
# request_frequency - time to sleep between two consecutive requests in curl tests
request_frequency="0.01"

# num_requests - the random request test stops after this number of requests
num_requests="30"

# num_test_containers - number of test containers to launch (default=1);
# use a value > 1 for scale testing
num_test_containers="10"

# ---- Kubernetes-specific test settings (used when runtime is kubernetes) ----
# test_instance_type - node instance type on which test pods will run
test_instance_type="c5.4xlarge"

# test_namespace - namespace where test pods will be created
test_namespace="mpi"

# test_dir - directory where test job/pod manifests are stored
# NOTE(review): the name is derived from instance_type, not test_instance_type -
# confirm this is intended.
test_dir="app-${test_image_name}-${instance_type}"