-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathconfig.properties_inferentia_tests
108 lines (95 loc) · 4.41 KB
/
config.properties_inferentia_tests
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/bin/bash
# This file contains all customizable configuration items for the project.
# It is pre-configured for the re:Invent 2023 builder session with the following basic settings:
# publicly shared model container images for Inferentia 2 and publicly shared test container images.
# Replace the original config.properties file with this version to run the tests on c5.4xlarge nodes.
######################################################################
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
# SPDX-License-Identifier: MIT-0 #
######################################################################
# Project settings
# NOTE(review): verbose is presumably true | false and toggles extra output in the
# project scripts - the code that reads it is not in this file, confirm there.
verbose=true
# Model settings
# huggingface_model_name is also used further down in this file to build app_name
# (and in the commented-out alternatives for model_image_name / test_image_name).
# NOTE(review): the three values presumably name a Hugging Face model id and the
# tokenizer / model classes used to load it - confirm against the code that reads them.
huggingface_model_name=bert-base-multilingual-cased
huggingface_tokenizer_class=BertTokenizer
huggingface_model_class=BertForQuestionAnswering
# Compiler settings
# processor = cpu|gpu|inf1|inf2|graviton
# The processor value is appended to the image tags and to app_name in this file,
# and it determines which trace_opts_<processor> entry applies (see Trace settings).
processor=inf2
# NOTE(review): pipeline_cores, sequence_length and batch_size are presumably the
# model compilation/tracing parameters - their consumer is not in this file, confirm there.
pipeline_cores=1
sequence_length=128
batch_size=1
# Note the capitalized True here versus the lowercase true used for verbose; keep the
# exact spelling unless the code that reads this value has been checked.
test=True
# account is the current AWS user account. This setting is determined automatically
# by calling the AWS CLI whenever this file is evaluated by the shell, so the aws
# command and valid AWS credentials must be available at that point.
# Within this file, account is referenced only by the commented-out private ECR registry example.
account=$(aws sts get-caller-identity --query Account --output text)
# region is used to log in if the registry is ecr
region=us-west-2
# Container settings
# Default is the private ECR registry in the current AWS account.
# If registry is set, include the registry URI up to (but not including) the image name
# and end the registry setting with /
# Example for the private ECR registry:
# registry=${account}.dkr.ecr.${region}.amazonaws.com/
# registry_type=ecr
registry_type=ecr
# Base image name and tag; the tag carries the processor suffix
# (:v15-inf2 with the processor value set in this file)
base_image_name=aws-do-inference-base
base_image_tag=:v15-${processor}
# Alternative model image settings, commented out in favor of the pre-built image below:
#model_image_name=${huggingface_model_name}
#model_image_tag=:v15-${processor}
# If using the pre-built public registry model image (may require authentication), use the following settings
registry=public.ecr.aws/a2u7h5w3/
model_image_name=bert-base-workshop
model_image_tag=:v15-${processor}
# Trace settings
# trace_opts_<processor> is a processor-specific setting used by the docker run command in the trace.sh script.
# One entry is defined per supported processor value; the entry is selected
# automatically based on your processor value.
# cpu: no extra docker run options
trace_opts_cpu=""
# gpu: passes the --gpus option to docker run
trace_opts_gpu="--gpus 0"
# inf1 / inf2: set AWS_NEURON_VISIBLE_DEVICES=ALL in the container and run it with --privileged
trace_opts_inf1="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
trace_opts_inf2="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
# graviton: no extra docker run options
trace_opts_graviton=""
# Deployment settings
# Some of these settings apply only when the runtime is kubernetes
# runtime = docker | kubernetes
runtime=kubernetes
# num_models - number of models per model server
num_models=6
# quiet = False | True - sets whether the model server should print logs
# NOTE(review): presumably quiet=True suppresses the logs - confirm in the model server code
quiet=False
# postprocess = True | False - sets whether tensors returned from the model should be
# translated back to text or just returned as they are
postprocess=True
# service_port - port on which the model service will be exposed
service_port=8080
# Kubernetes-specific deployment settings
# instance_type = c5.xxx | g4dn.xlarge | g4dn.12xlarge | inf1.xlarge | inf2.8xlarge | c7g.4xlarge...
# A node group with the specified instance_type must exist in the cluster.
# The instance type must have the processor configured above.
# Example: processor=graviton, instance_type=c7g.4xlarge
instance_type=inf2.xlarge
# num_servers - number of model servers to deploy
# Note that more than one model server can run on a node with multiple cpu/gpu/inferentia chips.
# Example: 4 model servers fit on one inf1.6xlarge instance as it has 4 inferentia chips.
num_servers=2
# Kubernetes namespace
namespace=mpi
# Kubernetes app name, built as <huggingface_model_name>-<processor>
# (bert-base-multilingual-cased-inf2 with the values in this file)
app_name=${huggingface_model_name}-${processor}
# app_dir is built as app-<app_name>-<instance_type>
# (app-bert-base-multilingual-cased-inf2-inf2.xlarge with the values in this file)
# NOTE(review): presumably the directory that holds the generated deployment manifests - confirm in the deploy scripts
app_dir=app-${app_name}-${instance_type}
# Local Test image settings
# Alternative test image settings, commented out in favor of the pre-built image below:
#test_image_name=test-${huggingface_model_name}
#test_image_tag=:v15-cpu
# When using the pre-built test image available in the public ECR registry (may require authentication):
# public.ecr.aws/a2u7h5w3/bert-base-workshop:test-v15-cpu
test_image_name=bert-base-workshop
test_image_tag=:test-v15-cpu
# request_frequency - time to sleep between two consecutive requests in curl tests
# NOTE(review): the unit is presumably seconds - confirm in the test scripts
request_frequency=0.01
# num_requests - stop the random request test after this number of requests
num_requests=30
# num_test_containers - number of test containers to launch (default=1), use > 1 for scale testing
num_test_containers=10
# test_instance_type - when runtime is kubernetes, node instance type on which test pods will run
test_instance_type=c5.4xlarge
# test_namespace - when runtime is kubernetes, namespace where test pods will be created
test_namespace=mpi
# test_dir - when runtime is kubernetes, directory where test job/pod manifests are stored
# Built as app-<test_image_name>-<instance_type>
# (app-bert-base-workshop-inf2.xlarge with the values in this file).
# NOTE(review): this uses instance_type (the model server node type), not test_instance_type - confirm that is intended
test_dir=app-${test_image_name}-${instance_type}