Skip to content

Soak tests

Soak tests #878

Workflow file for this run

# Soak-test workflow: triggered on a fixed schedule and by manual dispatch.
name: Soak tests
env:
  AWS_DEFAULT_REGION: us-east-1
on:
  schedule:
    # Mon, Wed, Fri morning PST
    - cron: "0 14 * * 1,3,5"
  workflow_dispatch:
    inputs:
      # Extra soak-test flags appended to the soak container's arguments.
      soak_config:
        description: "set memory/cpu threshold, soak time (s), emitter interval"
        required: false
        default: "-t 1800"
# `id-token: write` lets jobs request an OIDC token; the soak job assumes an
# AWS role with it.
permissions:
  contents: read
  id-token: write
jobs:
# One soak run per (language, sample app, instrumentation type, architecture).
soaking-test:
  name: Soak Test - (${{ matrix.language }}, ${{ matrix.sample-app }}, ${{ matrix.instrumentation-type }}, ${{ matrix.architecture }})
  runs-on: ubuntu-22.04
  strategy:
    fail-fast: false
    matrix:
      # FIXME: (enowell) Both the .NET and Go Sample Apps want to soak-test
      # the same collector-only Lambda layer. We rely on Soak Tests to decide
      # whether a layer is ready for release, but this workflow can only test
      # one Sample App per Lambda Layer. We should create a separate workflow
      # that soak-tests a layer with multiple sample apps.
      language:
        - go
        - java
        - nodejs
        - python
      sample-app:
        - aws-sdk
      instrumentation-type:
        - wrapper
      architecture:
        - amd64
        - arm64
      include:
        # FIXME: (enowell) Same problem as above: we cannot soak-test the
        # other java app (okhttp) because it would create its own Lambda Layer
        # instead of soak-testing the same one as the `aws-sdk` sample app.
        - language: java
          sample-app: aws-sdk
          instrumentation-type: agent
          architecture: amd64
        - language: java
          sample-app: aws-sdk
          instrumentation-type: agent
          architecture: arm64
  outputs:
    # Set to FAILED by the `set-layer-if-error-output` step when a run fails.
    go-wrapper-error: ${{ steps.set-layer-if-error-output.outputs.go-wrapper-error }}
    nodejs-wrapper-error: ${{ steps.set-layer-if-error-output.outputs.nodejs-wrapper-error }}
    python-wrapper-error: ${{ steps.set-layer-if-error-output.outputs.python-wrapper-error }}
    java-agent-error: ${{ steps.set-layer-if-error-output.outputs.java-agent-error }}
    java-wrapper-error: ${{ steps.set-layer-if-error-output.outputs.java-wrapper-error }}
    # NOTE: (enowell) When we release a Lambda Layer we ALWAYS release all of
    # its architectures TOGETHER, so every architecture is at the same
    # version.
    go-wrapper-version: ${{ steps.set-collector-layer-version-output.outputs.go-wrapper-version }}
    nodejs-wrapper-version: ${{ steps.set-sdk-layer-version-output.outputs.nodejs-wrapper-version }}
    python-wrapper-version: ${{ steps.set-sdk-layer-version-output.outputs.python-wrapper-version }}
    java-agent-version: ${{ steps.set-sdk-layer-version-output.outputs.java-agent-version }}
    java-wrapper-version: ${{ steps.set-sdk-layer-version-output.outputs.java-wrapper-version }}
steps:
# Check out this repository together with its submodules.
- uses: actions/checkout@v4
  with:
    submodules: recursive
# Go toolchain; unconditional, so it is installed for every matrix entry.
- uses: actions/setup-go@v5
  with:
    check-latest: true
    go-version: "~1.23.4"
# Java toolchain (Corretto 17), only for the java matrix entries.
- if: matrix.language == 'java'
  uses: actions/setup-java@v4
  with:
    distribution: corretto
    java-version: "17"
# Cache Go modules plus Gradle caches/wrapper, keyed on Gradle files and go.sum.
- name: Cache (Java)
  if: matrix.language == 'java'
  uses: actions/cache@v4
  with:
    key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}-go-${{ hashFiles('**/go.sum') }}
    restore-keys: |
      ${{ runner.os }}-gradle-
    path: |
      ~/go/pkg/mod
      ~/.gradle/caches
      ~/.gradle/wrapper
# Exactly one of the two steps below runs per matrix entry; each exports
# SOAKING_TEST_CONFIG (the -c / -m soak thresholds) to later steps.
- name: Get default soaking test configuration
  # The CPU baseline was obtained empirically from the max value observed
  # for Go in a complete soak-test run.
  if: matrix.language != 'java'
  run: echo SOAKING_TEST_CONFIG="-c 120 -m 70" | tee --append $GITHUB_ENV
- name: Get java soaking test configuration
  # NOTE (enowell): Java's JVM is heavy and needs more memory than the others.
  if: matrix.language == 'java'
  run: echo SOAKING_TEST_CONFIG="-c 200 -m 90" | tee --append $GITHUB_ENV
# Node.js toolchain, only for the nodejs matrix entries.
- if: matrix.language == 'nodejs'
  uses: actions/setup-node@v4
  with:
    node-version: "16"
# Cache Go modules plus the npm cache, keyed on package.json and go.sum.
- name: Cache (NodeJS)
  if: matrix.language == 'nodejs'
  uses: actions/cache@v4
  with:
    key: ${{ runner.os }}-node-${{ hashFiles('**/package.json') }}-go-${{ hashFiles('**/go.sum') }}
    restore-keys: |
      ${{ runner.os }}-node-
    path: |
      ~/go/pkg/mod
      ~/.npm
# Python toolchain, only for the python matrix entries.
- if: matrix.language == 'python'
  uses: actions/setup-python@v5
  with:
    python-version: "3.x"
# Cache Go modules plus the pip cache, keyed on requirements.txt and go.sum.
- name: Cache (Python)
  if: matrix.language == 'python'
  uses: actions/cache@v4
  with:
    key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-go-${{ hashFiles('**/go.sum') }}
    restore-keys: |
      ${{ runner.os }}-pip-
    path: |
      ~/go/pkg/mod
      ~/.cache/pip
# .NET SDK. Guarded on `dotnet`, which this workflow's matrix does not
# list, so the step is skipped for every matrix entry defined here.
- if: matrix.language == 'dotnet'
  uses: actions/setup-dotnet@v4
  with:
    dotnet-version: "6.0.405"
# Assume the integration-test role and export AWS credentials for later steps.
# NOTE(review): the action reference on this step had been mangled into an
# obfuscated e-mail placeholder and could not be resolved; it is restored to
# the v4 major tag. Confirm the exact version pin the project intends.
- uses: aws-actions/configure-aws-credentials@v4
  with:
    role-to-assume: ${{ secrets.INTEG_TEST_LAMBDA_ROLE_ARN }}
    mask-aws-account-id: false
    aws-region: ${{ env.AWS_DEFAULT_REGION }}
    # Default session duration is 1 hour with OIDC.
    role-duration-seconds: 14400  # 4 hours
- name: Patch ADOT
  run: |
    ./patch-upstream.sh
# Log in to Public ECR because the Python build image may need it.
- name: Login to Public ECR
  uses: docker/login-action@v3
  with:
    registry: public.ecr.aws
# Build inside the language's directory, passing the target architecture both
# as GOARCH and as the build script's argument.
- name: Build layers / functions
  working-directory: ${{ matrix.language }}
  run: |
    GOARCH=${{ matrix.architecture }} ./build.sh ${{ matrix.architecture }}
# Map the matrix architecture to LAMBDA_FUNCTION_ARCH: amd64 becomes x86_64,
# arm64 stays arm64. Exactly one of the two steps runs per matrix entry.
- name: Get Lambda Layer `amd64` architecture value
  if: matrix.architecture == 'amd64'
  run: |
    echo LAMBDA_FUNCTION_ARCH=x86_64 | tee --append $GITHUB_ENV
- name: Get Lambda Layer `arm64` architecture value
  if: matrix.architecture == 'arm64'
  run: |
    echo LAMBDA_FUNCTION_ARCH=arm64 | tee --append $GITHUB_ENV
# Export TERRAFORM_DIRECTORY, the per-entry terraform module path used as the
# working directory of the terraform steps.
- name: Get terraform directory
  run: >-
    echo TERRAFORM_DIRECTORY=${{ matrix.language }}/integration-tests/${{ matrix.sample-app }}/${{ matrix.instrumentation-type }} |
    tee --append $GITHUB_ENV
- uses: hashicorp/setup-terraform@v2
  with:
    terraform_version: "1.3.1"
- name: Initialize terraform
  working-directory: ${{ env.TERRAFORM_DIRECTORY }}
  run: terraform init
# The function name includes the run id.
- name: Get terraform Lambda function name
  run: >-
    echo TERRAFORM_LAMBDA_FUNCTION_NAME=lambda-${{ matrix.language }}-${{ matrix.sample-app }}-${{ matrix.instrumentation-type }}-${{ matrix.architecture }}-${{ github.run_id }} |
    tee --append $GITHUB_ENV
# NOTE: (enowell) We don't need to include `sample-app` in the Lambda Layer
# name because different apps should use the same layer rather than create
# their own. However, if we ever soak-test multiple apps, we need to BE
# CAREFUL not to create duplicate layers with the same name.
- name: Get terraform Lambda layer name
  run: >-
    echo TERRAFORM_LAMBDA_LAYER_NAME=aws-otel-${{ matrix.language }}-${{ matrix.instrumentation-type }}-${{ matrix.architecture }}-${{ github.sha }} |
    tee --append $GITHUB_ENV
# Apply the terraform module. The same layer name is passed for both the SDK
# and the collector layer variable.
- name: Apply terraform
  working-directory: ${{ env.TERRAFORM_DIRECTORY }}
  env:
    TF_VAR_architecture: ${{ env.LAMBDA_FUNCTION_ARCH }}
    TF_VAR_function_name: ${{ env.TERRAFORM_LAMBDA_FUNCTION_NAME }}
    TF_VAR_sdk_layer_name: ${{ env.TERRAFORM_LAMBDA_LAYER_NAME }}
    TF_VAR_collector_layer_name: ${{ env.TERRAFORM_LAMBDA_LAYER_NAME }}
  run: |
    terraform apply -auto-approve
# Each step prints a single raw terraform output; later steps read it back
# through `steps.<id>.outputs.stdout`.
- id: extract-endpoint
  name: Extract endpoint
  working-directory: ${{ env.TERRAFORM_DIRECTORY }}
  run: terraform output -raw api-gateway-url
# The AMP endpoint is only read for the java / aws-sdk / agent entries.
- id: extract-amp-endpoint
  name: Extract AMP endpoint
  if: matrix.language == 'java' && matrix.sample-app == 'aws-sdk' && matrix.instrumentation-type == 'agent'
  working-directory: ${{ env.TERRAFORM_DIRECTORY }}
  run: terraform output -raw amp_endpoint
# dotnet and go read the collector layer ARN; every other language reads the
# SDK layer ARN.
- id: extract-sdk-layer-arn
  name: Extract SDK layer arn
  if: matrix.language != 'dotnet' && matrix.language != 'go'
  working-directory: ${{ env.TERRAFORM_DIRECTORY }}
  run: terraform output -raw sdk_layer_arn
- id: extract-collector-layer-arn
  name: Extract Collector layer arn
  if: matrix.language == 'dotnet' || matrix.language == 'go'
  working-directory: ${{ env.TERRAFORM_DIRECTORY }}
  run: terraform output -raw collector_layer_arn
# NOTE: (enowell) `terraform output` prints additional text we are not
# interested in because the `hashicorp/setup-terraform` action installs a
# wrapper around the terraform binary. We work around this by extracting and
# consuming the value in separate steps, because that extra text does not
# show up when the output is accessed from a later step.
#
# See more: https://github.com/hashicorp/setup-terraform/issues/20
#
# The layer version is the 8th colon-separated field of the layer ARN.
- id: set-sdk-layer-version-output
  name: Set SDK layer version output
  if: matrix.language != 'dotnet' && matrix.language != 'go'
  working-directory: ${{ env.TERRAFORM_DIRECTORY }}
  run: |
    layer_version=$(echo "${{ steps.extract-sdk-layer-arn.outputs.stdout }}" | cut -d : -f 8)
    echo "Found version number: $layer_version"
    echo "${{ matrix.language }}-${{ matrix.instrumentation-type }}-version=$layer_version" >> $GITHUB_OUTPUT
- id: set-collector-layer-version-output
  name: Set Collector layer version output
  if: matrix.language == 'dotnet' || matrix.language == 'go'
  working-directory: ${{ env.TERRAFORM_DIRECTORY }}
  run: |
    layer_version=$(echo "${{ steps.extract-collector-layer-arn.outputs.stdout }}" | cut -d : -f 8)
    echo "Found version number: $layer_version"
    echo "${{ matrix.language }}-${{ matrix.instrumentation-type }}-version=$layer_version" >> $GITHUB_OUTPUT
# Send one request to the deployed endpoint before validation.
- name: Send request to endpoint
  run: |
    curl -sS ${{ steps.extract-endpoint.outputs.stdout }}
# The validator lives in a separate repository, checked out under
# ./test-framework.
- name: Checkout test framework
  uses: actions/checkout@v4
  with:
    path: test-framework
    repository: aws-observability/aws-otel-test-framework
# Copy this entry's expected-trace template over the validator's Lambda
# template, then run the validator against the endpoint.
- name: validate trace sample
  run: |
    cp adot/utils/expected-templates/${{ matrix.language }}-${{ matrix.sample-app }}-${{ matrix.instrumentation-type }}.json test-framework/validator/src/main/resources/expected-data-template/lambdaExpectedTrace.mustache
    cd test-framework
    ./gradlew :validator:run --args="-c default-lambda-validation.yml --endpoint ${{ steps.extract-endpoint.outputs.stdout }} --region ${{ env.AWS_DEFAULT_REGION }}"
# Metric validation against the AMP endpoint, java / aws-sdk / agent only.
- name: validate java agent metric sample
  if: matrix.language == 'java' && matrix.sample-app == 'aws-sdk' && matrix.instrumentation-type == 'agent'
  run: |
    cp adot/utils/expected-templates/${{ matrix.language }}-${{ matrix.sample-app }}-${{ matrix.instrumentation-type }}-metric.json test-framework/validator/src/main/resources/expected-data-template/ampExpectedMetric.mustache
    cd test-framework
    ./gradlew :validator:run --args="-c prometheus-static-metric-validation.yml --cortex-instance-endpoint ${{ steps.extract-amp-endpoint.outputs.stdout }} --region ${{ env.AWS_DEFAULT_REGION }}"
# Run the soak-test container against the deployed function and endpoint.
# The AWS_* variables are forwarded by name from the runner environment.
- name: Run soak test
  env:
    # The user-supplied dispatch input goes through an environment variable
    # instead of being templated into the script, so its content cannot be
    # interpreted as shell syntax.
    SOAK_CONFIG: ${{ github.event.inputs.soak_config }}
  # $SOAK_CONFIG is deliberately left unquoted so that a value such as
  # `-t 1800` is still word-split into separate arguments.
  run: >-
    docker run
    --rm
    -e AWS_DEFAULT_REGION
    -e AWS_ACCESS_KEY_ID
    -e AWS_SECRET_ACCESS_KEY
    -e AWS_SESSION_TOKEN
    public.ecr.aws/aws-otel-test/lambda-soak:latest
    -n ${{ env.TERRAFORM_LAMBDA_FUNCTION_NAME }}
    -e ${{ steps.extract-endpoint.outputs.stdout }}
    $SOAK_CONFIG
    ${{ env.SOAKING_TEST_CONFIG }}
    -a ${{ matrix.architecture }}
# Runs only after an earlier step has failed; writes
# `<language>-<instrumentation-type>-error=FAILED` as a step output.
- id: set-layer-if-error-output
  name: Set output if layer Soak Tests has error
  if: failure()
  run: |
    echo "${{ matrix.language }}-${{ matrix.instrumentation-type }}-error=FAILED" >> $GITHUB_OUTPUT
# Drop the layer resource from terraform state so the destroy step does not
# delete it: the collector layer for go, the SDK layer for everything else.
- name: Remove sdk layers from terraform management to prevent deletion.
  if: matrix.language != 'go'
  working-directory: ${{ env.TERRAFORM_DIRECTORY }}
  run: |
    terraform state rm aws_lambda_layer_version.sdk_layer
- name: Remove collector layers from terraform management to prevent deletion.
  if: matrix.language == 'go'
  working-directory: ${{ env.TERRAFORM_DIRECTORY }}
  run: |
    terraform state rm aws_lambda_layer_version.collector_layer
# Tear down whatever is still in terraform state; `always()` makes this run
# even when an earlier step failed.
- name: Destroy terraform
  if: ${{ always() }}
  working-directory: ${{ env.TERRAFORM_DIRECTORY }}
  env:
    TF_VAR_architecture: ${{ env.LAMBDA_FUNCTION_ARCH }}
  run: |
    terraform destroy -auto-approve
# Per layer (language + instrumentation type): fail if the soak job reported
# an error for it, otherwise surface the layer ARN keyword as a warning
# annotation.
output-keywords:
  name: Output (${{ matrix.language }}, ${{ matrix.instrumentation-type }}) Layer Keyword
  if: always()
  needs: [soaking-test]
  runs-on: ubuntu-22.04
  strategy:
    fail-fast: false
    matrix:
      language:
        - go
        - java
        - nodejs
        - python
      instrumentation-type:
        - wrapper
      include:
        - language: java
          instrumentation-type: agent
  steps:
    # jq prints true when the layer's `-error` output equals "FAILED" and
    # false otherwise; `echo false` is the fallback when the pipeline fails.
    # The step succeeds only when the captured value is exactly `false`.
    - name: Confirm none of the architecture soak tests for the layer failed
      run: |
        layer_failed=$(
          echo '${{ toJSON(needs.soaking-test.outputs) }}' |
            jq '."${{ matrix.language }}-${{ matrix.instrumentation-type }}-error" == "FAILED"' ||
            echo false
        )
        [[ $layer_failed == false ]]
    # Look up the layer's `-version` output and print the ARN keyword with an
    # <ARCHITECTURE> placeholder.
    - name: Output keyword for (${{ matrix.language }}, ${{ matrix.instrumentation-type }}) layer
      run: |
        layer_version=$(
          echo '${{ toJSON(needs.soaking-test.outputs) }}' |
            jq -r '."${{ matrix.language }}-${{ matrix.instrumentation-type }}-version"'
        )
        echo "::warning::Layer ARN Keyword: arn:aws:lambda:${{ env.AWS_DEFAULT_REGION }}:611364707713:layer:aws-otel-${{ matrix.language }}-${{ matrix.instrumentation-type }}-<ARCHITECTURE>-${{ github.sha }}:$layer_version"
# Call the publish-status workflow with the overall soak result; `always()`
# makes it run even when the soak job failed.
publish-soaking-status:
  if: always()
  needs:
    - soaking-test
  uses: ./.github/workflows/publish-status.yml
  secrets:
    roleArn: ${{ secrets.METRICS_ROLE_ARN }}
  with:
    namespace: ADOT/GitHubActions
    repository: ${{ github.repository }}
    branch: ${{ github.ref_name }}
    workflow: soaking
    # true only when the soaking-test job's result is 'success'.
    success: ${{ needs.soaking-test.result == 'success' }}
    region: us-west-2