From 2c03e7ca3b9bf43c654152e2e714db26dc542936 Mon Sep 17 00:00:00 2001 From: Yuri Shkuro Date: Sun, 7 Jul 2024 23:54:51 -0400 Subject: [PATCH] [tracegen] Allow use of adaptive sampling Signed-off-by: Yuri Shkuro --- cmd/tracegen/main.go | 27 +++++++++++++++++-- go.mod | 1 + go.sum | 2 ++ scripts/adaptive-sampling-integration-test.sh | 17 ++++++++++++ 4 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 scripts/adaptive-sampling-integration-test.sh diff --git a/cmd/tracegen/main.go b/cmd/tracegen/main.go index 30b70411078..9970c619128 100644 --- a/cmd/tracegen/main.go +++ b/cmd/tracegen/main.go @@ -19,8 +19,10 @@ import ( "errors" "flag" "fmt" + "time" "github.com/go-logr/zapr" + "go.opentelemetry.io/contrib/samplers/jaegerremote" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/otlp/otlptrace" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" @@ -39,6 +41,8 @@ import ( "github.com/jaegertracing/jaeger/pkg/version" ) +var flagAdaptiveSamplingEndpoint string + func main() { zc := zap.NewDevelopmentConfig() zc.Level = zap.NewAtomicLevelAt(zapcore.Level(-8)) // level used by OTEL's Debug() @@ -51,6 +55,14 @@ func main() { fs := flag.CommandLine cfg := new(tracegen.Config) cfg.Flags(fs) + fs.StringVar( + &flagAdaptiveSamplingEndpoint, + "adaptive-sampling", + "", + "HTTP endpoint to use to retrieve sampling strategies, "+ + "e.g. http://localhost:14268/api/sampling. 
"+ + "When not specified a standard SDK sampler will be used "+ + "(see OTEL_TRACES_SAMPLER env var in OTEL docs)") flag.Parse() logger.Info(version.Get().String()) @@ -94,10 +106,21 @@ func createTracers(cfg *tracegen.Config, logger *zap.Logger) ([]trace.Tracer, fu logger.Sugar().Fatalf("resource creation failed: %s", err) } - tp := sdktrace.NewTracerProvider( + opts := []sdktrace.TracerProviderOption{ sdktrace.WithBatcher(exp, sdktrace.WithBlocking()), sdktrace.WithResource(res), - ) + } + if flagAdaptiveSamplingEndpoint != "" { + jaegerRemoteSampler := jaegerremote.New( + svc, + jaegerremote.WithSamplingServerURL(flagAdaptiveSamplingEndpoint), + jaegerremote.WithSamplingRefreshInterval(5*time.Second), + jaegerremote.WithInitialSampler(sdktrace.TraceIDRatioBased(0.5)), + ) + opts = append(opts, sdktrace.WithSampler(jaegerRemoteSampler)) + logger.Sugar().Infof("using adaptive sampling URL: %s", flagAdaptiveSamplingEndpoint) + } + tp := sdktrace.NewTracerProvider(opts...) tracers = append(tracers, tp.Tracer(cfg.Service)) shutdown = append(shutdown, tp.Shutdown) } diff --git a/go.mod b/go.mod index b45d71f79e3..59136d270ac 100644 --- a/go.mod +++ b/go.mod @@ -71,6 +71,7 @@ require ( go.opentelemetry.io/collector/receiver/otlpreceiver v0.104.0 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 + go.opentelemetry.io/contrib/samplers/jaegerremote v0.22.0 go.opentelemetry.io/otel v1.28.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0 diff --git a/go.sum b/go.sum index aaeb0c4b542..1fef2584e96 100644 --- a/go.sum +++ b/go.sum @@ -489,6 +489,8 @@ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 h1:4K4tsIX go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0/go.mod h1:jjdQuTGVsXV4vSs+CJ2qYDeDPf9yIJV23qlIzBm73Vg= 
go.opentelemetry.io/contrib/propagators/b3 v1.27.0 h1:IjgxbomVrV9za6bRi8fWCNXENs0co37SZedQilP2hm0= go.opentelemetry.io/contrib/propagators/b3 v1.27.0/go.mod h1:Dv9obQz25lCisDvvs4dy28UPh974CxkahRDUPsY7y9E= +go.opentelemetry.io/contrib/samplers/jaegerremote v0.22.0 h1:OYxqumWcd1yaV/qvCt1B7Sru9OeUNGjeXq/oldx3AGk= +go.opentelemetry.io/contrib/samplers/jaegerremote v0.22.0/go.mod h1:2tZTRqCbvx7nG57wUwd5NQpNVujOWnR84iPLllIH0Ok= go.opentelemetry.io/contrib/zpages v0.52.0 h1:MPgkMy0Cp3O5EdfVXP0ss3ujhEibysTM4eszx7E7d+E= go.opentelemetry.io/contrib/zpages v0.52.0/go.mod h1:fqG5AFdoYru3A3DnhibVuaaEfQV2WKxE7fYE1jgDRwk= go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= diff --git a/scripts/adaptive-sampling-integration-test.sh b/scripts/adaptive-sampling-integration-test.sh new file mode 100644 index 00000000000..99be9e59fd1 --- /dev/null +++ b/scripts/adaptive-sampling-integration-test.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -euf -o pipefail + +# This script is currently a placeholder. + +# Commands to run the integration test: +# SAMPLING_STORAGE_TYPE=memory SAMPLING_CONFIG_TYPE=adaptive go run -tags=ui ./cmd/all-in-one --log-level=debug +# go run ./cmd/tracegen -adaptive-sampling=http://localhost:14268/api/sampling -pause=10ms -duration=60m + +# Check how the strategy is changing: +# curl 'http://localhost:14268/api/sampling?service=tracegen' | jq . + +# Known issues: +# - The SDK does not report sampling probability in the tags the way Jaeger SDKs did. +# - The server probably does not recognize spans as having adaptive sampling without sampler info. +# - There is no way to modify the target traces-per-second dynamically; the collector must be restarted.