Skip to content

Commit 191b3eb

Browse files
DavidS-ovm and actions-user
authored and committed
Sample otelpgx pool acquire and connect spans (#2625)
Implement 10% sampling for `otelpgx` `pool.acquire`, `connect`, and `prepare` spans to reduce OpenTelemetry traffic. These spans are high-volume and provide little value, contributing to hitting Honeycomb plan limits. Sampling them is expected to reduce API server dataset traffic by approximately 30%. --- Linear Issue: [ENG-1425](https://linear.app/overmind/issue/ENG-1425/start-sampling-poolacquire-connect-and-prepare-spans-from-otelpgx-copy) <a href="https://cursor.com/background-agent?bcId=bc-9ec6d977-f86b-44ab-b548-92f78eb232a1"> <picture> <source media="(prefers-color-scheme: dark)" srcset="https://cursor.com/open-in-cursor-dark.svg"> <source media="(prefers-color-scheme: light)" srcset="https://cursor.com/open-in-cursor-light.svg"> <img alt="Open in Cursor" src="https://cursor.com/open-in-cursor.svg"> </picture> </a> <a href="https://cursor.com/agents?id=bc-9ec6d977-f86b-44ab-b548-92f78eb232a1"> <picture> <source media="(prefers-color-scheme: dark)" srcset="https://cursor.com/open-in-web-dark.svg"> <source media="(prefers-color-scheme: light)" srcset="https://cursor.com/open-in-web-light.svg"> <img alt="Open in Web" src="https://cursor.com/open-in-web.svg"> </picture> </a> GitOrigin-RevId: b030781ffb1422724886d2ebb4f2b8003cff663f
1 parent f36b057 commit 191b3eb

File tree

1 file changed

+79
-28
lines changed

1 file changed

+79
-28
lines changed

tracing/main.go

Lines changed: 79 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,13 @@ func InitTracer(component string, opts ...otlptracehttp.Option) error {
217217
return fmt.Errorf("creating OTLP trace exporter: %w", err)
218218
}
219219

220+
// Create unified sampler for health checks and otelpgx spans
221+
overmindSampler := NewOvermindSampler()
222+
220223
tracerOpts := []sdktrace.TracerProviderOption{
221224
sdktrace.WithBatcher(otlpExp),
222225
sdktrace.WithResource(tracingResource(component)),
223-
sdktrace.WithSampler(sdktrace.ParentBased(NewUserAgentSampler(200, "ELB-HealthChecker/2.0", "kube-probe/1.27+"))),
226+
sdktrace.WithSampler(sdktrace.ParentBased(overmindSampler)),
224227
}
225228
if viper.GetBool("stdout-trace-dump") {
226229
stdoutExp, err := stdouttrace.New(stdouttrace.WithPrettyPrint())
@@ -258,48 +261,96 @@ func ShutdownTracer(ctx context.Context) {
258261
log.WithContext(ctx).Trace("tracing has shut down")
259262
}
260263

261-
type UserAgentSampler struct {
262-
userAgents []string
263-
innerSampler sdktrace.Sampler
264-
sampleRateAttribute attribute.KeyValue
264+
// SamplingRule pairs a matching function with a 1-in-SampleRate sampling rate and a human-readable description.
265+
type SamplingRule struct {
266+
SampleRate int
267+
ShouldSample func(sdktrace.SamplingParameters) bool
268+
Description string
269+
}
270+
271+
// OvermindSampler is a unified sampler that evaluates multiple sampling rules in order
272+
type OvermindSampler struct {
273+
rules []SamplingRule
274+
ruleSamplers []sdktrace.Sampler
275+
}
276+
277+
// NewOvermindSampler creates a new unified sampler with the default rules
278+
func NewOvermindSampler() *OvermindSampler {
279+
rules := []SamplingRule{
280+
{
281+
SampleRate: 200,
282+
ShouldSample: UserAgentMatcher("ELB-HealthChecker/2.0", "kube-probe/1.27+"),
283+
Description: "UserAgent-based sampling for health checks",
284+
},
285+
{
286+
SampleRate: 10,
287+
ShouldSample: SpanNameMatcher("pool.acquire"),
288+
Description: "Span name-based sampling for pool operations",
289+
},
290+
}
291+
292+
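// Build one sampler per rule up front: a rate <= 0 never samples, a rate of 1 always samples, and any other rate keeps 1 in SampleRate traces.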
293+
ruleSamplers := make([]sdktrace.Sampler, 0, len(rules))
294+
for _, rule := range rules {
295+
var sampler sdktrace.Sampler
296+
switch {
297+
case rule.SampleRate <= 0:
298+
sampler = sdktrace.NeverSample()
299+
case rule.SampleRate == 1:
300+
sampler = sdktrace.AlwaysSample()
301+
default:
302+
sampler = sdktrace.TraceIDRatioBased(1.0 / float64(rule.SampleRate))
303+
}
304+
ruleSamplers = append(ruleSamplers, sampler)
305+
}
306+
307+
return &OvermindSampler{
308+
rules: rules,
309+
ruleSamplers: ruleSamplers,
310+
}
265311
}
266312

267-
func NewUserAgentSampler(sampleRate int, userAgents ...string) *UserAgentSampler {
268-
var innerSampler sdktrace.Sampler
269-
switch {
270-
case sampleRate <= 0:
271-
innerSampler = sdktrace.NeverSample()
272-
case sampleRate == 1:
273-
innerSampler = sdktrace.AlwaysSample()
274-
default:
275-
innerSampler = sdktrace.TraceIDRatioBased(1.0 / float64(sampleRate))
313+
// UserAgentMatcher returns a matcher that reports true when the http.user_agent or user_agent.original attribute exactly equals one of userAgents.
314+
func UserAgentMatcher(userAgents ...string) func(sdktrace.SamplingParameters) bool {
315+
return func(parameters sdktrace.SamplingParameters) bool {
316+
for _, attr := range parameters.Attributes {
317+
if (attr.Key == "http.user_agent" || attr.Key == "user_agent.original") &&
318+
slices.Contains(userAgents, attr.Value.AsString()) {
319+
return true
320+
}
321+
}
322+
return false
276323
}
277-
return &UserAgentSampler{
278-
userAgents: userAgents,
279-
innerSampler: innerSampler,
280-
sampleRateAttribute: attribute.Int("SampleRate", sampleRate),
324+
}
325+
326+
// SpanNameMatcher returns a matcher that reports true when the span name exactly equals one of spanNames.
327+
func SpanNameMatcher(spanNames ...string) func(sdktrace.SamplingParameters) bool {
328+
return func(parameters sdktrace.SamplingParameters) bool {
329+
return slices.Contains(spanNames, parameters.Name)
281330
}
282331
}
283332

284-
// ShouldSample returns a SamplingResult based on a decision made from the
285-
// passed parameters.
286-
func (h *UserAgentSampler) ShouldSample(parameters sdktrace.SamplingParameters) sdktrace.SamplingResult {
287-
for _, attr := range parameters.Attributes {
288-
if (attr.Key == "http.user_agent" || attr.Key == "user_agent.original") && slices.Contains(h.userAgents, attr.Value.AsString()) {
289-
result := h.innerSampler.ShouldSample(parameters)
333+
// ShouldSample applies the sampler of the first rule that matches, tagging sampled spans with that rule's SampleRate; spans matching no rule are always sampled.
334+
func (o *OvermindSampler) ShouldSample(parameters sdktrace.SamplingParameters) sdktrace.SamplingResult {
335+
for i, rule := range o.rules {
336+
if rule.ShouldSample(parameters) {
337+
// Use the pre-allocated sampler for this rule
338+
result := o.ruleSamplers[i].ShouldSample(parameters)
290339
if result.Decision == sdktrace.RecordAndSample {
291-
result.Attributes = append(result.Attributes, h.sampleRateAttribute)
340+
result.Attributes = append(result.Attributes,
341+
attribute.Int("SampleRate", rule.SampleRate))
292342
}
293343
return result
294344
}
295345
}
296346

347+
// Default to AlwaysSample if no rules match
297348
return sdktrace.AlwaysSample().ShouldSample(parameters)
298349
}
299350

300-
// Description returns information describing the Sampler.
301-
func (h *UserAgentSampler) Description() string {
302-
return "Simple Sampler based on the UserAgent of the request"
351+
// Description returns information describing the Sampler.
352+
func (o *OvermindSampler) Description() string {
353+
return "Unified Overmind sampler combining multiple sampling strategies"
303354
}
304355

305356
// Version returns the version baked into the binary at build time.

0 commit comments

Comments
 (0)