Skip to content

Commit 423be05

Browse files
authored
feat: add target group info metric (#3581)
* feat: target group info metric * docs: target group info metric
1 parent f6aa7ef commit 423be05

File tree

9 files changed

+130
-36
lines changed

9 files changed

+130
-36
lines changed

controllers/gateway/gateway_controller.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"sigs.k8s.io/aws-load-balancer-controller/pkg/gateway/referencecounter"
2525
"sigs.k8s.io/aws-load-balancer-controller/pkg/gateway/routeutils"
2626
"sigs.k8s.io/aws-load-balancer-controller/pkg/k8s"
27+
awsmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/aws"
2728
lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc"
2829
metricsutil "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/util"
2930
"sigs.k8s.io/aws-load-balancer-controller/pkg/model/core"
@@ -51,13 +52,13 @@ const (
5152
var _ Reconciler = &gatewayReconciler{}
5253

5354
// NewNLBGatewayReconciler constructs a gateway reconciler that specifically handles NLB gateways
54-
func NewNLBGatewayReconciler(routeLoader routeutils.Loader, referenceCounter referencecounter.ServiceReferenceCounter, cloud services.Cloud, k8sClient client.Client, eventRecorder record.EventRecorder, controllerConfig config.ControllerConfig, finalizerManager k8s.FinalizerManager, networkingManager networking.NetworkingManager, networkingSGReconciler networking.SecurityGroupReconciler, networkingSGManager networking.SecurityGroupManager, elbv2TaggingManager elbv2deploy.TaggingManager, subnetResolver networking.SubnetsResolver, vpcInfoProvider networking.VPCInfoProvider, backendSGProvider networking.BackendSGProvider, sgResolver networking.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters) Reconciler {
55-
return newGatewayReconciler(constants.NLBGatewayController, elbv2model.LoadBalancerTypeNetwork, controllerConfig.NLBGatewayMaxConcurrentReconciles, constants.NLBGatewayTagPrefix, shared_constants.NLBGatewayFinalizer, routeLoader, referenceCounter, routeutils.L4RouteFilter, cloud, k8sClient, eventRecorder, controllerConfig, finalizerManager, networkingSGReconciler, networkingManager, networkingSGManager, elbv2TaggingManager, subnetResolver, vpcInfoProvider, backendSGProvider, sgResolver, nlbAddons, logger, metricsCollector, reconcileCounters.IncrementNLBGateway)
55+
func NewNLBGatewayReconciler(routeLoader routeutils.Loader, referenceCounter referencecounter.ServiceReferenceCounter, cloud services.Cloud, k8sClient client.Client, eventRecorder record.EventRecorder, controllerConfig config.ControllerConfig, finalizerManager k8s.FinalizerManager, networkingManager networking.NetworkingManager, networkingSGReconciler networking.SecurityGroupReconciler, networkingSGManager networking.SecurityGroupManager, elbv2TaggingManager elbv2deploy.TaggingManager, subnetResolver networking.SubnetsResolver, vpcInfoProvider networking.VPCInfoProvider, backendSGProvider networking.BackendSGProvider, sgResolver networking.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters, targetGroupCollector awsmetrics.TargetGroupCollector) Reconciler {
56+
return newGatewayReconciler(constants.NLBGatewayController, elbv2model.LoadBalancerTypeNetwork, controllerConfig.NLBGatewayMaxConcurrentReconciles, constants.NLBGatewayTagPrefix, shared_constants.NLBGatewayFinalizer, routeLoader, referenceCounter, routeutils.L4RouteFilter, cloud, k8sClient, eventRecorder, controllerConfig, finalizerManager, networkingSGReconciler, networkingManager, networkingSGManager, elbv2TaggingManager, subnetResolver, vpcInfoProvider, backendSGProvider, sgResolver, nlbAddons, logger, metricsCollector, reconcileCounters.IncrementNLBGateway, targetGroupCollector)
5657
}
5758

5859
// NewALBGatewayReconciler constructs a gateway reconciler that specifically handles ALB gateways
59-
func NewALBGatewayReconciler(routeLoader routeutils.Loader, cloud services.Cloud, k8sClient client.Client, referenceCounter referencecounter.ServiceReferenceCounter, eventRecorder record.EventRecorder, controllerConfig config.ControllerConfig, finalizerManager k8s.FinalizerManager, networkingManager networking.NetworkingManager, networkingSGReconciler networking.SecurityGroupReconciler, networkingSGManager networking.SecurityGroupManager, elbv2TaggingManager elbv2deploy.TaggingManager, subnetResolver networking.SubnetsResolver, vpcInfoProvider networking.VPCInfoProvider, backendSGProvider networking.BackendSGProvider, sgResolver networking.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters) Reconciler {
60-
return newGatewayReconciler(constants.ALBGatewayController, elbv2model.LoadBalancerTypeApplication, controllerConfig.ALBGatewayMaxConcurrentReconciles, constants.ALBGatewayTagPrefix, shared_constants.ALBGatewayFinalizer, routeLoader, referenceCounter, routeutils.L7RouteFilter, cloud, k8sClient, eventRecorder, controllerConfig, finalizerManager, networkingSGReconciler, networkingManager, networkingSGManager, elbv2TaggingManager, subnetResolver, vpcInfoProvider, backendSGProvider, sgResolver, albAddons, logger, metricsCollector, reconcileCounters.IncrementALBGateway)
60+
func NewALBGatewayReconciler(routeLoader routeutils.Loader, cloud services.Cloud, k8sClient client.Client, referenceCounter referencecounter.ServiceReferenceCounter, eventRecorder record.EventRecorder, controllerConfig config.ControllerConfig, finalizerManager k8s.FinalizerManager, networkingManager networking.NetworkingManager, networkingSGReconciler networking.SecurityGroupReconciler, networkingSGManager networking.SecurityGroupManager, elbv2TaggingManager elbv2deploy.TaggingManager, subnetResolver networking.SubnetsResolver, vpcInfoProvider networking.VPCInfoProvider, backendSGProvider networking.BackendSGProvider, sgResolver networking.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters, targetGroupCollector awsmetrics.TargetGroupCollector) Reconciler {
61+
return newGatewayReconciler(constants.ALBGatewayController, elbv2model.LoadBalancerTypeApplication, controllerConfig.ALBGatewayMaxConcurrentReconciles, constants.ALBGatewayTagPrefix, shared_constants.ALBGatewayFinalizer, routeLoader, referenceCounter, routeutils.L7RouteFilter, cloud, k8sClient, eventRecorder, controllerConfig, finalizerManager, networkingSGReconciler, networkingManager, networkingSGManager, elbv2TaggingManager, subnetResolver, vpcInfoProvider, backendSGProvider, sgResolver, albAddons, logger, metricsCollector, reconcileCounters.IncrementALBGateway, targetGroupCollector)
6162
}
6263

6364
// newGatewayReconciler constructs a reconciler that responds to gateway object changes
@@ -68,13 +69,13 @@ func newGatewayReconciler(controllerName string, lbType elbv2model.LoadBalancerT
6869
networkingManager networking.NetworkingManager, networkingSGManager networking.SecurityGroupManager, elbv2TaggingManager elbv2deploy.TaggingManager,
6970
subnetResolver networking.SubnetsResolver, vpcInfoProvider networking.VPCInfoProvider, backendSGProvider networking.BackendSGProvider,
7071
sgResolver networking.SecurityGroupResolver, supportedAddons []addon.Addon, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector,
71-
reconcileTracker func(namespaceName types.NamespacedName)) Reconciler {
72+
reconcileTracker func(namespaceName types.NamespacedName), targetGroupCollector awsmetrics.TargetGroupCollector) Reconciler {
7273

7374
trackingProvider := tracking.NewDefaultProvider(gatewayTagPrefix, controllerConfig.ClusterName)
7475
modelBuilder := gatewaymodel.NewModelBuilder(subnetResolver, vpcInfoProvider, cloud.VpcID(), lbType, trackingProvider, elbv2TaggingManager, controllerConfig, cloud.EC2(), cloud.ELBV2(), cloud.ACM(), controllerConfig.FeatureGates, controllerConfig.ClusterName, controllerConfig.DefaultTags, sets.New(controllerConfig.ExternalManagedTags...), controllerConfig.DefaultSSLPolicy, controllerConfig.DefaultTargetType, controllerConfig.DefaultLoadBalancerScheme, backendSGProvider, sgResolver, controllerConfig.EnableBackendSecurityGroup, controllerConfig.DisableRestrictedSGRules, controllerConfig.IngressConfig.AllowedCertificateAuthorityARNs, supportedAddons, logger)
7576

7677
stackMarshaller := deploy.NewDefaultStackMarshaller()
77-
stackDeployer := deploy.NewDefaultStackDeployer(cloud, k8sClient, networkingManager, networkingSGManager, networkingSGReconciler, elbv2TaggingManager, controllerConfig, gatewayTagPrefix, logger, metricsCollector, controllerName)
78+
stackDeployer := deploy.NewDefaultStackDeployer(cloud, k8sClient, networkingManager, networkingSGManager, networkingSGReconciler, elbv2TaggingManager, controllerConfig, gatewayTagPrefix, logger, metricsCollector, controllerName, targetGroupCollector)
7879

7980
cfgResolver := newGatewayConfigResolver()
8081

controllers/ingress/group_controller.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package ingress
33
import (
44
"context"
55
"fmt"
6+
awsmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/aws"
67

78
"sigs.k8s.io/controller-runtime/pkg/reconcile"
89

@@ -52,7 +53,8 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco
5253
finalizerManager k8s.FinalizerManager, networkingSGManager networkingpkg.SecurityGroupManager,
5354
networkingManager networkingpkg.NetworkingManager, networkingSGReconciler networkingpkg.SecurityGroupReconciler, subnetsResolver networkingpkg.SubnetsResolver,
5455
elbv2TaggingManager elbv2deploy.TaggingManager, controllerConfig config.ControllerConfig, backendSGProvider networkingpkg.BackendSGProvider,
55-
sgResolver networkingpkg.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters) *groupReconciler {
56+
sgResolver networkingpkg.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters,
57+
targetGroupCollector awsmetrics.TargetGroupCollector) *groupReconciler {
5658

5759
annotationParser := annotations.NewSuffixAnnotationParser(annotations.AnnotationPrefixIngress)
5860
authConfigBuilder := ingress.NewDefaultAuthConfigBuilder(annotationParser)
@@ -68,7 +70,7 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco
6870
controllerConfig.EnableBackendSecurityGroup, controllerConfig.EnableManageBackendSecurityGroupRules, controllerConfig.DisableRestrictedSGRules, controllerConfig.IngressConfig.AllowedCertificateAuthorityARNs, controllerConfig.FeatureGates.Enabled(config.EnableIPTargetType), logger, metricsCollector)
6971
stackMarshaller := deploy.NewDefaultStackMarshaller()
7072
stackDeployer := deploy.NewDefaultStackDeployer(cloud, k8sClient, networkingManager, networkingSGManager, networkingSGReconciler, elbv2TaggingManager,
71-
controllerConfig, ingressTagPrefix, logger, metricsCollector, controllerName)
73+
controllerConfig, ingressTagPrefix, logger, metricsCollector, controllerName, targetGroupCollector)
7274
classLoader := ingress.NewDefaultClassLoader(k8sClient, true)
7375
classAnnotationMatcher := ingress.NewDefaultClassAnnotationMatcher(controllerConfig.IngressConfig.IngressClass)
7476
manageIngressesWithoutIngressClass := controllerConfig.IngressConfig.IngressClass == ""

controllers/service/service_controller.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package service
33
import (
44
"context"
55
"fmt"
6+
awsmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/aws"
67
"sigs.k8s.io/aws-load-balancer-controller/pkg/shared_constants"
78

89
"sigs.k8s.io/controller-runtime/pkg/reconcile"
@@ -43,7 +44,8 @@ func NewServiceReconciler(cloud services.Cloud, k8sClient client.Client, eventRe
4344
finalizerManager k8s.FinalizerManager, networkingManager networking.NetworkingManager, networkingSGManager networking.SecurityGroupManager,
4445
networkingSGReconciler networking.SecurityGroupReconciler, subnetsResolver networking.SubnetsResolver,
4546
vpcInfoProvider networking.VPCInfoProvider, elbv2TaggingManager elbv2deploy.TaggingManager, controllerConfig config.ControllerConfig,
46-
backendSGProvider networking.BackendSGProvider, sgResolver networking.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters) *serviceReconciler {
47+
backendSGProvider networking.BackendSGProvider, sgResolver networking.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters,
48+
targetGroupCollector awsmetrics.TargetGroupCollector) *serviceReconciler {
4749

4850
annotationParser := annotations.NewSuffixAnnotationParser(serviceAnnotationPrefix)
4951
trackingProvider := tracking.NewDefaultProvider(serviceTagPrefix, controllerConfig.ClusterName)
@@ -53,7 +55,7 @@ func NewServiceReconciler(cloud services.Cloud, k8sClient client.Client, eventRe
5355
controllerConfig.DefaultSSLPolicy, controllerConfig.DefaultTargetType, controllerConfig.DefaultLoadBalancerScheme, controllerConfig.FeatureGates.Enabled(config.EnableIPTargetType), serviceUtils,
5456
backendSGProvider, sgResolver, controllerConfig.EnableBackendSecurityGroup, controllerConfig.EnableManageBackendSecurityGroupRules, controllerConfig.DisableRestrictedSGRules, logger, metricsCollector, controllerConfig.FeatureGates.Enabled(config.EnableTCPUDPListenerType))
5557
stackMarshaller := deploy.NewDefaultStackMarshaller()
56-
stackDeployer := deploy.NewDefaultStackDeployer(cloud, k8sClient, networkingManager, networkingSGManager, networkingSGReconciler, elbv2TaggingManager, controllerConfig, serviceTagPrefix, logger, metricsCollector, controllerName)
58+
stackDeployer := deploy.NewDefaultStackDeployer(cloud, k8sClient, networkingManager, networkingSGManager, networkingSGReconciler, elbv2TaggingManager, controllerConfig, serviceTagPrefix, logger, metricsCollector, controllerName, targetGroupCollector)
5759
return &serviceReconciler{
5860
k8sClient: k8sClient,
5961
eventRecorder: eventRecorder,

docs/guide/metrics/prometheus/index.md

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -41,19 +41,20 @@ Following metrics are added:
4141
|------|------|-------------|
4242
| aws_api_calls_total | Counter | Total number of SDK API calls from the customer's code to AWS services |
4343
| aws_api_call_duration_seconds | Histogram | Perceived latency from when your code makes an SDK call, includes retries |
44-
| aws_api_call_call_retries | Counter | Number of times the SDK retried requests to AWS services for SDK API calls |
45-
| aws_api_requests_total | Counter | Total number of HTTP requests that the SDK made |
44+
| aws_api_call_call_retries | Counter | Number of times the SDK retried requests to AWS services for SDK API calls |
45+
| aws_api_requests_total | Counter | Total number of HTTP requests that the SDK made |
4646
| aws_request_duration_seconds | Histogram | Latency of an individual HTTP request to the service endpoint |
47-
| api_call_permission_errors_total | Counter | Number of failed AWS API calls due to auth or authorization failures |
48-
| api_call_service_limit_exceeded_errors_total | Counter | Number of failed AWS API calls due to exceeding service limit |
49-
| api_call_throttled_errors_total | Counter| Number of failed AWS API calls due to throttling error |
50-
| api_call_validation_errors_total | Counter | Number of failed AWS API calls due to validation error |
47+
| api_call_permission_errors_total | Counter | Number of failed AWS API calls due to auth or authorization failures |
48+
| api_call_service_limit_exceeded_errors_total | Counter | Number of failed AWS API calls due to exceeding service limit |
49+
| api_call_throttled_errors_total | Counter | Number of failed AWS API calls due to throttling error |
50+
| api_call_validation_errors_total | Counter | Number of failed AWS API calls due to validation error |
51+
| aws_target_group_info | Gauge | Information about a target group |
5152
| awslbc_readiness_gate_ready_seconds | Histogram | Time to flip a readiness gate to true |
5253
| awslbc_reconcile_stage_duration | Histogram | Latency of different reconcile stages |
53-
| awslbc_reconcile_errors_total | Counter | Number of controller errors by error type |
54-
| awslbc_webhook_validation_failures_total | Counter | Number of validation errors by webhook type |
55-
| awslbc_webhook_mutation_failures_total | Counter | Number of mutation errors by webhook type |
56-
| awslbc_top_talkers | Gauge | Number of reconciliations by resource |
54+
| awslbc_reconcile_errors_total | Counter | Number of controller errors by error type |
55+
| awslbc_webhook_validation_failures_total | Counter | Number of validation errors by webhook type |
56+
| awslbc_webhook_mutation_failures_total | Counter | Number of mutation errors by webhook type |
57+
| awslbc_top_talkers | Gauge | Number of reconciliations by resource |
5758

5859

5960
## Accessing and Querying the Metrics in Prometheus UI
@@ -70,7 +71,7 @@ Once inside the Prometheus UI, you can use PromQL queries. Here are some example
7071
* Get the total reconcile error count: `sum(awslbc_controller_reconcile_errors_total)`
7172
* Get the average reconcile duration for stage : `avg(awslbc_controller_reconcile_stage_duration_sum{controller="service", reconcile_stage="DNS_resolve"})`
7273
* Get the cached object: `sum(awslbc_cache_object_total)`
73-
74+
* Enrich other metrics with target group information: `aws_target_group_info * on(target_group) group_left last_over_time(aws_applicationelb_healthy_host_count_minimum[20m])`
7475

7576

7677
## Visualizing Metrics

main.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ type gatewayControllerConfig struct {
105105
reconcileCounters *metricsutil.ReconcileCounters
106106
serviceReferenceCounter referencecounter.ServiceReferenceCounter
107107
networkingManager networking.NetworkingManager
108+
targetGroupCollector awsmetrics.TargetGroupCollector
108109
}
109110

110111
func main() {
@@ -148,6 +149,7 @@ func main() {
148149

149150
reconcileCounters := metricsutil.NewReconcileCounters()
150151
lbcMetricsCollector := lbcmetrics.NewCollector(metrics.Registry, mgr, reconcileCounters, ctrl.Log.WithName("controller_metrics"))
152+
targetGroupCollector := awsmetrics.NewTargetGroupCollector(metrics.Registry)
151153

152154
clientSet, err := kubernetes.NewForConfig(mgr.GetConfig())
153155
if err != nil {
@@ -186,10 +188,12 @@ func main() {
186188
elbv2TaggingManager := elbv2deploy.NewDefaultTaggingManager(cloud.ELBV2(), cloud.VpcID(), controllerCFG.FeatureGates, cloud.RGT(), ctrl.Log)
187189
ingGroupReconciler := ingress.NewGroupReconciler(cloud, mgr.GetClient(), mgr.GetEventRecorderFor("ingress"),
188190
finalizerManager, sgManager, networkingManager, sgReconciler, subnetResolver, elbv2TaggingManager,
189-
controllerCFG, backendSGProvider, sgResolver, ctrl.Log.WithName("controllers").WithName("ingress"), lbcMetricsCollector, reconcileCounters)
191+
controllerCFG, backendSGProvider, sgResolver, ctrl.Log.WithName("controllers").WithName("ingress"), lbcMetricsCollector, reconcileCounters,
192+
targetGroupCollector)
190193
svcReconciler := service.NewServiceReconciler(cloud, mgr.GetClient(), mgr.GetEventRecorderFor("service"),
191194
finalizerManager, networkingManager, sgManager, sgReconciler, subnetResolver, vpcInfoProvider, elbv2TaggingManager,
192-
controllerCFG, backendSGProvider, sgResolver, ctrl.Log.WithName("controllers").WithName("service"), lbcMetricsCollector, reconcileCounters)
195+
controllerCFG, backendSGProvider, sgResolver, ctrl.Log.WithName("controllers").WithName("service"), lbcMetricsCollector, reconcileCounters,
196+
targetGroupCollector)
193197

194198
delayingQueue := workqueue.NewDelayingQueueWithConfig(workqueue.DelayingQueueConfig{
195199
Name: "delayed-target-group-binding",
@@ -244,6 +248,7 @@ func main() {
244248
reconcileCounters: reconcileCounters,
245249
networkingManager: networkingManager,
246250
serviceReferenceCounter: serviceReferenceCounter,
251+
targetGroupCollector: targetGroupCollector,
247252
}
248253

249254
enabledControllers := sets.Set[string]{}
@@ -448,6 +453,7 @@ func setupGatewayController(ctx context.Context, mgr ctrl.Manager, cfg *gatewayC
448453
logger,
449454
cfg.metricsCollector,
450455
cfg.reconcileCounters,
456+
cfg.targetGroupCollector,
451457
)
452458
case gateway_constants.ALBGatewayController:
453459
reconciler = gateway.NewALBGatewayReconciler(
@@ -469,6 +475,7 @@ func setupGatewayController(ctx context.Context, mgr ctrl.Manager, cfg *gatewayC
469475
logger,
470476
cfg.metricsCollector,
471477
cfg.reconcileCounters,
478+
cfg.targetGroupCollector,
472479
)
473480
default:
474481
return fmt.Errorf("unknown controller type: %s", controllerType)

0 commit comments

Comments
 (0)