@@ -9,15 +9,8 @@ import (
9
9
"bytes"
10
10
"cmp"
11
11
"context"
12
- "crypto/rand"
13
- "crypto/rsa"
14
- "crypto/x509"
15
- "crypto/x509/pkix"
16
- "encoding/base64"
17
12
"encoding/json"
18
- "encoding/pem"
19
13
"fmt"
20
- "math/big"
21
14
"net"
22
15
"net/http"
23
16
"os"
@@ -71,7 +64,7 @@ type TestMainConfig struct {
71
64
// When the inferenceExtension flag is set to true, it also installs the Inference Extension and the
72
65
// Inference Pool resources, and the Envoy Gateway configuration which are required for the tests.
73
66
func TestMain (m * testing.M , config TestMainConfig ) {
74
- ctx , cancel := context .WithDeadline (context .Background (), time .Now ().Add (10 * time .Minute ))
67
+ ctx , cancel := context .WithDeadline (context .Background (), time .Now ().Add (5 * time .Minute ))
75
68
76
69
// The following code sets up the kind cluster, installs the Envoy Gateway, and installs the AI Gateway.
77
70
// They must be idempotent and can be run multiple times so that we can run the tests multiple times on
@@ -160,11 +153,6 @@ func initKindCluster(ctx context.Context) (err error) {
160
153
return
161
154
}
162
155
163
- initLog ("\t Waiting for API server to be ready" )
164
- if err = waitForAPIServerReady (ctx ); err != nil {
165
- return
166
- }
167
-
168
156
initLog ("\t Loading Docker images into kind cluster" )
169
157
for _ , image := range []string {
170
158
"docker.io/envoyproxy/ai-gateway-controller:latest" ,
@@ -181,31 +169,6 @@ func initKindCluster(ctx context.Context) (err error) {
181
169
return nil
182
170
}
183
171
184
- func waitForAPIServerReady (ctx context.Context ) error {
185
- initLog ("\t \t Waiting for API server to be available" )
186
-
187
- // Try for up to 2 minutes
188
- timeout := time .After (2 * time .Minute )
189
- ticker := time .NewTicker (5 * time .Second )
190
- defer ticker .Stop ()
191
-
192
- for {
193
- select {
194
- case <- timeout :
195
- return fmt .Errorf ("timeout waiting for API server to be ready" )
196
- case <- ticker .C :
197
- cmd := Kubectl (ctx , "cluster-info" )
198
- cmd .Stdout = nil
199
- cmd .Stderr = nil
200
- if err := cmd .Run (); err == nil {
201
- initLog ("\t \t API server is ready" )
202
- return nil
203
- }
204
- initLog ("\t \t API server not ready yet, retrying..." )
205
- }
206
- }
207
- }
208
-
209
172
func initMetalLB (ctx context.Context ) (err error ) {
210
173
initLog ("Installing MetalLB" )
211
174
start := time .Now ()
@@ -437,19 +400,13 @@ func initEnvoyGateway(ctx context.Context, inferenceExtension bool) (err error)
437
400
initLog ("\t Helm Install" )
438
401
helm := exec .CommandContext (ctx , "go" , "tool" , "helm" , "upgrade" , "-i" , "eg" ,
439
402
"oci://docker.io/envoyproxy/gateway-helm" , "--version" , egVersion ,
440
- "-n" , "envoy-gateway-system" , "--create-namespace" , "--no-hooks" )
403
+ "-n" , "envoy-gateway-system" , "--create-namespace" )
441
404
helm .Stdout = os .Stdout
442
405
helm .Stderr = os .Stderr
443
406
if err = helm .Run (); err != nil {
444
407
return
445
408
}
446
409
447
- // Create webhook certificates since we skipped hooks
448
- initLog ("\t Creating webhook certificates" )
449
- if err = createEnvoyGatewayWebhookCerts (ctx ); err != nil {
450
- return fmt .Errorf ("failed to create webhook certificates: %w" , err )
451
- }
452
-
453
410
initLog ("\t Applying Patch for Envoy Gateway" )
454
411
if err = KubectlApplyManifest (ctx , "../../manifests/envoy-gateway-config/" ); err != nil {
455
412
return
@@ -464,19 +421,12 @@ func initEnvoyGateway(ctx context.Context, inferenceExtension bool) (err error)
464
421
if err = kubectlRestartDeployment (ctx , "envoy-gateway-system" , "envoy-gateway" ); err != nil {
465
422
return
466
423
}
467
-
468
- // Only wait for ratelimit deployment if it exists (it may not exist when hooks are skipped)
469
- if deploymentExists ("envoy-gateway-system" , "envoy-ratelimit" ) {
470
- initLog ("\t Waiting for Ratelimit deployment to be ready" )
471
- if err = kubectlWaitForDeploymentReady ("envoy-gateway-system" , "envoy-ratelimit" ); err != nil {
472
- return
473
- }
474
- } else {
475
- initLog ("\t Skipping ratelimit deployment wait (not created)" )
424
+ initLog ("\t Waiting for Ratelimit deployment to be ready" )
425
+ if err = kubectlWaitForDeploymentReady ("envoy-gateway-system" , "envoy-ratelimit" ); err != nil {
426
+ return
476
427
}
477
428
initLog ("\t Waiting for Envoy Gateway deployment to be ready" )
478
- // In constrained environments, use lenient readiness check
479
- return kubectlWaitForDeploymentReadyLenient ("envoy-gateway-system" , "envoy-gateway" )
429
+ return kubectlWaitForDeploymentReady ("envoy-gateway-system" , "envoy-gateway" )
480
430
}
481
431
482
432
func initAIGateway (ctx context.Context , aiGatewayHelmFlags []string ) (err error ) {
@@ -514,11 +464,11 @@ func initAIGateway(ctx context.Context, aiGatewayHelmFlags []string) (err error)
514
464
if err = kubectlRestartDeployment (ctx , "envoy-ai-gateway-system" , "ai-gateway-controller" ); err != nil {
515
465
return
516
466
}
517
- return kubectlWaitForDeploymentReadyLenient ("envoy-ai-gateway-system" , "ai-gateway-controller" )
467
+ return kubectlWaitForDeploymentReady ("envoy-ai-gateway-system" , "ai-gateway-controller" )
518
468
}
519
469
520
- // initAIGatewayFromRegistry initializes the AI Gateway from the OCI registry with the specified version.
521
- // This is used for upgrade testing where we start with a released version and upgrade to local charts .
470
+ // initAIGatewayFromRegistry installs AI Gateway from the registry using the specified version.
471
+ // This is used for upgrade testing to install a specific released version.
522
472
func initAIGatewayFromRegistry (ctx context.Context , version string ) (err error ) {
523
473
initLog ("Installing AI Gateway from registry" )
524
474
start := time .Now ()
@@ -548,7 +498,7 @@ func initAIGatewayFromRegistry(ctx context.Context, version string) (err error)
548
498
}
549
499
550
500
initLog ("\t Waiting for AI Gateway controller to be ready" )
551
- return kubectlWaitForDeploymentReadyLenient ("envoy-ai-gateway-system" , "ai-gateway-controller" )
501
+ return kubectlWaitForDeploymentReady ("envoy-ai-gateway-system" , "ai-gateway-controller" )
552
502
}
553
503
554
504
// UpgradeAIGatewayToLocal upgrades the AI Gateway from registry version to local charts.
@@ -558,24 +508,24 @@ func UpgradeAIGatewayToLocal(ctx context.Context, aiGatewayHelmFlags []string) (
558
508
start := time .Now ()
559
509
defer func () {
560
510
elapsed := time .Since (start )
561
- initLog (fmt .Sprintf ("\t done (took %.2fs in total)" , elapsed .Seconds ()))
511
+ initLog (fmt .Sprintf ("\t done (took %.2fs in total)\n " , elapsed .Seconds ()))
562
512
}()
563
513
564
- initLog ("\t Helm Upgrade CRDs to local" )
514
+ initLog ("\t Upgrading CRDs to local charts " )
565
515
helmCRD := exec .CommandContext (ctx , "go" , "tool" , "helm" , "upgrade" , "-i" , "ai-eg-crd" ,
566
516
"../../manifests/charts/ai-gateway-crds-helm" ,
567
- "-n" , "envoy-ai-gateway-system" )
517
+ "-n" , "envoy-ai-gateway-system" , "--create-namespace" )
568
518
helmCRD .Stdout = os .Stdout
569
519
helmCRD .Stderr = os .Stderr
570
520
if err = helmCRD .Run (); err != nil {
571
521
return
572
522
}
573
523
574
- initLog ("\t Helm Upgrade AI Gateway to local" )
524
+ initLog ("\t Upgrading AI Gateway to local charts " )
575
525
args := []string {
576
526
"tool" , "helm" , "upgrade" , "-i" , "ai-eg" ,
577
527
"../../manifests/charts/ai-gateway-helm" ,
578
- "-n" , "envoy-ai-gateway-system" ,
528
+ "-n" , "envoy-ai-gateway-system" , "--create-namespace" ,
579
529
}
580
530
args = append (args , aiGatewayHelmFlags ... )
581
531
@@ -586,12 +536,13 @@ func UpgradeAIGatewayToLocal(ctx context.Context, aiGatewayHelmFlags []string) (
586
536
return
587
537
}
588
538
589
- // Restart the controller to pick up the new changes in the AI Gateway.
590
- initLog ("\t Restart AI Gateway controller" )
539
+ initLog ("\t Restarting AI Gateway controller" )
591
540
if err = kubectlRestartDeployment (ctx , "envoy-ai-gateway-system" , "ai-gateway-controller" ); err != nil {
592
541
return
593
542
}
594
- return kubectlWaitForDeploymentReadyLenient ("envoy-ai-gateway-system" , "ai-gateway-controller" )
543
+
544
+ initLog ("\t Waiting for AI Gateway controller to be ready" )
545
+ return kubectlWaitForDeploymentReady ("envoy-ai-gateway-system" , "ai-gateway-controller" )
595
546
}
596
547
597
548
func initPrometheus (ctx context.Context ) (err error ) {
@@ -764,132 +715,3 @@ func (f PortForwarder) Kill() {
764
715
func (f PortForwarder ) Address () string {
765
716
return fmt .Sprintf ("http://127.0.0.1:%d" , f .localPort )
766
717
}
767
-
768
- // createEnvoyGatewayWebhookCerts creates the TLS certificates needed for Envoy Gateway webhooks
769
- // when the cert generation hooks are skipped.
770
- func createEnvoyGatewayWebhookCerts (ctx context.Context ) error {
771
- // Create a simple self-signed certificate for the webhook
772
- certPEM , keyPEM , err := generateSelfSignedCert ("envoy-gateway.envoy-gateway-system.svc" , "envoy-gateway-system" )
773
- if err != nil {
774
- return fmt .Errorf ("failed to generate self-signed certificate: %w" , err )
775
- }
776
-
777
- // Create the TLS secret
778
- secretManifest := fmt .Sprintf (`
779
- apiVersion: v1
780
- kind: Secret
781
- metadata:
782
- name: envoy-gateway
783
- namespace: envoy-gateway-system
784
- type: kubernetes.io/tls
785
- data:
786
- tls.crt: %s
787
- tls.key: %s
788
- ` , base64 .StdEncoding .EncodeToString (certPEM ), base64 .StdEncoding .EncodeToString (keyPEM ))
789
-
790
- return KubectlApplyManifestStdin (ctx , secretManifest )
791
- }
792
-
793
- // generateSelfSignedCert generates a self-signed certificate for the given service name and namespace.
794
- func generateSelfSignedCert (serviceName , namespace string ) (certPEM , keyPEM []byte , err error ) {
795
- // Generate a private key
796
- priv , err := rsa .GenerateKey (rand .Reader , 2048 )
797
- if err != nil {
798
- return nil , nil , fmt .Errorf ("failed to generate private key: %w" , err )
799
- }
800
-
801
- // Create certificate template
802
- template := x509.Certificate {
803
- SerialNumber : big .NewInt (1 ),
804
- Subject : pkix.Name {
805
- Organization : []string {"Envoy Gateway" },
806
- Country : []string {"US" },
807
- Province : []string {"" },
808
- Locality : []string {"" },
809
- StreetAddress : []string {"" },
810
- PostalCode : []string {"" },
811
- },
812
- NotBefore : time .Now (),
813
- NotAfter : time .Now ().Add (365 * 24 * time .Hour ), // Valid for 1 year
814
- KeyUsage : x509 .KeyUsageKeyEncipherment | x509 .KeyUsageDigitalSignature ,
815
- ExtKeyUsage : []x509.ExtKeyUsage {x509 .ExtKeyUsageServerAuth },
816
- IPAddresses : []net.IP {net .IPv4 (127 , 0 , 0 , 1 )},
817
- DNSNames : []string {serviceName , fmt .Sprintf ("%s.%s" , serviceName , namespace )},
818
- }
819
-
820
- // Create the certificate
821
- certDER , err := x509 .CreateCertificate (rand .Reader , & template , & template , & priv .PublicKey , priv )
822
- if err != nil {
823
- return nil , nil , fmt .Errorf ("failed to create certificate: %w" , err )
824
- }
825
-
826
- // Encode certificate to PEM
827
- certPEM = pem .EncodeToMemory (& pem.Block {Type : "CERTIFICATE" , Bytes : certDER })
828
-
829
- // Encode private key to PEM
830
- privDER , err := x509 .MarshalPKCS8PrivateKey (priv )
831
- if err != nil {
832
- return nil , nil , fmt .Errorf ("failed to marshal private key: %w" , err )
833
- }
834
- keyPEM = pem .EncodeToMemory (& pem.Block {Type : "PRIVATE KEY" , Bytes : privDER })
835
-
836
- return certPEM , keyPEM , nil
837
- }
838
-
839
- // deploymentExists checks if a deployment exists in the given namespace.
840
- func deploymentExists (namespace , name string ) bool {
841
- cmd := exec .Command ("kubectl" , "get" , "deployment" , name , "-n" , namespace , "--ignore-not-found" )
842
- output , err := cmd .Output ()
843
- if err != nil {
844
- return false
845
- }
846
- return len (strings .TrimSpace (string (output ))) > 0
847
- }
848
-
849
- // kubectlWaitForDeploymentReadyLenient waits for a deployment to have running pods,
850
- // which is more suitable for constrained environments where pods may not pass readiness checks
851
- // due to networking constraints.
852
- func kubectlWaitForDeploymentReadyLenient (namespace , deployment string ) error {
853
- // First wait for the deployment to be created
854
- cmd := Kubectl (context .Background (), "wait" , "--timeout=2m" , "-n" , namespace ,
855
- "deployment/" + deployment , "--for=create" )
856
- if err := cmd .Run (); err != nil {
857
- return fmt .Errorf ("error waiting for deployment %s creation in namespace %s: %w" , deployment , namespace , err )
858
- }
859
-
860
- // Get the deployment selector to wait for pods
861
- selectorCmd := Kubectl (context .Background (), "get" , "deployment" , deployment , "-n" , namespace ,
862
- "-o" , "jsonpath={.spec.selector.matchLabels}" )
863
- selectorCmd .Stdout = nil // Ensure we can capture output
864
- selectorOutput , err := selectorCmd .Output ()
865
- if err != nil {
866
- return fmt .Errorf ("error getting deployment selector for %s in namespace %s: %w" , deployment , namespace , err )
867
- }
868
-
869
- // Convert selector map to kubectl selector format
870
- selector := "app.kubernetes.io/instance=ai-eg,app.kubernetes.io/name=ai-gateway-helm" // Default for ai-gateway
871
- if strings .Contains (string (selectorOutput ), "control-plane" ) {
872
- selector = "control-plane=envoy-gateway" // For envoy-gateway
873
- }
874
-
875
- // Then wait for pods to be running (not necessarily ready)
876
- waitCmd := Kubectl (context .Background (), "wait" , "--timeout=3m" , "-n" , namespace ,
877
- "pod" , "--selector=" + selector , "--for=condition=Running" )
878
- if err := waitCmd .Run (); err != nil {
879
- // If pods aren't running, at least check if deployment has desired replicas
880
- initLog ("\t \t Pods not running, checking if deployment has replicas..." )
881
- statusCmd := Kubectl (context .Background (), "get" , "deployment" , deployment , "-n" , namespace ,
882
- "-o" , "jsonpath={.status.replicas}" )
883
- statusCmd .Stdout = nil // Ensure we can capture output
884
- output , err := statusCmd .Output ()
885
- if err != nil {
886
- return fmt .Errorf ("error checking deployment status for %s in namespace %s: %w" , deployment , namespace , err )
887
- }
888
- if strings .TrimSpace (string (output )) == "0" || len (strings .TrimSpace (string (output ))) == 0 {
889
- return fmt .Errorf ("deployment %s in namespace %s has no replicas" , deployment , namespace )
890
- }
891
- initLog ("\t \t Deployment has replicas, proceeding despite readiness issues" )
892
- }
893
-
894
- return nil
895
- }
0 commit comments