Skip to content

Commit 60ac5e8

Browse files
Copilot and mathetake committed
Remove Envoy Gateway-specific workarounds and focus on upgrade test configuration
Co-authored-by: mathetake <[email protected]>
1 parent f818ed4 commit 60ac5e8

File tree

1 file changed

+19
-197
lines changed

1 file changed

+19
-197
lines changed

tests/internal/e2elib/e2elib.go

Lines changed: 19 additions & 197 deletions
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,8 @@ import (
99
"bytes"
1010
"cmp"
1111
"context"
12-
"crypto/rand"
13-
"crypto/rsa"
14-
"crypto/x509"
15-
"crypto/x509/pkix"
16-
"encoding/base64"
1712
"encoding/json"
18-
"encoding/pem"
1913
"fmt"
20-
"math/big"
2114
"net"
2215
"net/http"
2316
"os"
@@ -71,7 +64,7 @@ type TestMainConfig struct {
7164
// When the inferenceExtension flag is set to true, it also installs the Inference Extension and the
7265
// Inference Pool resources, and the Envoy Gateway configuration which are required for the tests.
7366
func TestMain(m *testing.M, config TestMainConfig) {
74-
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(10*time.Minute))
67+
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(5*time.Minute))
7568

7669
// The following code sets up the kind cluster, installs the Envoy Gateway, and installs the AI Gateway.
7770
// They must be idempotent and can be run multiple times so that we can run the tests multiple times on
@@ -160,11 +153,6 @@ func initKindCluster(ctx context.Context) (err error) {
160153
return
161154
}
162155

163-
initLog("\tWaiting for API server to be ready")
164-
if err = waitForAPIServerReady(ctx); err != nil {
165-
return
166-
}
167-
168156
initLog("\tLoading Docker images into kind cluster")
169157
for _, image := range []string{
170158
"docker.io/envoyproxy/ai-gateway-controller:latest",
@@ -181,31 +169,6 @@ func initKindCluster(ctx context.Context) (err error) {
181169
return nil
182170
}
183171

184-
func waitForAPIServerReady(ctx context.Context) error {
185-
initLog("\t\tWaiting for API server to be available")
186-
187-
// Try for up to 2 minutes
188-
timeout := time.After(2 * time.Minute)
189-
ticker := time.NewTicker(5 * time.Second)
190-
defer ticker.Stop()
191-
192-
for {
193-
select {
194-
case <-timeout:
195-
return fmt.Errorf("timeout waiting for API server to be ready")
196-
case <-ticker.C:
197-
cmd := Kubectl(ctx, "cluster-info")
198-
cmd.Stdout = nil
199-
cmd.Stderr = nil
200-
if err := cmd.Run(); err == nil {
201-
initLog("\t\tAPI server is ready")
202-
return nil
203-
}
204-
initLog("\t\tAPI server not ready yet, retrying...")
205-
}
206-
}
207-
}
208-
209172
func initMetalLB(ctx context.Context) (err error) {
210173
initLog("Installing MetalLB")
211174
start := time.Now()
@@ -437,19 +400,13 @@ func initEnvoyGateway(ctx context.Context, inferenceExtension bool) (err error)
437400
initLog("\tHelm Install")
438401
helm := exec.CommandContext(ctx, "go", "tool", "helm", "upgrade", "-i", "eg",
439402
"oci://docker.io/envoyproxy/gateway-helm", "--version", egVersion,
440-
"-n", "envoy-gateway-system", "--create-namespace", "--no-hooks")
403+
"-n", "envoy-gateway-system", "--create-namespace")
441404
helm.Stdout = os.Stdout
442405
helm.Stderr = os.Stderr
443406
if err = helm.Run(); err != nil {
444407
return
445408
}
446409

447-
// Create webhook certificates since we skipped hooks
448-
initLog("\tCreating webhook certificates")
449-
if err = createEnvoyGatewayWebhookCerts(ctx); err != nil {
450-
return fmt.Errorf("failed to create webhook certificates: %w", err)
451-
}
452-
453410
initLog("\tApplying Patch for Envoy Gateway")
454411
if err = KubectlApplyManifest(ctx, "../../manifests/envoy-gateway-config/"); err != nil {
455412
return
@@ -464,19 +421,12 @@ func initEnvoyGateway(ctx context.Context, inferenceExtension bool) (err error)
464421
if err = kubectlRestartDeployment(ctx, "envoy-gateway-system", "envoy-gateway"); err != nil {
465422
return
466423
}
467-
468-
// Only wait for ratelimit deployment if it exists (it may not exist when hooks are skipped)
469-
if deploymentExists("envoy-gateway-system", "envoy-ratelimit") {
470-
initLog("\tWaiting for Ratelimit deployment to be ready")
471-
if err = kubectlWaitForDeploymentReady("envoy-gateway-system", "envoy-ratelimit"); err != nil {
472-
return
473-
}
474-
} else {
475-
initLog("\tSkipping ratelimit deployment wait (not created)")
424+
initLog("\tWaiting for Ratelimit deployment to be ready")
425+
if err = kubectlWaitForDeploymentReady("envoy-gateway-system", "envoy-ratelimit"); err != nil {
426+
return
476427
}
477428
initLog("\tWaiting for Envoy Gateway deployment to be ready")
478-
// In constrained environments, use lenient readiness check
479-
return kubectlWaitForDeploymentReadyLenient("envoy-gateway-system", "envoy-gateway")
429+
return kubectlWaitForDeploymentReady("envoy-gateway-system", "envoy-gateway")
480430
}
481431

482432
func initAIGateway(ctx context.Context, aiGatewayHelmFlags []string) (err error) {
@@ -514,11 +464,11 @@ func initAIGateway(ctx context.Context, aiGatewayHelmFlags []string) (err error)
514464
if err = kubectlRestartDeployment(ctx, "envoy-ai-gateway-system", "ai-gateway-controller"); err != nil {
515465
return
516466
}
517-
return kubectlWaitForDeploymentReadyLenient("envoy-ai-gateway-system", "ai-gateway-controller")
467+
return kubectlWaitForDeploymentReady("envoy-ai-gateway-system", "ai-gateway-controller")
518468
}
519469

520-
// initAIGatewayFromRegistry initializes the AI Gateway from the OCI registry with the specified version.
521-
// This is used for upgrade testing where we start with a released version and upgrade to local charts.
470+
// initAIGatewayFromRegistry installs AI Gateway from the registry using the specified version.
471+
// This is used for upgrade testing to install a specific released version.
522472
func initAIGatewayFromRegistry(ctx context.Context, version string) (err error) {
523473
initLog("Installing AI Gateway from registry")
524474
start := time.Now()
@@ -548,7 +498,7 @@ func initAIGatewayFromRegistry(ctx context.Context, version string) (err error)
548498
}
549499

550500
initLog("\tWaiting for AI Gateway controller to be ready")
551-
return kubectlWaitForDeploymentReadyLenient("envoy-ai-gateway-system", "ai-gateway-controller")
501+
return kubectlWaitForDeploymentReady("envoy-ai-gateway-system", "ai-gateway-controller")
552502
}
553503

554504
// UpgradeAIGatewayToLocal upgrades the AI Gateway from registry version to local charts.
@@ -558,24 +508,24 @@ func UpgradeAIGatewayToLocal(ctx context.Context, aiGatewayHelmFlags []string) (
558508
start := time.Now()
559509
defer func() {
560510
elapsed := time.Since(start)
561-
initLog(fmt.Sprintf("\tdone (took %.2fs in total)", elapsed.Seconds()))
511+
initLog(fmt.Sprintf("\tdone (took %.2fs in total)\n", elapsed.Seconds()))
562512
}()
563513

564-
initLog("\tHelm Upgrade CRDs to local")
514+
initLog("\tUpgrading CRDs to local charts")
565515
helmCRD := exec.CommandContext(ctx, "go", "tool", "helm", "upgrade", "-i", "ai-eg-crd",
566516
"../../manifests/charts/ai-gateway-crds-helm",
567-
"-n", "envoy-ai-gateway-system")
517+
"-n", "envoy-ai-gateway-system", "--create-namespace")
568518
helmCRD.Stdout = os.Stdout
569519
helmCRD.Stderr = os.Stderr
570520
if err = helmCRD.Run(); err != nil {
571521
return
572522
}
573523

574-
initLog("\tHelm Upgrade AI Gateway to local")
524+
initLog("\tUpgrading AI Gateway to local charts")
575525
args := []string{
576526
"tool", "helm", "upgrade", "-i", "ai-eg",
577527
"../../manifests/charts/ai-gateway-helm",
578-
"-n", "envoy-ai-gateway-system",
528+
"-n", "envoy-ai-gateway-system", "--create-namespace",
579529
}
580530
args = append(args, aiGatewayHelmFlags...)
581531

@@ -586,12 +536,13 @@ func UpgradeAIGatewayToLocal(ctx context.Context, aiGatewayHelmFlags []string) (
586536
return
587537
}
588538

589-
// Restart the controller to pick up the new changes in the AI Gateway.
590-
initLog("\tRestart AI Gateway controller")
539+
initLog("\tRestarting AI Gateway controller")
591540
if err = kubectlRestartDeployment(ctx, "envoy-ai-gateway-system", "ai-gateway-controller"); err != nil {
592541
return
593542
}
594-
return kubectlWaitForDeploymentReadyLenient("envoy-ai-gateway-system", "ai-gateway-controller")
543+
544+
initLog("\tWaiting for AI Gateway controller to be ready")
545+
return kubectlWaitForDeploymentReady("envoy-ai-gateway-system", "ai-gateway-controller")
595546
}
596547

597548
func initPrometheus(ctx context.Context) (err error) {
@@ -764,132 +715,3 @@ func (f PortForwarder) Kill() {
764715
func (f PortForwarder) Address() string {
765716
return fmt.Sprintf("http://127.0.0.1:%d", f.localPort)
766717
}
767-
768-
// createEnvoyGatewayWebhookCerts creates the TLS certificates needed for Envoy Gateway webhooks
769-
// when the cert generation hooks are skipped.
770-
func createEnvoyGatewayWebhookCerts(ctx context.Context) error {
771-
// Create a simple self-signed certificate for the webhook
772-
certPEM, keyPEM, err := generateSelfSignedCert("envoy-gateway.envoy-gateway-system.svc", "envoy-gateway-system")
773-
if err != nil {
774-
return fmt.Errorf("failed to generate self-signed certificate: %w", err)
775-
}
776-
777-
// Create the TLS secret
778-
secretManifest := fmt.Sprintf(`
779-
apiVersion: v1
780-
kind: Secret
781-
metadata:
782-
name: envoy-gateway
783-
namespace: envoy-gateway-system
784-
type: kubernetes.io/tls
785-
data:
786-
tls.crt: %s
787-
tls.key: %s
788-
`, base64.StdEncoding.EncodeToString(certPEM), base64.StdEncoding.EncodeToString(keyPEM))
789-
790-
return KubectlApplyManifestStdin(ctx, secretManifest)
791-
}
792-
793-
// generateSelfSignedCert generates a self-signed certificate for the given service name and namespace.
794-
func generateSelfSignedCert(serviceName, namespace string) (certPEM, keyPEM []byte, err error) {
795-
// Generate a private key
796-
priv, err := rsa.GenerateKey(rand.Reader, 2048)
797-
if err != nil {
798-
return nil, nil, fmt.Errorf("failed to generate private key: %w", err)
799-
}
800-
801-
// Create certificate template
802-
template := x509.Certificate{
803-
SerialNumber: big.NewInt(1),
804-
Subject: pkix.Name{
805-
Organization: []string{"Envoy Gateway"},
806-
Country: []string{"US"},
807-
Province: []string{""},
808-
Locality: []string{""},
809-
StreetAddress: []string{""},
810-
PostalCode: []string{""},
811-
},
812-
NotBefore: time.Now(),
813-
NotAfter: time.Now().Add(365 * 24 * time.Hour), // Valid for 1 year
814-
KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature,
815-
ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
816-
IPAddresses: []net.IP{net.IPv4(127, 0, 0, 1)},
817-
DNSNames: []string{serviceName, fmt.Sprintf("%s.%s", serviceName, namespace)},
818-
}
819-
820-
// Create the certificate
821-
certDER, err := x509.CreateCertificate(rand.Reader, &template, &template, &priv.PublicKey, priv)
822-
if err != nil {
823-
return nil, nil, fmt.Errorf("failed to create certificate: %w", err)
824-
}
825-
826-
// Encode certificate to PEM
827-
certPEM = pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: certDER})
828-
829-
// Encode private key to PEM
830-
privDER, err := x509.MarshalPKCS8PrivateKey(priv)
831-
if err != nil {
832-
return nil, nil, fmt.Errorf("failed to marshal private key: %w", err)
833-
}
834-
keyPEM = pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: privDER})
835-
836-
return certPEM, keyPEM, nil
837-
}
838-
839-
// deploymentExists checks if a deployment exists in the given namespace.
840-
func deploymentExists(namespace, name string) bool {
841-
cmd := exec.Command("kubectl", "get", "deployment", name, "-n", namespace, "--ignore-not-found")
842-
output, err := cmd.Output()
843-
if err != nil {
844-
return false
845-
}
846-
return len(strings.TrimSpace(string(output))) > 0
847-
}
848-
849-
// kubectlWaitForDeploymentReadyLenient waits for a deployment to have running pods,
850-
// which is more suitable for constrained environments where pods may not pass readiness checks
851-
// due to networking constraints.
852-
func kubectlWaitForDeploymentReadyLenient(namespace, deployment string) error {
853-
// First wait for the deployment to be created
854-
cmd := Kubectl(context.Background(), "wait", "--timeout=2m", "-n", namespace,
855-
"deployment/"+deployment, "--for=create")
856-
if err := cmd.Run(); err != nil {
857-
return fmt.Errorf("error waiting for deployment %s creation in namespace %s: %w", deployment, namespace, err)
858-
}
859-
860-
// Get the deployment selector to wait for pods
861-
selectorCmd := Kubectl(context.Background(), "get", "deployment", deployment, "-n", namespace,
862-
"-o", "jsonpath={.spec.selector.matchLabels}")
863-
selectorCmd.Stdout = nil // Ensure we can capture output
864-
selectorOutput, err := selectorCmd.Output()
865-
if err != nil {
866-
return fmt.Errorf("error getting deployment selector for %s in namespace %s: %w", deployment, namespace, err)
867-
}
868-
869-
// Convert selector map to kubectl selector format
870-
selector := "app.kubernetes.io/instance=ai-eg,app.kubernetes.io/name=ai-gateway-helm" // Default for ai-gateway
871-
if strings.Contains(string(selectorOutput), "control-plane") {
872-
selector = "control-plane=envoy-gateway" // For envoy-gateway
873-
}
874-
875-
// Then wait for pods to be running (not necessarily ready)
876-
waitCmd := Kubectl(context.Background(), "wait", "--timeout=3m", "-n", namespace,
877-
"pod", "--selector="+selector, "--for=condition=Running")
878-
if err := waitCmd.Run(); err != nil {
879-
// If pods aren't running, at least check if deployment has desired replicas
880-
initLog("\t\tPods not running, checking if deployment has replicas...")
881-
statusCmd := Kubectl(context.Background(), "get", "deployment", deployment, "-n", namespace,
882-
"-o", "jsonpath={.status.replicas}")
883-
statusCmd.Stdout = nil // Ensure we can capture output
884-
output, err := statusCmd.Output()
885-
if err != nil {
886-
return fmt.Errorf("error checking deployment status for %s in namespace %s: %w", deployment, namespace, err)
887-
}
888-
if strings.TrimSpace(string(output)) == "0" || len(strings.TrimSpace(string(output))) == 0 {
889-
return fmt.Errorf("deployment %s in namespace %s has no replicas", deployment, namespace)
890-
}
891-
initLog("\t\tDeployment has replicas, proceeding despite readiness issues")
892-
}
893-
894-
return nil
895-
}

0 commit comments

Comments
 (0)