@@ -32,9 +32,13 @@ import (
3232)
3333
3434// log is for logging in this package.
35- var rayclusterlog = logf .Log .WithName ("raycluster-resource" )
35+ var (
36+ rayclusterlog = logf .Log .WithName ("raycluster-resource" )
37+ baseDomain string = ""
38+ )
3639
37- func SetupRayClusterWebhookWithManager (mgr ctrl.Manager , cfg * config.KubeRayConfiguration ) error {
40+ func SetupRayClusterWebhookWithManager (mgr ctrl.Manager , cfg * config.KubeRayConfiguration , domain string ) error {
41+ baseDomain = domain
3842 return ctrl .NewWebhookManagedBy (mgr ).
3943 For (& rayv1.RayCluster {}).
4044 WithDefaulter (& rayClusterDefaulter {
@@ -55,80 +59,226 @@ var _ webhook.CustomDefaulter = &rayClusterDefaulter{}
5559func (r * rayClusterDefaulter ) Default (ctx context.Context , obj runtime.Object ) error {
5660 raycluster := obj .(* rayv1.RayCluster )
5761
62+ oauthExists := false
63+ initHeadExists := false
64+ initWorkerExists := false
65+
66+ // Check for the create-cert Init Containers
67+ for _ , container := range raycluster .Spec .HeadGroupSpec .Template .Spec .InitContainers {
68+ if container .Name == "create-cert" {
69+ rayclusterlog .V (2 ).Info ("Head Init Containers already exist, no patch needed" )
70+ initHeadExists = true
71+ break // exits the for loop
72+ }
73+ }
74+ // Check fot the create-cert Init Container WorkerGroupSpec
75+ for _ , container := range raycluster .Spec .WorkerGroupSpecs [0 ].Template .Spec .InitContainers {
76+ if container .Name == "create-cert" {
77+ rayclusterlog .V (2 ).Info ("Worker Init Containers already exist, no patch needed" )
78+ initWorkerExists = true
79+ break // exits the for loop
80+ }
81+ }
82+
5883 if ! pointer .BoolDeref (r .Config .RayDashboardOAuthEnabled , true ) {
84+ // Still need to call init container patch even if oauth is disabled
85+ mtlsPatch (raycluster , initHeadExists , initWorkerExists )
5986 return nil
6087 }
6188
6289 // Check and add OAuth proxy if it does not exist
6390 for _ , container := range raycluster .Spec .HeadGroupSpec .Template .Spec .Containers {
6491 if container .Name == "oauth-proxy" {
6592 rayclusterlog .V (2 ).Info ("OAuth sidecar already exists, no patch needed" )
66- return nil
93+ oauthExists = true
6794 }
6895 }
96+ if ! oauthExists {
97+ rayclusterlog .V (2 ).Info ("Adding OAuth sidecar container" )
98+ // definition of the new container
99+ newOAuthSidecar := corev1.Container {
100+ Name : "oauth-proxy" ,
101+ Image : "registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366" ,
102+ Ports : []corev1.ContainerPort {
103+ {ContainerPort : 8443 , Name : "oauth-proxy" },
104+ },
105+ Args : []string {
106+ "--https-address=:8443" ,
107+ "--provider=openshift" ,
108+ "--openshift-service-account=" + raycluster .Name + "-oauth-proxy" ,
109+ "--upstream=http://localhost:8265" ,
110+ "--tls-cert=/etc/tls/private/tls.crt" ,
111+ "--tls-key=/etc/tls/private/tls.key" ,
112+ "--cookie-secret=$(COOKIE_SECRET)" ,
113+ "--openshift-delegate-urls={\" /\" :{\" resource\" :\" pods\" ,\" namespace\" :\" default\" ,\" verb\" :\" get\" }}" ,
114+ },
115+ VolumeMounts : []corev1.VolumeMount {
116+ {
117+ Name : "proxy-tls-secret" ,
118+ MountPath : "/etc/tls/private" ,
119+ ReadOnly : true ,
120+ },
121+ },
122+ }
69123
70- rayclusterlog .V (2 ).Info ("Adding OAuth sidecar container" )
71- // definition of the new container
72- newOAuthSidecar := corev1.Container {
73- Name : "oauth-proxy" ,
74- Image : "registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366" ,
75- Ports : []corev1.ContainerPort {
76- {ContainerPort : 8443 , Name : "oauth-proxy" },
77- },
78- Args : []string {
79- "--https-address=:8443" ,
80- "--provider=openshift" ,
81- "--openshift-service-account=" + raycluster .Name + "-oauth-proxy" ,
82- "--upstream=http://localhost:8265" ,
83- "--tls-cert=/etc/tls/private/tls.crt" ,
84- "--tls-key=/etc/tls/private/tls.key" ,
85- "--cookie-secret=$(COOKIE_SECRET)" ,
86- "--openshift-delegate-urls={\" /\" :{\" resource\" :\" pods\" ,\" namespace\" :\" default\" ,\" verb\" :\" get\" }}" ,
87- },
88- VolumeMounts : []corev1.VolumeMount {
89- {
90- Name : "proxy-tls-secret" ,
91- MountPath : "/etc/tls/private" ,
92- ReadOnly : true ,
124+ // Adding the new OAuth sidecar container
125+ raycluster .Spec .HeadGroupSpec .Template .Spec .Containers = append (raycluster .Spec .HeadGroupSpec .Template .Spec .Containers , newOAuthSidecar )
126+
127+ cookieSecret := corev1.EnvVar {
128+ Name : "COOKIE_SECRET" ,
129+ ValueFrom : & corev1.EnvVarSource {
130+ SecretKeyRef : & corev1.SecretKeySelector {
131+ LocalObjectReference : corev1.LocalObjectReference {
132+ Name : raycluster .Name + "-oauth-config" ,
133+ },
134+ Key : "cookie_secret" ,
135+ },
93136 },
137+ }
138+
139+ raycluster .Spec .HeadGroupSpec .Template .Spec .Containers [0 ].Env = append (
140+ raycluster .Spec .HeadGroupSpec .Template .Spec .Containers [0 ].Env ,
141+ cookieSecret ,
142+ )
143+
144+ tlsSecretVolume := corev1.Volume {
145+ Name : "proxy-tls-secret" ,
146+ VolumeSource : corev1.VolumeSource {
147+ Secret : & corev1.SecretVolumeSource {
148+ SecretName : raycluster .Name + "-proxy-tls-secret" ,
149+ },
150+ },
151+ }
152+
153+ raycluster .Spec .HeadGroupSpec .Template .Spec .Volumes = append (raycluster .Spec .HeadGroupSpec .Template .Spec .Volumes , tlsSecretVolume )
154+
155+ // Ensure the service account is set
156+ if raycluster .Spec .HeadGroupSpec .Template .Spec .ServiceAccountName == "" {
157+ raycluster .Spec .HeadGroupSpec .Template .Spec .ServiceAccountName = raycluster .Name + "-oauth-proxy"
158+ }
159+ }
160+
161+ mtlsPatch (raycluster , initHeadExists , initWorkerExists )
162+ return nil
163+ }
164+
165+ func mtlsPatch (raycluster * rayv1.RayCluster , initHeadExists bool , initWorkerExists bool ) {
166+
167+ rayclusterlog .V (2 ).Info ("creating json patch for RayCluster initContainers" )
168+
169+ // Volume Mounts for the Init Containers
170+ key_volumes := []corev1.VolumeMount {
171+ {
172+ Name : "ca-vol" ,
173+ MountPath : "/home/ray/workspace/ca" ,
174+ ReadOnly : true ,
175+ },
176+ {
177+ Name : "server-cert" ,
178+ MountPath : "/home/ray/workspace/tls" ,
179+ ReadOnly : false ,
94180 },
95181 }
96182
97- // Adding the new OAuth sidecar container
98- raycluster .Spec .HeadGroupSpec .Template .Spec .Containers = append (raycluster .Spec .HeadGroupSpec .Template .Spec .Containers , newOAuthSidecar )
183+ // Service name for basic interactive
184+ svcDomain := raycluster .Name + "-head-svc." + raycluster .Namespace + ".svc"
185+ // Ca Secret generated by the SDK
186+ secretName := `ca-secret-` + raycluster .Name
99187
100- cookieSecret := corev1.EnvVar {
101- Name : "COOKIE_SECRET" ,
102- ValueFrom : & corev1.EnvVarSource {
103- SecretKeyRef : & corev1.SecretKeySelector {
104- LocalObjectReference : corev1.LocalObjectReference {
105- Name : raycluster .Name + "-oauth-config" ,
188+ // Env variables for Worker & Head Containers
189+ envList := []corev1.EnvVar {
190+ {
191+ Name : "MY_POD_IP" ,
192+ ValueFrom : & corev1.EnvVarSource {
193+ FieldRef : & corev1.ObjectFieldSelector {
194+ FieldPath : "status.podIP" ,
106195 },
107- Key : "cookie_secret" ,
108196 },
109197 },
198+ {
199+ Name : "RAY_USE_TLS" ,
200+ Value : "1" ,
201+ },
202+ {
203+ Name : "RAY_TLS_SERVER_CERT" ,
204+ Value : "/home/ray/workspace/tls/server.crt" ,
205+ },
206+ {
207+ Name : "RAY_TLS_SERVER_KEY" ,
208+ Value : "/home/ray/workspace/tls/server.key" ,
209+ },
210+ {
211+ Name : "RAY_TLS_CA_CERT" ,
212+ Value : "/home/ray/workspace/tls/ca.crt" ,
213+ },
110214 }
111215
112- raycluster .Spec .HeadGroupSpec .Template .Spec .Containers [0 ].Env = append (
113- raycluster .Spec .HeadGroupSpec .Template .Spec .Containers [0 ].Env ,
114- cookieSecret ,
115- )
116-
117- tlsSecretVolume := corev1.Volume {
118- Name : "proxy-tls-secret" ,
119- VolumeSource : corev1.VolumeSource {
120- Secret : & corev1.SecretVolumeSource {
121- SecretName : raycluster .Name + "-proxy-tls-secret" ,
216+ // Volumes for the main container of Head and worker
217+ caVolumes := []corev1.Volume {
218+ {
219+ Name : "ca-vol" ,
220+ VolumeSource : corev1.VolumeSource {
221+ Secret : & corev1.SecretVolumeSource {
222+ SecretName : secretName ,
223+ },
224+ },
225+ },
226+ {
227+ Name : "server-cert" ,
228+ VolumeSource : corev1.VolumeSource {
229+ EmptyDir : & corev1.EmptyDirVolumeSource {},
122230 },
123231 },
124232 }
125233
126- raycluster .Spec .HeadGroupSpec .Template .Spec .Volumes = append (raycluster .Spec .HeadGroupSpec .Template .Spec .Volumes , tlsSecretVolume )
234+ if ! initHeadExists {
235+ rayClientRoute := "rayclient-" + raycluster .Name + "-" + raycluster .Namespace + "." + baseDomain
236+ initContainerHead := corev1.Container {
237+ Name : "create-cert" ,
238+ Image : "quay.io/project-codeflare/ray:latest-py39-cu118" ,
239+ Command : []string {
240+ "sh" ,
241+ "-c" ,
242+ `cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = ` + rayClientRoute + `\nDNS.6 = ` + svcDomain + `">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext` ,
243+ },
244+ VolumeMounts : key_volumes ,
245+ }
246+
247+ // Append the list of environment variables for the ray-head container
248+ for index , container := range raycluster .Spec .HeadGroupSpec .Template .Spec .Containers {
249+ if container .Name == "ray-head" {
250+ raycluster .Spec .HeadGroupSpec .Template .Spec .Containers [index ].Env = append (raycluster .Spec .HeadGroupSpec .Template .Spec .Containers [index ].Env , envList ... )
251+ }
252+ }
253+
254+ // Append the create-cert Init Container
255+ raycluster .Spec .HeadGroupSpec .Template .Spec .InitContainers = append (raycluster .Spec .HeadGroupSpec .Template .Spec .InitContainers , initContainerHead )
127256
128- // Ensure the service account is set
129- if raycluster .Spec .HeadGroupSpec .Template .Spec .ServiceAccountName == "" {
130- raycluster .Spec .HeadGroupSpec .Template .Spec .ServiceAccountName = raycluster .Name + "-oauth-proxy"
257+ // Append the CA volumes
258+ raycluster .Spec .HeadGroupSpec .Template .Spec .Volumes = append (raycluster .Spec .HeadGroupSpec .Template .Spec .Volumes , caVolumes ... )
131259 }
132260
133- return nil
261+ if ! initWorkerExists {
262+ initContainerWorker := corev1.Container {
263+ Name : "create-cert" ,
264+ Image : "quay.io/project-codeflare/ray:latest-py39-cu118" ,
265+ Command : []string {
266+ "sh" ,
267+ "-c" ,
268+ `cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext` ,
269+ },
270+ VolumeMounts : key_volumes ,
271+ }
272+ // Append the CA volumes
273+ raycluster .Spec .WorkerGroupSpecs [0 ].Template .Spec .Volumes = append (raycluster .Spec .WorkerGroupSpecs [0 ].Template .Spec .Volumes , caVolumes ... )
274+ // Append the create-cert Init Container
275+ raycluster .Spec .WorkerGroupSpecs [0 ].Template .Spec .InitContainers = append (raycluster .Spec .WorkerGroupSpecs [0 ].Template .Spec .InitContainers , initContainerWorker )
276+
277+ // Append the list of environment variables for the machine-learning container
278+ for index , container := range raycluster .Spec .WorkerGroupSpecs [0 ].Template .Spec .Containers {
279+ if container .Name == "machine-learning" {
280+ raycluster .Spec .WorkerGroupSpecs [0 ].Template .Spec .Containers [index ].Env = append (raycluster .Spec .WorkerGroupSpecs [0 ].Template .Spec .Containers [index ].Env , envList ... )
281+ }
282+ }
283+ }
134284}
0 commit comments