@@ -64,7 +64,7 @@ def create(self):
64
64
print ("Successful submitted HyperPodPytorchJob!" )
65
65
except Exception as e :
66
66
print (f"Failed to create HyperPodPytorchJob { self .metadata .name } !" )
67
- _handel_exception (e , self .metadata .name , self .metadata .namespace )
67
+ _handle_exception (e , self .metadata .name , self .metadata .namespace )
68
68
69
69
@classmethod
70
70
def list (cls , namespace = "default" ) -> List ["HyperPodPytorchJob" ]:
@@ -85,7 +85,7 @@ def list(cls, namespace="default") -> List["HyperPodPytorchJob"]:
85
85
return _load_hp_job_list (hp_job_list )
86
86
except Exception as e :
87
87
print (f"Failed to list HyperpodPytorchJobs!" )
88
- _handel_exception (e , "" , namespace )
88
+ _handle_exception (e , "" , namespace )
89
89
90
90
def delete (self ):
91
91
if not validate_cluster_connection ():
@@ -106,7 +106,7 @@ def delete(self):
106
106
print (f"Successful deleted HyperPodPytorchJob!" )
107
107
except Exception as e :
108
108
print (f"Failed to delete HyperPodPytorchJob { self .metadata .name } !" )
109
- _handel_exception (e , self .metadata .name , self .metadata .namespace )
109
+ _handle_exception (e , self .metadata .name , self .metadata .namespace )
110
110
111
111
@classmethod
112
112
def get (cls , name , namespace = "default" ) -> "HyperPodPytorchJob" :
@@ -128,7 +128,7 @@ def get(cls, name, namespace="default") -> "HyperPodPytorchJob":
128
128
return _load_hp_job (response )
129
129
except Exception as e :
130
130
print (f"Failed to describe HyperPodPytorchJob { name } : { e } " )
131
- _handel_exception (e , name , namespace )
131
+ _handle_exception (e , name , namespace )
132
132
133
133
def refresh (self ) -> "HyperPodPytorchJob" :
134
134
if not validate_cluster_connection ():
@@ -151,7 +151,7 @@ def refresh(self) -> "HyperPodPytorchJob":
151
151
)
152
152
except Exception as e :
153
153
print (f"Failed to refresh HyperPodPytorchJob { self .metadata .name } !" )
154
- _handel_exception (e , self .metadata .name , self .metadata .namespace )
154
+ _handle_exception (e , self .metadata .name , self .metadata .namespace )
155
155
156
156
def list_pods (self ) -> List [str ]:
157
157
if not validate_cluster_connection ():
@@ -172,28 +172,35 @@ def list_pods(self) -> List[str]:
172
172
return pods
173
173
except Exception as e :
174
174
print (f"Failed to list pod in namespace { self .metadata .namespace } !" )
175
- _handel_exception (e , self .metadata .name , self .metadata .namespace )
175
+ _handle_exception (e , self .metadata .name , self .metadata .namespace )
176
176
177
def get_logs_from_pod(self, pod_name: str, container: Optional[str] = None) -> str:
    """Read the log output of one pod in this job's namespace.

    Args:
        pod_name: Name of the pod whose logs are requested.
        container: Name of the container to read from. When left as
            ``None``, the name of the first container declared in the
            first replica spec of this job is used.

    Returns:
        Whatever ``CoreV1Api.read_namespaced_pod_log`` returns for the
        request; the call is made with ``timestamps=True``.

    Raises:
        Exception: If ``validate_cluster_connection()`` reports that the
            cluster cannot be reached.

    Note:
        Any exception raised while loading the kubeconfig or reading the
        log is passed to ``_handle_exception``. If that helper returns
        instead of raising, this method falls through and implicitly
        returns ``None``.
    """
    cluster_reachable = validate_cluster_connection()
    if not cluster_reachable:
        raise Exception(
            "Failed to connect to the Kubernetes cluster. Please check your kubeconfig."
        )

    # No explicit container requested: fall back to the first container of
    # the first replica spec. This lookup is deliberately kept outside the
    # try block below, so errors from it propagate unchanged.
    if container is None:
        first_replica = self.replicaSpecs[0]
        container = first_replica.template.spec.containers[0].name

    try:
        config.load_kube_config()
        core_api = client.CoreV1Api()
        return core_api.read_namespaced_pod_log(
            name=pod_name,
            namespace=self.metadata.namespace,
            timestamps=True,
            container=container,
        )
    except Exception as err:
        print(f"Failed to get logs from pod { pod_name } !")
        _handle_exception(err, self.metadata.name, self.metadata.namespace)
194
201
195
202
196
- def _handel_exception (e : Exception , name : str , namespace : str ):
203
+ def _handle_exception (e : Exception , name : str , namespace : str ):
197
204
print ("exception type" , type (e ))
198
205
if isinstance (e , ApiException ):
199
206
if e .status == 401 :
0 commit comments