@@ -36,14 +36,28 @@ def pytorch_create(version, config):
36
36
job_name = config .get ("name" )
37
37
namespace = config .get ("namespace" )
38
38
spec = config .get ("spec" )
39
- # Create job with or without namespace
40
- if namespace is None :
41
- job = HyperPodPytorchJob (metadata = Metadata (name = job_name ), spec = spec )
42
- else :
43
- job = HyperPodPytorchJob (
44
- metadata = Metadata (name = job_name , namespace = namespace ), spec = spec
45
- )
46
39
40
+ # Prepare metadata
41
+ metadata_kwargs = {"name" : job_name }
42
+ if namespace :
43
+ metadata_kwargs ["namespace" ] = namespace
44
+
45
+ # Prepare job kwargs
46
+ job_kwargs = {
47
+ "metadata" : Metadata (** metadata_kwargs ),
48
+ "replica_specs" : spec .get ("replica_specs" , [])
49
+ }
50
+
51
+ # Add nproc_per_node if present
52
+ if "nproc_per_node" in spec :
53
+ job_kwargs ["nproc_per_node" ] = spec ["nproc_per_node" ]
54
+
55
+ # Add run_policy if present
56
+ if "run_policy" in spec :
57
+ job_kwargs ["run_policy" ] = spec ["run_policy" ]
58
+
59
+ # Create job
60
+ job = HyperPodPytorchJob (** job_kwargs )
47
61
job .create ()
48
62
49
63
except Exception as e :
@@ -138,16 +152,14 @@ def pytorch_describe(job_name: str, namespace: str):
138
152
click .echo ("=" * 80 )
139
153
click .echo (f"Name: { job .metadata .name } " )
140
154
click .echo (f"Namespace: { job .metadata .namespace } " )
141
- click .echo (f"API Version: { job .apiVersion } " )
142
- click .echo (f"Kind: { job .kind } " )
143
155
144
156
# Print Spec details
145
157
click .echo ("\n Spec:" )
146
158
click .echo ("-" * 80 )
147
- click .echo (f"Processes per Node: { job .spec . nprocPerNode } " )
159
+ click .echo (f"Processes per Node: { job .nprocPerNode } " )
148
160
149
161
# Print Replica Specs
150
- for replica in job .spec . replicaSpecs :
162
+ for replica in job .replicaSpecs :
151
163
click .echo (f"\n Replica Spec:" )
152
164
click .echo (f" Name: { replica .name } " )
153
165
click .echo (f" Replicas: { replica .replicas } " )
@@ -169,9 +181,9 @@ def pytorch_describe(job_name: str, namespace: str):
169
181
# Print Run Policy
170
182
click .echo ("\n Run Policy:" )
171
183
click .echo ("-" * 80 )
172
- click .echo (f"Clean Pod Policy: { job .spec . runPolicy .cleanPodPolicy } " )
184
+ click .echo (f"Clean Pod Policy: { job .runPolicy .cleanPodPolicy } " )
173
185
click .echo (
174
- f"TTL Seconds After Finished: { job .spec . runPolicy .ttlSecondsAfterFinished } "
186
+ f"TTL Seconds After Finished: { job .runPolicy .ttlSecondsAfterFinished } "
175
187
)
176
188
177
189
# Print Status
0 commit comments