Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions src/sagemaker/hyperpod/cli/commands/training.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,7 @@
import click
import logging
import os
import yaml
import shutil
import subprocess
from pathlib import Path
from sagemaker.hyperpod.training.hyperpod_pytorch_job import HyperPodPytorchJob
from sagemaker.hyperpod.common.config import Metadata
import tempfile
from typing import List, Dict, Any, Optional, Callable, get_args, get_origin, Literal
from sagemaker.hyperpod.cli.training_utils import generate_click_command
from importlib.metadata import entry_points
from hyperpod_pytorch_job_template.registry import SCHEMA_REGISTRY
from sagemaker.hyperpod.common.telemetry.telemetry_logging import (
_hyperpod_telemetry_emitter,
Expand Down
8 changes: 4 additions & 4 deletions src/sagemaker/hyperpod/training/hyperpod_pytorch_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
_HyperPodPytorchJob, HyperPodPytorchJobStatus
)
from sagemaker.hyperpod.common.config.metadata import Metadata
from kubernetes import client, config, __version__ as kubernetes_client_version
from typing import List, Optional, ClassVar, Tuple
from kubernetes import client, config
from typing import List, Optional, ClassVar
from sagemaker.hyperpod.common.utils import (
handle_exception,
get_default_namespace,
Expand Down Expand Up @@ -84,7 +84,7 @@ def create(self, debug=False):
plural=PLURAL,
body=config,
)
logger.info("Successfully submitted HyperPodPytorchJob!")
logger.info(f"Successfully submitted HyperPodPytorchJob '{self.metadata.name}'!")
except Exception as e:
logger.error(f"Failed to create HyperPodPytorchJob {self.metadata.name}!")
handle_exception(e, self.metadata.name, self.metadata.namespace)
Expand Down Expand Up @@ -131,7 +131,7 @@ def delete(self):
plural=PLURAL,
name=self.metadata.name,
)
logger.info(f"Successful deleted HyperPodPytorchJob!")
logger.info(f"Successful deleted HyperPodPytorchJob '{self.metadata.name}'!")
except Exception as e:
logger.error(f"Failed to delete HyperPodPytorchJob {self.metadata.name}!")
handle_exception(e, self.metadata.name, self.metadata.namespace)
Expand Down
10 changes: 0 additions & 10 deletions test/integration_tests/training/cli/test_cli_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,6 @@ def test_list_clusters(self, cluster_name):
"""Test listing clusters """
assert cluster_name

def test_set_cluster_context(self, cluster_name):
"""Test setting cluster context."""
result = execute_command([
"hyp", "set-cluster-context",
"--cluster-name", cluster_name
])
assert result.returncode == 0
context_line = result.stdout.strip().splitlines()[-1]
assert any(text in context_line for text in ["Updated context", "Added new context"])

def test_get_cluster_context(self):
"""Test getting current cluster context."""
result = execute_command(["hyp", "get-cluster-context"])
Expand Down
3 changes: 1 addition & 2 deletions test/integration_tests/training/sdk/test_sdk_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,9 @@ def test_list_jobs(self, pytorch_job):
job_names = [job.metadata.name for job in jobs]
assert pytorch_job.metadata.name in job_names

#
def test_refresh_job(self, pytorch_job):
pytorch_job.refresh()
time.sleep(15)
time.sleep(30)
assert pytorch_job.status is not None, "Job status should not be None"
logger.info(f"Refreshed job status:\n{yaml.dump(pytorch_job.status)}")

Expand Down
Loading