Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 48 additions & 53 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -337,24 +337,21 @@ Pre-trained Jumpstart models can be found at https://sagemaker.readthedocs.io
from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig
from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint

model = Model(
model_id="deepseek-llm-r1-distill-qwen-1-5b",
model_version="2.0.4"
model=Model(
model_id='deepseek-llm-r1-distill-qwen-1-5b',
model_version='2.0.4',
)

server = Server(
instance_type="ml.g5.8xlarge"
server=Server(
instance_type='ml.g5.8xlarge',
)
endpoint_name=SageMakerEndpoint(name='<my-endpoint-name>')
tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket>')

endpoint_name = SageMakerEndpoint(name="endpoint-jumpstart")

tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")

js_endpoint = HPJumpStartEndpoint(
js_endpoint=HPJumpStartEndpoint(
model=model,
server=server,
sage_maker_endpoint=endpoint_name,
tls_config=tls_config
tls_config=tls_config,
)

js_endpoint.create()
Expand All @@ -370,51 +367,51 @@ print(response)
```


#### Creating a Custom Inference Endpoint
#### Creating a Custom Inference Endpoint (with S3)

```
from sagemaker.hyperpod.inference.config.hp_custom_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig, EnvironmentVariables
from sagemaker.hyperpod.inference.hp_custom_endpoint import HPCustomEndpoint
from sagemaker.hyperpod.inference.config.hp_endpoint_config import CloudWatchTrigger, Dimensions, AutoScalingSpec, Metrics, S3Storage, ModelSourceConfig, TlsConfig, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Resources, Worker
from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint

model = Model(
model_source_type="s3",
model_location="test-pytorch-job/model.tar.gz",
s3_bucket_name="my-bucket",
s3_region="us-east-2",
prefetch_enabled=True
model_source_config = ModelSourceConfig(
model_source_type='s3',
model_location="<my-model-folder-in-s3>",
s3_storage=S3Storage(
bucket_name='<my-model-artifacts-bucket>',
region='us-east-2',
),
)

server = Server(
instance_type="ml.g5.8xlarge",
image_uri="763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0",
container_port=8080,
model_volume_mount_name="model-weights"
)
environment_variables = [
EnvironmentVariables(name="HF_MODEL_ID", value="/opt/ml/model"),
EnvironmentVariables(name="SAGEMAKER_PROGRAM", value="inference.py"),
EnvironmentVariables(name="SAGEMAKER_SUBMIT_DIRECTORY", value="/opt/ml/model/code"),
EnvironmentVariables(name="MODEL_CACHE_ROOT", value="/opt/ml/model"),
EnvironmentVariables(name="SAGEMAKER_ENV", value="1"),
]

resources = {
"requests": {"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
"limits": {"nvidia.com/gpu": 1}
}

env = EnvironmentVariables(
HF_MODEL_ID="/opt/ml/model",
SAGEMAKER_PROGRAM="inference.py",
SAGEMAKER_SUBMIT_DIRECTORY="/opt/ml/model/code",
MODEL_CACHE_ROOT="/opt/ml/model",
SAGEMAKER_ENV="1"
worker = Worker(
image='763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0',
model_volume_mount=ModelVolumeMount(
name='model-weights',
),
model_invocation_port=ModelInvocationPort(container_port=8080),
resources=Resources(
requests={"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
limits={"nvidia.com/gpu": 1}
),
environment_variables=environment_variables,
)

endpoint_name = SageMakerEndpoint(name="endpoint-custom-pytorch")

tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket-name>')

custom_endpoint = HPCustomEndpoint(
model=model,
server=server,
resources=resources,
environment=env,
sage_maker_endpoint=endpoint_name,
custom_endpoint = HPEndpoint(
endpoint_name='<my-endpoint-name>',
instance_type='ml.g5.8xlarge',
model_name='deepseek15b-test-model-name',
tls_config=tls_config,
model_source_config=model_source_config,
worker=worker,
)

custom_endpoint.create()
Expand All @@ -431,19 +428,17 @@ print(response)
#### Managing an Endpoint

```
endpoint_iterator = HPJumpStartEndpoint.list()
for endpoint in endpoint_iterator:
print(endpoint.name, endpoint.status)
endpoint_list = HPEndpoint.list()
print(endpoint_list[0])

logs = js_endpoint.get_logs()
print(logs)
print(custom_endpoint.get_operator_logs(since_hours=0.5))

```

#### Deleting an Endpoint

```
js_endpoint.delete()
custom_endpoint.delete()

```

Expand Down
29 changes: 19 additions & 10 deletions examples/inference/SDK/inference-fsx-model-e2e.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,19 @@
"metadata": {},
"outputs": [],
"source": [
"from sagemaker.hyperpod.hyperpod_manager import HyperPodManager\n",
"\n",
"HyperPodManager.list_clusters(region='us-east-2')\n",
"HyperPodManager.set_context('<hyperpod-cluster-name>', region='us-east-2')"
"from sagemaker.hyperpod import list_clusters, set_cluster_context\n",
"list_clusters(region='us-east-2')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "765ef3fd",
"metadata": {},
"outputs": [],
"source": [
"# choose the HP cluster\n",
"set_cluster_context('<my-cluster>', region='us-east-2')"
]
},
{
Expand All @@ -20,7 +29,7 @@
"metadata": {},
"outputs": [],
"source": [
"from sagemaker.hyperpod.inference.config.hp_endpoint_config import CloudWatchTrigger, PrometheusTrigger, AutoScalingSpec, ModelMetrics, Metrics, FsxStorage, ModelSourceConfig, Tags, TlsConfig, ConfigMapKeyRef, FieldRef, ResourceFieldRef, SecretKeyRef, ValueFrom, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Claims, Resources, Worker\n",
"from sagemaker.hyperpod.inference.config.hp_endpoint_config import FsxStorage, ModelSourceConfig, TlsConfig, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Resources, Worker\n",
"from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint\n",
"import yaml\n",
"import time"
Expand All @@ -33,13 +42,13 @@
"metadata": {},
"outputs": [],
"source": [
"tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<your-tls-bucket-name>')\n",
"tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket-name>')\n",
"\n",
"model_source_config = ModelSourceConfig(\n",
" model_source_type='fsx',\n",
" model_location=\"<your-model-folder-in-fsx>\",\n",
" model_location=\"<my-model-folder-in-fsx>\",\n",
" fsx_storage=FsxStorage(\n",
" file_system_id='<your-fs-id>'\n",
" file_system_id='<my-fs-id>'\n",
" ),\n",
")\n",
"\n",
Expand Down Expand Up @@ -73,7 +82,7 @@
"outputs": [],
"source": [
"fsx_endpoint = HPEndpoint(\n",
" endpoint_name='test-endpoint-name-fsx-pysdk',\n",
" endpoint_name='<my-endpoint-name>',\n",
" instance_type='ml.g5.8xlarge',\n",
" model_name='deepseek15b-fsx-test-pysdk',\n",
" tls_config=tls_config,\n",
Expand Down Expand Up @@ -165,7 +174,7 @@
"metadata": {},
"outputs": [],
"source": [
"endpoint = HPEndpoint.get(name='<your-endpoint-name>')"
"endpoint = HPEndpoint.get(name='<my-endpoint-name>')"
]
},
{
Expand Down
30 changes: 9 additions & 21 deletions examples/inference/SDK/inference-jumpstart-e2e.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,14 @@
"## Inference Operator PySDK E2E Experience (JumpStart model)"
]
},
{
"cell_type": "markdown",
"id": "1b3ce5c1-3c3d-4139-b7ae-042f360f3032",
"metadata": {},
"source": [
"<b>Prerequisite:</b> Data scientists should list clusters and set cluster context"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e22c86d6-0d3d-4c51-bef0-3f4c59ce111c",
"metadata": {},
"outputs": [],
"source": [
"from sagemaker.hyperpod.hyperpod_manager import HyperPodManager"
"from sagemaker.hyperpod import list_clusters, set_cluster_context"
]
},
{
Expand All @@ -33,8 +25,7 @@
"metadata": {},
"outputs": [],
"source": [
"#Set region \n",
"region = \"us-west-2\""
"list_clusters(region='us-east-2')"
]
},
{
Expand All @@ -44,8 +35,8 @@
"metadata": {},
"outputs": [],
"source": [
"# choose the HP cluster user works on\n",
"HyperPodManager.set_context('sagemaker-hyperpod-eks-cluster-demo-05-01', region=region)"
"# choose the HP cluster\n",
"set_cluster_context('<my-cluster>', region='us-east-2')"
]
},
{
Expand All @@ -67,7 +58,7 @@
"from jumpstart_public_hub_visualization_utils import get_all_public_hub_model_data\n",
"\n",
"# Load and display SageMaker public hub models\n",
"get_all_public_hub_model_data(region=\"us-west-2\")"
"get_all_public_hub_model_data(region=\"us-east-2\")"
]
},
{
Expand Down Expand Up @@ -122,8 +113,8 @@
"server=Server(\n",
" instance_type='ml.g5.8xlarge',\n",
")\n",
"endpoint_name=SageMakerEndpoint(name='deepsek7bsme-testing-jumpstart-7-1')\n",
"tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://tls-bucket-inf1-beta2')\n",
"endpoint_name=SageMakerEndpoint(name='<my-endpoint-name>')\n",
"tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket>')\n",
"\n",
"# create spec\n",
"js_endpoint=HPJumpStartEndpoint(\n",
Expand Down Expand Up @@ -230,7 +221,7 @@
"outputs": [],
"source": [
"# output is similar to kubectl describe jumpstartmodel\n",
"endpoint = HPJumpStartEndpoint.get(name='deepseek-llm-r1-distill-qwen-1-5b')\n",
"endpoint = HPJumpStartEndpoint.get(name='<my-endpoint-name>')\n",
"print_yaml(endpoint)"
]
},
Expand Down Expand Up @@ -265,10 +256,7 @@
"outputs": [],
"source": [
"# get operator logs\n",
"print(js_endpoint.get_operator_logs(since_hours=1))\n",
"\n",
"# get specific pod log\n",
"# js_endpoint.get_logs(pod='pod-name')"
"print(js_endpoint.get_operator_logs(since_hours=0.1))"
]
},
{
Expand Down
Loading
Loading