
Commit 0bcee6d

Update inference SDK examples (#155)

* Update inference SDK examples
* Update readme
1 parent: 8034a24

File tree: 4 files changed, +108 −130 lines
README.md

Lines changed: 48 additions & 53 deletions
@@ -337,24 +337,21 @@ Pre-trained Jumpstart models can be gotten from https://sagemaker.readthedocs.io
 from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig
 from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint
 
-model = Model(
-    model_id="deepseek-llm-r1-distill-qwen-1-5b",
-    model_version="2.0.4"
+model=Model(
+    model_id='deepseek-llm-r1-distill-qwen-1-5b',
+    model_version='2.0.4',
 )
-
-server = Server(
-    instance_type="ml.g5.8xlarge"
+server=Server(
+    instance_type='ml.g5.8xlarge',
 )
+endpoint_name=SageMakerEndpoint(name='<my-endpoint-name>')
+tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket>')
 
-endpoint_name = SageMakerEndpoint(name="endpoint-jumpstart")
-
-tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
-
-js_endpoint = HPJumpStartEndpoint(
+js_endpoint=HPJumpStartEndpoint(
     model=model,
     server=server,
     sage_maker_endpoint=endpoint_name,
-    tls_config=tls_config
+    tls_config=tls_config,
 )
 
 js_endpoint.create()
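For reference, the added lines of this hunk assemble into the following complete JumpStart snippet; `<my-endpoint-name>` and `s3://<my-tls-bucket>` are placeholders to replace with your own values:

```
from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig
from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint

# JumpStart model to deploy, by hub id and version
model=Model(
    model_id='deepseek-llm-r1-distill-qwen-1-5b',
    model_version='2.0.4',
)
# instance type the endpoint runs on
server=Server(
    instance_type='ml.g5.8xlarge',
)
endpoint_name=SageMakerEndpoint(name='<my-endpoint-name>')
# the generated TLS certificate is written to this S3 location
tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket>')

js_endpoint=HPJumpStartEndpoint(
    model=model,
    server=server,
    sage_maker_endpoint=endpoint_name,
    tls_config=tls_config,
)
js_endpoint.create()
```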
@@ -370,51 +367,51 @@ print(response)
 ```
 
 
-#### Creating a Custom Inference Endpoint
+#### Creating a Custom Inference Endpoint (with S3)
 
 ```
-from sagemaker.hyperpod.inference.config.hp_custom_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig, EnvironmentVariables
-from sagemaker.hyperpod.inference.hp_custom_endpoint import HPCustomEndpoint
+from sagemaker.hyperpod.inference.config.hp_endpoint_config import CloudWatchTrigger, Dimensions, AutoScalingSpec, Metrics, S3Storage, ModelSourceConfig, TlsConfig, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Resources, Worker
+from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint
 
-model = Model(
-    model_source_type="s3",
-    model_location="test-pytorch-job/model.tar.gz",
-    s3_bucket_name="my-bucket",
-    s3_region="us-east-2",
-    prefetch_enabled=True
+model_source_config = ModelSourceConfig(
+    model_source_type='s3',
+    model_location="<my-model-folder-in-s3>",
+    s3_storage=S3Storage(
+        bucket_name='<my-model-artifacts-bucket>',
+        region='us-east-2',
+    ),
 )
 
-server = Server(
-    instance_type="ml.g5.8xlarge",
-    image_uri="763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0",
-    container_port=8080,
-    model_volume_mount_name="model-weights"
-)
+environment_variables = [
+    EnvironmentVariables(name="HF_MODEL_ID", value="/opt/ml/model"),
+    EnvironmentVariables(name="SAGEMAKER_PROGRAM", value="inference.py"),
+    EnvironmentVariables(name="SAGEMAKER_SUBMIT_DIRECTORY", value="/opt/ml/model/code"),
+    EnvironmentVariables(name="MODEL_CACHE_ROOT", value="/opt/ml/model"),
+    EnvironmentVariables(name="SAGEMAKER_ENV", value="1"),
+]
 
-resources = {
-    "requests": {"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
-    "limits": {"nvidia.com/gpu": 1}
-}
-
-env = EnvironmentVariables(
-    HF_MODEL_ID="/opt/ml/model",
-    SAGEMAKER_PROGRAM="inference.py",
-    SAGEMAKER_SUBMIT_DIRECTORY="/opt/ml/model/code",
-    MODEL_CACHE_ROOT="/opt/ml/model",
-    SAGEMAKER_ENV="1"
+worker = Worker(
+    image='763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0',
+    model_volume_mount=ModelVolumeMount(
+        name='model-weights',
+    ),
+    model_invocation_port=ModelInvocationPort(container_port=8080),
+    resources=Resources(
+        requests={"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
+        limits={"nvidia.com/gpu": 1}
+    ),
+    environment_variables=environment_variables,
 )
 
-endpoint_name = SageMakerEndpoint(name="endpoint-custom-pytorch")
-
-tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
+tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket-name>')
 
-custom_endpoint = HPCustomEndpoint(
-    model=model,
-    server=server,
-    resources=resources,
-    environment=env,
-    sage_maker_endpoint=endpoint_name,
+custom_endpoint = HPEndpoint(
+    endpoint_name='<my-endpoint-name>',
+    instance_type='ml.g5.8xlarge',
+    model_name='deepseek15b-test-model-name',
     tls_config=tls_config,
+    model_source_config=model_source_config,
+    worker=worker,
 )
 
 custom_endpoint.create()
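Assembled from the added lines, the updated S3-backed example reads as follows in full. Placeholders like `<my-model-folder-in-s3>` and `<my-tls-bucket-name>` are stand-ins, and the autoscaling-related imports from the diff that this snippet does not use are omitted here:

```
from sagemaker.hyperpod.inference.config.hp_endpoint_config import S3Storage, ModelSourceConfig, TlsConfig, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Resources, Worker
from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint

# where the model artifacts live in S3
model_source_config = ModelSourceConfig(
    model_source_type='s3',
    model_location='<my-model-folder-in-s3>',
    s3_storage=S3Storage(
        bucket_name='<my-model-artifacts-bucket>',
        region='us-east-2',
    ),
)

# environment passed to the serving container
environment_variables = [
    EnvironmentVariables(name="HF_MODEL_ID", value="/opt/ml/model"),
    EnvironmentVariables(name="SAGEMAKER_PROGRAM", value="inference.py"),
    EnvironmentVariables(name="SAGEMAKER_SUBMIT_DIRECTORY", value="/opt/ml/model/code"),
    EnvironmentVariables(name="MODEL_CACHE_ROOT", value="/opt/ml/model"),
    EnvironmentVariables(name="SAGEMAKER_ENV", value="1"),
]

# serving container image, volume, port, and resource requests
worker = Worker(
    image='763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0',
    model_volume_mount=ModelVolumeMount(name='model-weights'),
    model_invocation_port=ModelInvocationPort(container_port=8080),
    resources=Resources(
        requests={"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
        limits={"nvidia.com/gpu": 1},
    ),
    environment_variables=environment_variables,
)

tls_config = TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket-name>')

custom_endpoint = HPEndpoint(
    endpoint_name='<my-endpoint-name>',
    instance_type='ml.g5.8xlarge',
    model_name='deepseek15b-test-model-name',
    tls_config=tls_config,
    model_source_config=model_source_config,
    worker=worker,
)
custom_endpoint.create()
```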
@@ -431,19 +428,17 @@ print(response)
 #### Managing an Endpoint
 
 ```
-endpoint_iterator = HPJumpStartEndpoint.list()
-for endpoint in endpoint_iterator:
-    print(endpoint.name, endpoint.status)
+endpoint_list = HPEndpoint.list()
+print(endpoint_list[0])
 
-logs = js_endpoint.get_logs()
-print(logs)
+print(custom_endpoint.get_operator_logs(since_hours=0.5))
 
 ```
 
 #### Deleting an Endpoint
 
 ```
-js_endpoint.delete()
+custom_endpoint.delete()
 
 ```
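Condensed, endpoint management now goes through `HPEndpoint` directly. A sketch of the full lifecycle, assuming (as the FSx notebook below suggests) that the object returned by `HPEndpoint.get` supports the same `get_operator_logs` and `delete` calls shown in this commit; `<my-endpoint-name>` is a placeholder:

```
from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint

# list all endpoints managed by the inference operator
endpoint_list = HPEndpoint.list()
print(endpoint_list[0])

# look up an existing endpoint by name
endpoint = HPEndpoint.get(name='<my-endpoint-name>')

# operator logs from the last 30 minutes
print(endpoint.get_operator_logs(since_hours=0.5))

# tear the endpoint down when finished
endpoint.delete()
```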

examples/inference/SDK/inference-fsx-model-e2e.ipynb

Lines changed: 19 additions & 10 deletions
@@ -7,10 +7,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sagemaker.hyperpod.hyperpod_manager import HyperPodManager\n",
-    "\n",
-    "HyperPodManager.list_clusters(region='us-east-2')\n",
-    "HyperPodManager.set_context('<hyperpod-cluster-name>', region='us-east-2')"
+    "from sagemaker.hyperpod import list_clusters, set_cluster_context\n",
+    "list_clusters(region='us-east-2')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "765ef3fd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# choose the HP cluster\n",
+    "set_cluster_context('<my-cluster>', region='us-east-2')"
    ]
   },
   {
@@ -20,7 +29,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sagemaker.hyperpod.inference.config.hp_endpoint_config import CloudWatchTrigger, PrometheusTrigger, AutoScalingSpec, ModelMetrics, Metrics, FsxStorage, ModelSourceConfig, Tags, TlsConfig, ConfigMapKeyRef, FieldRef, ResourceFieldRef, SecretKeyRef, ValueFrom, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Claims, Resources, Worker\n",
+    "from sagemaker.hyperpod.inference.config.hp_endpoint_config import FsxStorage, ModelSourceConfig, TlsConfig, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Resources, Worker\n",
     "from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint\n",
     "import yaml\n",
     "import time"
@@ -33,13 +42,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<your-tls-bucket-name>')\n",
+    "tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket-name>')\n",
     "\n",
     "model_source_config = ModelSourceConfig(\n",
     "    model_source_type='fsx',\n",
-    "    model_location=\"<your-model-folder-in-fsx>\",\n",
+    "    model_location=\"<my-model-folder-in-fsx>\",\n",
     "    fsx_storage=FsxStorage(\n",
-    "        file_system_id='<your-fs-id>'\n",
+    "        file_system_id='<my-fs-id>'\n",
     "    ),\n",
     ")\n",
     "\n",
@@ -73,7 +82,7 @@
    "outputs": [],
    "source": [
     "fsx_endpoint = HPEndpoint(\n",
-    "    endpoint_name='test-endpoint-name-fsx-pysdk',\n",
+    "    endpoint_name='<my-endpoint-name>',\n",
     "    instance_type='ml.g5.8xlarge',\n",
     "    model_name='deepseek15b-fsx-test-pysdk',\n",
     "    tls_config=tls_config,\n",
@@ -165,7 +174,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "endpoint = HPEndpoint.get(name='<your-endpoint-name>')"
+    "endpoint = HPEndpoint.get(name='<my-endpoint-name>')"
    ]
   },
   {
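Taken together, the changed cells sketch the FSx-backed flow below. The `Worker` cell is untouched by this commit and does not appear in the diff, so the `worker` here is borrowed from the README's S3 example and should be treated as an assumption; placeholders such as `<my-cluster>`, `<my-fs-id>`, and the bucket names must be replaced:

```
from sagemaker.hyperpod import list_clusters, set_cluster_context
from sagemaker.hyperpod.inference.config.hp_endpoint_config import FsxStorage, ModelSourceConfig, TlsConfig, ModelInvocationPort, ModelVolumeMount, Resources, Worker
from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint

# point the SDK at a HyperPod cluster
list_clusters(region='us-east-2')
set_cluster_context('<my-cluster>', region='us-east-2')

tls_config = TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket-name>')

# model artifacts live on an FSx file system
model_source_config = ModelSourceConfig(
    model_source_type='fsx',
    model_location='<my-model-folder-in-fsx>',
    fsx_storage=FsxStorage(
        file_system_id='<my-fs-id>',
    ),
)

# assumption: worker config copied from the README's S3 example (this cell is not in the diff)
worker = Worker(
    image='763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0',
    model_volume_mount=ModelVolumeMount(name='model-weights'),
    model_invocation_port=ModelInvocationPort(container_port=8080),
    resources=Resources(
        requests={"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
        limits={"nvidia.com/gpu": 1},
    ),
)

fsx_endpoint = HPEndpoint(
    endpoint_name='<my-endpoint-name>',
    instance_type='ml.g5.8xlarge',
    model_name='deepseek15b-fsx-test-pysdk',
    tls_config=tls_config,
    model_source_config=model_source_config,
    worker=worker,
)
fsx_endpoint.create()
```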

examples/inference/SDK/inference-jumpstart-e2e.ipynb

Lines changed: 9 additions & 21 deletions
@@ -8,22 +8,14 @@
    "## Inference Operator PySDK E2E Experience (JumpStart model)"
   ]
  },
-  {
-   "cell_type": "markdown",
-   "id": "1b3ce5c1-3c3d-4139-b7ae-042f360f3032",
-   "metadata": {},
-   "source": [
-    "<b>Prerequisite:</b> Data scientists should list clusters and set cluster context"
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e22c86d6-0d3d-4c51-bef0-3f4c59ce111c",
   "metadata": {},
   "outputs": [],
   "source": [
-    "from sagemaker.hyperpod.hyperpod_manager import HyperPodManager"
+    "from sagemaker.hyperpod import list_clusters, set_cluster_context"
   ]
  },
  {
@@ -33,8 +25,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#Set region \n",
-    "region = \"us-west-2\""
+    "list_clusters(region='us-east-2')"
   ]
  },
  {
@@ -44,8 +35,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# choose the HP cluster user works on\n",
-    "HyperPodManager.set_context('sagemaker-hyperpod-eks-cluster-demo-05-01', region=region)"
+    "# choose the HP cluster\n",
+    "set_cluster_context('<my-cluster>', region='us-east-2')"
   ]
  },
  {
@@ -67,7 +58,7 @@
    "from jumpstart_public_hub_visualization_utils import get_all_public_hub_model_data\n",
    "\n",
    "# Load and display SageMaker public hub models\n",
-    "get_all_public_hub_model_data(region=\"us-west-2\")"
+    "get_all_public_hub_model_data(region=\"us-east-2\")"
   ]
  },
  {
@@ -122,8 +113,8 @@
    "server=Server(\n",
    "    instance_type='ml.g5.8xlarge',\n",
    ")\n",
-    "endpoint_name=SageMakerEndpoint(name='deepsek7bsme-testing-jumpstart-7-1')\n",
-    "tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://tls-bucket-inf1-beta2')\n",
+    "endpoint_name=SageMakerEndpoint(name='<my-endpoint-name>')\n",
+    "tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket>')\n",
    "\n",
    "# create spec\n",
    "js_endpoint=HPJumpStartEndpoint(\n",
@@ -230,7 +221,7 @@
    "outputs": [],
    "source": [
    "# output is similar to kubectl describe jumpstartmodel\n",
-    "endpoint = HPJumpStartEndpoint.get(name='deepseek-llm-r1-distill-qwen-1-5b')\n",
+    "endpoint = HPJumpStartEndpoint.get(name='<my-endpoint-name>')\n",
    "print_yaml(endpoint)"
   ]
  },
@@ -265,10 +256,7 @@
    "outputs": [],
    "source": [
    "# get operator logs\n",
-    "print(js_endpoint.get_operator_logs(since_hours=1))\n",
-    "\n",
-    "# get specific pod log\n",
-    "# js_endpoint.get_logs(pod='pod-name')"
+    "print(js_endpoint.get_operator_logs(since_hours=0.1))"
   ]
  },
  {
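Put together, the changed cells of this notebook follow the flow below. This is a sketch of just the diffed steps; `<my-cluster>` and `<my-endpoint-name>` are placeholders, and the endpoint creation itself is unchanged from the README's JumpStart example:

```
from sagemaker.hyperpod import list_clusters, set_cluster_context
from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint

# discover clusters in the target region, then point the SDK at one
list_clusters(region='us-east-2')
set_cluster_context('<my-cluster>', region='us-east-2')

# fetch the deployed endpoint; output is similar to `kubectl describe jumpstartmodel`
js_endpoint = HPJumpStartEndpoint.get(name='<my-endpoint-name>')

# operator logs from the last ~6 minutes (0.1 hours)
print(js_endpoint.get_operator_logs(since_hours=0.1))
```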
