@@ -337,24 +337,21 @@ Pre-trained JumpStart models can be obtained from https://sagemaker.readthedocs.io
 from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig
 from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint

-model = Model(
-    model_id="deepseek-llm-r1-distill-qwen-1-5b",
-    model_version="2.0.4"
+model=Model(
+    model_id='deepseek-llm-r1-distill-qwen-1-5b',
+    model_version='2.0.4',
 )
-
-server = Server(
-    instance_type="ml.g5.8xlarge"
+server=Server(
+    instance_type='ml.g5.8xlarge',
 )
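+# Assumed setup: pick an endpoint name that is unique in your account and region,
+# and point TlsConfig at an existing S3 bucket your cluster's role can write to.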
+endpoint_name=SageMakerEndpoint(name='<my-endpoint-name>')
+tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket>')

-endpoint_name = SageMakerEndpoint(name="endpoint-jumpstart")
-
-tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
-
-js_endpoint = HPJumpStartEndpoint(
+js_endpoint=HPJumpStartEndpoint(
     model=model,
     server=server,
     sage_maker_endpoint=endpoint_name,
-    tls_config=tls_config
+    tls_config=tls_config,
 )

 js_endpoint.create()
@@ -370,51 +367,51 @@ print(response)
 ```

-#### Creating a Custom Inference Endpoint
+#### Creating a Custom Inference Endpoint (with S3)

 ```
-from sagemaker.hyperpod.inference.config.hp_custom_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig, EnvironmentVariables
-from sagemaker.hyperpod.inference.hp_custom_endpoint import HPCustomEndpoint
+from sagemaker.hyperpod.inference.config.hp_endpoint_config import CloudWatchTrigger, Dimensions, AutoScalingSpec, Metrics, S3Storage, ModelSourceConfig, TlsConfig, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Resources, Worker
+from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint

-model = Model(
-    model_source_type="s3",
-    model_location="test-pytorch-job/model.tar.gz",
-    s3_bucket_name="my-bucket",
-    s3_region="us-east-2",
-    prefetch_enabled=True
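+# model_location is the S3 folder (prefix) that holds the model artifacts,
+# relative to the bucket named in S3Storage below.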
+model_source_config = ModelSourceConfig(
+    model_source_type='s3',
+    model_location='<my-model-folder-in-s3>',
+    s3_storage=S3Storage(
+        bucket_name='<my-model-artifacts-bucket>',
+        region='us-east-2',
+    ),
 )

-server = Server(
-    instance_type="ml.g5.8xlarge",
-    image_uri="763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0",
-    container_port=8080,
-    model_volume_mount_name="model-weights"
-)
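+# These variables follow the SageMaker inference container conventions:
+# the model is read from /opt/ml/model and inference.py is the entry point script.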
+environment_variables = [
+    EnvironmentVariables(name="HF_MODEL_ID", value="/opt/ml/model"),
+    EnvironmentVariables(name="SAGEMAKER_PROGRAM", value="inference.py"),
+    EnvironmentVariables(name="SAGEMAKER_SUBMIT_DIRECTORY", value="/opt/ml/model/code"),
+    EnvironmentVariables(name="MODEL_CACHE_ROOT", value="/opt/ml/model"),
+    EnvironmentVariables(name="SAGEMAKER_ENV", value="1"),
+]

-resources = {
-    "requests": {"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
-    "limits": {"nvidia.com/gpu": 1}
-}
-
-env = EnvironmentVariables(
-    HF_MODEL_ID="/opt/ml/model",
-    SAGEMAKER_PROGRAM="inference.py",
-    SAGEMAKER_SUBMIT_DIRECTORY="/opt/ml/model/code",
-    MODEL_CACHE_ROOT="/opt/ml/model",
-    SAGEMAKER_ENV="1"
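+# Resource sizing below is a suggestion for ml.g5.8xlarge (32 vCPUs, 1 GPU):
+# "30000m" requests 30 vCPUs, and the matching GPU request/limit pins one GPU per worker.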
+worker = Worker(
+    image='763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0',
+    model_volume_mount=ModelVolumeMount(
+        name='model-weights',
+    ),
+    model_invocation_port=ModelInvocationPort(container_port=8080),
+    resources=Resources(
+        requests={"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
+        limits={"nvidia.com/gpu": 1}
+    ),
+    environment_variables=environment_variables,
 )

-endpoint_name = SageMakerEndpoint(name="endpoint-custom-pytorch")
-
-tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
+tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket-name>')

-custom_endpoint = HPCustomEndpoint(
-    model=model,
-    server=server,
-    resources=resources,
-    environment=env,
-    sage_maker_endpoint=endpoint_name,
+custom_endpoint = HPEndpoint(
+    endpoint_name='<my-endpoint-name>',
+    instance_type='ml.g5.8xlarge',
+    model_name='deepseek15b-test-model-name',
     tls_config=tls_config,
+    model_source_config=model_source_config,
+    worker=worker,
 )

 custom_endpoint.create()
@@ -431,19 +428,17 @@ print(response)
 #### Managing an Endpoint

 ```
-endpoint_iterator = HPJumpStartEndpoint.list()
-for endpoint in endpoint_iterator:
-    print(endpoint.name, endpoint.status)
+endpoint_list = HPEndpoint.list()
+print(endpoint_list[0])

-logs = js_endpoint.get_logs()
-print(logs)
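+# since_hours=0.5 fetches the last 30 minutes of logs from the inference operator.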
+print(custom_endpoint.get_operator_logs(since_hours=0.5))

 ```

 #### Deleting an Endpoint

 ```
-js_endpoint.delete()
+custom_endpoint.delete()

 ```