@@ -35,18 +35,30 @@ hyp create hyp-jumpstart-endpoint \
 
 ````{tab-item} SDK
 ```python
-from sagemaker.hyperpod.inference import HyperPodJumpstartEndpoint
+from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig
+from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint
 
-# Create a JumpStart endpoint
-endpoint = HyperPodJumpstartEndpoint(
-    endpoint_name="endpoint-jumpstart",
-    model_id="jumpstart-model-id",
-    instance_type="ml.g5.8xlarge",
-    tls_output_s3_uri="s3://sample-bucket"
+model = Model(
+    model_id="deepseek-llm-r1-distill-qwen-1-5b",
+    model_version="2.0.4"
+)
+
+server = Server(
+    instance_type="ml.g5.8xlarge"
 )
 
-# Deploy the endpoint
-endpoint.create()
+endpoint_name = SageMakerEndpoint(name="endpoint-jumpstart")
+
+tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
+
+js_endpoint = HPJumpStartEndpoint(
+    model=model,
+    server=server,
+    sage_maker_endpoint=endpoint_name,
+    tls_config=tls_config
+)
+
+js_endpoint.create()
 ```
 ````
 `````
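
A small sanity check could follow the snippet above. This sketch reuses only `HPJumpStartEndpoint.list()`, which appears in the listing example later in this diff; the exact shape of the returned listing is not documented here, so treat the expectation in the comment as an assumption.

```python
# Sketch: after js_endpoint.create(), confirm the JumpStart endpoint is listed.
# Uses only HPJumpStartEndpoint.list(), shown in the listing example below;
# the format of the returned listing is not specified in this diff.
from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint

endpoints = HPJumpStartEndpoint.list()
print(endpoints)  # "endpoint-jumpstart" should appear once deployment completes
```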
@@ -68,19 +80,51 @@ hyp create hyp-custom-endpoint \
 
 ````{tab-item} SDK
 ```python
-from sagemaker.hyperpod.inference import HyperPodCustomEndpoint
+from sagemaker.hyperpod.inference.config.hp_custom_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig, EnvironmentVariables
+from sagemaker.hyperpod.inference.hp_custom_endpoint import HPCustomEndpoint
+
+model = Model(
+    model_source_type="s3",
+    model_location="test-pytorch-job/model.tar.gz",
+    s3_bucket_name="my-bucket",
+    s3_region="us-east-2",
+    prefetch_enabled=True
+)
 
-# Create a custom endpoint
-endpoint = HyperPodCustomEndpoint(
-    endpoint_name="endpoint-custom",
-    model_uri="s3://my-bucket/model-artifacts",
-    image="123456789012.dkr.ecr.us-west-2.amazonaws.com/my-inference-image:latest",
+server = Server(
     instance_type="ml.g5.8xlarge",
-    tls_output_s3_uri="s3://sample-bucket"
+    image_uri="763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0",
+    container_port=8080,
+    model_volume_mount_name="model-weights"
+)
+
+resources = {
+    "requests": {"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
+    "limits": {"nvidia.com/gpu": 1}
+}
+
+env = EnvironmentVariables(
+    HF_MODEL_ID="/opt/ml/model",
+    SAGEMAKER_PROGRAM="inference.py",
+    SAGEMAKER_SUBMIT_DIRECTORY="/opt/ml/model/code",
+    MODEL_CACHE_ROOT="/opt/ml/model",
+    SAGEMAKER_ENV="1"
+)
+
+endpoint_name = SageMakerEndpoint(name="endpoint-custom-pytorch")
+
+tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
+
+custom_endpoint = HPCustomEndpoint(
+    model=model,
+    server=server,
+    resources=resources,
+    environment=env,
+    sage_maker_endpoint=endpoint_name,
+    tls_config=tls_config,
 )
 
-# Deploy the endpoint
-endpoint.create()
+custom_endpoint.create()
 ```
 ````
 `````
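
Once the custom deployment is in service, the `invoke(body=...)` pattern shown in the invocation example later in this diff could be applied directly to the `custom_endpoint` object created above. A minimal sketch, with an arbitrary example payload:

```python
# Sketch: reuse custom_endpoint from the snippet above after deployment completes.
# The invoke(body=...).body.read() pattern matches the invocation example below.
data = '{"inputs": "What is machine learning?"}'
response = custom_endpoint.invoke(body=data).body.read()
print(response)
```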
@@ -113,14 +157,15 @@ hyp list hyp-custom-endpoint
 
 ````{tab-item} SDK
 ```python
-from sagemaker.hyperpod.inference import HyperPodJumpstartEndpoint, HyperPodCustomEndpoint
+from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint
+from sagemaker.hyperpod.inference.hp_custom_endpoint import HPCustomEndpoint
 
 # List JumpStart endpoints
-jumpstart_endpoints = HyperPodJumpstartEndpoint.list()
+jumpstart_endpoints = HPJumpStartEndpoint.list()
 print(jumpstart_endpoints)
 
 # List custom endpoints
-custom_endpoints = HyperPodCustomEndpoint.list()
+custom_endpoints = HPCustomEndpoint.list()
 print(custom_endpoints)
 ```
 ````
@@ -171,16 +216,8 @@ hyp invoke hyp-custom-endpoint \
 
 ````{tab-item} SDK
 ```python
-from sagemaker.hyperpod.inference import HyperPodCustomEndpoint
-
-# Load the endpoint
-endpoint = HyperPodCustomEndpoint.load(endpoint_name="endpoint-custom")
-
-# Invoke the endpoint
-response = endpoint.invoke(
-    payload={"inputs": "What is machine learning?"},
-    content_type="application/json"
-)
+data = '{"inputs":"What is the capital of USA?"}'
+response = endpoint.invoke(body=data).body.read()
 print(response)
 ```
 ````
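
The new snippet assumes an `endpoint` object is already in scope; the updated example no longer shows how it is obtained (the removed lines used `HyperPodCustomEndpoint.load(...)`). One possible way to get a handle on an existing endpoint is sketched below; the `get(name=...)` classmethod is an assumption not confirmed by this diff.

```python
# Assumption: HPCustomEndpoint.get(name=...) returns a handle to an existing
# endpoint. This call is not shown in the diff above and may differ in the SDK.
from sagemaker.hyperpod.inference.hp_custom_endpoint import HPCustomEndpoint

endpoint = HPCustomEndpoint.get(name="endpoint-custom-pytorch")

data = '{"inputs":"What is the capital of USA?"}'
response = endpoint.invoke(body=data).body.read()
print(response)
```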