Skip to content

Commit d90666a

Browse files
authored
Update JumpStartModel interface (#51)
* Update JumpStartModel interface. Tested in a Jupyter notebook that the endpoint can be successfully invoked. * Add refresh method. * Remove debugging print. * Update HPEndpoint classes. Tested using example notebooks. * Add example notebooks. These notebooks haven't been cleaned up and are for internal review only; commands are subject to change later. * Add metadata class
1 parent 6b2a6a5 commit d90666a

12 files changed

+1926
-423
lines changed
Lines changed: 336 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,336 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "f8caf125-ab9d-4655-a59e-edbaeed9e919",
7+
"metadata": {},
8+
"outputs": [
9+
{
10+
"name": "stdout",
11+
"output_type": "stream",
12+
"text": [
13+
"Orchestrator Cluster Name\n",
14+
"-------------- ----------------------------\n",
15+
"EKS hp-cluster-for-inf-Beta2try1\n",
16+
"Updated context arn:aws:eks:us-east-2:637423555983:cluster/EKSClusterForInf-Beta2try1 in /tmp/kubeconfig\n",
17+
"Successfully set current cluster as: hp-cluster-for-inf-Beta2try1\n"
18+
]
19+
}
20+
],
21+
"source": [
22+
"from sagemaker.hyperpod.hyperpod_manager import HyperPodManager\n",
23+
"\n",
24+
"HyperPodManager.list_clusters(region='us-east-2')\n",
25+
"HyperPodManager.set_context('hp-cluster-for-inf-Beta2try1', region='us-east-2')"
26+
]
27+
},
28+
{
29+
"cell_type": "code",
30+
"execution_count": 3,
31+
"id": "32f976ba-d113-4e73-9698-2e5d8c7c44f6",
32+
"metadata": {},
33+
"outputs": [],
34+
"source": [
35+
"from sagemaker.hyperpod.inference.config.hp_endpoint_config import CloudWatchTrigger, PrometheusTrigger, AutoScalingSpec, ModelMetrics, Metrics, FsxStorage, ModelSourceConfig, Tags, TlsConfig, ConfigMapKeyRef, FieldRef, ResourceFieldRef, SecretKeyRef, ValueFrom, EnvironmentVariables, ModelInvocationPort, ModelVolumeMount, Claims, Resources, Worker\n",
36+
"from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": 4,
42+
"id": "67f9a718-524a-4a3d-9885-31385c995c18",
43+
"metadata": {},
44+
"outputs": [],
45+
"source": [
46+
"tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://tls-bucket-inf1-beta2')\n",
47+
"\n",
48+
"model_source_config = ModelSourceConfig(\n",
49+
" model_source_type='fsx',\n",
50+
" model_location=\"deepseek-1-5b\",\n",
51+
" fsx_storage=FsxStorage(\n",
52+
" file_system_id='fs-0e6a92495c35a81f2'\n",
53+
" ),\n",
54+
")\n",
55+
"\n",
56+
"environment_variables = [\n",
57+
" EnvironmentVariables(name=\"HF_MODEL_ID\", value=\"/opt/ml/model\"),\n",
58+
" EnvironmentVariables(name=\"SAGEMAKER_PROGRAM\", value=\"inference.py\"),\n",
59+
" EnvironmentVariables(name=\"SAGEMAKER_SUBMIT_DIRECTORY\", value=\"/opt/ml/model/code\"),\n",
60+
" EnvironmentVariables(name=\"MODEL_CACHE_ROOT\", value=\"/opt/ml/model\"),\n",
61+
" EnvironmentVariables(name=\"SAGEMAKER_ENV\", value=\"1\"),\n",
62+
"]\n",
63+
"\n",
64+
"worker = Worker(\n",
65+
" image='763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0',\n",
66+
" model_volume_mount=ModelVolumeMount(\n",
67+
" name='model-weights',\n",
68+
" ),\n",
69+
" model_invocation_port=ModelInvocationPort(container_port=8080),\n",
70+
" resources=Resources(\n",
71+
" requests={\"cpu\": \"30000m\", \"nvidia.com/gpu\": 1, \"memory\": \"100Gi\"},\n",
72+
" limits={\"nvidia.com/gpu\": 1}\n",
73+
" ),\n",
74+
" environment_variables=environment_variables,\n",
75+
")"
76+
]
77+
},
78+
{
79+
"cell_type": "code",
80+
"execution_count": 13,
81+
"id": "ae599413-e275-47c3-9dca-05b80b1bb6e2",
82+
"metadata": {},
83+
"outputs": [],
84+
"source": [
85+
"fsx_endpoint = HPEndpoint(\n",
86+
" endpoint_name='test-endpoint-name-fsx-zhaoqi-pysdk-06-28-1',\n",
87+
" instance_type='ml.g5.8xlarge',\n",
88+
" model_name='deepseek15b-fsx-test-zhaoqi-pysdk-06-28-1',\n",
89+
" tls_config=tls_config,\n",
90+
" model_source_config=model_source_config,\n",
91+
" worker=worker,\n",
92+
")"
93+
]
94+
},
95+
{
96+
"cell_type": "code",
97+
"execution_count": 8,
98+
"id": "bf04c78e-745d-4530-8342-216cf1fbcfc4",
99+
"metadata": {},
100+
"outputs": [
101+
{
102+
"name": "stdout",
103+
"output_type": "stream",
104+
"text": [
105+
"Deploying model and its endpoint... The process may take a few minutes.\n"
106+
]
107+
}
108+
],
109+
"source": [
110+
"fsx_endpoint.create()"
111+
]
112+
},
113+
{
114+
"cell_type": "code",
115+
"execution_count": 22,
116+
"id": "ec2cfae4-a056-465d-823a-75f5b6b9a495",
117+
"metadata": {},
118+
"outputs": [],
119+
"source": [
120+
"fsx_endpoint.refresh()"
121+
]
122+
},
123+
{
124+
"cell_type": "code",
125+
"execution_count": 23,
126+
"id": "1ab21f53-36d6-4230-a5ad-1622ebcbe32c",
127+
"metadata": {
128+
"scrolled": true
129+
},
130+
"outputs": [
131+
{
132+
"name": "stdout",
133+
"output_type": "stream",
134+
"text": [
135+
"endpointName: test-endpoint-name-fsx-zhaoqi-pysdk-06-28-1\n",
136+
"instanceType: ml.g5.8xlarge\n",
137+
"invocationEndpoint: invocations\n",
138+
"modelName: deepseek15b-fsx-test-zhaoqi-pysdk-06-28-1\n",
139+
"modelSourceConfig:\n",
140+
" fsxStorage:\n",
141+
" fileSystemId: fs-0e6a92495c35a81f2\n",
142+
" modelLocation: deepseek-1-5b\n",
143+
" modelSourceType: fsx\n",
144+
" prefetchEnabled: false\n",
145+
"namespace: default\n",
146+
"replicas: 1\n",
147+
"status:\n",
148+
" conditions:\n",
149+
" - lastTransitionTime: '2025-06-29T01:19:45Z'\n",
150+
" message: Deployment or SageMaker endpoint registration creation for model is in\n",
151+
" progress\n",
152+
" reason: InProgress\n",
153+
" status: 'True'\n",
154+
" type: DeploymentInProgress\n",
155+
" - lastTransitionTime: '2025-06-29T01:24:39Z'\n",
156+
" message: Deployment and SageMaker endpoint registration for model have been created\n",
157+
" successfully\n",
158+
" reason: Success\n",
159+
" status: 'True'\n",
160+
" type: DeploymentComplete\n",
161+
" deploymentStatus:\n",
162+
" deploymentObjectOverallState: DeploymentComplete\n",
163+
" lastUpdated: '2025-06-29T01:24:39Z'\n",
164+
" name: deepseek15b-fsx-test-zhaoqi-pysdk-06-28-1\n",
165+
" reason: NativeDeploymentObjectFound\n",
166+
" status:\n",
167+
" availableReplicas: 1\n",
168+
" conditions:\n",
169+
" - lastTransitionTime: '2025-06-29T01:19:55Z'\n",
170+
" lastUpdateTime: '2025-06-29T01:19:55Z'\n",
171+
" message: Deployment has minimum availability.\n",
172+
" reason: MinimumReplicasAvailable\n",
173+
" status: 'True'\n",
174+
" type: Available\n",
175+
" - lastTransitionTime: '2025-06-29T01:19:37Z'\n",
176+
" lastUpdateTime: '2025-06-29T01:19:55Z'\n",
177+
" message: ReplicaSet \"deepseek15b-fsx-test-zhaoqi-pysdk-06-28-1-54fb8fbfc8\"\n",
178+
" has successfully progressed.\n",
179+
" reason: NewReplicaSetAvailable\n",
180+
" status: 'True'\n",
181+
" type: Progressing\n",
182+
" observedGeneration: 1\n",
183+
" readyReplicas: 1\n",
184+
" replicas: 1\n",
185+
" updatedReplicas: 1\n",
186+
" endpoints:\n",
187+
" sagemaker:\n",
188+
" endpointArn: arn:aws:sagemaker:us-east-2:637423555983:endpoint/test-endpoint-name-fsx-zhaoqi-pysdk-06-28-1\n",
189+
" state: CreationCompleted\n",
190+
" replicas: 1\n",
191+
" selector: app=deepseek15b-fsx-test-zhaoqi-pysdk-06-28-1,deploying-service=hyperpod-inference\n",
192+
" state: DeploymentComplete\n",
193+
" tlsCertificate:\n",
194+
" certificateARN: arn:aws:acm:us-east-2:637423555983:certificate/08c7f68c-e9bb-4069-8a68-b5ee72ea351a\n",
195+
" certificateDomainNames:\n",
196+
" - internal-k8s-default-albdeeps-a0dda9b7ad-724026463.us-east-2.elb.amazonaws.com\n",
197+
" certificateName: deepseek15b-fsx-test-zhaoqi-pysdk-06-28-1-certificate\n",
198+
" importedCertificates:\n",
199+
" - arn:aws:acm:us-east-2:637423555983:certificate/08c7f68c-e9bb-4069-8a68-b5ee72ea351a\n",
200+
" issuerName: deepseek15b-fsx-test-zhaoqi-pysdk-06-28-1-issuer\n",
201+
" lastCertExpiryTime: '2026-06-29T01:19:44Z'\n",
202+
" tlsCertificateOutputS3Bucket: tls-bucket-inf1-beta2\n",
203+
" tlsCertificateS3Keys:\n",
204+
" - 52c2qrh8q2ts/default-deepseek15b-fsx-test-zhaoqi-pysdk-06-28-1-1751159976/deepseek15b-fsx-test-zhaoqi-pysdk-06-28-1-certificate-1782695984.pem\n",
205+
"tlsConfig:\n",
206+
" tlsCertificateOutputS3Uri: s3://tls-bucket-inf1-beta2\n",
207+
"worker:\n",
208+
" environmentVariables:\n",
209+
" - name: HF_MODEL_ID\n",
210+
" value: /opt/ml/model\n",
211+
" - name: SAGEMAKER_PROGRAM\n",
212+
" value: inference.py\n",
213+
" - name: SAGEMAKER_SUBMIT_DIRECTORY\n",
214+
" value: /opt/ml/model/code\n",
215+
" - name: MODEL_CACHE_ROOT\n",
216+
" value: /opt/ml/model\n",
217+
" - name: SAGEMAKER_ENV\n",
218+
" value: '1'\n",
219+
" image: 763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0\n",
220+
" modelInvocationPort:\n",
221+
" containerPort: 8080\n",
222+
" name: http\n",
223+
" modelVolumeMount:\n",
224+
" mountPath: /opt/ml/model\n",
225+
" name: model-weights\n",
226+
" resources:\n",
227+
" limits:\n",
228+
" nvidia.com/gpu: 1\n",
229+
" requests:\n",
230+
" cpu: 30000m\n",
231+
" memory: 100Gi\n",
232+
" nvidia.com/gpu: 1\n",
233+
"\n"
234+
]
235+
}
236+
],
237+
"source": [
238+
"# print refreshed config\n",
239+
"import yaml\n",
240+
"print(yaml.dump(fsx_endpoint.model_dump(exclude_none=True)))"
241+
]
242+
},
243+
{
244+
"cell_type": "code",
245+
"execution_count": 18,
246+
"id": "d606c862-0f03-4cd9-9324-c5960d6be362",
247+
"metadata": {},
248+
"outputs": [
249+
{
250+
"data": {
251+
"text/plain": [
252+
"[HPEndpoint(InitialReplicaCount=None, autoScalingSpec=None, endpointName='test-endpoint-name-fsx-zhaoqi-pysdk', instanceType='ml.g5.8xlarge', invocationEndpoint='invocations', metrics=None, modelName='deepseek15b-fsx-test-zhaoqi-pysdk', modelSourceConfig=ModelSourceConfig(fsxStorage=FsxStorage(dnsName=None, fileSystemId='fs-0e6a92495c35a81f2', mountName=None), modelLocation='deepseek-1-5b', modelSourceType='fsx', prefetchEnabled=False, s3Storage=None), modelVersion=None, replicas=1, tags=None, tlsConfig=TlsConfig(tlsCertificateOutputS3Uri='s3://tls-bucket-inf1-beta2'), worker=Worker(environmentVariables=[EnvironmentVariables(name='HF_MODEL_ID', value='/opt/ml/model', valueFrom=None), EnvironmentVariables(name='SAGEMAKER_PROGRAM', value='inference.py', valueFrom=None), EnvironmentVariables(name='SAGEMAKER_SUBMIT_DIRECTORY', value='/opt/ml/model/code', valueFrom=None), EnvironmentVariables(name='MODEL_CACHE_ROOT', value='/opt/ml/model', valueFrom=None), EnvironmentVariables(name='SAGEMAKER_ENV', value='1', valueFrom=None)], image='763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0', modelInvocationPort=ModelInvocationPort(containerPort=8080, name='http'), modelVolumeMount=ModelVolumeMount(mountPath='/opt/ml/model', name='model-weights'), resources=Resources(claims=None, limits={'nvidia.com/gpu': 1}, requests={'cpu': '30000m', 'memory': '100Gi', 'nvidia.com/gpu': 1})), namespace='default', status=None),\n",
253+
" HPEndpoint(InitialReplicaCount=None, autoScalingSpec=None, endpointName='test-endpoint-name-fsx-zhaoqi-pysdk-06-28-1', instanceType='ml.g5.8xlarge', invocationEndpoint='invocations', metrics=None, modelName='deepseek15b-fsx-test-zhaoqi-pysdk-06-28-1', modelSourceConfig=ModelSourceConfig(fsxStorage=FsxStorage(dnsName=None, fileSystemId='fs-0e6a92495c35a81f2', mountName=None), modelLocation='deepseek-1-5b', modelSourceType='fsx', prefetchEnabled=False, s3Storage=None), modelVersion=None, replicas=1, tags=None, tlsConfig=TlsConfig(tlsCertificateOutputS3Uri='s3://tls-bucket-inf1-beta2'), worker=Worker(environmentVariables=[EnvironmentVariables(name='HF_MODEL_ID', value='/opt/ml/model', valueFrom=None), EnvironmentVariables(name='SAGEMAKER_PROGRAM', value='inference.py', valueFrom=None), EnvironmentVariables(name='SAGEMAKER_SUBMIT_DIRECTORY', value='/opt/ml/model/code', valueFrom=None), EnvironmentVariables(name='MODEL_CACHE_ROOT', value='/opt/ml/model', valueFrom=None), EnvironmentVariables(name='SAGEMAKER_ENV', value='1', valueFrom=None)], image='763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0', modelInvocationPort=ModelInvocationPort(containerPort=8080, name='http'), modelVolumeMount=ModelVolumeMount(mountPath='/opt/ml/model', name='model-weights'), resources=Resources(claims=None, limits={'nvidia.com/gpu': 1}, requests={'cpu': '30000m', 'memory': '100Gi', 'nvidia.com/gpu': 1})), namespace='default', status=None)]"
254+
]
255+
},
256+
"execution_count": 18,
257+
"metadata": {},
258+
"output_type": "execute_result"
259+
}
260+
],
261+
"source": [
262+
"# list all endpoints\n",
263+
"endpoints = HPEndpoint.list()\n",
264+
"endpoints"
265+
]
266+
},
267+
{
268+
"cell_type": "code",
269+
"execution_count": 30,
270+
"id": "7159577e-d9c3-4f95-8c94-0df869b658cb",
271+
"metadata": {},
272+
"outputs": [],
273+
"source": [
274+
"# get endpoint\n",
275+
"endpoint = HPEndpoint.get(name='deepseek15b-fsx-test-zhaoqi-pysdk-06-28-1')\n",
276+
"\n",
277+
"# another way to get endpoint object\n",
278+
"# endpoint = HPEndpoint.list()[0]"
279+
]
280+
},
281+
{
282+
"cell_type": "code",
283+
"execution_count": 31,
284+
"id": "59d65df2-fe0c-4e3c-b584-dfd10e9e7264",
285+
"metadata": {},
286+
"outputs": [
287+
{
288+
"data": {
289+
"text/plain": [
290+
"b'[{\"generated_text\":\"What is the capital of Japan? Or, if more than one city is capital, list the first one.\\\\nThe capital city of France is Paris, and the capital of Russia is Moscow, so each country has its own capital, but Japan only has one.\\\\nWhat is the capital of Japan? Or if there is more than one capital city, just list the first one.\\\\nIn Japan, although some cities (100-200 population) have their own capital, perhaps\\\\nthe typical capital might...,\\\\nWait, the\"}]'"
291+
]
292+
},
293+
"execution_count": 31,
294+
"metadata": {},
295+
"output_type": "execute_result"
296+
}
297+
],
298+
"source": [
299+
"# invoke\n",
300+
"data='{\"inputs\": \"What is the capital of Japan?\"}'\n",
301+
"\n",
302+
"# invoke\n",
303+
"endpoint.invoke(body=data).body.read()"
304+
]
305+
},
306+
{
307+
"cell_type": "code",
308+
"execution_count": null,
309+
"id": "d2b3a2ce-6531-4d8d-bd17-e1daabf557cc",
310+
"metadata": {},
311+
"outputs": [],
312+
"source": []
313+
}
314+
],
315+
"metadata": {
316+
"kernelspec": {
317+
"display_name": "Python 3 (ipykernel)",
318+
"language": "python",
319+
"name": "python3"
320+
},
321+
"language_info": {
322+
"codemirror_mode": {
323+
"name": "ipython",
324+
"version": 3
325+
},
326+
"file_extension": ".py",
327+
"mimetype": "text/x-python",
328+
"name": "python",
329+
"nbconvert_exporter": "python",
330+
"pygments_lexer": "ipython3",
331+
"version": "3.12.3"
332+
}
333+
},
334+
"nbformat": 4,
335+
"nbformat_minor": 5
336+
}

0 commit comments

Comments (0)