Skip to content

Commit e9d2699

Browse files
mollyheamazonpapriwal
authored and committed
Minor update on README, example notebooks and documentation (aws#216)
* Update generate_click_command inject logic to not expose unwanted flags to hyp-jumpstart-endpoint * Update unit tests for bug fix, change --label_selector to --label-selector * Update README, example notebooks and documentation to 1) remove model_version, 2) add --model-volume-mount-name, 3) remove tar.gz from --model-location, 4) update unique mount_path for --volume * Update README, example notebooks and documentation to remove tls-config for jumpstart * Minor update to remove tar.gz from --model-location for documentation
1 parent 9e5a480 commit e9d2699

File tree

3 files changed

+10
-20
lines changed

3 files changed

+10
-20
lines changed

README.md

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ hyp create hyp-pytorch-job \
160160
--priority "high" \
161161
--max-retry 3 \
162162
--volume name=model-data,type=hostPath,mount_path=/data,path=/data \
163-
--volume name=training-output,type=pvc,mount_path=/output_data,claim_name=my-pvc,read_only=false
163+
--volume name=training-output,type=pvc,mount_path=/data2,claim_name=my-pvc,read_only=false
164164
```
165165
166166
Key required parameters explained:
@@ -181,7 +181,6 @@ hyp create hyp-jumpstart-endpoint \
181181
--model-id jumpstart-model-id\
182182
--instance-type ml.g5.8xlarge \
183183
--endpoint-name endpoint-jumpstart \
184-
--tls-output-s3-uri s3://sample-bucket
185184
```
186185
187186
@@ -208,7 +207,8 @@ hyp create hyp-custom-endpoint \
208207
--endpoint-name my-custom-endpoint \
209208
--model-name my-pytorch-model \
210209
--model-source-type s3 \
211-
--model-location my-pytorch-training/model.tar.gz \
210+
--model-location my-pytorch-training \
211+
--model-volume-mount-name test-volume \
212212
--s3-bucket-name your-bucket \
213213
--s3-region us-east-1 \
214214
--instance-type ml.g5.8xlarge \
@@ -322,20 +322,17 @@ from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import Mod
322322
from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint
323323

324324
model=Model(
325-
model_id='deepseek-llm-r1-distill-qwen-1-5b',
326-
model_version='2.0.4',
325+
model_id='deepseek-llm-r1-distill-qwen-1-5b'
327326
)
328327
server=Server(
329328
instance_type='ml.g5.8xlarge',
330329
)
331330
endpoint_name=SageMakerEndpoint(name='<my-endpoint-name>')
332-
tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket>')
333331

334332
js_endpoint=HPJumpStartEndpoint(
335333
model=model,
336334
server=server,
337-
sage_maker_endpoint=endpoint_name,
338-
tls_config=tls_config,
335+
sage_maker_endpoint=endpoint_name
339336
)
340337

341338
js_endpoint.create()

doc/inference.md

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@ from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import Mod
3737
from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint
3838
3939
model = Model(
40-
model_id="deepseek-llm-r1-distill-qwen-1-5b",
41-
model_version="2.0.4"
40+
model_id="deepseek-llm-r1-distill-qwen-1-5b"
4241
)
4342
4443
server = Server(
@@ -47,13 +46,10 @@ server = Server(
4746
4847
endpoint_name = SageMakerEndpoint(name="endpoint-jumpstart")
4948
50-
tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
51-
5249
js_endpoint = HPJumpStartEndpoint(
5350
model=model,
5451
server=server,
55-
sage_maker_endpoint=endpoint_name,
56-
tls_config=tls_config
52+
sage_maker_endpoint=endpoint_name
5753
)
5854
5955
js_endpoint.create()
@@ -85,7 +81,7 @@ from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint
8581
8682
model = Model(
8783
model_source_type="s3",
88-
model_location="test-pytorch-job/model.tar.gz",
84+
model_location="test-pytorch-job",
8985
s3_bucket_name="my-bucket",
9086
s3_region="us-east-2",
9187
prefetch_enabled=True

examples/inference/SDK/inference-jumpstart-e2e.ipynb

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,21 +107,18 @@
107107
"source": [
108108
"# create configs\n",
109109
"model=Model(\n",
110-
" model_id='deepseek-llm-r1-distill-qwen-1-5b',\n",
111-
" model_version='2.0.4',\n",
110+
" model_id='deepseek-llm-r1-distill-qwen-1-5b'\n",
112111
")\n",
113112
"server=Server(\n",
114113
" instance_type='ml.g5.8xlarge',\n",
115114
")\n",
116115
"endpoint_name=SageMakerEndpoint(name='<my-endpoint-name>')\n",
117-
"tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket>')\n",
118116
"\n",
119117
"# create spec\n",
120118
"js_endpoint=HPJumpStartEndpoint(\n",
121119
" model=model,\n",
122120
" server=server,\n",
123-
" sage_maker_endpoint=endpoint_name,\n",
124-
" tls_config=tls_config,\n",
121+
" sage_maker_endpoint=endpoint_name\n",
125122
")"
126123
]
127124
},

0 commit comments

Comments (0)