@@ -35,18 +35,30 @@ hyp create hyp-jumpstart-endpoint \
 
 ````{tab-item} SDK
 ```python
-from sagemaker.hyperpod.inference import HyperPodJumpstartEndpoint
+from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig
+from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint
 
-# Create a JumpStart endpoint
-endpoint = HyperPodJumpstartEndpoint(
-    endpoint_name="endpoint-jumpstart",
-    model_id="jumpstart-model-id",
-    instance_type="ml.g5.8xlarge",
-    tls_output_s3_uri="s3://sample-bucket"
+model = Model(
+    model_id="deepseek-llm-r1-distill-qwen-1-5b",
+    model_version="2.0.4"
+)
+
+server = Server(
+    instance_type="ml.g5.8xlarge"
 )
 
-# Deploy the endpoint
-endpoint.create()
+endpoint_name = SageMakerEndpoint(name="endpoint-jumpstart")
+
+tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
+
+js_endpoint = HPJumpStartEndpoint(
+    model=model,
+    server=server,
+    sage_maker_endpoint=endpoint_name,
+    tls_config=tls_config
+)
+
+js_endpoint.create()
 ```
 ````
 `````
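
A small sanity check could follow the snippet above. This sketch reuses only `HPJumpStartEndpoint.list()`, which appears in the listing example later in this diff; the exact shape of the returned listing is not documented here, so treat the expectation in the comment as an assumption.

```python
# Sketch: after js_endpoint.create(), confirm the JumpStart endpoint is listed.
# Uses only HPJumpStartEndpoint.list(), shown in the listing example below;
# the format of the returned listing is not specified in this diff.
from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint

endpoints = HPJumpStartEndpoint.list()
print(endpoints)  # "endpoint-jumpstart" should appear once deployment completes
```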
@@ -68,19 +80,51 @@ hyp create hyp-custom-endpoint \
 
 ````{tab-item} SDK
 ```python
-from sagemaker.hyperpod.inference import HyperPodCustomEndpoint
+from sagemaker.hyperpod.inference.config.hp_custom_endpoint_config import Model, Server, SageMakerEndpoint, TlsConfig, EnvironmentVariables
+from sagemaker.hyperpod.inference.hp_custom_endpoint import HPCustomEndpoint
+
+model = Model(
+    model_source_type="s3",
+    model_location="test-pytorch-job/model.tar.gz",
+    s3_bucket_name="my-bucket",
+    s3_region="us-east-2",
+    prefetch_enabled=True
+)
 
-# Create a custom endpoint
-endpoint = HyperPodCustomEndpoint(
-    endpoint_name="endpoint-custom",
-    model_uri="s3://my-bucket/model-artifacts",
-    image="123456789012.dkr.ecr.us-west-2.amazonaws.com/my-inference-image:latest",
+server = Server(
     instance_type="ml.g5.8xlarge",
-    tls_output_s3_uri="s3://sample-bucket"
+    image_uri="763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0",
+    container_port=8080,
+    model_volume_mount_name="model-weights"
+)
+
+resources = {
+    "requests": {"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"},
+    "limits": {"nvidia.com/gpu": 1}
+}
+
+env = EnvironmentVariables(
+    HF_MODEL_ID="/opt/ml/model",
+    SAGEMAKER_PROGRAM="inference.py",
+    SAGEMAKER_SUBMIT_DIRECTORY="/opt/ml/model/code",
+    MODEL_CACHE_ROOT="/opt/ml/model",
+    SAGEMAKER_ENV="1"
+)
+
+endpoint_name = SageMakerEndpoint(name="endpoint-custom-pytorch")
+
+tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
+
+custom_endpoint = HPCustomEndpoint(
+    model=model,
+    server=server,
+    resources=resources,
+    environment=env,
+    sage_maker_endpoint=endpoint_name,
+    tls_config=tls_config,
 )
 
-# Deploy the endpoint
-endpoint.create()
+custom_endpoint.create()
 ```
 ````
 `````
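
Once the custom deployment is in service, the `invoke(body=...)` pattern shown in the invocation example later in this diff could be applied directly to the `custom_endpoint` object created above. A minimal sketch, with an arbitrary example payload:

```python
# Sketch: reuse custom_endpoint from the snippet above after deployment completes.
# The invoke(body=...).body.read() pattern matches the invocation example below.
data = '{"inputs": "What is machine learning?"}'
response = custom_endpoint.invoke(body=data).body.read()
print(response)
```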
@@ -113,14 +157,15 @@ hyp list hyp-custom-endpoint
 
 ````{tab-item} SDK
 ```python
-from sagemaker.hyperpod.inference import HyperPodJumpstartEndpoint, HyperPodCustomEndpoint
+from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint
+from sagemaker.hyperpod.inference.hp_custom_endpoint import HPCustomEndpoint
 
 # List JumpStart endpoints
-jumpstart_endpoints = HyperPodJumpstartEndpoint.list()
+jumpstart_endpoints = HPJumpStartEndpoint.list()
 print(jumpstart_endpoints)
 
 # List custom endpoints
-custom_endpoints = HyperPodCustomEndpoint.list()
+custom_endpoints = HPCustomEndpoint.list()
 print(custom_endpoints)
 ```
 ````
@@ -171,16 +216,8 @@ hyp invoke hyp-custom-endpoint \
 
 ````{tab-item} SDK
 ```python
-from sagemaker.hyperpod.inference import HyperPodCustomEndpoint
-
-# Load the endpoint
-endpoint = HyperPodCustomEndpoint.load(endpoint_name="endpoint-custom")
-
-# Invoke the endpoint
-response = endpoint.invoke(
-    payload={"inputs": "What is machine learning?"},
-    content_type="application/json"
-)
+data = '{"inputs":"What is the capital of USA?"}'
+response = endpoint.invoke(body=data).body.read()
 print(response)
 ```
 ````
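
The new snippet assumes an `endpoint` object is already in scope; the updated example no longer shows how it is obtained (the removed lines used `HyperPodCustomEndpoint.load(...)`). One possible way to get a handle on an existing endpoint is sketched below; the `get(name=...)` classmethod is an assumption not confirmed by this diff.

```python
# Assumption: HPCustomEndpoint.get(name=...) returns a handle to an existing
# endpoint. This call is not shown in the diff above and may differ in the SDK.
from sagemaker.hyperpod.inference.hp_custom_endpoint import HPCustomEndpoint

endpoint = HPCustomEndpoint.get(name="endpoint-custom-pytorch")

data = '{"inputs":"What is the capital of USA?"}'
response = endpoint.invoke(body=data).body.read()
print(response)
```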