Skip to content

Commit 6a9ddb4

Browse files
shantanutripzhaoqizqwang
authored and committed
Add crds, service account and region (#32)
* Add CRDs and setup for region
* Change annotation for SA
* Remove default region
1 parent 6868542 commit 6a9ddb4

File tree

6 files changed

+375
-23
lines changed

6 files changed

+375
-23
lines changed

helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,39 @@ spec:
6262
The invocation endpoint of the model server. http://<host>:<port>/ would be pre-populated based on the other fields.
6363
Please fill in the path after http://<host>:<port>/ specific to your model server.
6464
type: string
65+
metrics:
66+
description: Configuration for metrics collection and exposure
67+
properties:
68+
enabled:
69+
default: false
70+
description: Enable metrics collection for this model deployment
71+
type: boolean
72+
metricsScrapeIntervalSeconds:
73+
default: 15
74+
description: Scrape interval in seconds for metrics collection
75+
from sidecar and model container.
76+
format: int32
77+
maximum: 300
78+
minimum: 5
79+
type: integer
80+
modelMetrics:
81+
description: Configuration for model container metrics scraping
82+
properties:
83+
path:
84+
default: /metrics
85+
description: Path where the model exposes metrics
86+
pattern: ^/[a-zA-Z0-9\-_.\/]*$
87+
type: string
88+
port:
89+
default: 8080
90+
description: Port where the model exposes metrics. If not
91+
specified, a default port will be used.
92+
format: int32
93+
maximum: 65535
94+
minimum: 1024
95+
type: integer
96+
type: object
97+
type: object
6598
modelName:
6699
description: Name of model that will be created on Sagemaker
67100
maxLength: 63
@@ -442,6 +475,112 @@ spec:
442475
- type
443476
type: object
444477
type: array
478+
deploymentStatus:
479+
description: Details of the native kubernetes deployment that hosts
480+
the model
481+
properties:
482+
deploymentObjectOverallState:
483+
description: Overall State of the Deployment Object
484+
type: string
485+
lastUpdated:
486+
description: Last Update Time
487+
format: date-time
488+
type: string
489+
message:
490+
description: Message populated in the root CRD while updating
491+
the status of underlying Deployment
492+
type: string
493+
name:
494+
description: Name of the Deployment Object
495+
type: string
496+
reason:
497+
description: Reason populated in the root CRD while updating the
498+
status of underlying Deployment
499+
type: string
500+
status:
501+
description: Status of the Deployment Object
502+
properties:
503+
availableReplicas:
504+
description: Total number of available pods (ready for at
505+
least minReadySeconds) targeted by this deployment.
506+
format: int32
507+
type: integer
508+
collisionCount:
509+
description: |-
510+
Count of hash collisions for the Deployment. The Deployment controller uses this
511+
field as a collision avoidance mechanism when it needs to create the name for the
512+
newest ReplicaSet.
513+
format: int32
514+
type: integer
515+
conditions:
516+
description: Represents the latest available observations
517+
of a deployment's current state.
518+
items:
519+
description: DeploymentCondition describes the state of
520+
a deployment at a certain point.
521+
properties:
522+
lastTransitionTime:
523+
description: Last time the condition transitioned from
524+
one status to another.
525+
format: date-time
526+
type: string
527+
lastUpdateTime:
528+
description: The last time this condition was updated.
529+
format: date-time
530+
type: string
531+
message:
532+
description: A human readable message indicating details
533+
about the transition.
534+
type: string
535+
reason:
536+
description: The reason for the condition's last transition.
537+
type: string
538+
status:
539+
description: Status of the condition, one of True, False,
540+
Unknown.
541+
type: string
542+
type:
543+
description: Type of deployment condition.
544+
type: string
545+
required:
546+
- status
547+
- type
548+
type: object
549+
type: array
550+
x-kubernetes-list-map-keys:
551+
- type
552+
x-kubernetes-list-type: map
553+
observedGeneration:
554+
description: The generation observed by the deployment controller.
555+
format: int64
556+
type: integer
557+
readyReplicas:
558+
description: readyReplicas is the number of pods targeted
559+
by this Deployment with a Ready Condition.
560+
format: int32
561+
type: integer
562+
replicas:
563+
description: Total number of non-terminated pods targeted
564+
by this deployment (their labels match the selector).
565+
format: int32
566+
type: integer
567+
unavailableReplicas:
568+
description: |-
569+
Total number of unavailable pods targeted by this deployment. This is the total number of
570+
pods that are still required for the deployment to have 100% available capacity. They may
571+
either be pods that are running but not yet available or pods that still have not been created.
572+
format: int32
573+
type: integer
574+
updatedReplicas:
575+
description: Total number of non-terminated pods targeted
576+
by this deployment that have the desired template spec.
577+
format: int32
578+
type: integer
579+
type: object
580+
required:
581+
- lastUpdated
582+
- name
583+
type: object
445584
endpoints:
446585
description: EndpointStatus contains the status of SageMaker endpoints
447586
properties:
@@ -469,6 +608,37 @@ spec:
469608
- state
470609
type: object
471610
type: object
611+
metricsStatus:
612+
description: Status of metrics collection
613+
properties:
614+
enabled:
615+
description: Whether metrics collection is enabled
616+
type: boolean
617+
errorMessage:
618+
description: Error message if metrics collection is in error state
619+
type: string
620+
metricsScrapeIntervalSeconds:
621+
description: Scrape interval in seconds for metrics collection
622+
from sidecar and model container.
623+
format: int32
624+
type: integer
625+
modelMetrics:
626+
description: Status of model container metrics collection
627+
properties:
628+
path:
629+
description: The path where metrics are available
630+
type: string
631+
port:
632+
description: The port on which metrics are exposed
633+
format: int32
634+
type: integer
635+
type: object
636+
state:
637+
description: Current state of metrics collection
638+
type: string
639+
required:
640+
- enabled
641+
type: object
472642
observedGeneration:
473643
description: Latest generation reconciled by controller
474644
format: int64
@@ -505,6 +675,16 @@ spec:
505675
issuerName:
506676
description: The issuer name of cert manager
507677
type: string
678+
tlsCertificateOutputS3Bucket:
679+
description: S3 bucket that stores the certificate that needs
680+
to be trusted
681+
type: string
682+
tlsCertificateS3Keys:
683+
description: The output tls certificate S3 key that points to
684+
the .pem file
685+
items:
686+
type: string
687+
type: array
508688
type: object
509689
type: object
510690
type: object

0 commit comments

Comments
 (0)