Nimcache
Parameters
Template
The following tabs display the definition's CUE template and the rendered YAML. The rendered YAML is the output of the CUE template when the definition is applied to a cluster.
// nimcache declares the definition's metadata: it is a "component" type
// definition and carries the Spectro Cloud type, provider and category labels.
nimcache: {
	type:        "component"
	description: "NIMCache is the Schema for the nimcaches API."
	labels: {
		"componentdefinition.spectrocloud.com/type": "application"
		"wl.spectrocloud.com/provider":              "apps.nvidia.com"
		"definition.spectrocloud.com/category":      "NVIDIA-NIM"
	}
}
// template renders a single NIMCache resource (apps.nvidia.com/v1alpha1) from
// the values supplied under `parameter`. Optional parameters are copied into
// the output only when they are set (`!= _|_`); `source` and `storage` are
// always copied. `context.workloadName` and `context.name` are not defined in
// this template and must be provided by the rendering environment.
template: {
	output: {
		apiVersion: "apps.nvidia.com/v1alpha1"
		kind: "NIMCache"
		metadata: {
			labels: {
				// User-supplied labels are embedded alongside the two workload labels below.
				if parameter.labels != _|_ {
					parameter.labels
				}
				"wl.spectrocloud.com/name": context.workloadName
				"wl.spectrocloud.com/component": context.name
			}
			if parameter.annotations != _|_ {
				annotations: parameter.annotations
			}
		}
		spec: {
			if parameter.certConfig != _|_ {
				certConfig: parameter.certConfig
			}
			if parameter.env != _|_ {
				env: parameter.env
			}
			if parameter.groupID != _|_ {
				groupID: parameter.groupID
			}
			if parameter.nodeSelector != _|_ {
				nodeSelector: parameter.nodeSelector
			}
			if parameter.proxy != _|_ {
				proxy: parameter.proxy
			}
			if parameter.resources != _|_ {
				resources: parameter.resources
			}
			if parameter.runtimeClassName != _|_ {
				runtimeClassName: parameter.runtimeClassName
			}
			// source and storage are required parameters, so they are copied unconditionally.
			source: parameter.source
			storage: parameter.storage
			if parameter.tolerations != _|_ {
				tolerations: parameter.tolerations
			}
			if parameter.userID != _|_ {
				userID: parameter.userID
			}
		}
	}
	parameter: {
		// +usage=Annotations for the workload
		annotations?: [string]: string
		// +usage=Labels for the workload
		labels?: [string]: string
		// +usage=CertConfig is the name of the ConfigMap containing the custom certificates for secure communication. Deprecated: use `Proxy` instead to configure custom certificates for using proxy.
		certConfig?: {
			// +usage=MountPath is the path where the certificates should be mounted in the container.
			mountPath: string
			// +usage=Name of the ConfigMap containing the certificate data.
			name: string
		}
		// +usage=Env are the additional custom environment variables for the caching job
		env?: [...{
			// +usage=Name of the environment variable. Must be a C_IDENTIFIER.
			name: string
			// +usage=Variable references $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Defaults to "".
			value?: string
			// +usage=Source for the environment variable's value. Cannot be used if value is not empty.
			valueFrom?: {
				// +usage=Selects a key of a ConfigMap.
				configMapKeyRef?: {
					// +usage=The key to select.
					key: string
					// +usage=Name of the referent. This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
					name?: *"" | string
					// +usage=Specify whether the ConfigMap or its key must be defined
					optional?: bool
				}
				// +usage=Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`, spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.
				fieldRef?: {
					// +usage=Version of the schema the FieldPath is written in terms of, defaults to "v1".
					apiVersion?: string
					// +usage=Path of the field to select in the specified API version.
					fieldPath: string
				}
				// +usage=Selects a resource of the container: only resources limits and requests (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.
				resourceFieldRef?: {
					// +usage=Container name: required for volumes, optional for env vars
					containerName?: string
					// +usage=Specifies the output format of the exposed resources, defaults to "1". Pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$.
					divisor?: _
					// +usage=Required: resource to select
					resource: string
				}
				// +usage=Selects a key of a secret in the pod's namespace
				secretKeyRef?: {
					// +usage=The key of the secret to select from. Must be a valid secret key.
					key: string
					// +usage=Name of the referent. This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
					name?: *"" | string
					// +usage=Specify whether the Secret or its key must be defined
					optional?: bool
				}
			}
		}]
		// +usage=GroupID is the group ID for the caching job
		groupID?: int
		// +usage=NodeSelector is the node selector labels to schedule the caching job.
		nodeSelector?: [string]: string
		// +usage=ProxySpec defines the proxy configuration for NIMService.
		proxy?: {
			certConfigMap?: string
			httpProxy?: string
			httpsProxy?: string
			noProxy?: string
		}
		// +usage=Resources defines the minimum resources required for the caching job to run (cpu, memory, gpu).
		resources?: {
			// +usage=CPU indicates the minimum number of CPUs to use while caching NIM. Pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$.
			cpu?: _
			// +usage=Memory indicates the minimum amount of memory to use while caching NIM. Valid values are numbers followed by one of the suffixes Ki, Mi, Gi, or Ti (e.g. "4Gi", "4096Mi"). Pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$.
			memory?: _
		}
		// +usage=RuntimeClassName is the runtimeclass for the caching job
		runtimeClassName?: string
		// +usage=Source is the NIM model source to cache
		source: {
			// +usage=DataStore represents models stored in NVIDIA NeMo DataStore service
			dataStore?: {
				// +usage=AuthSecret is the name of the secret containing the "HF_TOKEN" token
				authSecret: string
				// +usage=DatasetName is the name of the dataset
				datasetName?: string
				// +usage=Endpoint is the HuggingFace endpoint from NeMo DataStore. Pattern: ^https?://.*/v1/hf/?$.
				endpoint: string
				// +usage=ModelName is the name of the model
				modelName?: string
				// +usage=ModelPuller is the containerized huggingface-cli image to pull the data
				modelPuller: string
				// +usage=Namespace is the namespace within NeMo DataStore
				namespace: *"default" | string
				// +usage=PullSecret is the name of the image pull secret for the modelPuller image
				pullSecret: string
				// +usage=Revision is the revision of the object to be cached. This is either a commit hash, branch name or tag.
				revision?: string
			}
			// +usage=HuggingFaceHub represents models stored in HuggingFace Hub
			hf?: {
				// +usage=AuthSecret is the name of the secret containing the "HF_TOKEN" token
				authSecret: string
				// +usage=DatasetName is the name of the dataset
				datasetName?: string
				// +usage=Endpoint is the HuggingFace endpoint. Pattern: ^https?://.*$.
				endpoint: string
				// +usage=ModelName is the name of the model
				modelName?: string
				// +usage=ModelPuller is the containerized huggingface-cli image to pull the data
				modelPuller: string
				// +usage=Namespace is the namespace within the HuggingFace Hub
				namespace: string
				// +usage=PullSecret is the name of the image pull secret for the modelPuller image
				pullSecret: string
				// +usage=Revision is the revision of the object to be cached. This is either a commit hash, branch name or tag.
				revision?: string
			}
			// +usage=NGCSource represents models stored in NGC
			ngc?: {
				// +usage=The name of an existing pull secret containing the NGC_API_KEY
				authSecret: string
				// +usage=Model spec for caching
				model?: {
					// +usage=Buildable indicates generic model profiles that can be optimized with an NVIDIA engine for any GPUs
					buildable?: bool
					// +usage=Engine is the backend engine (tensorrt_llm, vllm)
					engine?: string
					// +usage=GPU is the spec for matching GPUs for caching optimized models
					gpus?: [...{
						// +usage=IDs are the device-ids for a specific GPU SKU
						ids?: [...string]
						// +usage=Product is the GPU product string (h100, a100, l40s)
						product?: string
					}]
					// +usage=Lora indicates a finetuned model with LoRa adapters
					lora?: bool
					// +usage=Precision is the precision for model quantization
					precision?: string
					// +usage=Profiles are the specific model profiles to cache. When these are provided, rest of the model parameters for profile selection are ignored
					profiles?: [...string]
					// +usage=QoSProfile is the supported QoS profile types for the models (throughput, latency)
					qosProfile?: string
					// +usage=TensorParallelism is the minimum GPUs required for the model computations
					tensorParallelism?: string
				}
				// +usage=ModelEndpoint is the endpoint for the model to be cached for Universal NIM
				modelEndpoint?: string
				// +usage=ModelPuller is the container image that can pull the model
				modelPuller: string
				// +usage=PullSecret to pull the model puller image
				pullSecret?: string
			}
		}
		// +usage=Storage is the target storage for caching NIM model
		storage: {
			// +usage=HostPath is the host path volume for caching NIM. Deprecated: use PVC instead.
			hostPath?: string
			// +usage=PersistentVolumeClaim is the pvc volume used for caching NIM
			pvc?: {
				// +usage=Annotations for the PVC
				annotations?: [string]: string
				// +usage=Create specifies whether to create a new PersistentVolumeClaim (PVC). If set to false, an existing PVC must be referenced via the `Name` field.
				create?: bool
				// +usage=Name of the PVC to use. Required if `Create` is false (i.e., using an existing PVC).
				name?: string
				// +usage=Size of the NIM cache in Gi, used during PVC creation
				size?: string
				// +usage=StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created or a default storage class is set in the cluster.
				storageClass?: string
				// +usage=SubPath is the path inside the PVC that should be mounted
				subPath?: string
				// +usage=VolumeAccessMode is the volume access mode of the PVC
				volumeAccessMode?: string
			}
		}
		// +usage=Tolerations for running the job to cache the NIM model
		tolerations?: [...{
			// +usage=Effect indicates the taint effect to match. Empty means match all taint effects. When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
			effect?: string
			// +usage=Key is the taint key that the toleration applies to. Empty means match all taint keys. If the key is empty, operator must be Exists; this combination means to match all values and all keys.
			key?: string
			// +usage=Operator represents a key's relationship to the value. Valid operators are Exists and Equal. Defaults to Equal. Exists is equivalent to wildcard for value, so that a pod can tolerate all taints of a particular category.
			operator?: string
			// +usage=TolerationSeconds represents the period of time the toleration (which must be of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, it is not set, which means tolerate the taint forever (do not evict). Zero and negative values will be treated as 0 (evict immediately) by the system.
			tolerationSeconds?: int
			// +usage=Value is the taint value the toleration matches to. If the operator is Exists, the value should be empty, otherwise just a regular string.
			value?: string
		}]
		// +usage=UserID is the user ID for the caching job
		userID?: int
	}
}