feat: update GPU vars and resources limits
jdetroyes committed Dec 15, 2023
1 parent 837cb63 commit d06dc71
Showing 1 changed file with 23 additions and 20 deletions.
43 changes: 23 additions & 20 deletions templates/deployment.yaml
@@ -19,7 +19,7 @@ spec:
{{- end }}
labels:
{{- include "ollama.labels" . | nindent 8 }}
{{- with .Values.podLabels }}
{{- with .Values.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
@@ -41,24 +41,30 @@ spec:
containerPort: {{ .Values.service.port }}
protocol: TCP
env:
{{- if .Values.ollama.gpu.enable }}
{{- if or .Values.ollama.gpu.enabled .Values.ollama.gpu.enable }}
- name: PATH
value: /usr/local/nvidia/bin:/usr/local/nvidia/lib64:/usr/bin:/usr/sbin:/bin:/sbin
value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
- name: LD_LIBRARY_PATH
value: /usr/local/nvidia/lib64
value: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
- name: NVIDIA_DRIVER_CAPABILITIES
value: compute,utility
{{- end}}
{{- if .Values.resources }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
{{- if .Values.ollama.gpu.enable }}
limits:
nvidia.com/gpu: "{{ .Values.ollama.gpu.nbrGpu | default 1 }}"
{{- end}}
{{- $limits := default dict .Values.resources.limits }}
{{- if or .Values.ollama.gpu.enabled .Values.ollama.gpu.enable }}
{{- $gpuLimit := dict "nvidia.com/gpu" (.Values.ollama.gpu.number | default .Values.ollama.gpu.nbrGpu | default 1) }}
{{- $limits = merge $limits $gpuLimit }}
{{- end }}
{{- $ressources := merge .Values.resources (dict "limits" $limits) }}
{{- toYaml $ressources | nindent 12 }}
{{- end}}
volumeMounts:
- name: ollama-data
mountPath: /root/.ollama
{{- with .Values.volumeMounts }}
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.volumeMounts }}
{{- toYaml . | nindent 12 }}
{{- end }}
{{- if .Values.livenessProbe.enabled }}
livenessProbe:
httpGet:
@@ -83,15 +89,12 @@ spec:
{{- end }}
volumes:
- name: ollama-data
{{- if .Values.persistentVolume.enabled }}
{{- if .Values.persistentVolume.existingClaim }}
{{- if .Values.persistentVolume.enabled }}
persistentVolumeClaim:
claimName: {{ .Values.persistentVolume.existingClaim }}
claimName: {{ .Values.persistentVolume.existingClaim | default (printf "%s" (include "ollama.fullname" .)) }}
{{- else }}
persistentVolumeClaim:
claimName: {{ template "ollama.fullname" . }}
emptyDir: { }
{{- end }}
{{- end }}
{{- with .Values.volumes }}
{{- toYaml . | nindent 8 }}
{{- end }}
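
Note: the reworked volumes block above replaces the nested existingClaim/fullname conditionals with a single default and falls back to an emptyDir when persistence is off. A minimal sketch of the two rendered outcomes, assuming the persistentVolume.enabled and persistentVolume.existingClaim values referenced in the template and a release whose "ollama.fullname" renders as "ollama":

    # persistentVolume.enabled: true, persistentVolume.existingClaim unset
    volumes:
      - name: ollama-data
        persistentVolumeClaim:
          claimName: ollama   # falls back to the "ollama.fullname" output ("ollama" is an assumed release name)

    # persistentVolume.enabled: false
    volumes:
      - name: ollama-data
        emptyDir: {}
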
@@ -103,9 +106,9 @@ spec:
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- if or (.Values.ollama.gpu.enable) (.Values.tolerations) }}
{{- if or .Values.ollama.gpu.enabled .Values.tolerations }}
tolerations:
{{- if .Values.ollama.gpu.enable }}
{{- if or .Values.ollama.gpu.enabled .Values.ollama.gpu.enable }}
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
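
For context, a hedged sketch of values that exercise the updated GPU and resources logic; the key names (ollama.gpu.enabled, ollama.gpu.number, resources.limits) are taken from the template above, while the concrete numbers are illustrative only:

    ollama:
      gpu:
        enabled: true
        number: 1        # falls back to the legacy nbrGpu key, then to 1
    resources:
      requests:
        cpu: "2"
        memory: 4Gi
      limits:
        memory: 8Gi

With these values the template merges resources.limits with the GPU limit, so the container's resources block should render roughly as follows (key order may differ):

    resources:
      limits:
        memory: 8Gi
        nvidia.com/gpu: 1
      requests:
        cpu: "2"
        memory: 4Gi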