diff --git a/charts/dial/Chart.yaml b/charts/dial/Chart.yaml index 6f1e1ba..3ec9476 100644 --- a/charts/dial/Chart.yaml +++ b/charts/dial/Chart.yaml @@ -2,7 +2,7 @@ annotations: category: MachineLearning licenses: Apache-2.0 apiVersion: v2 -appVersion: "1.14.1" +appVersion: "1.15.0" dependencies: - name: common repository: https://charts.bitnami.com/bitnami @@ -53,6 +53,11 @@ dependencies: alias: assistant condition: assistant.enabled version: 1.0.4 + - name: dial-extension + repository: https://charts.epam-rail.com + alias: dial + condition: dial.enabled + version: 1.0.4 description: Umbrella chart for DIAL solution home: https://epam-rail.com icon: "https://docs.epam-rail.com/img/favicon.ico" @@ -65,4 +70,4 @@ maintainers: name: dial sources: - https://github.com/epam/ai-dial-helm/tree/main/charts/dial -version: 3.0.0 +version: 3.1.0 diff --git a/charts/dial/README.md b/charts/dial/README.md index ef25392..72567ed 100644 --- a/charts/dial/README.md +++ b/charts/dial/README.md @@ -1,6 +1,6 @@ # dial -![Version: 3.0.0](https://img.shields.io/badge/Version-3.0.0-informational?style=flat-square) ![AppVersion: 1.14.1](https://img.shields.io/badge/AppVersion-1.14.1-informational?style=flat-square) +![Version: 3.1.0](https://img.shields.io/badge/Version-3.1.0-informational?style=flat-square) ![AppVersion: 1.15.0](https://img.shields.io/badge/AppVersion-1.15.0-informational?style=flat-square) Umbrella chart for DIAL solution @@ -26,6 +26,7 @@ Kubernetes: `>=1.23.0-0` | https://charts.epam-rail.com | bedrock(dial-extension) | 1.0.4 | | https://charts.epam-rail.com | vertexai(dial-extension) | 1.0.4 | | https://charts.epam-rail.com | assistant(dial-extension) | 1.0.4 | +| https://charts.epam-rail.com | dial(dial-extension) | 1.0.4 | ## Installing the Chart @@ -96,7 +97,7 @@ helm install my-release dial/dial -f values.yaml | bedrock.commonLabels."app.kubernetes.io/component" | string | `"adapter"` | | | bedrock.enabled | bool | `false` | Enable/disable 
ai-dial-adapter-bedrock | | bedrock.image.repository | string | `"epam/ai-dial-adapter-bedrock"` | | -| bedrock.image.tag | string | `"0.13.3"` | | +| bedrock.image.tag | string | `"0.14.0"` | | | bedrock.livenessProbe.enabled | bool | `true` | | | bedrock.readinessProbe.enabled | bool | `true` | | | bedrock.secrets | object | `{}` | | @@ -104,7 +105,7 @@ helm install my-release dial/dial -f values.yaml | chat.containerPorts.http | int | `3000` | | | chat.enabled | bool | `true` | Enable/disable ai-dial-chat | | chat.image.repository | string | `"epam/ai-dial-chat"` | | -| chat.image.tag | string | `"0.16.0"` | | +| chat.image.tag | string | `"0.17.0"` | | | chat.livenessProbe.enabled | bool | `true` | | | chat.livenessProbe.failureThreshold | int | `6` | | | chat.livenessProbe.httpGet.path | string | `"/api/health"` | | @@ -112,7 +113,13 @@ helm install my-release dial/dial -f values.yaml | chat.readinessProbe.failureThreshold | int | `6` | | | chat.readinessProbe.httpGet.path | string | `"/api/health"` | | | core.enabled | bool | `true` | Enable/disable ai-dial-core | -| core.image.tag | string | `"0.15.1"` | | +| core.image.tag | string | `"0.16.0"` | | +| dial.commonLabels."app.kubernetes.io/component" | string | `"adapter"` | | +| dial.enabled | bool | `false` | Enable/disable ai-dial-adapter-dial | +| dial.image.repository | string | `"epam/ai-dial-adapter-dial"` | | +| dial.image.tag | string | `"0.1.0"` | | +| dial.livenessProbe.enabled | bool | `true` | | +| dial.readinessProbe.enabled | bool | `true` | | | extraDeploy | list | `[]` | | | keycloak.enabled | bool | `false` | Enable/disable keycloak | | keycloak.extraEnvVars[0].name | string | `"KC_FEATURES"` | | @@ -125,7 +132,7 @@ helm install my-release dial/dial -f values.yaml | openai.commonLabels."app.kubernetes.io/component" | string | `"adapter"` | | | openai.enabled | bool | `false` | Enable/disable ai-dial-adapter-openai | | openai.image.repository | string | `"epam/ai-dial-adapter-openai"` | | -| 
openai.image.tag | string | `"0.13.1"` | | +| openai.image.tag | string | `"0.14.0"` | | | openai.livenessProbe.enabled | bool | `true` | | | openai.readinessProbe.enabled | bool | `true` | | | themes.commonLabels."app.kubernetes.io/component" | string | `"webserver"` | | @@ -133,14 +140,14 @@ helm install my-release dial/dial -f values.yaml | themes.containerSecurityContext.runAsUser | int | `101` | | | themes.enabled | bool | `true` | Enable/disable ai-dial-chat-themes | | themes.image.repository | string | `"epam/ai-dial-chat-themes"` | | -| themes.image.tag | string | `"0.5.0"` | | +| themes.image.tag | string | `"0.6.0"` | | | themes.livenessProbe.enabled | bool | `true` | | | themes.podSecurityContext.fsGroup | int | `101` | | | themes.readinessProbe.enabled | bool | `true` | | | vertexai.commonLabels."app.kubernetes.io/component" | string | `"adapter"` | | | vertexai.enabled | bool | `false` | Enable/disable ai-dial-adapter-vertexai | | vertexai.image.repository | string | `"epam/ai-dial-adapter-vertexai"` | | -| vertexai.image.tag | string | `"0.9.0"` | | +| vertexai.image.tag | string | `"0.10.0"` | | | vertexai.livenessProbe.enabled | bool | `true` | | | vertexai.readinessProbe.enabled | bool | `true` | | diff --git a/charts/dial/examples/gcp/complete/README.md b/charts/dial/examples/gcp/complete/README.md new file mode 100644 index 0000000..68d2620 --- /dev/null +++ b/charts/dial/examples/gcp/complete/README.md @@ -0,0 +1,158 @@ +# AI DIAL GCP Installation Complete Guide + +- [AI DIAL GCP Installation Complete Guide](#ai-dial-gcp-installation-complete-guide) + - [Prerequisites](#prerequisites) + - [Expected Outcome](#expected-outcome) + - [Install](#install) + - [Uninstall](#uninstall) + - [What's next?](#whats-next) + +## Prerequisites + +- GKE 1.24+ +- [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) installed and configured +- [Helm](https://helm.sh/docs/intro/install/) `3.8.0+` installed +- 
[external-dns](https://github.com/kubernetes-sigs/external-dns) installed in the cluster (optional) +- [GCP IAM service accounts](https://cloud.google.com/iam/docs/service-account-overview) created and configured for Core and the Vertex AI adapter +- [Azure AD Workload Identity](https://azure.github.io/azure-workload-identity/docs/introduction.html) installed and configured +- [AWS IAM credentials](https://docs.aws.amazon.com/IAM/latest/UserGuide/getting-started-workloads.html) configured +- [GKE Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) installed and configured +- [GKE Ingress](https://cloud.google.com/kubernetes-engine/docs/concepts/ingress) installed +- [Static IP addresses](https://cloud.google.com/vpc/docs/reserve-static-external-ip-address) reserved for Chat and Core +- [DNS records](https://cloud.google.com/dns/docs/set-up-dns-records-domain-name) configured for Chat and Core +- [Google-managed SSL certificates](https://cloud.google.com/kubernetes-engine/docs/how-to/managed-certs) issued for Chat and Core +- [Google Storage bucket](https://cloud.google.com/storage/docs/buckets) +- [Google MemoryStore RedisCluster](https://cloud.google.com/memorystore/docs/cluster) + - [Downloading the Certificate Authority](https://cloud.google.com/memorystore/docs/redis/manage-in-transit-encryption#downloading_the_certificate_authority) + - [Creating TrustStore](https://docs.oracle.com/cd/E19509-01/820-3503/ggfka/index.html) and storing it as the `redis-truststore.jks` key of a `redis-cacert` Kubernetes secret in the installation namespace (mounted by `values.yaml`) +- [Google Identity](https://docs.epam-rail.com/Auth/Web/IDPs/google) as identity provider +- [Google Vertex AI](https://cloud.google.com/vertex-ai/?hl=en) `gemini-1.5-pro` model deployed: + - [GCP Model Deployment Guide](https://docs.epam-rail.com/Deployment/Vertex%20Model%20Deployment) +- [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview) `gpt-4` model deployed: + - [OpenAI Model Deployment Guide](https://docs.epam-rail.com/Deployment/OpenAI%20Model%20Deployment) +- [Amazon 
Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-bedrock.html) `anthropic.claude-v1` model deployed: + - [Bedrock Model Deployment Guide](https://docs.epam-rail.com/Deployment/Bedrock%20Model%20Deployment) + +## Expected Outcome + +By following the instructions in this guide, you will successfully install the AI DIAL system with connections configured to the Vertex AI, Azure OpenAI and Amazon Bedrock APIs.\ +Please note that this guide represents a very basic deployment scenario, and **should never be used in production**.\ +Configuring authentication provider, encrypted secrets, model usage limits, Ingress allowlisting and other security measures are **out of scope** of this guide. + +## Install + +1. Create Kubernetes namespace, e.g. `dial` + + **Command:** + + ```sh + kubectl create namespace dial + ``` + + **Output:** + + ```console + namespace/dial created + ``` + +1. Add Helm chart repository + + **Command:** + + ```sh + helm repo add dial https://charts.epam-rail.com + ``` + + **Output:** + + ```console + "dial" has been added to your repositories + ``` + +1. Copy [values.yaml](values.yaml) file to your working directory and fill in missing values: + - Replace `%%NAMESPACE%%` with namespace created above, e.g. `dial` + - Replace `%%DOMAIN%%` with your domain name, e.g. `example.com` + - Replace `%%DIAL_API_KEY%%` with generated value (`pwgen -s -1 64`) + - Replace `%%CORE_ENCRYPT_SECRET%%` with generated value (`pwgen -s -1 32`) + - Replace `%%CORE_ENCRYPT_KEY%%` with generated value (`pwgen -s -1 32`) + - Replace `%%NEXTAUTH_SECRET%%` with generated value (`openssl rand -base64 64`) + - Replace `%%TRUSTSTORE_PASSWORD%%` with Java truststore password, e.g. 
`changeit` + - Replace `%%GCP_CORE_SERVICE_ACCOUNT%%` with email of the Google Service Account for Core from [prerequisites](#prerequisites), e.g. `dial-core@dial-191923.iam.gserviceaccount.com` + - Replace `%%GCP_CORE_STORAGE_BUCKET_NAME%%` with Google Storage bucket name from [prerequisites](#prerequisites) + - Replace `%%GCP_CHAT_IP_ADDRESS%%` with static IP address name for Chat from [prerequisites](#prerequisites) + - Replace `%%GCP_CHAT_CERTIFICATE%%` with Google-managed certificate name for Chat from [prerequisites](#prerequisites) + - Replace `%%GCP_CORE_IP_ADDRESS%%` with static IP address name for Core from [prerequisites](#prerequisites) + - Replace `%%GCP_CORE_CERTIFICATE%%` with Google-managed certificate name for Core from [prerequisites](#prerequisites) + - Replace `%%GCP_PROJECT_ID%%` with GCP Project ID, e.g. `dial-191923` + - Replace `%%GCP_REGION%%` with GCP Region, e.g. `us-east1` + - Replace `%%GCP_MEMORYSTORE_REDISCLUSTER_ENDPOINT%%` with MemoryStore RedisCluster endpoint, e.g. `[\"rediss://10.0.0.2:6379\"]` + - Replace `%%AUTH_GOOGLE_CLIENT_ID%%` with Cloud Identity client ID from [prerequisites](#prerequisites) + - Replace `%%AUTH_GOOGLE_SECRET%%` with Cloud Identity client secret from [prerequisites](#prerequisites) + - Replace `%%GCP_VERTEXAI_SERVICE_ACCOUNT%%` with email of the Google Service Account for the Vertex AI adapter from [prerequisites](#prerequisites) + - Replace `%%AWS_ACCESS_KEY%%` with AWS access key from [prerequisites](#prerequisites) + - Replace `%%AWS_SECRET_KEY%%` with AWS secret key from [prerequisites](#prerequisites) + - Replace `%%AZURE_WORKLOAD_IDENTITY_CLIENT_ID%%` with the client ID of the Azure managed identity ([details](https://docs.epam-rail.com/Deployment/OpenAI%20Model%20Deployment#use-kubernetes-service-account-assigned-to-azure-user-assigned-managed-identity)) and `%%AZURE_DEPLOYMENT_HOST%%` with your Azure OpenAI endpoint host, e.g. `my-resource.openai.azure.com` + +1. Install `dial` helm chart in created namespace, applying custom values file: + + **Command:** + + ```sh + helm upgrade --install dial dial/dial -f values.yaml --namespace dial + ``` + + **Output:** + + ```console + Release "dial" does not exist. Installing it now. 
+ NAME: dial + LAST DEPLOYED: Thu Nov 30 16:35:54 2023 + NAMESPACE: dial + STATUS: deployed + REVISION: 1 + TEST SUITE: None + NOTES: + CHART NAME: dial + CHART VERSION: 3.1.0 + APP VERSION: 1.15.0 + ** Please be patient while the chart is being deployed ** + ``` + +1. Now you can access: + - Chat by the following URL: `https://chat.%%DOMAIN%%/`, e.g. `https://chat.example.com/` + - API by the following URL: `https://dial.%%DOMAIN%%/`, e.g. `https://dial.example.com/` + - Use previously generated `%%DIAL_API_KEY%%` value + +## Uninstall + +1. Uninstall `dial` helm chart from created namespace + + **Command:** + + ```sh + helm uninstall dial --namespace dial + ``` + + **Output:** + + ```console + release "dial" uninstalled + ``` + +1. Delete Kubernetes namespace, e.g. `dial` + + **Command:** + + ```sh + kubectl delete namespace dial + ``` + + **Output:** + + ```console + namespace "dial" deleted + ``` + +## What's next? + +- [Configuration](https://docs.epam-rail.com/Deployment/configuration) diff --git a/charts/dial/examples/gcp/complete/values.yaml b/charts/dial/examples/gcp/complete/values.yaml new file mode 100644 index 0000000..75e8c7a --- /dev/null +++ b/charts/dial/examples/gcp/complete/values.yaml @@ -0,0 +1,176 @@ +core: + enabled: true + serviceAccount: + create: true + annotations: + iam.gke.io/gcp-service-account: "%%GCP_CORE_SERVICE_ACCOUNT%%" + podAnnotations: + # -- Annotation hack to restart core pod after each Helm chart upgrade + autorestart: '{{ dateInZone "2006-01-02 15:04:05Z" (now) "UTC" }}' + configuration: + encryption: + secret: "%%CORE_ENCRYPT_SECRET%%" + key: "%%CORE_ENCRYPT_KEY%%" + env: + aidial.config.files: '["/mnt/secrets-store/aidial.config.json"]' + aidial.storage.provider: "google-cloud-storage" + aidial.storage.bucket: "%%GCP_CORE_STORAGE_BUCKET_NAME%%" + aidial.storage.createBucket: "false" + aidial.storage.prefix: "core" + aidial.storage.overrides: '{"jclouds.oauth.credential-type": "bearerTokenCredentials"}' + 
aidial.identityProviders.google.userInfoEndpoint: "https://openidconnect.googleapis.com/v1/userinfo" + aidial.identityProviders.google.rolePath: "fn:getGoogleWorkspaceGroups" + aidial.identityProviders.google.loggingKey: "sub" + aidial.identityProviders.google.loggingSalt: "loggingSalt" + aidial.redis.clusterServersConfig.nodeAddresses: "%%GCP_MEMORYSTORE_REDISCLUSTER_ENDPOINT%%" + aidial.redis.clusterServersConfig.sslTruststore: "file:///mnt/secrets-store/redis-truststore.jks" + aidial.redis.clusterServersConfig.sslTruststorePassword: "%%TRUSTSTORE_PASSWORD%%" + secrets: + aidial.config.json: | + { + "models": { + "gpt-4": { + "type": "chat", + "displayName": "GPT-4", + "iconUrl": "/gpt4.svg", + "endpoint": "http://dial-openai.%%NAMESPACE%%.svc.cluster.local/openai/deployments/gpt-4-0613/chat/completions", + "upstreams": [ + { + "endpoint": "https://%%AZURE_DEPLOYMENT_HOST%%/openai/deployments/gpt-4/chat/completions" + } + ] + }, + "gemini-1.5-pro": { + "type": "chat", + "displayName": "Gemini 1.5 Pro", + "iconUrl": "/Gemini.svg", + "endpoint": "http://dial-vertexai.%%NAMESPACE%%.svc.cluster.local/openai/deployments/gemini-pro/chat/completions" + }, + "anthropic.claude-v1": { + "type": "chat", + "displayName": "Anthropic (Claude)", + "iconUrl": "/anthropic.svg", + "endpoint": "http://dial-bedrock.%%NAMESPACE%%.svc.cluster.local/openai/deployments/anthropic.claude-v1/chat/completions" + } + }, + "roles": { + "chat": { + "limits": { + "gpt-4": {}, + "gemini-1.5-pro": {}, + "anthropic.claude-v1": {} + } + } + } + } + extraVolumes: + - name: config + secret: + secretName: '{{ template "dialCore.names.fullname" . 
}}' + items: + - key: aidial.config.json + path: aidial.config.json + - name: redis-cacert + secret: + secretName: redis-cacert + items: + - key: redis-truststore.jks + path: redis-truststore.jks + extraVolumeMounts: + - name: config + mountPath: "/mnt/secrets-store/aidial.config.json" + subPath: aidial.config.json + readOnly: true + - name: redis-cacert + mountPath: "/mnt/secrets-store/redis-truststore.jks" + subPath: redis-truststore.jks + readOnly: true + redis: + enabled: false + ingress: + enabled: true + ingressClassName: gce + annotations: + kubernetes.io/ingress.class: "gce" + kubernetes.io/ingress.global-static-ip-name: "%%GCP_CORE_IP_ADDRESS%%" + networking.gke.io/managed-certificates: "%%GCP_CORE_CERTIFICATE%%" + hosts: + - dial.%%DOMAIN%% + +chat: + enabled: true + env: + # -- Canonical URL of your site + # ref: https://next-auth.js.org/configuration/options#nextauth_url + NEXTAUTH_URL: "https://chat.%%DOMAIN%%" + # -- DIAL core API endpoint + # Internal service name (DNS name) of DIAL core service + DIAL_API_HOST: "http://dial-core.%%NAMESPACE%%.svc.cluster.local" + # -- List of DIAL chat features to enable; + # ref: https://github.com/epam/ai-dial-chat/blob/development/libs/shared/src/types/features.ts + ENABLED_FEATURES: "conversations-section,prompts-section,top-settings,top-clear-conversation,top-chat-info,top-chat-model-settings,empty-chat-settings,header,footer,likes,conversations-sharing,prompts-sharing,input-files,attachments-manager,conversations-publishing,prompts-publishing" + # -- DIAL themes endpoint + # Internal service name (DNS name) of DIAL themes service; themes ingress is disabled in this example + THEMES_CONFIG_HOST: "http://dial-themes.%%NAMESPACE%%.svc.cluster.local" + DEFAULT_MODEL: "gemini-1.5-pro" + AUTH_GOOGLE_CLIENT_ID: "%%AUTH_GOOGLE_CLIENT_ID%%" + AUTH_GOOGLE_SECRET: "%%AUTH_GOOGLE_SECRET%%" + AUTH_GOOGLE_SCOPE: "openid email profile https://www.googleapis.com/auth/cloud-identity.groups.readonly" + secrets: + NEXTAUTH_SECRET: "%%NEXTAUTH_SECRET%%" + # -- API key 
defined in core configuration + DIAL_API_KEY: "%%DIAL_API_KEY%%" + ingress: + enabled: true + ingressClassName: gce + annotations: + kubernetes.io/ingress.class: "gce" + kubernetes.io/ingress.global-static-ip-name: "%%GCP_CHAT_IP_ADDRESS%%" + networking.gke.io/managed-certificates: "%%GCP_CHAT_CERTIFICATE%%" + hosts: + - chat.%%DOMAIN%% + +themes: + enabled: true + ingress: + enabled: false + +vertexai: + enabled: true + + serviceAccount: + create: true + name: dial-vertexai + annotations: + iam.gke.io/gcp-service-account: "%%GCP_VERTEXAI_SERVICE_ACCOUNT%%" + + env: + DIAL_URL: "http://dial-core.%%NAMESPACE%%.svc.cluster.local" + GCP_PROJECT_ID: "%%GCP_PROJECT_ID%%" + DEFAULT_REGION: "%%GCP_REGION%%" + +bedrock: + enabled: true + + env: + DIAL_URL: "http://dial-core.%%NAMESPACE%%.svc.cluster.local" + AWS_ACCESS_KEY: "%%AWS_ACCESS_KEY%%" + AWS_SECRET_KEY: "%%AWS_SECRET_KEY%%" + + serviceAccount: + create: true + +openai: + enabled: true + + env: + DIAL_URL: "http://dial-core.%%NAMESPACE%%.svc.cluster.local" + + podLabels: + azure.workload.identity/use: "true" + + serviceAccount: + create: true + name: dial-openai + annotations: + azure.workload.identity/client-id: "%%AZURE_WORKLOAD_IDENTITY_CLIENT_ID%%" diff --git a/charts/dial/values.yaml b/charts/dial/values.yaml index a417bba..4d2e2fd 100644 --- a/charts/dial/values.yaml +++ b/charts/dial/values.yaml @@ -53,7 +53,7 @@ core: # -- Enable/disable ai-dial-core enabled: true image: - tag: 0.15.1 + tag: 0.16.0 ### ai-dial-chat configuration ### chat: @@ -63,7 +63,7 @@ chat: app.kubernetes.io/component: "application" image: repository: epam/ai-dial-chat - tag: 0.16.0 + tag: 0.17.0 containerPorts: http: 3000 livenessProbe: @@ -85,7 +85,7 @@ themes: app.kubernetes.io/component: "webserver" image: repository: epam/ai-dial-chat-themes - tag: 0.5.0 + tag: 0.6.0 containerPorts: http: 8080 podSecurityContext: @@ -105,7 +105,7 @@ openai: app.kubernetes.io/component: "adapter" image: repository: epam/ai-dial-adapter-openai - tag: 0.13.1 
+ tag: 0.14.0 # env: # DIAL_USE_FILE_STORAGE: "true" # DIAL_URL: "http://{{ .Release.Name }}-core" @@ -122,7 +122,7 @@ bedrock: app.kubernetes.io/component: "adapter" image: repository: epam/ai-dial-adapter-bedrock - tag: 0.13.3 + tag: 0.14.0 # env: # DIAL_URL: "http://{{ .Release.Name }}-core" secrets: @@ -143,7 +143,23 @@ vertexai: app.kubernetes.io/component: "adapter" image: repository: epam/ai-dial-adapter-vertexai - tag: 0.9.0 + tag: 0.10.0 + # env: + # DIAL_URL: "http://{{ .Release.Name }}-core" + livenessProbe: + enabled: true + readinessProbe: + enabled: true + +### ai-dial-adapter-dial configuration ### +dial: + # -- Enable/disable ai-dial-adapter-dial + enabled: false + commonLabels: + app.kubernetes.io/component: "adapter" + image: + repository: epam/ai-dial-adapter-dial + tag: 0.1.0 # env: # DIAL_URL: "http://{{ .Release.Name }}-core" livenessProbe: