feat: [generativelanguage] Add content caching (#5451)

* feat: Add content caching feat: Add cached_content_token_count to generative_service's UsageMetadata feat: Add cached_content_token_count to CountTokensResponse docs: Small fixes PiperOrigin-RevId: 642001790 Source-Link: googleapis/googleapis@804ed3e Source-Link: googleapis/googleapis-gen@cefe39f Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWFpLWdlbmVyYXRpdmVsYW5ndWFnZS8uT3dsQm90LnlhbWwiLCJoIjoiY2VmZTM5ZjVlOWE0NWFkODI2MzlhMDhlMDIzMTQ4ODQwNmUxYjZiOSJ9 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * chore: update test to not render null in string --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: Daniel Bankhead <[email protected]> Co-authored-by: sofisl <[email protected]> Co-authored-by: Sofia Leon <[email protected]>
googleapis · Jun 26, 2024 · 75c026f · 75c026f
1 parent 3d6134e
commit 75c026f
Show file tree

Hide file tree

Showing 49 changed files with 9,162 additions and 1,017 deletions.
diff --git a/packages/google-ai-generativelanguage/README.md b/packages/google-ai-generativelanguage/README.md
@@ -160,6 +160,11 @@ Samples are in the [`samples/`](https://github.com/googleapis/google-cloud-node/
 | Generative_service.stream_generate_content | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-ai-generativelanguage/samples/generated/v1/generative_service.stream_generate_content.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-ai-generativelanguage/samples/generated/v1/generative_service.stream_generate_content.js,packages/google-ai-generativelanguage/samples/README.md) |
 | Model_service.get_model | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-ai-generativelanguage/samples/generated/v1/model_service.get_model.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-ai-generativelanguage/samples/generated/v1/model_service.get_model.js,packages/google-ai-generativelanguage/samples/README.md) |
 | Model_service.list_models | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-ai-generativelanguage/samples/generated/v1/model_service.list_models.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-ai-generativelanguage/samples/generated/v1/model_service.list_models.js,packages/google-ai-generativelanguage/samples/README.md) |
+| Cache_service.create_cached_content | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-ai-generativelanguage/samples/generated/v1beta/cache_service.create_cached_content.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-ai-generativelanguage/samples/generated/v1beta/cache_service.create_cached_content.js,packages/google-ai-generativelanguage/samples/README.md) |
+| Cache_service.delete_cached_content | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-ai-generativelanguage/samples/generated/v1beta/cache_service.delete_cached_content.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-ai-generativelanguage/samples/generated/v1beta/cache_service.delete_cached_content.js,packages/google-ai-generativelanguage/samples/README.md) |
+| Cache_service.get_cached_content | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-ai-generativelanguage/samples/generated/v1beta/cache_service.get_cached_content.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-ai-generativelanguage/samples/generated/v1beta/cache_service.get_cached_content.js,packages/google-ai-generativelanguage/samples/README.md) |
+| Cache_service.list_cached_contents | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-ai-generativelanguage/samples/generated/v1beta/cache_service.list_cached_contents.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-ai-generativelanguage/samples/generated/v1beta/cache_service.list_cached_contents.js,packages/google-ai-generativelanguage/samples/README.md) |
+| Cache_service.update_cached_content | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-ai-generativelanguage/samples/generated/v1beta/cache_service.update_cached_content.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-ai-generativelanguage/samples/generated/v1beta/cache_service.update_cached_content.js,packages/google-ai-generativelanguage/samples/README.md) |
 | Discuss_service.count_message_tokens | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-ai-generativelanguage/samples/generated/v1beta/discuss_service.count_message_tokens.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-ai-generativelanguage/samples/generated/v1beta/discuss_service.count_message_tokens.js,packages/google-ai-generativelanguage/samples/README.md) |
 | Discuss_service.generate_message | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-ai-generativelanguage/samples/generated/v1beta/discuss_service.generate_message.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-ai-generativelanguage/samples/generated/v1beta/discuss_service.generate_message.js,packages/google-ai-generativelanguage/samples/README.md) |
 | File_service.create_file | [source code](https://github.com/googleapis/google-cloud-node/blob/main/packages/google-ai-generativelanguage/samples/generated/v1beta/file_service.create_file.js) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/google-cloud-node&page=editor&open_in_editor=packages/google-ai-generativelanguage/samples/generated/v1beta/file_service.create_file.js,packages/google-ai-generativelanguage/samples/README.md) |

diff --git a/...ogle-ai-generativelanguage/protos/google/ai/generativelanguage/v1beta/cache_service.proto b/...ogle-ai-generativelanguage/protos/google/ai/generativelanguage/v1beta/cache_service.proto
@@ -0,0 +1,147 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.ai.generativelanguage.v1beta;
+
+import "google/ai/generativelanguage/v1beta/cached_content.proto";
+import "google/api/annotations.proto";
+import "google/api/client.proto";
+import "google/api/field_behavior.proto";
+import "google/api/resource.proto";
+import "google/protobuf/empty.proto";
+import "google/protobuf/field_mask.proto";
+
+option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1beta/generativelanguagepb;generativelanguagepb";
+option java_multiple_files = true;
+option java_outer_classname = "CacheServiceProto";
+option java_package = "com.google.ai.generativelanguage.v1beta";
+
+// API for managing cached content (`CachedContent` resources) that can be
+// used in GenerativeService requests. Caching lets generate-content requests
+// reuse preprocessing work done earlier, possibly lowering their computational
+// cost. It is intended to be used with large contexts.
+service CacheService {
+  option (google.api.default_host) = "generativelanguage.googleapis.com";
+
+  // Lists CachedContent resources, one page at a time.
+  rpc ListCachedContents(ListCachedContentsRequest)
+      returns (ListCachedContentsResponse) {
+    option (google.api.http) = {
+      get: "/v1beta/cachedContents"
+    };
+    option (google.api.method_signature) = "";
+  }
+
+  // Creates a CachedContent resource.
+  rpc CreateCachedContent(CreateCachedContentRequest) returns (CachedContent) {
+    option (google.api.http) = {
+      post: "/v1beta/cachedContents"
+      body: "cached_content"
+    };
+    option (google.api.method_signature) = "cached_content";
+  }
+
+  // Reads a CachedContent resource.
+  rpc GetCachedContent(GetCachedContentRequest) returns (CachedContent) {
+    option (google.api.http) = {
+      get: "/v1beta/{name=cachedContents/*}"
+    };
+    option (google.api.method_signature) = "name";
+  }
+
+  // Updates a CachedContent resource (only expiration is updatable).
+  rpc UpdateCachedContent(UpdateCachedContentRequest) returns (CachedContent) {
+    option (google.api.http) = {
+      patch: "/v1beta/{cached_content.name=cachedContents/*}"
+      body: "cached_content"
+    };
+    option (google.api.method_signature) = "cached_content,update_mask";
+  }
+
+  // Deletes a CachedContent resource.
+  rpc DeleteCachedContent(DeleteCachedContentRequest)
+      returns (google.protobuf.Empty) {
+    option (google.api.http) = {
+      delete: "/v1beta/{name=cachedContents/*}"
+    };
+    option (google.api.method_signature) = "name";
+  }
+}
+
+// Request to list CachedContents.
+message ListCachedContentsRequest {
+  // Optional. The maximum number of cached contents to return. The service may
+  // return fewer than this value. If unspecified, a default number of items
+  // (below the maximum) will be returned. The maximum value is 1000; values
+  // above 1000 will be coerced to 1000.
+  int32 page_size = 1 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. A page token, received from a previous `ListCachedContents` call.
+  // Provide this to retrieve the subsequent page.
+  //
+  // When paginating, all other parameters provided to `ListCachedContents` must
+  // match the call that provided the page token.
+  string page_token = 2 [(google.api.field_behavior) = OPTIONAL];
+}
+
+// Response containing a page of CachedContents.
+message ListCachedContentsResponse {
+  // The cached contents on this page of results.
+  repeated CachedContent cached_contents = 1;
+
+  // A token, which can be sent as `page_token` to retrieve the next page.
+  // If this field is omitted, there are no subsequent pages.
+  string next_page_token = 2;
+}
+
+// Request to create a CachedContent resource.
+message CreateCachedContentRequest {
+  // Required. The cached content to create.
+  CachedContent cached_content = 1 [(google.api.field_behavior) = REQUIRED];
+}
+
+// Request to read a CachedContent resource.
+message GetCachedContentRequest {
+  // Required. The resource name referring to the content cache entry.
+  // Format: `cachedContents/{id}`
+  string name = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      type: "generativelanguage.googleapis.com/CachedContent"
+    }
+  ];
+}
+
+// Request to update a CachedContent resource.
+message UpdateCachedContentRequest {
+  // Required. The content cache entry to update.
+  CachedContent cached_content = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // The list of fields to update. Only the expiration is updatable.
+  google.protobuf.FieldMask update_mask = 2;
+}
+
+// Request to delete a CachedContent resource.
+message DeleteCachedContentRequest {
+  // Required. The resource name referring to the content cache entry.
+  // Format: `cachedContents/{id}`
+  string name = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      type: "generativelanguage.googleapis.com/CachedContent"
+    }
+  ];
+}
diff --git a/...gle-ai-generativelanguage/protos/google/ai/generativelanguage/v1beta/cached_content.proto b/...gle-ai-generativelanguage/protos/google/ai/generativelanguage/v1beta/cached_content.proto
@@ -0,0 +1,125 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.ai.generativelanguage.v1beta;
+
+import "google/ai/generativelanguage/v1beta/content.proto";
+import "google/api/field_behavior.proto";
+import "google/api/resource.proto";
+import "google/protobuf/duration.proto";
+import "google/protobuf/timestamp.proto";
+
+option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1beta/generativelanguagepb;generativelanguagepb";
+option java_multiple_files = true;
+option java_outer_classname = "CachedContentProto";
+option java_package = "com.google.ai.generativelanguage.v1beta";
+
+// Content that has been preprocessed and can be used in subsequent requests
+// to GenerativeService.
+//
+// Cached content can only be used with the model it was created for.
+message CachedContent {
+  option (google.api.resource) = {
+    type: "generativelanguage.googleapis.com/CachedContent"
+    pattern: "cachedContents/{id}"
+    plural: "cachedContents"
+    singular: "cachedContent"
+  };
+
+  // Metadata on the usage of the cached content.
+  message UsageMetadata {
+    // Total number of tokens that the cached content consumes.
+    int32 total_token_count = 1;
+  }
+
+  // Specifies when this resource will expire.
+  oneof expiration {
+    // Timestamp in UTC of when this resource is considered expired.
+    // This is *always* provided on output, regardless of what was sent
+    // on input.
+    google.protobuf.Timestamp expire_time = 9;
+
+    // Input only. New TTL for this resource.
+    google.protobuf.Duration ttl = 10
+        [(google.api.field_behavior) = INPUT_ONLY];
+  }
+
+  // Optional. Identifier. The resource name referring to the cached content.
+  // Format: `cachedContents/{id}`
+  optional string name = 1 [
+    (google.api.field_behavior) = IDENTIFIER,
+    (google.api.field_behavior) = OPTIONAL
+  ];
+
+  // Optional. Immutable. The user-generated meaningful display name of the
+  // cached content. Maximum 128 Unicode characters.
+  optional string display_name = 11 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE
+  ];
+
+  // Required. Immutable. The name of the `Model` to use for cached content.
+  // Format: `models/{model}`
+  optional string model = 2 [
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      type: "generativelanguage.googleapis.com/Model"
+    }
+  ];
+
+  // Optional. Input only. Immutable. Developer set system instruction.
+  // Currently text only.
+  optional Content system_instruction = 3 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.field_behavior) = INPUT_ONLY
+  ];
+
+  // Optional. Input only. Immutable. The content to cache.
+  repeated Content contents = 4 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.field_behavior) = INPUT_ONLY
+  ];
+
+  // Optional. Input only. Immutable. A list of `Tools` the model may use to
+  // generate the next response.
+  repeated Tool tools = 5 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.field_behavior) = INPUT_ONLY
+  ];
+
+  // Optional. Input only. Immutable. Tool config. This config is shared for all
+  // tools.
+  optional ToolConfig tool_config = 6 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.field_behavior) = INPUT_ONLY
+  ];
+
+  // Output only. Creation time of the cache entry.
+  google.protobuf.Timestamp create_time = 7
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. When the cache entry was last updated in UTC time.
+  google.protobuf.Timestamp update_time = 8
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. Metadata on the usage of the cached content.
+  UsageMetadata usage_metadata = 12 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
diff --git a/...ges/google-ai-generativelanguage/protos/google/ai/generativelanguage/v1beta/content.proto b/...ges/google-ai-generativelanguage/protos/google/ai/generativelanguage/v1beta/content.proto
@@ -162,7 +162,7 @@ message FunctionCallingConfig {
     MODE_UNSPECIFIED = 0;
 
     // Default model behavior, model decides to predict either a function call
-    // or a natural language repspose.
+    // or a natural language response.
     AUTO = 1;
 
     // Model is constrained to always predicting a function call only.

diff --git a/...ai-generativelanguage/protos/google/ai/generativelanguage/v1beta/generative_service.proto b/...ai-generativelanguage/protos/google/ai/generativelanguage/v1beta/generative_service.proto
@@ -188,6 +188,17 @@ message GenerateContentRequest {
   // Optional. Configuration options for model generation and outputs.
   optional GenerationConfig generation_config = 4
       [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. The name of the cached content used as context to serve the
+  // prediction. Note: only used in explicit caching, where users can have
+  // control over caching (e.g. what content to cache) and enjoy guaranteed cost
+  // savings. Format: `cachedContents/{id}`
+  optional string cached_content = 9 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.resource_reference) = {
+      type: "generativelanguage.googleapis.com/CachedContent"
+    }
+  ];
 }
 
 // Configuration options for model generation and outputs. Not all parameters
@@ -324,9 +335,15 @@ message GenerateContentResponse {
 
   // Metadata on the generation request's token usage.
   message UsageMetadata {
-    // Number of tokens in the prompt.
+    // Number of tokens in the prompt. When cached_content is set, this is still
+    // the total effective prompt size. I.e. this includes the number of tokens
+    // in the cached content.
     int32 prompt_token_count = 1;
 
+    // Number of tokens in the cached part of the prompt, i.e. in the cached
+    // content.
+    int32 cached_content_token_count = 4;
+
     // Total number of tokens across the generated candidates.
     int32 candidates_token_count = 2;
 
@@ -704,6 +721,12 @@ message CountTokensRequest {
 message CountTokensResponse {
   // The number of tokens that the `model` tokenizes the `prompt` into.
   //
-  // Always non-negative.
+  // Always non-negative. When cached_content is set, this is still the total
+  // effective prompt size. I.e. this includes the number of tokens in the
+  // cached content.
   int32 total_tokens = 1;
+
+  // Number of tokens in the cached part of the prompt, i.e. in the cached
+  // content.
+  int32 cached_content_token_count = 5;
 }
diff --git a/packages/google-ai-generativelanguage/protos/google/ai/generativelanguage/v1beta/model.proto b/packages/google-ai-generativelanguage/protos/google/ai/generativelanguage/v1beta/model.proto
@@ -77,7 +77,7 @@ message Model {
 
   // Controls the randomness of the output.
   //
-  // Values can range over `[0.0,1.0]`, inclusive. A value closer to `1.0` will
+  // Values can range over `[0.0,2.0]`, inclusive. A higher value will
   // produce responses that are more varied, while a value closer to `0.0` will
   // typically result in less surprising responses from the model.
   // This value specifies default to be used by the backend while making the