build: use codecov's action, now that it's authless (#584)
yoshi-automation authored Apr 20, 2020
1 parent f9458e7 commit 2369c8b
Showing 10 changed files with 3,836 additions and 620 deletions.
@@ -1,4 +1,4 @@
// Copyright 2019 Google LLC.
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -11,7 +11,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

@@ -20,6 +19,8 @@ package google.cloud.speech.v1p1beta1;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/speech/v1p1beta1/resource.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/any.proto";
import "google/protobuf/duration.proto";
@@ -36,7 +37,8 @@ option objc_class_prefix = "GCS";
// Service that implements Google Cloud Speech API.
service Speech {
option (google.api.default_host) = "speech.googleapis.com";
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-platform";

// Performs synchronous speech recognition: receive results after all audio
// has been sent and processed.
@@ -54,7 +56,8 @@ service Speech {
// a `LongRunningRecognizeResponse` message.
// For more information on asynchronous speech recognition, see the
// [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
rpc LongRunningRecognize(LongRunningRecognizeRequest)
returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1p1beta1/speech:longrunningrecognize"
body: "*"
@@ -68,8 +71,8 @@ service Speech {

// Performs bidirectional streaming speech recognition: receive results while
// sending audio. This method is only available via the gRPC API (not REST).
rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
}
rpc StreamingRecognize(stream StreamingRecognizeRequest)
returns (stream StreamingRecognizeResponse) {}
}

// The top-level message sent by the client for the `Recognize` method.
@@ -169,7 +172,8 @@ message RecognitionConfig {
// an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
// encoding configuration must match the encoding described in the audio
// header; otherwise the request returns an
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
// code.
enum AudioEncoding {
// Not specified.
ENCODING_UNSPECIFIED = 0;
@@ -215,14 +219,15 @@ message RecognitionConfig {
SPEEX_WITH_HEADER_BYTE = 7;

// MP3 audio. Support all standard MP3 bitrates (which range from 32-320
// kbps). When using this encoding, `sample_rate_hertz` can be optionally
// unset if not known.
// kbps). When using this encoding, `sample_rate_hertz` has to match the
// sample rate of the file being used.
MP3 = 8;
}

// Encoding of audio data sent in all `RecognitionAudio` messages.
// This field is optional for `FLAC` and `WAV` audio files and required
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
// for all other audio formats. For details, see
// [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
AudioEncoding encoding = 1;

// Sample rate in Hertz of the audio data sent in all
@@ -231,7 +236,8 @@ message RecognitionConfig {
// source to 16000 Hz. If that's not possible, use the native sample rate of
// the audio source (instead of re-sampling).
// This field is optional for FLAC and WAV audio files, but is
// required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
// required for all other audio formats. For details, see
// [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
int32 sample_rate_hertz = 2;

// The number of channels in the input audio data.
@@ -289,6 +295,13 @@ message RecognitionConfig {
// won't be filtered out.
bool profanity_filter = 5;

// Speech adaptation configuration improves the accuracy of speech
// recognition. When speech adaptation is set, it supersedes the
// `speech_contexts` field. For more information, see the [speech
// adaptation](https://cloud.google.com/speech-to-text/docs/context-strength)
// documentation.
SpeechAdaptation adaptation = 20;

// Array of [SpeechContext][google.cloud.speech.v1p1beta1.SpeechContext].
// A means to provide context to assist the speech recognition. For more
// information, see
@@ -311,9 +324,6 @@ message RecognitionConfig {
// This feature is only available in select languages. Setting this for
// requests in other languages has no effect at all.
// The default 'false' value does not add punctuation to result hypotheses.
// Note: This is currently offered as an experimental service, complimentary
// to all users. In the future this may be exclusively available as a
// premium feature.
bool enable_automatic_punctuation = 11;

// If 'true', enables speaker detection for each recognized word in
@@ -401,6 +411,10 @@ message SpeakerDiarizationConfig {
// flexibility by allowing the system to automatically determine the correct
// number of speakers. If not set, the default value is 6.
int32 max_speaker_count = 3;

// Output only. Unused.
int32 speaker_tag = 5
[deprecated = true, (google.api.field_behavior) = OUTPUT_ONLY];
}

// Description of audio data to be recognized.
@@ -564,8 +578,8 @@ message SpeechContext {

// Contains audio data in the encoding specified in the `RecognitionConfig`.
// Either `content` or `uri` must be supplied. Supplying both or neither
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
// [content limits](https://cloud.google.com/speech-to-text/quotas#content).
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
// See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
message RecognitionAudio {
// The audio source, which is either inline content or a Google Cloud
// Storage uri.
@@ -580,8 +594,9 @@ message RecognitionAudio {
// Currently, only Google Cloud Storage URIs are
// supported, which must be specified in the following format:
// `gs://bucket_name/object_name` (other URI formats return
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
// [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
// For more information, see [Request
// URIs](https://cloud.google.com/storage/docs/reference-uris).
string uri = 2;
}
}
@@ -619,6 +634,10 @@ message LongRunningRecognizeMetadata {

// Time of the most recent processing update.
google.protobuf.Timestamp last_update_time = 3;

// The URI of the audio file being transcribed. Empty if the audio was sent
// as byte content.
string uri = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// `StreamingRecognizeResponse` is the only message returned to the client by
@@ -732,10 +751,10 @@ message StreamingRecognitionResult {
// For audio_channel_count = N, its output values can range from '1' to 'N'.
int32 channel_tag = 5;

// The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
// of the language in this result. This language code was detected to have
// the most likelihood of being spoken in the audio.
string language_code = 6;
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
// language tag of the language in this result. This language code was
// detected to have the most likelihood of being spoken in the audio.
string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A speech recognition result corresponding to a portion of the audio.
@@ -751,10 +770,10 @@ message SpeechRecognitionResult {
// For audio_channel_count = N, its output values can range from '1' to 'N'.
int32 channel_tag = 2;

// The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
// of the language in this result. This language code was detected to have
// the most likelihood of being spoken in the audio.
string language_code = 5;
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
// language tag of the language in this result. This language code was
// detected to have the most likelihood of being spoken in the audio.
string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Alternative hypotheses (a.k.a. n-best list).
@@ -807,10 +826,10 @@ message WordInfo {
// The default of 0.0 is a sentinel value indicating `confidence` was not set.
float confidence = 4;

// A distinct integer value is assigned for every speaker within
// Output only. A distinct integer value is assigned for every speaker within
// the audio. This field specifies which one of those speakers was detected to
// have spoken this word. Value ranges from '1' to diarization_speaker_count.
// speaker_tag is set if enable_speaker_diarization = 'true' and only in the
// top alternative.
int32 speaker_tag = 5;
int32 speaker_tag = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}
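
The new `adaptation` field (20) on `RecognitionConfig` takes a `SpeechAdaptation` message and, when set, supersedes `speech_contexts`. As a rough illustration only (not part of this commit), a config using an inline phrase set and custom class could look like the following protobuf text-format sketch; the encoding, sample rate, language code, and the "travel-city" class are made-up example values:

# Illustrative RecognitionConfig in protobuf text format (not from this commit).
# LINEAR16 / 16000 / "en-US" and the "travel-city" class are example values only.
encoding: LINEAR16
sample_rate_hertz: 16000
language_code: "en-US"
adaptation {
  phrase_sets {
    # Inline phrase set: `name` left blank, hints supplied directly.
    phrases { value: "weather in ${travel-city}" }
    boost: 15.0  # most use cases are best served by values between 0 and 20
  }
  custom_classes {
    # Inline class: `name` left blank, referenced above via ${travel-city}.
    custom_class_id: "travel-city"
    items { value: "Southampton" }
    items { value: "Rotterdam" }
  }
}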
google/cloud/speech/v1p1beta1/resource.proto (new file)
@@ -0,0 +1,129 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.speech.v1p1beta1;

import "google/api/annotations.proto";
import "google/api/resource.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/speech/v1p1beta1;speech";
option java_multiple_files = true;
option java_outer_classname = "SpeechResourceProto";
option java_package = "com.google.cloud.speech.v1p1beta1";
option objc_class_prefix = "GCS";

// A set of words or phrases that represents a common concept likely to appear
// in your audio, for example a list of passenger ship names. CustomClass items
// can be substituted into placeholders that you set in PhraseSet phrases.
message CustomClass {
option (google.api.resource) = {
type: "speech.googleapis.com/CustomClass"
pattern: "projects/{project}/locations/{location}/customClasses/{custom_class}"
};

// An item of the class.
message ClassItem {
// The class item's value.
string value = 1;
}

// The resource name of the custom class.
string name = 1;

// If this custom class is a resource, the custom_class_id is the resource id
// of the CustomClass.
string custom_class_id = 2;

// A collection of class items.
repeated ClassItem items = 3;
}

// Provides "hints" to the speech recognizer to favor specific words and phrases
// in the results.
message PhraseSet {
option (google.api.resource) = {
type: "speech.googleapis.com/PhraseSet"
pattern: "projects/{project}/locations/{location}/phraseSets/{phrase_set}"
};

// A phrase containing words and phrase "hints" so that
// the speech recognition is more likely to recognize them. This can be used
// to improve the accuracy for specific words and phrases, for example, if
// specific commands are typically spoken by the user. This can also be used
// to add additional words to the vocabulary of the recognizer. See
// [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
//
// List items can also include pre-built or custom classes containing groups
// of words that represent common concepts that occur in natural language. For
// example, rather than providing a phrase hint for every month of the
// year (e.g. "i was born in january", "i was born in febuary", ...), use the
// pre-built `$MONTH` class improves the likelihood of correctly transcribing
// audio that includes months (e.g. "i was born in $month").
// To refer to pre-built classes, use the class' symbol prepended with `$`
// e.g. `$MONTH`. To refer to custom classes that were defined inline in the
// request, set the class's `custom_class_id` to a string unique to all class
// resources and inline classes. Then use the class' id wrapped in `${...}`,
// e.g. "${my-months}". To refer to custom class resources, use the class'
// id wrapped in `${}` (e.g. `${my-months}`).
message Phrase {
// The phrase itself.
string value = 1;

// Hint Boost. Overrides the boost set at the phrase set level.
// A positive value will increase the probability that a specific phrase will
// be recognized over other similar sounding phrases. The higher the boost,
// the higher the chance of false positive recognition as well. Negative
// boost values would correspond to anti-biasing. Anti-biasing is not
// enabled, so negative boost will simply be ignored. Though `boost` can
// accept a wide range of positive values, most use cases are best served
// with values between 0 and 20. We recommend using a binary search approach
// to finding the optimal value for your use case. Speech recognition
// will skip PhraseSets with a boost value of 0.
float boost = 2;
}

// The resource name of the phrase set.
string name = 1;

// A list of word and phrases.
repeated Phrase phrases = 2;

// Hint Boost. A positive value will increase the probability that a specific
// phrase will be recognized over other similar sounding phrases. The higher
// the boost, the higher the chance of false positive recognition as well.
// Negative boost values would correspond to anti-biasing. Anti-biasing is not
// enabled, so negative boost will simply be ignored. Though `boost` can
// accept a wide range of positive values, most use cases are best served with
// values between 0 (exclusive) and 20. We recommend using a binary search
// approach to finding the optimal value for your use case. Speech recognition
// will skip PhraseSets with a boost value of 0.
float boost = 4;
}

// Speech adaptation configuration.
message SpeechAdaptation {
// A collection of phrase sets. To specify the hints inline, leave the
// phrase set's `name` blank and fill in the rest of its fields. Any
// phrase set can use any custom class.
repeated PhraseSet phrase_sets = 1;

// A collection of custom classes. To specify the classes inline, leave the
// class' `name` blank and fill in the rest of its fields, giving it a unique
// `custom_class_id`. Refer to the inline defined class in phrase hints by its
// `custom_class_id`.
repeated CustomClass custom_classes = 2;
}
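
Because `CustomClass` and `PhraseSet` also carry `google.api.resource` options, they can exist as standalone resources whose `name` follows the declared patterns. A hypothetical pair, again in protobuf text format with made-up project, location, and resource IDs, might look like this:

# Hypothetical CustomClass resource; "my-project", "global", and "my-months"
# are placeholders, not values from this commit.
name: "projects/my-project/locations/global/customClasses/my-months"
custom_class_id: "my-months"
items { value: "january" }
items { value: "february" }

# Hypothetical PhraseSet resource referencing the class above via ${my-months}.
# The phrase-level boost overrides the phrase-set-level boost.
name: "projects/my-project/locations/global/phraseSets/birthday-phrases"
phrases { value: "i was born in ${my-months}" boost: 10.0 }
boost: 5.0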