From 509a1ff1b0f3c3c90e9096bdf4d4b2258717fd83 Mon Sep 17 00:00:00 2001 From: Anthony Wat Date: Sat, 24 Feb 2024 23:17:29 -0500 Subject: [PATCH] feat: Add custom_time_zone & file_extension to extended_S3_configuration block for aws_kinesis_firehose_delivery_stream --- .changelog/35969.txt | 3 + internal/service/firehose/delivery_stream.go | 20 ++ .../service/firehose/delivery_stream_test.go | 86 +++++++ ...sis_firehose_delivery_stream.html.markdown | 229 +++++++++++------- 4 files changed, 255 insertions(+), 83 deletions(-) create mode 100644 .changelog/35969.txt diff --git a/.changelog/35969.txt b/.changelog/35969.txt new file mode 100644 index 00000000000..2bba53ec9ad --- /dev/null +++ b/.changelog/35969.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +aws_kinesis_firehose_delivery_stream: Add `custom_time_zone` and `file_extension` arguments to the `extended_s3_configuration` configuration block +``` \ No newline at end of file diff --git a/internal/service/firehose/delivery_stream.go b/internal/service/firehose/delivery_stream.go index 4c8dfa03232..71b95012532 100644 --- a/internal/service/firehose/delivery_stream.go +++ b/internal/service/firehose/delivery_stream.go @@ -438,6 +438,12 @@ func resourceDeliveryStream() *schema.Resource { Default: types.CompressionFormatUncompressed, ValidateDiagFunc: enum.Validate[types.CompressionFormat](), }, + "custom_time_zone": { + Type: schema.TypeString, + Optional: true, + Default: "UTC", + ValidateFunc: validation.StringLenBetween(0, 50), + }, "data_format_conversion_configuration": { Type: schema.TypeList, Optional: true, @@ -688,6 +694,14 @@ func resourceDeliveryStream() *schema.Resource { Optional: true, ValidateFunc: validation.StringLenBetween(0, 1024), }, + "file_extension": { + Type: schema.TypeString, + Optional: true, + ValidateFunc: validation.All( + validation.StringLenBetween(0, 50), + validation.StringMatch(regexache.MustCompile(`^$|\.[0-9a-z!\-_.*'()]+`), ""), + ), + }, "kms_key_arn": { Type: 
schema.TypeString, Optional: true, @@ -1775,8 +1789,10 @@ func expandExtendedS3DestinationConfiguration(s3 map[string]interface{}) *types. }, Prefix: expandPrefix(s3), CompressionFormat: types.CompressionFormat(s3["compression_format"].(string)), + CustomTimeZone: aws.String(s3["custom_time_zone"].(string)), DataFormatConversionConfiguration: expandDataFormatConversionConfiguration(s3["data_format_conversion_configuration"].([]interface{})), EncryptionConfiguration: expandEncryptionConfiguration(s3), + FileExtension: aws.String(s3["file_extension"].(string)), } if _, ok := s3["processing_configuration"]; ok { @@ -1864,7 +1880,9 @@ func expandExtendedS3DestinationUpdate(s3 map[string]interface{}) *types.Extende IntervalInSeconds: aws.Int32(int32(s3["buffering_interval"].(int))), SizeInMBs: aws.Int32(int32(s3["buffering_size"].(int))), }, + CustomTimeZone: aws.String(s3["custom_time_zone"].(string)), ErrorOutputPrefix: aws.String(s3["error_output_prefix"].(string)), + FileExtension: aws.String(s3["file_extension"].(string)), Prefix: expandPrefix(s3), CompressionFormat: types.CompressionFormat(s3["compression_format"].(string)), EncryptionConfiguration: expandEncryptionConfiguration(s3), @@ -3057,8 +3075,10 @@ func flattenExtendedS3DestinationDescription(description *types.ExtendedS3Destin "bucket_arn": aws.ToString(description.BucketARN), "cloudwatch_logging_options": flattenCloudWatchLoggingOptions(description.CloudWatchLoggingOptions), "compression_format": description.CompressionFormat, + "custom_time_zone": aws.ToString(description.CustomTimeZone), "data_format_conversion_configuration": flattenDataFormatConversionConfiguration(description.DataFormatConversionConfiguration), "error_output_prefix": aws.ToString(description.ErrorOutputPrefix), + "file_extension": aws.ToString(description.FileExtension), "prefix": aws.ToString(description.Prefix), "processing_configuration": flattenProcessingConfiguration(description.ProcessingConfiguration, 
destinationTypeExtendedS3, aws.ToString(description.RoleARN)), "dynamic_partitioning_configuration": flattenDynamicPartitioningConfiguration(description.DynamicPartitioningConfiguration), diff --git a/internal/service/firehose/delivery_stream_test.go b/internal/service/firehose/delivery_stream_test.go index 9250a83bc33..c4f173e19b4 100644 --- a/internal/service/firehose/delivery_stream_test.go +++ b/internal/service/firehose/delivery_stream_test.go @@ -53,9 +53,11 @@ func TestAccFirehoseDeliveryStream_basic(t *testing.T) { resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.cloudwatch_logging_options.0.log_group_name", ""), resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.cloudwatch_logging_options.0.log_stream_name", ""), resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.compression_format", "UNCOMPRESSED"), + resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.custom_time_zone", "UTC"), resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.data_format_conversion_configuration.#", "0"), resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.dynamic_partitioning_configuration.#", "0"), resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.error_output_prefix", ""), + resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.file_extension", ""), resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.kms_key_arn", ""), resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.prefix", ""), resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.processing_configuration.#", "1"), @@ -888,6 +890,58 @@ func TestAccFirehoseDeliveryStream_extendedS3Updates(t *testing.T) { }) } +func TestAccFirehoseDeliveryStream_extendedS3CustomTimeZoneAndFileExtensionUpdates(t *testing.T) { + ctx := acctest.Context(t) + var stream types.DeliveryStreamDescription + rName 
:= sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_kinesis_firehose_delivery_stream.test" + customTimeZone := "America/Los_Angeles" + fileExtension := ".json" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, names.FirehoseServiceID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckDeliveryStreamDestroy_ExtendedS3(ctx), + Steps: []resource.TestStep{ + { + Config: testAccDeliveryStreamConfig_extendedS3CustomTimeZoneAndFileExtensionUpdates(rName, customTimeZone, fileExtension), + Check: resource.ComposeAggregateTestCheckFunc( + testAccCheckDeliveryStreamExists(ctx, resourceName, &stream), + resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.custom_time_zone", customTimeZone), + resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.file_extension", fileExtension), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + { + Config: testAccDeliveryStreamConfig_extendedS3CustomTimeZoneAndFileExtensionUpdatesNoValues(rName), + Check: resource.ComposeAggregateTestCheckFunc( + testAccCheckDeliveryStreamExists(ctx, resourceName, &stream), + resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.custom_time_zone", "UTC"), + resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.file_extension", ""), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + { + Config: testAccDeliveryStreamConfig_extendedS3CustomTimeZoneAndFileExtensionUpdates(rName, customTimeZone, fileExtension), + Check: resource.ComposeAggregateTestCheckFunc( + testAccCheckDeliveryStreamExists(ctx, resourceName, &stream), + resource.TestCheckResourceAttr(resourceName, "extended_s3_configuration.0.custom_time_zone", customTimeZone), + resource.TestCheckResourceAttr(resourceName, 
"extended_s3_configuration.0.file_extension", fileExtension), + ), + }, + }, + }) +} + func TestAccFirehoseDeliveryStream_ExtendedS3_kinesisStreamSource(t *testing.T) { ctx := acctest.Context(t) var stream types.DeliveryStreamDescription @@ -3354,6 +3408,38 @@ resource "aws_kinesis_firehose_delivery_stream" "test" { `, rName)) } +func testAccDeliveryStreamConfig_extendedS3CustomTimeZoneAndFileExtensionUpdates(rName, customTimeZone, fileExtension string) string { + return acctest.ConfigCompose(testAccDeliveryStreamConfig_base(rName), fmt.Sprintf(` +resource "aws_kinesis_firehose_delivery_stream" "test" { + depends_on = [aws_iam_role_policy.firehose] + name = %[1]q + destination = "extended_s3" + + extended_s3_configuration { + role_arn = aws_iam_role.firehose.arn + bucket_arn = aws_s3_bucket.bucket.arn + custom_time_zone = %[2]q + file_extension = %[3]q + } +} +`, rName, customTimeZone, fileExtension)) +} + +func testAccDeliveryStreamConfig_extendedS3CustomTimeZoneAndFileExtensionUpdatesNoValues(rName string) string { + return acctest.ConfigCompose(testAccDeliveryStreamConfig_base(rName), fmt.Sprintf(` +resource "aws_kinesis_firehose_delivery_stream" "test" { + depends_on = [aws_iam_role_policy.firehose] + name = %[1]q + destination = "extended_s3" + + extended_s3_configuration { + role_arn = aws_iam_role.firehose.arn + bucket_arn = aws_s3_bucket.bucket.arn + } +} +`, rName)) +} + func testAccDeliveryStreamConfig_baseRedshift(rName string) string { return acctest.ConfigCompose( testAccDeliveryStreamConfig_base(rName), diff --git a/website/docs/r/kinesis_firehose_delivery_stream.html.markdown b/website/docs/r/kinesis_firehose_delivery_stream.html.markdown index 1188309ff29..38f33ce6b8d 100644 --- a/website/docs/r/kinesis_firehose_delivery_stream.html.markdown +++ b/website/docs/r/kinesis_firehose_delivery_stream.html.markdown @@ -598,66 +598,83 @@ This resource supports the following arguments: * `name` - (Required) A name to identify the stream. 
This is unique to the AWS account and region the Stream is created in. When using for WAF logging, name must be prefixed with `aws-waf-logs-`. See [AWS Documentation](https://docs.aws.amazon.com/waf/latest/developerguide/waf-policies.html#waf-policies-logging-config) for more details. * `tags` - (Optional) A map of tags to assign to the resource. If configured with a provider [`default_tags` configuration block](https://registry.terraform.io/providers/hashicorp/aws/latest/docs#default_tags-configuration-block) present, tags with matching keys will overwrite those defined at the provider-level. -* `kinesis_source_configuration` - (Optional) The stream and role Amazon Resource Names (ARNs) for a Kinesis data stream used as the source for a delivery stream. More details are given below. -* `msk_source_configuration` - (Optional) The configuration for the Amazon MSK cluster to be used as the source for a delivery stream. More details are given below. -* `server_side_encryption` - (Optional) Encrypt at rest options. -Server-side encryption should not be enabled when a kinesis stream is configured as the source of the firehose delivery stream. +* `kinesis_source_configuration` - (Optional) The stream and role Amazon Resource Names (ARNs) for a Kinesis data stream used as the source for a delivery stream. See [`kinesis_source_configuration` block](#kinesis_source_configuration-block) below for details. +* `msk_source_configuration` - (Optional) The configuration for the Amazon MSK cluster to be used as the source for a delivery stream. See [`msk_source_configuration` block](#msk_source_configuration-block) below for details. +* `server_side_encryption` - (Optional) Encrypt at rest options. See [`server_side_encryption` block](#server_side_encryption-block) below for details. + + **NOTE:** Server-side encryption should not be enabled when a kinesis stream is configured as the source of the firehose delivery stream. 
* `destination` – (Required) This is the destination to where the data is delivered. The only options are `s3` (Deprecated, use `extended_s3` instead), `extended_s3`, `redshift`, `elasticsearch`, `splunk`, `http_endpoint`, `opensearch` and `opensearchserverless`. -* `elasticsearch_configuration` - (Optional) Configuration options when `destination` is `elasticsearch`. More details are given below. -* `extended_s3_configuration` - (Optional, only Required when `destination` is `extended_s3`) Enhanced configuration options for the s3 destination. More details are given below. -* `http_endpoint_configuration` - (Optional) Configuration options when `destination` is `http_endpoint`. Requires the user to also specify an `s3_configuration` block. More details are given below. -* `opensearch_configuration` - (Optional) Configuration options when `destination` is `opensearch`. More details are given below. -* `opensearchserverless_configuration` - (Optional) Configuration options when `destination` is `opensearchserverless`. More details are given below. -* `redshift_configuration` - (Optional) Configuration options when `destination` is `redshift`. Requires the user to also specify an `s3_configuration` block. More details are given below. -* `splunk_configuration` - (Optional) Configuration options when `destination` is `splunk`. More details are given below. +* `elasticsearch_configuration` - (Optional) Configuration options when `destination` is `elasticsearch`. See [`elasticsearch_configuration` block](#elasticsearch_configuration-block) below for details. +* `extended_s3_configuration` - (Optional, only Required when `destination` is `extended_s3`) Enhanced configuration options for the s3 destination. See [`extended_s3_configuration` block](#extended_s3_configuration-block) below for details. +* `http_endpoint_configuration` - (Optional) Configuration options when `destination` is `http_endpoint`. Requires the user to also specify an `s3_configuration` block. 
See [`http_endpoint_configuration` block](#http_endpoint_configuration-block) below for details. +* `opensearch_configuration` - (Optional) Configuration options when `destination` is `opensearch`. See [`opensearch_configuration` block](#opensearch_configuration-block) below for details. +* `opensearchserverless_configuration` - (Optional) Configuration options when `destination` is `opensearchserverless`. See [`opensearchserverless_configuration` block](#opensearchserverless_configuration-block) below for details. +* `redshift_configuration` - (Optional) Configuration options when `destination` is `redshift`. Requires the user to also specify an `s3_configuration` block. See [`redshift_configuration` block](#redshift_configuration-block) below for details. +* `splunk_configuration` - (Optional) Configuration options when `destination` is `splunk`. See [`splunk_configuration` block](#splunk_configuration-block) below for details. + +### `kinesis_source_configuration` block -The `kinesis_source_configuration` object supports the following: +The `kinesis_source_configuration` configuration block supports the following arguments: * `kinesis_stream_arn` - (Required) The kinesis stream used as the source of the firehose delivery stream. * `role_arn` - (Required) The ARN of the role that provides access to the source Kinesis stream. -The `msk_source_configuration` object supports the following: +### `msk_source_configuration` block -* `authentication_configuration` - (Required) The authentication configuration of the Amazon MSK cluster. More details are given below. +The `msk_source_configuration` configuration block supports the following arguments: + +* `authentication_configuration` - (Required) The authentication configuration of the Amazon MSK cluster. See [`authentication_configuration` block](#authentication_configuration-block) below for details. * `msk_cluster_arn` - (Required) The ARN of the Amazon MSK cluster. 
* `topic_name` - (Required) The topic name within the Amazon MSK cluster. -The `authentication_configuration` object supports the following: +### `authentication_configuration` block + +The `authentication_configuration` configuration block supports the following arguments: * `connectivity` - (Required) The type of connectivity used to access the Amazon MSK cluster. Valid values: `PUBLIC`, `PRIVATE`. * `role_arn` - (Required) The ARN of the role used to access the Amazon MSK cluster. -The `server_side_encryption` object supports the following: +### `server_side_encryption` block + +The `server_side_encryption` configuration block supports the following arguments: * `enabled` - (Optional) Whether to enable encryption at rest. Default is `false`. * `key_type`- (Optional) Type of encryption key. Default is `AWS_OWNED_CMK`. Valid values are `AWS_OWNED_CMK` and `CUSTOMER_MANAGED_CMK` * `key_arn` - (Optional) Amazon Resource Name (ARN) of the encryption key. Required when `key_type` is `CUSTOMER_MANAGED_CMK`. -The `extended_s3_configuration` object supports the same fields from [s3_configuration](#s3-configuration) as well as the following: +### `extended_s3_configuration` block + +The `extended_s3_configuration` configuration block supports the same fields from the [`s3_configuration` block](#s3_configuration-block) as well as the following: -* `data_format_conversion_configuration` - (Optional) Nested argument for the serializer, deserializer, and schema for converting data from the JSON format to the Parquet or ORC format before writing it to Amazon S3. More details given below. -* `processing_configuration` - (Optional) The data processing configuration. More details are given below. +* `custom_time_zone` - (Optional) The time zone you prefer. Valid values are `UTC` or a non-3-letter IANA time zone (for example, `America/Los_Angeles`). Default value is `UTC`. 
+* `data_format_conversion_configuration` - (Optional) Nested argument for the serializer, deserializer, and schema for converting data from the JSON format to the Parquet or ORC format before writing it to Amazon S3. See [`data_format_conversion_configuration` block](#data_format_conversion_configuration-block) below for details. +* `file_extension` - (Optional) The file extension to override the default file extension (for example, `.json`). +* `processing_configuration` - (Optional) The data processing configuration. See [`processing_configuration` block](#processing_configuration-block) below for details. * `s3_backup_mode` - (Optional) The Amazon S3 backup mode. Valid values are `Disabled` and `Enabled`. Default value is `Disabled`. * `s3_backup_configuration` - (Optional) The configuration for backup in Amazon S3. Required if `s3_backup_mode` is `Enabled`. Supports the same fields as `s3_configuration` object. -* `dynamic_partitioning_configuration` - (Optional) The configuration for dynamic partitioning. See [Dynamic Partitioning Configuration](#dynamic_partitioning_configuration) below for more details. Required when using dynamic partitioning. +* `dynamic_partitioning_configuration` - (Optional) The configuration for dynamic partitioning. Required when using [dynamic partitioning](https://docs.aws.amazon.com/firehose/latest/dev/dynamic-partitioning.html). See [`dynamic_partitioning_configuration` block](#dynamic_partitioning_configuration-block) below for details. -The `redshift_configuration` object supports the following: +### `redshift_configuration` block + +The `redshift_configuration` configuration block supports the following arguments: * `cluster_jdbcurl` - (Required) The jdbcurl of the redshift cluster. * `username` - (Required) The username that the firehose delivery stream will assume. 
It is strongly recommended that the username and password provided is used exclusively for Amazon Kinesis Firehose purposes, and that the permissions for the account are restricted for Amazon Redshift INSERT permissions. * `password` - (Required) The password for the username above. * `retry_duration` - (Optional) The length of time during which Firehose retries delivery after a failure, starting from the initial request and including the first attempt. The default value is 3600 seconds (60 minutes). Firehose does not retry if the value of DurationInSeconds is 0 (zero) or if the first delivery attempt takes longer than the current value. * `role_arn` - (Required) The arn of the role the stream assumes. -* `s3_configuration` - (Required) The S3 Configuration. See [s3_configuration](#s3-configuration) for more details. +* `s3_configuration` - (Required) The S3 Configuration. See [`s3_configuration` block](#s3_configuration-block) below for details. * `s3_backup_mode` - (Optional) The Amazon S3 backup mode. Valid values are `Disabled` and `Enabled`. Default value is `Disabled`. * `s3_backup_configuration` - (Optional) The configuration for backup in Amazon S3. Required if `s3_backup_mode` is `Enabled`. Supports the same fields as `s3_configuration` object. * `data_table_name` - (Required) The name of the table in the redshift cluster that the s3 bucket will copy to. * `copy_options` - (Optional) Copy options for copying the data from the s3 intermediate bucket into redshift, for example to change the default delimiter. For valid values, see the [AWS documentation](http://docs.aws.amazon.com/firehose/latest/APIReference/API_CopyCommand.html) * `data_table_columns` - (Optional) The data table columns that will be targeted by the copy command. -* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below -* `processing_configuration` - (Optional) The data processing configuration. More details are given below. 
+* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. See [`cloudwatch_logging_options` block](#cloudwatch_logging_options-block) below for details. +* `processing_configuration` - (Optional) The data processing configuration. See [`processing_configuration` block](#processing_configuration-block) below for details. + +### `elasticsearch_configuration` block -The `elasticsearch_configuration` object supports the following: +The `elasticsearch_configuration` configuration block supports the following arguments: * `buffering_interval` - (Optional) Buffer incoming data for the specified period of time, in seconds between 0 to 900, before delivering it to the destination. The default value is 300s. * `buffering_size` - (Optional) Buffer incoming data to the specified size, in MBs between 1 to 100, before delivering it to the destination. The default value is 5MB. @@ -667,14 +684,16 @@ The `elasticsearch_configuration` object supports the following: * `index_rotation_period` - (Optional) The Elasticsearch index rotation period. Index rotation appends a timestamp to the IndexName to facilitate expiration of old data. Valid values are `NoRotation`, `OneHour`, `OneDay`, `OneWeek`, and `OneMonth`. The default value is `OneDay`. * `retry_duration` - (Optional) After an initial failure to deliver to Amazon Elasticsearch, the total amount of time, in seconds between 0 to 7200, during which Firehose re-attempts delivery (including the first attempt). After this time has elapsed, the failed documents are written to Amazon S3. The default value is 300s. There will be no retry if the value is 0. * `role_arn` - (Required) The ARN of the IAM role to be assumed by Firehose for calling the Amazon ES Configuration API and for indexing documents. The IAM role must have permission for `DescribeElasticsearchDomain`, `DescribeElasticsearchDomains`, and `DescribeElasticsearchDomainConfig`. The pattern needs to be `arn:.*`. 
-* `s3_configuration` - (Required) The S3 Configuration. See [s3_configuration](#s3-configuration) for more details. +* `s3_configuration` - (Required) The S3 Configuration. See [`s3_configuration` block](#s3_configuration-block) below for details. * `s3_backup_mode` - (Optional) Defines how documents should be delivered to Amazon S3. Valid values are `FailedDocumentsOnly` and `AllDocuments`. Default value is `FailedDocumentsOnly`. * `type_name` - (Optional) The Elasticsearch type name with maximum length of 100 characters. -* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below -* `vpc_config` - (Optional) The VPC configuration for the delivery stream to connect to Elastic Search associated with the VPC. More details are given below -* `processing_configuration` - (Optional) The data processing configuration. More details are given below. +* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. See [`cloudwatch_logging_options` block](#cloudwatch_logging_options-block) below for details. +* `vpc_config` - (Optional) The VPC configuration for the delivery stream to connect to Elastic Search associated with the VPC. See [`vpc_config` block](#vpc_config-block) below for details. +* `processing_configuration` - (Optional) The data processing configuration. See [`processing_configuration` block](#processing_configuration-block) below for details. -The `opensearch_configuration` object supports the following: +### `opensearch_configuration` block + +The `opensearch_configuration` configuration block supports the following arguments: * `buffering_interval` - (Optional) Buffer incoming data for the specified period of time, in seconds between 0 to 900, before delivering it to the destination. The default value is 300s. 
* `buffering_size` - (Optional) Buffer incoming data to the specified size, in MBs between 1 to 100, before delivering it to the destination. The default value is 5MB. @@ -684,15 +703,17 @@ The `opensearch_configuration` object supports the following: * `index_rotation_period` - (Optional) The OpenSearch index rotation period. Index rotation appends a timestamp to the IndexName to facilitate expiration of old data. Valid values are `NoRotation`, `OneHour`, `OneDay`, `OneWeek`, and `OneMonth`. The default value is `OneDay`. * `retry_duration` - (Optional) After an initial failure to deliver to Amazon OpenSearch, the total amount of time, in seconds between 0 to 7200, during which Firehose re-attempts delivery (including the first attempt). After this time has elapsed, the failed documents are written to Amazon S3. The default value is 300s. There will be no retry if the value is 0. * `role_arn` - (Required) The ARN of the IAM role to be assumed by Firehose for calling the Amazon ES Configuration API and for indexing documents. The IAM role must have permission for `DescribeDomain`, `DescribeDomains`, and `DescribeDomainConfig`. The pattern needs to be `arn:.*`. -* `s3_configuration` - (Required) The S3 Configuration. See [s3_configuration](#s3-configuration) for more details. +* `s3_configuration` - (Required) The S3 Configuration. See [`s3_configuration` block](#s3_configuration-block) below for details. * `s3_backup_mode` - (Optional) Defines how documents should be delivered to Amazon S3. Valid values are `FailedDocumentsOnly` and `AllDocuments`. Default value is `FailedDocumentsOnly`. * `type_name` - (Optional) The Elasticsearch type name with maximum length of 100 characters. Types are deprecated in OpenSearch_1.1. TypeName must be empty. -* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below. 
-* `vpc_config` - (Optional) The VPC configuration for the delivery stream to connect to OpenSearch associated with the VPC. More details are given below. -* `processing_configuration` - (Optional) The data processing configuration. More details are given below. -* `document_id_options` - (Optional) The method for setting up document ID. More details are given below. +* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. See [`cloudwatch_logging_options` block](#cloudwatch_logging_options-block) below for details. +* `vpc_config` - (Optional) The VPC configuration for the delivery stream to connect to OpenSearch associated with the VPC. See [`vpc_config` block](#vpc_config-block) below for details. +* `processing_configuration` - (Optional) The data processing configuration. See [`processing_configuration` block](#processing_configuration-block) below for details. +* `document_id_options` - (Optional) The method for setting up document ID. See [`document_id_options` block](#document_id_options-block) below for details. + +### `opensearchserverless_configuration` block -The `opensearchserverless_configuration` object supports the following: +The `opensearchserverless_configuration` configuration block supports the following arguments: * `buffering_interval` - (Optional) Buffer incoming data for the specified period of time, in seconds between 0 to 900, before delivering it to the destination. The default value is 300s. * `buffering_size` - (Optional) Buffer incoming data to the specified size, in MBs between 1 to 100, before delivering it to the destination. The default value is 5MB. @@ -700,13 +721,15 @@ The `opensearchserverless_configuration` object supports the following: * `index_name` - (Required) The Serverless offering for Amazon OpenSearch Service index name. 
* `retry_duration` - (Optional) After an initial failure to deliver to the Serverless offering for Amazon OpenSearch Service, the total amount of time, in seconds between 0 to 7200, during which Kinesis Data Firehose retries delivery (including the first attempt). After this time has elapsed, the failed documents are written to Amazon S3. The default value is 300s. There will be no retry if the value is 0. * `role_arn` - (Required) The Amazon Resource Name (ARN) of the IAM role to be assumed by Kinesis Data Firehose for calling the Serverless offering for Amazon OpenSearch Service Configuration API and for indexing documents. The pattern needs to be `arn:.*`. -* `s3_configuration` - (Required) The S3 Configuration. See [s3_configuration](#s3-configuration) for more details. +* `s3_configuration` - (Required) The S3 Configuration. See [`s3_configuration` block](#s3_configuration-block) below for details. * `s3_backup_mode` - (Optional) Defines how documents should be delivered to Amazon S3. Valid values are `FailedDocumentsOnly` and `AllDocuments`. Default value is `FailedDocumentsOnly`. -* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below -* `vpc_config` - (Optional) The VPC configuration for the delivery stream to connect to OpenSearch Serverless associated with the VPC. More details are given below -* `processing_configuration` - (Optional) The data processing configuration. More details are given below. +* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. See [`cloudwatch_logging_options` block](#cloudwatch_logging_options-block) below for details. +* `vpc_config` - (Optional) The VPC configuration for the delivery stream to connect to OpenSearch Serverless associated with the VPC. See [`vpc_config` block](#vpc_config-block) below for details. +* `processing_configuration` - (Optional) The data processing configuration. 
See [`processing_configuration` block](#processing_configuration-block) below for details. -The `splunk_configuration` objects supports the following: +### `splunk_configuration` block + +The `splunk_configuration` configuration block supports the following arguments: * `buffering_interval` - (Optional) Buffer incoming data for the specified period of time, in seconds between 0 to 60, before delivering it to the destination. The default value is 60s. * `buffering_size` - (Optional) Buffer incoming data to the specified size, in MBs between 1 to 5, before delivering it to the destination. The default value is 5MB. @@ -714,67 +737,83 @@ The `splunk_configuration` objects supports the following: * `hec_endpoint` - (Required) The HTTP Event Collector (HEC) endpoint to which Kinesis Firehose sends your data. * `hec_endpoint_type` - (Optional) The HEC endpoint type. Valid values are `Raw` or `Event`. The default value is `Raw`. * `hec_token` - (Required) The GUID that you obtain from your Splunk cluster when you create a new HEC endpoint. -* `s3_configuration` - (Required) The S3 Configuration. See [s3_configuration](#s3-configuration) for more details. +* `s3_configuration` - (Required) The S3 Configuration. See [`s3_configuration` block](#s3_configuration-block) below for details. * `s3_backup_mode` - (Optional) Defines how documents should be delivered to Amazon S3. Valid values are `FailedEventsOnly` and `AllEvents`. Default value is `FailedEventsOnly`. * `retry_duration` - (Optional) After an initial failure to deliver to Splunk, the total amount of time, in seconds between 0 to 7200, during which Firehose re-attempts delivery (including the first attempt). After this time has elapsed, the failed documents are written to Amazon S3. The default value is 300s. There will be no retry if the value is 0. -* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below. 
-* `processing_configuration` - (Optional) The data processing configuration. More details are given below. +* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. See [`cloudwatch_logging_options` block](#cloudwatch_logging_options-block) below for details. +* `processing_configuration` - (Optional) The data processing configuration. See [`processing_configuration` block](#processing_configuration-block) below for details. + +### `http_endpoint_configuration` block -The `http_endpoint_configuration` objects supports the following: +The `http_endpoint_configuration` configuration block supports the following arguments: * `url` - (Required) The HTTP endpoint URL to which Kinesis Firehose sends your data. * `name` - (Optional) The HTTP endpoint name. * `access_key` - (Optional) The access key required for Kinesis Firehose to authenticate with the HTTP endpoint selected as the destination. * `role_arn` - (Required) Kinesis Data Firehose uses this IAM role for all the permissions that the delivery stream needs. The pattern needs to be `arn:.*`. -* `s3_configuration` - (Required) The S3 Configuration. See [s3_configuration](#s3-configuration) for more details. +* `s3_configuration` - (Required) The S3 Configuration. See [`s3_configuration` block](#s3_configuration-block) below for details. * `s3_backup_mode` - (Optional) Defines how documents should be delivered to Amazon S3. Valid values are `FailedDataOnly` and `AllData`. Default value is `FailedDataOnly`. * `buffering_size` - (Optional) Buffer incoming data to the specified size, in MBs, before delivering it to the destination. The default value is 5. * `buffering_interval` - (Optional) Buffer incoming data for the specified period of time, in seconds, before delivering it to the destination. The default value is 300 (5 minutes). -* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below. 
-* `processing_configuration` - (Optional) The data processing configuration. More details are given below. -* `request_configuration` - (Optional) The request configuration. More details are given below. +* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. See [`cloudwatch_logging_options` block](#cloudwatch_logging_options-block) below for details. +* `processing_configuration` - (Optional) The data processing configuration. See [`processing_configuration` block](#processing_configuration-block) below for details. +* `request_configuration` - (Optional) The request configuration. See [`request_configuration` block](#request_configuration-block) below for details. * `retry_duration` - (Optional) Total amount of seconds Firehose spends on retries. This duration starts after the initial attempt fails, It does not include the time periods during which Firehose waits for acknowledgment from the specified destination after each attempt. Valid values between `0` and `7200`. Default is `300`. -The `cloudwatch_logging_options` object supports the following: +### `cloudwatch_logging_options` block + +The `cloudwatch_logging_options` configuration block supports the following arguments: * `enabled` - (Optional) Enables or disables the logging. Defaults to `false`. * `log_group_name` - (Optional) The CloudWatch group name for logging. This value is required if `enabled` is true. * `log_stream_name` - (Optional) The CloudWatch log stream name for logging. This value is required if `enabled` is true. -The `processing_configuration` object supports the following: +### `processing_configuration` block + +The `processing_configuration` configuration block supports the following arguments: * `enabled` - (Optional) Enables or disables data processing. -* `processors` - (Optional) Array of data processors. More details are given below +* `processors` - (Optional) Specifies the data processors as multiple blocks. 
See [`processors` block](#processors-block) below for details. -The `processors` array objects support the following: +### `processors` block + +The `processors` configuration block supports the following arguments: * `type` - (Required) The type of processor. Valid Values: `RecordDeAggregation`, `Lambda`, `MetadataExtraction`, `AppendDelimiterToRecord`. Validation is done against [AWS SDK constants](https://docs.aws.amazon.com/sdk-for-go/api/service/firehose/#pkg-constants); so that values not explicitly listed may also work. -* `parameters` - (Optional) Array of processor parameters. More details are given below +* `parameters` - (Optional) Specifies the processor parameters as multiple blocks. See [`parameters` block](#parameters-block) below for details. + +### `parameters` block -The `parameters` array objects support the following: +The `parameters` configuration block supports the following arguments: * `parameter_name` - (Required) Parameter name. Valid Values: `LambdaArn`, `NumberOfRetries`, `MetadataExtractionQuery`, `JsonParsingEngine`, `RoleArn`, `BufferSizeInMBs`, `BufferIntervalInSeconds`, `SubRecordType`, `Delimiter`. Validation is done against [AWS SDK constants](https://docs.aws.amazon.com/sdk-for-go/api/service/firehose/#pkg-constants); so that values not explicitly listed may also work. * `parameter_value` - (Required) Parameter value. Must be between 1 and 512 length (inclusive). When providing a Lambda ARN, you should specify the resource version as well. ~> **NOTE:** Parameters with default values, including `NumberOfRetries`(default: 3), `RoleArn`(default: firehose role ARN), `BufferSizeInMBs`(default: 1), and `BufferIntervalInSeconds`(default: 60), are not stored in terraform state. To prevent perpetual differences, it is therefore recommended to only include parameters with non-default values. 
-The `request_configuration` object supports the following: +### `request_configuration` block + +The `request_configuration` configuration block supports the following arguments: * `content_encoding` - (Optional) Kinesis Data Firehose uses the content encoding to compress the body of a request before sending the request to the destination. Valid values are `NONE` and `GZIP`. Default value is `NONE`. -* `common_attributes` - (Optional) Describes the metadata sent to the HTTP endpoint destination. More details are given below +* `common_attributes` - (Optional) Describes the metadata sent to the HTTP endpoint destination. See [`common_attributes` block](#common_attributes-block) below for details. + +### `common_attributes` block -The `common_attributes` array objects support the following: +The `common_attributes` configuration block supports the following arguments: * `name` - (Required) The name of the HTTP endpoint common attribute. * `value` - (Required) The value of the HTTP endpoint common attribute. -The `vpc_config` object supports the following: +### `vpc_config` block + +The `vpc_config` configuration block supports the following arguments: * `subnet_ids` - (Required) A list of subnet IDs to associate with Kinesis Firehose. * `security_group_ids` - (Required) A list of security group IDs to associate with Kinesis Firehose. * `role_arn` - (Required) The ARN of the IAM role to be assumed by Firehose for calling the Amazon EC2 configuration API and for creating network interfaces. Make sure role has necessary [IAM permissions](https://docs.aws.amazon.com/firehose/latest/dev/controlling-access.html#using-iam-es-vpc) -### data_format_conversion_configuration +### `data_format_conversion_configuration` block ~> **NOTE:** Once configured, the data format conversion configuration can only be disabled, in which the configuration values will remain, but will not be active. 
It is not currently possible to completely remove the configuration without recreating the resource. @@ -811,12 +850,16 @@ resource "aws_kinesis_firehose_delivery_stream" "example" { } ``` -* `input_format_configuration` - (Required) Nested argument that specifies the deserializer that you want Kinesis Data Firehose to use to convert the format of your data from JSON. More details below. -* `output_format_configuration` - (Required) Nested argument that specifies the serializer that you want Kinesis Data Firehose to use to convert the format of your data to the Parquet or ORC format. More details below. -* `schema_configuration` - (Required) Nested argument that specifies the AWS Glue Data Catalog table that contains the column information. More details below. +The `data_format_conversion_configuration` configuration block supports the following arguments: + +* `input_format_configuration` - (Required) Specifies the deserializer that you want Kinesis Data Firehose to use to convert the format of your data from JSON. See [`input_format_configuration` block](#input_format_configuration-block) below for details. +* `output_format_configuration` - (Required) Specifies the serializer that you want Kinesis Data Firehose to use to convert the format of your data to the Parquet or ORC format. See [`output_format_configuration` block](#output_format_configuration-block) below for details. +* `schema_configuration` - (Required) Specifies the AWS Glue Data Catalog table that contains the column information. See [`schema_configuration` block](#schema_configuration-block) below for details. * `enabled` - (Optional) Defaults to `true`. Set it to `false` if you want to disable format conversion while preserving the configuration details. -#### S3 Configuration +### `s3_configuration` block + +The `s3_configuration` configuration block supports the following arguments: * `role_arn` - (Required) The ARN of the AWS credentials. 
* `bucket_arn` - (Required) The ARN of the S3 bucket @@ -828,41 +871,55 @@ resource "aws_kinesis_firehose_delivery_stream" "example" { * `error_output_prefix` - (Optional) Prefix added to failed records before writing them to S3. Not currently supported for `redshift` destination. This prefix appears immediately following the bucket name. For information about how to specify this prefix, see [Custom Prefixes for Amazon S3 Objects](https://docs.aws.amazon.com/firehose/latest/dev/s3-prefixes.html). * `kms_key_arn` - (Optional) Specifies the KMS key ARN the stream will use to encrypt data. If not set, no encryption will be used. -* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below +* `cloudwatch_logging_options` - (Optional) The CloudWatch Logging Options for the delivery stream. See [`cloudwatch_logging_options` block](#cloudwatch_logging_options-block) below for details. + +### `input_format_configuration` block -#### input_format_configuration +The `input_format_configuration` configuration block supports the following arguments: -* `deserializer` - (Required) Nested argument that specifies which deserializer to use. You can choose either the Apache Hive JSON SerDe or the OpenX JSON SerDe. More details below. +* `deserializer` - (Required) Specifies which deserializer to use. You can choose either the Apache Hive JSON SerDe or the OpenX JSON SerDe. See [`deserializer` block](#deserializer-block) below for details. -##### deserializer +### `deserializer` block ~> **NOTE:** One of the deserializers must be configured. If no nested configuration needs to occur simply declare as `XXX_json_ser_de = []` or `XXX_json_ser_de {}`. -* `hive_json_ser_de` - (Optional) Nested argument that specifies the native Hive / HCatalog JsonSerDe. More details below. -* `open_x_json_ser_de` - (Optional) Nested argument that specifies the OpenX SerDe. More details below. 
+The `deserializer` configuration block supports the following arguments: + +* `hive_json_ser_de` - (Optional) Specifies the native Hive / HCatalog JsonSerDe. See [`hive_json_ser_de` block](#hive_json_ser_de-block) below for details. +* `open_x_json_ser_de` - (Optional) Specifies the OpenX SerDe. See [`open_x_json_ser_de` block](#open_x_json_ser_de-block) below for details. + +### `hive_json_ser_de` block -###### hive_json_ser_de +The `hive_json_ser_de` configuration block supports the following arguments: * `timestamp_formats` - (Optional) A list of how you want Kinesis Data Firehose to parse the date and time stamps that may be present in your input data JSON. To specify these format strings, follow the pattern syntax of JodaTime's DateTimeFormat format strings. For more information, see [Class DateTimeFormat](https://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html). You can also use the special value millis to parse time stamps in epoch milliseconds. If you don't specify a format, Kinesis Data Firehose uses java.sql.Timestamp::valueOf by default. -###### open_x_json_ser_de +### `open_x_json_ser_de` block + +The `open_x_json_ser_de` configuration block supports the following arguments: * `case_insensitive` - (Optional) When set to true, which is the default, Kinesis Data Firehose converts JSON keys to lowercase before deserializing them. * `column_to_json_key_mappings` - (Optional) A map of column names to JSON keys that aren't identical to the column names. This is useful when the JSON contains keys that are Hive keywords. For example, timestamp is a Hive keyword. If you have a JSON key named timestamp, set this parameter to `{ ts = "timestamp" }` to map this key to a column named ts. * `convert_dots_in_json_keys_to_underscores` - (Optional) When set to `true`, specifies that the names of the keys include dots and that you want Kinesis Data Firehose to replace them with underscores.
This is useful because Apache Hive does not allow dots in column names. For example, if the JSON contains a key whose name is "a.b", you can define the column name to be "a_b" when using this option. Defaults to `false`. -#### output_format_configuration +### `output_format_configuration` block + +The `output_format_configuration` configuration block supports the following arguments: -* `serializer` - (Required) Nested argument that specifies which serializer to use. You can choose either the ORC SerDe or the Parquet SerDe. More details below. +* `serializer` - (Required) Specifies which serializer to use. You can choose either the ORC SerDe or the Parquet SerDe. See [`serializer` block](#serializer-block) below for details. -##### serializer +### `serializer` block ~> **NOTE:** One of the serializers must be configured. If no nested configuration needs to occur simply declare as `XXX_ser_de = []` or `XXX_ser_de {}`. -* `orc_ser_de` - (Optional) Nested argument that specifies converting data to the ORC format before storing it in Amazon S3. For more information, see [Apache ORC](https://orc.apache.org/docs/). More details below. -* `parquet_ser_de` - (Optional) Nested argument that specifies converting data to the Parquet format before storing it in Amazon S3. For more information, see [Apache Parquet](https://parquet.apache.org/documentation/latest/). More details below. +The `serializer` configuration block supports the following arguments: + +* `orc_ser_de` - (Optional) Specifies converting data to the ORC format before storing it in Amazon S3. For more information, see [Apache ORC](https://orc.apache.org/docs/). See [`orc_ser_de` block](#orc_ser_de-block) below for details. +* `parquet_ser_de` - (Optional) Specifies converting data to the Parquet format before storing it in Amazon S3. For more information, see [Apache Parquet](https://parquet.apache.org/documentation/latest/). See [`parquet_ser_de` block](#parquet_ser_de-block) below for details.
+ +### `orc_ser_de` block -###### orc_ser_de +The `orc_ser_de` configuration block supports the following arguments: * `block_size_bytes` - (Optional) The Hadoop Distributed File System (HDFS) block size. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. The default is 256 MiB and the minimum is 64 MiB. Kinesis Data Firehose uses this value for padding calculations. * `bloom_filter_columns` - (Optional) A list of column names for which you want Kinesis Data Firehose to create bloom filters. @@ -875,7 +932,9 @@ resource "aws_kinesis_firehose_delivery_stream" "example" { * `row_index_stride` - (Optional) The number of rows between index entries. The default is `10000` and the minimum is `1000`. * `stripe_size_bytes` - (Optional) The number of bytes in each stripe. The default is 64 MiB and the minimum is 8 MiB. -###### parquet_ser_de +### `parquet_ser_de` block + +The `parquet_ser_de` configuration block supports the following arguments: * `block_size_bytes` - (Optional) The Hadoop Distributed File System (HDFS) block size. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. The default is 256 MiB and the minimum is 64 MiB. Kinesis Data Firehose uses this value for padding calculations. * `compression` - (Optional) The compression code to use over data blocks. The possible values are `UNCOMPRESSED`, `SNAPPY`, and `GZIP`, with the default being `SNAPPY`. Use `SNAPPY` for higher decompression speed. Use `GZIP` if the compression ratio is more important than speed. @@ -884,7 +943,9 @@ resource "aws_kinesis_firehose_delivery_stream" "example" { * `page_size_bytes` - (Optional) The Parquet page size. Column chunks are divided into pages. A page is conceptually an indivisible unit (in terms of compression and encoding). The minimum value is 64 KiB and the default is 1 MiB. * `writer_version` - (Optional) Indicates the version of row format to output. The possible values are `V1` and `V2`.
The default is `V1`. -#### schema_configuration +### `schema_configuration` block + +The `schema_configuration` configuration block supports the following arguments: * `database_name` - (Required) Specifies the name of the AWS Glue database that contains the schema for the output data. * `role_arn` - (Required) The role that Kinesis Data Firehose can use to access AWS Glue. This role must be in the same account you use for Kinesis Data Firehose. Cross-account roles aren't allowed. @@ -893,16 +954,18 @@ resource "aws_kinesis_firehose_delivery_stream" "example" { * `region` - (Optional) If you don't specify an AWS Region, the default is the current region. * `version_id` - (Optional) Specifies the table version for the output data schema. Defaults to `LATEST`. -#### dynamic_partitioning_configuration +### `dynamic_partitioning_configuration` block -Required when using [dynamic partitioning](https://docs.aws.amazon.com/firehose/latest/dev/dynamic-partitioning.html). +The `dynamic_partitioning_configuration` configuration block supports the following arguments: * `enabled` - (Optional) Enables or disables dynamic partitioning. Defaults to `false`. * `retry_duration` - (Optional) Total amount of seconds Firehose spends on retries. Valid values between 0 and 7200. Default is 300. ~> **NOTE:** You can enable dynamic partitioning only when you create a new delivery stream. Once you enable dynamic partitioning on a delivery stream, it cannot be disabled on this delivery stream. Therefore, Terraform will recreate the resource whenever dynamic partitioning is enabled or disabled. -### document_id_options +### `document_id_options` block + +The `document_id_options` configuration block supports the following arguments: * `default_document_id_format` - (Required) The method for setting up document ID. Valid values: `FIREHOSE_DEFAULT`, `NO_DOCUMENT_ID`.