Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Glue Crawler: Add catalog target config #9430

Merged
merged 8 commits into from
Aug 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 61 additions & 2 deletions aws/resource_aws_glue_crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,24 @@ func resourceAwsGlueCrawler() *schema.Resource {
},
},
},
"catalog_target": {
Type: schema.TypeList,
Optional: true,
MinItems: 1,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
"database_name": {
Type: schema.TypeString,
Required: true,
},
"tables": {
Type: schema.TypeList,
Required: true,
Elem: &schema.Schema{Type: schema.TypeString},
},
},
},
},
"configuration": {
Type: schema.TypeString,
Optional: true,
Expand Down Expand Up @@ -325,14 +343,16 @@ func expandGlueCrawlerTargets(d *schema.ResourceData) (*glue.CrawlerTargets, err
dynamodbTargets, dynamodbTargetsOk := d.GetOk("dynamodb_target")
jdbcTargets, jdbcTargetsOk := d.GetOk("jdbc_target")
s3Targets, s3TargetsOk := d.GetOk("s3_target")
if !dynamodbTargetsOk && !jdbcTargetsOk && !s3TargetsOk {
return nil, fmt.Errorf("One of the following configurations is required: dynamodb_target, jdbc_target, s3_target")
catalogTargets, catalogTargetsOk := d.GetOk("catalog_target")
if !dynamodbTargetsOk && !jdbcTargetsOk && !s3TargetsOk && !catalogTargetsOk {
return nil, fmt.Errorf("One of the following configurations is required: dynamodb_target, jdbc_target, s3_target, catalog_target")
}

log.Print("[DEBUG] Creating crawler target")
crawlerTargets.DynamoDBTargets = expandGlueDynamoDBTargets(dynamodbTargets.([]interface{}))
crawlerTargets.JdbcTargets = expandGlueJdbcTargets(jdbcTargets.([]interface{}))
crawlerTargets.S3Targets = expandGlueS3Targets(s3Targets.([]interface{}))
crawlerTargets.CatalogTargets = expandGlueCatalogTargets(catalogTargets.([]interface{}))

return crawlerTargets, nil
}
Expand Down Expand Up @@ -407,6 +427,28 @@ func expandGlueJdbcTarget(cfg map[string]interface{}) *glue.JdbcTarget {
return target
}

// expandGlueCatalogTargets converts the raw "catalog_target" list from the
// resource data into Glue API catalog targets, preserving order.
//
// Every element of targets must be a map[string]interface{}; any other
// element type makes the type assertion panic. Empty input produces an empty,
// non-nil slice.
func expandGlueCatalogTargets(targets []interface{}) []*glue.CatalogTarget {
	expanded := make([]*glue.CatalogTarget, 0, len(targets))

	for _, raw := range targets {
		expanded = append(expanded, expandGlueCatalogTarget(raw.(map[string]interface{})))
	}

	return expanded
}

// expandGlueCatalogTarget builds a single Glue catalog target from one
// "catalog_target" configuration block.
//
// cfg must hold "database_name" as a string and "tables" as a []interface{};
// the type assertions are unchecked, so a missing or differently typed key
// panics.
func expandGlueCatalogTarget(cfg map[string]interface{}) *glue.CatalogTarget {
	databaseName := cfg["database_name"].(string)
	tables := cfg["tables"].([]interface{})

	return &glue.CatalogTarget{
		DatabaseName: aws.String(databaseName),
		Tables:       expandStringList(tables),
	}
}

func resourceAwsGlueCrawlerUpdate(d *schema.ResourceData, meta interface{}) error {
glueConn := meta.(*AWSClient).glueconn
name := d.Get("name").(string)
Expand Down Expand Up @@ -509,6 +551,10 @@ func resourceAwsGlueCrawlerRead(d *schema.ResourceData, meta interface{}) error
if err := d.Set("s3_target", flattenGlueS3Targets(crawlerOutput.Crawler.Targets.S3Targets)); err != nil {
return fmt.Errorf("error setting s3_target: %s", err)
}

if err := d.Set("catalog_target", flattenGlueCatalogTargets(crawlerOutput.Crawler.Targets.CatalogTargets)); err != nil {
return fmt.Errorf("error setting catalog_target: %s", err)
}
}

return nil
Expand All @@ -527,6 +573,19 @@ func flattenGlueS3Targets(s3Targets []*glue.S3Target) []map[string]interface{} {
return result
}

// flattenGlueCatalogTargets converts catalog targets returned by the Glue API
// into the list-of-maps form written to the "catalog_target" attribute.
//
// Each resulting map carries "database_name" and "tables". Order is preserved,
// and a nil or empty input yields an empty, non-nil slice.
func flattenGlueCatalogTargets(catalogTargets []*glue.CatalogTarget) []map[string]interface{} {
	// The final length is known up front, so size the slice once.
	result := make([]map[string]interface{}, 0, len(catalogTargets))

	for _, catalogTarget := range catalogTargets {
		result = append(result, map[string]interface{}{
			"tables":        flattenStringList(catalogTarget.Tables),
			"database_name": aws.StringValue(catalogTarget.DatabaseName),
		})
	}
	return result
}

func flattenGlueDynamoDBTargets(dynamodbTargets []*glue.DynamoDBTarget) []map[string]interface{} {
result := make([]map[string]interface{}, 0)

Expand Down
260 changes: 246 additions & 14 deletions aws/resource_aws_glue_crawler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,131 @@ func TestAccAWSGlueCrawler_S3Target_Multiple(t *testing.T) {
})
}

// TestAccAWSGlueCrawler_CatalogTarget applies a crawler with a single
// catalog_target listing one table, updates it to list two tables, and then
// verifies that the resource imports cleanly.
func TestAccAWSGlueCrawler_CatalogTarget(t *testing.T) {
	var crawler glue.Crawler
	rName := acctest.RandomWithPrefix("tf-acc-test")
	resourceName := "aws_glue_crawler.test"

	// attr is shorthand for an attribute equality check on the crawler.
	attr := func(key, value string) resource.TestCheckFunc {
		return resource.TestCheckResourceAttr(resourceName, key, value)
	}

	// crawlerChecks assembles the full attribute check list for a crawler
	// whose only catalog target lists tableCount tables. The checks are kept
	// in a fixed order so the first failing check is the same on every run.
	crawlerChecks := func(tableCount int) resource.TestCheckFunc {
		checks := []resource.TestCheckFunc{
			testAccCheckAWSGlueCrawlerExists(resourceName, &crawler),
			testAccCheckResourceAttrRegionalARN(resourceName, "arn", "glue", fmt.Sprintf("crawler/%s", rName)),
			attr("classifiers.#", "0"),
			attr("database_name", rName),
			attr("description", ""),
			attr("dynamodb_target.#", "0"),
			attr("jdbc_target.#", "0"),
			attr("s3_target.#", "0"),
			attr("name", rName),
			attr("role", rName),
			attr("catalog_target.#", "1"),
			attr("catalog_target.0.database_name", rName),
			attr("catalog_target.0.tables.#", fmt.Sprintf("%d", tableCount)),
		}

		// One check per table; names follow the "<rName>_table_<index>" scheme.
		for i := 0; i < tableCount; i++ {
			checks = append(checks, attr(
				fmt.Sprintf("catalog_target.0.tables.%d", i),
				fmt.Sprintf("%s_table_%d", rName, i),
			))
		}

		checks = append(checks,
			attr("schedule", ""),
			attr("schema_change_policy.#", "1"),
			attr("schema_change_policy.0.delete_behavior", "LOG"),
			attr("schema_change_policy.0.update_behavior", "UPDATE_IN_DATABASE"),
			attr("table_prefix", ""),
			attr("configuration", "{\"Version\":1.0,\"Grouping\":{\"TableGroupingPolicy\":\"CombineCompatibleSchemas\"}}"),
		)

		return resource.ComposeTestCheckFunc(checks...)
	}

	steps := []resource.TestStep{
		{
			Config: testAccGlueCrawlerConfig_CatalogTarget(rName, 1),
			Check:  crawlerChecks(1),
		},
		{
			Config: testAccGlueCrawlerConfig_CatalogTarget(rName, 2),
			Check:  crawlerChecks(2),
		},
		{
			ResourceName:      resourceName,
			ImportState:       true,
			ImportStateVerify: true,
		},
	}

	resource.ParallelTest(t, resource.TestCase{
		PreCheck:     func() { testAccPreCheck(t) },
		Providers:    testAccProviders,
		CheckDestroy: testAccCheckAWSGlueCrawlerDestroy,
		Steps:        steps,
	})
}

// TestAccAWSGlueCrawler_CatalogTarget_Multiple starts with one catalog_target,
// grows the crawler to two catalog targets in separate databases, shrinks it
// back to one, and finally verifies that the resource imports cleanly.
func TestAccAWSGlueCrawler_CatalogTarget_Multiple(t *testing.T) {
	var crawler glue.Crawler
	rName := acctest.RandomWithPrefix("tf-acc-test")
	resourceName := "aws_glue_crawler.test"

	// attr is shorthand for an attribute equality check on the crawler.
	attr := func(key, value string) resource.TestCheckFunc {
		return resource.TestCheckResourceAttr(resourceName, key, value)
	}

	// singleTargetStep builds the one-target, one-table step. It is used both
	// before and after the multi-target step.
	singleTargetStep := func() resource.TestStep {
		return resource.TestStep{
			Config: testAccGlueCrawlerConfig_CatalogTarget(rName, 1),
			Check: resource.ComposeTestCheckFunc(
				testAccCheckAWSGlueCrawlerExists(resourceName, &crawler),
				testAccCheckResourceAttrRegionalARN(resourceName, "arn", "glue", fmt.Sprintf("crawler/%s", rName)),
				attr("catalog_target.#", "1"),
				attr("catalog_target.0.database_name", rName),
				attr("catalog_target.0.tables.#", "1"),
				attr("catalog_target.0.tables.0", fmt.Sprintf("%s_table_0", rName)),
			),
		}
	}

	multiTargetStep := resource.TestStep{
		Config: testAccGlueCrawlerConfig_CatalogTarget_Multiple(rName),
		Check: resource.ComposeTestCheckFunc(
			testAccCheckAWSGlueCrawlerExists(resourceName, &crawler),
			testAccCheckResourceAttrRegionalARN(resourceName, "arn", "glue", fmt.Sprintf("crawler/%s", rName)),
			attr("catalog_target.#", "2"),
			attr("catalog_target.0.database_name", fmt.Sprintf("%s_database_0", rName)),
			attr("catalog_target.1.database_name", fmt.Sprintf("%s_database_1", rName)),
			attr("catalog_target.0.tables.#", "1"),
			attr("catalog_target.0.tables.0", fmt.Sprintf("%s_table_0", rName)),
			attr("catalog_target.1.tables.#", "1"),
			attr("catalog_target.1.tables.0", fmt.Sprintf("%s_table_1", rName)),
		),
	}

	importStep := resource.TestStep{
		ResourceName:      resourceName,
		ImportState:       true,
		ImportStateVerify: true,
	}

	resource.ParallelTest(t, resource.TestCase{
		PreCheck:     func() { testAccPreCheck(t) },
		Providers:    testAccProviders,
		CheckDestroy: testAccCheckAWSGlueCrawlerDestroy,
		Steps: []resource.TestStep{
			singleTargetStep(),
			multiTargetStep,
			singleTargetStep(),
			importStep,
		},
	})
}

func TestAccAWSGlueCrawler_recreates(t *testing.T) {
var crawler glue.Crawler
rName := acctest.RandomWithPrefix("tf-acc-test")
Expand Down Expand Up @@ -893,26 +1018,26 @@ data "aws_partition" "current" {}

resource "aws_iam_role" "test" {
name = %q
assume_role_policy = "${data.aws_iam_policy_document.assume.json}"
}

assume_role_policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Action": "sts:AssumeRole",
"Principal": {
"Service": "glue.amazonaws.com"
},
"Effect": "Allow",
"Sid": ""
data "aws_iam_policy_document" "assume" {
statement {
actions = ["sts:AssumeRole"]

principals {
type = "Service"
identifiers = ["glue.amazonaws.com"]
}
]
}
}
EOF

data "aws_iam_policy" "AWSGlueServiceRole" {
arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/service-role/AWSGlueServiceRole"
}

resource "aws_iam_role_policy_attachment" "test-AWSGlueServiceRole" {
policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/service-role/AWSGlueServiceRole"
policy_arn = "${data.aws_iam_policy.AWSGlueServiceRole.arn}"
role = "${aws_iam_role.test.name}"
}
`, rName)
Expand Down Expand Up @@ -1391,6 +1516,113 @@ resource "aws_glue_crawler" "test" {
`, rName, rName, path1, path2)
}

// testAccGlueCrawlerConfig_CatalogTarget returns the base crawler test
// configuration followed by a catalog database, an S3 bucket, tableCount
// catalog tables, and a crawler with one catalog_target that lists every
// created table.
//
// rName is used for the database, bucket and crawler names and as the prefix
// of each table name ("<rName>_table_<index>").
func testAccGlueCrawlerConfig_CatalogTarget(rName string, tableCount int) string {
	// %[1] is rName, %[2] is tableCount.
	const catalogTargetTemplate = `
resource "aws_glue_catalog_database" "test" {
name = %[1]q
}

resource "aws_s3_bucket" "default" {
bucket = %[1]q
force_destroy = true
}

resource "aws_glue_catalog_table" "test" {
count = %[2]d

database_name = "${aws_glue_catalog_database.test.name}"
name = "%[1]s_table_${count.index}"
table_type = "EXTERNAL_TABLE"

storage_descriptor {
location = "s3://${aws_s3_bucket.default.bucket}"
}
}

resource "aws_glue_crawler" "test" {
depends_on = ["aws_iam_role_policy_attachment.test-AWSGlueServiceRole"]

database_name = "${aws_glue_catalog_database.test.name}"
name = %[1]q
role = "${aws_iam_role.test.name}"

schema_change_policy {
delete_behavior = "LOG"
}

catalog_target {
database_name = "${aws_glue_catalog_database.test.name}"
tables = flatten(["${aws_glue_catalog_table.test[*].name}"])
}

configuration = <<EOF
{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
EOF
}
`

	base := testAccGlueCrawlerConfig_Base(rName)
	return base + fmt.Sprintf(catalogTargetTemplate, rName, tableCount)
}

// testAccGlueCrawlerConfig_CatalogTarget_Multiple returns the base crawler
// test configuration followed by two catalog databases, one table in each,
// an S3 bucket, and a crawler with two catalog_target blocks, one per
// database.
//
// rName is used for the bucket and crawler names and as the prefix of the
// database ("<rName>_database_<index>") and table ("<rName>_table_<index>")
// names.
func testAccGlueCrawlerConfig_CatalogTarget_Multiple(rName string) string {
	// %[1] is rName.
	const multiCatalogTargetTemplate = `
resource "aws_glue_catalog_database" "test" {
count = 2
name = "%[1]s_database_${count.index}"
}

resource "aws_glue_catalog_table" "test" {
count = 2
database_name = "${aws_glue_catalog_database.test[count.index].name}"
name = "%[1]s_table_${count.index}"
table_type = "EXTERNAL_TABLE"

storage_descriptor {
location = "s3://${aws_s3_bucket.default.bucket}"
}
}

resource "aws_s3_bucket" "default" {
bucket = %[1]q
force_destroy = true
}

resource "aws_glue_crawler" "test" {
depends_on = ["aws_iam_role_policy_attachment.test-AWSGlueServiceRole"]

database_name = "${aws_glue_catalog_database.test[0].name}"
name = %[1]q
role = "${aws_iam_role.test.name}"

schema_change_policy {
delete_behavior = "LOG"
}

catalog_target {
database_name = "${aws_glue_catalog_database.test[0].name}"
tables = ["${aws_glue_catalog_table.test[0].name}"]
}

catalog_target {
database_name = "${aws_glue_catalog_database.test[1].name}"
tables = ["${aws_glue_catalog_table.test[1].name}"]
}

configuration = <<EOF
{
"Version":1.0,
"Grouping": {
"TableGroupingPolicy": "CombineCompatibleSchemas"
}
}
EOF
}
`

	base := testAccGlueCrawlerConfig_Base(rName)
	return base + fmt.Sprintf(multiCatalogTargetTemplate, rName)
}

func testAccGlueCrawlerConfig_Schedule(rName, schedule string) string {
return testAccGlueCrawlerConfig_Base(rName) + fmt.Sprintf(`
resource "aws_glue_catalog_database" "test" {
Expand Down
Loading