From 34b5268cc81353d8b2aa5b58ddb01f2374e83856 Mon Sep 17 00:00:00 2001 From: max-jacobs Date: Mon, 25 Nov 2019 16:04:26 +0000 Subject: [PATCH 1/4] Adding orphaned data strategy --- CHANGELOG.md | 4 ++++ README.md | 18 ++++++++++-------- templates.tf | 1 + templates/shuntingyard-config.yml.tmpl | 1 + variables.tf | 9 +++++++++ 5 files changed, 25 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 327aae5..75be686 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [1.2.1] - TBD +### Added +- Adding orphaned data strategy. + ## [1.2.0] - 2019-08-29 ### Added - Support for Docker Auth. diff --git a/README.md b/README.md index 94865c4..4f52ca3 100644 --- a/README.md +++ b/README.md @@ -5,25 +5,27 @@ Terraform module for setting up infrastructure for [Shunting Yard](https://githu For more information please refer to the main [Apiary](https://github.com/ExpediaGroup/apiary) project page. -## Variables +## Inputs + | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| allowed\_s3\_buckets | List of S3 Buckets to which Shunting Yard will have read-write access. eg. `["bucket-1", "bucket-2"]`. | list | `n/a` | yes | +| allowed\_s3\_buckets | List of S3 Buckets to which Shunting Yard will have read-write access. | list | n/a | yes | | aws\_region | AWS region to use for resources. | string | n/a | yes | | cpu | The number of CPU units to reserve for the Shunting Yard container. Valid values can be 256, 512, 1024, 2048 and 4096. Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | string | `"1024"` | no | | ct\_common\_config\_yaml | Common Circus Train configuration to be passed to internal Circus Train instance. 
It can be used, for example to configure Graphite for Circus Train. Refer to [Circus Train README](https://github.com/HotelsDotCom/circus-train/blob/master/README.md) for an exhaustive list of options supported by Circus Train. | string | n/a | yes | | docker\_image | Full path of Shunting Yard Docker image. | string | n/a | yes | +| docker\_registry\_auth\_secret\_name | Docker Registry authentication Secrets Manager secret name. | string | `""` | no | | docker\_version | Shunting Yard Docker image version. | string | n/a | yes | -| docker\_registry\_auth\_secret\_name | Docker Registry authentication SecretManager secret name. | string | `` | no | | instance\_name | Shunting Yard instance name to identify resources in multi-instance deployments. | string | `""` | no | -| memory | The amount of memory (in MiB) allocated to the Shunting Yard container. Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | string | `"4096"` | no | +| memory | The amount of memory (in MiB) to allocate to the Shunting Yard container. Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | string | `"4096"` | no | | metastore\_events\_sns\_topic | SNS Topic for Hive Metastore events. | string | n/a | yes | -| shuntingyard\_sqs\_queue\_wait\_timeout | Shunting Yard SQS queue wait timeout (in seconds) | string | 15 | no | -| shuntingyard\_sqs\_queue\_stale\_messages\_timeout | Shunting Yard SQS queue stale messages alert timeout (in seconds) | string | 300 | no | -| selected\_tables | Tables selected for Shunting Yard Replication. Supported Format: `[ "database_1.table_1", "database_2.table_2" ]` | list | [] | no | +| orphaned\_data\_strategy | Orphaned data strategy to use for stale data during replication. Supported strategies: `NONE`, `HOUSEKEEPING` (default). | string | `"HOUSEKEEPING"` | no | +| selected\_tables | Tables selected for Shunting Yard Replication. 
Supported Format: `[ "database_1.table_1", "database_2.table_2" ]`. Wildcards are not supported, i.e. you need to specify each table explicitly. | list | `[]` | no | +| shuntingyard\_sqs\_queue\_stale\_messages\_timeout | Shunting Yard SQS Queue CloudWatch Alert timeout for messages older than this number of seconds. | string | `"300"` | no | +| shuntingyard\_sqs\_queue\_wait\_timeout | Wait timeout for connecting to the Shunting Yard SQS queue (in seconds). | string | `"15"` | no | +| shuntingyard\_tags | A map of tags to apply to resources. | map | n/a | yes | | source\_metastore\_uri | Source Metastore URI for Shunting Yard. | string | n/a | yes | | subnets | ECS container subnets. | list | n/a | yes | -| shuntingyard\_tags | A map of tags to apply to resources. | map | `` | no | | target\_metastore\_uri | Target Metastore URI for Shunting Yard. | string | n/a | yes | | vpc\_id | VPC ID. | string | n/a | yes | diff --git a/templates.tf b/templates.tf index c794299..6ee8df7 100644 --- a/templates.tf +++ b/templates.tf @@ -13,6 +13,7 @@ data "template_file" "shuntingyard_config_yaml" { shuntingyard_sqs_queue = "${aws_sqs_queue.shuntingyard_sqs_queue.id}" shuntingyard_sqs_queue_wait_timeout = "${var.shuntingyard_sqs_queue_wait_timeout}" selected_tables = "${join("\n", formatlist(" - %s", var.selected_tables))}" + orphaned_data_strategy = "${format("orphaned-data-strategy: %s", var.orphaned_data_strategy)}" } } diff --git a/templates/shuntingyard-config.yml.tmpl b/templates/shuntingyard-config.yml.tmpl index e71305d..bd38479 100644 --- a/templates/shuntingyard-config.yml.tmpl +++ b/templates/shuntingyard-config.yml.tmpl @@ -16,3 +16,4 @@ event-receiver: source-table-filter: table-names: ${selected_tables} +${orphaned_data_strategy} diff --git a/variables.tf b/variables.tf index 5e38859..d09c312 100644 --- a/variables.tf +++ b/variables.tf @@ -110,6 +110,15 @@ EOF default = [] } +variable "orphaned_data_strategy" { + description = < Date: Wed, 15 Jan 2020 12:20:37 +0000 
Subject: [PATCH 2/4] Update CHANGELOG.md Co-Authored-By: Abhimanyu Gupta --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 75be686..fca4924 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## [1.2.1] - TBD ### Added -- Adding orphaned data strategy. +- A new argument `orphaned_data_strategy` to use for handling stale data during replication. ## [1.2.0] - 2019-08-29 ### Added From 89d601bf0467b92db4d41943c9393a01bba53fbe Mon Sep 17 00:00:00 2001 From: max-jacobs Date: Wed, 15 Jan 2020 12:22:01 +0000 Subject: [PATCH 3/4] Adding release date to changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fca4924..1aa927e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## [1.2.1] - TBD +## [1.2.1] - 2020-01-15 ### Added - A new argument `orphaned_data_strategy` to use for handling stale data during replication. From 69f7ad5d1b97e92a862cbabede8708f256872617 Mon Sep 17 00:00:00 2001 From: max-jacobs Date: Thu, 16 Jan 2020 09:51:00 +0000 Subject: [PATCH 4/4] Updating changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1aa927e..d145f2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
-## [1.2.1] - 2020-01-15 +## [1.2.1] - 2020-01-16 ### Added - A new argument `orphaned_data_strategy` to use for handling stale data during replication.