Skip to content

Commit

Permalink
Merge branch 'main' into dependabot/go_modules/google.golang.org/grpc…
Browse files Browse the repository at this point in the history
…-1.56.3
  • Loading branch information
alexlokshin-czi authored Dec 1, 2023
2 parents 6025194 + 343028d commit 7202580
Show file tree
Hide file tree
Showing 35 changed files with 1,990 additions and 2 deletions.
55 changes: 55 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,60 @@
# Changelog

## [0.63.1](https://github.com/chanzuckerberg/cztack/compare/v0.63.0...v0.63.1) (2023-11-16)


### Bug Fixes

* personal instance pools var for databricks compute policies ([#543](https://github.com/chanzuckerberg/cztack/issues/543)) ([541f8d3](https://github.com/chanzuckerberg/cztack/commit/541f8d393351d9492e55cbaa452ff0187106fed0))

## [0.63.0](https://github.com/chanzuckerberg/cztack/compare/v0.62.3...v0.63.0) (2023-11-15)


### Features

* add pool use to personal compute Databricks policy ([#542](https://github.com/chanzuckerberg/cztack/issues/542)) ([9d4cd22](https://github.com/chanzuckerberg/cztack/commit/9d4cd22daedfcfa9e5f125f6650ec547bcd35e4c))

## [0.62.3](https://github.com/chanzuckerberg/cztack/compare/v0.62.2...v0.62.3) (2023-11-03)


### Bug Fixes

* module name fix ([42b328d](https://github.com/chanzuckerberg/cztack/commit/42b328dd1edf200e9672ecd48dba743c0b053500))

## [0.62.2](https://github.com/chanzuckerberg/cztack/compare/v0.62.1...v0.62.2) (2023-11-03)


### Bug Fixes

* Split out job compute policy between single and multi node ([#537](https://github.com/chanzuckerberg/cztack/issues/537)) ([770b19e](https://github.com/chanzuckerberg/cztack/commit/770b19e544cca18a6f6e7f3f59800e84f16c1393))

## [0.62.1](https://github.com/chanzuckerberg/cztack/compare/v0.62.0...v0.62.1) (2023-10-31)


### Bug Fixes

* remove unused databricks-workspace-e2 variable ([#535](https://github.com/chanzuckerberg/cztack/issues/535)) ([a21509b](https://github.com/chanzuckerberg/cztack/commit/a21509bda6d4bbeb81aaa2afc5fb9bd19f4f86f8))

## [0.62.0](https://github.com/chanzuckerberg/cztack/compare/v0.61.0...v0.62.0) (2023-10-31)


### Features

* CDI-2182 Add databricks-default-cluster-policy module ([#531](https://github.com/chanzuckerberg/cztack/issues/531)) ([4c70f29](https://github.com/chanzuckerberg/cztack/commit/4c70f295cefb5013590e6533b6ae6e09efc52a0c))
* CDI-2183 Add databricks-cluster-log-permissions module ([#532](https://github.com/chanzuckerberg/cztack/issues/532)) ([2e5974a](https://github.com/chanzuckerberg/cztack/commit/2e5974a61defa36d339a1a28ce7c90a17bd22685))


### Bug Fixes

* update readmes and trigger release ([#534](https://github.com/chanzuckerberg/cztack/issues/534)) ([7fef82a](https://github.com/chanzuckerberg/cztack/commit/7fef82aa47a9dcc5b9e897072406f080e4ddef1f))

## [0.61.0](https://github.com/chanzuckerberg/cztack/compare/v0.60.1...v0.61.0) (2023-10-30)


### Features

* allow more options when creating the trust relationship ([#525](https://github.com/chanzuckerberg/cztack/issues/525)) ([edfff23](https://github.com/chanzuckerberg/cztack/commit/edfff23a634152c02a75c78246d5784b0c2f75db))

## [0.60.1](https://github.com/chanzuckerberg/cztack/compare/v0.60.0...v0.60.1) (2023-10-03)


Expand Down
23 changes: 22 additions & 1 deletion aws-iam-role-github-action/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,24 @@ locals {

// https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/configuring-openid-connect-in-amazon-web-services#adding-the-identity-provider-to-aws
data "aws_iam_policy_document" "assume_role" {
dynamic "statement" {
for_each = var.authorized_aws_accounts

content {
sid = "AllowAssumeRoleFrom${statement.key}"
principals {
type = "AWS"
identifiers = ["arn:aws:iam::${statement.value}:root"]
}
actions = ["sts:AssumeRole", "sts:TagSession"]
effect = "Allow"
}
}
dynamic "statement" {
for_each = var.authorized_github_repos

content {
sid = "AllowGithubActionsToAssumeRole"
principals {
type = "Federated"
identifiers = [local.idp_arn]
Expand All @@ -30,12 +44,19 @@ data "aws_iam_policy_document" "assume_role" {
}
}

# Merges the generated assume-role statements with any caller-supplied
# assume-role policy JSON into a single policy document.
data "aws_iam_policy_document" "this" {
  # compact() drops empty strings, so the "" default of
  # var.additional_assume_role_policies_json contributes nothing to the merge.
  source_policy_documents = compact(
    [
      data.aws_iam_policy_document.assume_role.json,
      var.additional_assume_role_policies_json,
    ]
  )
}

resource "aws_iam_role" "role" {
name = var.role.name

tags = var.tags

assume_role_policy = data.aws_iam_policy_document.assume_role.json
assume_role_policy = data.aws_iam_policy_document.this.json
max_session_duration = 60 * 60 // 1 hour, not sure what max github action exec time is

# We have to force detach policies in order to recreate roles.
Expand Down
12 changes: 12 additions & 0 deletions aws-iam-role-github-action/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,15 @@ variable "tags" {

description = "Standard tagging."
}

# Each entry yields one trust statement: the map key is appended to the
# statement sid and the map value is used as the AWS account ID whose root
# principal may assume the role.
variable "authorized_aws_accounts" {
  description = "The map of authorized AWS accounts to assume the created role."
  type        = map(string)
  default     = {}
}

# Optional extra assume-role policy document (as JSON) merged into the role's
# trust policy; the empty-string default means "nothing extra".
variable "additional_assume_role_policies_json" {
  description = "The JSON string of any other additional assume role policies to add to the Github Actions role"
  type        = string
  default     = ""
}
67 changes: 67 additions & 0 deletions databricks-cluster-log-permissions/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# README
<!-- START -->
## Requirements

| Name | Version |
|------|---------|
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 0.13 |

## Providers

| Name | Version |
|------|---------|
| <a name="provider_aws"></a> [aws](#provider\_aws) | n/a |
| <a name="provider_aws.logs_destination"></a> [aws.logs\_destination](#provider\_aws.logs\_destination) | n/a |
| <a name="provider_databricks"></a> [databricks](#provider\_databricks) | n/a |

## Modules

No modules.

## Resources

| Name | Type |
|------|------|
| [aws_iam_instance_profile.cluster_log_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_instance_profile) | resource |
| [aws_iam_instance_profile.cluster_log_cluster_rw](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_instance_profile) | resource |
| [aws_iam_policy.cluster_log_bucket_read_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
| [aws_iam_policy.cluster_log_bucket_write_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
| [aws_iam_role.cluster_log_cluster_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
| [aws_iam_role.cluster_log_rw_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
| [aws_iam_role_policy_attachment.additional_write_access_attachment](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.read_access_attachment](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.write_access_attachment_default_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
| [aws_iam_role_policy_attachment.write_access_attachment_rw_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
| [aws_kms_grant.additional_bucket_kms_encryption_key_grant](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_grant) | resource |
| [aws_kms_grant.bucket_kms_encryption_key_grant_default](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_grant) | resource |
| [aws_kms_grant.bucket_kms_encryption_key_grant_rw](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_grant) | resource |
| [databricks_instance_profile.cluster_log_cluster](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/instance_profile) | resource |
| [databricks_instance_profile.cluster_log_cluster_rw](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/instance_profile) | resource |
| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
| [aws_iam_policy_document.assume_role_for_cluster_log_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
| [aws_iam_policy_document.cluster_log_bucket_read_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
| [aws_iam_policy_document.cluster_log_bucket_write_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |

## Inputs

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_add_reader"></a> [add\_reader](#input\_add\_reader) | Flag to add reader role for logs - should only be invoked for the ie workspace | `bool` | `false` | no |
| <a name="input_bucket_kms_encryption_key_arn"></a> [bucket\_kms\_encryption\_key\_arn](#input\_bucket\_kms\_encryption\_key\_arn) | ARN for KMS key used to encrypt bucket for cluster logs | `string` | n/a | yes |
| <a name="input_env"></a> [env](#input\_env) | Environment name | `string` | n/a | yes |
| <a name="input_existing_role_names"></a> [existing\_role\_names](#input\_existing\_role\_names) | List of other existing instance policy roles on the workspace for which to add cluster log write permissions | `list(string)` | `[]` | no |
| <a name="input_databricks_logs_bucket_name"></a> [databricks\_logs\_bucket\_name](#input\_databricks\_logs\_bucket\_name) | Name of the bucket to store cluster logs | `string` | n/a | yes |
| <a name="input_global_reader_env"></a> [global\_reader\_env](#input\_global\_reader\_env) | Name of env to grant global logs reader access to | `string` | n/a | yes |
| <a name="input_destination_account_id"></a> [destination\_account\_id](#input\_destination\_account\_id) | Account ID for the logs destination AWS account | `string` | n/a | yes |
| <a name="input_destination_account_region"></a> [destination\_account\_region](#input\_destination\_account\_region) | Region for the logs destination AWS account | `string` | n/a | yes |
| <a name="input_destination_account_assume_role_name"></a> [destination\_account\_assume\_role\_name](#input\_destination\_account\_assume\_role\_name) | Role name to assume in the logs destination AWS account | `string` | n/a | yes |


## Outputs

| Name | Description |
|------|-------------|
| <a name="output_default_logging_role_arn"></a> [default\_logging\_role\_arn](#output\_default\_logging\_role\_arn) | ARN of the AWS IAM role created for default logs access |
| <a name="output_rw_logging_role_arn"></a> [rw\_logging\_role\_arn](#output\_rw\_logging\_role\_arn) | ARN of the AWS IAM role created for read and write logs access |
| <a name="output_rw_logging_role_instance_profile_arn"></a> [rw\_logging\_role\_instance\_profile\_arn](#output\_rw\_logging\_role\_instance\_profile\_arn) | ARN of the AWS instance profile created for read and write logs access |
<!-- END -->
200 changes: 200 additions & 0 deletions databricks-cluster-log-permissions/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# - Creates a standard IAM role and instance profile that allow clusters to write cluster logs to a destination S3 bucket
# - For a given list of existing role names, also attaches the write policy (and a KMS grant) so those roles can write cluster logs, too

###
locals {
  # IAM path applied to every role, policy and instance profile in this module.
  path = "/databricks/"

  # Standard role for clusters: write and read cluster logs for the same
  # workspace only.
  default_role_name = "cluster_log_cluster_role"

  # Special role: write and read cluster logs for all workspaces.
  read_write_role_name = "cluster_log_rw_role"

  # Key prefix inside the logs bucket under which cluster logs are stored.
  databricks_bucket_cluster_log_prefix = "cluster-logs"

  # KMS grant operations - every role may both read and write.
  read_write_operations = ["Encrypt", "GenerateDataKey", "Decrypt"]

  # Validation hack: when add_reader is requested for an environment other
  # than var.global_reader_env, tobool() is evaluated on a non-boolean string
  # and fails, surfacing that string as the error message.
  # tflint-ignore: terraform_unused_declarations
  validate_add_reader = (var.add_reader != true || var.env == var.global_reader_env) ? true : tobool("add_reader is not supported for this environment")
}

# Trust policy shared by both cluster-log roles: lets the EC2 service assume
# the role.
data "aws_iam_policy_document" "assume_role_for_cluster_log_cluster" {
  statement {
    actions = ["sts:AssumeRole"]
    effect  = "Allow"

    principals {
      type        = "Service"
      identifiers = ["ec2.amazonaws.com"]
    }
  }
}
# Default cluster role, always created.
resource "aws_iam_role" "cluster_log_cluster_role" {
  description = "Role for cluster to write to cluster log bucket"

  path = local.path
  name = local.default_role_name

  assume_role_policy = data.aws_iam_policy_document.assume_role_for_cluster_log_cluster.json
}

# Read/write role, created only when var.add_reader is true.
resource "aws_iam_role" "cluster_log_rw_role" {
  count = var.add_reader == true ? 1 : 0

  description = "Role for cluster to read from and write to cluster log bucket"

  path = local.path
  name = local.read_write_role_name

  assume_role_policy = data.aws_iam_policy_document.assume_role_for_cluster_log_cluster.json
}

###
## write and limited read access
# Policy document granting write access (plus read of the same prefix) on the
# cluster-log bucket, and use of the KMS key that encrypts it.
data "aws_iam_policy_document" "cluster_log_bucket_write_access" {
  # S3: object actions are scoped to the cluster-log prefix; the bare bucket
  # ARN is listed alongside for the bucket-level actions.
  statement {
    sid = "ReadWriteClusterLogs"

    resources = [
      "arn:aws:s3:::${var.databricks_logs_bucket_name}/${local.databricks_bucket_cluster_log_prefix}/*",
      "arn:aws:s3:::${var.databricks_logs_bucket_name}"
    ]

    actions = [
      "s3:PutObject",
      "s3:PutObjectAcl",
      "s3:GetObject",
      "s3:ListBucket",
      "s3:GetBucketLocation"
    ]
  }

  # KMS: encrypt/decrypt with the bucket's encryption key.
  statement {
    sid = "ReadWriteEncryptedClusterLogs"

    resources = [
      var.bucket_kms_encryption_key_arn
    ]

    actions = [
      "kms:Encrypt",
      "kms:Decrypt",
      "kms:GenerateDataKey",
    ]
  }
}

# Managed policy wrapping the write-access policy document.
resource "aws_iam_policy" "cluster_log_bucket_write_access" {
  path   = local.path
  name   = "cluster_log_bucket_write_access_policy"
  policy = data.aws_iam_policy_document.cluster_log_bucket_write_access.json
}

# Attaches the write-access policy to the default cluster role.
resource "aws_iam_role_policy_attachment" "write_access_attachment_default_role" {
  policy_arn = aws_iam_policy.cluster_log_bucket_write_access.arn

  # Reference the role resource rather than local.default_role_name: the value
  # is the same name, but the reference gives Terraform an implicit dependency
  # so the attachment is never attempted before the role exists.
  role = aws_iam_role.cluster_log_cluster_role.name
}

# Attaches the write-access policy to the read/write role; only created when
# var.add_reader is true (same condition as the role itself).
resource "aws_iam_role_policy_attachment" "write_access_attachment_rw_role" {
  count = var.add_reader == true ? 1 : 0

  policy_arn = aws_iam_policy.cluster_log_bucket_write_access.arn

  # Reference the role resource rather than local.read_write_role_name: the
  # value is the same name, but the reference gives Terraform an implicit
  # dependency so the attachment is never attempted before the role exists.
  role = aws_iam_role.cluster_log_rw_role[0].name
}

## non-standard global-read access

# Policy document granting object reads across the whole logs bucket (not just
# the cluster-log prefix). Only rendered when var.add_reader is true.
data "aws_iam_policy_document" "cluster_log_bucket_read_access" {
  count = var.add_reader == true ? 1 : 0

  statement {
    sid = "ReadAllClusterLogs"

    resources = [
      "arn:aws:s3:::${var.databricks_logs_bucket_name}/*",
      "arn:aws:s3:::${var.databricks_logs_bucket_name}"
    ]

    actions = [
      "s3:GetObject",
      "s3:GetObjectVersion"
    ]
  }
}

# Managed policy wrapping the global-read policy document; only created when
# var.add_reader is true.
resource "aws_iam_policy" "cluster_log_bucket_read_access" {
  count = var.add_reader == true ? 1 : 0

  path   = local.path
  name   = "cluster_log_bucket_read_access_policy"
  policy = data.aws_iam_policy_document.cluster_log_bucket_read_access[0].json
}

# Attaches the global-read policy to the read/write role; only created when
# var.add_reader is true (same condition as the role and the policy).
resource "aws_iam_role_policy_attachment" "read_access_attachment" {
  count = var.add_reader == true ? 1 : 0

  policy_arn = aws_iam_policy.cluster_log_bucket_read_access[0].arn

  # Reference the role resource rather than local.read_write_role_name: the
  # value is the same name, but the reference gives Terraform an implicit
  # dependency so the attachment is never attempted before the role exists.
  role = aws_iam_role.cluster_log_rw_role[0].name
}

## kms access

# Identity of the caller on the default (unaliased) aws provider; its
# account_id is used below in KMS grant names and in role ARNs.
data "aws_caller_identity" "current" {
provider = aws
}

# KMS grant letting the default cluster role use the bucket encryption key.
# Created through the aws.logs_destination provider alias
# (presumably the account that owns the key - confirm against the caller).
resource "aws_kms_grant" "bucket_kms_encryption_key_grant_default" {
  provider = aws.logs_destination

  key_id            = var.bucket_kms_encryption_key_arn
  grantee_principal = aws_iam_role.cluster_log_cluster_role.arn
  operations        = local.read_write_operations

  # Grant name embeds the calling account ID.
  name = "cluster-log-kms-grant-${data.aws_caller_identity.current.account_id}-write"
}

# KMS grant letting the read/write role use the bucket encryption key; only
# created when var.add_reader is true. Uses the aws.logs_destination provider
# alias, like the default grant.
resource "aws_kms_grant" "bucket_kms_encryption_key_grant_rw" {
  provider = aws.logs_destination
  count    = var.add_reader == true ? 1 : 0

  key_id            = var.bucket_kms_encryption_key_arn
  grantee_principal = aws_iam_role.cluster_log_rw_role[0].arn
  operations        = local.read_write_operations

  # Grant name embeds the calling account ID.
  name = "cluster-log-kms-grant-${data.aws_caller_identity.current.account_id}-read-write"
}

## standard instance profile(s)

# Instance profile wrapping the default cluster role.
resource "aws_iam_instance_profile" "cluster_log_cluster" {
  path = local.path
  name = "cluster-log-cluster-instance-profile"
  role = aws_iam_role.cluster_log_cluster_role.name
}

# Registers the default instance profile with Databricks.
resource "databricks_instance_profile" "cluster_log_cluster" {
  instance_profile_arn = aws_iam_instance_profile.cluster_log_cluster.arn

  # Explicit ordering kept from the original, in addition to the implicit
  # dependency created by the ARN reference above.
  depends_on = [aws_iam_instance_profile.cluster_log_cluster]
}

# Instance profile wrapping the read/write role; only created when
# var.add_reader is true.
resource "aws_iam_instance_profile" "cluster_log_cluster_rw" {
  count = var.add_reader == true ? 1 : 0

  path = local.path
  name = "cluster-log-rw-instance-profile"
  role = aws_iam_role.cluster_log_rw_role[0].name
}

# Registers the read/write instance profile with Databricks; only created when
# var.add_reader is true.
resource "databricks_instance_profile" "cluster_log_cluster_rw" {
  count = var.add_reader == true ? 1 : 0

  instance_profile_arn = aws_iam_instance_profile.cluster_log_cluster_rw[0].arn

  # Explicit ordering kept from the original, in addition to the implicit
  # dependency created by the ARN reference above.
  depends_on = [aws_iam_instance_profile.cluster_log_cluster_rw]
}

## attach policies to given list of existing instance profiles

# Attaches the write-access policy to every pre-existing role named in
# var.existing_role_names (one attachment per distinct name).
resource "aws_iam_role_policy_attachment" "additional_write_access_attachment" {
  for_each = toset(var.existing_role_names)

  role       = each.value
  policy_arn = aws_iam_policy.cluster_log_bucket_write_access.arn
}

# KMS grant on the bucket encryption key for every pre-existing role named in
# var.existing_role_names; created through the aws.logs_destination provider
# alias like the other grants.
resource "aws_kms_grant" "additional_bucket_kms_encryption_key_grant" {
  provider = aws.logs_destination
  for_each = toset(var.existing_role_names)

  key_id     = var.bucket_kms_encryption_key_arn
  operations = local.read_write_operations

  # NOTE(review): the role ARN is assembled by hand and hard-codes the
  # "/databricks/" path - assumes every existing role lives under that path;
  # confirm against callers.
  grantee_principal = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/databricks/${each.value}"

  # NOTE(review): every for_each instance shares this same grant name; only the
  # grantee principal differs between them - confirm this is intended.
  name = "cluster-log-kms-grant-${data.aws_caller_identity.current.account_id}"
}
Loading

0 comments on commit 7202580

Please sign in to comment.