| subcategory |
| --- |
| AWS |
This data source configures a simple access policy for AWS S3 buckets, so that Databricks can access data stored in them.
# Example bucket that Databricks will be granted access to.
resource "aws_s3_bucket" "this" {
bucket = "<unique_bucket_name>"
acl = "private"
# Lets Terraform destroy the bucket even if it still contains objects.
force_destroy = true
}
# Generate the bucket policy document Databricks needs for this bucket.
# Named "this" so the aws_s3_bucket_policy resource below can reference
# data.databricks_aws_bucket_policy.this.json (the original name "stuff"
# did not match that reference). The argument is `bucket` (see the
# argument reference on this page and the second example), not `bucket_name`.
data "databricks_aws_bucket_policy" "this" {
  bucket = aws_s3_bucket.this.bucket
}
# Attach the generated policy document to the bucket.
resource "aws_s3_bucket_policy" "this" {
bucket = aws_s3_bucket.this.id
# NOTE(review): this references data.databricks_aws_bucket_policy.this,
# but the data source above is declared with the name "stuff" — the two
# names must match for the example to apply cleanly.
policy = data.databricks_aws_bucket_policy.this.json
}
Bucket policy with full access:
# Data bucket that the full-access role and Databricks will both use.
resource "aws_s3_bucket" "ds" {
bucket = "${var.prefix}-ds"
acl = "private"
# Versioning explicitly disabled for this example bucket.
versioning {
enabled = false
}
# Lets Terraform destroy the bucket even if it still contains objects.
force_destroy = true
tags = merge(var.tags, {
Name = "${var.prefix}-ds"
})
}
# Trust policy allowing the EC2 service to assume the role defined below.
data "aws_iam_policy_document" "assume_role_for_ec2" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["ec2.amazonaws.com"]
    }
  }
}
# IAM role that EC2 instances assume to get S3 data access.
resource "aws_iam_role" "data_role" {
name = "${var.prefix}-first-ec2s3"
description = "(${var.prefix}) EC2 Assume Role role for S3 access"
# Trust relationship comes from the policy document defined above.
assume_role_policy = data.aws_iam_policy_document.assume_role_for_ec2.json
tags = var.tags
}
# Generate a bucket policy that also grants full access to the data role.
data "databricks_aws_bucket_policy" "ds" {
# Uses the account-level provider alias.
provider = databricks.mws
full_access_role = aws_iam_role.data_role.arn
bucket = aws_s3_bucket.ds.bucket
}
// Attach the generated policy so Databricks can access this bucket.
resource "aws_s3_bucket_policy" "ds" {
bucket = aws_s3_bucket.ds.id
policy = data.databricks_aws_bucket_policy.ds.json
}
The following arguments are supported:

- `bucket` - (Required) AWS S3 bucket name for which to generate the policy document.
- `full_access_role` - (Optional) Data access role that is granted full access to this bucket.
In addition to all arguments above, the following attributes are exported:
- `json` - (Read-only) AWS IAM policy JSON document that grants Databricks full access to the bucket.
The following resources are used in the same context:
- Provisioning AWS Databricks E2 with a Hub & Spoke firewall for data exfiltration protection guide
- End to end workspace management guide
- databricks_instance_profile to manage AWS EC2 instance profiles that users can launch databricks_cluster and access data, like databricks_mount.
- databricks_mount to mount your cloud storage on `dbfs:/mnt/name`.