diff --git a/terraform/aws/main.tf b/terraform/aws/main.tf
index 016f44ce..cc044e57 100644
--- a/terraform/aws/main.tf
+++ b/terraform/aws/main.tf
@@ -140,9 +140,9 @@ module "flink" {
building_block = var.building_block
flink_container_registry = var.flink_container_registry
flink_image_tag = var.flink_image_tag
- flink_merged_pipeline_release_names = var.flink_merged_pipeline_release_names
+ flink_unified_pipeline_release_names = var.flink_unified_pipeline_release_names
flink_release_names = var.flink_release_names
- merged_pipeline_enabled = var.merged_pipeline_enabled
+ unified_pipeline_enabled = var.unified_pipeline_enabled
flink_checkpoint_store_type = var.flink_checkpoint_store_type
flink_chart_depends_on = [module.kafka, module.postgresql_migration, module.redis_dedup, module.redis_denorm]
postgresql_obsrv_username = module.postgresql.postgresql_obsrv_username
@@ -226,6 +226,12 @@ module "dataset_api" {
dataset_api_namespace = module.eks.dataset_api_namespace
s3_bucket = module.s3.s3_bucket
service_type = var.service_type
+ enable_lakehouse = var.enable_lakehouse
+ lakehouse_host = var.lakehouse_host
+ lakehouse_port = var.lakehouse_port
+ lakehouse_catalog = var.lakehouse_catalog
+ lakehouse_schema = var.lakehouse_schema
+ lakehouse_default_user = var.lakehouse_default_user
}
module "secor" {
@@ -294,6 +300,7 @@ module "command_service" {
postgresql_obsrv_user_password = module.postgresql.postgresql_obsrv_user_password
postgresql_obsrv_database = module.postgresql.postgresql_obsrv_database
flink_namespace = module.flink.flink_namespace
+ enable_lakehouse = var.enable_lakehouse
}
module "postgresql_migration" {
@@ -308,4 +315,66 @@ module "postgresql_migration" {
postgresql_druid_raw_user_password = module.postgresql.postgresql_druid_raw_user_password
postgresql_obsrv_user_password = module.postgresql.postgresql_obsrv_user_password
data_encryption_key = resource.random_string.data_encryption_key.result
+ postgresql_hms_user_password = module.postgresql.postgresql_hms_user_password
+ enable_lakehouse = var.enable_lakehouse
}
+
+module "trino" {
+ source = "../modules/helm/trino"
+ count = var.enable_lakehouse ? 1 : 0
+ trino_namespace = var.hudi_namespace
+ trino_lakehouse_metadata = {
+ "hive.s3.aws-access-key" = var.create_velero_user ? module.iam[0].s3_access_key : var.velero_aws_access_key_id
+ "hive.s3.aws-secret-key" = var.create_velero_user ? module.iam[0].s3_secret_key : var.velero_aws_secret_access_key
+ }
+}
+
+module "hms" {
+ source = "../modules/helm/hive_meta_store"
+ count = var.enable_lakehouse ? 1 : 0
+ hms_namespace = var.hudi_namespace
+ hms_db_metadata = {
+ "DATABASE_HOST" = "postgresql-hl.postgresql.svc"
+ "DATABASE_DB" = module.postgresql.postgresql_hms_database
+ "DATABASE_USER" = module.postgresql.postgresql_hms_username
+ "DATABASE_PASSWORD" = module.postgresql.postgresql_hms_user_password
+ "WAREHOUSE_DIR" = "s3a://${module.s3.s3_bucket}/${var.hudi_prefix_path}/"
+ "THRIFT_PORT" = "9083"
+ }
+ hadoop_metadata = {
+ "fs.s3a.access.key" = var.create_velero_user ? module.iam[0].s3_access_key : var.velero_aws_access_key_id
+ "fs.s3a.secret.key" = var.create_velero_user ? module.iam[0].s3_secret_key : var.velero_aws_secret_access_key
+ }
+}
+
+module "lakehouse-connector" {
+ source = "../modules/helm/lakehouse-connector"
+ count = var.enable_lakehouse ? 1 : 0
+ env = var.env
+ building_block = var.building_block
+ flink_container_registry = var.flink_container_registry
+ flink_lakehouse_image_tag = var.flink_lakehouse_image_tag
+ flink_image_name = var.flink_image_name
+ flink_checkpoint_store_type = var.flink_checkpoint_store_type
+ flink_chart_depends_on = [module.kafka, module.postgresql_migration, module.redis_dedup, module.redis_denorm]
+ postgresql_obsrv_username = module.postgresql.postgresql_obsrv_username
+ postgresql_obsrv_user_password = module.postgresql.postgresql_obsrv_user_password
+ postgresql_obsrv_database = module.postgresql.postgresql_obsrv_database
+ checkpoint_base_url = "s3://${module.s3.checkpoint_storage_bucket}"
+ denorm_redis_namespace = module.redis_denorm.redis_namespace
+ denorm_redis_release_name = module.redis_denorm.redis_release_name
+ dedup_redis_namespace = module.redis_dedup.redis_namespace
+ dedup_redis_release_name = module.redis_dedup.redis_release_name
+ flink_sa_annotations = "eks.amazonaws.com/role-arn: ${module.eks.flink_sa_iam_role}"
+ flink_namespace = module.eks.flink_namespace
+ postgresql_service_name = module.postgresql.postgresql_service_name
+ enable_lakehouse = var.enable_lakehouse
+ postgresql_hms_username = module.postgresql.postgresql_hms_username
+ postgresql_hms_user_password = module.postgresql.postgresql_hms_user_password
+ hudi_bucket = module.s3.s3_bucket
+ hudi_prefix_path = var.hudi_prefix_path
+ hadoop_metadata = {
+ "fs.s3a.access.key" = var.create_velero_user ? module.iam[0].s3_access_key : var.velero_aws_access_key_id
+ "fs.s3a.secret.key" = var.create_velero_user ? module.iam[0].s3_secret_key : var.velero_aws_secret_access_key
+ }
+}
\ No newline at end of file
diff --git a/terraform/aws/variables.tf b/terraform/aws/variables.tf
index 071b5096..c83e90a0 100644
--- a/terraform/aws/variables.tf
+++ b/terraform/aws/variables.tf
@@ -141,17 +141,17 @@ variable "flink_release_names" {
}
}
-variable "flink_merged_pipeline_release_names" {
+variable "flink_unified_pipeline_release_names" {
description = "Create release names"
type = map(string)
default = {
- merged-pipeline = "merged-pipeline"
+ unified-pipeline = "unified-pipeline"
master-data-processor = "master-data-processor"
}
}
-variable "merged_pipeline_enabled" {
- description = "Toggle to deploy merged pipeline"
+variable "unified_pipeline_enabled" {
+ description = "Toggle to deploy unified pipeline"
type = bool
default = true
}
@@ -170,3 +170,64 @@ variable "secor_image_tag" {
type = string
description = "secor image version"
}
+
+
+variable "hudi_namespace" {
+ type = string
+ default = "hudi"
+ description = "Apache Hudi namespace"
+}
+
+variable "hudi_prefix_path" {
+ type = string
+ description = "Hudi prefix path"
+ default = "hudi"
+}
+
+variable "enable_lakehouse" {
+ type = bool
+ description = "Toggle to install hudi components (hms, trino and flink job)"
+}
+
+variable "lakehouse_host" {
+ type = string
+ description = "Lakehouse Host"
+ default = "http://trino.hudi.svc.cluster.local"
+}
+
+variable "lakehouse_port" {
+ type = string
+ description = "Trino port"
+ default = "8080"
+}
+
+variable "lakehouse_catalog" {
+ type = string
+ description = "Lakehouse Catalog name"
+ default = "lakehouse"
+}
+
+variable "lakehouse_schema" {
+ type = string
+ description = "Lakehouse Schema name"
+ default = "hms"
+}
+
+variable "lakehouse_default_user" {
+ type = string
+ description = "Lakehouse default user"
+ default = "admin"
+}
+
+
+variable "flink_image_name" {
+ type = string
+ description = "Flink image name."
+ default = "lakehouse-connector"
+}
+
+variable "flink_lakehouse_image_tag" {
+ type = string
+ description = "Flink lakehouse image tag."
+ default = "1.0.0"
+}
diff --git a/terraform/aws/vars/cluster_overrides.tfvars b/terraform/aws/vars/cluster_overrides.tfvars
index e7ca3a91..11225b78 100644
--- a/terraform/aws/vars/cluster_overrides.tfvars
+++ b/terraform/aws/vars/cluster_overrides.tfvars
@@ -32,3 +32,6 @@ dataset_api_image_tag = "1.0.2-GA"
flink_image_tag = "1.0.1-GA"
secor_image_tag = "1.0.0-GA"
superset_image_tag = "3.0.2"
+
+enable_lakehouse = false
+flink_lakehouse_image_tag = "1.0.1"
diff --git a/terraform/gcp/main.tf b/terraform/gcp/main.tf
index 410564e4..cec40ef5 100644
--- a/terraform/gcp/main.tf
+++ b/terraform/gcp/main.tf
@@ -390,9 +390,9 @@ module "flink" {
building_block = var.building_block
flink_container_registry = var.flink_container_registry
flink_image_tag = var.flink_image_tag
- flink_merged_pipeline_release_names = var.flink_merged_pipeline_release_names
+ flink_unified_pipeline_release_names = var.flink_unified_pipeline_release_names
flink_release_names = var.flink_release_names
- merged_pipeline_enabled = var.merged_pipeline_enabled
+ unified_pipeline_enabled = var.unified_pipeline_enabled
flink_checkpoint_store_type = var.flink_checkpoint_store_type
flink_chart_depends_on = [ module.kafka, module.postgresql_migration, module.redis_dedup, module.redis_denorm ]
postgresql_obsrv_username = module.postgresql.postgresql_obsrv_username
diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf
index 165d3771..eb60f2af 100644
--- a/terraform/gcp/variables.tf
+++ b/terraform/gcp/variables.tf
@@ -226,17 +226,17 @@ variable "flink_release_names" {
}
}
-variable "flink_merged_pipeline_release_names" {
+variable "flink_unified_pipeline_release_names" {
description = "Create release names"
type = map(string)
default = {
- merged-pipeline = "merged-pipeline"
+ unified-pipeline = "unified-pipeline"
master-data-processor = "master-data-processor"
}
}
-variable "merged_pipeline_enabled" {
- description = "Toggle to deploy merged pipeline"
+variable "unified_pipeline_enabled" {
+ description = "Toggle to deploy unified pipeline"
type = bool
default = true
}
diff --git a/terraform/modules/helm/command_service/command-service-helm-chart/templates/command_api_configmap.yml b/terraform/modules/helm/command_service/command-service-helm-chart/templates/command_api_configmap.yml
index a1b4e9dd..cbe672ca 100644
--- a/terraform/modules/helm/command_service/command-service-helm-chart/templates/command_api_configmap.yml
+++ b/terraform/modules/helm/command_service/command-service-helm-chart/templates/command_api_configmap.yml
@@ -9,3 +9,6 @@ metadata:
data:
service_config: |
{{ .Values.service_config | indent 4 }}
+ {{- if .Values.enable_lakehouse }}
+{{ toYaml .Values.hudi_jobs | indent 8 }}
+ {{- end }}
\ No newline at end of file
diff --git a/terraform/modules/helm/command_service/command-service-helm-chart/values.yaml b/terraform/modules/helm/command_service/command-service-helm-chart/values.yaml
index 059727b6..0aad64f5 100644
--- a/terraform/modules/helm/command_service/command-service-helm-chart/values.yaml
+++ b/terraform/modules/helm/command_service/command-service-helm-chart/values.yaml
@@ -8,21 +8,14 @@ cpu_requests: 0.1
cpu_limits: 0.1
memory_requests: 512Mi
memory_limits: 512Mi
+enable_lakehouse: false
+
+hudi_jobs:
+ - name: "LakehouseConnectorJob"
+ release_name: lakehouse-connector
+ job_manager_url: "lakehouse-connector-jobmanager.flink.svc.cluster.local:8081"
service_config: |
- flink:
- namespace: flink
- reinstall_sleep_time: 3
- jobs:
- - name: "PipelineMergedJob"
- release_name: merged-pipeline
- job_manager_url: "merged-pipeline-jobmanager.flink.svc.cluster.local:8081"
- - name: "MasterDataProcessorJob"
- release_name: master-data-processor
- job_manager_url: "master-data-processor-jobmanager.flink.svc.cluster.local:8081"
- - name: "KafkaConnectorJob"
- release_name: kafka-connector
- job_manager_url: "kafka-connector-jobmanager.flink.svc.cluster.local:8081"
commands:
RESTART_PIPELINE:
@@ -37,6 +30,21 @@ service_config: |
database: obsrv
helm_charts_base_dir: /app/helm-charts
+
+ flink:
+ namespace: flink
+ reinstall_sleep_time: 3
+ jobs:
+ - name: "UnifiedPipelineJob"
+ release_name: unified-pipeline
+ job_manager_url: "unified-pipeline-jobmanager.flink.svc.cluster.local:8081"
+ - name: "MasterDataProcessorJob"
+ release_name: master-data-processor
+ job_manager_url: "master-data-processor-jobmanager.flink.svc.cluster.local:8081"
+ - name: "KafkaConnectorJob"
+ release_name: kafka-connector
+ job_manager_url: "kafka-connector-jobmanager.flink.svc.cluster.local:8081"
+
rbac:
# kubernetes.rest-service.exposed.type: NodePort requires
diff --git a/terraform/modules/helm/command_service/command_service.yaml.tfpl b/terraform/modules/helm/command_service/command_service.yaml.tfpl
index 022d8426..fd248e07 100644
--- a/terraform/modules/helm/command_service/command_service.yaml.tfpl
+++ b/terraform/modules/helm/command_service/command_service.yaml.tfpl
@@ -5,13 +5,26 @@ image:
tag: ${command_service_image_tag}
service_config: |
+
+ commands:
+ RESTART_PIPELINE:
+ workflow:
+ - RESTART_PIPELINE_JOBS
+
+ postgres:
+ db_host: postgresql-hl.postgresql.svc.cluster.local
+ db_port: 5432
+ db_user: ${postgresql_obsrv_username}
+ db_password: ${postgresql_obsrv_user_password}
+ database: ${postgresql_obsrv_database}
+
flink:
namespace: ${flink_namespace}
reinstall_sleep_time: 3
jobs:
- - name: "PipelineMergedJob"
- release_name: merged-pipeline
- job_manager_url: "merged-pipeline-jobmanager.flink.svc.cluster.local:8081"
+ - name: "UnifiedPipelineJob"
+ release_name: unified-pipeline
+ job_manager_url: "unified-pipeline-jobmanager.flink.svc.cluster.local:8081"
- name: "MasterDataProcessor"
release_name: master-data-processor
job_manager_url: "master-data-processor-jobmanager.flink.svc.cluster.local:8081"
@@ -19,17 +32,14 @@ service_config: |
release_name: kafka-connector
job_manager_url: "kafka-connector-jobmanager.flink.svc.cluster.local:8081"
- commands:
- RESTART_PIPELINE:
- workflow:
- - RESTART_PIPELINE_JOBS
- postgres:
- db_host: postgresql-hl.postgresql.svc.cluster.local
- db_port: 5432
- db_user: ${postgresql_obsrv_username}
- db_password: ${postgresql_obsrv_user_password}
- database: ${postgresql_obsrv_database}
+ helm_charts_base_dir: /app/helm-charts
+
+
+enable_lakehouse: ${enable_lakehouse}
+hudi_jobs:
+ - name: "LakehouseConnectorJob"
+ release_name: lakehouse-connector
+ job_manager_url: "lakehouse-connector-jobmanager.flink.svc.cluster.local:8081"
- helm_charts_base_dir: /app/helm-charts
\ No newline at end of file
diff --git a/terraform/modules/helm/command_service/main.tf b/terraform/modules/helm/command_service/main.tf
index d02cfa00..ca403fb2 100644
--- a/terraform/modules/helm/command_service/main.tf
+++ b/terraform/modules/helm/command_service/main.tf
@@ -19,6 +19,8 @@ resource "helm_release" "command_service" {
postgresql_obsrv_user_password = var.postgresql_obsrv_user_password
postgresql_obsrv_database = var.postgresql_obsrv_database
flink_namespace = var.flink_namespace
+ enable_lakehouse = var.enable_lakehouse
+
})
]
}
\ No newline at end of file
diff --git a/terraform/modules/helm/command_service/variables.tf b/terraform/modules/helm/command_service/variables.tf
index bb4aac91..084169e8 100644
--- a/terraform/modules/helm/command_service/variables.tf
+++ b/terraform/modules/helm/command_service/variables.tf
@@ -85,4 +85,8 @@ variable "flink_namespace" {
default = "flink"
}
-
\ No newline at end of file
+
+variable "enable_lakehouse" {
+ type = bool
+ description = "Toggle to install hudi components (hms, trino and flink job)"
+}
\ No newline at end of file
diff --git a/terraform/modules/helm/dataset_api/dataset-api-helm-chart/templates/configmap.yaml b/terraform/modules/helm/dataset_api/dataset-api-helm-chart/templates/configmap.yaml
index b6e0ed93..550f1b0a 100644
--- a/terraform/modules/helm/dataset_api/dataset-api-helm-chart/templates/configmap.yaml
+++ b/terraform/modules/helm/dataset_api/dataset-api-helm-chart/templates/configmap.yaml
@@ -44,5 +44,11 @@ data:
grafana_token: "{{ .Values.grafana_service.GRAFANA_TOKEN }}"
{{- end }}
-
+ {{- if .Values.enable_lakehouse }}
+ lakehouse_host: "{{ .Values.lakehouse_service.LAKEHOUSE_HOST }}"
+ lakehouse_port: "{{ .Values.lakehouse_service.LAKEHOUSE_PORT }}"
+ lakehouse_catalog: "{{ .Values.lakehouse_service.LAKEHOUSE_CATALOG }}"
+ lakehouse_schema: "{{ .Values.lakehouse_service.LAKEHOUSE_SCHEMA }}"
+ lakehouse_default_user: "{{ .Values.lakehouse_service.LAKEHOUSE_DEFAULT_USER }}"
+ {{- end}}
diff --git a/terraform/modules/helm/dataset_api/dataset-api-helm-chart/values.yaml b/terraform/modules/helm/dataset_api/dataset-api-helm-chart/values.yaml
index 3bdafc3a..715de349 100644
--- a/terraform/modules/helm/dataset_api/dataset-api-helm-chart/values.yaml
+++ b/terraform/modules/helm/dataset_api/dataset-api-helm-chart/values.yaml
@@ -51,4 +51,12 @@ EXCLUDE_DATASOURCE_VALIDATION: "system-stats,failed-events-summary,masterdata-sy
service_account_annotations:
eks.amazonaws.com/role-arn: ""
service:
- type: LoadBalancer
\ No newline at end of file
+ type: LoadBalancer
+
+enable_lakehouse: false
+lakehouse_service:
+ LAKEHOUSE_HOST: http://obsrv-trino
+ LAKEHOUSE_PORT: "8080"
+ LAKEHOUSE_CATALOG: "lakehouse"
+ LAKEHOUSE_SCHEMA: "hms"
+ LAKEHOUSE_DEFAULT_USER: "admin"
\ No newline at end of file
diff --git a/terraform/modules/helm/dataset_api/dataset_api.yaml.tfpl b/terraform/modules/helm/dataset_api/dataset_api.yaml.tfpl
index 3082621f..7e9de262 100644
--- a/terraform/modules/helm/dataset_api/dataset_api.yaml.tfpl
+++ b/terraform/modules/helm/dataset_api/dataset_api.yaml.tfpl
@@ -30,4 +30,11 @@ exhaust_service:
service_account_annotations:
${dataset_api_sa_annotations}
service:
- type: ${service_type}
\ No newline at end of file
+ type: ${service_type}
+enable_lakehouse: ${enable_lakehouse}
+lakehouse_service:
+ LAKEHOUSE_HOST: ${lakehouse_host}
+ LAKEHOUSE_PORT: ${lakehouse_port}
+ LAKEHOUSE_CATALOG: ${lakehouse_catalog}
+ LAKEHOUSE_SCHEMA: ${lakehouse_schema}
+ LAKEHOUSE_DEFAULT_USER: ${lakehouse_default_user}
\ No newline at end of file
diff --git a/terraform/modules/helm/dataset_api/main.tf b/terraform/modules/helm/dataset_api/main.tf
index 82f18f8f..8d4b0450 100644
--- a/terraform/modules/helm/dataset_api/main.tf
+++ b/terraform/modules/helm/dataset_api/main.tf
@@ -27,6 +27,13 @@ resource "helm_release" "dataset_api" {
dedup_redis_release_name = var.dedup_redis_release_name
s3_bucket = var.s3_bucket
service_type = var.service_type
+ enable_lakehouse = var.enable_lakehouse
+ lakehouse_host = var.lakehouse_host
+ lakehouse_port = var.lakehouse_port
+ lakehouse_catalog = var.lakehouse_catalog
+ lakehouse_schema = var.lakehouse_schema
+ lakehouse_default_user = var.lakehouse_default_user
+
}
)
]
diff --git a/terraform/modules/helm/dataset_api/variables.tf b/terraform/modules/helm/dataset_api/variables.tf
index 8996d798..a2207650 100644
--- a/terraform/modules/helm/dataset_api/variables.tf
+++ b/terraform/modules/helm/dataset_api/variables.tf
@@ -125,3 +125,38 @@ variable "s3_bucket" {
description = "S3 bucket name for dataset api exhaust."
default = ""
}
+
+variable "enable_lakehouse" {
+ type = bool
+ description = "Toggle to install hudi components (hms, trino and flink job)"
+}
+
+variable "lakehouse_host" {
+ type = string
+ description = "Lakehouse Host"
+ default = "http://trino.hudi.svc.cluster.local"
+}
+
+variable "lakehouse_port" {
+ type = string
+ description = "Trino port"
+ default = "8080"
+}
+
+variable "lakehouse_catalog" {
+ type = string
+ description = "Lakehouse Catalog name"
+ default = "lakehouse"
+}
+
+variable "lakehouse_schema" {
+ type = string
+ description = "Lakehouse Schema name"
+ default = "hms"
+}
+
+variable "lakehouse_default_user" {
+ type = string
+ description = "Lakehouse default user"
+ default = "admin"
+}
\ No newline at end of file
diff --git a/terraform/modules/helm/flink/flink-helm-chart/values.yaml b/terraform/modules/helm/flink/flink-helm-chart/values.yaml
index 131e8e96..eb066f5f 100644
--- a/terraform/modules/helm/flink/flink-helm-chart/values.yaml
+++ b/terraform/modules/helm/flink/flink-helm-chart/values.yaml
@@ -2,7 +2,7 @@ namespace: "flink"
imagepullsecrets: ""
image:
registry: sanketikahub
- repository: merged-pipeline
+ repository: unified-pipeline
tag: 1.0.0-GA
serviceMonitor:
enabled: false
@@ -161,8 +161,8 @@ base_config: |
port = "9042"
}
-merged-pipeline:
- merged-pipeline: |+
+unified-pipeline:
+ unified-pipeline: |+
include file("/data/flink/conf/baseconfig.conf")
kafka {
input.topic = ${job.env}".ingest"
@@ -176,6 +176,7 @@ merged-pipeline:
output.denorm.topic = ${job.env}".denorm"
output.denorm.failed.topic = ${job.env}".failed"
output.transform.topic = ${job.env}".transform"
+ output.transform.failed.topic = ${job.env}".failed"
stats.topic = ${job.env}".stats"
groupId = ${job.env}"-single-pipeline-group"
producer {
@@ -211,7 +212,7 @@ merged-pipeline:
taskmanager.memory.process.size: 1700m
jobmanager.memory.process.size: 1600m
state.savepoints.dir: file:///tmp
- job_classname: org.sunbird.obsrv.pipeline.task.MergedPipelineStreamTask
+ job_classname: org.sunbird.obsrv.pipeline.task.UnifiedPipelineStreamTask
extractor:
extractor: |+
@@ -332,6 +333,7 @@ transformer:
kafka {
input.topic = ${job.env}".denorm"
output.transform.topic = ${job.env}".transform"
+ output.transform.failed.topic = ${job.env}".failed"
groupId = ${job.env}"-transformer-group"
producer {
max-request-size = 10000024
@@ -436,6 +438,7 @@ master-data-processor:
output.duplicate.topic = ${job.env}".masterdata.failed"
output.denorm.topic = ${job.env}".masterdata.denorm"
output.transform.topic = ${job.env}".masterdata.transform"
+ output.transform.failed.topic = ${job.env}".masterdata.failed"
stats.topic = ${job.env}".masterdata.stats"
groupId = ${job.env}"-masterdata-pipeline-group"
diff --git a/terraform/modules/helm/flink/main.tf b/terraform/modules/helm/flink/main.tf
index 6030928d..7ebcddd3 100644
--- a/terraform/modules/helm/flink/main.tf
+++ b/terraform/modules/helm/flink/main.tf
@@ -20,7 +20,7 @@ resource "helm_release" "flink_sa" {
}
resource "helm_release" "flink" {
- for_each = contains([var.merged_pipeline_enabled], true ) ? var.flink_merged_pipeline_release_names : var.flink_release_names
+ for_each = contains([var.unified_pipeline_enabled], true ) ? var.flink_unified_pipeline_release_names : var.flink_release_names
name = each.key
chart = "${path.module}/${var.flink_chart_path}"
namespace = var.flink_namespace
diff --git a/terraform/modules/helm/flink/variables.tf b/terraform/modules/helm/flink/variables.tf
index c0491ef6..b3e93125 100644
--- a/terraform/modules/helm/flink/variables.tf
+++ b/terraform/modules/helm/flink/variables.tf
@@ -35,7 +35,7 @@ variable "flink_chart_path" {
# variable "flink_release_name" {
# type = string
# description = "Flink helm release name."
-# default = "merged-pipeline"
+# default = "unified-pipeline"
# }
# *** changed this to release map.
@@ -182,12 +182,12 @@ variable "flink_release_names" {
type = map(string)
}
-variable "flink_merged_pipeline_release_names" {
+variable "flink_unified_pipeline_release_names" {
description = "Create release names"
type = map(string)
}
-variable "merged_pipeline_enabled" {
- description = "Toggle to enable merged pipeline"
+variable "unified_pipeline_enabled" {
+ description = "Toggle to enable unified pipeline"
type = bool
}
diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/.helmignore b/terraform/modules/helm/hive_meta_store/hms-helm-chart/.helmignore
new file mode 100644
index 00000000..0e8a0eb3
--- /dev/null
+++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/Chart.yaml b/terraform/modules/helm/hive_meta_store/hms-helm-chart/Chart.yaml
new file mode 100644
index 00000000..856304b6
--- /dev/null
+++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/Chart.yaml
@@ -0,0 +1,24 @@
+apiVersion: v2
+name: metastore-app
+description: A Helm chart for Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "1.16.0"
diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/NOTES.txt b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/NOTES.txt
new file mode 100644
index 00000000..fe60e1e6
--- /dev/null
+++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/NOTES.txt
@@ -0,0 +1,22 @@
+1. Get the application URL by running these commands:
+{{- if .Values.ingress.enabled }}
+{{- range $host := .Values.ingress.hosts }}
+ {{- range .paths }}
+ http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
+ {{- end }}
+{{- end }}
+{{- else if contains "NodePort" .Values.service.type }}
+ export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "metastore-app.fullname" . }})
+ export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+ echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.service.type }}
+ NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+ You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "metastore-app.fullname" . }}'
+ export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "metastore-app.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
+ echo http://$SERVICE_IP:{{ .Values.service.port }}
+{{- else if contains "ClusterIP" .Values.service.type }}
+ export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "metastore-app.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
+ export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
+ echo "Visit http://127.0.0.1:8080 to use your application"
+ kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
+{{- end }}
diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/_helpers.tpl b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/_helpers.tpl
new file mode 100644
index 00000000..5b147765
--- /dev/null
+++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "metastore-app.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "metastore-app.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "metastore-app.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "metastore-app.labels" -}}
+helm.sh/chart: {{ include "metastore-app.chart" . }}
+{{ include "metastore-app.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "metastore-app.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "metastore-app.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "metastore-app.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "metastore-app.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/deployment.yaml b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/deployment.yaml
new file mode 100644
index 00000000..7dedd878
--- /dev/null
+++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/deployment.yaml
@@ -0,0 +1,73 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {{ include "metastore-app.fullname" . }}
+ labels:
+ {{- include "metastore-app.labels" . | nindent 4 }}
+spec:
+ {{- if not .Values.autoscaling.enabled }}
+ replicas: {{ .Values.replicaCount }}
+ {{- end }}
+ selector:
+ matchLabels:
+ {{- include "metastore-app.selectorLabels" . | nindent 6 }}
+ template:
+ metadata:
+ {{- with .Values.podAnnotations }}
+ annotations:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ labels:
+ {{- include "metastore-app.labels" . | nindent 8 }}
+ {{- with .Values.podLabels }}
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ spec:
+ {{- with .Values.imagePullSecrets }}
+ imagePullSecrets:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ serviceAccountName: {{ include "metastore-app.serviceAccountName" . }}
+ securityContext:
+ {{- toYaml .Values.podSecurityContext | nindent 8 }}
+ containers:
+ - name: {{ .Chart.Name }}
+ securityContext:
+ {{- toYaml .Values.securityContext | nindent 12 }}
+ image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+ imagePullPolicy: {{ .Values.image.pullPolicy }}
+ env:
+ {{- range $key, $value := .Values.envVars }}
+ - name: {{ $key | quote }}
+ value: {{ $value | quote }}
+ {{- end }}
+ ports:
+ - name: http
+ containerPort: {{ .Values.service.port }}
+ protocol: TCP
+ livenessProbe:
+ {{- toYaml .Values.livenessProbe | nindent 12 }}
+ readinessProbe:
+ {{- toYaml .Values.readinessProbe | nindent 12 }}
+ resources:
+ {{- toYaml .Values.resources | nindent 12 }}
+ {{- with .Values.volumeMounts }}
+ volumeMounts:
+ {{- toYaml . | nindent 12 }}
+ {{- end }}
+ {{- with .Values.volumes }}
+ volumes:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ {{- with .Values.nodeSelector }}
+ nodeSelector:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ {{- with .Values.affinity }}
+ affinity:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ {{- with .Values.tolerations }}
+ tolerations:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/hadoop-config-map.yaml b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/hadoop-config-map.yaml
new file mode 100644
index 00000000..f3da004f
--- /dev/null
+++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/hadoop-config-map.yaml
@@ -0,0 +1,14 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: hadoop-config
+data:
+ core-site.xml: |
+
+ {{- range $key, $value := .Values.hadoop_core_site }}
+
+ {{ $key }}
+ {{ $value }}
+
+ {{- end }}
+
\ No newline at end of file
diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/service.yaml b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/service.yaml
new file mode 100644
index 00000000..a09130c1
--- /dev/null
+++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ include "metastore-app.fullname" . }}
+ labels:
+ {{- include "metastore-app.labels" . | nindent 4 }}
+spec:
+ type: {{ .Values.service.type }}
+ ports:
+ - port: {{ .Values.service.port }}
+ targetPort: http
+ protocol: TCP
+ name: http
+ selector:
+ {{- include "metastore-app.selectorLabels" . | nindent 4 }}
diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/tests/test-connection.yaml b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/tests/test-connection.yaml
new file mode 100644
index 00000000..319ddc75
--- /dev/null
+++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/tests/test-connection.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Pod
+metadata:
+ name: "{{ include "metastore-app.fullname" . }}-test-connection"
+ labels:
+ {{- include "metastore-app.labels" . | nindent 4 }}
+ annotations:
+ "helm.sh/hook": test
+spec:
+ containers:
+ - name: wget
+ image: busybox
+ command: ['wget']
+ args: ['{{ include "metastore-app.fullname" . }}:{{ .Values.service.port }}']
+ restartPolicy: Never
diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/values.yaml b/terraform/modules/helm/hive_meta_store/hms-helm-chart/values.yaml
new file mode 100644
index 00000000..f604aaf5
--- /dev/null
+++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/values.yaml
@@ -0,0 +1,117 @@
+# Default values for metastore-app.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+
+image:
+ repository: sanketikahub/hms
+ pullPolicy: Always
+ # Overrides the image tag whose default is the chart appVersion.
+ tag: "1.0.0"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+serviceAccount:
+ # Specifies whether a service account should be created
+ create: false
+ # Automatically mount a ServiceAccount's API credentials?
+ automount: false
+ # Annotations to add to the service account
+ annotations: {}
+ # The name of the service account to use.
+ # If not set and create is true, a name is generated using the fullname template
+ name: ""
+
+podAnnotations: {}
+podLabels: {}
+
+podSecurityContext:
+ {}
+ # fsGroup: 2000
+
+securityContext:
+ {}
+ # capabilities:
+ # drop:
+ # - ALL
+ # readOnlyRootFilesystem: true
+ # runAsNonRoot: true
+ # runAsUser: 1000
+
+service:
+ type: ClusterIP
+ port: 9083
+
+ingress:
+ enabled: false
+ className: ""
+ annotations:
+ {}
+ # kubernetes.io/ingress.class: nginx
+ # kubernetes.io/tls-acme: "true"
+ hosts:
+ - host: chart-example.local
+ paths:
+ - path: /
+ pathType: ImplementationSpecific
+ tls: []
+ # - secretName: chart-example-tls
+ # hosts:
+ # - chart-example.local
+
+resources:
+ {}
+ # We usually recommend not to specify default resources and to leave this as a conscious
+ # choice for the user. This also increases chances charts run on environments with little
+ # resources, such as Minikube. If you do want to specify resources, uncomment the following
+ # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+ # limits:
+ # cpu: 100m
+ # memory: 128Mi
+ # requests:
+ # cpu: 100m
+ # memory: 128Mi
+
+# livenessProbe:
+# httpGet:
+# path: /
+# port: http
+# readinessProbe:
+# httpGet:
+# path: /
+# port: http
+
+autoscaling:
+ enabled: false
+ minReplicas: 1
+ maxReplicas: 2
+ targetCPUUtilizationPercentage: 80
+ # targetMemoryUtilizationPercentage: 80
+
+# Additional volumes on the output Deployment definition.
+volumes:
+- name: hadoop-config-volume
+ configMap:
+ name: hadoop-config
+ items:
+ - key: core-site.xml
+ path: core-site.xml
+
+# Additional volumeMounts on the output Deployment definition.
+volumeMounts:
+- name: hadoop-config-volume
+ mountPath: /opt/hadoop/etc/hadoop/core-site.xml
+ subPath: core-site.xml
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+envVars: {}
+
+hadoop_core_site: {}
diff --git a/terraform/modules/helm/hive_meta_store/hms.yaml.tfpl b/terraform/modules/helm/hive_meta_store/hms.yaml.tfpl
new file mode 100644
index 00000000..d95867a3
--- /dev/null
+++ b/terraform/modules/helm/hive_meta_store/hms.yaml.tfpl
@@ -0,0 +1,115 @@
+# Default values for metastore-app.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: ${hms_replica_count}
+
+image:
+ repository: ${hms_image.registry}/${hms_image.name}
+ pullPolicy: ${hms_image.pullPolicy}
+ tag: ${hms_image.tag}
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+serviceAccount:
+ # Specifies whether a service account should be created
+ create: false
+ # Automatically mount a ServiceAccount's API credentials?
+ automount: false
+ # Annotations to add to the service account
+ annotations: {}
+ # The name of the service account to use.
+ # If not set and create is true, a name is generated using the fullname template
+ name: ""
+
+podAnnotations: {}
+podLabels: {}
+
+podSecurityContext:
+ {}
+ # fsGroup: 2000
+
+securityContext:
+ {}
+ # capabilities:
+ # drop:
+ # - ALL
+ # readOnlyRootFilesystem: true
+ # runAsNonRoot: true
+ # runAsUser: 1000
+
+service:
+ type: ${hms_service.type}
+ port: ${hms_service.port}
+
+ingress:
+ enabled: false
+ className: ""
+ annotations:
+ {}
+ # kubernetes.io/ingress.class: nginx
+ # kubernetes.io/tls-acme: "true"
+ hosts:
+ - host: chart-example.local
+ paths:
+ - path: /
+ pathType: ImplementationSpecific
+ tls: []
+ # - secretName: chart-example-tls
+ # hosts:
+ # - chart-example.local
+
+resources:
+ {}
+ # We usually recommend not to specify default resources and to leave this as a conscious
+ # choice for the user. This also increases chances charts run on environments with little
+ # resources, such as Minikube. If you do want to specify resources, uncomment the following
+ # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+ # limits:
+ # cpu: 100m
+ # memory: 128Mi
+ # requests:
+ # cpu: 100m
+ # memory: 128Mi
+
+# livenessProbe:
+# httpGet:
+# path: /
+# port: http
+# readinessProbe:
+# httpGet:
+# path: /
+# port: http
+
+autoscaling:
+ enabled: false
+ minReplicas: 1
+ maxReplicas: 2
+ targetCPUUtilizationPercentage: 80
+ # targetMemoryUtilizationPercentage: 80
+
+# Additional volumes on the output Deployment definition.
+volumes:
+- name: hadoop-config-volume
+ configMap:
+ name: hadoop-config
+ items:
+ - key: core-site.xml
+ path: core-site.xml
+
+# Additional volumeMounts on the output Deployment definition.
+volumeMounts:
+- name: hadoop-config-volume
+ mountPath: /opt/hadoop/etc/hadoop/core-site.xml
+ subPath: core-site.xml
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+envVars: ${hms_env_vars}
+hadoop_core_site: ${hadoop_conf}
\ No newline at end of file
diff --git a/terraform/modules/helm/hive_meta_store/main.tf b/terraform/modules/helm/hive_meta_store/main.tf
new file mode 100644
index 00000000..1480639c
--- /dev/null
+++ b/terraform/modules/helm/hive_meta_store/main.tf
@@ -0,0 +1,21 @@
+resource "helm_release" "hms" {
+ name = var.hms_release_name
+ chart = "${path.module}/${var.hms_chart_path}"
+ namespace = var.hms_namespace
+ create_namespace = var.hms_create_namespace
+ depends_on = [var.hms_chart_depends_on]
+ force_update = true
+ cleanup_on_fail = true
+ atomic = true
+ values = [
+ templatefile("${path.module}/${var.hms_custom_values_yaml}",
+ {
+ hms_image = var.hms_image
+ hms_replica_count = var.hms_replica_count
+ hms_service = var.hms_service
+ hadoop_conf = jsonencode(local.hadoop_configuration)
+ hms_env_vars = jsonencode(local.env_vars)
+ }
+ )
+ ]
+}
diff --git a/terraform/modules/helm/hive_meta_store/variables.tf b/terraform/modules/helm/hive_meta_store/variables.tf
new file mode 100644
index 00000000..09e09854
--- /dev/null
+++ b/terraform/modules/helm/hive_meta_store/variables.tf
@@ -0,0 +1,93 @@
+variable "hms_image" {
+ type = object({ name = string, tag = string, registry = string, pullPolicy = string })
+  description = "HMS image metadata"
+ default = {
+ name = "hms"
+ tag = "1.0.3"
+ pullPolicy = "IfNotPresent"
+ registry = "sanketikahub"
+ }
+}
+
+variable "hms_namespace" {
+ type = string
+ description = "HMS namespace"
+ default = "hudi"
+}
+
+variable "hms_create_namespace" {
+ type = bool
+ description = "Create HMS namespace."
+ default = true
+}
+
+variable "hms_wait_for_jobs" {
+ type = bool
+  description = "HMS wait for jobs parameter."
+ default = false
+}
+
+variable "hms_chart_install_timeout" {
+ type = number
+ description = "HMS chart install timeout."
+ default = 900
+}
+
+variable "hms_custom_values_yaml" {
+ type = string
+ description = "HMS chart values.yaml path."
+ default = "hms.yaml.tfpl"
+}
+
+variable "hms_release_name" {
+ type = string
+ description = "HMS release name"
+ default = "hms"
+}
+
+variable "hms_chart_path" {
+ type = string
+ description = "HMS helm chart path."
+ default = "hms-helm-chart"
+}
+
+variable "hms_chart_depends_on" {
+ type = any
+ description = "List of helm release names that this chart depends on."
+ default = ""
+}
+
+variable "hms_replica_count" {
+ type = number
+ description = "HMS replica count"
+ default = 1
+}
+
+variable "hms_service" {
+ type = object({ type = string, port = number })
+ description = "HMS service metadata"
+ default = { type = "ClusterIP", port = 9083 }
+}
+
+locals {
+ default_hms_db_metadata = {}
+ default_hadoop_metadata = {
+ "fs.s3a.impl" = "org.apache.hadoop.fs.s3a.S3AFileSystem"
+ "fs.s3a.connection.ssl.enabled" = "false"
+ }
+}
+
+variable "hms_db_metadata" {
+ type = map(string)
+ description = "HMS database connection details"
+}
+
+variable "hadoop_metadata" {
+ type = map(string)
+ description = "Hadoop core site configuration"
+}
+
+locals {
+ env_vars = merge(local.default_hms_db_metadata, var.hms_db_metadata)
+ hadoop_configuration = merge(local.default_hadoop_metadata, var.hadoop_metadata)
+}
\ No newline at end of file
diff --git a/terraform/modules/helm/kafka/kafka.yaml.tfpl b/terraform/modules/helm/kafka/kafka.yaml.tfpl
index 68263728..9eb851f4 100644
--- a/terraform/modules/helm/kafka/kafka.yaml.tfpl
+++ b/terraform/modules/helm/kafka/kafka.yaml.tfpl
@@ -8,7 +8,9 @@ provisioning:
partitions: 1
replicationFactor: 1
# https://kafka.apache.org/documentation/#topicconfigs
-
+ - name: "${input_hudi_topic}"
+ partitions: 1
+ replicationFactor: 1
persistence:
size: 50Gi
diff --git a/terraform/modules/helm/kafka/main.tf b/terraform/modules/helm/kafka/main.tf
index 51e3b087..c84aee90 100644
--- a/terraform/modules/helm/kafka/main.tf
+++ b/terraform/modules/helm/kafka/main.tf
@@ -14,6 +14,8 @@ resource "helm_release" "kafka" {
{
input_topic = "${var.env}.${var.kafka_input_topic}"
input_masterdata_topic = "${var.env}.${var.kafka_input_masterdata_topic}"
+ input_hudi_topic = "${var.env}.${var.kafka_input_hudi_topic}"
+
}
)
]
diff --git a/terraform/modules/helm/kafka/variables.tf b/terraform/modules/helm/kafka/variables.tf
index 1afed714..67219565 100644
--- a/terraform/modules/helm/kafka/variables.tf
+++ b/terraform/modules/helm/kafka/variables.tf
@@ -73,4 +73,9 @@ variable "kafka_install_timeout" {
type = number
description = "Kafka chart install timeout."
default = 1200
+}
+variable "kafka_input_hudi_topic" {
+ type = string
+  description = "Kafka hudi input topic"
+ default = "hudi.connector.in"
}
\ No newline at end of file
diff --git a/terraform/modules/helm/lakehouse-connector/.helmignore b/terraform/modules/helm/lakehouse-connector/.helmignore
new file mode 100644
index 00000000..50af0317
--- /dev/null
+++ b/terraform/modules/helm/lakehouse-connector/.helmignore
@@ -0,0 +1,22 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/terraform/modules/helm/lakehouse-connector/flink.yaml.tfpl b/terraform/modules/helm/lakehouse-connector/flink.yaml.tfpl
new file mode 100644
index 00000000..f2c50a0f
--- /dev/null
+++ b/terraform/modules/helm/lakehouse-connector/flink.yaml.tfpl
@@ -0,0 +1,129 @@
+namespace: ${flink_namespace}
+checkpoint_store_type: ${checkpoint_store_type}
+s3_access_key: ${s3_access_key}
+s3_secret_key: ${s3_secret_key}
+azure_account: ${azure_account}
+azure_secret: ${azure_secret}
+image:
+ registry: ${flink_container_registry}
+ repository: ${flink_image_name}
+ tag: ${flink_lakehouse_image_tag}
+ imagePullSecrets: ""
+base_config: |
+ job {
+ env = "${env}"
+ enable.distributed.checkpointing = true
+ statebackend {
+ base.url = "${checkpoint_base_url}"
+ }
+ }
+ kafka {
+ broker-servers = "kafka-headless.kafka.svc.cluster.local:9092"
+ producer.broker-servers = "kafka-headless.kafka.svc.cluster.local:9092"
+ consumer.broker-servers = "kafka-headless.kafka.svc.cluster.local:9092"
+    zookeeper = "kafka-zookeeper-headless.kafka.svc.cluster.local:2181"
+ producer {
+ max-request-size = 1572864
+ batch.size = 98304
+ linger.ms = 10
+ compression = "snappy"
+ }
+ output.system.event.topic = $${job.env}".system.events"
+ output.failed.topic = $${job.env}".failed"
+ }
+ task {
+ parallelism = 1
+ consumer.parallelism = 1
+ checkpointing.interval = 10000
+ checkpointing.pause.between.seconds = 10000
+ restart-strategy.attempts = 3
+ restart-strategy.delay = 30000 # in milli-seconds
+ }
+
+ redis.connection.timeout = 100
+ redis {
+ host = ${dedup_redis_release_name}-master.${dedup_redis_namespace}.svc.cluster.local
+ port = 6379
+ }
+
+ redis-meta {
+ host = ${denorm_redis_release_name}-master.${denorm_redis_namespace}.svc.cluster.local
+ port = 6379
+ }
+
+ postgres {
+ host = ${postgresql_service_name}.svc.cluster.local
+ port = 5432
+ maxConnections = 2
+ user = ${postgresql_obsrv_username}
+ password = ${postgresql_obsrv_user_password}
+ database = ${postgresql_obsrv_database}
+ }
+
+ lms-cassandra {
+ host = "localhost"
+ port = "9042"
+ }
+
+enable_lakehouse: ${enable_lakehouse}
+
+lakehouse-connector:
+ lakehouse-connector: |+
+ include file("/data/flink/conf/baseconfig.conf")
+ kafka {
+ input.topic = $${job.env}".hudi.connector.in"
+ output.topic = $${job.env}".hudi.connector.out"
+ output.invalid.topic = $${job.env}".failed"
+ event.max.size = "1048576" # Max is only 1MB
+ groupId = $${job.env}"-hudi-writer-group"
+ producer {
+ max-request-size = 5242880
+ }
+ }
+
+ task {
+ checkpointing.compressed = true
+ checkpointing.interval = 30000
+ checkpointing.pause.between.seconds = 30000
+ restart-strategy.attempts = 3
+ restart-strategy.delay = 30000 # in milli-seconds
+ parallelism = 1
+ consumer.parallelism = 1
+ downstream.operators.parallelism = 1
+ }
+
+ hudi {
+ hms {
+ enabled = true
+ uri = "thrift://hms-metastore-app.hudi.svc:9083"
+ database {
+ name = "hms"
+ username = "${postgresql_hms_username}"
+ password = "${postgresql_hms_user_password}"
+ }
+ }
+ table {
+ type = "MERGE_ON_READ"
+ base.path = "s3a://${hudi_bucket}/${hudi_prefix_path}"
+ }
+ compaction.enabled = true
+ write.tasks = 1
+ }
+ flink-conf: |+
+ jobmanager.memory.flink.size: 1024m
+ taskmanager.memory.flink.size: 1024m
+ taskmanager.numberOfTaskSlots: 1
+ jobManager.numberOfTaskSlots: 1
+ parallelism.default: 1
+ jobmanager.execution.failover-strategy: region
+ taskmanager.memory.network.fraction: 0.1
+ heartbeat.timeout: 8000
+ heartbeat.interval: 5000
+ state.savepoints.dir: file:///tmp
+ jobmanager.rpc.address: lakehouse-connector-jobmanager
+ blob.server.port: 6124
+ jobmanager.rpc.port: 6123
+ taskmanager.rpc.port: 6122
+ job_classname: org.sunbird.obsrv.streaming.HudiConnectorStreamTask
+
+hadoop_core_site: ${hadoop_configuration}
\ No newline at end of file
diff --git a/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/Chart.yaml b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/Chart.yaml
new file mode 100644
index 00000000..1a27efca
--- /dev/null
+++ b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/Chart.yaml
@@ -0,0 +1,21 @@
+apiVersion: v2
+name: lakehouse-connector
+description: A Helm chart for Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+version: 0.1.2
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application.
+appVersion: 1.16.0
diff --git a/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/_helpers.tpl b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/_helpers.tpl
new file mode 100644
index 00000000..8e606f3d
--- /dev/null
+++ b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/_helpers.tpl
@@ -0,0 +1,67 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "flink.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{- define "name" -}}
+{{- default .Release.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "flink.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "flink.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Common labels
+*/}}
+{{- define "flink.labels" -}}
+helm.sh/chart: {{ include "flink.chart" . }}
+{{ include "flink.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end -}}
+
+{{/*
+Selector labels
+*/}}
+{{- define "flink.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "flink.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end -}}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "flink.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create -}}
+ {{ default (include "flink.fullname" .) .Values.serviceAccount.name }}
+{{- else -}}
+ {{ default "default" .Values.serviceAccount.name }}
+{{- end -}}
+{{- end -}}
diff --git a/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/configmap.yaml b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/configmap.yaml
new file mode 100644
index 00000000..6a419c6a
--- /dev/null
+++ b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/configmap.yaml
@@ -0,0 +1,24 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: {{ .Release.Name }}-config
+  namespace: {{ .Release.Namespace }}
+ labels:
+ app: flink
+ system.processing: "true"
+data:
+ base-config: |+
+{{ .Values.base_config | indent 4 }}
+{{- $name := .Release.Name }}
+{{ index .Values $name | toYaml | indent 2 }}
+ log4j_console_properties: |+
+{{ .Values.log4j_console_properties | indent 4 }}
+  core-site.xml: |+
+    <configuration>
+    {{- range $key, $value := .Values.hadoop_core_site }}
+    <property>
+      <name>{{ $key }}</name>
+      <value>{{ $value }}</value>
+    </property>
+    {{- end }}
+    </configuration>
\ No newline at end of file
diff --git a/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/deployment.yaml b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/deployment.yaml
new file mode 100644
index 00000000..885970c1
--- /dev/null
+++ b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/deployment.yaml
@@ -0,0 +1,277 @@
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ .Release.Name }}-jobmanager
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: flink
+ component: {{ .Release.Name }}-jobmanager
+ system.processing: "true"
+spec:
+ type: ClusterIP
+ ports:
+ - name: rpc
+ port: {{ .Values.jobmanager.rpc_port }}
+ - name: blob
+ port: {{ .Values.jobmanager.blob_port }}
+ - name: query
+ port: {{ .Values.jobmanager.query_port }}
+ - name: ui
+ port: {{ .Values.jobmanager.ui_port }}
+ - name: prom
+ port: {{ .Values.jobmanager.prom_port }}
+ selector:
+ app: flink
+ component: {{ .Release.Name }}-jobmanager
+ system.processing: "true"
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ .Release.Name }}-jobmanager-webui
+ namespace: {{ .Release.Namespace }}
+spec:
+ {{- if eq .Values.service.type "ClusterIP" }}
+ type: ClusterIP
+ {{- end }}
+ {{- if eq .Values.service.type "LoadBalancer" }}
+ type: LoadBalancer
+ {{- end }}
+ ports:
+ - name: rest
+ port: {{ .Values.rest_port }}
+ protocol: TCP
+ targetPort: {{ .Values.resttcp_port }}
+ selector:
+ app: flink
+ component: {{ .Release.Name }}-jobmanager
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ .Release.Name }}-taskmanager
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: flink
+ component: {{ .Release.Name }}-taskmanager
+ system.processing: "true"
+spec:
+ type: ClusterIP
+ ports:
+ - name: prom
+ port: {{ .Values.taskmanager.prom_port }}
+ selector:
+ app: flink
+ component: {{ .Release.Name }}-taskmanager
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {{ .Release.Name }}-jobmanager
+ namespace: {{ .Release.Namespace }}
+spec:
+ selector:
+ matchLabels:
+ app: flink
+ component: {{ .Release.Name }}-jobmanager
+ template:
+ metadata:
+ labels:
+ app: flink
+ component: {{ .Release.Name }}-jobmanager
+ system.processing: "true"
+ annotations:
+ prometheus.io/scrape: 'true'
+ prometheus.io/port: "{{ .Values.jobmanager.prom_port }}"
+ spec:
+ # imagePullSecrets:
+ # - name: {{ .Values.image.imagePullSecrets }}
+ serviceAccount: {{ .Release.Namespace }}-sa
+ volumes:
+ - configMap:
+ items:
+ - key: flink-conf
+ path: flink-conf.yaml
+ - key: base-config
+ path: base-config.conf
+ - key: {{ .Release.Name }}
+ path: {{ .Release.Name }}.conf
+ - key: log4j_console_properties
+ path: log4j-console.properties
+ - key: core-site.xml
+ path: core-site.xml
+ name: {{ .Release.Name }}-config
+ name: flink-config-volume
+ containers:
+ - name: {{ .Release.Name }}-jobmanager # Main container to start job-manager
+ image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+ # image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+ imagePullPolicy: Always
+ ports:
+ - containerPort: 6123
+ name: rpc
+ - containerPort: 6124
+ name: blob
+ - containerPort: 6125
+ name: query
+ - containerPort: 8081
+ name: ui
+ resources:
+ requests:
+ cpu: "{{ .Values.jobmanager.cpu_requests }}"
+ memory: "{{ .Values.jobmanager.memory_requests }}"
+ limits:
+ cpu: "{{ .Values.jobmanager.cpu_limits }}"
+ memory: "{{ .Values.jobmanager.memory_limits }}"
+          # NOTE(review): this container previously declared `volumeMounts:` twice;
+          # duplicate mapping keys are invalid YAML, and the first block (the
+          # log4j-console.properties mount) was shadowed by the second list below,
+          # which already contains that same mount.
+          volumeMounts:
+ - mountPath: /opt/flink/conf/flink-conf.yaml
+ name: flink-config-volume
+ subPath: flink-conf.yaml
+ - mountPath: /data/flink/conf/baseconfig.conf
+ name: flink-config-volume
+ subPath: base-config.conf
+ - mountPath: /data/flink/conf/{{ .Release.Name }}.conf
+ name: flink-config-volume
+ subPath: {{ .Release.Name }}.conf
+ - mountPath: /opt/flink/conf/log4j-console.properties
+ name: flink-config-volume
+ subPath: log4j-console.properties
+ - name: flink-config-volume
+ mountPath: /opt/hadoop/etc/hadoop/core-site.xml
+ subPath: core-site.xml
+ workingDir: /opt/flink
+ args: ["jobmanager"]
+ env:
+ - name: HADOOP_CONF_DIR
+ value: "/opt/hadoop/etc/hadoop"
+ - name: FLINK_PROPERTIES
+ value: |+
+ jobmanager.rpc.address: {{ .Release.Name }}-jobmanager
+              jobmanager.rpc.port: 6123
+ metrics.reporters: prom
+ metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory
+ metrics.reporter.prom.host: {{ .Release.Name }}-jobmanager
+ metrics.reporter.prom.port: 9250
+ - name: {{ .Release.Name }}-job-submit # side car to submit the hudi connector
+ image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+ imagePullPolicy: Always
+
+ command:
+ - /opt/flink/bin/flink
+ - run
+ - -m
+ - {{ .Release.Name }}-jobmanager.flink.svc.cluster.local:8081
+ - /opt/flink/custom-lib/hudi-connector-1.0.0.jar
+ - --config.file.path
+ - /data/flink/conf/{{ .Release.Name }}.conf
+ volumeMounts:
+ - mountPath: /data/flink/conf/baseconfig.conf
+ name: flink-config-volume
+ subPath: base-config.conf
+ - mountPath: /data/flink/conf/{{ .Release.Name }}.conf
+ name: flink-config-volume
+ subPath: {{ .Release.Name }}.conf
+ workingDir: /opt/flink
+ volumes:
+ - configMap:
+ items:
+ - key: base-config
+ path: base-config.conf
+ - key: {{ .Release.Name }}
+ path: {{ .Release.Name }}.conf
+ name: {{ .Release.Name }}-config
+ name: flink-config-volume
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {{ .Release.Name }}-taskmanager
+ namespace: {{ .Release.Namespace }}
+spec:
+ replicas: {{ .Values.taskmanager.replicas }}
+ selector:
+ matchLabels:
+ app: flink
+ component: {{ .Release.Name }}-taskmanager
+ template:
+ metadata:
+ labels:
+ system.processing: "true"
+ app: flink
+ component: {{ .Release.Name }}-taskmanager
+ spec:
+ # imagePullSecrets:
+ # - name: {{ .Values.image.imagePullSecrets }}
+ serviceAccount: {{ .Release.Namespace }}-sa
+ volumes:
+ - configMap:
+ items:
+ - key: flink-conf
+ path: flink-conf.yaml
+ - key: base-config
+ path: base-config.conf
+ - key: {{ .Release.Name }}
+ path: {{ .Release.Name }}.conf
+ - key: log4j_console_properties
+ path: log4j-console.properties
+ - key: core-site.xml
+ path: core-site.xml
+ name: {{ .Release.Name }}-config
+ name: flink-config-volume
+ containers:
+ - name: {{ .Release.Name }}-taskmanager
+ image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+ imagePullPolicy: Always
+ ports:
+ - containerPort: 6122
+ name: rpc
+ resources:
+ requests:
+ cpu: "{{ .Values.taskmanager.cpu_requests }}"
+ memory: "{{ .Values.taskmanager.memory_requests }}"
+ limits:
+ cpu: "{{ .Values.taskmanager.cpu_limits }}"
+ memory: "{{ .Values.taskmanager.memory_limits }}"
+          # NOTE(review): this container previously declared `volumeMounts:` twice;
+          # duplicate mapping keys are invalid YAML, and the first block (the
+          # log4j-console.properties mount) was shadowed by the second list below,
+          # which already contains that same mount.
+          volumeMounts:
+ - mountPath: /opt/flink/conf/flink-conf.yaml
+ name: flink-config-volume
+ subPath: flink-conf.yaml
+ - mountPath: /data/flink/conf/baseconfig.conf
+ name: flink-config-volume
+ subPath: base-config.conf
+ - mountPath: /data/flink/conf/{{ .Release.Name }}.conf
+ name: flink-config-volume
+ subPath: {{ .Release.Name }}.conf
+ - mountPath: /opt/flink/conf/log4j-console.properties
+ name: flink-config-volume
+ subPath: log4j-console.properties
+ - name: flink-config-volume
+ mountPath: /opt/hadoop/etc/hadoop/core-site.xml
+ subPath: core-site.xml
+ workingDir: /opt/flink
+ args: ["taskmanager"]
+ env:
+ - name: HADOOP_CONF_DIR
+ value: "/opt/hadoop/etc/hadoop"
+ - name: FLINK_PROPERTIES
+ value: |+
+ jobmanager.rpc.address: {{ .Release.Name }}-jobmanager
+              taskmanager.rpc.port: 6122
+ taskmanager.numberOfTaskSlots: 2
+ metrics.reporters: prom
+ metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory
+ metrics.reporter.prom.host: {{ .Release.Name }}-taskmanager
+ metrics.reporter.prom.port: 9251
diff --git a/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/submit-job.yaml b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/submit-job.yaml
new file mode 100644
index 00000000..0fe3bf37
--- /dev/null
+++ b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/submit-job.yaml
@@ -0,0 +1,55 @@
+# apiVersion: batch/v1
+# kind: Job
+# metadata:
+# name: {{ .Release.Name }}-job-submit
+# namespace: {{ .Release.Namespace }}
+# spec:
+# template:
+# spec:
+# initContainers:
+# - name: init
+# image: "docker.io/library/ubuntu@sha256:33a5cc25d22c45900796a1aca487ad7a7cb09f09ea00b779e3b2026b4fc2faba"
+# command:
+# - sh
+# - -c
+# - |
+# apt-get update && apt-get install -y curl
+# while true; do
+# if curl -sSf -o /dev/null http://{{ .Release.Name }}-jobmanager.flink.svc.cluster.local:8081; then
+# break
+# else
+# echo "Waiting for Job Manager..."
+# sleep 2
+# fi
+# done
+# containers:
+# - name: {{ .Release.Name }}-job-submit
+# image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+# imagePullPolicy: Always
+# command:
+# - /opt/flink/bin/flink
+# - run
+# - -m
+# - {{ .Release.Name }}-jobmanager.flink.svc.cluster.local:8081
+# - /opt/flink/custom-lib/{{ .Release.Name }}-1.0.0.jar
+# - --config.file.path
+# - /data/flink/conf/{{ .Release.Name }}.conf
+# volumeMounts:
+# - mountPath: /data/flink/conf/baseconfig.conf
+# name: flink-config-volume
+# subPath: base-config.conf
+# - mountPath: /data/flink/conf/{{ .Release.Name }}.conf
+# name: flink-config-volume
+# subPath: {{ .Release.Name }}.conf
+# workingDir: /opt/flink
+# restartPolicy: Never
+# volumes:
+# - configMap:
+# items:
+# - key: base-config
+# path: base-config.conf
+# - key: {{ .Release.Name }}
+# path: {{ .Release.Name }}.conf
+# name: {{ .Release.Name }}-config
+# name: flink-config-volume
+# backoffLimit: 3
\ No newline at end of file
diff --git a/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/values.yaml b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/values.yaml
new file mode 100644
index 00000000..d1c12c74
--- /dev/null
+++ b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/values.yaml
@@ -0,0 +1,174 @@
+namespace: "flink"
+imagepullsecrets: ""
+image:
+ registry: sanketikahub
+ repository: lakehouse-connector
+ tag: 1.0.0
+serviceMonitor:
+ enabled: false
+replicaCount: 1
+
+jobmanager:
+ rpc_port: 6123
+ blob_port: 6124
+ query_port: 6125
+ ui_port: 8081
+ prom_port: 9250
+ heap_memory: 1024
+ cpu_requests: 0.25
+ cpu_limits: 0.25
+ memory_requests: 1024Mi
+ memory_limits: 1024Mi
+
+rest_port: 80
+resttcp_port: 8081
+service:
+ type: ClusterIP
+
+taskmanager:
+ prom_port: 9251
+ rpc_port: 6122
+ heap_memory: 1024
+ replicas: 1
+ cpu_requests: 0.5
+ cpu_limits: 0.5
+ memory_requests: 1024Mi
+ memory_limits: 1024Mi
+
+checkpoint_store_type: "s3"
+
+# AWS S3 Details
+s3_access_key: ""
+s3_secret_key: ""
+s3_endpoint: ""
+
+# Azure Container Details
+azure_account: ""
+azure_secret: ""
+
+# Google Cloud Storage Service Account JSON Path
+google_service_account_key_path: ""
+
+dataset_registry: |
+ postgres {
+ host = postgresql.postgresql.svc.cluster.local
+ port = 5432
+ maxConnections = 2
+ user = "flink"
+ password = "flink"
+ database = "flink"
+ }
+
+log4j_console_properties: |
+ # This affects logging for both user code and Flink
+ rootLogger.level = INFO
+ rootLogger.appenderRef.console.ref = ConsoleAppender
+ rootLogger.appenderRef.rolling.ref = RollingFileAppender
+
+ # Uncomment this if you want to _only_ change Flink's logging
+ logger.flink.name = org.apache.flink
+ logger.flink.level = INFO
+
+ # The following lines keep the log level of common libraries/connectors on
+ # log level INFO. The root logger does not override this. You have to manually
+ # change the log levels here.
+ logger.akka.name = akka
+ logger.akka.level = ERROR
+ logger.kafka.name= org.apache.kafka
+ logger.kafka.level = ERROR
+ logger.hadoop.name = org.apache.hadoop
+ logger.hadoop.level = ERROR
+ logger.zookeeper.name = org.apache.zookeeper
+ logger.zookeeper.level = ERROR
+
+ # Log all infos to the console
+ appender.console.name = ConsoleAppender
+ appender.console.type = CONSOLE
+ appender.console.layout.type = PatternLayout
+ appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
+
+ # Log all infos in the given rolling file
+ appender.rolling.name = RollingFileAppender
+ appender.rolling.type = RollingFile
+ appender.rolling.append = false
+ appender.rolling.fileName = ${sys:log.file}
+ appender.rolling.filePattern = ${sys:log.file}.%i
+ appender.rolling.layout.type = PatternLayout
+ appender.rolling.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
+ appender.rolling.policies.type = Policies
+ appender.rolling.policies.size.type = SizeBasedTriggeringPolicy
+ appender.rolling.policies.size.size=10MB
+ appender.rolling.strategy.type = DefaultRolloverStrategy
+ appender.rolling.strategy.max = 5
+
+ # Suppress the irrelevant (wrong) warnings from the Netty channel handler
+ logger.netty.name = org.apache.flink.shaded.akka.org.jboss.netty.channel.DefaultChannelPipeline
+ logger.netty.level = OFF
+
+base_config: |
+ job {
+ env = "dev"
+ enable.distributed.checkpointing = true
+ statebackend {
+ base.url = "s3://"
+ }
+ }
+ kafka {
+ broker-servers = "kafka-headless.kafka.svc.cluster.local:9092"
+ producer.broker-servers = "kafka-headless.kafka.svc.cluster.local:9092"
+ consumer.broker-servers = "kafka-headless.kafka.svc.cluster.local:9092"
+ zookeeper = "kafka-headless.kafka.svc.cluster.local:2181"
+ producer {
+ max-request-size = 10000024
+ batch.size = 98304
+ linger.ms = 10
+ compression = "snappy"
+ }
+ output.system.event.topic = ${job.env}".system.events"
+ output.failed.topic = ${job.env}".failed"
+ }
+ task {
+ parallelism = 1
+ consumer.parallelism = 1
+ checkpointing.interval = 30000
+ checkpointing.pause.between.seconds = 5000
+ restart-strategy.attempts = 3
+ restart-strategy.delay = 30000 # in milli-seconds
+ }
+
+ redis.connection.timeout = 100
+ redis {
+ host = obsrv-redis-master.redis.svc.cluster.local
+ port = 6379
+ }
+
+ redis-meta {
+ host = obsrv-redis-master.redis.svc.cluster.local
+ port = 6379
+ }
+
+ postgres {
+ host = postgresql-hl.postgresql.svc.cluster.local
+ port = 5432
+ maxConnections = 2
+ user = "postgres"
+ password = "postgres"
+ database = "obsrv"
+ }
+
+ lms-cassandra {
+ host = "localhost"
+ port = "9042"
+ }
+
+
+serviceAccount:
+ # Specifies whether a service account should be created
+ create: true
+ # Annotations to add to the service account
+ annotations: {}
+ # The name of the service account to use.
+ # If not set and create is true, a name is generated using the fullname template
+ name: ""
+
+enable_lakehouse: false
\ No newline at end of file
diff --git a/terraform/modules/helm/lakehouse-connector/main.tf b/terraform/modules/helm/lakehouse-connector/main.tf
new file mode 100644
index 00000000..3ba23c5a
--- /dev/null
+++ b/terraform/modules/helm/lakehouse-connector/main.tf
@@ -0,0 +1,42 @@
+resource "helm_release" "lakehouse-connector" {
+ name = "lakehouse-connector"
+ chart = "${path.module}/${var.flink_chart_path}"
+ namespace = var.flink_namespace
+ create_namespace = var.flink_create_namespace
+ # depends_on = [var.flink_chart_depends_on,helm_release.flink_sa]
+ wait_for_jobs = var.flink_wait_for_jobs
+ timeout = var.flink_chart_install_timeout
+ force_update = true
+ cleanup_on_fail = true
+ atomic = true
+ values = [
+ templatefile("${path.module}/${var.flink_custom_values_yaml}",
+ {
+ env = var.env
+ flink_namespace = var.flink_namespace
+ flink_container_registry = "${var.flink_container_registry}"
+ flink_lakehouse_image_tag = var.flink_lakehouse_image_tag
+ flink_image_name = var.flink_image_name
+ checkpoint_store_type = var.flink_checkpoint_store_type
+ s3_access_key = var.s3_access_key
+ s3_secret_key = var.s3_secret_key
+ azure_account = var.azure_storage_account_name
+ azure_secret = var.azure_storage_account_key
+ postgresql_service_name = var.postgresql_service_name
+ postgresql_obsrv_username = var.postgresql_obsrv_username
+ postgresql_obsrv_user_password = var.postgresql_obsrv_user_password
+ postgresql_obsrv_database = var.postgresql_obsrv_database
+ checkpoint_base_url = var.checkpoint_base_url
+ denorm_redis_namespace = var.denorm_redis_namespace
+ denorm_redis_release_name = var.denorm_redis_release_name
+ dedup_redis_namespace = var.dedup_redis_namespace
+ dedup_redis_release_name = var.dedup_redis_release_name
+ hadoop_configuration = jsonencode(local.hadoop_configuration)
+ enable_lakehouse = var.enable_lakehouse
+ postgresql_hms_username = var.postgresql_hms_username
+ postgresql_hms_user_password = var.postgresql_hms_user_password
+ hudi_bucket = var.hudi_bucket
+ hudi_prefix_path = var.hudi_prefix_path
+ })
+ ]
+}
diff --git a/terraform/modules/helm/lakehouse-connector/outputs.tf b/terraform/modules/helm/lakehouse-connector/outputs.tf
new file mode 100644
index 00000000..c33a3822
--- /dev/null
+++ b/terraform/modules/helm/lakehouse-connector/outputs.tf
@@ -0,0 +1,3 @@
+output "flink_namespace" {
+ value = var.flink_namespace
+}
\ No newline at end of file
diff --git a/terraform/modules/helm/lakehouse-connector/variables.tf b/terraform/modules/helm/lakehouse-connector/variables.tf
new file mode 100644
index 00000000..1372b893
--- /dev/null
+++ b/terraform/modules/helm/lakehouse-connector/variables.tf
@@ -0,0 +1,205 @@
+variable "env" {
+ type = string
+ description = "Environment name. All resources will be prefixed with this value."
+}
+
+variable "building_block" {
+ type = string
+ description = "Building block name. All resources will be prefixed with this value."
+}
+
+variable "flink_namespace" {
+ type = string
+ description = "Flink namespace."
+}
+
+
+variable "flink_chart_path" {
+ type = string
+ description = "Flink chart path."
+ default = "lakehouse-flink-helm-chart"
+}
+
+# *** changed this to release map.
+# variable "flink_release_name" {
+# type = string
+# description = "Flink helm release name."
+# default = "unified-pipeline"
+# }
+# *** changed this to release map.
+
+variable "flink_chart_install_timeout" {
+ type = number
+ description = "Flink chart install timeout."
+ default = 900
+}
+
+variable "flink_create_namespace" {
+ type = bool
+ description = "Create flink namespace."
+ default = true
+}
+
+variable "flink_wait_for_jobs" {
+ type = bool
+  description = "Flink wait for jobs parameter."
+ default = false
+}
+
+variable "flink_custom_values_yaml" {
+ type = string
+ description = "Flink chart values.yaml path."
+ default = "flink.yaml.tfpl"
+}
+
+variable "flink_kubernetes_service_name" {
+ type = string
+ description = "Flink kubernetes service name."
+ default = ""
+}
+
+variable "flink_container_registry" {
+ type = string
+ description = "Container registry. For example docker.io/obsrv"
+}
+
+
+variable "flink_lakehouse_image_tag" {
+ type = string
+ description = "Flink image tag for lakehouse image."
+}
+
+
+variable "flink_image_name" {
+ type = string
+ description = "Flink image name."
+}
+
+variable "flink_checkpoint_store_type" {
+ type = string
+ description = "Flink checkpoint store type."
+}
+
+variable "checkpoint_base_url" {
+ type = string
+ description = "checkpoint storage base url."
+ default = ""
+}
+
+variable "flink_chart_depends_on" {
+ type = any
+ description = "List of helm release names that this chart depends on."
+  default     = []
+}
+
+variable "postgresql_obsrv_username" {
+ type = string
+ description = "Postgresql obsrv username."
+ default = "obsrv"
+}
+
+variable "postgresql_obsrv_user_password" {
+ type = string
+ description = "Postgresql obsrv user password."
+}
+
+variable "postgresql_obsrv_database" {
+ type = string
+ description = "Postgresql obsrv database."
+}
+
+variable "postgresql_service_name" {
+ type = string
+ description = "Postgresql service name."
+}
+
+variable "denorm_redis_namespace" {
+ type = string
+ description = "Namespace of Redis installation."
+ default = "redis"
+}
+
+variable "denorm_redis_release_name" {
+ type = string
+ description = "Release name for Redis installation."
+ default = "obsrv-denorm-redis"
+}
+
+variable "dedup_redis_release_name" {
+ type = string
+ description = "Redis helm release name."
+ default = "obsrv-dedup-redis"
+}
+
+variable "dedup_redis_namespace" {
+ type = string
+ description = "Redis namespace."
+ default = "redis"
+}
+
+variable "s3_access_key" {
+ type = string
+ description = "S3 access key for flink checkpoints."
+ default = ""
+}
+
+variable "s3_secret_key" {
+ type = string
+ description = "S3 secret key for flink checkpoints."
+ default = ""
+}
+
+variable "azure_storage_account_name" {
+ type = string
+ description = "Azure storage account name for flink checkpoints."
+ default = ""
+}
+
+variable "azure_storage_account_key" {
+ type = string
+ description = "Azure storage account key for flink checkpoints."
+ default = ""
+}
+
+variable "flink_sa_annotations" {
+ type = string
+ description = "Service account annotations for flink service account."
+ default = "serviceAccountName: default"
+}
+
+locals {
+ default_hadoop_metadata = {
+ "fs.s3a.impl" = "org.apache.hadoop.fs.s3a.S3AFileSystem"
+ "fs.s3a.connection.ssl.enabled" = "false"
+ }
+}
+
+variable "hadoop_metadata" {
+ type = map(string)
+ description = "Hadoop core site configuration"
+}
+
+locals {
+ hadoop_configuration = merge(local.default_hadoop_metadata, var.hadoop_metadata)
+}
+
+variable "enable_lakehouse" {
+ type = bool
+ description = "Toggle to install hudi components (hms, trino and flink job)"
+}
+variable "postgresql_hms_username" {
+ type = string
+ description = "Postgresql hms username"
+}
+variable "postgresql_hms_user_password" {
+ type = string
+ description = "Postgresql hms user password."
+}
+variable "hudi_bucket" {
+ type = string
+ description = "Apache hudi bucket name"
+}
+variable "hudi_prefix_path" {
+ type = string
+ description = "Apache hudi bucket prefix path name"
+}
diff --git a/terraform/modules/helm/postgresql/outputs.tf b/terraform/modules/helm/postgresql/outputs.tf
index 9605be24..6fd4ad14 100644
--- a/terraform/modules/helm/postgresql/outputs.tf
+++ b/terraform/modules/helm/postgresql/outputs.tf
@@ -44,4 +44,14 @@ output "postgresql_obsrv_database" {
output "postgresql_service_name" {
value = contains([var.postgresql_release_name], "postgresql") ? "${var.postgresql_release_name}.${var.postgresql_namespace}" : "${var.postgresql_release_name}-postgresql.${var.postgresql_namespace}"
-}
\ No newline at end of file
+}
+
+output "postgresql_hms_user_password" {
+ value = var.postgresql_hms_user_password
+ sensitive = true
+}
+
+output "postgresql_hms_username" {
+ value = var.postgresql_hms_username
+ sensitive = true
+}
diff --git a/terraform/modules/helm/postgresql/variables.tf b/terraform/modules/helm/postgresql/variables.tf
index 76a630b7..b06576fd 100644
--- a/terraform/modules/helm/postgresql/variables.tf
+++ b/terraform/modules/helm/postgresql/variables.tf
@@ -121,3 +121,15 @@ variable "postgresql_druid_raw_user_password" {
description = "Postgresql druid user password."
default = "druidraw123"
}
+
+
+variable "postgresql_hms_username" {
+ type = string
+ description = "Postgresql hms username"
+ default = "hms"
+}
+variable "postgresql_hms_user_password" {
+ type = string
+ description = "Postgresql hms user password."
+ default = "hms123"
+}
\ No newline at end of file
diff --git a/terraform/modules/helm/postgresql_migration/main.tf b/terraform/modules/helm/postgresql_migration/main.tf
index e264addb..77973f43 100644
--- a/terraform/modules/helm/postgresql_migration/main.tf
+++ b/terraform/modules/helm/postgresql_migration/main.tf
@@ -19,6 +19,9 @@ resource "helm_release" "postgresql_migration" {
postgresql_druid_raw_user_password = var.postgresql_druid_raw_user_password
postgresql_obsrv_user_password = var.postgresql_obsrv_user_password
data_encryption_key = var.data_encryption_key
+ postgresql_hms_user_password = var.postgresql_hms_user_password
+ enable_lakehouse = var.enable_lakehouse
+
})
]
}
diff --git a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/migration/03_obsrv/V3__ddl_datasource.sql b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/migration/03_obsrv/V3__ddl_datasource.sql
new file mode 100644
index 00000000..7e6ee3f8
--- /dev/null
+++ b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/migration/03_obsrv/V3__ddl_datasource.sql
@@ -0,0 +1 @@
+ALTER TABLE datasources ADD COLUMN type TEXT NOT NULL DEFAULT 'druid';
\ No newline at end of file
diff --git a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/migration/05_hms/V1__create_hms_initial_data.sql b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/migration/05_hms/V1__create_hms_initial_data.sql
new file mode 100644
index 00000000..398350d6
--- /dev/null
+++ b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/migration/05_hms/V1__create_hms_initial_data.sql
@@ -0,0 +1,28 @@
+DO $$ -- NOTE(review): CREATE DATABASE cannot run inside a DO block or any transaction in PostgreSQL; this block will fail with "CREATE DATABASE cannot be executed from a function". Run it as a standalone, non-transactional statement (e.g. Flyway executeInTransaction=false) — confirm before merging.
+BEGIN
+ IF NOT EXISTS (SELECT 1 FROM pg_database WHERE datname = 'hms') THEN
+ CREATE DATABASE hms;
+ END IF;
+END $$;
+
+DO
+$do$
+BEGIN
+ IF EXISTS (
+ SELECT FROM pg_catalog.pg_roles
+ WHERE rolname = 'hms') THEN
+
+ RAISE NOTICE 'Role "hms" already exists. Skipping.';
+ ELSE
+ BEGIN
+ CREATE ROLE hms LOGIN PASSWORD '{{ .Values.postgresql_hms_user_password }}';
+ EXCEPTION
+ WHEN duplicate_object THEN
+ RAISE NOTICE 'Role "hms" was just created by a concurrent transaction. Skipping.';
+ END;
+ END IF;
+END
+$do$;
+
+GRANT ALL PRIVILEGES ON DATABASE hms TO hms;
+ALTER DATABASE hms OWNER TO hms;
diff --git a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/flyway-cm.yaml b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/flyway-cm.yaml
index 5b5b2d60..dc716ebf 100644
--- a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/flyway-cm.yaml
+++ b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/flyway-cm.yaml
@@ -31,4 +31,19 @@ metadata:
name: obsrv-sql-files
app: flyway-postgresql
data:
- {{- (tpl (.Files.Glob "migration/03_obsrv/*.sql").AsConfig .) | nindent 2 }}
\ No newline at end of file
+ {{- (tpl (.Files.Glob "migration/03_obsrv/*.sql").AsConfig .) | nindent 2 }}
+
+
+---
+{{- if .Values.enable_lakehouse }}
+kind: ConfigMap
+apiVersion: v1
+metadata:
+ name: hms-sql-files
+ namespace: {{ .Values.namespace }}
+ labels:
+ name: hms-sql-files
+ app: flyway-postgresql
+data:
+ {{- (tpl (.Files.Glob "migration/05_hms/*.sql").AsConfig .) | nindent 2 }}
+{{- end }}
\ No newline at end of file
diff --git a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/job.yaml b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/job.yaml
index f3261ed6..123c1dbe 100644
--- a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/job.yaml
+++ b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/job.yaml
@@ -35,6 +35,10 @@ spec:
mountPath: /flyway/migrations/02_druid_raw
- name: obsrv-sql-files
mountPath: /flyway/migrations/03_obsrv
+ {{- if .Values.enable_lakehouse }}
+ - name: hms-sql-files
+ mountPath: /flyway/migrations/05_hms
+ {{- end }}
volumes:
- name: flyway-migrate-script
configMap:
@@ -48,4 +52,9 @@ spec:
name: druid-raw-sql-files
- name: obsrv-sql-files
configMap:
- name: obsrv-sql-files
\ No newline at end of file
+ name: obsrv-sql-files
+ {{- if .Values.enable_lakehouse }}
+ - name: hms-sql-files
+ configMap:
+ name: hms-sql-files
+ {{- end }}
\ No newline at end of file
diff --git a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/values.yaml b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/values.yaml
index ae7a05c3..044089e3 100644
--- a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/values.yaml
+++ b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/values.yaml
@@ -20,3 +20,6 @@ system_settings:
default_dataset_id: "ALL"
max_event_size: 1048576
dedup_period: 604800 # In seconds (7 days)
+
+postgresql_hms_user_password: ""
+enable_lakehouse: false
\ No newline at end of file
diff --git a/terraform/modules/helm/postgresql_migration/postgresql_migration.yaml.tfpl b/terraform/modules/helm/postgresql_migration/postgresql_migration.yaml.tfpl
index 3792a126..427aee2e 100644
--- a/terraform/modules/helm/postgresql_migration/postgresql_migration.yaml.tfpl
+++ b/terraform/modules/helm/postgresql_migration/postgresql_migration.yaml.tfpl
@@ -12,3 +12,6 @@ postgresql_obsrv_user_password: "${postgresql_obsrv_user_password}"
system_settings:
encryption_key: "${data_encryption_key}"
+
+postgresql_hms_user_password: "${postgresql_hms_user_password}"
+enable_lakehouse: ${enable_lakehouse}
\ No newline at end of file
diff --git a/terraform/modules/helm/postgresql_migration/variables.tf b/terraform/modules/helm/postgresql_migration/variables.tf
index 9497ea3d..83090a62 100644
--- a/terraform/modules/helm/postgresql_migration/variables.tf
+++ b/terraform/modules/helm/postgresql_migration/variables.tf
@@ -83,3 +83,12 @@ variable "data_encryption_key" {
type = string
description = "Data encryption key. This is used to encrypt data in pipeline. This is a 32 character string."
}
+
+variable "postgresql_hms_user_password" {
+ type = string
+ description = "Postgresql hms user password."
+}
+variable "enable_lakehouse" {
+ type = bool
+ description = "Toggle to install hudi components (hms, trino and flink job)"
+}
\ No newline at end of file
diff --git a/terraform/modules/helm/trino/main.tf b/terraform/modules/helm/trino/main.tf
new file mode 100644
index 00000000..3b75e8df
--- /dev/null
+++ b/terraform/modules/helm/trino/main.tf
@@ -0,0 +1,21 @@
+resource "helm_release" "trino" {
+ name = var.trino_release_name
+ chart = "${path.module}/${var.trino_chart_path}"
+ namespace = var.trino_namespace
+ create_namespace = var.trino_create_namespace
+ depends_on = [var.trino_chart_depends_on]
+ force_update = true
+ cleanup_on_fail = true
+ atomic = true
+ values = [
+ templatefile("${path.module}/${var.trino_custom_values_yaml}",
+ {
+ trino_namespace = var.trino_namespace
+ trino_image = var.trino_image
+ trino_workers_count = var.trino_workers_count
+ trino_service = var.trino_service
+ trino_catalogs = jsonencode({ for key, value in local.catalogs : key => join("\n", [for k, v in value : "${k}=${v}"]) })
+ }
+ )
+ ]
+}
diff --git a/terraform/modules/helm/trino/trino-helm-chart/Chart.yaml b/terraform/modules/helm/trino/trino-helm-chart/Chart.yaml
new file mode 100644
index 00000000..8b1d8bd0
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/Chart.yaml
@@ -0,0 +1,14 @@
+apiVersion: v2
+appVersion: "432"
+description: Fast distributed SQL query engine for big data analytics that helps you
+ explore your data universe
+home: https://trino.io/
+icon: https://trino.io/assets/trino.png
+maintainers:
+- name: Trino community
+name: trino
+sources:
+- https://github.com/trinodb/charts
+- https://github.com/trinodb/trino/tree/master/core/docker
+type: application
+version: 0.19.0
diff --git a/terraform/modules/helm/trino/trino-helm-chart/README.md b/terraform/modules/helm/trino/trino-helm-chart/README.md
new file mode 100644
index 00000000..44465278
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/README.md
@@ -0,0 +1,115 @@
+
+Trino
+===========
+
+Fast distributed SQL query engine for big data analytics that helps you explore your data universe
+
+
+## Configuration
+
+The following table lists the configurable parameters of the Trino chart and their default values.
+
+| Parameter | Description | Default |
+| ------------------------ | ----------------------- | -------------- |
+| `image.registry` | Image registry, defaults to empty, which results in DockerHub usage | `""` |
+| `image.repository` | Repository location of the Trino image, typically `organization/imagename` | `"trinodb/trino"` |
+| `image.tag` | Image tag, defaults to the Trino release version specified as `appVersion` from Chart.yaml | `""` |
+| `image.digest` | Optional digest value of the image specified as `sha256:abcd...`. A specified value overrides `tag`. | `""` |
+| `image.useRepositoryAsSoleImageReference` | When true, only the content in `repository` is used as image reference | `false` |
+| `image.pullPolicy` | | `"IfNotPresent"` |
+| `imagePullSecrets` | | `[{"name": "registry-credentials"}]` |
+| `server.workers` | | `2` |
+| `server.node.environment` | | `"production"` |
+| `server.node.dataDir` | | `"/data/trino"` |
+| `server.node.pluginDir` | | `"/usr/lib/trino/plugin"` |
+| `server.log.trino.level` | | `"INFO"` |
+| `server.config.path` | | `"/etc/trino"` |
+| `server.config.http.port` | | `8080` |
+| `server.config.https.enabled` | | `false` |
+| `server.config.https.port` | | `8443` |
+| `server.config.https.keystore.path` | | `""` |
+| `server.config.authenticationType` | | `""` |
+| `server.config.query.maxMemory` | | `"4GB"` |
+| `server.exchangeManager.name` | | `"filesystem"` |
+| `server.exchangeManager.baseDir` | | `"/tmp/trino-local-file-system-exchange-manager"` |
+| `server.workerExtraConfig` | | `""` |
+| `server.coordinatorExtraConfig` | | `""` |
+| `server.autoscaling.enabled` | | `false` |
+| `server.autoscaling.maxReplicas` | | `5` |
+| `server.autoscaling.targetCPUUtilizationPercentage` | | `50` |
+| `server.autoscaling.behavior` | | `{}` |
+| `accessControl` | | `{}` |
+| `resourceGroups` | | `{}` |
+| `additionalNodeProperties` | | `{}` |
+| `additionalConfigProperties` | | `{}` |
+| `additionalLogProperties` | | `{}` |
+| `additionalExchangeManagerProperties` | | `{}` |
+| `eventListenerProperties` | | `{}` |
+| `additionalCatalogs` | | `{}` |
+| `env` | | `[]` |
+| `envFrom` | | `[]` |
+| `initContainers` | | `{}` |
+| `sidecarContainers` | | `{}` |
+| `securityContext.runAsUser` | | `1000` |
+| `securityContext.runAsGroup` | | `1000` |
+| `shareProcessNamespace.coordinator` | | `false` |
+| `shareProcessNamespace.worker` | | `false` |
+| `service.type` | | `"ClusterIP"` |
+| `service.port` | | `8080` |
+| `auth` | | `{}` |
+| `serviceAccount.create` | | `false` |
+| `serviceAccount.name` | | `""` |
+| `serviceAccount.annotations` | | `{}` |
+| `secretMounts` | | `[]` |
+| `coordinator.jvm.maxHeapSize` | | `"8G"` |
+| `coordinator.jvm.gcMethod.type` | | `"UseG1GC"` |
+| `coordinator.jvm.gcMethod.g1.heapRegionSize` | | `"32M"` |
+| `coordinator.config.memory.heapHeadroomPerNode` | | `""` |
+| `coordinator.config.query.maxMemoryPerNode` | | `"1GB"` |
+| `coordinator.additionalJVMConfig` | | `{}` |
+| `coordinator.additionalExposedPorts` | | `{}` |
+| `coordinator.resources` | | `{}` |
+| `coordinator.livenessProbe` | | `{}` |
+| `coordinator.readinessProbe` | | `{}` |
+| `coordinator.nodeSelector` | | `{}` |
+| `coordinator.tolerations` | | `[]` |
+| `coordinator.affinity` | | `{}` |
+| `coordinator.additionalConfigFiles` | | `{}` |
+| `coordinator.additionalVolumes` | One or more additional volumes to add to the coordinator. | `[]` |
+| `coordinator.additionalVolumeMounts` | One or more additional volume mounts to add to the coordinator. | `[]` |
+| `coordinator.annotations` | | `{}` |
+| `coordinator.labels` | | `{}` |
+| `coordinator.secretMounts` | | `[]` |
+| `worker.jvm.maxHeapSize` | | `"8G"` |
+| `worker.jvm.gcMethod.type` | | `"UseG1GC"` |
+| `worker.jvm.gcMethod.g1.heapRegionSize` | | `"32M"` |
+| `worker.config.memory.heapHeadroomPerNode` | | `""` |
+| `worker.config.query.maxMemoryPerNode` | | `"1GB"` |
+| `worker.additionalJVMConfig` | | `{}` |
+| `worker.additionalExposedPorts` | | `{}` |
+| `worker.resources` | | `{}` |
+| `worker.livenessProbe` | | `{}` |
+| `worker.readinessProbe` | | `{}` |
+| `worker.nodeSelector` | | `{}` |
+| `worker.tolerations` | | `[]` |
+| `worker.affinity` | | `{}` |
+| `worker.additionalConfigFiles` | | `{}` |
+| `worker.additionalVolumes` | One or more additional volume mounts to add to all workers. | `[]` |
+| `worker.additionalVolumeMounts` | One or more additional volume mounts to add to all workers. | `[]` |
+| `worker.annotations` | | `{}` |
+| `worker.labels` | | `{}` |
+| `worker.secretMounts` | | `[]` |
+| `kafka.mountPath` | | `"/etc/trino/schemas"` |
+| `kafka.tableDescriptions` | | `{}` |
+| `commonLabels` | Labels that get applied to every resource's metadata | `{}` |
+| `ingress.enabled` | | `false` |
+| `ingress.className` | | `""` |
+| `ingress.annotations` | | `{}` |
+| `ingress.hosts` | | `[]` |
+| `ingress.tls` | | `[]` |
+
+
+
+---
+_Documentation generated by [Frigate](https://frigate.readthedocs.io)._
+
diff --git a/terraform/modules/helm/trino/trino-helm-chart/ci/custom-values.yaml b/terraform/modules/helm/trino/trino-helm-chart/ci/custom-values.yaml
new file mode 100644
index 00000000..2a83c54a
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/ci/custom-values.yaml
@@ -0,0 +1,19 @@
+# All custom values belong here during testing.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+coordinator:
+ jvm:
+ maxHeapSize: "8G"
+ gcMethod:
+ type: "UseG1GC"
+ g1:
+ heapRegionSize: "32M"
+
+worker:
+ jvm:
+ maxHeapSize: "8G"
+ gcMethod:
+ type: "UseG1GC"
+ g1:
+ heapRegionSize: "32M"
diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/NOTES.txt b/terraform/modules/helm/trino/trino-helm-chart/templates/NOTES.txt
new file mode 100644
index 00000000..251ff875
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/templates/NOTES.txt
@@ -0,0 +1,10 @@
+Get the application URL by running these commands:
+{{- if contains "NodePort" .Values.service.type }}
+ export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "trino.fullname" . }})
+ export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+ echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "ClusterIP" .Values.service.type }}
+ export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "trino.name" . }},release={{ .Release.Name }},component=coordinator" -o jsonpath="{.items[0].metadata.name}")
+ echo "Visit http://127.0.0.1:8080 to use your application"
+ kubectl port-forward $POD_NAME 8080:8080
+{{- end }}
diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/_helpers.tpl b/terraform/modules/helm/trino/trino-helm-chart/templates/_helpers.tpl
new file mode 100644
index 00000000..a5ddc1b4
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/templates/_helpers.tpl
@@ -0,0 +1,122 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "trino.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "trino.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "trino.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{- define "trino.coordinator" -}}
+{{- if .Values.coordinatorNameOverride }}
+{{- .Values.coordinatorNameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}-coordinator
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}-coordinator
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{- define "trino.worker" -}}
+{{- if .Values.workerNameOverride }}
+{{- .Values.workerNameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}-worker
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}-worker
+{{- end }}
+{{- end }}
+{{- end }}
+
+
+{{- define "trino.catalog" -}}
+{{ template "trino.fullname" . }}-catalog
+{{- end -}}
+
+{{/*
+Common labels
+*/}}
+{{- define "trino.labels" -}}
+helm.sh/chart: {{ include "trino.chart" . }}
+{{ include "trino.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "trino.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "trino.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "trino.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "trino.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
+
+{{/*
+Return the proper image name
+{{ include "trino.image" . }}
+
+Code is inspired from bitnami/common
+
+*/}}
+{{- define "trino.image" -}}
+{{- $repositoryName := .Values.image.repository -}}
+{{- if .Values.image.useRepositoryAsSoleImageReference -}}
+ {{- printf "%s" $repositoryName -}}
+{{- else -}}
+ {{- $repositoryName := .Values.image.repository -}}
+ {{- $registryName := .Values.image.registry -}}
+ {{- $separator := ":" -}}
+ {{- $termination := (default .Chart.AppVersion .Values.image.tag) | toString -}}
+ {{- if .Values.image.digest }}
+ {{- $separator = "@" -}}
+ {{- $termination = .Values.image.digest | toString -}}
+ {{- end -}}
+ {{- if $registryName }}
+ {{- printf "%s/%s%s%s" $registryName $repositoryName $separator $termination -}}
+ {{- else -}}
+ {{- printf "%s%s%s" $repositoryName $separator $termination -}}
+ {{- end -}}
+{{- end -}}
+{{- end -}}
diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/autoscaler.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/autoscaler.yaml
new file mode 100644
index 00000000..7476b3ab
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/templates/autoscaler.yaml
@@ -0,0 +1,28 @@
+{{- if .Values.server.autoscaling.enabled -}}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+ name: {{ template "trino.worker" . }}
+ {{- if .Values.commonLabels }}
+ labels:
+ {{- tpl (toYaml .Values.commonLabels) . | nindent 4 }}
+ {{- end }}
+spec:
+ maxReplicas: {{ .Values.server.autoscaling.maxReplicas }}
+ minReplicas: {{ .Values.server.workers }}
+ scaleTargetRef:
+ apiVersion: apps/v1
+ kind: Deployment
+ name: {{ template "trino.worker" . }}
+ metrics:
+ - type: Resource
+ resource:
+ name: cpu
+ target:
+ type: Utilization
+ averageUtilization: {{ .Values.server.autoscaling.targetCPUUtilizationPercentage }}
+ {{ if .Values.server.autoscaling.behavior -}}
+ behavior:
+ {{- toYaml .Values.server.autoscaling.behavior | nindent 4 }}
+ {{- end }}
+{{- end }}
diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-catalog.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-catalog.yaml
new file mode 100644
index 00000000..9c1fea67
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-catalog.yaml
@@ -0,0 +1,26 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: {{ template "trino.catalog" . }}
+ labels:
+ app: {{ template "trino.name" . }}
+ chart: {{ template "trino.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ role: catalogs
+ {{- if .Values.commonLabels }}
+ {{- tpl (toYaml .Values.commonLabels) . | nindent 4 }}
+ {{- end }}
+data:
+ tpch.properties: |
+ connector.name=tpch
+ tpch.splits-per-node=4
+ tpcds.properties: |
+ connector.name=tpcds
+ tpcds.splits-per-node=4
+{{- range $catalogName, $catalogProperties := .Values.additionalCatalogs }}
+ {{ $catalogName }}.properties: |
+ {{- $catalogProperties | nindent 4 }}
+{{- end }}
+
+
diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-coordinator.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-coordinator.yaml
new file mode 100644
index 00000000..ba88283e
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-coordinator.yaml
@@ -0,0 +1,173 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: {{ template "trino.coordinator" . }}
+ labels:
+ app: {{ template "trino.name" . }}
+ chart: {{ template "trino.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ component: coordinator
+ {{- if .Values.commonLabels }}
+ {{- tpl (toYaml .Values.commonLabels) . | nindent 4 }}
+ {{- end }}
+data:
+ node.properties: |
+ node.environment={{ .Values.server.node.environment }}
+ node.data-dir={{ .Values.server.node.dataDir }}
+ plugin.dir={{ .Values.server.node.pluginDir }}
+ {{- range $configValue := .Values.additionalNodeProperties }}
+ {{ $configValue }}
+ {{- end }}
+
+ jvm.config: |
+ -server
+ -agentpath:/usr/lib/trino/bin/libjvmkill.so
+ -Xmx{{ .Values.coordinator.jvm.maxHeapSize }}
+ -XX:+{{ .Values.coordinator.jvm.gcMethod.type }}
+ -XX:G1HeapRegionSize={{ .Values.coordinator.jvm.gcMethod.g1.heapRegionSize }}
+ -XX:+ExplicitGCInvokesConcurrent
+ -XX:+HeapDumpOnOutOfMemoryError
+ -XX:+ExitOnOutOfMemoryError
+ -XX:-OmitStackTraceInFastThrow
+ -XX:ReservedCodeCacheSize=512M
+ -XX:PerMethodRecompilationCutoff=10000
+ -XX:PerBytecodeRecompilationCutoff=10000
+ -Djdk.attach.allowAttachSelf=true
+ -Djdk.nio.maxCachedBufferSize=2000000
+ -XX:+UnlockDiagnosticVMOptions
+ # Reduce starvation of threads by GClocker, recommend to set about the number of cpu cores (JDK-8192647)
+ -XX:GCLockerRetryAllocationCount=32
+ {{- range $configValue := .Values.coordinator.additionalJVMConfig }}
+ {{ $configValue }}
+ {{- end }}
+
+ config.properties: |
+ coordinator=true
+{{- if gt (int .Values.server.workers) 0 }}
+ node-scheduler.include-coordinator=false
+{{- else }}
+ node-scheduler.include-coordinator=true
+{{- end }}
+ http-server.http.port={{ .Values.service.port }}
+ query.max-memory={{ .Values.server.config.query.maxMemory }}
+ query.max-memory-per-node={{ .Values.coordinator.config.query.maxMemoryPerNode }}
+{{- if .Values.coordinator.config.memory.heapHeadroomPerNode }}
+ memory.heap-headroom-per-node={{ .Values.coordinator.config.memory.heapHeadroomPerNode }}
+{{- end }}
+ discovery.uri=http://localhost:{{ .Values.service.port }}
+{{- if .Values.server.config.authenticationType }}
+ http-server.authentication.type={{ .Values.server.config.authenticationType }}
+{{- end }}
+ {{- range $configValue := .Values.additionalConfigProperties }}
+ {{ $configValue }}
+ {{- end }}
+ {{- if .Values.server.config.https.enabled }}
+ http-server.https.enabled=true
+ http-server.https.port={{ .Values.server.config.https.port }}
+ http-server.https.keystore.path={{ .Values.server.config.https.keystore.path }}
+ {{- end }}
+ {{ .Values.server.coordinatorExtraConfig | indent 4 }}
+
+{{- if .Values.accessControl }}{{- if eq .Values.accessControl.type "configmap" }}
+ access-control.properties: |
+ access-control.name=file
+ {{- if .Values.accessControl.refreshPeriod }}
+ security.refresh-period={{ .Values.accessControl.refreshPeriod }}
+ {{- end }}
+ security.config-file={{ .Values.server.config.path }}/access-control/{{ .Values.accessControl.configFile | default "rules.json" }}
+{{- end }}{{- end }}
+
+{{- if .Values.resourceGroups }}
+ resource-groups.properties: |
+ resource-groups.configuration-manager=file
+ resource-groups.config-file={{ .Values.server.config.path }}/resource-groups/resource-groups.json
+{{- end }}
+
+ exchange-manager.properties: |
+ exchange-manager.name={{ .Values.server.exchangeManager.name }}
+ {{ if eq .Values.server.exchangeManager.name "filesystem" }}
+ exchange.base-directories={{ .Values.server.exchangeManager.baseDir }}
+ {{- end }}
+ {{- range $configValue := .Values.additionalExchangeManagerProperties }}
+ {{ $configValue }}
+ {{- end }}
+
+ log.properties: |
+ io.trino={{ .Values.server.log.trino.level }}
+ {{- range $configValue := .Values.additionalLogProperties }}
+ {{ $configValue }}
+ {{- end }}
+
+ {{- if contains "PASSWORD" .Values.server.config.authenticationType }}
+ password-authenticator.properties: |
+ password-authenticator.name=file
+ file.password-file={{ .Values.server.config.path }}/auth/password.db
+ {{- end }}
+ {{- if .Values.auth.groups }}{{- if not (index .Values.coordinator.additionalConfigFiles "group-provider.properties") }}
+ group-provider.properties: |
+ group-provider.name=file
+ file.group-file={{ .Values.server.config.path }}/auth/group.db
+ {{- if .Values.auth.refreshPeriod }}
+ file.refresh-period={{ .Values.auth.refreshPeriod }}
+ {{- end }}
+ {{- end }}{{- end }}
+
+{{ if .Values.eventListenerProperties }}
+ event-listener.properties: |
+ {{- range $configValue := .Values.eventListenerProperties }}
+ {{ $configValue }}
+ {{- end }}
+{{ end }}
+
+{{- range $fileName, $fileContent := .Values.coordinator.additionalConfigFiles }}
+ {{ $fileName }}: |
+ {{- $fileContent | nindent 4 }}
+{{- end }}
+
+---
+
+{{- if .Values.accessControl }}{{- if eq .Values.accessControl.type "configmap" }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: trino-access-control-volume-coordinator
+ labels:
+ {{- include "trino.labels" . | nindent 4 }}
+ app.kubernetes.io/component: coordinator
+data:
+ {{- range $key, $val := .Values.accessControl.rules }}
+ {{ $key }}: {{ $val | quote }}
+ {{- end }}
+{{- end }}{{- end }}
+
+---
+
+{{- if .Values.resourceGroups }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: trino-resource-groups-volume-coordinator
+ labels:
+ {{- include "trino.labels" . | nindent 4 }}
+ app.kubernetes.io/component: coordinator
+data:
+ resource-groups.json: |-
+ {{- .Values.resourceGroups.resourceGroupsConfig | nindent 4 }}
+{{- end }}
+
+---
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: schemas-volume-coordinator
+ labels:
+ {{- include "trino.labels" . | nindent 4 }}
+ app.kubernetes.io/component: coordinator
+data:
+ {{- range $key, $val := .Values.kafka.tableDescriptions }}
+ {{ $key }}: {{ $val | quote }}
+ {{- end }}
+
+---
diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-worker.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-worker.yaml
new file mode 100644
index 00000000..03617a9b
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-worker.yaml
@@ -0,0 +1,100 @@
+{{- if gt (int .Values.server.workers) 0 }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: {{ template "trino.worker" . }}
+ labels:
+ app: {{ template "trino.name" . }}
+ chart: {{ template "trino.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ component: worker
+ {{- if .Values.commonLabels }}
+ {{- tpl (toYaml .Values.commonLabels) . | nindent 4 }}
+ {{- end }}
+data:
+ node.properties: |
+ node.environment={{ .Values.server.node.environment }}
+ node.data-dir={{ .Values.server.node.dataDir }}
+ plugin.dir={{ .Values.server.node.pluginDir }}
+ {{- range $configValue := .Values.additionalNodeProperties }}
+ {{ $configValue }}
+ {{- end }}
+
+ jvm.config: |
+ -server
+ -agentpath:/usr/lib/trino/bin/libjvmkill.so
+ -Xmx{{ .Values.worker.jvm.maxHeapSize }}
+ -XX:+{{ .Values.worker.jvm.gcMethod.type }}
+ -XX:G1HeapRegionSize={{ .Values.worker.jvm.gcMethod.g1.heapRegionSize }}
+ -XX:+ExplicitGCInvokesConcurrent
+ -XX:+HeapDumpOnOutOfMemoryError
+ -XX:+ExitOnOutOfMemoryError
+ -XX:-OmitStackTraceInFastThrow
+ -XX:ReservedCodeCacheSize=512M
+ -XX:PerMethodRecompilationCutoff=10000
+ -XX:PerBytecodeRecompilationCutoff=10000
+ -Djdk.attach.allowAttachSelf=true
+ -Djdk.nio.maxCachedBufferSize=2000000
+ -XX:+UnlockDiagnosticVMOptions
+ # Reduce starvation of threads by GClocker, recommend to set about the number of cpu cores (JDK-8192647)
+ -XX:GCLockerRetryAllocationCount=32
+ {{- range $configValue := .Values.worker.additionalJVMConfig }}
+ {{ $configValue }}
+ {{- end }}
+
+ config.properties: |
+ coordinator=false
+ http-server.http.port={{ .Values.service.port }}
+ query.max-memory={{ .Values.server.config.query.maxMemory }}
+ query.max-memory-per-node={{ .Values.worker.config.query.maxMemoryPerNode }}
+ {{- if .Values.worker.config.memory.heapHeadroomPerNode }}
+ memory.heap-headroom-per-node={{ .Values.worker.config.memory.heapHeadroomPerNode }}
+ {{- end }}
+ discovery.uri=http://{{ template "trino.fullname" . }}:{{ .Values.service.port }}
+ {{- range $configValue := .Values.additionalConfigProperties }}
+ {{ $configValue }}
+ {{- end }}
+ {{ .Values.server.workerExtraConfig | indent 4 }}
+
+ exchange-manager.properties: |
+ exchange-manager.name={{ .Values.server.exchangeManager.name }}
+ {{ if eq .Values.server.exchangeManager.name "filesystem" }}
+ exchange.base-directories={{ .Values.server.exchangeManager.baseDir }}
+ {{- end }}
+ {{- range $configValue := .Values.additionalExchangeManagerProperties }}
+ {{ $configValue }}
+ {{- end }}
+
+ log.properties: |
+ io.trino={{ .Values.server.log.trino.level }}
+ {{- range $configValue := .Values.additionalLogProperties }}
+ {{ $configValue }}
+ {{- end }}
+
+{{ if .Values.eventListenerProperties }}
+ event-listener.properties: |
+ {{- range $configValue := .Values.eventListenerProperties }}
+ {{ $configValue }}
+ {{- end }}
+{{ end }}
+
+{{- range $fileName, $fileContent := .Values.worker.additionalConfigFiles }}
+ {{ $fileName }}: |
+ {{- $fileContent | nindent 4 }}
+{{- end }}
+
+---
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: schemas-volume-worker
+ labels:
+ {{- include "trino.labels" . | nindent 4 }}
+ app.kubernetes.io/component: worker
+data:
+ {{- range $key, $val := .Values.kafka.tableDescriptions }}
+ {{ $key }}: {{ $val | quote }}
+ {{- end }}
+{{- end }}
diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/deployment-coordinator.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/deployment-coordinator.yaml
new file mode 100644
index 00000000..844be4e7
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/templates/deployment-coordinator.yaml
@@ -0,0 +1,184 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {{ template "trino.coordinator" . }}
+ labels:
+ app: {{ template "trino.name" . }}
+ chart: {{ template "trino.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ component: coordinator
+ {{- if .Values.commonLabels }}
+ {{- tpl (toYaml .Values.commonLabels) . | nindent 4 }}
+ {{- end }}
+spec:
+ selector:
+ matchLabels:
+ app: {{ template "trino.name" . }}
+ release: {{ .Release.Name }}
+ component: coordinator
+ template:
+ metadata:
+ {{- if .Values.coordinator.annotations }}
+ annotations:
+ {{- tpl (toYaml .Values.coordinator.annotations) . | nindent 8 }}
+ {{- end }}
+ labels:
+ app: {{ template "trino.name" . }}
+ release: {{ .Release.Name }}
+ component: coordinator
+ {{- if .Values.coordinator.labels }}
+ {{- tpl (toYaml .Values.coordinator.labels) . | nindent 8 }}
+ {{- end }}
+ {{- if .Values.commonLabels }}
+ {{- tpl (toYaml .Values.commonLabels) . | nindent 8 }}
+ {{- end }}
+ spec:
+ serviceAccountName: {{ include "trino.serviceAccountName" . }}
+ {{- with .Values.securityContext }}
+ securityContext:
+ runAsUser: {{ .runAsUser }}
+ runAsGroup: {{ .runAsGroup }}
+ {{- end }}
+ {{- if .Values.shareProcessNamespace.coordinator }}
+ shareProcessNamespace: {{ .Values.shareProcessNamespace.coordinator }}
+ {{- end }}
+ volumes:
+ - name: config-volume
+ configMap:
+ name: {{ template "trino.coordinator" . }}
+ - name: catalog-volume
+ configMap:
+ name: {{ template "trino.catalog" . }}
+ - name: schemas-volume
+ configMap:
+ name: schemas-volume-coordinator
+ {{- if .Values.accessControl }}{{- if eq .Values.accessControl.type "configmap" }}
+ - name: access-control-volume
+ configMap:
+ name: trino-access-control-volume-coordinator
+ {{- end }}{{- end }}
+ {{- if .Values.resourceGroups }}
+ - name: resource-groups-volume
+ configMap:
+ name: trino-resource-groups-volume-coordinator
+ {{- end }}
+ {{- if or .Values.auth.passwordAuth .Values.auth.groups }}
+ - name: file-authentication-volume
+ secret:
+ {{- if and .Values.auth .Values.auth.passwordAuthSecret }}
+ secretName: {{ .Values.auth.passwordAuthSecret }}
+ {{- else }}
+ secretName: trino-file-authentication
+ {{- end }}
+ items:
+ {{- if .Values.auth.passwordAuth }}
+ - key: password.db
+ path: password.db
+ {{- end }}
+ {{- if .Values.auth.groups }}
+ - key: group.db
+ path: group.db
+ {{- end }}
+ {{- end }}
+ {{- range .Values.secretMounts }}
+ - name: {{ .name }}
+ secret:
+ secretName: {{ .secretName }}
+ {{- end }}
+ {{- range .Values.coordinator.secretMounts }}
+ - name: {{ .name }}
+ secret:
+ secretName: {{ .secretName }}
+ {{- end }}
+ {{- with .Values.coordinator.additionalVolumes }}
+ {{- . | toYaml | nindent 8 }}
+ {{- end }}
+ {{- if .Values.initContainers.coordinator }}
+ initContainers:
+ {{- tpl (toYaml .Values.initContainers.coordinator) . | nindent 6 }}
+ {{- end }}
+ imagePullSecrets:
+ {{- toYaml .Values.imagePullSecrets | nindent 8 }}
+ containers:
+ - name: {{ .Chart.Name }}-coordinator
+ image: {{ include "trino.image" . }}
+ imagePullPolicy: {{ .Values.image.pullPolicy }}
+ env:
+ {{- toYaml .Values.env | nindent 12 }}
+ envFrom:
+ {{- toYaml .Values.envFrom | nindent 12 }}
+ volumeMounts:
+ - mountPath: {{ .Values.server.config.path }}
+ name: config-volume
+ - mountPath: {{ .Values.server.config.path }}/catalog
+ name: catalog-volume
+ - mountPath: {{ .Values.kafka.mountPath }}
+ name: schemas-volume
+ {{- if .Values.accessControl }}{{- if eq .Values.accessControl.type "configmap" }}
+ - mountPath: {{ .Values.server.config.path }}/access-control
+ name: access-control-volume
+ {{- end }}{{- end }}
+ {{- if .Values.resourceGroups }}
+ - mountPath: {{ .Values.server.config.path }}/resource-groups
+ name: resource-groups-volume
+ {{- end }}
+ {{- range .Values.secretMounts }}
+ - name: {{ .name }}
+ mountPath: {{ .path }}
+ {{- end }}
+ {{- range .Values.coordinator.secretMounts }}
+ - name: {{ .name }}
+ mountPath: {{ .path }}
+ {{- end }}
+ {{- if or .Values.auth.passwordAuth .Values.auth.groups }}
+ - mountPath: {{ .Values.server.config.path }}/auth
+ name: file-authentication-volume
+ {{- end }}
+ {{- with .Values.coordinator.additionalVolumeMounts }}
+ {{- . | toYaml | nindent 12 }}
+ {{- end }}
+ ports:
+ - name: http
+ containerPort: {{ .Values.service.port }}
+ protocol: TCP
+ {{- range $key, $value := .Values.coordinator.additionalExposedPorts }}
+ - name: {{ $value.name }}
+ containerPort: {{ $value.port }}
+ protocol: {{ $value.protocol }}
+ {{- end }}
+ livenessProbe:
+ httpGet:
+ path: /v1/info
+ port: http
+ initialDelaySeconds: {{ .Values.coordinator.livenessProbe.initialDelaySeconds | default 20 }}
+ periodSeconds: {{ .Values.coordinator.livenessProbe.periodSeconds | default 10 }}
+ timeoutSeconds: {{ .Values.coordinator.livenessProbe.timeoutSeconds | default 5 }}
+ failureThreshold: {{ .Values.coordinator.livenessProbe.failureThreshold | default 6 }}
+ successThreshold: {{ .Values.coordinator.livenessProbe.successThreshold | default 1 }}
+ readinessProbe:
+ httpGet:
+ path: /v1/info
+ port: http
+ initialDelaySeconds: {{ .Values.coordinator.readinessProbe.initialDelaySeconds | default 20 }}
+ periodSeconds: {{ .Values.coordinator.readinessProbe.periodSeconds | default 10 }}
+ timeoutSeconds: {{ .Values.coordinator.readinessProbe.timeoutSeconds | default 5 }}
+ failureThreshold: {{ .Values.coordinator.readinessProbe.failureThreshold | default 6 }}
+ successThreshold: {{ .Values.coordinator.readinessProbe.successThreshold | default 1 }}
+ resources:
+ {{- toYaml .Values.coordinator.resources | nindent 12 }}
+ {{- if .Values.sidecarContainers.coordinator }}
+ {{- toYaml .Values.sidecarContainers.coordinator | nindent 8 }}
+ {{- end }}
+ {{- with .Values.coordinator.nodeSelector }}
+ nodeSelector:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ {{- with .Values.coordinator.affinity }}
+ affinity:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ {{- with .Values.coordinator.tolerations }}
+ tolerations:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/deployment-worker.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/deployment-worker.yaml
new file mode 100644
index 00000000..5d9dd373
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/templates/deployment-worker.yaml
@@ -0,0 +1,149 @@
+{{- if gt (int .Values.server.workers) 0 }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {{ template "trino.worker" . }}
+ labels:
+ app: {{ template "trino.name" . }}
+ chart: {{ template "trino.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ component: worker
+ {{- if .Values.commonLabels }}
+ {{- tpl (toYaml .Values.commonLabels) . | nindent 4 }}
+ {{- end }}
+spec:
+ {{- if not .Values.server.autoscaling.enabled }}
+ replicas: {{ .Values.server.workers }}
+ {{- end }}
+ selector:
+ matchLabels:
+ app: {{ template "trino.name" . }}
+ release: {{ .Release.Name }}
+ component: worker
+ template:
+ metadata:
+ {{- if .Values.worker.annotations }}
+ annotations:
+ {{- tpl (toYaml .Values.worker.annotations) . | nindent 8 }}
+ {{- end }}
+ labels:
+ app: {{ template "trino.name" . }}
+ release: {{ .Release.Name }}
+ component: worker
+ {{- if .Values.worker.labels }}
+ {{- tpl (toYaml .Values.worker.labels) . | nindent 8 }}
+ {{- end }}
+ {{- if .Values.commonLabels }}
+ {{- tpl (toYaml .Values.commonLabels) . | nindent 8 }}
+ {{- end }}
+ spec:
+ serviceAccountName: {{ include "trino.serviceAccountName" . }}
+ {{- with .Values.securityContext }}
+ securityContext:
+ runAsUser: {{ .runAsUser }}
+ runAsGroup: {{ .runAsGroup }}
+ {{- end }}
+ {{- if .Values.shareProcessNamespace.worker }}
+ shareProcessNamespace: {{ .Values.shareProcessNamespace.worker }}
+ {{- end }}
+ volumes:
+ - name: config-volume
+ configMap:
+ name: {{ template "trino.worker" . }}
+ - name: catalog-volume
+ configMap:
+ name: {{ template "trino.catalog" . }}
+ - name: schemas-volume
+ configMap:
+ name: schemas-volume-worker
+ {{- range .Values.secretMounts }}
+ - name: {{ .name }}
+ secret:
+ secretName: {{ .secretName }}
+ {{- end }}
+ {{- range .Values.worker.secretMounts }}
+ - name: {{ .name }}
+ secret:
+ secretName: {{ .secretName }}
+ {{- end }}
+ {{- with .Values.worker.additionalVolumes }}
+ {{- . | toYaml | nindent 8 }}
+ {{- end }}
+ {{- if .Values.initContainers.worker }}
+ initContainers:
+ {{- tpl (toYaml .Values.initContainers.worker) . | nindent 6 }}
+ {{- end }}
+ imagePullSecrets:
+ {{- toYaml .Values.imagePullSecrets | nindent 8 }}
+ containers:
+ - name: {{ .Chart.Name }}-worker
+ image: {{ include "trino.image" . }}
+ imagePullPolicy: {{ .Values.image.pullPolicy }}
+ env:
+ {{- toYaml .Values.env | nindent 12 }}
+ envFrom:
+ {{- toYaml .Values.envFrom | nindent 12 }}
+ volumeMounts:
+ - mountPath: {{ .Values.server.config.path }}
+ name: config-volume
+ - mountPath: {{ .Values.server.config.path }}/catalog
+ name: catalog-volume
+ - mountPath: {{ .Values.kafka.mountPath }}
+ name: schemas-volume
+ {{- range .Values.secretMounts }}
+ - name: {{ .name }}
+ mountPath: {{ .path }}
+ {{- end }}
+ {{- range .Values.worker.secretMounts }}
+ - name: {{ .name }}
+ mountPath: {{ .path }}
+ {{- end }}
+ {{- with .Values.worker.additionalVolumeMounts }}
+ {{- . | toYaml | nindent 12 }}
+ {{- end }}
+ ports:
+ - name: http
+ containerPort: {{ .Values.service.port }}
+ protocol: TCP
+ {{- range $key, $value := .Values.worker.additionalExposedPorts }}
+ - name: {{ $value.name }}
+ containerPort: {{ $value.port }}
+ protocol: {{ $value.protocol }}
+ {{- end }}
+ livenessProbe:
+ httpGet:
+ path: /v1/info
+ port: http
+ initialDelaySeconds: {{ .Values.worker.livenessProbe.initialDelaySeconds | default 20 }}
+ periodSeconds: {{ .Values.worker.livenessProbe.periodSeconds | default 10 }}
+ timeoutSeconds: {{ .Values.worker.livenessProbe.timeoutSeconds | default 5 }}
+ failureThreshold: {{ .Values.worker.livenessProbe.failureThreshold | default 6 }}
+ successThreshold: {{ .Values.worker.livenessProbe.successThreshold | default 1 }}
+ readinessProbe:
+ httpGet:
+ path: /v1/info
+ port: http
+ initialDelaySeconds: {{ .Values.worker.readinessProbe.initialDelaySeconds | default 20 }}
+ periodSeconds: {{ .Values.worker.readinessProbe.periodSeconds | default 10 }}
+ timeoutSeconds: {{ .Values.worker.readinessProbe.timeoutSeconds | default 5 }}
+ failureThreshold: {{ .Values.worker.readinessProbe.failureThreshold | default 6 }}
+ successThreshold: {{ .Values.worker.readinessProbe.successThreshold | default 1 }}
+ resources:
+ {{- toYaml .Values.worker.resources | nindent 12 }}
+ {{- if .Values.sidecarContainers.worker }}
+ {{- toYaml .Values.sidecarContainers.worker | nindent 8 }}
+ {{- end }}
+ {{- with .Values.worker.nodeSelector }}
+ nodeSelector:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ {{- with .Values.worker.affinity }}
+ affinity:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ {{- with .Values.worker.tolerations }}
+ tolerations:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+{{- end }}
diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/ingress.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/ingress.yaml
new file mode 100644
index 00000000..b7849748
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/templates/ingress.yaml
@@ -0,0 +1,44 @@
+{{- if .Values.ingress.enabled -}}
+{{- $fullName := include "trino.fullname" . -}}
+{{- $svcPort := .Values.service.port -}}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+ name: {{ template "trino.coordinator" . }}
+ labels:
+ {{- include "trino.labels" . | nindent 4 }}
+ {{- if .Values.commonLabels }}
+ {{- tpl (toYaml .Values.commonLabels) . | nindent 4 }}
+ {{- end }}
+ {{- with .Values.ingress.annotations }}
+ annotations:
+ {{- toYaml . | nindent 4 }}
+ {{- end }}
+spec:
+ ingressClassName: {{ .Values.ingress.className }}
+ {{- if .Values.ingress.tls }}
+ tls:
+ {{- range .Values.ingress.tls }}
+ - hosts:
+ {{- range .hosts }}
+ - {{ . | quote }}
+ {{- end }}
+ secretName: {{ .secretName }}
+ {{- end }}
+ {{- end }}
+ rules:
+ {{- range .Values.ingress.hosts }}
+ - host: {{ .host | quote }}
+ http:
+ paths:
+ {{- range .paths }}
+ - path: {{ .path }}
+ pathType: {{ .pathType }}
+ backend:
+ service:
+ name: {{ $fullName }}
+ port:
+ number: {{ $svcPort }}
+ {{- end }}
+ {{- end }}
+{{- end }}
diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/secret.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/secret.yaml
new file mode 100644
index 00000000..754353a5
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/templates/secret.yaml
@@ -0,0 +1,19 @@
+{{- if or .Values.auth.passwordAuth .Values.auth.groups }}
+apiVersion: v1
+kind: Secret
+metadata:
+ {{- if and .Values.auth .Values.auth.passwordAuthSecret }}
+ name: {{ .Values.auth.passwordAuthSecret }}
+ {{- else }}
+ name: trino-file-authentication
+ {{- end }}
+ labels:
+ {{- include "trino.labels" . | nindent 4 }}
+data:
+{{- if .Values.auth.passwordAuth }}
+ password.db: {{ .Values.auth.passwordAuth | b64enc }}
+{{- end }}
+{{- if .Values.auth.groups}}
+ group.db: {{ .Values.auth.groups | b64enc }}
+{{- end }}
+{{- end }}
diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/service.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/service.yaml
new file mode 100644
index 00000000..cfb0b2f3
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/templates/service.yaml
@@ -0,0 +1,26 @@
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "trino.fullname" . }}
+ labels:
+ app: {{ template "trino.name" . }}
+ chart: {{ template "trino.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ type: {{ .Values.service.type }}
+ ports:
+ - port: {{ .Values.service.port }}
+ targetPort: http
+ protocol: TCP
+ name: http
+ {{- range $key, $value := .Values.coordinator.additionalExposedPorts }}
+ - port: {{ $value.servicePort }}
+ name: {{ $value.name }}
+ targetPort: {{ $value.port }}
+ protocol: {{ $value.protocol }}
+ {{- end }}
+ selector:
+ app: {{ template "trino.name" . }}
+ release: {{ .Release.Name }}
+ component: coordinator
diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/serviceaccount.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/serviceaccount.yaml
new file mode 100644
index 00000000..29dfdcc2
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/templates/serviceaccount.yaml
@@ -0,0 +1,12 @@
+{{- if .Values.serviceAccount.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: {{ include "trino.serviceAccountName" . }}
+ labels:
+ {{- include "trino.labels" . | nindent 4 }}
+ {{- with .Values.serviceAccount.annotations }}
+ annotations:
+ {{- toYaml . | nindent 4 }}
+ {{- end }}
+{{- end }}
diff --git a/terraform/modules/helm/trino/trino-helm-chart/values.yaml b/terraform/modules/helm/trino/trino-helm-chart/values.yaml
new file mode 100644
index 00000000..05bc1020
--- /dev/null
+++ b/terraform/modules/helm/trino/trino-helm-chart/values.yaml
@@ -0,0 +1,448 @@
+# Default values for trino.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+image:
+ registry: "" # Image registry, defaults to empty, which results in DockerHub usage
+ repository: trinodb/trino # Repository location of the Trino image, typically `organization/imagename`
+ tag: "" # Image tag, defaults to the Trino release version specified as `appVersion` from Chart.yaml
+ digest: "" # Optional digest value of the image specified as `sha256:abcd...`. A specified value overrides `tag`.
+ useRepositoryAsSoleImageReference: false # When true, only the content in `repository` is used as image reference
+ pullPolicy: IfNotPresent
+
+imagePullSecrets:
+ - name: registry-credentials
+
+server:
+ workers: 1
+ node:
+ environment: production
+ dataDir: /data/trino
+ pluginDir: /usr/lib/trino/plugin
+ log:
+ trino:
+ level: INFO
+ config:
+ path: /etc/trino
+ http:
+ port: 8080
+ https:
+ enabled: false
+ port: 8443
+ keystore:
+ path: ""
+ # Trino supports multiple authentication types: PASSWORD, CERTIFICATE, OAUTH2, JWT, KERBEROS
+ # For more info: https://trino.io/docs/current/security/authentication-types.html
+ authenticationType: ""
+ query:
+ maxMemory: "4GB"
+ exchangeManager:
+ name: "filesystem"
+ baseDir: "/tmp/trino-local-file-system-exchange-manager"
+ workerExtraConfig: ""
+ coordinatorExtraConfig: ""
+ autoscaling:
+ enabled: false
+ maxReplicas: 5
+ targetCPUUtilizationPercentage: 50
+ behavior: {}
+ # scaleDown:
+ # stabilizationWindowSeconds: 300
+ # policies:
+ # - type: Percent
+ # value: 100
+ # periodSeconds: 15
+ # scaleUp:
+ # stabilizationWindowSeconds: 0
+ # policies:
+ # - type: Percent
+ # value: 100
+ # periodSeconds: 15
+ # - type: Pods
+ # value: 4
+ # periodSeconds: 15
+ # selectPolicy: Max
+
+accessControl: {}
+ # type: configmap
+ # refreshPeriod: 60s
+ # # Rules file is mounted to /etc/trino/access-control
+ # configFile: "rules.json"
+ # rules:
+ # rules.json: |-
+ # {
+ # "catalogs": [
+ # {
+ # "user": "admin",
+ # "catalog": "(mysql|system)",
+ # "allow": "all"
+ # },
+ # {
+ # "group": "finance|human_resources",
+ # "catalog": "postgres",
+ # "allow": true
+ # },
+ # {
+ # "catalog": "hive",
+ # "allow": "all"
+ # },
+ # {
+ # "user": "alice",
+ # "catalog": "postgresql",
+ # "allow": "read-only"
+ # },
+ # {
+ # "catalog": "system",
+ # "allow": "none"
+ # }
+ # ],
+ # "schemas": [
+ # {
+ # "user": "admin",
+ # "schema": ".*",
+ # "owner": true
+ # },
+ # {
+ # "user": "guest",
+ # "owner": false
+ # },
+ # {
+ # "catalog": "default",
+ # "schema": "default",
+ # "owner": true
+ # }
+ # ]
+ # }
+
+resourceGroups: {}
+ # # Resource groups file is mounted to /etc/trino/resource-groups/resource-groups.json
+ # resourceGroupsConfig: |-
+ # {
+ # "rootGroups": [
+ # {
+ # "name": "global",
+ # "softMemoryLimit": "80%",
+ # "hardConcurrencyLimit": 100,
+ # "maxQueued": 100,
+ # "schedulingPolicy": "fair",
+ # "jmxExport": true,
+ # "subGroups": [
+ # {
+ # "name": "admin",
+ # "softMemoryLimit": "30%",
+ # "hardConcurrencyLimit": 20,
+ # "maxQueued": 10
+ # },
+ # {
+ # "name": "finance_human_resources",
+ # "softMemoryLimit": "20%",
+ # "hardConcurrencyLimit": 15,
+ # "maxQueued": 10
+ # },
+ # {
+ # "name": "general",
+ # "softMemoryLimit": "30%",
+ # "hardConcurrencyLimit": 20,
+ # "maxQueued": 10
+ # },
+ # {
+ # "name": "readonly",
+ # "softMemoryLimit": "10%",
+ # "hardConcurrencyLimit": 5,
+ # "maxQueued": 5
+ # }
+ # ]
+ # }
+ # ],
+ # "selectors": [
+ # {
+ # "user": "admin",
+ # "group": "global.admin"
+ # },
+ # {
+  #       "userGroupRegex": "finance|human_resources",
+ # "group": "global.finance_human_resources"
+ # },
+ # {
+ # "user": "alice",
+ # "group": "global.readonly"
+ # },
+ # {
+ # "group": "global.general"
+ # }
+ # ]
+ # }
+
+additionalNodeProperties: {}
+
+additionalConfigProperties: {}
+
+additionalLogProperties: {}
+
+additionalExchangeManagerProperties: {}
+
+eventListenerProperties: {}
+
+additionalCatalogs:
+ lakehouse: |-
+ connector.name=hudi
+ hive.metastore.uri=thrift://localhost:9083
+ hive.s3.aws-access-key=""
+ hive.s3.aws-secret-key=""
+ hive.s3.endpoint=http://localhost:4566
+ hive.s3.ssl.enabled=false
+
+# Array of EnvVar (https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#envvar-v1-core)
+env: []
+
+# Array of EnvFromSource (https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#envfromsource-v1-core)
+envFrom: []
+
+initContainers: {}
+ # coordinator:
+ # - name: init-coordinator
+ # image: busybox:1.28
+ # imagePullPolicy: IfNotPresent
+ # command: ['sh', '-c', "until nslookup myservice.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
+ # worker:
+ # - name: init-worker
+ # image: busybox:1.28
+ # command: ['sh', '-c', 'echo The worker is running! && sleep 3600']
+
+sidecarContainers: {}
+# coordinator:
+# - name: side-coordinator
+# image: busybox:1.28
+# imagePullPolicy: IfNotPresent
+# command: ['sleep', '1']
+# worker:
+# - name: side-worker
+# image: busybox:1.28
+# imagePullPolicy: IfNotPresent
+# command: ['sleep', '1']
+
+securityContext:
+ runAsUser: 1000
+ runAsGroup: 1000
+
+shareProcessNamespace:
+ coordinator: false
+ worker: false
+
+service:
+ type: ClusterIP
+ port: 8080
+
+auth: {}
+ # Set username and password
+ # https://trino.io/docs/current/security/password-file.html#file-format
+ # passwordAuth: "username:encrypted-password-with-htpasswd"
+ # or set the name of a secret containing this file in the password.db key
+ # passwordAuthSecret: "trino-password-authentication"
+ # Set users' groups
+ # https://trino.io/docs/current/security/group-file.html#file-format
+ # refreshPeriod: 5s
+ # groups: "group_name:user_1,user_2,user_3"
+
+serviceAccount:
+ # Specifies whether a service account should be created
+ create: false
+ # The name of the service account to use.
+ # If not set and create is true, a name is generated using the fullname template
+ name: ""
+ # Annotations to add to the service account
+ annotations: {}
+
+secretMounts: []
+ # - name: sample-secret
+ # secretName: sample-secret
+ # path: /secrets/sample.json
+
+coordinator:
+ jvm:
+ maxHeapSize: "8G"
+ gcMethod:
+ type: "UseG1GC"
+ g1:
+ heapRegionSize: "32M"
+
+ config:
+ memory:
+ heapHeadroomPerNode: ""
+ query:
+ maxMemoryPerNode: "1GB"
+
+ additionalJVMConfig: {}
+
+ additionalExposedPorts: {}
+
+ resources: {}
+ # We usually recommend not to specify default resources and to leave this as a conscious
+ # choice for the user. This also increases chances charts run on environments with little
+ # resources, such as Minikube. If you do want to specify resources, uncomment the following
+ # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+ # limits:
+ # cpu: 100m
+ # memory: 128Mi
+ # requests:
+ # cpu: 100m
+ # memory: 128Mi
+
+ livenessProbe: {}
+ # initialDelaySeconds: 20
+ # periodSeconds: 10
+ # timeoutSeconds: 5
+ # failureThreshold: 6
+ # successThreshold: 1
+ readinessProbe: {}
+ # initialDelaySeconds: 20
+ # periodSeconds: 10
+ # timeoutSeconds: 5
+ # failureThreshold: 6
+ # successThreshold: 1
+
+ nodeSelector: {}
+
+ tolerations: []
+
+ affinity: {}
+
+ additionalConfigFiles: {}
+
+ additionalVolumes: [] # One or more additional volumes to add to the coordinator.
+ # - name: extras
+ # emptyDir: {}
+
+ additionalVolumeMounts: [] # One or more additional volume mounts to add to the coordinator.
+ # - name: extras
+ # mountPath: /usr/share/extras
+ # readOnly: true
+
+ annotations: {}
+
+ labels: {}
+
+ secretMounts: []
+ # - name: sample-secret
+ # secretName: sample-secret
+ # path: /secrets/sample.json
+
+worker:
+ jvm:
+ maxHeapSize: "8G"
+ gcMethod:
+ type: "UseG1GC"
+ g1:
+ heapRegionSize: "32M"
+
+ config:
+ memory:
+ heapHeadroomPerNode: ""
+ query:
+ maxMemoryPerNode: "1GB"
+
+ additionalJVMConfig: {}
+
+ additionalExposedPorts: {}
+
+ resources: {}
+ # We usually recommend not to specify default resources and to leave this as a conscious
+ # choice for the user. This also increases chances charts run on environments with little
+ # resources, such as Minikube. If you do want to specify resources, uncomment the following
+ # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+ # limits:
+ # cpu: 100m
+ # memory: 128Mi
+ # requests:
+ # cpu: 100m
+ # memory: 128Mi
+
+ livenessProbe: {}
+ # initialDelaySeconds: 20
+ # periodSeconds: 10
+ # timeoutSeconds: 5
+ # failureThreshold: 6
+ # successThreshold: 1
+ readinessProbe: {}
+ # initialDelaySeconds: 20
+ # periodSeconds: 10
+ # timeoutSeconds: 5
+ # failureThreshold: 6
+ # successThreshold: 1
+
+ nodeSelector: {}
+
+ tolerations: []
+
+ affinity: {}
+
+ additionalConfigFiles: {}
+
+ additionalVolumes: [] # One or more additional volume mounts to add to all workers.
+ # - name: extras
+ # emptyDir: {}
+
+ additionalVolumeMounts: [] # One or more additional volume mounts to add to all workers.
+ # - name: extras
+ # mountPath: /usr/share/extras
+ # readOnly: true
+
+ annotations: {}
+
+ labels: {}
+
+ secretMounts: []
+ # - name: sample-secret
+ # secretName: sample-secret
+ # path: /secrets/sample.json
+
+kafka:
+ mountPath: "/etc/trino/schemas"
+ tableDescriptions: {}
+ # Custom kafka table descriptions that will be mounted in mountPath
+ # testschema.json: |-
+ # {
+ # "tableName": "testtable",
+ # "schemaName": "testschema",
+ # "topicName": "testtopic",
+ # "key": {
+ # "dataFormat": "json",
+ # "fields": [
+ # {
+ # "name": "_key",
+ # "dataFormat": "VARCHAR",
+ # "type": "VARCHAR",
+ # "hidden": "false"
+ # }
+ # ]
+ # },
+ # "message": {
+ # "dataFormat": "json",
+ # "fields": [
+ # {
+ # "name": "id",
+ # "mapping": "id",
+ # "type": "BIGINT"
+ # },
+ # {
+ # "name": "test_field",
+ # "mapping": "test_field",
+ # "type": "VARCHAR"
+ # }
+ # ]
+ # }
+ # }
+commonLabels: {} # Labels that get applied to every resource's metadata
+ingress:
+ enabled: false
+ className: ""
+ annotations: {}
+ hosts: []
+ # - host: trino.example.com
+ # paths:
+ # - path: /
+ # pathType: ImplementationSpecific
+ tls: []
+ # - secretName: chart-example-tls
+ # hosts:
+ # - chart-example.local
diff --git a/terraform/modules/helm/trino/trino.yaml.tfpl b/terraform/modules/helm/trino/trino.yaml.tfpl
new file mode 100644
index 00000000..28970425
--- /dev/null
+++ b/terraform/modules/helm/trino/trino.yaml.tfpl
@@ -0,0 +1,441 @@
+# Default values for trino.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+image:
+ registry: ${trino_image.registry} # Image registry, defaults to empty, which results in DockerHub usage
+ repository: ${trino_image.name} # Repository location of the Trino image, typically `organization/imagename`
+ tag: ${trino_image.tag} # Image tag, defaults to the Trino release version specified as `appVersion` from Chart.yaml
+ digest: "" # Optional digest value of the image specified as `sha256:abcd...`. A specified value overrides `tag`.
+ useRepositoryAsSoleImageReference: false # When true, only the content in `repository` is used as image reference
+ pullPolicy: ${trino_image.pullPolicy}
+
+imagePullSecrets:
+ - name: registry-credentials
+
+server:
+ workers: ${trino_workers_count}
+ node:
+ environment: production
+ dataDir: /data/trino
+ pluginDir: /usr/lib/trino/plugin
+ log:
+ trino:
+ level: INFO
+ config:
+ path: /etc/trino
+ http:
+ port: 8080
+ https:
+ enabled: false
+ port: 8443
+ keystore:
+ path: ""
+ # Trino supports multiple authentication types: PASSWORD, CERTIFICATE, OAUTH2, JWT, KERBEROS
+ # For more info: https://trino.io/docs/current/security/authentication-types.html
+ authenticationType: ""
+ query:
+ maxMemory: "4GB"
+ exchangeManager:
+ name: "filesystem"
+ baseDir: "/tmp/trino-local-file-system-exchange-manager"
+ workerExtraConfig: ""
+ coordinatorExtraConfig: ""
+ autoscaling:
+ enabled: false
+ maxReplicas: 5
+ targetCPUUtilizationPercentage: 50
+ behavior: {}
+ # scaleDown:
+ # stabilizationWindowSeconds: 300
+ # policies:
+ # - type: Percent
+ # value: 100
+ # periodSeconds: 15
+ # scaleUp:
+ # stabilizationWindowSeconds: 0
+ # policies:
+ # - type: Percent
+ # value: 100
+ # periodSeconds: 15
+ # - type: Pods
+ # value: 4
+ # periodSeconds: 15
+ # selectPolicy: Max
+
+accessControl: {}
+ # type: configmap
+ # refreshPeriod: 60s
+ # # Rules file is mounted to /etc/trino/access-control
+ # configFile: "rules.json"
+ # rules:
+ # rules.json: |-
+ # {
+ # "catalogs": [
+ # {
+ # "user": "admin",
+ # "catalog": "(mysql|system)",
+ # "allow": "all"
+ # },
+ # {
+ # "group": "finance|human_resources",
+ # "catalog": "postgres",
+ # "allow": true
+ # },
+ # {
+ # "catalog": "hive",
+ # "allow": "all"
+ # },
+ # {
+ # "user": "alice",
+ # "catalog": "postgresql",
+ # "allow": "read-only"
+ # },
+ # {
+ # "catalog": "system",
+ # "allow": "none"
+ # }
+ # ],
+ # "schemas": [
+ # {
+ # "user": "admin",
+ # "schema": ".*",
+ # "owner": true
+ # },
+ # {
+ # "user": "guest",
+ # "owner": false
+ # },
+ # {
+ # "catalog": "default",
+ # "schema": "default",
+ # "owner": true
+ # }
+ # ]
+ # }
+
+resourceGroups: {}
+ # # Resource groups file is mounted to /etc/trino/resource-groups/resource-groups.json
+ # resourceGroupsConfig: |-
+ # {
+ # "rootGroups": [
+ # {
+ # "name": "global",
+ # "softMemoryLimit": "80%",
+ # "hardConcurrencyLimit": 100,
+ # "maxQueued": 100,
+ # "schedulingPolicy": "fair",
+ # "jmxExport": true,
+ # "subGroups": [
+ # {
+ # "name": "admin",
+ # "softMemoryLimit": "30%",
+ # "hardConcurrencyLimit": 20,
+ # "maxQueued": 10
+ # },
+ # {
+ # "name": "finance_human_resources",
+ # "softMemoryLimit": "20%",
+ # "hardConcurrencyLimit": 15,
+ # "maxQueued": 10
+ # },
+ # {
+ # "name": "general",
+ # "softMemoryLimit": "30%",
+ # "hardConcurrencyLimit": 20,
+ # "maxQueued": 10
+ # },
+ # {
+ # "name": "readonly",
+ # "softMemoryLimit": "10%",
+ # "hardConcurrencyLimit": 5,
+ # "maxQueued": 5
+ # }
+ # ]
+ # }
+ # ],
+ # "selectors": [
+ # {
+ # "user": "admin",
+ # "group": "global.admin"
+ # },
+ # {
+  #       "userGroup": "finance|human_resources",
+  #       "group": "global.finance_human_resources"
+ # },
+ # {
+ # "user": "alice",
+ # "group": "global.readonly"
+ # },
+ # {
+ # "group": "global.general"
+ # }
+ # ]
+ # }
+
+additionalNodeProperties: {}
+
+additionalConfigProperties: {}
+
+additionalLogProperties: {}
+
+additionalExchangeManagerProperties: {}
+
+eventListenerProperties: {}
+
+additionalCatalogs: ${trino_catalogs}
+
+# Array of EnvVar (https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#envvar-v1-core)
+env: []
+
+# Array of EnvFromSource (https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#envfromsource-v1-core)
+envFrom: []
+
+initContainers: {}
+ # coordinator:
+ # - name: init-coordinator
+ # image: busybox:1.28
+ # imagePullPolicy: IfNotPresent
+ # command: ['sh', '-c', "until nslookup myservice.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
+ # worker:
+ # - name: init-worker
+ # image: busybox:1.28
+ # command: ['sh', '-c', 'echo The worker is running! && sleep 3600']
+
+sidecarContainers: {}
+# coordinator:
+# - name: side-coordinator
+# image: busybox:1.28
+# imagePullPolicy: IfNotPresent
+# command: ['sleep', '1']
+# worker:
+# - name: side-worker
+# image: busybox:1.28
+# imagePullPolicy: IfNotPresent
+# command: ['sleep', '1']
+
+securityContext:
+ runAsUser: 1000
+ runAsGroup: 1000
+
+shareProcessNamespace:
+ coordinator: false
+ worker: false
+
+service:
+ type: ${trino_service.type}
+ port: ${trino_service.port}
+
+auth: {}
+ # Set username and password
+ # https://trino.io/docs/current/security/password-file.html#file-format
+ # passwordAuth: "username:encrypted-password-with-htpasswd"
+ # or set the name of a secret containing this file in the password.db key
+ # passwordAuthSecret: "trino-password-authentication"
+ # Set users' groups
+ # https://trino.io/docs/current/security/group-file.html#file-format
+ # refreshPeriod: 5s
+ # groups: "group_name:user_1,user_2,user_3"
+
+serviceAccount:
+ # Specifies whether a service account should be created
+ create: false
+ # The name of the service account to use.
+ # If not set and create is true, a name is generated using the fullname template
+ name: ""
+ # Annotations to add to the service account
+ annotations: {}
+
+secretMounts: []
+ # - name: sample-secret
+ # secretName: sample-secret
+ # path: /secrets/sample.json
+
+coordinator:
+ jvm:
+ maxHeapSize: "8G"
+ gcMethod:
+ type: "UseG1GC"
+ g1:
+ heapRegionSize: "32M"
+
+ config:
+ memory:
+ heapHeadroomPerNode: ""
+ query:
+ maxMemoryPerNode: "1GB"
+
+ additionalJVMConfig: {}
+
+ additionalExposedPorts: {}
+
+ resources: {}
+ # We usually recommend not to specify default resources and to leave this as a conscious
+ # choice for the user. This also increases chances charts run on environments with little
+ # resources, such as Minikube. If you do want to specify resources, uncomment the following
+ # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+ # limits:
+ # cpu: 100m
+ # memory: 128Mi
+ # requests:
+ # cpu: 100m
+ # memory: 128Mi
+
+ livenessProbe: {}
+ # initialDelaySeconds: 20
+ # periodSeconds: 10
+ # timeoutSeconds: 5
+ # failureThreshold: 6
+ # successThreshold: 1
+ readinessProbe: {}
+ # initialDelaySeconds: 20
+ # periodSeconds: 10
+ # timeoutSeconds: 5
+ # failureThreshold: 6
+ # successThreshold: 1
+
+ nodeSelector: {}
+
+ tolerations: []
+
+ affinity: {}
+
+ additionalConfigFiles: {}
+
+ additionalVolumes: [] # One or more additional volumes to add to the coordinator.
+ # - name: extras
+ # emptyDir: {}
+
+ additionalVolumeMounts: [] # One or more additional volume mounts to add to the coordinator.
+ # - name: extras
+ # mountPath: /usr/share/extras
+ # readOnly: true
+
+ annotations: {}
+
+ labels: {}
+
+ secretMounts: []
+ # - name: sample-secret
+ # secretName: sample-secret
+ # path: /secrets/sample.json
+
+worker:
+ jvm:
+ maxHeapSize: "8G"
+ gcMethod:
+ type: "UseG1GC"
+ g1:
+ heapRegionSize: "32M"
+
+ config:
+ memory:
+ heapHeadroomPerNode: ""
+ query:
+ maxMemoryPerNode: "1GB"
+
+ additionalJVMConfig: {}
+
+ additionalExposedPorts: {}
+
+ resources: {}
+ # We usually recommend not to specify default resources and to leave this as a conscious
+ # choice for the user. This also increases chances charts run on environments with little
+ # resources, such as Minikube. If you do want to specify resources, uncomment the following
+ # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+ # limits:
+ # cpu: 100m
+ # memory: 128Mi
+ # requests:
+ # cpu: 100m
+ # memory: 128Mi
+
+ livenessProbe: {}
+ # initialDelaySeconds: 20
+ # periodSeconds: 10
+ # timeoutSeconds: 5
+ # failureThreshold: 6
+ # successThreshold: 1
+ readinessProbe: {}
+ # initialDelaySeconds: 20
+ # periodSeconds: 10
+ # timeoutSeconds: 5
+ # failureThreshold: 6
+ # successThreshold: 1
+
+ nodeSelector: {}
+
+ tolerations: []
+
+ affinity: {}
+
+ additionalConfigFiles: {}
+
+ additionalVolumes: [] # One or more additional volume mounts to add to all workers.
+ # - name: extras
+ # emptyDir: {}
+
+ additionalVolumeMounts: [] # One or more additional volume mounts to add to all workers.
+ # - name: extras
+ # mountPath: /usr/share/extras
+ # readOnly: true
+
+ annotations: {}
+
+ labels: {}
+
+ secretMounts: []
+ # - name: sample-secret
+ # secretName: sample-secret
+ # path: /secrets/sample.json
+
+kafka:
+ mountPath: "/etc/trino/schemas"
+ tableDescriptions: {}
+ # Custom kafka table descriptions that will be mounted in mountPath
+ # testschema.json: |-
+ # {
+ # "tableName": "testtable",
+ # "schemaName": "testschema",
+ # "topicName": "testtopic",
+ # "key": {
+ # "dataFormat": "json",
+ # "fields": [
+ # {
+ # "name": "_key",
+ # "dataFormat": "VARCHAR",
+ # "type": "VARCHAR",
+ # "hidden": "false"
+ # }
+ # ]
+ # },
+ # "message": {
+ # "dataFormat": "json",
+ # "fields": [
+ # {
+ # "name": "id",
+ # "mapping": "id",
+ # "type": "BIGINT"
+ # },
+ # {
+ # "name": "test_field",
+ # "mapping": "test_field",
+ # "type": "VARCHAR"
+ # }
+ # ]
+ # }
+ # }
+commonLabels: {} # Labels that get applied to every resource's metadata
+ingress:
+ enabled: false
+ className: ""
+ annotations: {}
+ hosts: []
+ # - host: trino.example.com
+ # paths:
+ # - path: /
+ # pathType: ImplementationSpecific
+ tls: []
+ # - secretName: chart-example-tls
+ # hosts:
+ # - chart-example.local
diff --git a/terraform/modules/helm/trino/variables.tf b/terraform/modules/helm/trino/variables.tf
new file mode 100644
index 00000000..9cb3f9f4
--- /dev/null
+++ b/terraform/modules/helm/trino/variables.tf
@@ -0,0 +1,82 @@
+
+variable "trino_image" {
+ type = object({ name = string, tag = string, registry = string, pullPolicy = string })
+ description = "Trino image metadata"
+ default = {
+ name = "trinodb/trino"
+ tag = "latest"
+ pullPolicy = "IfNotPresent"
+ registry = ""
+ }
+}
+variable "trino_namespace" {
+ type = string
+ description = "Trino namespace"
+ default = "hudi"
+}
+variable "trino_create_namespace" {
+ type = bool
+ description = "Create Trino namespace."
+ default = true
+}
+variable "trino_wait_for_jobs" {
+ type = bool
+  description = "Trino wait for jobs parameter."
+ default = false
+}
+
+variable "trino_chart_install_timeout" {
+ type = number
+ description = "Trino chart install timeout."
+ default = 900
+}
+
+variable "trino_custom_values_yaml" {
+ type = string
+ description = "Trino chart values.yaml path."
+ default = "trino.yaml.tfpl"
+}
+
+variable "trino_workers_count" {
+ default = 1
+ description = "Number of trino workers"
+ type = number
+}
+
+variable "trino_release_name" {
+ type = string
+ description = "Trino release name"
+ default = "trino"
+}
+
+variable "trino_chart_path" {
+ type = string
+ description = "Trino helm chart path."
+ default = "trino-helm-chart"
+}
+variable "trino_chart_depends_on" {
+ type = any
+ description = "List of helm release names that this chart depends on."
+  default     = []
+}
+variable "trino_service" {
+ type = object({ type = string, port = number })
+ description = "Trino service metadata"
+ default = { type = "ClusterIP", port = 8080 }
+}
+variable "trino_lakehouse_metadata" {
+ type = map(string)
+ description = "Trino lakehouse config"
+}
+locals {
+ default_lakehouse_metadata = {
+ "connector.name" = "hudi"
+ "hive.metastore.uri" = "thrift://hms-metastore-app.hudi.svc:9083"
+ "hive.s3.ssl.enabled" = "false"
+ }
+}
+locals {
+ catalogs = {
+    lakehouse = merge(local.default_lakehouse_metadata, var.trino_lakehouse_metadata)
+ }
+}
diff --git a/terraform/modules/helm/unified_helm/obsrv/Chart.yaml b/terraform/modules/helm/unified_helm/obsrv/Chart.yaml
index 8a9a9184..220aeda1 100644
--- a/terraform/modules/helm/unified_helm/obsrv/Chart.yaml
+++ b/terraform/modules/helm/unified_helm/obsrv/Chart.yaml
@@ -40,9 +40,9 @@ dependencies:
- name: master-data-processor
version: 0.1.2
condition: master-data-processor.enabled
- - name: merged-pipeline
+ - name: unified-pipeline
version: 0.1.2
- condition: merged-pipeline.enabled
+ condition: unified-pipeline.enabled
- name: postgresql
version: 12.2.7
condition: postgresql.enabled
diff --git a/terraform/modules/helm/unified_helm/obsrv/charts/command-api/values.yaml b/terraform/modules/helm/unified_helm/obsrv/charts/command-api/values.yaml
index ee006d0d..6d1c5b0a 100644
--- a/terraform/modules/helm/unified_helm/obsrv/charts/command-api/values.yaml
+++ b/terraform/modules/helm/unified_helm/obsrv/charts/command-api/values.yaml
@@ -12,9 +12,9 @@ service_config: |
namespace: flink
reinstall_sleep_time: 3
jobs:
- - name: "PipelineMergedJob"
- release_name: merged-pipeline
- job_manager_url: "merged-pipeline-jobmanager.flink.svc.cluster.local:8081"
+ - name: "UnifiedPipelineJob"
+ release_name: unified-pipeline
+ job_manager_url: "unified-pipeline-jobmanager.flink.svc.cluster.local:8081"
- name: "MasterDataProcessorJob"
release_name: master-data-processor
job_manager_url: "master-data-processor-jobmanager.flink.svc.cluster.local:8081"
diff --git a/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/templates/deployment.yaml b/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/templates/deployment.yaml
index fa4728e9..49d9d0ac 100644
--- a/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/templates/deployment.yaml
+++ b/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/templates/deployment.yaml
@@ -128,7 +128,7 @@ data:
output.duplicate.topic = ${job.env}".masterdata.failed"
output.denorm.topic = ${job.env}".masterdata.denorm"
output.transform.topic = ${job.env}".masterdata.transform"
- output.transform.failed.topic = ${job.env}".masterdata.transform.failed"
+ output.transform.failed.topic = ${job.env}".masterdata.failed"
stats.topic = ${job.env}".masterdata.stats"
groupId = ${job.env}"-masterdata-pipeline-group"
diff --git a/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/values.yaml b/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/values.yaml
index 49b8cde7..d3497e87 100644
--- a/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/values.yaml
+++ b/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/values.yaml
@@ -157,6 +157,7 @@ master-data-processor:
output.unique.topic = ${job.env}".masterdata.unique"
output.duplicate.topic = ${job.env}".masterdata.duplicate"
output.transform.topic = ${job.env}".masterdata.transform"
+ output.transform.failed.topic = ${job.env}".masterdata.failed"
stats.topic = ${job.env}".masterdata.stats"
groupId = ${job.env}"-masterdata-pipeline-group"
diff --git a/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/Chart.yaml b/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/Chart.yaml
index eb70f976..f8f30dff 100644
--- a/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/Chart.yaml
+++ b/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/Chart.yaml
@@ -1,5 +1,5 @@
apiVersion: v2
-name: merged-pipeline
+name: unified-pipeline
description: A Helm chart for Kubernetes
# A chart can be either an 'application' or a 'library' chart.
diff --git a/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/templates/deployment.yaml b/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/templates/deployment.yaml
index 29b9b08b..f3ff2074 100644
--- a/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/templates/deployment.yaml
+++ b/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/templates/deployment.yaml
@@ -1,5 +1,5 @@
---
-# Source: obsrv-chart/charts/merged-pipeline/templates/flink_job_configmap.yaml
+# Source: obsrv-chart/charts/unified-pipeline/templates/flink_job_configmap.yaml
apiVersion: v1
data:
base-config: |+
@@ -71,7 +71,7 @@ data:
taskmanager.memory.process.size: 1700m
jobmanager.memory.process.size: 1600m
state.savepoints.dir: file:///tmp
- job_classname: org.sunbird.obsrv.pipeline.task.MergedPipelineStreamTask
+ job_classname: org.sunbird.obsrv.pipeline.task.UnifiedPipelineStreamTask
log4j_console_properties: |
# This affects logging for both user code and Flink
rootLogger.level = INFO
@@ -102,7 +102,7 @@ data:
# Suppress the irrelevant (wrong) warnings from the Netty channel handler
logger.netty.name = org.apache.flink.shaded.akka.org.jboss.netty.channel.DefaultChannelPipeline
logger.netty.level = OFF
- merged-pipeline: |
+ unified-pipeline: |
include file("/data/flink/conf/baseconfig.conf")
kafka {
input.topic = ${job.env}".ingest"
@@ -117,6 +117,7 @@ data:
output.denorm.topic = ${job.env}".denorm"
output.denorm.failed.topic = ${job.env}".denorm.failed"
output.transform.topic = ${job.env}".transform"
+ output.transform.failed.topic = ${job.env}".failed"
stats.topic = ${job.env}".stats"
groupId = ${job.env}"-single-pipeline-group"
producer {
@@ -142,18 +143,18 @@ kind: ConfigMap
metadata:
labels:
app: flink
- name: merged-pipeline-config
+ name: unified-pipeline-config
namespace: flink
---
-# Source: obsrv-chart/charts/merged-pipeline/templates/deployment.yaml
+# Source: obsrv-chart/charts/unified-pipeline/templates/deployment.yaml
apiVersion: v1
kind: Service
metadata:
labels:
app: flink
- component: merged-pipeline-jobmanager
- name: merged-pipeline-jobmanager
+ component: unified-pipeline-jobmanager
+ name: unified-pipeline-jobmanager
namespace: flink
spec:
ports:
@@ -169,14 +170,14 @@ spec:
port: 9250
selector:
app: flink
- component: merged-pipeline-jobmanager
+ component: unified-pipeline-jobmanager
type: ClusterIP
---
-# Source: obsrv-chart/charts/merged-pipeline/templates/deployment.yaml
+# Source: obsrv-chart/charts/unified-pipeline/templates/deployment.yaml
apiVersion: v1
kind: Service
metadata:
- name: merged-pipeline-jobmanager-webui
+ name: unified-pipeline-jobmanager-webui
namespace: flink
spec:
ports:
@@ -186,17 +187,17 @@ spec:
targetPort: 8081
selector:
app: flink
- component: merged-pipeline-jobmanager
+ component: unified-pipeline-jobmanager
type: ClusterIP
---
-# Source: obsrv-chart/charts/merged-pipeline/templates/deployment.yaml
+# Source: obsrv-chart/charts/unified-pipeline/templates/deployment.yaml
apiVersion: v1
kind: Service
metadata:
labels:
app: flink
- component: merged-pipeline-taskmanager
- name: merged-pipeline-taskmanager
+ component: unified-pipeline-taskmanager
+ name: unified-pipeline-taskmanager
namespace: flink
spec:
ports:
@@ -204,29 +205,29 @@ spec:
port: 9251
selector:
app: flink
- component: merged-pipeline-taskmanager
+ component: unified-pipeline-taskmanager
type: ClusterIP
---
---
-# Source: obsrv-chart/charts/merged-pipeline/templates/deployment.yaml
+# Source: obsrv-chart/charts/unified-pipeline/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
- name: merged-pipeline-taskmanager
+ name: unified-pipeline-taskmanager
namespace: flink
spec:
replicas: 1
selector:
matchLabels:
app: flink
- component: merged-pipeline-taskmanager
+ component: unified-pipeline-taskmanager
template:
metadata:
labels:
azure-extensions-usage-release-identifier: obsrv-base
app: flink
- component: merged-pipeline-taskmanager
+ component: unified-pipeline-taskmanager
system.processing: "true"
spec:
containers:
@@ -235,21 +236,21 @@ spec:
- -Dfs.azure.account.key.{{ .Values.global.azure_storage_account_name }}.blob.core.windows.net={{ .Values.global.azure_storage_account_key }}
- -Dweb.submit.enable=false
- -Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter
- - -Dmetrics.reporter.prom.host=merged-pipeline-taskmanager
+ - -Dmetrics.reporter.prom.host=unified-pipeline-taskmanager
- -Dmetrics.reporter.prom.port=9251-9260
- - -Djobmanager.rpc.address=merged-pipeline-jobmanager
+ - -Djobmanager.rpc.address=unified-pipeline-jobmanager
- -Dtaskmanager.rpc.port=6122
- --config.file.path
- - /data/flink/conf/merged-pipeline.conf
+ - /data/flink/conf/unified-pipeline.conf
command:
- /opt/flink/bin/taskmanager.sh
- {{ if .Values.global.azure.images.merged_pipeline }}
- image: "{{ .Values.global.azure.images.merged_pipeline.registry }}/{{ .Values.global.azure.images.merged_pipeline.image }}:{{ .Values.global.azure.images.merged_pipeline.digest }}"
+ {{ if .Values.global.azure.images.unified_pipeline }}
+ image: "{{ .Values.global.azure.images.unified_pipeline.registry }}/{{ .Values.global.azure.images.unified_pipeline.image }}:{{ .Values.global.azure.images.unified_pipeline.digest }}"
{{ else }}
- image: sanketikahub/merged-pipeline:release-0.5.0_RC26
+ image: sanketikahub/unified-pipeline:release-0.5.0_RC26
{{ end }}
imagePullPolicy: IfNotPresent
- name: merged-pipeline-taskmanager
+ name: unified-pipeline-taskmanager
ports:
- containerPort: 6122
name: rpc
@@ -268,9 +269,9 @@ spec:
- mountPath: /data/flink/conf/baseconfig.conf
name: flink-config-volume
subPath: base-config.conf
- - mountPath: /data/flink/conf/merged-pipeline.conf
+ - mountPath: /data/flink/conf/unified-pipeline.conf
name: flink-config-volume
- subPath: merged-pipeline.conf
+ subPath: unified-pipeline.conf
workingDir:
volumes:
- configMap:
@@ -281,22 +282,22 @@ spec:
path: log4j-console.properties
- key: base-config
path: base-config.conf
- - key: merged-pipeline
- path: merged-pipeline.conf
- name: merged-pipeline-config
+ - key: unified-pipeline
+ path: unified-pipeline.conf
+ name: unified-pipeline-config
name: flink-config-volume
---
-# Source: obsrv-chart/charts/merged-pipeline/templates/deployment.yaml
+# Source: obsrv-chart/charts/unified-pipeline/templates/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
- name: merged-pipeline-jobmanager
+ name: unified-pipeline-jobmanager
namespace: flink
spec:
selector:
matchLabels:
app: flink
- component: merged-pipeline-jobmanager
+ component: unified-pipeline-jobmanager
azure-extensions-usage-release-identifier: obsrv-base
template:
metadata:
@@ -305,7 +306,7 @@ spec:
prometheus.io/scrape: "true"
labels:
app: flink
- component: merged-pipeline-jobmanager
+ component: unified-pipeline-jobmanager
azure-extensions-usage-release-identifier: obsrv-base
spec:
initContainers:
@@ -340,26 +341,26 @@ spec:
- args:
- start-foreground
- -Dfs.azure.account.key.{{ .Values.global.azure_storage_account_name }}.blob.core.windows.net={{ .Values.global.azure_storage_account_key }}
- - --job-classname=org.sunbird.obsrv.pipeline.task.MergedPipelineStreamTask
+ - --job-classname=org.sunbird.obsrv.pipeline.task.UnifiedPipelineStreamTask
- -Dweb.submit.enable=false
- -Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter
- -Dmetrics.reporter.prom.port=9250
- - -Djobmanager.rpc.address=merged-pipeline-jobmanager
+ - -Djobmanager.rpc.address=unified-pipeline-jobmanager
- -Djobmanager.rpc.port=6123
- -Dparallelism.default=1
- -Dblob.server.port=6124
- -Dqueryable-state.server.ports=6125
- --config.file.path
- - /data/flink/conf/merged-pipeline.conf
+ - /data/flink/conf/unified-pipeline.conf
command:
- /opt/flink/bin/standalone-job.sh
- {{ if .Values.global.azure.images.merged_pipeline }}
- image: "{{ .Values.global.azure.images.merged_pipeline.registry }}/{{ .Values.global.azure.images.merged_pipeline.image }}:{{ .Values.global.azure.images.merged_pipeline.digest }}"
+ {{ if .Values.global.azure.images.unified_pipeline }}
+ image: "{{ .Values.global.azure.images.unified_pipeline.registry }}/{{ .Values.global.azure.images.unified_pipeline.image }}:{{ .Values.global.azure.images.unified_pipeline.digest }}"
{{ else }}
- image: sanketikahub/merged-pipeline:release-0.5.0_RC26
+ image: sanketikahub/unified-pipeline:release-0.5.0_RC26
{{ end }}
imagePullPolicy: IfNotPresent
- name: merged-pipeline-jobmanager
+ name: unified-pipeline-jobmanager
ports:
- containerPort: 6123
name: rpc
@@ -381,9 +382,9 @@ spec:
- mountPath: /data/flink/conf/baseconfig.conf
name: flink-config-volume
subPath: base-config.conf
- - mountPath: /data/flink/conf/merged-pipeline.conf
+ - mountPath: /data/flink/conf/unified-pipeline.conf
name: flink-config-volume
- subPath: merged-pipeline.conf
+ subPath: unified-pipeline.conf
- mountPath: /opt/flink/conf/log4j-console.properties
name: flink-config-volume
subPath: log4j-console.properties
@@ -396,10 +397,10 @@ spec:
path: flink-conf.yaml
- key: base-config
path: base-config.conf
- - key: merged-pipeline
- path: merged-pipeline.conf
+ - key: unified-pipeline
+ path: unified-pipeline.conf
- key: log4j_console_properties
path: log4j-console.properties
- name: merged-pipeline-config
+ name: unified-pipeline-config
name: flink-config-volume
---
diff --git a/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/values.yaml b/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/values.yaml
index d2c0b221..118a1316 100644
--- a/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/values.yaml
+++ b/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/values.yaml
@@ -2,7 +2,7 @@ namespace: "flink"
imagepullsecrets: ""
image:
registry: sanketikahub
- repository: merged-pipeline
+ repository: unified-pipeline
tag: release-0.5.0_RC23
serviceMonitor:
enabled: false
@@ -142,8 +142,8 @@ base_config: |
port = "9042"
}
-merged-pipeline:
- merged-pipeline: |+
+unified-pipeline:
+ unified-pipeline: |+
include file("/data/flink/conf/baseconfig.conf")
kafka {
input.topic = ${job.env}".ingest"
@@ -158,6 +158,7 @@ merged-pipeline:
output.denorm.topic = ${job.env}".denorm"
output.denorm.failed.topic = ${job.env}".denorm.failed"
output.transform.topic = ${job.env}".transform"
+ output.transform.failed.topic = ${job.env}".failed"
stats.topic = ${job.env}".stats"
groupId = ${job.env}"-single-pipeline-group"
producer {
@@ -193,7 +194,7 @@ merged-pipeline:
taskmanager.memory.process.size: 1700m
jobmanager.memory.process.size: 1600m
state.savepoints.dir: file:///tmp
- job_classname: org.sunbird.obsrv.pipeline.task.MergedPipelineStreamTask
+ job_classname: org.sunbird.obsrv.pipeline.task.UnifiedPipelineStreamTask
serviceAccount:
# Specifies whether a service account should be created
diff --git a/terraform/modules/helm/unified_helm/obsrv/values.yaml b/terraform/modules/helm/unified_helm/obsrv/values.yaml
index 9dc108d6..dc21db97 100644
--- a/terraform/modules/helm/unified_helm/obsrv/values.yaml
+++ b/terraform/modules/helm/unified_helm/obsrv/values.yaml
@@ -41,10 +41,10 @@ global:
# digest: sha256:77bdba3135998baadc20015e00a9742eebac52167b90c3e46d0c339a2d668b12
# image: os-shell
# registry: docker.io/bitnami
- merged_pipeline:
+ unified_pipeline:
# tag: 1.0.2-GA
digest: 1.0.3-GA
- image: merged-pipeline
+ image: unified-pipeline
registry: *sanketika_docker_registry
master_data_processor:
# tag: 1.0.2-GA
@@ -377,9 +377,9 @@ druid-raw-cluster:
eks.amazonaws.com/role-arn: arn:aws:iam::725876873105:role/dev-obsrv-test-druid-raw-sa-iam-role
name: druid-raw-sa
-merged-pipeline:
+unified-pipeline:
enabled: true
- name: merged-pipeline
+ name: unified-pipeline
namespace: flink
env: *global-env
checkpoint_store_type: *global-cloud-storage-provider