diff --git a/terraform/aws/main.tf b/terraform/aws/main.tf index 016f44ce..cc044e57 100644 --- a/terraform/aws/main.tf +++ b/terraform/aws/main.tf @@ -140,9 +140,9 @@ module "flink" { building_block = var.building_block flink_container_registry = var.flink_container_registry flink_image_tag = var.flink_image_tag - flink_merged_pipeline_release_names = var.flink_merged_pipeline_release_names + flink_unified_pipeline_release_names = var.flink_unified_pipeline_release_names flink_release_names = var.flink_release_names - merged_pipeline_enabled = var.merged_pipeline_enabled + unified_pipeline_enabled = var.unified_pipeline_enabled flink_checkpoint_store_type = var.flink_checkpoint_store_type flink_chart_depends_on = [module.kafka, module.postgresql_migration, module.redis_dedup, module.redis_denorm] postgresql_obsrv_username = module.postgresql.postgresql_obsrv_username @@ -226,6 +226,12 @@ module "dataset_api" { dataset_api_namespace = module.eks.dataset_api_namespace s3_bucket = module.s3.s3_bucket service_type = var.service_type + enable_lakehouse = var.enable_lakehouse + lakehouse_host = var.lakehouse_host + lakehouse_port = var.lakehouse_port + lakehouse_catalog = var.lakehouse_catalog + lakehouse_schema = var.lakehouse_schema + lakehouse_default_user = var.lakehouse_default_user } module "secor" { @@ -294,6 +300,7 @@ module "command_service" { postgresql_obsrv_user_password = module.postgresql.postgresql_obsrv_user_password postgresql_obsrv_database = module.postgresql.postgresql_obsrv_database flink_namespace = module.flink.flink_namespace + enable_lakehouse = var.enable_lakehouse } module "postgresql_migration" { @@ -308,4 +315,66 @@ module "postgresql_migration" { postgresql_druid_raw_user_password = module.postgresql.postgresql_druid_raw_user_password postgresql_obsrv_user_password = module.postgresql.postgresql_obsrv_user_password data_encryption_key = resource.random_string.data_encryption_key.result + postgresql_hms_user_password = 
module.postgresql.postgresql_hms_user_password + enable_lakehouse = var.enable_lakehouse } + +module "trino" { + source = "../modules/helm/trino" + count = var.enable_lakehouse ? 1 : 0 + trino_namespace = var.hudi_namespace + trino_lakehouse_metadata = { + "hive.s3.aws-access-key" = var.create_velero_user ? module.iam[0].s3_access_key : var.velero_aws_access_key_id + "hive.s3.aws-secret-key" = var.create_velero_user ? module.iam[0].s3_secret_key : var.velero_aws_secret_access_key + } +} + +module "hms" { + source = "../modules/helm/hive_meta_store" + count = var.enable_lakehouse ? 1 : 0 + hms_namespace = var.hudi_namespace + hms_db_metadata = { + "DATABASE_HOST" = "postgresql-hl.postgresql.svc" + "DATABASE_DB" = module.postgresql.postgresql_hms_database + "DATABASE_USER" = module.postgresql.postgresql_hms_username + "DATABASE_PASSWORD" = module.postgresql.postgresql_hms_user_password + "WAREHOUSE_DIR" = "s3a://${module.s3[0].s3_bucket}/${var.hudi_prefix_path}/" + "THRIFT_PORT" = "9083" + } + hadoop_metadata = { + "fs.s3a.access.key" = var.create_velero_user ? module.iam[0].s3_access_key : var.velero_aws_access_key_id + "fs.s3a.secret.key" = var.create_velero_user ? module.iam[0].s3_secret_key : var.velero_aws_secret_access_key + } +} + +module "lakehouse-connector" { + source = "../modules/helm/lakehouse-connector" + count = var.enable_lakehouse ? 
1 : 0 + env = var.env + building_block = var.building_block + flink_container_registry = var.flink_container_registry + flink_lakehouse_image_tag = var.flink_lakehouse_image_tag + flink_image_name = var.flink_image_name + flink_checkpoint_store_type = var.flink_checkpoint_store_type + flink_chart_depends_on = [module.kafka, module.postgresql_migration, module.redis_dedup, module.redis_denorm] + postgresql_obsrv_username = module.postgresql.postgresql_obsrv_username + postgresql_obsrv_user_password = module.postgresql.postgresql_obsrv_user_password + postgresql_obsrv_database = module.postgresql.postgresql_obsrv_database + checkpoint_base_url = "s3://${module.s3[0].checkpoint_storage_bucket}" + denorm_redis_namespace = module.redis_denorm.redis_namespace + denorm_redis_release_name = module.redis_denorm.redis_release_name + dedup_redis_namespace = module.redis_dedup.redis_namespace + dedup_redis_release_name = module.redis_dedup.redis_release_name + flink_sa_annotations = "eks.amazonaws.com/role-arn: ${module.eks.flink_sa_iam_role}" + flink_namespace = module.eks.flink_namespace + postgresql_service_name = module.postgresql.postgresql_service_name + enable_lakehouse = var.enable_lakehouse + postgresql_hms_username = module.postgresql.postgresql_hms_username + postgresql_hms_user_password = module.postgresql.postgresql_hms_user_password + hudi_bucket = module.s3[0].s3_bucket + hudi_prefix_path = var.hudi_prefix_path + hadoop_metadata = { + "fs.s3a.access.key" = var.create_velero_user ? module.iam[0].s3_access_key : var.velero_aws_access_key_id + "fs.s3a.secret.key" = var.create_velero_user ? 
module.iam[0].s3_secret_key : var.velero_aws_secret_access_key + } +} \ No newline at end of file diff --git a/terraform/aws/variables.tf b/terraform/aws/variables.tf index 071b5096..c83e90a0 100644 --- a/terraform/aws/variables.tf +++ b/terraform/aws/variables.tf @@ -141,17 +141,17 @@ variable "flink_release_names" { } } -variable "flink_merged_pipeline_release_names" { +variable "flink_unified_pipeline_release_names" { description = "Create release names" type = map(string) default = { - merged-pipeline = "merged-pipeline" + unified-pipeline = "unified-pipeline" master-data-processor = "master-data-processor" } } -variable "merged_pipeline_enabled" { - description = "Toggle to deploy merged pipeline" +variable "unified_pipeline_enabled" { + description = "Toggle to deploy unified pipeline" type = bool default = true } @@ -170,3 +170,64 @@ variable "secor_image_tag" { type = string description = "secor image version" } + + +variable "hudi_namespace" { + type = string + default = "hudi" + description = "Apache Hudi namespace" +} + +variable "hudi_prefix_path" { + type = string + description = "Hudi prefix path" + default = "hudi" +} + +variable "enable_lakehouse" { + type = bool + description = "Toggle to install hudi components (hms, trino and flink job)" +} + +variable "lakehouse_host" { + type = string + description = "Lakehouse Host" + default = "http://trino.hudi.svc.cluster.local" +} + +variable "lakehouse_port" { + type = string + description = "Trino port" + default = "8080" +} + +variable "lakehouse_catalog" { + type = string + description = "Lakehouse Catalog name" + default = "lakehouse" +} + +variable "lakehouse_schema" { + type = string + description = "Lakehouse Schema name" + default = "hms" +} + +variable "lakehouse_default_user" { + type = string + description = "Lakehouse default user" + default = "admin" +} + + +variable "flink_image_name" { + type = string + description = "Flink image name." 
+ default = "lakehouse-connector" +} + +variable "flink_lakehouse_image_tag" { + type = string + description = "Flink lakehouse image tag." + default = "1.0.0" +} diff --git a/terraform/aws/vars/cluster_overrides.tfvars b/terraform/aws/vars/cluster_overrides.tfvars index e7ca3a91..11225b78 100644 --- a/terraform/aws/vars/cluster_overrides.tfvars +++ b/terraform/aws/vars/cluster_overrides.tfvars @@ -32,3 +32,6 @@ dataset_api_image_tag = "1.0.2-GA" flink_image_tag = "1.0.1-GA" secor_image_tag = "1.0.0-GA" superset_image_tag = "3.0.2" + +enable_lakehouse = false +flink_lakehouse_image_tag = "1.0.1" diff --git a/terraform/gcp/main.tf b/terraform/gcp/main.tf index 410564e4..cec40ef5 100644 --- a/terraform/gcp/main.tf +++ b/terraform/gcp/main.tf @@ -390,9 +390,9 @@ module "flink" { building_block = var.building_block flink_container_registry = var.flink_container_registry flink_image_tag = var.flink_image_tag - flink_merged_pipeline_release_names = var.flink_merged_pipeline_release_names + flink_unified_pipeline_release_names = var.flink_unified_pipeline_release_names flink_release_names = var.flink_release_names - merged_pipeline_enabled = var.merged_pipeline_enabled + unified_pipeline_enabled = var.unified_pipeline_enabled flink_checkpoint_store_type = var.flink_checkpoint_store_type flink_chart_depends_on = [ module.kafka, module.postgresql_migration, module.redis_dedup, module.redis_denorm ] postgresql_obsrv_username = module.postgresql.postgresql_obsrv_username diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf index 165d3771..eb60f2af 100644 --- a/terraform/gcp/variables.tf +++ b/terraform/gcp/variables.tf @@ -226,17 +226,17 @@ variable "flink_release_names" { } } -variable "flink_merged_pipeline_release_names" { +variable "flink_unified_pipeline_release_names" { description = "Create release names" type = map(string) default = { - merged-pipeline = "merged-pipeline" + unified-pipeline = "unified-pipeline" master-data-processor = 
"master-data-processor" } } -variable "merged_pipeline_enabled" { - description = "Toggle to deploy merged pipeline" +variable "unified_pipeline_enabled" { + description = "Toggle to deploy unified pipeline" type = bool default = true } diff --git a/terraform/modules/helm/command_service/command-service-helm-chart/templates/command_api_configmap.yml b/terraform/modules/helm/command_service/command-service-helm-chart/templates/command_api_configmap.yml index a1b4e9dd..cbe672ca 100644 --- a/terraform/modules/helm/command_service/command-service-helm-chart/templates/command_api_configmap.yml +++ b/terraform/modules/helm/command_service/command-service-helm-chart/templates/command_api_configmap.yml @@ -9,3 +9,6 @@ metadata: data: service_config: | {{ .Values.service_config | indent 4 }} + {{ if .Values.enable_lakehouse }} +{{ toYaml .Values.hudi_jobs | indent 8 }} + {{ end }} \ No newline at end of file diff --git a/terraform/modules/helm/command_service/command-service-helm-chart/values.yaml b/terraform/modules/helm/command_service/command-service-helm-chart/values.yaml index 059727b6..0aad64f5 100644 --- a/terraform/modules/helm/command_service/command-service-helm-chart/values.yaml +++ b/terraform/modules/helm/command_service/command-service-helm-chart/values.yaml @@ -8,21 +8,14 @@ cpu_requests: 0.1 cpu_limits: 0.1 memory_requests: 512Mi memory_limits: 512Mi +enable_lakehouse: false + +hudi_jobs: + - name: "LakehouseConnectorJob" + release_name: lakehouse-connector + job_manager_url: "lakehouse-connector-jobmanager.flink.svc.cluster.local:8081" service_config: | - flink: - namespace: flink - reinstall_sleep_time: 3 - jobs: - - name: "PipelineMergedJob" - release_name: merged-pipeline - job_manager_url: "merged-pipeline-jobmanager.flink.svc.cluster.local:8081" - - name: "MasterDataProcessorJob" - release_name: master-data-processor - job_manager_url: "master-data-processor-jobmanager.flink.svc.cluster.local:8081" - - name: "KafkaConnectorJob" - release_name: 
kafka-connector - job_manager_url: "kafka-connector-jobmanager.flink.svc.cluster.local:8081" commands: RESTART_PIPELINE: @@ -37,6 +30,21 @@ service_config: | database: obsrv helm_charts_base_dir: /app/helm-charts + + flink: + namespace: flink + reinstall_sleep_time: 3 + jobs: + - name: "UnifiedPipelineJob" + release_name: unified-pipeline + job_manager_url: "unified-pipeline-jobmanager.flink.svc.cluster.local:8081" + - name: "MasterDataProcessorJob" + release_name: master-data-processor + job_manager_url: "master-data-processor-jobmanager.flink.svc.cluster.local:8081" + - name: "KafkaConnectorJob" + release_name: kafka-connector + job_manager_url: "kafka-connector-jobmanager.flink.svc.cluster.local:8081" + rbac: # kubernetes.rest-service.exposed.type: NodePort requires diff --git a/terraform/modules/helm/command_service/command_service.yaml.tfpl b/terraform/modules/helm/command_service/command_service.yaml.tfpl index 022d8426..fd248e07 100644 --- a/terraform/modules/helm/command_service/command_service.yaml.tfpl +++ b/terraform/modules/helm/command_service/command_service.yaml.tfpl @@ -5,13 +5,26 @@ image: tag: ${command_service_image_tag} service_config: | + + commands: + RESTART_PIPELINE: + workflow: + - RESTART_PIPELINE_JOBS + + postgres: + db_host: postgresql-hl.postgresql.svc.cluster.local + db_port: 5432 + db_user: ${postgresql_obsrv_username} + db_password: ${postgresql_obsrv_user_password} + database: ${postgresql_obsrv_database} + flink: namespace: ${flink_namespace} reinstall_sleep_time: 3 jobs: - - name: "PipelineMergedJob" - release_name: merged-pipeline - job_manager_url: "merged-pipeline-jobmanager.flink.svc.cluster.local:8081" + - name: "UnifiedPipelineJob" + release_name: unified-pipeline + job_manager_url: "unified-pipeline-jobmanager.flink.svc.cluster.local:8081" - name: "MasterDataProcessor" release_name: master-data-processor job_manager_url: "master-data-processor-jobmanager.flink.svc.cluster.local:8081" @@ -19,17 +32,14 @@ service_config: | 
release_name: kafka-connector job_manager_url: "kafka-connector-jobmanager.flink.svc.cluster.local:8081" - commands: - RESTART_PIPELINE: - workflow: - - RESTART_PIPELINE_JOBS - postgres: - db_host: postgresql-hl.postgresql.svc.cluster.local - db_port: 5432 - db_user: ${postgresql_obsrv_username} - db_password: ${postgresql_obsrv_user_password} - database: ${postgresql_obsrv_database} + helm_charts_base_dir: /app/helm-charts + + +enable_lakehouse: ${enable_lakehouse} +hudi_jobs: + - name: "LakehouseConnectorJob" + release_name: lakehouse-connector + job_manager_url: "lakehouse-connector-jobmanager.flink.svc.cluster.local:8081" - helm_charts_base_dir: /app/helm-charts \ No newline at end of file diff --git a/terraform/modules/helm/command_service/main.tf b/terraform/modules/helm/command_service/main.tf index d02cfa00..ca403fb2 100644 --- a/terraform/modules/helm/command_service/main.tf +++ b/terraform/modules/helm/command_service/main.tf @@ -19,6 +19,8 @@ resource "helm_release" "command_service" { postgresql_obsrv_user_password = var.postgresql_obsrv_user_password postgresql_obsrv_database = var.postgresql_obsrv_database flink_namespace = var.flink_namespace + enable_lakehouse = var.enable_lakehouse + }) ] } \ No newline at end of file diff --git a/terraform/modules/helm/command_service/variables.tf b/terraform/modules/helm/command_service/variables.tf index bb4aac91..084169e8 100644 --- a/terraform/modules/helm/command_service/variables.tf +++ b/terraform/modules/helm/command_service/variables.tf @@ -85,4 +85,8 @@ variable "flink_namespace" { default = "flink" } - \ No newline at end of file + +variable "enable_lakehouse" { + type = bool + description = "Toggle to install hudi components (hms, trino and flink job)" +} \ No newline at end of file diff --git a/terraform/modules/helm/dataset_api/dataset-api-helm-chart/templates/configmap.yaml b/terraform/modules/helm/dataset_api/dataset-api-helm-chart/templates/configmap.yaml index b6e0ed93..550f1b0a 100644 --- 
a/terraform/modules/helm/dataset_api/dataset-api-helm-chart/templates/configmap.yaml +++ b/terraform/modules/helm/dataset_api/dataset-api-helm-chart/templates/configmap.yaml @@ -44,5 +44,11 @@ data: grafana_token: "{{ .Values.grafana_service.GRAFANA_TOKEN }}" {{- end }} - + {{- if .Values.enable_lakehouse }} + lakehouse_host: "{{ .Values.lakehouse_service.LAKEHOUSE_HOST }}" + lakehouse_port: "{{ .Values.lakehouse_service.LAKEHOUSE_PORT }}" + lakehouse_catalog: "{{ .Values.lakehouse_service.LAKEHOUSE_CATALOG }}" + lakehouse_schema: "{{ .Values.lakehouse_service.LAKEHOUSE_SCHEMA }}" + lakehouse_default_user: "{{ .Values.lakehouse_service.LAKEHOUSE_DEFAULT_USER }}" + {{- end}} diff --git a/terraform/modules/helm/dataset_api/dataset-api-helm-chart/values.yaml b/terraform/modules/helm/dataset_api/dataset-api-helm-chart/values.yaml index 3bdafc3a..715de349 100644 --- a/terraform/modules/helm/dataset_api/dataset-api-helm-chart/values.yaml +++ b/terraform/modules/helm/dataset_api/dataset-api-helm-chart/values.yaml @@ -51,4 +51,12 @@ EXCLUDE_DATASOURCE_VALIDATION: "system-stats,failed-events-summary,masterdata-sy service_account_annotations: eks.amazonaws.com/role-arn: "" service: - type: LoadBalancer \ No newline at end of file + type: LoadBalancer + +enable_lakehouse: false +lakehouse_service: + LAKEHOUSE_HOST: http://obsrv-trino + LAKEHOUSE_PORT: "8080" + LAKEHOUSE_CATALOG: "lakehouse" + LAKEHOUSE_SCHEMA: "hms" + LAKEHOUSE_DEFAULT_USER: "admin" \ No newline at end of file diff --git a/terraform/modules/helm/dataset_api/dataset_api.yaml.tfpl b/terraform/modules/helm/dataset_api/dataset_api.yaml.tfpl index 3082621f..7e9de262 100644 --- a/terraform/modules/helm/dataset_api/dataset_api.yaml.tfpl +++ b/terraform/modules/helm/dataset_api/dataset_api.yaml.tfpl @@ -30,4 +30,11 @@ exhaust_service: service_account_annotations: ${dataset_api_sa_annotations} service: - type: ${service_type} \ No newline at end of file + type: ${service_type} +enable_lakehouse: ${enable_lakehouse} 
+lakehouse_service: + LAKEHOUSE_HOST: ${lakehouse_host} + LAKEHOUSE_PORT: ${lakehouse_port} + LAKEHOUSE_CATALOG: ${lakehouse_catalog} + LAKEHOUSE_SCHEMA: ${lakehouse_schema} + LAKEHOUSE_DEFAULT_USER: ${lakehouse_default_user} \ No newline at end of file diff --git a/terraform/modules/helm/dataset_api/main.tf b/terraform/modules/helm/dataset_api/main.tf index 82f18f8f..8d4b0450 100644 --- a/terraform/modules/helm/dataset_api/main.tf +++ b/terraform/modules/helm/dataset_api/main.tf @@ -27,6 +27,13 @@ resource "helm_release" "dataset_api" { dedup_redis_release_name = var.dedup_redis_release_name s3_bucket = var.s3_bucket service_type = var.service_type + enable_lakehouse = var.enable_lakehouse + lakehouse_host = var.lakehouse_host + lakehouse_port = var.lakehouse_port + lakehouse_catalog = var.lakehouse_catalog + lakehouse_schema = var.lakehouse_schema + lakehouse_default_user = var.lakehouse_default_user + } ) ] diff --git a/terraform/modules/helm/dataset_api/variables.tf b/terraform/modules/helm/dataset_api/variables.tf index 8996d798..a2207650 100644 --- a/terraform/modules/helm/dataset_api/variables.tf +++ b/terraform/modules/helm/dataset_api/variables.tf @@ -125,3 +125,38 @@ variable "s3_bucket" { description = "S3 bucket name for dataset api exhaust." 
default = "" } + +variable "enable_lakehouse" { + type = bool + description = "Toggle to install hudi components (hms, trino and flink job)" +} + +variable "lakehouse_host" { + type = string + description = "Lakehouse Host" + default = "http://trino.hudi.svc.cluster.local" +} + +variable "lakehouse_port" { + type = string + description = "Trino port" + default = "8080" +} + +variable "lakehouse_catalog" { + type = string + description = "Lakehouse Catalog name" + default = "lakehouse" +} + +variable "lakehouse_schema" { + type = string + description = "Lakehouse Schema name" + default = "hms" +} + +variable "lakehouse_default_user" { + type = string + description = "Lakehouse default user" + default = "admin" +} \ No newline at end of file diff --git a/terraform/modules/helm/flink/flink-helm-chart/values.yaml b/terraform/modules/helm/flink/flink-helm-chart/values.yaml index 131e8e96..eb066f5f 100644 --- a/terraform/modules/helm/flink/flink-helm-chart/values.yaml +++ b/terraform/modules/helm/flink/flink-helm-chart/values.yaml @@ -2,7 +2,7 @@ namespace: "flink" imagepullsecrets: "" image: registry: sanketikahub - repository: merged-pipeline + repository: unified-pipeline tag: 1.0.0-GA serviceMonitor: enabled: false @@ -161,8 +161,8 @@ base_config: | port = "9042" } -merged-pipeline: - merged-pipeline: |+ +unified-pipeline: + unified-pipeline: |+ include file("/data/flink/conf/baseconfig.conf") kafka { input.topic = ${job.env}".ingest" @@ -176,6 +176,7 @@ merged-pipeline: output.denorm.topic = ${job.env}".denorm" output.denorm.failed.topic = ${job.env}".failed" output.transform.topic = ${job.env}".transform" + output.transform.failed.topic = ${job.env}".failed" stats.topic = ${job.env}".stats" groupId = ${job.env}"-single-pipeline-group" producer { @@ -211,7 +212,7 @@ merged-pipeline: taskmanager.memory.process.size: 1700m jobmanager.memory.process.size: 1600m state.savepoints.dir: file:///tmp - job_classname: org.sunbird.obsrv.pipeline.task.MergedPipelineStreamTask + 
job_classname: org.sunbird.obsrv.pipeline.task.UnifiedPipelineStreamTask extractor: extractor: |+ @@ -332,6 +333,7 @@ transformer: kafka { input.topic = ${job.env}".denorm" output.transform.topic = ${job.env}".transform" + output.transform.failed.topic = ${job.env}".failed" groupId = ${job.env}"-transformer-group" producer { max-request-size = 10000024 @@ -436,6 +438,7 @@ master-data-processor: output.duplicate.topic = ${job.env}".masterdata.failed" output.denorm.topic = ${job.env}".masterdata.denorm" output.transform.topic = ${job.env}".masterdata.transform" + output.transform.failed.topic = ${job.env}".masterdata.failed" stats.topic = ${job.env}".masterdata.stats" groupId = ${job.env}"-masterdata-pipeline-group" diff --git a/terraform/modules/helm/flink/main.tf b/terraform/modules/helm/flink/main.tf index 6030928d..7ebcddd3 100644 --- a/terraform/modules/helm/flink/main.tf +++ b/terraform/modules/helm/flink/main.tf @@ -20,7 +20,7 @@ resource "helm_release" "flink_sa" { } resource "helm_release" "flink" { - for_each = contains([var.merged_pipeline_enabled], true ) ? var.flink_merged_pipeline_release_names : var.flink_release_names + for_each = contains([var.unified_pipeline_enabled], true ) ? var.flink_unified_pipeline_release_names : var.flink_release_names name = each.key chart = "${path.module}/${var.flink_chart_path}" namespace = var.flink_namespace diff --git a/terraform/modules/helm/flink/variables.tf b/terraform/modules/helm/flink/variables.tf index c0491ef6..b3e93125 100644 --- a/terraform/modules/helm/flink/variables.tf +++ b/terraform/modules/helm/flink/variables.tf @@ -35,7 +35,7 @@ variable "flink_chart_path" { # variable "flink_release_name" { # type = string # description = "Flink helm release name." -# default = "merged-pipeline" +# default = "unified-pipeline" # } # *** changed this to release map. 
@@ -182,12 +182,12 @@ variable "flink_release_names" { type = map(string) } -variable "flink_merged_pipeline_release_names" { +variable "flink_unified_pipeline_release_names" { description = "Create release names" type = map(string) } -variable "merged_pipeline_enabled" { - description = "Toggle to enable merged pipeline" +variable "unified_pipeline_enabled" { + description = "Toggle to enable unified pipeline" type = bool } diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/.helmignore b/terraform/modules/helm/hive_meta_store/hms-helm-chart/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/Chart.yaml b/terraform/modules/helm/hive_meta_store/hms-helm-chart/Chart.yaml new file mode 100644 index 00000000..856304b6 --- /dev/null +++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: metastore-app +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. 
This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.16.0" diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/NOTES.txt b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/NOTES.txt new file mode 100644 index 00000000..fe60e1e6 --- /dev/null +++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/NOTES.txt @@ -0,0 +1,22 @@ +1. Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "metastore-app.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "metastore-app.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "metastore-app.fullname" . 
}} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "metastore-app.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/_helpers.tpl b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/_helpers.tpl new file mode 100644 index 00000000..5b147765 --- /dev/null +++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "metastore-app.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "metastore-app.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "metastore-app.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "metastore-app.labels" -}} +helm.sh/chart: {{ include "metastore-app.chart" . }} +{{ include "metastore-app.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "metastore-app.selectorLabels" -}} +app.kubernetes.io/name: {{ include "metastore-app.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "metastore-app.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "metastore-app.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/deployment.yaml b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/deployment.yaml new file mode 100644 index 00000000..7dedd878 --- /dev/null +++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/deployment.yaml @@ -0,0 +1,73 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "metastore-app.fullname" . }} + labels: + {{- include "metastore-app.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "metastore-app.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "metastore-app.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "metastore-app.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + {{- range $key, $value := .Values.envVars }} + - name: {{ $key | quote }} + value: {{ $value | quote }} + {{- end }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/hadoop-config-map.yaml b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/hadoop-config-map.yaml new file mode 100644 index 00000000..f3da004f --- /dev/null +++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/hadoop-config-map.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: hadoop-config +data: + core-site.xml: | + <configuration> + {{- range $key, $value := .Values.hadoop_core_site }} + <property> + <name>{{ $key }}</name> + <value>{{ $value }}</value> + </property> + {{- end }} + </configuration> \ No newline at end of file diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/service.yaml b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/service.yaml new file mode 100644 index 00000000..a09130c1 --- /dev/null +++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "metastore-app.fullname" . }} + labels: + {{- include "metastore-app.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "metastore-app.selectorLabels" . | nindent 4 }} diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/tests/test-connection.yaml b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/tests/test-connection.yaml new file mode 100644 index 00000000..319ddc75 --- /dev/null +++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "metastore-app.fullname" . }}-test-connection" + labels: + {{- include "metastore-app.labels" . 
| nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "metastore-app.fullname" . }}:{{ .Values.service.port }}'] + restartPolicy: Never diff --git a/terraform/modules/helm/hive_meta_store/hms-helm-chart/values.yaml b/terraform/modules/helm/hive_meta_store/hms-helm-chart/values.yaml new file mode 100644 index 00000000..f604aaf5 --- /dev/null +++ b/terraform/modules/helm/hive_meta_store/hms-helm-chart/values.yaml @@ -0,0 +1,117 @@ +# Default values for metastore-app. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +image: + repository: sanketikahub/hms + pullPolicy: Always + # Overrides the image tag whose default is the chart appVersion. + tag: "1.0.0" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +serviceAccount: + # Specifies whether a service account should be created + create: false + # Automatically mount a ServiceAccount's API credentials? + automount: false + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +podAnnotations: {} +podLabels: {} + +podSecurityContext: + {} + # fsGroup: 2000 + +securityContext: + {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +service: + type: ClusterIP + port: 9083 + +ingress: + enabled: false + className: "" + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +resources: + {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. 
This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +# livenessProbe: +# httpGet: +# path: / +# port: http +# readinessProbe: +# httpGet: +# path: / +# port: http + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 2 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# Additional volumes on the output Deployment definition. +volumes: +- name: hadoop-config-volume + configMap: + name: hadoop-config + items: + - key: core-site.xml + path: core-site.xml + +# Additional volumeMounts on the output Deployment definition. +volumeMounts: +- name: hadoop-config-volume + mountPath: /opt/hadoop/etc/hadoop/core-site.xml + subPath: core-site.xml + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +envVars: {} + +hadoop_core_site: {} diff --git a/terraform/modules/helm/hive_meta_store/hms.yaml.tfpl b/terraform/modules/helm/hive_meta_store/hms.yaml.tfpl new file mode 100644 index 00000000..d95867a3 --- /dev/null +++ b/terraform/modules/helm/hive_meta_store/hms.yaml.tfpl @@ -0,0 +1,115 @@ +# Default values for metastore-app. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: ${hms_replica_count} + +image: + repository: ${hms_image.registry}/${hms_image.name} + pullPolicy: ${hms_image.pullPolicy} + tag: ${hms_image.tag} + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +serviceAccount: + # Specifies whether a service account should be created + create: false + # Automatically mount a ServiceAccount's API credentials? + automount: false + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. 
+ # If not set and create is true, a name is generated using the fullname template + name: "" + +podAnnotations: {} +podLabels: {} + +podSecurityContext: + {} + # fsGroup: 2000 + +securityContext: + {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +service: + type: ${hms_service.type} + port: ${hms_service.port} + +ingress: + enabled: false + className: "" + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +resources: + {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +# livenessProbe: +# httpGet: +# path: / +# port: http +# readinessProbe: +# httpGet: +# path: / +# port: http + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 2 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# Additional volumes on the output Deployment definition. +volumes: +- name: hadoop-config-volume + configMap: + name: hadoop-config + items: + - key: core-site.xml + path: core-site.xml + +# Additional volumeMounts on the output Deployment definition. 
+volumeMounts: +- name: hadoop-config-volume + mountPath: /opt/hadoop/etc/hadoop/core-site.xml + subPath: core-site.xml + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +envVars: ${hms_env_vars} +hadoop_core_site: ${hadoop_conf} \ No newline at end of file diff --git a/terraform/modules/helm/hive_meta_store/main.tf b/terraform/modules/helm/hive_meta_store/main.tf new file mode 100644 index 00000000..1480639c --- /dev/null +++ b/terraform/modules/helm/hive_meta_store/main.tf @@ -0,0 +1,21 @@ +resource "helm_release" "hms" { + name = var.hms_release_name + chart = "${path.module}/${var.hms_chart_path}" + namespace = var.hms_namespace + create_namespace = var.hms_create_namespace + depends_on = [var.hms_chart_depends_on] + force_update = true + cleanup_on_fail = true + atomic = true + values = [ + templatefile("${path.module}/${var.hms_custom_values_yaml}", + { + hms_image = var.hms_image + hms_replica_count = var.hms_replica_count + hms_service = var.hms_service + hadoop_conf = jsonencode(local.hadoop_configuration) + hms_env_vars = jsonencode(local.env_vars) + } + ) + ] +} diff --git a/terraform/modules/helm/hive_meta_store/variables.tf b/terraform/modules/helm/hive_meta_store/variables.tf new file mode 100644 index 00000000..09e09854 --- /dev/null +++ b/terraform/modules/helm/hive_meta_store/variables.tf @@ -0,0 +1,93 @@ +variable "hms_image" { + type = object({ name = string, tag = string, registry = string, pullPolicy = string }) + description = "Trino image metadata" + default = { + name = "hms" + tag = "1.0.3" + pullPolicy = "IfNotPresent" + registry = "sanketikahub" + } +} + +variable "hms_namespace" { + type = string + description = "HMS namespace" + default = "hudi" +} + +variable "hms_create_namespace" { + type = bool + description = "Create HMS namespace." + default = true +} + +variable "hms_wait_for_jobs" { + type = bool + description = "HMS wait for jobs paramater." 
+ default = false +} + +variable "hms_chart_install_timeout" { + type = number + description = "HMS chart install timeout." + default = 900 +} + +variable "hms_custom_values_yaml" { + type = string + description = "HMS chart values.yaml path." + default = "hms.yaml.tfpl" +} + +variable "hms_release_name" { + type = string + description = "HMS release name" + default = "hms" +} + +variable "hms_chart_path" { + type = string + description = "HMS helm chart path." + default = "hms-helm-chart" +} + +variable "hms_chart_depends_on" { + type = any + description = "List of helm release names that this chart depends on." + default = "" +} + +variable "hms_replica_count" { + type = number + description = "HMS replica count" + default = 1 +} + +variable "hms_service" { + type = object({ type = string, port = number }) + description = "HMS service metadata" + default = { type = "ClusterIP", port = 9083 } +} + +locals { + default_hms_db_metadata = {} + default_hadoop_metadata = { + "fs.s3a.impl" = "org.apache.hadoop.fs.s3a.S3AFileSystem" + "fs.s3a.connection.ssl.enabled" = "false" + } +} + +variable "hms_db_metadata" { + type = map(string) + description = "HMS database connection details" +} + +variable "hadoop_metadata" { + type = map(string) + description = "Hadoop core site configuration" +} + +locals { + env_vars = merge(local.default_hms_db_metadata, var.hms_db_metadata) + hadoop_configuration = merge(local.default_hadoop_metadata, var.hadoop_metadata) +} \ No newline at end of file diff --git a/terraform/modules/helm/kafka/kafka.yaml.tfpl b/terraform/modules/helm/kafka/kafka.yaml.tfpl index 68263728..9eb851f4 100644 --- a/terraform/modules/helm/kafka/kafka.yaml.tfpl +++ b/terraform/modules/helm/kafka/kafka.yaml.tfpl @@ -8,7 +8,9 @@ provisioning: partitions: 1 replicationFactor: 1 # https://kafka.apache.org/documentation/#topicconfigs - + - name: "${input_hudi_topic}" + partitions: 1 + replicationFactor: 1 persistence: size: 50Gi diff --git 
a/terraform/modules/helm/kafka/main.tf b/terraform/modules/helm/kafka/main.tf index 51e3b087..c84aee90 100644 --- a/terraform/modules/helm/kafka/main.tf +++ b/terraform/modules/helm/kafka/main.tf @@ -14,6 +14,8 @@ resource "helm_release" "kafka" { { input_topic = "${var.env}.${var.kafka_input_topic}" input_masterdata_topic = "${var.env}.${var.kafka_input_masterdata_topic}" + input_hudi_topic = "${var.env}.${var.kafka_input_hudi_topic}" + } ) ] diff --git a/terraform/modules/helm/kafka/variables.tf b/terraform/modules/helm/kafka/variables.tf index 1afed714..67219565 100644 --- a/terraform/modules/helm/kafka/variables.tf +++ b/terraform/modules/helm/kafka/variables.tf @@ -73,4 +73,9 @@ variable "kafka_install_timeout" { type = number description = "Kafka chart install timeout." default = 1200 +} +variable "kafka_input_hudi_topic" { + type = string + description = "Kafka hudi output topic" + default = "hudi.connector.in" } \ No newline at end of file diff --git a/terraform/modules/helm/lakehouse-connector/.helmignore b/terraform/modules/helm/lakehouse-connector/.helmignore new file mode 100644 index 00000000..50af0317 --- /dev/null +++ b/terraform/modules/helm/lakehouse-connector/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/terraform/modules/helm/lakehouse-connector/flink.yaml.tfpl b/terraform/modules/helm/lakehouse-connector/flink.yaml.tfpl new file mode 100644 index 00000000..f2c50a0f --- /dev/null +++ b/terraform/modules/helm/lakehouse-connector/flink.yaml.tfpl @@ -0,0 +1,129 @@ +namespace: ${flink_namespace} +checkpoint_store_type: ${checkpoint_store_type} +s3_access_key: ${s3_access_key} +s3_secret_key: ${s3_secret_key} +azure_account: ${azure_account} +azure_secret: ${azure_secret} +image: + registry: ${flink_container_registry} + repository: ${flink_image_name} + tag: ${flink_lakehouse_image_tag} + imagePullSecrets: "" +base_config: | + job { + env = "${env}" + enable.distributed.checkpointing = true + statebackend { + base.url = "${checkpoint_base_url}" + } + } + kafka { + broker-servers = "kafka-headless.kafka.svc.cluster.local:9092" + producer.broker-servers = "kafka-headless.kafka.svc.cluster.local:9092" + consumer.broker-servers = "kafka-headless.kafka.svc.cluster.local:9092" + zookeeper = "kafka-zookeeper-headless.svc.cluster.local:2181" + producer { + max-request-size = 1572864 + batch.size = 98304 + linger.ms = 10 + compression = "snappy" + } + output.system.event.topic = $${job.env}".system.events" + output.failed.topic = $${job.env}".failed" + } + task { + parallelism = 1 + consumer.parallelism = 1 + checkpointing.interval = 10000 + checkpointing.pause.between.seconds = 10000 + restart-strategy.attempts = 3 + restart-strategy.delay = 30000 # in milli-seconds + } + + redis.connection.timeout = 100 + redis { + host = ${dedup_redis_release_name}-master.${dedup_redis_namespace}.svc.cluster.local + port = 6379 + } + + redis-meta { + host = ${denorm_redis_release_name}-master.${denorm_redis_namespace}.svc.cluster.local + port = 6379 + } + + postgres { + host = 
${postgresql_service_name}.svc.cluster.local + port = 5432 + maxConnections = 2 + user = ${postgresql_obsrv_username} + password = ${postgresql_obsrv_user_password} + database = ${postgresql_obsrv_database} + } + + lms-cassandra { + host = "localhost" + port = "9042" + } + +enable_lakehouse: ${enable_lakehouse} + +lakehouse-connector: + lakehouse-connector: |+ + include file("/data/flink/conf/baseconfig.conf") + kafka { + input.topic = $${job.env}".hudi.connector.in" + output.topic = $${job.env}".hudi.connector.out" + output.invalid.topic = $${job.env}".failed" + event.max.size = "1048576" # Max is only 1MB + groupId = $${job.env}"-hudi-writer-group" + producer { + max-request-size = 5242880 + } + } + + task { + checkpointing.compressed = true + checkpointing.interval = 30000 + checkpointing.pause.between.seconds = 30000 + restart-strategy.attempts = 3 + restart-strategy.delay = 30000 # in milli-seconds + parallelism = 1 + consumer.parallelism = 1 + downstream.operators.parallelism = 1 + } + + hudi { + hms { + enabled = true + uri = "thrift://hms-metastore-app.hudi.svc:9083" + database { + name = "hms" + username = "${postgresql_hms_username}" + password = "${postgresql_hms_user_password}" + } + } + table { + type = "MERGE_ON_READ" + base.path = "s3a://${hudi_bucket}/${hudi_prefix_path}" + } + compaction.enabled = true + write.tasks = 1 + } + flink-conf: |+ + jobmanager.memory.flink.size: 1024m + taskmanager.memory.flink.size: 1024m + taskmanager.numberOfTaskSlots: 1 + jobManager.numberOfTaskSlots: 1 + parallelism.default: 1 + jobmanager.execution.failover-strategy: region + taskmanager.memory.network.fraction: 0.1 + heartbeat.timeout: 8000 + heartbeat.interval: 5000 + state.savepoints.dir: file:///tmp + jobmanager.rpc.address: lakehouse-connector-jobmanager + blob.server.port: 6124 + jobmanager.rpc.port: 6123 + taskmanager.rpc.port: 6122 + job_classname: org.sunbird.obsrv.streaming.HudiConnectorStreamTask + +hadoop_core_site: ${hadoop_configuration} \ No newline 
at end of file diff --git a/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/Chart.yaml b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/Chart.yaml new file mode 100644 index 00000000..1a27efca --- /dev/null +++ b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/Chart.yaml @@ -0,0 +1,21 @@ +apiVersion: v2 +name: lakehouse-connector +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +version: 0.1.2 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. +appVersion: 1.16.0 diff --git a/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/_helpers.tpl b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/_helpers.tpl new file mode 100644 index 00000000..8e606f3d --- /dev/null +++ b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/_helpers.tpl @@ -0,0 +1,67 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. 
+*/}} +{{- define "flink.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "name" -}} +{{- default .Release.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "flink.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "flink.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "flink.labels" -}} +helm.sh/chart: {{ include "flink.chart" . }} +{{ include "flink.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +{{/* +Selector labels +*/}} +{{- define "flink.selectorLabels" -}} +app.kubernetes.io/name: {{ include "flink.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{/* +Create the name of the service account to use +*/}} +{{- define "flink.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "flink.fullname" .) 
.Values.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} diff --git a/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/configmap.yaml b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/configmap.yaml new file mode 100644 index 00000000..6a419c6a --- /dev/null +++ b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/configmap.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-config + namespace: {{ .Release.namespace }} + labels: + app: flink + system.processing: "true" +data: + base-config: |+ +{{ .Values.base_config | indent 4 }} +{{- $name := .Release.Name }} +{{ index .Values $name | toYaml | indent 2 }} + log4j_console_properties: |+ +{{ .Values.log4j_console_properties | indent 4 }} + core-site.xml: |+ + + {{- range $key, $value := .Values.hadoop_core_site }} + + {{ $key }} + {{ $value }} + + {{- end }} + \ No newline at end of file diff --git a/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/deployment.yaml b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/deployment.yaml new file mode 100644 index 00000000..885970c1 --- /dev/null +++ b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/deployment.yaml @@ -0,0 +1,277 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-jobmanager + namespace: {{ .Release.Namespace }} + labels: + app: flink + component: {{ .Release.Name }}-jobmanager + system.processing: "true" +spec: + type: ClusterIP + ports: + - name: rpc + port: {{ .Values.jobmanager.rpc_port }} + - name: blob + port: {{ .Values.jobmanager.blob_port }} + - name: query + port: {{ .Values.jobmanager.query_port }} + - name: ui + port: {{ .Values.jobmanager.ui_port }} + - name: prom + port: {{ .Values.jobmanager.prom_port }} + selector: + app: 
flink + component: {{ .Release.Name }}-jobmanager + system.processing: "true" + +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-jobmanager-webui + namespace: {{ .Release.Namespace }} +spec: + {{- if eq .Values.service.type "ClusterIP" }} + type: ClusterIP + {{- end }} + {{- if eq .Values.service.type "LoadBalancer" }} + type: LoadBalancer + {{- end }} + ports: + - name: rest + port: {{ .Values.rest_port }} + protocol: TCP + targetPort: {{ .Values.resttcp_port }} + selector: + app: flink + component: {{ .Release.Name }}-jobmanager + +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-taskmanager + namespace: {{ .Release.Namespace }} + labels: + app: flink + component: {{ .Release.Name }}-taskmanager + system.processing: "true" +spec: + type: ClusterIP + ports: + - name: prom + port: {{ .Values.taskmanager.prom_port }} + selector: + app: flink + component: {{ .Release.Name }}-taskmanager + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }}-jobmanager + namespace: {{ .Release.Namespace }} +spec: + selector: + matchLabels: + app: flink + component: {{ .Release.Name }}-jobmanager + template: + metadata: + labels: + app: flink + component: {{ .Release.Name }}-jobmanager + system.processing: "true" + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: "{{ .Values.jobmanager.prom_port }}" + spec: + # imagePullSecrets: + # - name: {{ .Values.image.imagePullSecrets }} + serviceAccount: {{ .Release.Namespace }}-sa + volumes: + - configMap: + items: + - key: flink-conf + path: flink-conf.yaml + - key: base-config + path: base-config.conf + - key: {{ .Release.Name }} + path: {{ .Release.Name }}.conf + - key: log4j_console_properties + path: log4j-console.properties + - key: core-site.xml + path: core-site.xml + name: {{ .Release.Name }}-config + name: flink-config-volume + containers: + - name: {{ .Release.Name }}-jobmanager # Main container to start job-manager + image: "{{ 
.Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}" + # image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: Always + ports: + - containerPort: 6123 + name: rpc + - containerPort: 6124 + name: blob + - containerPort: 6125 + name: query + - containerPort: 8081 + name: ui + resources: + requests: + cpu: "{{ .Values.jobmanager.cpu_requests }}" + memory: "{{ .Values.jobmanager.memory_requests }}" + limits: + cpu: "{{ .Values.jobmanager.cpu_limits }}" + memory: "{{ .Values.jobmanager.memory_limits }}" + volumeMounts: + - name: flink-config-volume + mountPath: /opt/flink/conf/log4j-console.properties + subPath: log4j-console.properties + volumeMounts: + - mountPath: /opt/flink/conf/flink-conf.yaml + name: flink-config-volume + subPath: flink-conf.yaml + - mountPath: /data/flink/conf/baseconfig.conf + name: flink-config-volume + subPath: base-config.conf + - mountPath: /data/flink/conf/{{ .Release.Name }}.conf + name: flink-config-volume + subPath: {{ .Release.Name }}.conf + - mountPath: /opt/flink/conf/log4j-console.properties + name: flink-config-volume + subPath: log4j-console.properties + - name: flink-config-volume + mountPath: /opt/hadoop/etc/hadoop/core-site.xml + subPath: core-site.xml + workingDir: /opt/flink + args: ["jobmanager"] + env: + - name: HADOOP_CONF_DIR + value: "/opt/hadoop/etc/hadoop" + - name: FLINK_PROPERTIES + value: |+ + jobmanager.rpc.address: {{ .Release.Name }}-jobmanager + jobmanager.rpc.port=6123 + metrics.reporters: prom + metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory + metrics.reporter.prom.host: {{ .Release.Name }}-jobmanager + metrics.reporter.prom.port: 9250 + - name: {{ .Release.Name }}-job-submit # side car to submit the hudi connector + image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: Always + + command: + - /opt/flink/bin/flink + - run + - -m + - {{ 
.Release.Name }}-jobmanager.flink.svc.cluster.local:8081 + - /opt/flink/custom-lib/hudi-connector-1.0.0.jar + - --config.file.path + - /data/flink/conf/{{ .Release.Name }}.conf + volumeMounts: + - mountPath: /data/flink/conf/baseconfig.conf + name: flink-config-volume + subPath: base-config.conf + - mountPath: /data/flink/conf/{{ .Release.Name }}.conf + name: flink-config-volume + subPath: {{ .Release.Name }}.conf + workingDir: /opt/flink + volumes: + - configMap: + items: + - key: base-config + path: base-config.conf + - key: {{ .Release.Name }} + path: {{ .Release.Name }}.conf + name: {{ .Release.Name }}-config + name: flink-config-volume + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }}-taskmanager + namespace: {{ .Release.Namespace }} +spec: + replicas: {{ .Values.taskmanager.replicas }} + selector: + matchLabels: + app: flink + component: {{ .Release.Name }}-taskmanager + template: + metadata: + labels: + system.processing: "true" + app: flink + component: {{ .Release.Name }}-taskmanager + spec: + # imagePullSecrets: + # - name: {{ .Values.image.imagePullSecrets }} + serviceAccount: {{ .Release.Namespace }}-sa + volumes: + - configMap: + items: + - key: flink-conf + path: flink-conf.yaml + - key: base-config + path: base-config.conf + - key: {{ .Release.Name }} + path: {{ .Release.Name }}.conf + - key: log4j_console_properties + path: log4j-console.properties + - key: core-site.xml + path: core-site.xml + name: {{ .Release.Name }}-config + name: flink-config-volume + containers: + - name: {{ .Release.Name }}-taskmanager + image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: Always + ports: + - containerPort: 6122 + name: rpc + resources: + requests: + cpu: "{{ .Values.taskmanager.cpu_requests }}" + memory: "{{ .Values.taskmanager.memory_requests }}" + limits: + cpu: "{{ .Values.taskmanager.cpu_limits }}" + memory: "{{ .Values.taskmanager.memory_limits }}" + 
volumeMounts: + - name: flink-config-volume + mountPath: /opt/flink/conf/log4j-console.properties + subPath: log4j-console.properties + volumeMounts: + - mountPath: /opt/flink/conf/flink-conf.yaml + name: flink-config-volume + subPath: flink-conf.yaml + - mountPath: /data/flink/conf/baseconfig.conf + name: flink-config-volume + subPath: base-config.conf + - mountPath: /data/flink/conf/{{ .Release.Name }}.conf + name: flink-config-volume + subPath: {{ .Release.Name }}.conf + - mountPath: /opt/flink/conf/log4j-console.properties + name: flink-config-volume + subPath: log4j-console.properties + - name: flink-config-volume + mountPath: /opt/hadoop/etc/hadoop/core-site.xml + subPath: core-site.xml + workingDir: /opt/flink + args: ["taskmanager"] + env: + - name: HADOOP_CONF_DIR + value: "/opt/hadoop/etc/hadoop" + - name: FLINK_PROPERTIES + value: |+ + jobmanager.rpc.address: {{ .Release.Name }}-jobmanager + taskmanager.rpc.port=6122 + taskmanager.numberOfTaskSlots: 2 + metrics.reporters: prom + metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory + metrics.reporter.prom.host: {{ .Release.Name }}-taskmanager + metrics.reporter.prom.port: 9251 diff --git a/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/submit-job.yaml b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/submit-job.yaml new file mode 100644 index 00000000..0fe3bf37 --- /dev/null +++ b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/templates/submit-job.yaml @@ -0,0 +1,55 @@ +# apiVersion: batch/v1 +# kind: Job +# metadata: +# name: {{ .Release.Name }}-job-submit +# namespace: {{ .Release.Namespace }} +# spec: +# template: +# spec: +# initContainers: +# - name: init +# image: "docker.io/library/ubuntu@sha256:33a5cc25d22c45900796a1aca487ad7a7cb09f09ea00b779e3b2026b4fc2faba" +# command: +# - sh +# - -c +# - | +# apt-get update && apt-get install -y curl +# while true; do +# if curl 
-sSf -o /dev/null http://{{ .Release.Name }}-jobmanager.flink.svc.cluster.local:8081; then +# break +# else +# echo "Waiting for Job Manager..." +# sleep 2 +# fi +# done +# containers: +# - name: {{ .Release.Name }}-job-submit +# image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}" +# imagePullPolicy: Always +# command: +# - /opt/flink/bin/flink +# - run +# - -m +# - {{ .Release.Name }}-jobmanager.flink.svc.cluster.local:8081 +# - /opt/flink/custom-lib/{{ .Release.Name }}-1.0.0.jar +# - --config.file.path +# - /data/flink/conf/{{ .Release.Name }}.conf +# volumeMounts: +# - mountPath: /data/flink/conf/baseconfig.conf +# name: flink-config-volume +# subPath: base-config.conf +# - mountPath: /data/flink/conf/{{ .Release.Name }}.conf +# name: flink-config-volume +# subPath: {{ .Release.Name }}.conf +# workingDir: /opt/flink +# restartPolicy: Never +# volumes: +# - configMap: +# items: +# - key: base-config +# path: base-config.conf +# - key: {{ .Release.Name }} +# path: {{ .Release.Name }}.conf +# name: {{ .Release.Name }}-config +# name: flink-config-volume +# backoffLimit: 3 \ No newline at end of file diff --git a/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/values.yaml b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/values.yaml new file mode 100644 index 00000000..d1c12c74 --- /dev/null +++ b/terraform/modules/helm/lakehouse-connector/lakehouse-flink-helm-chart/values.yaml @@ -0,0 +1,174 @@ +namespace: "flink" +imagepullsecrets: "" +image: + registry: sanketikahub + repository: lakehouse-connector + tag: 1.0.0 +serviceMonitor: + enabled: false +replicaCount: 1 + +jobmanager: + rpc_port: 6123 + blob_port: 6124 + query_port: 6125 + ui_port: 8081 + prom_port: 9250 + heap_memory: 1024 + cpu_requests: 0.25 + cpu_limits: 0.25 + memory_requests: 1024Mi + memory_limits: 1024Mi + +rest_port: 80 +resttcp_port: 8081 +service: + type: ClusterIP + +taskmanager: + prom_port: 9251 + rpc_port: 
6122 + heap_memory: 1024 + replicas: 1 + cpu_requests: 0.5 + cpu_limits: 0.5 + memory_requests: 1024Mi + memory_limits: 1024Mi + +checkpoint_store_type: "s3" + +# AWS S3 Details +s3_access_key: "" +s3_secret_key: "" +s3_endpoint: "" + +# Azure Container Details +azure_account: "" +azure_secret: "" + +# Google Cloud Storage Service Account JSON Path +google_service_account_key_path: "" + +dataset_registry: | + postgres { + host = postgresql.postgresql.svc.cluster.local + port = 5432 + maxConnections = 2 + user = "flink" + password = "flink" + database = "flink" + } + +log4j_console_properties: | + # This affects logging for both user code and Flink + rootLogger.level = INFO + rootLogger.appenderRef.console.ref = ConsoleAppender + rootLogger.appenderRef.rolling.ref = RollingFileAppender + + # Uncomment this if you want to _only_ change Flink's logging + logger.flink.name = org.apache.flink + logger.flink.level = INFO + + # The following lines keep the log level of common libraries/connectors on + # log level INFO. The root logger does not override this. You have to manually + # change the log levels here. 
+ logger.akka.name = akka + logger.akka.level = ERROR + logger.kafka.name= org.apache.kafka + logger.kafka.level = ERROR + logger.hadoop.name = org.apache.hadoop + logger.hadoop.level = ERROR + logger.zookeeper.name = org.apache.zookeeper + logger.zookeeper.level = ERROR + + # Log all infos to the console + appender.console.name = ConsoleAppender + appender.console.type = CONSOLE + appender.console.layout.type = PatternLayout + appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n + + # Log all infos in the given rolling file + appender.rolling.name = RollingFileAppender + appender.rolling.type = RollingFile + appender.rolling.append = false + appender.rolling.fileName = ${sys:log.file} + appender.rolling.filePattern = ${sys:log.file}.%i + appender.rolling.layout.type = PatternLayout + appender.rolling.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n + appender.rolling.policies.type = Policies + appender.rolling.policies.size.type = SizeBasedTriggeringPolicy + appender.rolling.policies.size.size=10MB + appender.rolling.strategy.type = DefaultRolloverStrategy + appender.rolling.strategy.max = 5 + + # Suppress the irrelevant (wrong) warnings from the Netty channel handler + logger.netty.name = org.apache.flink.shaded.akka.org.jboss.netty.channel.DefaultChannelPipeline + logger.netty.level = OFF + +base_config: | + job { + env = "dev" + enable.distributed.checkpointing = true + statebackend { + base.url = "s3://" + } + } + kafka { + broker-servers = "kafka-headless.kafka.svc.cluster.local:9092" + producer.broker-servers = "kafka-headless.kafka.svc.cluster.local:9092" + consumer.broker-servers = "kafka-headless.kafka.svc.cluster.local:9092" + zookeeper = "kafka-headless.kafka.svc.cluster.local:2181" + producer { + max-request-size = 10000024 + batch.size = 98304 + linger.ms = 10 + compression = "snappy" + } + output.system.event.topic = ${job.env}".system.events" + output.failed.topic = ${job.env}".failed" + } + task { + 
parallelism = 1 + consumer.parallelism = 1 + checkpointing.interval = 30000 + checkpointing.pause.between.seconds = 5000 + restart-strategy.attempts = 3 + restart-strategy.delay = 30000 # in milli-seconds + } + + redis.connection.timeout = 100 + redis { + host = obsrv-redis-master.redis.svc.cluster.local + port = 6379 + } + + redis-meta { + host = obsrv-redis-master.redis.svc.cluster.local + port = 6379 + } + + postgres { + host = postgresql-hl.postgresql.svc.cluster.local + port = 5432 + maxConnections = 2 + user = "postgres" + password = "postgres" + database = "obsrv" + } + + lms-cassandra { + host = "localhost" + port = "9042" + } + + +serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +enable_lakehouse: false \ No newline at end of file diff --git a/terraform/modules/helm/lakehouse-connector/main.tf b/terraform/modules/helm/lakehouse-connector/main.tf new file mode 100644 index 00000000..3ba23c5a --- /dev/null +++ b/terraform/modules/helm/lakehouse-connector/main.tf @@ -0,0 +1,42 @@ +resource "helm_release" "lakehouse-connector" { + name = "lakehouse-connector" + chart = "${path.module}/${var.flink_chart_path}" + namespace = var.flink_namespace + create_namespace = var.flink_create_namespace + # depends_on = [var.flink_chart_depends_on,helm_release.flink_sa] + wait_for_jobs = var.flink_wait_for_jobs + timeout = var.flink_chart_install_timeout + force_update = true + cleanup_on_fail = true + atomic = true + values = [ + templatefile("${path.module}/${var.flink_custom_values_yaml}", + { + env = var.env + flink_namespace = var.flink_namespace + flink_container_registry = "${var.flink_container_registry}" + flink_lakehouse_image_tag = var.flink_lakehouse_image_tag + flink_image_name = var.flink_image_name + 
checkpoint_store_type = var.flink_checkpoint_store_type + s3_access_key = var.s3_access_key + s3_secret_key = var.s3_secret_key + azure_account = var.azure_storage_account_name + azure_secret = var.azure_storage_account_key + postgresql_service_name = var.postgresql_service_name + postgresql_obsrv_username = var.postgresql_obsrv_username + postgresql_obsrv_user_password = var.postgresql_obsrv_user_password + postgresql_obsrv_database = var.postgresql_obsrv_database + checkpoint_base_url = var.checkpoint_base_url + denorm_redis_namespace = var.denorm_redis_namespace + denorm_redis_release_name = var.denorm_redis_release_name + dedup_redis_namespace = var.dedup_redis_namespace + dedup_redis_release_name = var.dedup_redis_release_name + hadoop_configuration = jsonencode(local.hadoop_configuration) + enable_lakehouse = var.enable_lakehouse + postgresql_hms_username = var.postgresql_hms_username + postgresql_hms_user_password = var.postgresql_hms_user_password + hudi_bucket = var.hudi_bucket + hudi_prefix_path = var.hudi_prefix_path + }) + ] +} diff --git a/terraform/modules/helm/lakehouse-connector/outputs.tf b/terraform/modules/helm/lakehouse-connector/outputs.tf new file mode 100644 index 00000000..c33a3822 --- /dev/null +++ b/terraform/modules/helm/lakehouse-connector/outputs.tf @@ -0,0 +1,3 @@ +output "flink_namespace" { + value = var.flink_namespace +} \ No newline at end of file diff --git a/terraform/modules/helm/lakehouse-connector/variables.tf b/terraform/modules/helm/lakehouse-connector/variables.tf new file mode 100644 index 00000000..1372b893 --- /dev/null +++ b/terraform/modules/helm/lakehouse-connector/variables.tf @@ -0,0 +1,205 @@ +variable "env" { + type = string + description = "Environment name. All resources will be prefixed with this value." +} + +variable "building_block" { + type = string + description = "Building block name. All resources will be prefixed with this value." 
+} + +variable "flink_namespace" { + type = string + description = "Flink namespace." +} + + +variable "flink_chart_path" { + type = string + description = "Flink chart path." + default = "lakehouse-flink-helm-chart" +} + +# *** changed this to release map. +# variable "flink_release_name" { +# type = string +# description = "Flink helm release name." +# default = "unified-pipeline" +# } +# *** changed this to release map. + +variable "flink_chart_install_timeout" { + type = number + description = "Flink chart install timeout." + default = 900 +} + +variable "flink_create_namespace" { + type = bool + description = "Create flink namespace." + default = true +} + +variable "flink_wait_for_jobs" { + type = bool + description = "Flink wait for jobs parameter." + default = false +} + +variable "flink_custom_values_yaml" { + type = string + description = "Flink chart values.yaml path." + default = "flink.yaml.tfpl" +} + +variable "flink_kubernetes_service_name" { + type = string + description = "Flink kubernetes service name." + default = "" +} + +variable "flink_container_registry" { + type = string + description = "Container registry. For example docker.io/obsrv" +} + + +variable "flink_lakehouse_image_tag" { + type = string + description = "Flink image tag for lakehouse image." +} + + +variable "flink_image_name" { + type = string + description = "Flink image name." +} + +variable "flink_checkpoint_store_type" { + type = string + description = "Flink checkpoint store type." +} + +variable "checkpoint_base_url" { + type = string + description = "checkpoint storage base url." + default = "" +} + +variable "flink_chart_depends_on" { + type = any + description = "List of helm release names that this chart depends on." + default = "" +} + +variable "postgresql_obsrv_username" { + type = string + description = "Postgresql obsrv username." + default = "obsrv" +} + +variable "postgresql_obsrv_user_password" { + type = string + description = "Postgresql obsrv user password."
+} + +variable "postgresql_obsrv_database" { + type = string + description = "Postgresql obsrv database." +} + +variable "postgresql_service_name" { + type = string + description = "Postgresql service name." +} + +variable "denorm_redis_namespace" { + type = string + description = "Namespace of Redis installation." + default = "redis" +} + +variable "denorm_redis_release_name" { + type = string + description = "Release name for Redis installation." + default = "obsrv-denorm-redis" +} + +variable "dedup_redis_release_name" { + type = string + description = "Redis helm release name." + default = "obsrv-dedup-redis" +} + +variable "dedup_redis_namespace" { + type = string + description = "Redis namespace." + default = "redis" +} + +variable "s3_access_key" { + type = string + description = "S3 access key for flink checkpoints." + default = "" +} + +variable "s3_secret_key" { + type = string + description = "S3 secret key for flink checkpoints." + default = "" +} + +variable "azure_storage_account_name" { + type = string + description = "Azure storage account name for flink checkpoints." + default = "" +} + +variable "azure_storage_account_key" { + type = string + description = "Azure storage account key for flink checkpoints." + default = "" +} + +variable "flink_sa_annotations" { + type = string + description = "Service account annotations for flink service account." 
+ default = "serviceAccountName: default" +} + +locals { + default_hadoop_metadata = { + "fs.s3a.impl" = "org.apache.hadoop.fs.s3a.S3AFileSystem" + "fs.s3a.connection.ssl.enabled" = "false" + } +} + +variable "hadoop_metadata" { + type = map(string) + description = "Hadoop core site configuration" +} + +locals { + hadoop_configuration = merge(local.default_hadoop_metadata, var.hadoop_metadata) +} + +variable "enable_lakehouse" { + type = bool + description = "Toggle to install hudi components (hms, trino and flink job)" +} +variable "postgresql_hms_username" { + type = string + description = "Postgresql hms username" +} +variable "postgresql_hms_user_password" { + type = string + description = "Postgresql hms user password." +} +variable "hudi_bucket" { + type = string + description = "Apache hudi bucket name" +} +variable "hudi_prefix_path" { + type = string + description = "Apache hudi bucket prefix path name" +} diff --git a/terraform/modules/helm/postgresql/outputs.tf b/terraform/modules/helm/postgresql/outputs.tf index 9605be24..6fd4ad14 100644 --- a/terraform/modules/helm/postgresql/outputs.tf +++ b/terraform/modules/helm/postgresql/outputs.tf @@ -44,4 +44,14 @@ output "postgresql_obsrv_database" { output "postgresql_service_name" { value = contains([var.postgresql_release_name], "postgresql") ? 
"${var.postgresql_release_name}.${var.postgresql_namespace}" : "${var.postgresql_release_name}-postgresql.${var.postgresql_namespace}" -} \ No newline at end of file +} + +output "postgresql_hms_user_password" { + value = var.postgresql_hms_user_password + sensitive = true +} + +output "postgresql_hms_username" { + value = var.postgresql_hms_username + sensitive = true +} diff --git a/terraform/modules/helm/postgresql/variables.tf b/terraform/modules/helm/postgresql/variables.tf index 76a630b7..b06576fd 100644 --- a/terraform/modules/helm/postgresql/variables.tf +++ b/terraform/modules/helm/postgresql/variables.tf @@ -121,3 +121,15 @@ variable "postgresql_druid_raw_user_password" { description = "Postgresql druid user password." default = "druidraw123" } + + +variable "postgresql_hms_username" { + type = string + description = "Postgresql hms username" + default = "hms" +} +variable "postgresql_hms_user_password" { + type = string + description = "Postgresql hms user password." + default = "hms123" +} \ No newline at end of file diff --git a/terraform/modules/helm/postgresql_migration/main.tf b/terraform/modules/helm/postgresql_migration/main.tf index e264addb..77973f43 100644 --- a/terraform/modules/helm/postgresql_migration/main.tf +++ b/terraform/modules/helm/postgresql_migration/main.tf @@ -19,6 +19,9 @@ resource "helm_release" "postgresql_migration" { postgresql_druid_raw_user_password = var.postgresql_druid_raw_user_password postgresql_obsrv_user_password = var.postgresql_obsrv_user_password data_encryption_key = var.data_encryption_key + postgresql_hms_user_password = var.postgresql_hms_user_password + enable_lakehouse = var.enable_lakehouse + }) ] } diff --git a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/migration/03_obsrv/V3__ddl_datasource.sql b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/migration/03_obsrv/V3__ddl_datasource.sql new file mode 100644 index 00000000..7e6ee3f8 --- /dev/null 
+++ b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/migration/03_obsrv/V3__ddl_datasource.sql @@ -0,0 +1 @@ +ALTER TABLE datasources ADD COLUMN type TEXT not NULL DEFAULT 'druid'; \ No newline at end of file diff --git a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/migration/05_hms/V1__create_hms_initial_data.sql b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/migration/05_hms/V1__create_hms_initial_data.sql new file mode 100644 index 00000000..398350d6 --- /dev/null +++ b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/migration/05_hms/V1__create_hms_initial_data.sql @@ -0,0 +1,28 @@ +-- NOTE(review): CREATE DATABASE cannot run inside a DO block or a transaction; +-- PostgreSQL raises "CREATE DATABASE cannot be executed from a function". +-- Flyway applies each versioned migration exactly once, so the plain statement +-- below is sufficient; if the runner wraps migrations in a transaction, this +-- script needs executeInTransaction=false -- TODO confirm the Flyway runner config. +CREATE DATABASE hms; + +DO +$do$ +BEGIN + IF EXISTS ( + SELECT FROM pg_catalog.pg_roles + WHERE rolname = 'hms') THEN + + RAISE NOTICE 'Role "hms" already exists. Skipping.'; + ELSE + BEGIN + CREATE ROLE hms LOGIN PASSWORD '{{ .Values.postgresql_hms_user_password }}'; + EXCEPTION + WHEN duplicate_object THEN + RAISE NOTICE 'Role "hms" was just created by a concurrent transaction. Skipping.'; + END; + END IF; +END +$do$; + +GRANT ALL PRIVILEGES ON DATABASE hms TO hms; +ALTER DATABASE hms OWNER TO hms; diff --git a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/flyway-cm.yaml b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/flyway-cm.yaml index 5b5b2d60..dc716ebf 100644 --- a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/flyway-cm.yaml +++ b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/flyway-cm.yaml @@ -31,4 +31,19 @@ metadata: name: obsrv-sql-files app: flyway-postgresql data: - {{- (tpl (.Files.Glob "migration/03_obsrv/*.sql").AsConfig .)
| nindent 2 }} \ No newline at end of file + {{- (tpl (.Files.Glob "migration/03_obsrv/*.sql").AsConfig .) | nindent 2 }} + + +--- +{{- if .Values.enable_lakehouse }} +kind: ConfigMap +apiVersion: v1 +metadata: + name: hms-sql-files + namespace: {{ .Values.namespace }} + labels: + name: hms-sql-files + app: flyway-postgresql +data: + {{- (tpl (.Files.Glob "migration/05_hms/*.sql").AsConfig .) | nindent 2 }} +{{- end }} \ No newline at end of file diff --git a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/job.yaml b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/job.yaml index f3261ed6..123c1dbe 100644 --- a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/job.yaml +++ b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/templates/job.yaml @@ -35,6 +35,10 @@ spec: mountPath: /flyway/migrations/02_druid_raw - name: obsrv-sql-files mountPath: /flyway/migrations/03_obsrv + {{- if .Values.enable_lakehouse }} + - name: hms-sql-files + mountPath: /flyway/migrations/05_hms + {{- end }} volumes: - name: flyway-migrate-script configMap: @@ -48,4 +52,9 @@ spec: name: druid-raw-sql-files - name: obsrv-sql-files configMap: - name: obsrv-sql-files \ No newline at end of file + name: obsrv-sql-files + {{- if .Values.enable_lakehouse }} + - name: hms-sql-files + configMap: + name: hms-sql-files + {{- end }} \ No newline at end of file diff --git a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/values.yaml b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/values.yaml index ae7a05c3..044089e3 100644 --- a/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/values.yaml +++ b/terraform/modules/helm/postgresql_migration/postgresql-migration-helm-chart/values.yaml @@ -20,3 +20,6 @@ system_settings: default_dataset_id: "ALL" max_event_size: 1048576 dedup_period: 604800 
# In seconds (7 days) + +postgresql_hms_user_password: "" +enable_lakehouse: false \ No newline at end of file diff --git a/terraform/modules/helm/postgresql_migration/postgresql_migration.yaml.tfpl b/terraform/modules/helm/postgresql_migration/postgresql_migration.yaml.tfpl index 3792a126..427aee2e 100644 --- a/terraform/modules/helm/postgresql_migration/postgresql_migration.yaml.tfpl +++ b/terraform/modules/helm/postgresql_migration/postgresql_migration.yaml.tfpl @@ -12,3 +12,6 @@ postgresql_obsrv_user_password: "${postgresql_obsrv_user_password}" system_settings: encryption_key: "${data_encryption_key}" + +postgresql_hms_user_password: "${postgresql_hms_user_password}" +enable_lakehouse: ${enable_lakehouse} \ No newline at end of file diff --git a/terraform/modules/helm/postgresql_migration/variables.tf b/terraform/modules/helm/postgresql_migration/variables.tf index 9497ea3d..83090a62 100644 --- a/terraform/modules/helm/postgresql_migration/variables.tf +++ b/terraform/modules/helm/postgresql_migration/variables.tf @@ -83,3 +83,12 @@ variable "data_encryption_key" { type = string description = "Data encryption key. This is used to encrypt data in pipeline. This is a 32 character string." } + +variable "postgresql_hms_user_password" { + type = string + description = "Postgresql hms user password." 
+} +variable "enable_lakehouse" { + type = bool + description = "Toggle to install hudi components (hms, trino and flink job)" +} \ No newline at end of file diff --git a/terraform/modules/helm/trino/main.tf b/terraform/modules/helm/trino/main.tf new file mode 100644 index 00000000..3b75e8df --- /dev/null +++ b/terraform/modules/helm/trino/main.tf @@ -0,0 +1,21 @@ +resource "helm_release" "trino" { + name = var.trino_release_name + chart = "${path.module}/${var.trino_chart_path}" + namespace = var.trino_namespace + create_namespace = var.trino_create_namespace + depends_on = [var.trino_chart_depends_on] + force_update = true + cleanup_on_fail = true + atomic = true + values = [ + templatefile("${path.module}/${var.trino_custom_values_yaml}", + { + trino_namespace = var.trino_namespace + trino_image = var.trino_image + trino_workers_count = var.trino_workers_count + trino_service = var.trino_service + trino_catalogs = jsonencode({ for key, value in local.catalogs : key => join("\n", [for k, v in value : "${k}=${v}"]) }) + } + ) + ] +} diff --git a/terraform/modules/helm/trino/trino-helm-chart/Chart.yaml b/terraform/modules/helm/trino/trino-helm-chart/Chart.yaml new file mode 100644 index 00000000..8b1d8bd0 --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/Chart.yaml @@ -0,0 +1,14 @@ +apiVersion: v2 +appVersion: "432" +description: Fast distributed SQL query engine for big data analytics that helps you + explore your data universe +home: https://trino.io/ +icon: https://trino.io/assets/trino.png +maintainers: +- name: Trino community +name: trino +sources: +- https://github.com/trinodb/charts +- https://github.com/trinodb/trino/tree/master/core/docker +type: application +version: 0.19.0 diff --git a/terraform/modules/helm/trino/trino-helm-chart/README.md b/terraform/modules/helm/trino/trino-helm-chart/README.md new file mode 100644 index 00000000..44465278 --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/README.md @@ -0,0 +1,115 @@ + 
+Trino +=========== + +Fast distributed SQL query engine for big data analytics that helps you explore your data universe + + +## Configuration + +The following table lists the configurable parameters of the Trino chart and their default values. + +| Parameter | Description | Default | +| ------------------------ | ----------------------- | -------------- | +| `image.registry` | Image registry, defaults to empty, which results in DockerHub usage | `""` | +| `image.repository` | Repository location of the Trino image, typically `organization/imagename` | `"trinodb/trino"` | +| `image.tag` | Image tag, defaults to the Trino release version specified as `appVersion` from Chart.yaml | `""` | +| `image.digest` | Optional digest value of the image specified as `sha256:abcd...`. A specified value overrides `tag`. | `""` | +| `image.useRepositoryAsSoleImageReference` | When true, only the content in `repository` is used as image reference | `false` | +| `image.pullPolicy` | | `"IfNotPresent"` | +| `imagePullSecrets` | | `[{"name": "registry-credentials"}]` | +| `server.workers` | | `2` | +| `server.node.environment` | | `"production"` | +| `server.node.dataDir` | | `"/data/trino"` | +| `server.node.pluginDir` | | `"/usr/lib/trino/plugin"` | +| `server.log.trino.level` | | `"INFO"` | +| `server.config.path` | | `"/etc/trino"` | +| `server.config.http.port` | | `8080` | +| `server.config.https.enabled` | | `false` | +| `server.config.https.port` | | `8443` | +| `server.config.https.keystore.path` | | `""` | +| `server.config.authenticationType` | | `""` | +| `server.config.query.maxMemory` | | `"4GB"` | +| `server.exchangeManager.name` | | `"filesystem"` | +| `server.exchangeManager.baseDir` | | `"/tmp/trino-local-file-system-exchange-manager"` | +| `server.workerExtraConfig` | | `""` | +| `server.coordinatorExtraConfig` | | `""` | +| `server.autoscaling.enabled` | | `false` | +| `server.autoscaling.maxReplicas` | | `5` | +| 
`server.autoscaling.targetCPUUtilizationPercentage` | | `50` | +| `server.autoscaling.behavior` | | `{}` | +| `accessControl` | | `{}` | +| `resourceGroups` | | `{}` | +| `additionalNodeProperties` | | `{}` | +| `additionalConfigProperties` | | `{}` | +| `additionalLogProperties` | | `{}` | +| `additionalExchangeManagerProperties` | | `{}` | +| `eventListenerProperties` | | `{}` | +| `additionalCatalogs` | | `{}` | +| `env` | | `[]` | +| `envFrom` | | `[]` | +| `initContainers` | | `{}` | +| `sidecarContainers` | | `{}` | +| `securityContext.runAsUser` | | `1000` | +| `securityContext.runAsGroup` | | `1000` | +| `shareProcessNamespace.coordinator` | | `false` | +| `shareProcessNamespace.worker` | | `false` | +| `service.type` | | `"ClusterIP"` | +| `service.port` | | `8080` | +| `auth` | | `{}` | +| `serviceAccount.create` | | `false` | +| `serviceAccount.name` | | `""` | +| `serviceAccount.annotations` | | `{}` | +| `secretMounts` | | `[]` | +| `coordinator.jvm.maxHeapSize` | | `"8G"` | +| `coordinator.jvm.gcMethod.type` | | `"UseG1GC"` | +| `coordinator.jvm.gcMethod.g1.heapRegionSize` | | `"32M"` | +| `coordinator.config.memory.heapHeadroomPerNode` | | `""` | +| `coordinator.config.query.maxMemoryPerNode` | | `"1GB"` | +| `coordinator.additionalJVMConfig` | | `{}` | +| `coordinator.additionalExposedPorts` | | `{}` | +| `coordinator.resources` | | `{}` | +| `coordinator.livenessProbe` | | `{}` | +| `coordinator.readinessProbe` | | `{}` | +| `coordinator.nodeSelector` | | `{}` | +| `coordinator.tolerations` | | `[]` | +| `coordinator.affinity` | | `{}` | +| `coordinator.additionalConfigFiles` | | `{}` | +| `coordinator.additionalVolumes` | One or more additional volumes to add to the coordinator. | `[]` | +| `coordinator.additionalVolumeMounts` | One or more additional volume mounts to add to the coordinator. 
| `[]` | +| `coordinator.annotations` | | `{}` | +| `coordinator.labels` | | `{}` | +| `coordinator.secretMounts` | | `[]` | +| `worker.jvm.maxHeapSize` | | `"8G"` | +| `worker.jvm.gcMethod.type` | | `"UseG1GC"` | +| `worker.jvm.gcMethod.g1.heapRegionSize` | | `"32M"` | +| `worker.config.memory.heapHeadroomPerNode` | | `""` | +| `worker.config.query.maxMemoryPerNode` | | `"1GB"` | +| `worker.additionalJVMConfig` | | `{}` | +| `worker.additionalExposedPorts` | | `{}` | +| `worker.resources` | | `{}` | +| `worker.livenessProbe` | | `{}` | +| `worker.readinessProbe` | | `{}` | +| `worker.nodeSelector` | | `{}` | +| `worker.tolerations` | | `[]` | +| `worker.affinity` | | `{}` | +| `worker.additionalConfigFiles` | | `{}` | +| `worker.additionalVolumes` | One or more additional volumes to add to all workers. | `[]` | +| `worker.additionalVolumeMounts` | One or more additional volume mounts to add to all workers. | `[]` | +| `worker.annotations` | | `{}` | +| `worker.labels` | | `{}` | +| `worker.secretMounts` | | `[]` | +| `kafka.mountPath` | | `"/etc/trino/schemas"` | +| `kafka.tableDescriptions` | | `{}` | +| `commonLabels` | Labels that get applied to every resource's metadata | `{}` | +| `ingress.enabled` | | `false` | +| `ingress.className` | | `""` | +| `ingress.annotations` | | `{}` | +| `ingress.hosts` | | `[]` | +| `ingress.tls` | | `[]` | + + + +--- +_Documentation generated by [Frigate](https://frigate.readthedocs.io)._ + diff --git a/terraform/modules/helm/trino/trino-helm-chart/ci/custom-values.yaml b/terraform/modules/helm/trino/trino-helm-chart/ci/custom-values.yaml new file mode 100644 index 00000000..2a83c54a --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/ci/custom-values.yaml @@ -0,0 +1,19 @@ +# All custom values belong here during testing. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates.
+ +coordinator: + jvm: + maxHeapSize: "8G" + gcMethod: + type: "UseG1GC" + g1: + heapRegionSize: "32M" + +worker: + jvm: + maxHeapSize: "8G" + gcMethod: + type: "UseG1GC" + g1: + heapRegionSize: "32M" diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/NOTES.txt b/terraform/modules/helm/trino/trino-helm-chart/templates/NOTES.txt new file mode 100644 index 00000000..251ff875 --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/templates/NOTES.txt @@ -0,0 +1,10 @@ +Get the application URL by running these commands: +{{- if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "trino.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "trino.name" . }},release={{ .Release.Name }},component=coordinator" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl port-forward $POD_NAME 8080:8080 +{{- end }} diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/_helpers.tpl b/terraform/modules/helm/trino/trino-helm-chart/templates/_helpers.tpl new file mode 100644 index 00000000..a5ddc1b4 --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/templates/_helpers.tpl @@ -0,0 +1,122 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "trino.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
+If release name contains chart name it will be used as a full name. +*/}} +{{- define "trino.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "trino.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{- define "trino.coordinator" -}} +{{- if .Values.coordinatorNameOverride }} +{{- .Values.coordinatorNameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }}-coordinator +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}-coordinator +{{- end }} +{{- end }} +{{- end }} + +{{- define "trino.worker" -}} +{{- if .Values.workerNameOverride }} +{{- .Values.workerNameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }}-worker +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}-worker +{{- end }} +{{- end }} +{{- end }} + + +{{- define "trino.catalog" -}} +{{ template "trino.fullname" . }}-catalog +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "trino.labels" -}} +helm.sh/chart: {{ include "trino.chart" . }} +{{ include "trino.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "trino.selectorLabels" -}} +app.kubernetes.io/name: {{ include "trino.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "trino.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "trino.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Return the proper image name +{{ include "trino.image" . }} + +Code is inspired from bitnami/common + +*/}} +{{- define "trino.image" -}} +{{- $repositoryName := .Values.image.repository -}} +{{- if .Values.image.useRepositoryAsSoleImageReference -}} + {{- printf "%s" $repositoryName -}} +{{- else -}} + {{- $repositoryName := .Values.image.repository -}} + {{- $registryName := .Values.image.registry -}} + {{- $separator := ":" -}} + {{- $termination := (default .Chart.AppVersion .Values.image.tag) | toString -}} + {{- if .Values.image.digest }} + {{- $separator = "@" -}} + {{- $termination = .Values.image.digest | toString -}} + {{- end -}} + {{- if $registryName }} + {{- printf "%s/%s%s%s" $registryName $repositoryName $separator $termination -}} + {{- else -}} + {{- printf "%s%s%s" $repositoryName $separator $termination -}} + {{- end -}} +{{- end -}} +{{- end -}} diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/autoscaler.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/autoscaler.yaml new file mode 100644 index 00000000..7476b3ab --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/templates/autoscaler.yaml @@ -0,0 +1,28 @@ +{{- if .Values.server.autoscaling.enabled -}} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ 
template "trino.worker" . }} + {{- if .Values.commonLabels }} + labels: + {{- tpl (toYaml .Values.commonLabels) . | nindent 4 }} + {{- end }} +spec: + maxReplicas: {{ .Values.server.autoscaling.maxReplicas }} + minReplicas: {{ .Values.server.workers }} + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ template "trino.worker" . }} + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.server.autoscaling.targetCPUUtilizationPercentage }} + {{ if .Values.server.autoscaling.behavior -}} + behavior: + {{- toYaml .Values.server.autoscaling.behavior | nindent 4 }} + {{- end }} +{{- end }} diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-catalog.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-catalog.yaml new file mode 100644 index 00000000..9c1fea67 --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-catalog.yaml @@ -0,0 +1,26 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "trino.catalog" . }} + labels: + app: {{ template "trino.name" . }} + chart: {{ template "trino.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + role: catalogs + {{- if .Values.commonLabels }} + {{- tpl (toYaml .Values.commonLabels) . 
| nindent 4 }} + {{- end }} +data: + tpch.properties: | + connector.name=tpch + tpch.splits-per-node=4 + tpcds.properties: | + connector.name=tpcds + tpcds.splits-per-node=4 +{{- range $catalogName, $catalogProperties := .Values.additionalCatalogs }} + {{ $catalogName }}.properties: | + {{- $catalogProperties | nindent 4 }} +{{- end }} + + diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-coordinator.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-coordinator.yaml new file mode 100644 index 00000000..ba88283e --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-coordinator.yaml @@ -0,0 +1,173 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "trino.coordinator" . }} + labels: + app: {{ template "trino.name" . }} + chart: {{ template "trino.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + component: coordinator + {{- if .Values.commonLabels }} + {{- tpl (toYaml .Values.commonLabels) . 
| nindent 4 }} + {{- end }} +data: + node.properties: | + node.environment={{ .Values.server.node.environment }} + node.data-dir={{ .Values.server.node.dataDir }} + plugin.dir={{ .Values.server.node.pluginDir }} + {{- range $configValue := .Values.additionalNodeProperties }} + {{ $configValue }} + {{- end }} + + jvm.config: | + -server + -agentpath:/usr/lib/trino/bin/libjvmkill.so + -Xmx{{ .Values.coordinator.jvm.maxHeapSize }} + -XX:+{{ .Values.coordinator.jvm.gcMethod.type }} + -XX:G1HeapRegionSize={{ .Values.coordinator.jvm.gcMethod.g1.heapRegionSize }} + -XX:+ExplicitGCInvokesConcurrent + -XX:+HeapDumpOnOutOfMemoryError + -XX:+ExitOnOutOfMemoryError + -XX:-OmitStackTraceInFastThrow + -XX:ReservedCodeCacheSize=512M + -XX:PerMethodRecompilationCutoff=10000 + -XX:PerBytecodeRecompilationCutoff=10000 + -Djdk.attach.allowAttachSelf=true + -Djdk.nio.maxCachedBufferSize=2000000 + -XX:+UnlockDiagnosticVMOptions + # Reduce starvation of threads by GClocker, recommend to set about the number of cpu cores (JDK-8192647) + -XX:GCLockerRetryAllocationCount=32 + {{- range $configValue := .Values.coordinator.additionalJVMConfig }} + {{ $configValue }} + {{- end }} + + config.properties: | + coordinator=true +{{- if gt (int .Values.server.workers) 0 }} + node-scheduler.include-coordinator=false +{{- else }} + node-scheduler.include-coordinator=true +{{- end }} + http-server.http.port={{ .Values.service.port }} + query.max-memory={{ .Values.server.config.query.maxMemory }} + query.max-memory-per-node={{ .Values.coordinator.config.query.maxMemoryPerNode }} +{{- if .Values.coordinator.config.memory.heapHeadroomPerNode }} + memory.heap-headroom-per-node={{ .Values.coordinator.config.memory.heapHeadroomPerNode }} +{{- end }} + discovery.uri=http://localhost:{{ .Values.service.port }} +{{- if .Values.server.config.authenticationType }} + http-server.authentication.type={{ .Values.server.config.authenticationType }} +{{- end }} + {{- range $configValue := 
.Values.additionalConfigProperties }} + {{ $configValue }} + {{- end }} + {{- if .Values.server.config.https.enabled }} + http-server.https.enabled=true + http-server.https.port={{ .Values.server.config.https.port }} + http-server.https.keystore.path={{ .Values.server.config.https.keystore.path }} + {{- end }} + {{ .Values.server.coordinatorExtraConfig | indent 4 }} + +{{- if .Values.accessControl }}{{- if eq .Values.accessControl.type "configmap" }} + access-control.properties: | + access-control.name=file + {{- if .Values.accessControl.refreshPeriod }} + security.refresh-period={{ .Values.accessControl.refreshPeriod }} + {{- end }} + security.config-file={{ .Values.server.config.path }}/access-control/{{ .Values.accessControl.configFile | default "rules.json" }} +{{- end }}{{- end }} + +{{- if .Values.resourceGroups }} + resource-groups.properties: | + resource-groups.configuration-manager=file + resource-groups.config-file={{ .Values.server.config.path }}/resource-groups/resource-groups.json +{{- end }} + + exchange-manager.properties: | + exchange-manager.name={{ .Values.server.exchangeManager.name }} + {{ if eq .Values.server.exchangeManager.name "filesystem" }} + exchange.base-directories={{ .Values.server.exchangeManager.baseDir }} + {{- end }} + {{- range $configValue := .Values.additionalExchangeManagerProperties }} + {{ $configValue }} + {{- end }} + + log.properties: | + io.trino={{ .Values.server.log.trino.level }} + {{- range $configValue := .Values.additionalLogProperties }} + {{ $configValue }} + {{- end }} + + {{- if contains "PASSWORD" .Values.server.config.authenticationType }} + password-authenticator.properties: | + password-authenticator.name=file + file.password-file={{ .Values.server.config.path }}/auth/password.db + {{- end }} + {{- if .Values.auth.groups }}{{- if not (index .Values.coordinator.additionalConfigFiles "group-provider.properties") }} + group-provider.properties: | + group-provider.name=file + file.group-file={{ 
.Values.server.config.path }}/auth/group.db + {{- if .Values.auth.refreshPeriod }} + file.refresh-period={{ .Values.auth.refreshPeriod }} + {{- end }} + {{- end }}{{- end }} + +{{ if .Values.eventListenerProperties }} + event-listener.properties: | + {{- range $configValue := .Values.eventListenerProperties }} + {{ $configValue }} + {{- end }} +{{ end }} + +{{- range $fileName, $fileContent := .Values.coordinator.additionalConfigFiles }} + {{ $fileName }}: | + {{- $fileContent | nindent 4 }} +{{- end }} + +--- + +{{- if .Values.accessControl }}{{- if eq .Values.accessControl.type "configmap" }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: trino-access-control-volume-coordinator + labels: + {{- include "trino.labels" . | nindent 4 }} + app.kubernetes.io/component: coordinator +data: + {{- range $key, $val := .Values.accessControl.rules }} + {{ $key }}: {{ $val | quote }} + {{- end }} +{{- end }}{{- end }} + +--- + +{{- if .Values.resourceGroups }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: trino-resource-groups-volume-coordinator + labels: + {{- include "trino.labels" . | nindent 4 }} + app.kubernetes.io/component: coordinator +data: + resource-groups.json: |- + {{- .Values.resourceGroups.resourceGroupsConfig | nindent 4 }} +{{- end }} + +--- + +apiVersion: v1 +kind: ConfigMap +metadata: + name: schemas-volume-coordinator + labels: + {{- include "trino.labels" . 
| nindent 4 }} + app.kubernetes.io/component: coordinator +data: + {{- range $key, $val := .Values.kafka.tableDescriptions }} + {{ $key }}: {{ $val | quote }} + {{- end }} + +--- diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-worker.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-worker.yaml new file mode 100644 index 00000000..03617a9b --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/templates/configmap-worker.yaml @@ -0,0 +1,100 @@ +{{- if gt (int .Values.server.workers) 0 }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "trino.worker" . }} + labels: + app: {{ template "trino.name" . }} + chart: {{ template "trino.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + component: worker + {{- if .Values.commonLabels }} + {{- tpl (toYaml .Values.commonLabels) . | nindent 4 }} + {{- end }} +data: + node.properties: | + node.environment={{ .Values.server.node.environment }} + node.data-dir={{ .Values.server.node.dataDir }} + plugin.dir={{ .Values.server.node.pluginDir }} + {{- range $configValue := .Values.additionalNodeProperties }} + {{ $configValue }} + {{- end }} + + jvm.config: | + -server + -agentpath:/usr/lib/trino/bin/libjvmkill.so + -Xmx{{ .Values.worker.jvm.maxHeapSize }} + -XX:+{{ .Values.worker.jvm.gcMethod.type }} + -XX:G1HeapRegionSize={{ .Values.worker.jvm.gcMethod.g1.heapRegionSize }} + -XX:+ExplicitGCInvokesConcurrent + -XX:+HeapDumpOnOutOfMemoryError + -XX:+ExitOnOutOfMemoryError + -XX:-OmitStackTraceInFastThrow + -XX:ReservedCodeCacheSize=512M + -XX:PerMethodRecompilationCutoff=10000 + -XX:PerBytecodeRecompilationCutoff=10000 + -Djdk.attach.allowAttachSelf=true + -Djdk.nio.maxCachedBufferSize=2000000 + -XX:+UnlockDiagnosticVMOptions + # Reduce starvation of threads by GClocker, recommend to set about the number of cpu cores (JDK-8192647) + -XX:GCLockerRetryAllocationCount=32 + {{- range $configValue := 
.Values.worker.additionalJVMConfig }} + {{ $configValue }} + {{- end }} + + config.properties: | + coordinator=false + http-server.http.port={{ .Values.service.port }} + query.max-memory={{ .Values.server.config.query.maxMemory }} + query.max-memory-per-node={{ .Values.worker.config.query.maxMemoryPerNode }} + {{- if .Values.worker.config.memory.heapHeadroomPerNode }} + memory.heap-headroom-per-node={{ .Values.worker.config.memory.heapHeadroomPerNode }} + {{- end }} + discovery.uri=http://{{ template "trino.fullname" . }}:{{ .Values.service.port }} + {{- range $configValue := .Values.additionalConfigProperties }} + {{ $configValue }} + {{- end }} + {{ .Values.server.workerExtraConfig | indent 4 }} + + exchange-manager.properties: | + exchange-manager.name={{ .Values.server.exchangeManager.name }} + {{ if eq .Values.server.exchangeManager.name "filesystem" }} + exchange.base-directories={{ .Values.server.exchangeManager.baseDir }} + {{- end }} + {{- range $configValue := .Values.additionalExchangeManagerProperties }} + {{ $configValue }} + {{- end }} + + log.properties: | + io.trino={{ .Values.server.log.trino.level }} + {{- range $configValue := .Values.additionalLogProperties }} + {{ $configValue }} + {{- end }} + +{{ if .Values.eventListenerProperties }} + event-listener.properties: | + {{- range $configValue := .Values.eventListenerProperties }} + {{ $configValue }} + {{- end }} +{{ end }} + +{{- range $fileName, $fileContent := .Values.worker.additionalConfigFiles }} + {{ $fileName }}: | + {{- $fileContent | nindent 4 }} +{{- end }} + +--- + +apiVersion: v1 +kind: ConfigMap +metadata: + name: schemas-volume-worker + labels: + {{- include "trino.labels" . 
| nindent 4 }} + app.kubernetes.io/component: worker +data: + {{- range $key, $val := .Values.kafka.tableDescriptions }} + {{ $key }}: {{ $val | quote }} + {{- end }} +{{- end }} diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/deployment-coordinator.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/deployment-coordinator.yaml new file mode 100644 index 00000000..844be4e7 --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/templates/deployment-coordinator.yaml @@ -0,0 +1,184 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "trino.coordinator" . }} + labels: + app: {{ template "trino.name" . }} + chart: {{ template "trino.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + component: coordinator + {{- if .Values.commonLabels }} + {{- tpl (toYaml .Values.commonLabels) . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + app: {{ template "trino.name" . }} + release: {{ .Release.Name }} + component: coordinator + template: + metadata: + {{- if .Values.coordinator.annotations }} + annotations: + {{- tpl (toYaml .Values.coordinator.annotations) . | nindent 8 }} + {{- end }} + labels: + app: {{ template "trino.name" . }} + release: {{ .Release.Name }} + component: coordinator + {{- if .Values.coordinator.labels }} + {{- tpl (toYaml .Values.coordinator.labels) . | nindent 8 }} + {{- end }} + {{- if .Values.commonLabels }} + {{- tpl (toYaml .Values.commonLabels) . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "trino.serviceAccountName" . }} + {{- with .Values.securityContext }} + securityContext: + runAsUser: {{ .runAsUser }} + runAsGroup: {{ .runAsGroup }} + {{- end }} + {{- if .Values.shareProcessNamespace.coordinator }} + shareProcessNamespace: {{ .Values.shareProcessNamespace.coordinator }} + {{- end }} + volumes: + - name: config-volume + configMap: + name: {{ template "trino.coordinator" . 
}} + - name: catalog-volume + configMap: + name: {{ template "trino.catalog" . }} + - name: schemas-volume + configMap: + name: schemas-volume-coordinator + {{- if .Values.accessControl }}{{- if eq .Values.accessControl.type "configmap" }} + - name: access-control-volume + configMap: + name: trino-access-control-volume-coordinator + {{- end }}{{- end }} + {{- if .Values.resourceGroups }} + - name: resource-groups-volume + configMap: + name: trino-resource-groups-volume-coordinator + {{- end }} + {{- if or .Values.auth.passwordAuth .Values.auth.groups }} + - name: file-authentication-volume + secret: + {{- if and .Values.auth .Values.auth.passwordAuthSecret }} + secretName: {{ .Values.auth.passwordAuthSecret }} + {{- else }} + secretName: trino-file-authentication + {{- end }} + items: + {{- if .Values.auth.passwordAuth }} + - key: password.db + path: password.db + {{- end }} + {{- if .Values.auth.groups }} + - key: group.db + path: group.db + {{- end }} + {{- end }} + {{- range .Values.secretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- end }} + {{- range .Values.coordinator.secretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- end }} + {{- with .Values.coordinator.additionalVolumes }} + {{- . | toYaml | nindent 8 }} + {{- end }} + {{- if .Values.initContainers.coordinator }} + initContainers: + {{- tpl (toYaml .Values.initContainers.coordinator) . | nindent 6 }} + {{- end }} + imagePullSecrets: + {{- toYaml .Values.imagePullSecrets | nindent 8 }} + containers: + - name: {{ .Chart.Name }}-coordinator + image: {{ include "trino.image" . 
}} + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + {{- toYaml .Values.env | nindent 12 }} + envFrom: + {{- toYaml .Values.envFrom | nindent 12 }} + volumeMounts: + - mountPath: {{ .Values.server.config.path }} + name: config-volume + - mountPath: {{ .Values.server.config.path }}/catalog + name: catalog-volume + - mountPath: {{ .Values.kafka.mountPath }} + name: schemas-volume + {{- if .Values.accessControl }}{{- if eq .Values.accessControl.type "configmap" }} + - mountPath: {{ .Values.server.config.path }}/access-control + name: access-control-volume + {{- end }}{{- end }} + {{- if .Values.resourceGroups }} + - mountPath: {{ .Values.server.config.path }}/resource-groups + name: resource-groups-volume + {{- end }} + {{- range .Values.secretMounts }} + - name: {{ .name }} + mountPath: {{ .path }} + {{- end }} + {{- range .Values.coordinator.secretMounts }} + - name: {{ .name }} + mountPath: {{ .path }} + {{- end }} + {{- if or .Values.auth.passwordAuth .Values.auth.groups }} + - mountPath: {{ .Values.server.config.path }}/auth + name: file-authentication-volume + {{- end }} + {{- with .Values.coordinator.additionalVolumeMounts }} + {{- . 
| toYaml | nindent 12 }} + {{- end }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- range $key, $value := .Values.coordinator.additionalExposedPorts }} + - name: {{ $value.name }} + containerPort: {{ $value.port }} + protocol: {{ $value.protocol }} + {{- end }} + livenessProbe: + httpGet: + path: /v1/info + port: http + initialDelaySeconds: {{ .Values.coordinator.livenessProbe.initialDelaySeconds | default 20 }} + periodSeconds: {{ .Values.coordinator.livenessProbe.periodSeconds | default 10 }} + timeoutSeconds: {{ .Values.coordinator.livenessProbe.timeoutSeconds | default 5 }} + failureThreshold: {{ .Values.coordinator.livenessProbe.failureThreshold | default 6 }} + successThreshold: {{ .Values.coordinator.livenessProbe.successThreshold | default 1 }} + readinessProbe: + httpGet: + path: /v1/info + port: http + initialDelaySeconds: {{ .Values.coordinator.readinessProbe.initialDelaySeconds | default 20 }} + periodSeconds: {{ .Values.coordinator.readinessProbe.periodSeconds | default 10 }} + timeoutSeconds: {{ .Values.coordinator.readinessProbe.timeoutSeconds | default 5 }} + failureThreshold: {{ .Values.coordinator.readinessProbe.failureThreshold | default 6 }} + successThreshold: {{ .Values.coordinator.readinessProbe.successThreshold | default 1 }} + resources: + {{- toYaml .Values.coordinator.resources | nindent 12 }} + {{- if .Values.sidecarContainers.coordinator }} + {{- toYaml .Values.sidecarContainers.coordinator | nindent 8 }} + {{- end }} + {{- with .Values.coordinator.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.coordinator.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.coordinator.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/deployment-worker.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/deployment-worker.yaml new file mode 100644 index 00000000..5d9dd373 --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/templates/deployment-worker.yaml @@ -0,0 +1,149 @@ +{{- if gt (int .Values.server.workers) 0 }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "trino.worker" . }} + labels: + app: {{ template "trino.name" . }} + chart: {{ template "trino.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + component: worker + {{- if .Values.commonLabels }} + {{- tpl (toYaml .Values.commonLabels) . | nindent 4 }} + {{- end }} +spec: + {{- if not .Values.server.autoscaling.enabled }} + replicas: {{ .Values.server.workers }} + {{- end }} + selector: + matchLabels: + app: {{ template "trino.name" . }} + release: {{ .Release.Name }} + component: worker + template: + metadata: + {{- if .Values.worker.annotations }} + annotations: + {{- tpl (toYaml .Values.worker.annotations) . | nindent 8 }} + {{- end }} + labels: + app: {{ template "trino.name" . }} + release: {{ .Release.Name }} + component: worker + {{- if .Values.worker.labels }} + {{- tpl (toYaml .Values.worker.labels) . | nindent 8 }} + {{- end }} + {{- if .Values.commonLabels }} + {{- tpl (toYaml .Values.commonLabels) . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "trino.serviceAccountName" . }} + {{- with .Values.securityContext }} + securityContext: + runAsUser: {{ .runAsUser }} + runAsGroup: {{ .runAsGroup }} + {{- end }} + {{- if .Values.shareProcessNamespace.worker }} + shareProcessNamespace: {{ .Values.shareProcessNamespace.worker }} + {{- end }} + volumes: + - name: config-volume + configMap: + name: {{ template "trino.worker" . }} + - name: catalog-volume + configMap: + name: {{ template "trino.catalog" . 
}} + - name: schemas-volume + configMap: + name: schemas-volume-worker + {{- range .Values.secretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- end }} + {{- range .Values.worker.secretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- end }} + {{- with .Values.worker.additionalVolumes }} + {{- . | toYaml | nindent 8 }} + {{- end }} + {{- if .Values.initContainers.worker }} + initContainers: + {{- tpl (toYaml .Values.initContainers.worker) . | nindent 6 }} + {{- end }} + imagePullSecrets: + {{- toYaml .Values.imagePullSecrets | nindent 8 }} + containers: + - name: {{ .Chart.Name }}-worker + image: {{ include "trino.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + {{- toYaml .Values.env | nindent 12 }} + envFrom: + {{- toYaml .Values.envFrom | nindent 12 }} + volumeMounts: + - mountPath: {{ .Values.server.config.path }} + name: config-volume + - mountPath: {{ .Values.server.config.path }}/catalog + name: catalog-volume + - mountPath: {{ .Values.kafka.mountPath }} + name: schemas-volume + {{- range .Values.secretMounts }} + - name: {{ .name }} + mountPath: {{ .path }} + {{- end }} + {{- range .Values.worker.secretMounts }} + - name: {{ .name }} + mountPath: {{ .path }} + {{- end }} + {{- with .Values.worker.additionalVolumeMounts }} + {{- . 
| toYaml | nindent 12 }} + {{- end }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- range $key, $value := .Values.worker.additionalExposedPorts }} + - name: {{ $value.name }} + containerPort: {{ $value.port }} + protocol: {{ $value.protocol }} + {{- end }} + livenessProbe: + httpGet: + path: /v1/info + port: http + initialDelaySeconds: {{ .Values.worker.livenessProbe.initialDelaySeconds | default 20 }} + periodSeconds: {{ .Values.worker.livenessProbe.periodSeconds | default 10 }} + timeoutSeconds: {{ .Values.worker.livenessProbe.timeoutSeconds | default 5 }} + failureThreshold: {{ .Values.worker.livenessProbe.failureThreshold | default 6 }} + successThreshold: {{ .Values.worker.livenessProbe.successThreshold | default 1 }} + readinessProbe: + httpGet: + path: /v1/info + port: http + initialDelaySeconds: {{ .Values.worker.readinessProbe.initialDelaySeconds | default 20 }} + periodSeconds: {{ .Values.worker.readinessProbe.periodSeconds | default 10 }} + timeoutSeconds: {{ .Values.worker.readinessProbe.timeoutSeconds | default 5 }} + failureThreshold: {{ .Values.worker.readinessProbe.failureThreshold | default 6 }} + successThreshold: {{ .Values.worker.readinessProbe.successThreshold | default 1 }} + resources: + {{- toYaml .Values.worker.resources | nindent 12 }} + {{- if .Values.sidecarContainers.worker }} + {{- toYaml .Values.sidecarContainers.worker | nindent 8 }} + {{- end }} + {{- with .Values.worker.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.worker.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.worker.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/ingress.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/ingress.yaml new file mode 100644 index 00000000..b7849748 --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/templates/ingress.yaml @@ -0,0 +1,44 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "trino.fullname" . -}} +{{- $svcPort := .Values.service.port -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ template "trino.coordinator" . }} + labels: + {{- include "trino.labels" . | nindent 4 }} + {{- if .Values.commonLabels }} + {{- tpl (toYaml .Values.commonLabels) . | nindent 4 }} + {{- end }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + ingressClassName: {{ .Values.ingress.className }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + pathType: {{ .pathType }} + backend: + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- end }} + {{- end }} +{{- end }} diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/secret.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/secret.yaml new file mode 100644 index 00000000..754353a5 --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/templates/secret.yaml @@ -0,0 +1,19 @@ +{{- if or .Values.auth.passwordAuth .Values.auth.groups }} +apiVersion: v1 +kind: Secret +metadata: + {{- if and .Values.auth .Values.auth.passwordAuthSecret }} + name: {{ .Values.auth.passwordAuthSecret }} + {{- else }} + name: trino-file-authentication + {{- end }} + labels: + {{- include "trino.labels" . 
| nindent 4 }} +data: +{{- if .Values.auth.passwordAuth }} + password.db: {{ .Values.auth.passwordAuth | b64enc }} +{{- end }} +{{- if .Values.auth.groups}} + group.db: {{ .Values.auth.groups | b64enc }} +{{- end }} +{{- end }} diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/service.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/service.yaml new file mode 100644 index 00000000..cfb0b2f3 --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/templates/service.yaml @@ -0,0 +1,26 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "trino.fullname" . }} + labels: + app: {{ template "trino.name" . }} + chart: {{ template "trino.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + {{- range $key, $value := .Values.coordinator.additionalExposedPorts }} + - port: {{ $value.servicePort }} + name: {{ $value.name }} + targetPort: {{ $value.port }} + protocol: {{ $value.protocol }} + {{- end }} + selector: + app: {{ template "trino.name" . }} + release: {{ .Release.Name }} + component: coordinator diff --git a/terraform/modules/helm/trino/trino-helm-chart/templates/serviceaccount.yaml b/terraform/modules/helm/trino/trino-helm-chart/templates/serviceaccount.yaml new file mode 100644 index 00000000..29dfdcc2 --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/templates/serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "trino.serviceAccountName" . }} + labels: + {{- include "trino.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/terraform/modules/helm/trino/trino-helm-chart/values.yaml b/terraform/modules/helm/trino/trino-helm-chart/values.yaml new file mode 100644 index 00000000..05bc1020 --- /dev/null +++ b/terraform/modules/helm/trino/trino-helm-chart/values.yaml @@ -0,0 +1,448 @@ +# Default values for trino. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +image: + registry: "" # Image registry, defaults to empty, which results in DockerHub usage + repository: trinodb/trino # Repository location of the Trino image, typically `organization/imagename` + tag: "" # Image tag, defaults to the Trino release version specified as `appVersion` from Chart.yaml + digest: "" # Optional digest value of the image specified as `sha256:abcd...`. A specified value overrides `tag`. + useRepositoryAsSoleImageReference: false # When true, only the content in `repository` is used as image reference + pullPolicy: IfNotPresent + +imagePullSecrets: + - name: registry-credentials + +server: + workers: 1 + node: + environment: production + dataDir: /data/trino + pluginDir: /usr/lib/trino/plugin + log: + trino: + level: INFO + config: + path: /etc/trino + http: + port: 8080 + https: + enabled: false + port: 8443 + keystore: + path: "" + # Trino supports multiple authentication types: PASSWORD, CERTIFICATE, OAUTH2, JWT, KERBEROS + # For more info: https://trino.io/docs/current/security/authentication-types.html + authenticationType: "" + query: + maxMemory: "4GB" + exchangeManager: + name: "filesystem" + baseDir: "/tmp/trino-local-file-system-exchange-manager" + workerExtraConfig: "" + coordinatorExtraConfig: "" + autoscaling: + enabled: false + maxReplicas: 5 + targetCPUUtilizationPercentage: 50 + behavior: {} + # scaleDown: + # stabilizationWindowSeconds: 300 + # policies: + # - type: Percent + # value: 100 + # periodSeconds: 15 + # scaleUp: + # stabilizationWindowSeconds: 0 + # policies: + # - type: Percent + # 
value: 100 + # periodSeconds: 15 + # - type: Pods + # value: 4 + # periodSeconds: 15 + # selectPolicy: Max + +accessControl: {} + # type: configmap + # refreshPeriod: 60s + # # Rules file is mounted to /etc/trino/access-control + # configFile: "rules.json" + # rules: + # rules.json: |- + # { + # "catalogs": [ + # { + # "user": "admin", + # "catalog": "(mysql|system)", + # "allow": "all" + # }, + # { + # "group": "finance|human_resources", + # "catalog": "postgres", + # "allow": true + # }, + # { + # "catalog": "hive", + # "allow": "all" + # }, + # { + # "user": "alice", + # "catalog": "postgresql", + # "allow": "read-only" + # }, + # { + # "catalog": "system", + # "allow": "none" + # } + # ], + # "schemas": [ + # { + # "user": "admin", + # "schema": ".*", + # "owner": true + # }, + # { + # "user": "guest", + # "owner": false + # }, + # { + # "catalog": "default", + # "schema": "default", + # "owner": true + # } + # ] + # } + +resourceGroups: {} + # # Resource groups file is mounted to /etc/trino/resource-groups/resource-groups.json + # resourceGroupsConfig: |- + # { + # "rootGroups": [ + # { + # "name": "global", + # "softMemoryLimit": "80%", + # "hardConcurrencyLimit": 100, + # "maxQueued": 100, + # "schedulingPolicy": "fair", + # "jmxExport": true, + # "subGroups": [ + # { + # "name": "admin", + # "softMemoryLimit": "30%", + # "hardConcurrencyLimit": 20, + # "maxQueued": 10 + # }, + # { + # "name": "finance_human_resources", + # "softMemoryLimit": "20%", + # "hardConcurrencyLimit": 15, + # "maxQueued": 10 + # }, + # { + # "name": "general", + # "softMemoryLimit": "30%", + # "hardConcurrencyLimit": 20, + # "maxQueued": 10 + # }, + # { + # "name": "readonly", + # "softMemoryLimit": "10%", + # "hardConcurrencyLimit": 5, + # "maxQueued": 5 + # } + # ] + # } + # ], + # "selectors": [ + # { + # "user": "admin", + # "group": "global.admin" + # }, + # { + # "group": "finance|human_resources", + # "group": "global.finance_human_resources" + # }, + # { + # "user": "alice", 
+ # "group": "global.readonly" + # }, + # { + # "group": "global.general" + # } + # ] + # } + +additionalNodeProperties: {} + +additionalConfigProperties: {} + +additionalLogProperties: {} + +additionalExchangeManagerProperties: {} + +eventListenerProperties: {} + +additionalCatalogs: + lakehouse: |- + connector.name=hudi + hive.metastore.uri=thrift://localhost:9083 + hive.s3.aws-access-key="" + hive.s3.aws-secret-key="" + hive.s3.endpoint=http://localhost:4566 + hive.s3.ssl.enabled=false + +# Array of EnvVar (https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#envvar-v1-core) +env: [] + +# Array of EnvFromSource (https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#envfromsource-v1-core) +envFrom: [] + +initContainers: {} + # coordinator: + # - name: init-coordinator + # image: busybox:1.28 + # imagePullPolicy: IfNotPresent + # command: ['sh', '-c', "until nslookup myservice.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"] + # worker: + # - name: init-worker + # image: busybox:1.28 + # command: ['sh', '-c', 'echo The worker is running! 
&& sleep 3600'] + +sidecarContainers: {} +# coordinator: +# - name: side-coordinator +# image: busybox:1.28 +# imagePullPolicy: IfNotPresent +# command: ['sleep', '1'] +# worker: +# - name: side-worker +# image: busybox:1.28 +# imagePullPolicy: IfNotPresent +# command: ['sleep', '1'] + +securityContext: + runAsUser: 1000 + runAsGroup: 1000 + +shareProcessNamespace: + coordinator: false + worker: false + +service: + type: ClusterIP + port: 8080 + +auth: {} + # Set username and password + # https://trino.io/docs/current/security/password-file.html#file-format + # passwordAuth: "username:encrypted-password-with-htpasswd" + # or set the name of a secret containing this file in the password.db key + # passwordAuthSecret: "trino-password-authentication" + # Set users' groups + # https://trino.io/docs/current/security/group-file.html#file-format + # refreshPeriod: 5s + # groups: "group_name:user_1,user_2,user_3" + +serviceAccount: + # Specifies whether a service account should be created + create: false + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + # Annotations to add to the service account + annotations: {} + +secretMounts: [] + # - name: sample-secret + # secretName: sample-secret + # path: /secrets/sample.json + +coordinator: + jvm: + maxHeapSize: "8G" + gcMethod: + type: "UseG1GC" + g1: + heapRegionSize: "32M" + + config: + memory: + heapHeadroomPerNode: "" + query: + maxMemoryPerNode: "1GB" + + additionalJVMConfig: {} + + additionalExposedPorts: {} + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + livenessProbe: {} + # initialDelaySeconds: 20 + # periodSeconds: 10 + # timeoutSeconds: 5 + # failureThreshold: 6 + # successThreshold: 1 + readinessProbe: {} + # initialDelaySeconds: 20 + # periodSeconds: 10 + # timeoutSeconds: 5 + # failureThreshold: 6 + # successThreshold: 1 + + nodeSelector: {} + + tolerations: [] + + affinity: {} + + additionalConfigFiles: {} + + additionalVolumes: [] # One or more additional volumes to add to the coordinator. + # - name: extras + # emptyDir: {} + + additionalVolumeMounts: [] # One or more additional volume mounts to add to the coordinator. + # - name: extras + # mountPath: /usr/share/extras + # readOnly: true + + annotations: {} + + labels: {} + + secretMounts: [] + # - name: sample-secret + # secretName: sample-secret + # path: /secrets/sample.json + +worker: + jvm: + maxHeapSize: "8G" + gcMethod: + type: "UseG1GC" + g1: + heapRegionSize: "32M" + + config: + memory: + heapHeadroomPerNode: "" + query: + maxMemoryPerNode: "1GB" + + additionalJVMConfig: {} + + additionalExposedPorts: {} + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + livenessProbe: {} + # initialDelaySeconds: 20 + # periodSeconds: 10 + # timeoutSeconds: 5 + # failureThreshold: 6 + # successThreshold: 1 + readinessProbe: {} + # initialDelaySeconds: 20 + # periodSeconds: 10 + # timeoutSeconds: 5 + # failureThreshold: 6 + # successThreshold: 1 + + nodeSelector: {} + + tolerations: [] + + affinity: {} + + additionalConfigFiles: {} + + additionalVolumes: [] # One or more additional volume mounts to add to all workers. + # - name: extras + # emptyDir: {} + + additionalVolumeMounts: [] # One or more additional volume mounts to add to all workers. + # - name: extras + # mountPath: /usr/share/extras + # readOnly: true + + annotations: {} + + labels: {} + + secretMounts: [] + # - name: sample-secret + # secretName: sample-secret + # path: /secrets/sample.json + +kafka: + mountPath: "/etc/trino/schemas" + tableDescriptions: {} + # Custom kafka table descriptions that will be mounted in mountPath + # testschema.json: |- + # { + # "tableName": "testtable", + # "schemaName": "testschema", + # "topicName": "testtopic", + # "key": { + # "dataFormat": "json", + # "fields": [ + # { + # "name": "_key", + # "dataFormat": "VARCHAR", + # "type": "VARCHAR", + # "hidden": "false" + # } + # ] + # }, + # "message": { + # "dataFormat": "json", + # "fields": [ + # { + # "name": "id", + # "mapping": "id", + # "type": "BIGINT" + # }, + # { + # "name": "test_field", + # "mapping": "test_field", + # "type": "VARCHAR" + # } + # ] + # } + # } +commonLabels: {} # Labels that get applied to every resource's metadata +ingress: + enabled: false + className: "" + annotations: {} + hosts: [] + # - host: trino.example.com + # paths: + # - path: / + # pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local diff --git a/terraform/modules/helm/trino/trino.yaml.tfpl b/terraform/modules/helm/trino/trino.yaml.tfpl new file 
mode 100644 index 00000000..28970425 --- /dev/null +++ b/terraform/modules/helm/trino/trino.yaml.tfpl @@ -0,0 +1,441 @@ +# Default values for trino. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +image: + registry: ${trino_image.registry} # Image registry, defaults to empty, which results in DockerHub usage + repository: ${trino_image.name} # Repository location of the Trino image, typically `organization/imagename` + tag: ${trino_image.tag} # Image tag, defaults to the Trino release version specified as `appVersion` from Chart.yaml + digest: "" # Optional digest value of the image specified as `sha256:abcd...`. A specified value overrides `tag`. + useRepositoryAsSoleImageReference: false # When true, only the content in `repository` is used as image reference + pullPolicy: ${trino_image.pullPolicy} + +imagePullSecrets: + - name: registry-credentials + +server: + workers: ${trino_workers_count} + node: + environment: production + dataDir: /data/trino + pluginDir: /usr/lib/trino/plugin + log: + trino: + level: INFO + config: + path: /etc/trino + http: + port: 8080 + https: + enabled: false + port: 8443 + keystore: + path: "" + # Trino supports multiple authentication types: PASSWORD, CERTIFICATE, OAUTH2, JWT, KERBEROS + # For more info: https://trino.io/docs/current/security/authentication-types.html + authenticationType: "" + query: + maxMemory: "4GB" + exchangeManager: + name: "filesystem" + baseDir: "/tmp/trino-local-file-system-exchange-manager" + workerExtraConfig: "" + coordinatorExtraConfig: "" + autoscaling: + enabled: false + maxReplicas: 5 + targetCPUUtilizationPercentage: 50 + behavior: {} + # scaleDown: + # stabilizationWindowSeconds: 300 + # policies: + # - type: Percent + # value: 100 + # periodSeconds: 15 + # scaleUp: + # stabilizationWindowSeconds: 0 + # policies: + # - type: Percent + # value: 100 + # periodSeconds: 15 + # - type: Pods + # value: 4 + # periodSeconds: 15 + # selectPolicy: Max + 
+accessControl: {} + # type: configmap + # refreshPeriod: 60s + # # Rules file is mounted to /etc/trino/access-control + # configFile: "rules.json" + # rules: + # rules.json: |- + # { + # "catalogs": [ + # { + # "user": "admin", + # "catalog": "(mysql|system)", + # "allow": "all" + # }, + # { + # "group": "finance|human_resources", + # "catalog": "postgres", + # "allow": true + # }, + # { + # "catalog": "hive", + # "allow": "all" + # }, + # { + # "user": "alice", + # "catalog": "postgresql", + # "allow": "read-only" + # }, + # { + # "catalog": "system", + # "allow": "none" + # } + # ], + # "schemas": [ + # { + # "user": "admin", + # "schema": ".*", + # "owner": true + # }, + # { + # "user": "guest", + # "owner": false + # }, + # { + # "catalog": "default", + # "schema": "default", + # "owner": true + # } + # ] + # } + +resourceGroups: {} + # # Resource groups file is mounted to /etc/trino/resource-groups/resource-groups.json + # resourceGroupsConfig: |- + # { + # "rootGroups": [ + # { + # "name": "global", + # "softMemoryLimit": "80%", + # "hardConcurrencyLimit": 100, + # "maxQueued": 100, + # "schedulingPolicy": "fair", + # "jmxExport": true, + # "subGroups": [ + # { + # "name": "admin", + # "softMemoryLimit": "30%", + # "hardConcurrencyLimit": 20, + # "maxQueued": 10 + # }, + # { + # "name": "finance_human_resources", + # "softMemoryLimit": "20%", + # "hardConcurrencyLimit": 15, + # "maxQueued": 10 + # }, + # { + # "name": "general", + # "softMemoryLimit": "30%", + # "hardConcurrencyLimit": 20, + # "maxQueued": 10 + # }, + # { + # "name": "readonly", + # "softMemoryLimit": "10%", + # "hardConcurrencyLimit": 5, + # "maxQueued": 5 + # } + # ] + # } + # ], + # "selectors": [ + # { + # "user": "admin", + # "group": "global.admin" + # }, + # { + # "group": "finance|human_resources", + # "group": "global.finance_human_resources" + # }, + # { + # "user": "alice", + # "group": "global.readonly" + # }, + # { + # "group": "global.general" + # } + # ] + # } + 
+additionalNodeProperties: {} + +additionalConfigProperties: {} + +additionalLogProperties: {} + +additionalExchangeManagerProperties: {} + +eventListenerProperties: {} + +additionalCatalogs: ${trino_catalogs} + +# Array of EnvVar (https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#envvar-v1-core) +env: [] + +# Array of EnvFromSource (https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#envfromsource-v1-core) +envFrom: [] + +initContainers: {} + # coordinator: + # - name: init-coordinator + # image: busybox:1.28 + # imagePullPolicy: IfNotPresent + # command: ['sh', '-c', "until nslookup myservice.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"] + # worker: + # - name: init-worker + # image: busybox:1.28 + # command: ['sh', '-c', 'echo The worker is running! && sleep 3600'] + +sidecarContainers: {} +# coordinator: +# - name: side-coordinator +# image: busybox:1.28 +# imagePullPolicy: IfNotPresent +# command: ['sleep', '1'] +# worker: +# - name: side-worker +# image: busybox:1.28 +# imagePullPolicy: IfNotPresent +# command: ['sleep', '1'] + +securityContext: + runAsUser: 1000 + runAsGroup: 1000 + +shareProcessNamespace: + coordinator: false + worker: false + +service: + type: ${trino_service.type} + port: ${trino_service.port} + +auth: {} + # Set username and password + # https://trino.io/docs/current/security/password-file.html#file-format + # passwordAuth: "username:encrypted-password-with-htpasswd" + # or set the name of a secret containing this file in the password.db key + # passwordAuthSecret: "trino-password-authentication" + # Set users' groups + # https://trino.io/docs/current/security/group-file.html#file-format + # refreshPeriod: 5s + # groups: "group_name:user_1,user_2,user_3" + +serviceAccount: + # Specifies whether a service account should be created + create: false + # The name of the service account to use. 
+ # If not set and create is true, a name is generated using the fullname template + name: "" + # Annotations to add to the service account + annotations: {} + +secretMounts: [] + # - name: sample-secret + # secretName: sample-secret + # path: /secrets/sample.json + +coordinator: + jvm: + maxHeapSize: "8G" + gcMethod: + type: "UseG1GC" + g1: + heapRegionSize: "32M" + + config: + memory: + heapHeadroomPerNode: "" + query: + maxMemoryPerNode: "1GB" + + additionalJVMConfig: {} + + additionalExposedPorts: {} + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + livenessProbe: {} + # initialDelaySeconds: 20 + # periodSeconds: 10 + # timeoutSeconds: 5 + # failureThreshold: 6 + # successThreshold: 1 + readinessProbe: {} + # initialDelaySeconds: 20 + # periodSeconds: 10 + # timeoutSeconds: 5 + # failureThreshold: 6 + # successThreshold: 1 + + nodeSelector: {} + + tolerations: [] + + affinity: {} + + additionalConfigFiles: {} + + additionalVolumes: [] # One or more additional volumes to add to the coordinator. + # - name: extras + # emptyDir: {} + + additionalVolumeMounts: [] # One or more additional volume mounts to add to the coordinator. 
+ # - name: extras + # mountPath: /usr/share/extras + # readOnly: true + + annotations: {} + + labels: {} + + secretMounts: [] + # - name: sample-secret + # secretName: sample-secret + # path: /secrets/sample.json + +worker: + jvm: + maxHeapSize: "8G" + gcMethod: + type: "UseG1GC" + g1: + heapRegionSize: "32M" + + config: + memory: + heapHeadroomPerNode: "" + query: + maxMemoryPerNode: "1GB" + + additionalJVMConfig: {} + + additionalExposedPorts: {} + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + livenessProbe: {} + # initialDelaySeconds: 20 + # periodSeconds: 10 + # timeoutSeconds: 5 + # failureThreshold: 6 + # successThreshold: 1 + readinessProbe: {} + # initialDelaySeconds: 20 + # periodSeconds: 10 + # timeoutSeconds: 5 + # failureThreshold: 6 + # successThreshold: 1 + + nodeSelector: {} + + tolerations: [] + + affinity: {} + + additionalConfigFiles: {} + + additionalVolumes: [] # One or more additional volume mounts to add to all workers. + # - name: extras + # emptyDir: {} + + additionalVolumeMounts: [] # One or more additional volume mounts to add to all workers. 
+ # - name: extras + # mountPath: /usr/share/extras + # readOnly: true + + annotations: {} + + labels: {} + + secretMounts: [] + # - name: sample-secret + # secretName: sample-secret + # path: /secrets/sample.json + +kafka: + mountPath: "/etc/trino/schemas" + tableDescriptions: {} + # Custom kafka table descriptions that will be mounted in mountPath + # testschema.json: |- + # { + # "tableName": "testtable", + # "schemaName": "testschema", + # "topicName": "testtopic", + # "key": { + # "dataFormat": "json", + # "fields": [ + # { + # "name": "_key", + # "dataFormat": "VARCHAR", + # "type": "VARCHAR", + # "hidden": "false" + # } + # ] + # }, + # "message": { + # "dataFormat": "json", + # "fields": [ + # { + # "name": "id", + # "mapping": "id", + # "type": "BIGINT" + # }, + # { + # "name": "test_field", + # "mapping": "test_field", + # "type": "VARCHAR" + # } + # ] + # } + # } +commonLabels: {} # Labels that get applied to every resource's metadata +ingress: + enabled: false + className: "" + annotations: {} + hosts: [] + # - host: trino.example.com + # paths: + # - path: / + # pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local diff --git a/terraform/modules/helm/trino/variables.tf b/terraform/modules/helm/trino/variables.tf new file mode 100644 index 00000000..9cb3f9f4 --- /dev/null +++ b/terraform/modules/helm/trino/variables.tf @@ -0,0 +1,82 @@ + +variable "trino_image" { + type = object({ name = string, tag = string, registry = string, pullPolicy = string }) + description = "Trino image metadata" + default = { + name = "trinodb/trino" + tag = "latest" + pullPolicy = "IfNotPresent" + registry = "" + } +} +variable "trino_namespace" { + type = string + description = "Trino namespace" + default = "hudi" +} +variable "trino_create_namespace" { + type = bool + description = "Create Trino namespace." 
+ default = true +} +variable "trino_wait_for_jobs" { + type = bool + description = "Trino wait for jobs paramater." + default = false +} + +variable "trino_chart_install_timeout" { + type = number + description = "Trino chart install timeout." + default = 900 +} + +variable "trino_custom_values_yaml" { + type = string + description = "Trino chart values.yaml path." + default = "trino.yaml.tfpl" +} + +variable "trino_workers_count" { + default = 1 + description = "Number of trino workers" + type = number +} + +variable "trino_release_name" { + type = string + description = "Trino release name" + default = "trino" +} + +variable "trino_chart_path" { + type = string + description = "Trino helm chart path." + default = "trino-helm-chart" +} +variable "trino_chart_depends_on" { + type = any + description = "List of helm release names that this chart depends on." + default = "" +} +variable "trino_service" { + type = object({ type = string, port = number }) + description = "Trino service metadata" + default = { type = "ClusterIP", port = 8080 } +} +variable "trino_lakehouse_metadata" { + type = map(string) + description = "Trino lakehouse config" +} +locals { + default_lakehouse_metadata = { + "connector.name" = "hudi" + "hive.metastore.uri" = "thrift://hms-metastore-app.hudi.svc:9083" + "hive.s3.ssl.enabled" = "false" + } +} +locals { + catalogs = { + lakehouse = merge(var.trino_lakehouse_metadata, local.default_lakehouse_metadata) + } +} diff --git a/terraform/modules/helm/unified_helm/obsrv/Chart.yaml b/terraform/modules/helm/unified_helm/obsrv/Chart.yaml index 8a9a9184..220aeda1 100644 --- a/terraform/modules/helm/unified_helm/obsrv/Chart.yaml +++ b/terraform/modules/helm/unified_helm/obsrv/Chart.yaml @@ -40,9 +40,9 @@ dependencies: - name: master-data-processor version: 0.1.2 condition: master-data-processor.enabled - - name: merged-pipeline + - name: unified-pipeline version: 0.1.2 - condition: merged-pipeline.enabled + condition: unified-pipeline.enabled - name: 
postgresql version: 12.2.7 condition: postgresql.enabled diff --git a/terraform/modules/helm/unified_helm/obsrv/charts/command-api/values.yaml b/terraform/modules/helm/unified_helm/obsrv/charts/command-api/values.yaml index ee006d0d..6d1c5b0a 100644 --- a/terraform/modules/helm/unified_helm/obsrv/charts/command-api/values.yaml +++ b/terraform/modules/helm/unified_helm/obsrv/charts/command-api/values.yaml @@ -12,9 +12,9 @@ service_config: | namespace: flink reinstall_sleep_time: 3 jobs: - - name: "PipelineMergedJob" - release_name: merged-pipeline - job_manager_url: "merged-pipeline-jobmanager.flink.svc.cluster.local:8081" + - name: "UnifiedPipelineJob" + release_name: unified-pipeline + job_manager_url: "unified-pipeline-jobmanager.flink.svc.cluster.local:8081" - name: "MasterDataProcessorJob" release_name: master-data-processor job_manager_url: "master-data-processor-jobmanager.flink.svc.cluster.local:8081" diff --git a/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/templates/deployment.yaml b/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/templates/deployment.yaml index fa4728e9..49d9d0ac 100644 --- a/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/templates/deployment.yaml +++ b/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/templates/deployment.yaml @@ -128,7 +128,7 @@ data: output.duplicate.topic = ${job.env}".masterdata.failed" output.denorm.topic = ${job.env}".masterdata.denorm" output.transform.topic = ${job.env}".masterdata.transform" - output.transform.failed.topic = ${job.env}".masterdata.transform.failed" + output.transform.failed.topic = ${job.env}".masterdata.failed" stats.topic = ${job.env}".masterdata.stats" groupId = ${job.env}"-masterdata-pipeline-group" diff --git a/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/values.yaml b/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/values.yaml index 
49b8cde7..d3497e87 100644 --- a/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/values.yaml +++ b/terraform/modules/helm/unified_helm/obsrv/charts/master-data-processor/values.yaml @@ -157,6 +157,7 @@ master-data-processor: output.unique.topic = ${job.env}".masterdata.unique" output.duplicate.topic = ${job.env}".masterdata.duplicate" output.transform.topic = ${job.env}".masterdata.transform" + output.transform.failed.topic = ${job.env}".masterdata.failed" stats.topic = ${job.env}".masterdata.stats" groupId = ${job.env}"-masterdata-pipeline-group" diff --git a/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/Chart.yaml b/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/Chart.yaml index eb70f976..f8f30dff 100644 --- a/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/Chart.yaml +++ b/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 -name: merged-pipeline +name: unified-pipeline description: A Helm chart for Kubernetes # A chart can be either an 'application' or a 'library' chart. 
diff --git a/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/templates/deployment.yaml b/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/templates/deployment.yaml index 29b9b08b..f3ff2074 100644 --- a/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/templates/deployment.yaml +++ b/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/templates/deployment.yaml @@ -1,5 +1,5 @@ --- -# Source: obsrv-chart/charts/merged-pipeline/templates/flink_job_configmap.yaml +# Source: obsrv-chart/charts/unified-pipeline/templates/flink_job_configmap.yaml apiVersion: v1 data: base-config: |+ @@ -71,7 +71,7 @@ data: taskmanager.memory.process.size: 1700m jobmanager.memory.process.size: 1600m state.savepoints.dir: file:///tmp - job_classname: org.sunbird.obsrv.pipeline.task.MergedPipelineStreamTask + job_classname: org.sunbird.obsrv.pipeline.task.UnifiedPipelineStreamTask log4j_console_properties: | # This affects logging for both user code and Flink rootLogger.level = INFO @@ -102,7 +102,7 @@ data: # Suppress the irrelevant (wrong) warnings from the Netty channel handler logger.netty.name = org.apache.flink.shaded.akka.org.jboss.netty.channel.DefaultChannelPipeline logger.netty.level = OFF - merged-pipeline: | + unified-pipeline: | include file("/data/flink/conf/baseconfig.conf") kafka { input.topic = ${job.env}".ingest" @@ -117,6 +117,7 @@ data: output.denorm.topic = ${job.env}".denorm" output.denorm.failed.topic = ${job.env}".denorm.failed" output.transform.topic = ${job.env}".transform" + output.transform.failed.topic = ${job.env}".failed" stats.topic = ${job.env}".stats" groupId = ${job.env}"-single-pipeline-group" producer { @@ -142,18 +143,18 @@ kind: ConfigMap metadata: labels: app: flink - name: merged-pipeline-config + name: unified-pipeline-config namespace: flink --- -# Source: obsrv-chart/charts/merged-pipeline/templates/deployment.yaml +# Source: obsrv-chart/charts/unified-pipeline/templates/deployment.yaml 
apiVersion: v1 kind: Service metadata: labels: app: flink - component: merged-pipeline-jobmanager - name: merged-pipeline-jobmanager + component: unified-pipeline-jobmanager + name: unified-pipeline-jobmanager namespace: flink spec: ports: @@ -169,14 +170,14 @@ spec: port: 9250 selector: app: flink - component: merged-pipeline-jobmanager + component: unified-pipeline-jobmanager type: ClusterIP --- -# Source: obsrv-chart/charts/merged-pipeline/templates/deployment.yaml +# Source: obsrv-chart/charts/unified-pipeline/templates/deployment.yaml apiVersion: v1 kind: Service metadata: - name: merged-pipeline-jobmanager-webui + name: unified-pipeline-jobmanager-webui namespace: flink spec: ports: @@ -186,17 +187,17 @@ spec: targetPort: 8081 selector: app: flink - component: merged-pipeline-jobmanager + component: unified-pipeline-jobmanager type: ClusterIP --- -# Source: obsrv-chart/charts/merged-pipeline/templates/deployment.yaml +# Source: obsrv-chart/charts/unified-pipeline/templates/deployment.yaml apiVersion: v1 kind: Service metadata: labels: app: flink - component: merged-pipeline-taskmanager - name: merged-pipeline-taskmanager + component: unified-pipeline-taskmanager + name: unified-pipeline-taskmanager namespace: flink spec: ports: @@ -204,29 +205,29 @@ spec: port: 9251 selector: app: flink - component: merged-pipeline-taskmanager + component: unified-pipeline-taskmanager type: ClusterIP --- --- -# Source: obsrv-chart/charts/merged-pipeline/templates/deployment.yaml +# Source: obsrv-chart/charts/unified-pipeline/templates/deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: - name: merged-pipeline-taskmanager + name: unified-pipeline-taskmanager namespace: flink spec: replicas: 1 selector: matchLabels: app: flink - component: merged-pipeline-taskmanager + component: unified-pipeline-taskmanager template: metadata: labels: azure-extensions-usage-release-identifier: obsrv-base app: flink - component: merged-pipeline-taskmanager + component: 
unified-pipeline-taskmanager system.processing: "true" spec: containers: @@ -235,21 +236,21 @@ spec: - -Dfs.azure.account.key.{{ .Values.global.azure_storage_account_name }}.blob.core.windows.net={{ .Values.global.azure_storage_account_key }} - -Dweb.submit.enable=false - -Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter - - -Dmetrics.reporter.prom.host=merged-pipeline-taskmanager + - -Dmetrics.reporter.prom.host=unified-pipeline-taskmanager - -Dmetrics.reporter.prom.port=9251-9260 - - -Djobmanager.rpc.address=merged-pipeline-jobmanager + - -Djobmanager.rpc.address=unified-pipeline-jobmanager - -Dtaskmanager.rpc.port=6122 - --config.file.path - - /data/flink/conf/merged-pipeline.conf + - /data/flink/conf/unified-pipeline.conf command: - /opt/flink/bin/taskmanager.sh - {{ if .Values.global.azure.images.merged_pipeline }} - image: "{{ .Values.global.azure.images.merged_pipeline.registry }}/{{ .Values.global.azure.images.merged_pipeline.image }}:{{ .Values.global.azure.images.merged_pipeline.digest }}" + {{ if .Values.global.azure.images.unified_pipeline }} + image: "{{ .Values.global.azure.images.unified_pipeline.registry }}/{{ .Values.global.azure.images.unified_pipeline.image }}:{{ .Values.global.azure.images.unified_pipeline.digest }}" {{ else }} - image: sanketikahub/merged-pipeline:release-0.5.0_RC26 + image: sanketikahub/unified-pipeline:release-0.5.0_RC26 {{ end }} imagePullPolicy: IfNotPresent - name: merged-pipeline-taskmanager + name: unified-pipeline-taskmanager ports: - containerPort: 6122 name: rpc @@ -268,9 +269,9 @@ spec: - mountPath: /data/flink/conf/baseconfig.conf name: flink-config-volume subPath: base-config.conf - - mountPath: /data/flink/conf/merged-pipeline.conf + - mountPath: /data/flink/conf/unified-pipeline.conf name: flink-config-volume - subPath: merged-pipeline.conf + subPath: unified-pipeline.conf workingDir: volumes: - configMap: @@ -281,22 +282,22 @@ spec: path: log4j-console.properties - key: 
base-config path: base-config.conf - - key: merged-pipeline - path: merged-pipeline.conf - name: merged-pipeline-config + - key: unified-pipeline + path: unified-pipeline.conf + name: unified-pipeline-config name: flink-config-volume --- -# Source: obsrv-chart/charts/merged-pipeline/templates/deployment.yaml +# Source: obsrv-chart/charts/unified-pipeline/templates/deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: - name: merged-pipeline-jobmanager + name: unified-pipeline-jobmanager namespace: flink spec: selector: matchLabels: app: flink - component: merged-pipeline-jobmanager + component: unified-pipeline-jobmanager azure-extensions-usage-release-identifier: obsrv-base template: metadata: @@ -305,7 +306,7 @@ spec: prometheus.io/scrape: "true" labels: app: flink - component: merged-pipeline-jobmanager + component: unified-pipeline-jobmanager azure-extensions-usage-release-identifier: obsrv-base spec: initContainers: @@ -340,26 +341,26 @@ spec: - args: - start-foreground - -Dfs.azure.account.key.{{ .Values.global.azure_storage_account_name }}.blob.core.windows.net={{ .Values.global.azure_storage_account_key }} - - --job-classname=org.sunbird.obsrv.pipeline.task.MergedPipelineStreamTask + - --job-classname=org.sunbird.obsrv.pipeline.task.UnifiedPipelineStreamTask - -Dweb.submit.enable=false - -Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter - -Dmetrics.reporter.prom.port=9250 - - -Djobmanager.rpc.address=merged-pipeline-jobmanager + - -Djobmanager.rpc.address=unified-pipeline-jobmanager - -Djobmanager.rpc.port=6123 - -Dparallelism.default=1 - -Dblob.server.port=6124 - -Dqueryable-state.server.ports=6125 - --config.file.path - - /data/flink/conf/merged-pipeline.conf + - /data/flink/conf/unified-pipeline.conf command: - /opt/flink/bin/standalone-job.sh - {{ if .Values.global.azure.images.merged_pipeline }} - image: "{{ .Values.global.azure.images.merged_pipeline.registry }}/{{ 
.Values.global.azure.images.merged_pipeline.image }}:{{ .Values.global.azure.images.merged_pipeline.digest }}" + {{ if .Values.global.azure.images.unified_pipeline }} + image: "{{ .Values.global.azure.images.unified_pipeline.registry }}/{{ .Values.global.azure.images.unified_pipeline.image }}:{{ .Values.global.azure.images.unified_pipeline.digest }}" {{ else }} - image: sanketikahub/merged-pipeline:release-0.5.0_RC26 + image: sanketikahub/unified-pipeline:release-0.5.0_RC26 {{ end }} imagePullPolicy: IfNotPresent - name: merged-pipeline-jobmanager + name: unified-pipeline-jobmanager ports: - containerPort: 6123 name: rpc @@ -381,9 +382,9 @@ spec: - mountPath: /data/flink/conf/baseconfig.conf name: flink-config-volume subPath: base-config.conf - - mountPath: /data/flink/conf/merged-pipeline.conf + - mountPath: /data/flink/conf/unified-pipeline.conf name: flink-config-volume - subPath: merged-pipeline.conf + subPath: unified-pipeline.conf - mountPath: /opt/flink/conf/log4j-console.properties name: flink-config-volume subPath: log4j-console.properties @@ -396,10 +397,10 @@ spec: path: flink-conf.yaml - key: base-config path: base-config.conf - - key: merged-pipeline - path: merged-pipeline.conf + - key: unified-pipeline + path: unified-pipeline.conf - key: log4j_console_properties path: log4j-console.properties - name: merged-pipeline-config + name: unified-pipeline-config name: flink-config-volume --- diff --git a/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/values.yaml b/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/values.yaml index d2c0b221..118a1316 100644 --- a/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/values.yaml +++ b/terraform/modules/helm/unified_helm/obsrv/charts/merged-pipeline/values.yaml @@ -2,7 +2,7 @@ namespace: "flink" imagepullsecrets: "" image: registry: sanketikahub - repository: merged-pipeline + repository: unified-pipeline tag: release-0.5.0_RC23 serviceMonitor: enabled: false @@ 
-142,8 +142,8 @@ base_config: | port = "9042" } -merged-pipeline: - merged-pipeline: |+ +unified-pipeline: + unified-pipeline: |+ include file("/data/flink/conf/baseconfig.conf") kafka { input.topic = ${job.env}".ingest" @@ -158,6 +158,7 @@ merged-pipeline: output.denorm.topic = ${job.env}".denorm" output.denorm.failed.topic = ${job.env}".denorm.failed" output.transform.topic = ${job.env}".transform" + output.transform.failed.topic = ${job.env}".failed" stats.topic = ${job.env}".stats" groupId = ${job.env}"-single-pipeline-group" producer { @@ -193,7 +194,7 @@ merged-pipeline: taskmanager.memory.process.size: 1700m jobmanager.memory.process.size: 1600m state.savepoints.dir: file:///tmp - job_classname: org.sunbird.obsrv.pipeline.task.MergedPipelineStreamTask + job_classname: org.sunbird.obsrv.pipeline.task.UnifiedPipelineStreamTask serviceAccount: # Specifies whether a service account should be created diff --git a/terraform/modules/helm/unified_helm/obsrv/values.yaml b/terraform/modules/helm/unified_helm/obsrv/values.yaml index 9dc108d6..dc21db97 100644 --- a/terraform/modules/helm/unified_helm/obsrv/values.yaml +++ b/terraform/modules/helm/unified_helm/obsrv/values.yaml @@ -41,10 +41,10 @@ global: # digest: sha256:77bdba3135998baadc20015e00a9742eebac52167b90c3e46d0c339a2d668b12 # image: os-shell # registry: docker.io/bitnami - merged_pipeline: + unified_pipeline: # tag: 1.0.2-GA digest: 1.0.3-GA - image: merged-pipeline + image: unified-pipeline registry: *sanketika_docker_registry master_data_processor: # tag: 1.0.2-GA @@ -377,9 +377,9 @@ druid-raw-cluster: eks.amazonaws.com/role-arn: arn:aws:iam::725876873105:role/dev-obsrv-test-druid-raw-sa-iam-role name: druid-raw-sa -merged-pipeline: +unified-pipeline: enabled: true - name: merged-pipeline + name: unified-pipeline namespace: flink env: *global-env checkpoint_store_type: *global-cloud-storage-provider