Merge pull request #171 from Sanketika-Obsrv/develop
Merging Develop into Main
ravismula authored Jul 23, 2024
2 parents 05c1afc + 27a7de2 · commit 9cd9df4
Showing 81 changed files with 4,095 additions and 118 deletions.
73 changes: 71 additions & 2 deletions terraform/aws/main.tf
@@ -140,9 +140,9 @@ module "flink" {
building_block = var.building_block
flink_container_registry = var.flink_container_registry
flink_image_tag = var.flink_image_tag
flink_merged_pipeline_release_names = var.flink_merged_pipeline_release_names
flink_unified_pipeline_release_names = var.flink_unified_pipeline_release_names
flink_release_names = var.flink_release_names
merged_pipeline_enabled = var.merged_pipeline_enabled
unified_pipeline_enabled = var.unified_pipeline_enabled
flink_checkpoint_store_type = var.flink_checkpoint_store_type
flink_chart_depends_on = [module.kafka, module.postgresql_migration, module.redis_dedup, module.redis_denorm]
postgresql_obsrv_username = module.postgresql.postgresql_obsrv_username
@@ -226,6 +226,12 @@ module "dataset_api" {
dataset_api_namespace = module.eks.dataset_api_namespace
s3_bucket = module.s3.s3_bucket
service_type = var.service_type
enable_lakehouse = var.enable_lakehouse
lakehouse_host = var.lakehouse_host
lakehouse_port = var.lakehouse_port
lakehouse_catalog = var.lakehouse_catalog
lakehouse_schema = var.lakehouse_schema
lakehouse_default_user = var.lakehouse_default_user
}

module "secor" {
@@ -294,6 +300,7 @@ module "command_service" {
postgresql_obsrv_user_password = module.postgresql.postgresql_obsrv_user_password
postgresql_obsrv_database = module.postgresql.postgresql_obsrv_database
flink_namespace = module.flink.flink_namespace
enable_lakehouse = var.enable_lakehouse
}

module "postgresql_migration" {
@@ -308,4 +315,66 @@ module "postgresql_migration" {
postgresql_druid_raw_user_password = module.postgresql.postgresql_druid_raw_user_password
postgresql_obsrv_user_password = module.postgresql.postgresql_obsrv_user_password
data_encryption_key = resource.random_string.data_encryption_key.result
postgresql_hms_user_password = module.postgresql.postgresql_hms_user_password
enable_lakehouse = var.enable_lakehouse
}

module "trino" {
source = "../modules/helm/trino"
count = var.enable_lakehouse ? 1 : 0
trino_namespace = var.hudi_namespace
trino_lakehouse_metadata = {
"hive.s3.aws-access-key" = var.create_velero_user ? module.iam[0].s3_access_key : var.velero_aws_access_key_id
"hive.s3.aws-secret-key" = var.create_velero_user ? module.iam[0].s3_secret_key : var.velero_aws_secret_access_key
}
}

module "hms" {
source = "../modules/helm/hive_meta_store"
count = var.enable_lakehouse ? 1 : 0
hms_namespace = var.hudi_namespace
hms_db_metadata = {
"DATABASE_HOST" = "postgresql-hl.postgresql.svc"
"DATABASE_DB" = module.postgresql.postgresql_hms_database
"DATABASE_USER" = module.postgresql.postgresql_hms_username
"DATABASE_PASSWORD" = module.postgresql.postgresql_hms_user_password
"WAREHOUSE_DIR" = "s3a://${module.s3[0].s3_bucket}/${var.hudi_prefix_path}/"
"THRIFT_PORT" = "9083"
}
hadoop_metadata = {
"fs.s3a.access.key" = var.create_velero_user ? module.iam[0].s3_access_key : var.velero_aws_access_key_id
"fs.s3a.secret.key" = var.create_velero_user ? module.iam[0].s3_secret_key : var.velero_aws_secret_access_key
}
}

module "lakehouse-connector" {
source = "../modules/helm/lakehouse-connector"
count = var.enable_lakehouse ? 1 : 0
env = var.env
building_block = var.building_block
flink_container_registry = var.flink_container_registry
flink_lakehouse_image_tag = var.flink_lakehouse_image_tag
flink_image_name = var.flink_image_name
flink_checkpoint_store_type = var.flink_checkpoint_store_type
flink_chart_depends_on = [module.kafka, module.postgresql_migration, module.redis_dedup, module.redis_denorm]
postgresql_obsrv_username = module.postgresql.postgresql_obsrv_username
postgresql_obsrv_user_password = module.postgresql.postgresql_obsrv_user_password
postgresql_obsrv_database = module.postgresql.postgresql_obsrv_database
checkpoint_base_url = "s3://${module.s3[0].checkpoint_storage_bucket}"
denorm_redis_namespace = module.redis_denorm.redis_namespace
denorm_redis_release_name = module.redis_denorm.redis_release_name
dedup_redis_namespace = module.redis_dedup.redis_namespace
dedup_redis_release_name = module.redis_dedup.redis_release_name
flink_sa_annotations = "eks.amazonaws.com/role-arn: ${module.eks.flink_sa_iam_role}"
flink_namespace = module.eks.flink_namespace
postgresql_service_name = module.postgresql.postgresql_service_name
enable_lakehouse = var.enable_lakehouse
postgresql_hms_username = module.postgresql.postgresql_hms_username
postgresql_hms_user_password = module.postgresql.postgresql_hms_user_password
hudi_bucket = module.s3[0].s3_bucket
hudi_prefix_path = var.hudi_prefix_path
hadoop_metadata = {
"fs.s3a.access.key" = var.create_velero_user ? module.iam[0].s3_access_key : var.velero_aws_access_key_id
"fs.s3a.secret.key" = var.create_velero_user ? module.iam[0].s3_secret_key : var.velero_aws_secret_access_key
}
}
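
Note: the three lakehouse modules above (trino, hms, lakehouse-connector) are gated with Terraform's count meta-argument rather than a separate root module. A minimal sketch of the pattern, reusing names from this diff; the output reference at the end is illustrative, not an output these modules are known to define:

variable "enable_lakehouse" {
  type        = bool
  description = "Toggle to install hudi components (hms, trino and flink job)"
}

module "trino" {
  source = "../modules/helm/trino"
  # Zero instances when the flag is off; a single-element list when it is on.
  count  = var.enable_lakehouse ? 1 : 0
}

# References into a counted module must index the instance, which is why the
# diff writes module.s3[0] and module.iam[0] for modules declared with count:
# trino_namespace = module.trino[0].namespace   # illustrative output name
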
69 changes: 65 additions & 4 deletions terraform/aws/variables.tf
@@ -141,17 +141,17 @@ variable "flink_release_names" {
}
}

variable "flink_merged_pipeline_release_names" {
variable "flink_unified_pipeline_release_names" {
description = "Create release names"
type = map(string)
default = {
merged-pipeline = "merged-pipeline"
unified-pipeline = "unified-pipeline"
master-data-processor = "master-data-processor"
}
}

variable "merged_pipeline_enabled" {
description = "Toggle to deploy merged pipeline"
variable "unified_pipeline_enabled" {
description = "Toggle to deploy unified pipeline"
type = bool
default = true
}
@@ -170,3 +170,64 @@ variable "secor_image_tag" {
type = string
description = "secor image version"
}


variable "hudi_namespace" {
type = string
default = "hudi"
description = "Apache Hudi namespace"
}

variable "hudi_prefix_path" {
type = string
description = "Hudi prefix path"
default = "hudi"
}

variable "enable_lakehouse" {
type = bool
description = "Toggle to install hudi components (hms, trino and flink job)"
}

variable "lakehouse_host" {
type = string
description = "Lakehouse Host"
default = "http://trino.hudi.svc.cluster.local"
}

variable "lakehouse_port" {
type = string
description = "Trino port"
default = "8080"
}

variable "lakehouse_catalog" {
type = string
description = "Lakehouse Catalog name"
default = "lakehouse"
}

variable "lakehouse_schema" {
type = string
description = "Lakehouse Schema name"
default = "hms"
}

variable "lakehouse_default_user" {
type = string
description = "Lakehouse default user"
default = "admin"
}


variable "flink_image_name" {
type = string
description = "Flink image name."
default = "lakehouse-connector"
}

variable "flink_lakehouse_image_tag" {
type = string
description = "Flink lakehouse image tag."
default = "1.0.0"
}
3 changes: 3 additions & 0 deletions terraform/aws/vars/cluster_overrides.tfvars
@@ -32,3 +32,6 @@ dataset_api_image_tag = "1.0.2-GA"
flink_image_tag = "1.0.1-GA"
secor_image_tag = "1.0.0-GA"
superset_image_tag = "3.0.2"

enable_lakehouse = false
flink_lakehouse_image_tag = "1.0.1"
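Note: with enable_lakehouse defaulting to false here, the Hudi, Trino, and HMS components stay uninstalled. Going by the variables declared in terraform/aws/variables.tf above, enabling them would presumably be a small override in this same tfvars file; the commented lines below merely restate the declared defaults:

enable_lakehouse          = true
flink_lakehouse_image_tag = "1.0.1"
# Optional overrides; these repeat the defaults from terraform/aws/variables.tf:
# lakehouse_host = "http://trino.hudi.svc.cluster.local"
# lakehouse_port = "8080"
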
4 changes: 2 additions & 2 deletions terraform/gcp/main.tf
@@ -390,9 +390,9 @@ module "flink" {
building_block = var.building_block
flink_container_registry = var.flink_container_registry
flink_image_tag = var.flink_image_tag
flink_merged_pipeline_release_names = var.flink_merged_pipeline_release_names
flink_unified_pipeline_release_names = var.flink_unified_pipeline_release_names
flink_release_names = var.flink_release_names
merged_pipeline_enabled = var.merged_pipeline_enabled
unified_pipeline_enabled = var.unified_pipeline_enabled
flink_checkpoint_store_type = var.flink_checkpoint_store_type
flink_chart_depends_on = [ module.kafka, module.postgresql_migration, module.redis_dedup, module.redis_denorm ]
postgresql_obsrv_username = module.postgresql.postgresql_obsrv_username
8 changes: 4 additions & 4 deletions terraform/gcp/variables.tf
@@ -226,17 +226,17 @@ variable "flink_release_names" {
}
}

variable "flink_merged_pipeline_release_names" {
variable "flink_unified_pipeline_release_names" {
description = "Create release names"
type = map(string)
default = {
merged-pipeline = "merged-pipeline"
unified-pipeline = "unified-pipeline"
master-data-processor = "master-data-processor"
}
}

variable "merged_pipeline_enabled" {
description = "Toggle to deploy merged pipeline"
variable "unified_pipeline_enabled" {
description = "Toggle to deploy unified pipeline"
type = bool
default = true
}
@@ -9,3 +9,6 @@ metadata:
data:
service_config: |
{{ .Values.service_config | indent 4 }}
{{ if .Values.enable_lakehouse }}
{{ toYaml .Values.hudi_jobs | indent 8 }}
{{ end }}
@@ -8,21 +8,14 @@ cpu_requests: 0.1
cpu_limits: 0.1
memory_requests: 512Mi
memory_limits: 512Mi
enable_lakehouse: false

hudi_jobs:
- name: "LakehouseConnectorJob"
release_name: lakehouse-connector
job_manager_url: "lakehouse-connector-jobmanager.flink.svc.cluster.local:8081"

service_config: |
flink:
namespace: flink
reinstall_sleep_time: 3
jobs:
- name: "PipelineMergedJob"
release_name: merged-pipeline
job_manager_url: "merged-pipeline-jobmanager.flink.svc.cluster.local:8081"
- name: "MasterDataProcessorJob"
release_name: master-data-processor
job_manager_url: "master-data-processor-jobmanager.flink.svc.cluster.local:8081"
- name: "KafkaConnectorJob"
release_name: kafka-connector
job_manager_url: "kafka-connector-jobmanager.flink.svc.cluster.local:8081"
commands:
RESTART_PIPELINE:
@@ -37,6 +30,21 @@ service_config: |
database: obsrv
helm_charts_base_dir: /app/helm-charts
flink:
namespace: flink
reinstall_sleep_time: 3
jobs:
- name: "UnifiedPipelineJob"
release_name: unified-pipeline
job_manager_url: "unified-pipeline-jobmanager.flink.svc.cluster.local:8081"
- name: "MasterDataProcessorJob"
release_name: master-data-processor
job_manager_url: "master-data-processor-jobmanager.flink.svc.cluster.local:8081"
- name: "KafkaConnectorJob"
release_name: kafka-connector
job_manager_url: "kafka-connector-jobmanager.flink.svc.cluster.local:8081"
rbac:
# kubernetes.rest-service.exposed.type: NodePort requires
38 changes: 24 additions & 14 deletions terraform/modules/helm/command_service/command_service.yaml.tfpl
@@ -5,31 +5,41 @@ image:
tag: ${command_service_image_tag}

service_config: |

commands:
RESTART_PIPELINE:
workflow:
- RESTART_PIPELINE_JOBS

postgres:
db_host: postgresql-hl.postgresql.svc.cluster.local
db_port: 5432
db_user: ${postgresql_obsrv_username}
db_password: ${postgresql_obsrv_user_password}
database: ${postgresql_obsrv_database}

flink:
namespace: ${flink_namespace}
reinstall_sleep_time: 3
jobs:
- name: "PipelineMergedJob"
release_name: merged-pipeline
job_manager_url: "merged-pipeline-jobmanager.flink.svc.cluster.local:8081"
- name: "UnifiedPipelineJob"
release_name: unified-pipeline
job_manager_url: "unified-pipeline-jobmanager.flink.svc.cluster.local:8081"
- name: "MasterDataProcessor"
release_name: master-data-processor
job_manager_url: "master-data-processor-jobmanager.flink.svc.cluster.local:8081"
- name: "KafkaConnectorJob"
release_name: kafka-connector
job_manager_url: "kafka-connector-jobmanager.flink.svc.cluster.local:8081"

commands:
RESTART_PIPELINE:
workflow:
- RESTART_PIPELINE_JOBS

postgres:
db_host: postgresql-hl.postgresql.svc.cluster.local
db_port: 5432
db_user: ${postgresql_obsrv_username}
db_password: ${postgresql_obsrv_user_password}
database: ${postgresql_obsrv_database}
helm_charts_base_dir: /app/helm-charts


enable_lakehouse: ${enable_lakehouse}

hudi_jobs:
- name: "LakehouseConnectorJob"
release_name: lakehouse-connector
job_manager_url: "lakehouse-connector-jobmanager.flink.svc.cluster.local:8081"

helm_charts_base_dir: /app/helm-charts
2 changes: 2 additions & 0 deletions terraform/modules/helm/command_service/main.tf
@@ -19,6 +19,8 @@ resource "helm_release" "command_service" {
postgresql_obsrv_user_password = var.postgresql_obsrv_user_password
postgresql_obsrv_database = var.postgresql_obsrv_database
flink_namespace = var.flink_namespace
enable_lakehouse = var.enable_lakehouse

})
]
}
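
Note: the variables shown here are rendered into command_service.yaml.tfpl with Terraform's built-in templatefile function and passed to the Helm provider as chart values. A minimal sketch of the surrounding resource, assuming the release name and chart path (only the template variables visible in this hunk are included; the real call also passes the postgresql_* and image-tag variables shown above):

resource "helm_release" "command_service" {
  name  = "command-service"                    # assumed release name
  chart = "${path.module}/command-service"     # assumed local chart path
  values = [
    templatefile("${path.module}/command_service.yaml.tfpl", {
      flink_namespace  = var.flink_namespace
      enable_lakehouse = var.enable_lakehouse
      # ... remaining variables from the hunk above
    })
  ]
}
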
6 changes: 5 additions & 1 deletion terraform/modules/helm/command_service/variables.tf
@@ -85,4 +85,8 @@ variable "flink_namespace" {
default = "flink"
}



variable "enable_lakehouse" {
type = bool
description = "Toggle to install hudi components (hms, trino and flink job)"
}
@@ -44,5 +44,11 @@ data:
grafana_token: "{{ .Values.grafana_service.GRAFANA_TOKEN }}"
{{- end }}


{{- if .Values.enable_lakehouse }}
lakehouse_host: "{{ .Values.lakehouse_service.LAKEHOUSE_HOST }}"
lakehouse_port: "{{ .Values.lakehouse_service.LAKEHOUSE_PORT }}"
lakehouse_catalog: "{{ .Values.lakehouse_service.LAKEHOUSE_CATALOG }}"
lakehouse_schema: "{{ .Values.lakehouse_service.LAKEHOUSE_SCHEMA }}"
lakehouse_default_user: "{{ .Values.lakehouse_service.LAKEHOUSE_DEFAULT_USER }}"
{{- end}}

@@ -51,4 +51,12 @@ EXCLUDE_DATASOURCE_VALIDATION: "system-stats,failed-events-summary,masterdata-sy
service_account_annotations:
eks.amazonaws.com/role-arn: ""
service:
type: LoadBalancer
type: LoadBalancer

enable_lakehouse: false
lakehouse_service:
LAKEHOUSE_HOST: http://obsrv-trino
LAKEHOUSE_PORT: "8080"
LAKEHOUSE_CATALOG: "lakehouse"
LAKEHOUSE_SCHEMA: "hms"
LAKEHOUSE_DEFAULT_USER: "admin"