Skip to content

Commit

Permalink
add datahub
Browse files Browse the repository at this point in the history
  • Loading branch information
zhaojing1987 committed Oct 17, 2024
1 parent 9b7aefd commit dd2d145
Show file tree
Hide file tree
Showing 5 changed files with 298 additions and 24 deletions.
23 changes: 18 additions & 5 deletions apps/datahub/.env
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
W9_POWER_PASSWORD=2PrLxExE45LsCT
W9_NAME=datahub
W9_REPO=
W9_VERSION=latest
W9_HTTP_PORT=9001
W9_VERSION=v0.14.1
W9_DIST=community
W9_REPO=acryldata/datahub-frontend-react
W9_POWER_PASSWORD=8dHZPFLFGoPXuC

#### -- Not allowed to edit below environments when recreate app based on existing data -- ####
W9_ID=datahub
W9_HTTP_PORT=9002
W9_HTTP_PORT_SET=9002
W9_URL=appname.example.com
W9_NETWORK=websoft9
W9_LOGIN_USER=datahub
W9_LOGIN_PASSWORD=datahub
W9_DB_EXPOSE="mysql"
#### ---------------------------------------------------------------------------------------- ####
MYSQL_DATABASE=datahub
MYSQL_USER=datahub
MYSQL_PASSWORD=${W9_POWER_PASSWORD}
MYSQL_ROOT_PASSWORD=${W9_POWER_PASSWORD}
4 changes: 1 addition & 3 deletions apps/datahub/Notes.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
# DataHub


## FAQ
Redeploying the app will cause an error: a Kafka node with the same broker ID already exists in Zookeeper.
8 changes: 4 additions & 4 deletions apps/datahub/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
This is an **Docker Compose solution** powered by [Websoft9](https://www.websoft9.com) based on Docker for DataHub:


- community: 0.10.4
- community: v0.14.1


## System Requirements

The following are the minimal [recommended requirements](https://github.com/datahub-project/datahub):

* **RAM**: 1 GB or more
* **CPU**: 1 cores or higher
* **Disk**: at least 1 GB of free space
* **RAM**: 8 GB or more
* **CPU**: 2 cores or higher
* **Disk**: at least 10 GB of free space
* **bandwidth**: more fluent experience over 100M

## Install
Expand Down
275 changes: 269 additions & 6 deletions apps/datahub/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,17 +1,280 @@
# image: https://hub.docker.com/r/linuxserver/mastodon
# docs: https://docs.linuxserver.io/images/docker-mastodon

#compose file :https://github.com/datahub-project/datahub/blob/master/docker/quickstart/docker-compose-without-neo4j.quickstart.yml

version: '3.8'

services:
broker:
image: confluentinc/cp-kafka:7.4.0
container_name: ${W9_ID}-broker
environment:
- KAFKA_BROKER_ID=1
- KAFKA_ZOOKEEPER_CONNECT=${W9_ID}-zookeeper:2181
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://${W9_ID}-broker:29092,PLAINTEXT_HOST://localhost:9092
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
- KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
- KAFKA_MESSAGE_MAX_BYTES=5242880
- KAFKA_MAX_MESSAGE_BYTES=5242880
depends_on:
zookeeper:
condition: service_healthy
healthcheck:
test: nc -z ${W9_ID}-broker 9092
interval: 1s
retries: 5
start_period: 60s
timeout: 5s
volumes:
- datahub-broker:/var/lib/kafka/data/

datahub-actions:
image: acryldata/datahub-actions:head
container_name: ${W9_ID}-actions
environment:
- DATAHUB_GMS_HOST=datahub-gms-${W9_HTTP_PORT_SET}
- DATAHUB_GMS_PORT=8080
- DATAHUB_GMS_PROTOCOL=http
- DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
- DATAHUB_SYSTEM_CLIENT_SECRET=${W9_POWER_PASSWORD}
- KAFKA_BOOTSTRAP_SERVER=${W9_ID}-broker:29092
- KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT
- METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
- SCHEMA_REGISTRY_URL=http://${W9_ID}-schema-registry:8081
depends_on:
datahub-gms:
condition: service_healthy

datahub-frontend-react:
image: ${W9_REPO}:${W9_VERSION}
container_name: ${W9_ID}
ports:
- ${W9_HTTP_PORT_SET}:9002
env_file: .env
environment:
- DATAHUB_GMS_HOST=datahub-gms-${W9_HTTP_PORT_SET}
- DATAHUB_GMS_PORT=8080
- DATAHUB_SECRET=${W9_POWER_PASSWORD}
- DATAHUB_APP_VERSION=1.0
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml -Dlogback.debug=false -Dpidfile.path=/dev/null
- KAFKA_BOOTSTRAP_SERVER=${W9_ID}-broker:29092
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
- ELASTIC_CLIENT_HOST=${W9_ID}-elasticsearch
- ELASTIC_CLIENT_PORT=9200
depends_on:
datahub-gms:
condition: service_healthy
volumes:
- datahub-plugins:/etc/datahub/plugins

datahub-gms:
image: acryldata/datahub-gms:${W9_VERSION}
container_name: datahub-gms-${W9_HTTP_PORT_SET}
environment:
- DATAHUB_SERVER_TYPE=quickstart
- DATAHUB_TELEMETRY_ENABLED=true
- DATAHUB_UPGRADE_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-duhe-consumer-job-client-gms
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- EBEAN_DATASOURCE_HOST=${W9_ID}-mysql:3306
- EBEAN_DATASOURCE_PASSWORD=${W9_POWER_PASSWORD}
- EBEAN_DATASOURCE_URL=jdbc:mysql://${W9_ID}-mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
- EBEAN_DATASOURCE_USERNAME=datahub
- ELASTICSEARCH_HOST=${W9_ID}-elasticsearch
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_PORT=9200
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- ENTITY_SERVICE_ENABLE_RETENTION=true
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
- GRAPH_SERVICE_DIFF_MODE_ENABLED=true
- GRAPH_SERVICE_IMPL=elasticsearch
- JAVA_OPTS=-Xms1g -Xmx1g
- KAFKA_BOOTSTRAP_SERVER=${W9_ID}-broker:29092
- KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=true
- KAFKA_SCHEMAREGISTRY_URL=http://${W9_ID}-schema-registry:8081
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
- METADATA_SERVICE_AUTH_ENABLED=false
- PE_CONSUMER_ENABLED=true
- UI_INGESTION_ENABLED=true
depends_on:
datahub-upgrade:
condition: service_completed_successfully
healthcheck:
test: curl -sS --fail http://datahub-gms-${W9_HTTP_PORT_SET}:8080/health
interval: 1s
retries: 5
start_period: 90s
timeout: 5s
volumes:
- datahub-plugins:/etc/datahub/plugins

datahub-upgrade:
image: acryldata/datahub-upgrade:${W9_VERSION}
container_name: ${W9_ID}-upgrade
command:
- -u
- SystemUpdate
depends_on:
elasticsearch-setup:
condition: service_completed_successfully
kafka-setup:
condition: service_completed_successfully
mysql-setup:
condition: service_completed_successfully
environment:
- EBEAN_DATASOURCE_USERNAME=datahub
- EBEAN_DATASOURCE_PASSWORD=${W9_POWER_PASSWORD}
- EBEAN_DATASOURCE_HOST=${W9_ID}-mysql:3306
- EBEAN_DATASOURCE_URL=jdbc:mysql://${W9_ID}-mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- KAFKA_BOOTSTRAP_SERVER=${W9_ID}-broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://${W9_ID}-schema-registry:8081
- ELASTICSEARCH_HOST=${W9_ID}-elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_BUILD_INDICES_CLONE_INDICES=false
- GRAPH_SERVICE_IMPL=elasticsearch
- DATAHUB_GMS_HOST=datahub-gms-${W9_HTTP_PORT_SET} #The value of this variable does not support underscores
- DATAHUB_GMS_PORT=8080
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- BACKFILL_BROWSE_PATHS_V2=true
- REPROCESS_DEFAULT_BROWSE_PATHS_V2=false
labels:
datahub_setup_job: true

elasticsearch:
image: elasticsearch:7.10.1
container_name: ${W9_ID}-elasticsearch
deploy:
resources:
limits:
memory: 1G
environment:
- discovery.type=single-node
- ${XPACK_SECURITY_ENABLED:-xpack.security.enabled=false}
- ES_JAVA_OPTS=-Xms256m -Xmx512m -Dlog4j2.formatMsgNoLookups=true
- OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m -Dlog4j2.formatMsgNoLookups=true
healthcheck:
test: curl -sS --fail http://${W9_ID}-elasticsearch:9200/_cluster/health?wait_for_status=yellow&timeout=0s
interval: 1s
retries: 3
start_period: 20s
timeout: 5s
volumes:
- datahub-esdata:/usr/share/elasticsearch/data

elasticsearch-setup:
image: acryldata/datahub-elasticsearch-setup:${W9_VERSION}
container_name: ${W9_ID}-elasticsearch-setup
depends_on:
elasticsearch:
condition: service_healthy
environment:
- ELASTICSEARCH_USE_SSL=false
- USE_AWS_ELASTICSEARCH=false
- ELASTICSEARCH_HOST=${W9_ID}-elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_PROTOCOL=http
labels:
datahub_setup_job: true

kafka-setup:
image: acryldata/datahub-kafka-setup:${W9_VERSION}
container_name: ${W9_ID}-kafka-setup
depends_on:
broker:
condition: service_healthy
schema-registry:
condition: service_healthy
environment:
- DATAHUB_PRECREATE_TOPICS=false
- KAFKA_BOOTSTRAP_SERVER=${W9_ID}-broker:29092
- KAFKA_ZOOKEEPER_CONNECT=${W9_ID}-zookeeper:2181
- USE_CONFLUENT_SCHEMA_REGISTRY=TRUE
labels:
datahub_setup_job: true

mysql:
image: mysql:8.2
container_name: ${W9_ID}-mysql
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin --default-authentication-plugin=mysql_native_password
environment:
- MYSQL_DATABASE=${MYSQL_DATABASE}
- MYSQL_USER=${MYSQL_USER}
- MYSQL_PASSWORD=${W9_POWER_PASSWORD}
- MYSQL_ROOT_PASSWORD=${MYSQL_ROOT_PASSWORD}
healthcheck:
test: mysqladmin ping -h mysql -u $$MYSQL_USER --password=$$MYSQL_PASSWORD
interval: 1s
retries: 5
start_period: 20s
timeout: 5s
restart: on-failure
volumes:
- datahub-mysqldata:/var/lib/mysql

mysql-setup:
image: acryldata/datahub-mysql-setup:${W9_VERSION}
container_name: ${W9_ID}-mysql-setup
depends_on:
mysql:
condition: service_healthy
environment:
- MYSQL_HOST=${W9_ID}-mysql
- MYSQL_PORT=3306
- MYSQL_USERNAME=${MYSQL_USER}
- MYSQL_PASSWORD=${W9_POWER_PASSWORD}
- DATAHUB_DB_NAME=${MYSQL_DATABASE}
labels:
datahub_setup_job: true

schema-registry:
image: confluentinc/cp-schema-registry:7.4.0
container_name: ${W9_ID}-schema-registry
environment:
- SCHEMA_REGISTRY_HOST_NAME=${W9_ID}-schema-registry
- SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT
- SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=${W9_ID}-broker:29092
depends_on:
broker:
condition: service_healthy
healthcheck:
test: nc -z ${W9_ID}-schema-registry 8081
interval: 1s
retries: 3
start_period: 60s
timeout: 5s

zookeeper:
image: confluentinc/cp-zookeeper:7.4.0
container_name: ${W9_ID}-zookeeper
environment:
- ZOOKEEPER_CLIENT_PORT=2181
- ZOOKEEPER_TICK_TIME=2000
healthcheck:
test: echo srvr | nc ${W9_ID}-zookeeper 2181
interval: 5s
retries: 3
start_period: 30s
timeout: 5s
volumes:
- datahub-zkdata:/var/lib/zookeeper/data
- datahub-zklogs:/var/lib/zookeeper/log

networks:
default:
name: $W9_NETWORK
name: ${W9_NETWORK}
external: true

volumes:
postgres:
redis:
datahub-plugins:
datahub-zkdata:
datahub-zklogs:
datahub-broker:
datahub-esdata:
datahub-mysqldata:
12 changes: 6 additions & 6 deletions apps/datahub/variables.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
{
"dist": "community",
"version": [
"0.10.4"
"v0.14.1"
]
}
],
"requirements": {
"cpu": "1",
"memory": "1",
"disk": "1",
"url": "https://github.com/datahub-project/datahub"
"cpu": "2",
"memory": "8",
"disk": "10",
"url": "https://datahubproject.io/docs/quickstart/#prerequisites"
}
}
}

0 comments on commit dd2d145

Please sign in to comment.