From 25b6664d5102f699f74c720aeabe18905acd95ce Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Fri, 29 Mar 2019 13:02:37 -0700 Subject: [PATCH 01/24] DX-15734: add dremio-admin Make running it conditional - either this is running or the Dremio cluster. Controlled via helm value --set DremioAdmin=true Change-Id: Ia6d30bcda29e5ea606f57aa6ac22b65aad8a3a91 --- charts/dremio/README.md | 20 ++++++++++ charts/dremio/templates/dremio-admin.yaml | 40 +++++++++++++++++++ .../dremio/templates/dremio-coordinator.yaml | 2 + charts/dremio/templates/dremio-executor.yaml | 2 + charts/dremio/templates/dremio-master.yaml | 2 + .../templates/dremio-service-client.yaml | 2 + charts/dremio/templates/zookeeper.yaml | 2 + 7 files changed, 70 insertions(+) create mode 100644 charts/dremio/templates/dremio-admin.yaml diff --git a/charts/dremio/README.md b/charts/dremio/README.md index 08463be8..b5e620a5 100644 --- a/charts/dremio/README.md +++ b/charts/dremio/README.md @@ -93,6 +93,26 @@ helm upgrade dremio --set executor.count=5 You can also scale down the same way. +### Running offline dremio-admin commands +The dremio-admin administration commands clean, restore and set-password need to be run when +the Dremio cluster is not running. So, before running these commands, you need to shut down +the Dremio cluster. Use the helm delete command to delete the helm release. +(Kubernetes does not delete the persistent store volumes when you delete statefulset pods and +when you install the cluster again using helm, the existing persistent store will be used and +you will get your Dremio cluster running again.) + +After the Dremio cluster is shut down, start the dremio-admin pod using +```bash +helm install --wait dremio --set DremioAdmin=true +``` +Once the pod is running, you can connect to the pod using +```bash +kubectl exec -it dremio-admin -- bash +``` +You now have a bash shell from which you can run the dremio-admin commands. 
+ +Once you are done, you can delete the helm release for the dremio-admin and start your Dremio cluster. + #### Upgrading Dremio You should attempt upgrade when no queries are running on the cluster. Update the Dremio image tag in your values.yaml file. E.g. ```bash diff --git a/charts/dremio/templates/dremio-admin.yaml b/charts/dremio/templates/dremio-admin.yaml new file mode 100644 index 00000000..3de91c43 --- /dev/null +++ b/charts/dremio/templates/dremio-admin.yaml @@ -0,0 +1,40 @@ +{{ if .Values.DremioAdmin }} +# dremio-admin pod is used to run offline commands like +# clean, restore or set-password against the Dremio cluster. +# The Dremio cluster should be shutdown before attempting to +# create the dremio-admin pod. +# You connect to the pod (kubectl exec -it dremio-admin -- bash), +# go to /opt/dremio/bin and run dremio-admin commands as documented. +apiVersion: v1 +kind: Pod +metadata: + name: dremio-admin +spec: + containers: + - name: dremio-admin + image: {{.Values.image}} + imagePullPolicy: IfNotPresent + stdin: true + tty: true + resources: + requests: + memory: {{.Values.coordinator.memory}}M + cpu: {{.Values.coordinator.cpu}} + volumeMounts: + - name: dremio-master-volume + mountPath: /opt/dremio/data + - name: dremio-config + mountPath: /opt/dremio/conf + command: ["sleep", "infinity"] + {{- if .Values.imagePullSecrets }} + imagePullSecrets: + - name: {{ .Values.imagePullSecrets }} + {{- end}} + volumes: + - name: dremio-master-volume + persistentVolumeClaim: + claimName: dremio-master-volume-dremio-master-0 + - name: dremio-config + configMap: + name: dremio-config +{{ end }} diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index bfede34c..7867902b 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -70,3 +71,4 @@ spec: 
imagePullSecrets: - name: {{ .Values.imagePullSecrets }} {{- end}} +{{ end }} diff --git a/charts/dremio/templates/dremio-executor.yaml b/charts/dremio/templates/dremio-executor.yaml index 8facdd0c..72788140 100644 --- a/charts/dremio/templates/dremio-executor.yaml +++ b/charts/dremio/templates/dremio-executor.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -91,3 +92,4 @@ spec: resources: requests: storage: {{.Values.executor.volumeSize}} +{{ end }} diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index 1930ba06..122728ed 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -111,3 +112,4 @@ spec: resources: requests: storage: {{.Values.coordinator.volumeSize}} +{{ end }} diff --git a/charts/dremio/templates/dremio-service-client.yaml b/charts/dremio/templates/dremio-service-client.yaml index 8149368e..a1906738 100644 --- a/charts/dremio/templates/dremio-service-client.yaml +++ b/charts/dremio/templates/dremio-service-client.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: v1 kind: Service metadata: @@ -38,3 +39,4 @@ spec: clusterIP: None selector: role: dremio-cluster-pod +{{ end }} diff --git a/charts/dremio/templates/zookeeper.yaml b/charts/dremio/templates/zookeeper.yaml index a272234c..7e36cb8e 100644 --- a/charts/dremio/templates/zookeeper.yaml +++ b/charts/dremio/templates/zookeeper.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: v1 kind: Service metadata: @@ -133,3 +134,4 @@ spec: resources: requests: storage: 10Gi +{{ end }} From a54caa82f3f02d3f4e4bbf88ed5e461b5cfe943f Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Wed, 3 Apr 2019 17:24:23 -0700 Subject: [PATCH 02/24] DX-15692: configurable zookeeper resource properties Change-Id: 
Ica602f61421506708689bedb36d0d30c887c8407 --- charts/dremio/templates/zookeeper.yaml | 10 +++++----- charts/dremio/values.yaml | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/charts/dremio/templates/zookeeper.yaml b/charts/dremio/templates/zookeeper.yaml index 7e36cb8e..d1bc3f93 100644 --- a/charts/dremio/templates/zookeeper.yaml +++ b/charts/dremio/templates/zookeeper.yaml @@ -49,7 +49,7 @@ spec: matchLabels: app: zk serviceName: zk-hs - replicas: 1 + replicas: {{.Values.zookeeper.count}} updateStrategy: type: RollingUpdate podManagementPolicy: Parallel @@ -74,8 +74,8 @@ spec: image: "k8s.gcr.io/kubernetes-zookeeper:1.0-3.4.10" resources: requests: - memory: "1Gi" - cpu: "0.5" + memory: "{{.Values.zookeeper.memory}}M" + cpu: "{{.Values.zookeeper.cpu}}" ports: - containerPort: 2181 name: client @@ -87,7 +87,7 @@ spec: - sh - -c - "start-zookeeper \ - --servers=1 \ + --servers={{.Values.zookeeper.count}} \ --data_dir=/var/lib/zookeeper/data \ --data_log_dir=/var/lib/zookeeper/data/log \ --conf_dir=/opt/zookeeper/conf \ @@ -133,5 +133,5 @@ spec: accessModes: [ "ReadWriteOnce" ] resources: requests: - storage: 10Gi + storage: {{.Values.zookeeper.volumeSize}} {{ end }} diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index ad01d742..eb1bf320 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -23,6 +23,11 @@ executor: cpu: 4 count: 3 volumeSize: 100Gi +zookeeper: + memory: 1024 + cpu: 0.5 + count: 3 + volumeSize: 10Gi # If your Kubernetes cluster does not support LoadBalancer, # comment out the line below for the helm chart to succeed or add From a7bff697e1557256668a9c9a93392a49b16239d2 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Wed, 3 Apr 2019 17:06:31 -0700 Subject: [PATCH 03/24] DX-15572: Add support of uploads in S3 or ADLS Enabling support of uploads to S3 or ADLS via config entries in values.yaml. 
Change-Id: Ie3233cb7085268ecd6aef010be5f61ad67cefc3a --- charts/dremio/config/core-site.xml | 52 +++++++++++++++++++ charts/dremio/config/dremio.conf | 12 ++++- charts/dremio/templates/dremio-configmap.yaml | 2 +- charts/dremio/values.yaml | 17 ++++++ 4 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 charts/dremio/config/core-site.xml diff --git a/charts/dremio/config/core-site.xml b/charts/dremio/config/core-site.xml new file mode 100644 index 00000000..1c2f3237 --- /dev/null +++ b/charts/dremio/config/core-site.xml @@ -0,0 +1,52 @@ + + + + + {{- if and .Values.uploads.type (eq .Values.uploads.type "aws") }} + + fs.s3a.access.key + AWS access key ID. + {{ required "AWS access key required" .Values.uploads.aws.accessKey}} + + + fs.s3a.secret.key + AWS secret key. + {{ required "AWS secret required" .Values.uploads.aws.secret}} + + {{- end }} + + {{- if and .Values.uploads.type (eq .Values.uploads.type "azure") }} + + + fs.adl.impl + Must be set to org.apache.hadoop.fs.adl.AdlFileSystem + org.apache.hadoop.fs.adl.AdlFileSystem + + + dfs.adls.oauth2.client.id + Application ID of the registered application under Azure Active Directory + {{required "Azure application ID required" .Values.uploads.azure.applicationId}} + + + dfs.adls.oauth2.credential + Generated password value for the registered application + {{required "Azure secret value required" .Values.uploads.azure.secret}} + + + dfs.adls.oauth2.refresh.url + Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. + {{required "Azure OAuth2 token endpoint required" .Values.uploads.azure.oauth2EndPoint}} + + + dfs.adls.oauth2.access.token.provider.type + Must be set to ClientCredential + ClientCredential + + + fs.adl.impl.disable.cache + Only include this property AFTER validating the ADLS connection. 
+ false + + {{- end }} + diff --git a/charts/dremio/config/dremio.conf b/charts/dremio/config/dremio.conf index 0aa7b656..4e754236 100644 --- a/charts/dremio/config/dremio.conf +++ b/charts/dremio/config/dremio.conf @@ -20,12 +20,22 @@ paths: { # the distributed path Dremio data including job results, downloads, uploads, etc #dist: "pdfs://"${paths.local}"/pdfs" + + # If you are editing the uploads value in this file, please delete all the lines starting with double curly braces + {{- if .Values.uploads.type }} + {{- if eq .Values.uploads.type "aws" }} + uploads: "s3a://{{required "AWS bucketname required" .Values.uploads.aws.bucketName}}{{required "Path required" .Values.uploads.aws.path}}" + {{- end }} + {{- if eq .Values.uploads.type "azure" }} + uploads: "adl://{{required "Azure Datalake store name required" .Values.uploads.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.uploads.azure.path}}" + {{- end }} + {{- end }} } services: { # The services running are controlled via command line options passed in # while starting the services via kubernetes. Updating the three values - # below will not impact what services are running. + # below will not impact what services are running. # coordinator.enabled: true, # coordinator.master.enabled: true, # executor.enabled: true diff --git a/charts/dremio/templates/dremio-configmap.yaml b/charts/dremio/templates/dremio-configmap.yaml index 33fb22bd..442ca1dd 100644 --- a/charts/dremio/templates/dremio-configmap.yaml +++ b/charts/dremio/templates/dremio-configmap.yaml @@ -3,4 +3,4 @@ kind: ConfigMap metadata: name: dremio-config data: - {{- (.Files.Glob "config/*").AsConfig | nindent 2 }} + {{- tpl (.Files.Glob "config/*").AsConfig . 
| nindent 2 }} diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index eb1bf320..82c8f7d9 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -52,3 +52,20 @@ serviceType: LoadBalancer # the credentials in a kubernetes secret and provide the secret name here. # For more information, see https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod #imagePullSecrets=secretname + +# Control where uploaded files are stored. +# See https://docs.dremio.com/deployment/distributed-storage.html for more information +uploads: + # Valid values are local, aws or azure. aws and azure choice requires additional configuration data. + type: "local" + aws: + bucketName: "Your_AWS_bucket_name" + path: "/" + accessKey: "Your_AWS_Access_Key" + secret: "Your_AWS_Secret" + azure: + datalakeStoreName: "Your_Azure_DataLake_Storage_name" + path: "/" + applicationId: "Your_Azure_Application_Id" + secret: "Your_Azure_Secret" + oauth2EndPoint: "Azure_OAuth2_Endpoint" From e7ceae3f167a7851ca433e8105d9891efca17abb Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Tue, 9 Apr 2019 15:59:48 -0700 Subject: [PATCH 04/24] DX-15693: enable nodeSelector for pods Change-Id: I3b6746fb8eddeff8c56ae4876453d47bfc30ef9b --- charts/dremio/templates/dremio-coordinator.yaml | 6 ++++++ charts/dremio/templates/dremio-executor.yaml | 6 ++++++ charts/dremio/templates/dremio-master.yaml | 6 ++++++ charts/dremio/templates/zookeeper.yaml | 6 ++++++ charts/dremio/values.yaml | 5 +++++ 5 files changed, 29 insertions(+) diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index 7867902b..2e4c1eaa 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -25,6 +25,12 @@ spec: dremio-configmap/checksum: {{ (.Files.Glob "config/*").AsConfig | sha256sum }} spec: terminationGracePeriodSeconds: 5 + {{- if .Values.nodeSelector }} + 
nodeSelector: + {{- range $key, $value := .Values.nodeSelector }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} containers: - name: dremio-coordinator image: {{.Values.image}} diff --git a/charts/dremio/templates/dremio-executor.yaml b/charts/dremio/templates/dremio-executor.yaml index 72788140..78ddc7af 100644 --- a/charts/dremio/templates/dremio-executor.yaml +++ b/charts/dremio/templates/dremio-executor.yaml @@ -25,6 +25,12 @@ spec: dremio-configmap/checksum: {{ (.Files.Glob "config/*").AsConfig | sha256sum }} spec: terminationGracePeriodSeconds: 5 + {{- if .Values.nodeSelector }} + nodeSelector: + {{- range $key, $value := .Values.nodeSelector }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} containers: - name: dremio-executor image: {{.Values.image}} diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index 122728ed..4abb730e 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -28,6 +28,12 @@ spec: - dremio-master topologyKey: "kubernetes.io/hostname" terminationGracePeriodSeconds: 5 + {{- if .Values.nodeSelector }} + nodeSelector: + {{- range $key, $value := .Values.nodeSelector }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} containers: - name: dremio-master-coordinator image: {{.Values.image}} diff --git a/charts/dremio/templates/zookeeper.yaml b/charts/dremio/templates/zookeeper.yaml index d1bc3f93..4ad5a9c6 100644 --- a/charts/dremio/templates/zookeeper.yaml +++ b/charts/dremio/templates/zookeeper.yaml @@ -68,6 +68,12 @@ spec: values: - zk topologyKey: "kubernetes.io/hostname" + {{- if .Values.nodeSelector }} + nodeSelector: + {{- range $key, $value := .Values.nodeSelector }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} containers: - name: kubernetes-zookeeper imagePullPolicy: Always diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index 82c8f7d9..a4400a5e 100644 --- a/charts/dremio/values.yaml +++ 
b/charts/dremio/values.yaml @@ -53,6 +53,11 @@ serviceType: LoadBalancer # For more information, see https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod #imagePullSecrets=secretname +# Target pods to nodes based on labels set on the nodes. +# For more information, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +#nodeSelector: +# key: value + # Control where uploaded files are stored. # See https://docs.dremio.com/deployment/distributed-storage.html for more information uploads: From 85252f0873f1c0d1a73d858e58e962c91f2e7c30 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Tue, 9 Apr 2019 16:11:12 -0700 Subject: [PATCH 05/24] DX-15949: enable readiness probe for dremio-master Change-Id: I2e3444eccd95cdce7fab18045b43270cbbb07f79 --- charts/dremio/templates/dremio-master.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index 4abb730e..423ce78f 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -70,6 +70,11 @@ spec: name: client - containerPort: 45678 name: server + readinessProbe: + tcpSocket: + port: 9047 + initialDelaySeconds: 5 + periodSeconds: 5 initContainers: - name: start-only-one-master image: busybox From 1fb92f7f1869f2bd0ebfe5e2ef522c6bafdc591d Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Thu, 9 May 2019 17:16:38 -0700 Subject: [PATCH 06/24] DX-16307: Deploy to Azure using ARM templates Change-Id: I3534f554ee2168ee88e7ae19380e33696a27f2a3 --- README.md | 9 +- azure/arm-templates/README.md | 61 +++ azure/arm-templates/azuredeploy.json | 187 +++++++ azure/arm-templates/nested/dremioCluster.json | 460 ++++++++++++++++++ azure/arm-templates/nested/dremioState.json | 408 ++++++++++++++++ azure/arm-templates/scripts/setupDremio.sh | 101 ++++ charts/dremio/config/logback-admin.xml | 67 +++ 7 files changed, 1289 insertions(+), 4 
deletions(-) create mode 100644 azure/arm-templates/README.md create mode 100644 azure/arm-templates/azuredeploy.json create mode 100644 azure/arm-templates/nested/dremioCluster.json create mode 100644 azure/arm-templates/nested/dremioState.json create mode 100644 azure/arm-templates/scripts/setupDremio.sh create mode 100644 charts/dremio/config/logback-admin.xml diff --git a/README.md b/README.md index b21a8209..32e7c51a 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ -# Dremio Container Tools +# Dremio Cloud Tools -This repository contains: +This repository contains tools and utilities to deploy Dremio to cloud environments: -* Tools to build [Dremio Docker images](images/dremio-oss). -* Example [helm chart](charts/dremio) to deploy Dremio to Kubernetes. +* [Dockerfile](images/dremio-oss) to build Dremio Docker images. +* [Helm chart](charts/dremio) to deploy Dremio to Kubernetes. +* [Azure Resource Manager (ARM) template](azure/arm-templates) to deploy to Azure. These are currently *experimental* items and should be evaluated and extended based on individual needs. diff --git a/azure/arm-templates/README.md b/azure/arm-templates/README.md new file mode 100644 index 00000000..210a9f1c --- /dev/null +++ b/azure/arm-templates/README.md @@ -0,0 +1,61 @@ + +# Deploying Dremio to Azure + +This deploys a Dremio cluster on Azure VMs. The deployment creates a master coordinator node and number of executor nodes depending on the size of the cluster chosen. The table below provides the machine type and number of executor nodes for the different sizes of Dremio clusters. + +| Cluster size | Coordinator VM Type | Executor VM Type | No. 
of Executors | +|--------------|---------------------|------------------|------------------| +| X-Small | Standard_D4_v3 | Standard_E16s_v3 | 1 | +| Small | Standard_D4_v3 | Standard_E16s_v3 | 5 | +| Medium | Standard_D8_v3 | Standard_E16s_v3 | 10 | +| Large | Standard_D8_v3 | Standard_E16s_v3 | 25 | +| X-Large | Standard_D8_v3 | Standard_E16s_v3 | 50 | + +The deployment resources are: +``` +┌───────────────────────────┐ +│ WebUI on 9047 │ +│ JDBC/ODBC client on 31010 │ +└─────────────┬─────────────┘ + │ +┌────────────────────────────┼─────────────────────────────────────┐ +│ VirtualNetwork │ │ +│ ┌──────────────────────────▼───────────────────────────────────┐ │ +│ │ Subnet ┌──────────────────────────┐ ┌────────────────┐ │ │ +│ │ │ LoadBalancer │ │ Security Group │ │ │ +│ │ └──────────────────┬───────┘ │Allow access to │ │ │ +│ │ │ │22, 9047, 31010 │ │ │ +│ │ ┌───────────────────┘ └────────────────┘ │ │ +│ │ │ │ │ +│ │ │ │ │ +│ │ ▼ │ │ +│ │ ┌───────────────────┐ ┌───────────────────┐ │ │ +│ │ │Master Coordinator │ │ Executor ├┐ │ │ +│ │ │ (Azure VM) │───────────▶│(Azure VM Scaleset)│├─┐ │ │ +│ │ └───────────────────┘ └┬──────────────────┘│ │ │ │ +│ │ ┌───────────────────┐ └─┬─────────────────┘ │ │ │ +│ │ │ Dremio Metadata │ └───────────────────┘ │ │ +│ │ │ (Azure Disk) │ │ │ +│ │ └───────────────────┘ │ │ +│ └──────────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────┘ +``` +You can try it out: [![Azure ARM Template](http://azuredeploy.net/deploybutton.png)](https://portal.azure.com/#create/microsoft.template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fdremio%2Fdremio-cloud-tools%2Fmaster%2Fazure%2Farm-templates%2Fazuredeploy.json) + +The inputs required during deployment are: + +|Input Parameter|Description | +|---|---| +| Subscription |Azure subscription where the cluster should be deployed. | +| Resource Group |The Azure Resource group where the cluster should be deployed. 
You can create a new one too. It is recommended to create a new one as all resources are created in that group and deleting the group will delete all resources created. | +| Location |The Azure location where the cluster resources will be deployed. | +| Cluster Name |A name for your cluster.| +| Cluster Size |Pick a size based on your needs.| +| SSH Username |The username that can be used to login to your nodes.| +| Authentication Type |Password or Key based authentication for ssh.| +| Password or SSH Public Key |The password or ssh public key | +| Use Existing Subnet | (Optional) id of an existing subnet. The subnet must be in the same region as the Dremio cluster resource group. It is of the form /subscriptions/xxxx/resourceGroups/xxxx/providers/Microsoft.Network/virtualNetworks/xxxx/subnets/xxxx| +| Use Private IP | Select true if you are using existing subnet and you want to use an internal ip from the subnet to access Dremio. | +| Dremio Binary | Publicly accessible URL to a Dremio installation rpm | + +Once the deployment is successful, you will find the URL to Dremio UI in the output section of the deployment. diff --git a/azure/arm-templates/azuredeploy.json b/azure/arm-templates/azuredeploy.json new file mode 100644 index 00000000..e9102fc8 --- /dev/null +++ b/azure/arm-templates/azuredeploy.json @@ -0,0 +1,187 @@ + +{ + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "clusterName": { + "type": "string", + "defaultValue": "mydremio", + "metadata": { + "description": "Dremio cluster name in Azure." + } + }, + "clusterSize": { + "type": "string", + "allowedValues": ["X-Small (1 executor)", "Small (5 executors)", "Medium (10 executors)", "Large (25 executors)", "X-Large (50 executors)"], + "metadata": { + "description": "The type and number of machines are chosen based on the size selected." 
+ } + }, + "SSHUsername": { + "type": "string", + "defaultValue": "azuser", + "metadata": { + "description": "SSH username for the virtual machines. You need it if you want to login to the machines." + } + }, + "authenticationType": { + "type": "string", + "defaultValue": "password", + "allowedValues": [ + "password", + "SSHPublicKey" + ], + "metadata": { + "description": "Type of authentication to use on the virtual machines." + } + }, + "PasswordOrSSHPublicKey": { + "type": "securestring", + "metadata": { + "description": "Password or ssh public key for the virtual machines. If password, password must be minimum 12 characters with at least 1 upper case letter, 1 lower case letter and 1 number." + } + }, + "useExistingSubnet": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Optional - resource id of existing subnet to deploy to; the subnet needs to be in the same region as the cluster. If empty, a new virtual network and subnet will be created." + } + }, + "usePrivateIP": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Select true if you are using existing subnet and you want to use an internal ip from the subnet to access Dremio." 
+ } + }, + "dremioBinary": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Optional - publicly accessible URL to a Dremio installation rpm" + } + } + }, + "variables": { + "baseURI": "https://raw.githubusercontent.com/dremio/dremio-cloud-tools/master/azure/arm-templates/nested/", + "apiVersion": "2018-05-01", + "shortName": "[take(resourceGroup().name, 40)]", + "rgName": "[resourceGroup().name]", + "location": "[resourceGroup().location]", + "stateRgName": "[resourceGroup().name]", + "dataDiskName": "[concat(parameters('clusterName'), '-master-data-disk')]", + "dataDiskId": "[concat(subscription().id, '/resourceGroups/', variables('stateRgName'), '/providers/Microsoft.Compute/disks/', variables('dataDiskName'))]", + "clusterSizes": { + "X-Small (1 executor)": { + "coordinatorVmSize": "Standard_D4_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 1, + "metadataDiskSize": 10 + }, + "Small (5 executors)": { + "coordinatorVmSize": "Standard_D4_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 5, + "metadataDiskSize": 50 + }, + "Medium (10 executors)": { + "coordinatorVmSize": "Standard_D8_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 10, + "metadataDiskSize": 100 + }, + "Large (25 executors)": { + "coordinatorVmSize": "Standard_D8_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 25, + "metadataDiskSize": 100 + }, + "X-Large (50 executors)": { + "coordinatorVmSize": "Standard_D8_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 50, + "metadataDiskSize": 100 + } + } + }, + "resources": [ + { + "apiVersion": "2018-02-01", + "name": "pid-1f30d282-b6d2-5dc6-9630-85533cc11b98", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": 
"https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "[variables('apiVersion')]", + "name": "[concat(variables('shortName'), '-state-deployment')]", + "resourceGroup": "[variables('stateRgName')]", + "dependsOn": [ + ], + "properties": { + "mode": "Incremental", + "templateLink": { + "uri": "[concat(variables('baseURI'), 'dremioState.json')]", + "contentVersion": "1.0.0.0" + }, + "parameters": { + "dremioClusterName": {"value": "[parameters('clusterName')]"}, + "dataDiskName": {"value": "[variables('dataDiskName')]"}, + "dataDiskSize": {"value": "[variables('clusterSizes')[parameters('clusterSize')].metadataDiskSize]"}, + "virtualNetworkNewOrExisting": {"value": "[if(equals(trim(parameters('useExistingSubnet')), ''), 'new', 'existing')]"}, + "existingSubnet": {"value": "[parameters('useExistingSubnet')]"} + } + } + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "[variables('apiVersion')]", + "name": "[concat(variables('shortName'), '-compute-deployment')]", + "resourceGroup": "[variables('rgName')]", + "dependsOn": [ + "[concat(variables('shortName'), '-state-deployment')]" + ], + "properties": { + "mode": "Incremental", + "templateLink": { + "uri": "[concat(variables('baseURI'), 'dremioCluster.json')]", + "contentVersion": "1.0.0.0" + }, + "parameters": { + "dremioClusterName": {"value": "[parameters('clusterName')]"}, + "executorCount": {"value": "[variables('clusterSizes')[parameters('clusterSize')].executorCount]"}, + "executorVmSize": {"value": "[variables('clusterSizes')[parameters('clusterSize')].executorVmSize]"}, + "coordinatorCount": {"value": "[variables('clusterSizes')[parameters('clusterSize')].coordinatorCount]"}, + "coordinatorVmSize": {"value": "[variables('clusterSizes')[parameters('clusterSize')].coordinatorVmSize]"}, + "dremioDownloadURL": {"value": 
"[parameters('dremioBinary')]"}, + "dataDiskId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.dataDiskId.value]"}, + "sshUsername": {"value": "[parameters('SSHUsername')]"}, + "sshPasswordOrKey": {"value": "[parameters('PasswordOrSSHPublicKey')]"}, + "subnetId": {"value": "[if(equals(trim(parameters('useExistingSubnet')), ''), reference(concat(variables('shortName'), '-state-deployment')).outputs.subnetId.value, parameters('useExistingSubnet'))]"}, + "loadBalancerId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.loadBalancerId.value]"}, + "nsgId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.nsgId.value]"}, + "usePrivateIP": {"value": "[parameters('usePrivateIP')]"} + } + } + } + ], + "outputs": { + "dremioUi": { + "type": "string", + "value": "[concat('http://', reference(concat(variables('shortName'), '-compute-deployment')).outputs.dremioHost.value, ':9047')]" + } + } +} diff --git a/azure/arm-templates/nested/dremioCluster.json b/azure/arm-templates/nested/dremioCluster.json new file mode 100644 index 00000000..527e9346 --- /dev/null +++ b/azure/arm-templates/nested/dremioCluster.json @@ -0,0 +1,460 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "dremioClusterName": { + "type": "string", + "defaultValue": "mydremio", + "metadata": { + "description": "Name for the Dremio Cluster" + } + }, + "sshUsername": { + "type": "string", + "metadata": { + "description": "SSH username for the virtual machines." + } + }, + "authenticationType": { + "type": "string", + "defaultValue": "password", + "allowedValues": [ + "password", + "sshPublicKey" + ], + "metadata": { + "description": "Type of authentication to use on the virtual machines." 
+ } + }, + "sshPasswordOrKey": { + "type": "securestring", + "metadata": { + "description": "Password or ssh key for the virtual machines." + } + }, + "dataDiskId": { + "type": "string" + }, + "publicIpNewOrExisting": { + "type": "string", + "defaultValue": "new", + "metadata": { + "description": "Determines whether or not a new public ip should be provisioned." + } + }, + "publicIpName": { + "type": "string", + "defaultValue": "[concat(parameters('dremioClusterName'), '-master-publicip')]", + "metadata": { + "description": "Name of the public ip address" + } + }, + "publicIpDns": { + "type": "string", + "defaultValue": "[concat('dremio-master-', uniqueString(resourceGroup().id, parameters('dremioClusterName')))]", + "metadata": { + "description": "DNS of the public ip address for the VM" + } + }, + "publicIpResourceGroupName": { + "type": "string", + "defaultValue": "[resourceGroup().name]", + "metadata": { + "description": "Name of the resource group for the public ip address" + } + }, + "publicIpAllocationMethod": { + "type": "string", + "defaultValue": "Static", + "allowedValues": [ + "Dynamic", + "Static" + ], + "metadata": { + "description": "Allocation method for the public ip address" + } + }, + "publicIpSku": { + "type": "string", + "defaultValue": "Standard", + "allowedValues": [ + "Basic", + "Standard" + ], + "metadata": { + "description": "SKU (Basic or Standard) of the public ip address" + } + }, + "coordinatorVmSize": { + "type": "string", + "defaultValue": "Standard_A2_v2", + "metadata": { + "description": "Size for the coordinator virtual machines." + } + }, + "coordinatorCount": { + "type": "int", + "defaultValue": 0, + "metadata": { + "description": "Number of coordinators in the cluster" + } + }, + "executorVmSize": { + "type": "string", + "defaultValue": "Standard_A2_v2", + "metadata": { + "description": "Size for the executor virtual machines." 
+ } + }, + "executorCount": { + "type": "int", + "defaultValue": 3, + "metadata": { + "description": "Number of executors in the cluster" + } + }, + "dremioDownloadURL": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "(Optional) URL to download Dremio rpm. By default, it will install the latest CE version." + } + }, + "loadBalancerId": { + "type": "string", + "metadata": { + "description": "Loadbalancer fronting the coordinators" + } + }, + "nsgId": { + "type": "string", + "metadata": { + "description": "The security group required - ports 9047, 31010 and 22(ssh) should be allowed" + } + }, + "subnetId": { + "type": "string", + "metadata": { + "description": "The subnet in which the Dremio cluster is to be deployed" + } + }, + "usePrivateIP": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Select to use the private ip address of the subnet for Dremio access." + } + } + }, + "variables": { + "computeApiVersion": "2018-06-01", + "location": "[resourceGroup().location]", + "nicName": "[concat(parameters('dremioClusterName'), '-nic')]", + "dremioImage": { + "publisher": "OpenLogic", + "offer": "CentOS", + "sku": "7.5", + "version": "7.5.20180815" + }, + "linuxConfiguration": { + "disablePasswordAuthentication": true, + "ssh": { + "publicKeys": [ + { + "path": "[concat('/home/', parameters('sshUsername'), '/.ssh/authorized_keys')]", + "keyData": "[parameters('sshPasswordOrKey')]" + } + ] + } + }, + "publicIpAddressId": { + "id": "[resourceId(parameters('publicIpResourceGroupName'), 'Microsoft.Network/publicIPAddresses', parameters('publicIpName'))]" + }, + "singlePlacementGroup": "true", + "enableAcceleratedNetworking": "false", + "priority": "Regular", + "ipAllocationMethod": "Dynamic", + "upgradeMode": "Manual", + "namingInfix": "[toLower(substring(concat(parameters('dremioClusterName'), uniqueString(resourceGroup().id)), 0, 9))]", + "bePoolName": "[concat(variables('namingInfix'), 'bepool')]", + "baseURI": 
"https://raw.githubusercontent.com/dremio/dremio-cloud-tools/master/azure/arm-templates/scripts/", + "scriptFileName": "setupDremio.sh", + "scriptURL": "[concat(variables('baseURI'), variables('scriptFileName'))]", + "install": false + }, + "resources": [ + { + "condition": "[and(not(parameters('usePrivateIP')), equals(parameters('publicIpNewOrExisting'), 'new'))]", + "type": "Microsoft.Network/publicIPAddresses", + "apiVersion": "[variables('computeApiVersion')]", + "name": "[parameters('publicIpName')]", + "location": "[variables('location')]", + "sku": { + "name": "Standard" + }, + "properties": { + "publicIPAllocationMethod": "[parameters('publicIpAllocationMethod')]", + "dnsSettings": { + "domainNameLabel": "[parameters('publicIpDns')]" + } + } + }, + { + "apiVersion": "[variables('computeApiVersion')]", + "type": "Microsoft.Network/networkInterfaces", + "name": "[variables('nicName')]", + "location": "[variables('location')]", + "dependsOn": [ + "[parameters('publicIpName')]" + ], + "properties": { + "ipConfigurations": [ + { + "name": "ipconfig-master", + "properties": { + "privateIPAllocationMethod": "[variables('ipAllocationMethod')]", + "subnet": { + "id": "[parameters('subnetId')]" + }, + "publicIPAddress": "[if(and(not(parameters('usePrivateIP')), equals(parameters('publicIpNewOrExisting'), 'new')), variables('publicIpAddressId') , json('null'))]" + } + } + ], + "networkSecurityGroup": { + "id": "[parameters('nsgId')]" + } + } + }, + { + "apiVersion": "[variables('computeApiVersion')]", + "type": "Microsoft.Compute/virtualMachines", + "name": "[concat(parameters('dremioClusterName'), '-master')]", + "location": "[variables('location')]", + "dependsOn": [ + "[variables('nicName')]" + ], + "properties": { + "hardwareProfile": { + "vmSize": "[parameters('coordinatorVmSize')]" + }, + "osProfile": { + "computerName": "[parameters('dremioClusterName')]", + "adminUsername": "[parameters('sshUsername')]", + "adminPassword": "[parameters('sshPasswordOrKey')]", + 
"linuxConfiguration": "[if(equals(parameters('authenticationType'), 'password'), json('null'), variables('linuxConfiguration'))]" + }, + "storageProfile": { + "imageReference": "[variables('dremioImage')]", + "osDisk": { + "caching": "ReadWrite", + "createOption": "FromImage" + }, + "dataDisks": [ + { + "lun": 0, + "managedDisk": { + "id": "[parameters('dataDiskId')]" + }, + "caching": "ReadWrite", + "createOption": "Attach" + } + ] + }, + "networkProfile": { + "networkInterfaces": [ + { + "id": "[resourceId('Microsoft.Network/networkInterfaces', variables('nicName'))]" + } + ] + } + }, + "resources": [ + { + "type": "extensions", + "name": "configScript", + "apiVersion": "[variables('computeApiVersion')]", + "location": "[variables('location')]", + "dependsOn": [ + "[concat(parameters('dremioClusterName'), '-master')]" + ], + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.0", + "autoUpgradeMinorVersion": true, + "settings": { + "fileUris": [ + "[variables('scriptURL')]" + ] + }, + "protectedSettings": { + "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' master ')]" + } + } + } + ] + }, + { + "name": "[concat(parameters('dremioClusterName'), '-coordinators')]", + "type": "Microsoft.Compute/virtualMachineScaleSets", + "apiVersion": "[variables('computeApiVersion')]", + "location": "[variables('location')]", + "dependsOn": ["[concat(parameters('dremioClusterName'), '-master')]"], + "condition": "[variables('install')]", + "sku": { + "name": "[parameters('coordinatorVmSize')]", + "tier": "Standard", + "capacity": "[parameters('coordinatorCount')]" + }, + "properties": { + "overprovision": "true", + "upgradePolicy": { + "mode": "[variables('upgradeMode')]" + }, + "singlePlacementGroup": "[variables('singlePlacementGroup')]", + "virtualMachineProfile": { + "storageProfile": { + "imageReference": "[variables('dremioImage')]", + 
"osDisk": { + "createOption": "FromImage", + "caching": "ReadWrite" + } + }, + "priority": "[variables('priority')]", + "osProfile": { + "computerNamePrefix": "[variables('namingInfix')]", + "adminUsername": "[parameters('sshUsername')]", + "adminPassword": "[parameters('sshPasswordOrKey')]" + }, + "networkProfile": { + "networkInterfaceConfigurations": [ + { + "name": "[concat(parameters('dremioClusterName'), 'Nic')]", + "properties": { + "primary": "true", + "enableAcceleratedNetworking": "[variables('enableAcceleratedNetworking')]", + "ipConfigurations": [ + { + "name": "[concat(parameters('dremioClusterName'), 'ipconfig-coordinators')]", + "properties": { + "subnet": { + "id": "[parameters('subnetId')]" + }, + "loadBalancerBackendAddressPools": [ + { + "id": "[concat(parameters('loadBalancerId'), '/backendAddressPools/', variables('bePoolName'))]" + } + ] + } + } + ] + } + } + ] + }, + "extensionProfile": { + "extensions": [ + { + "name": "updatescriptextension", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.0", + "autoUpgradeMinorVersion": true, + "settings": { + "fileUris": [ + "[variables('scriptURL')]" + ], + "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' coordinator ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" + } + } + } + ] + } + } + } + + }, + { + "name": "[concat(parameters('dremioClusterName'), '-executors')]", + "type": "Microsoft.Compute/virtualMachineScaleSets", + "apiVersion": "[variables('computeApiVersion')]", + "location": "[variables('location')]", + "dependsOn": ["[concat(parameters('dremioClusterName'), '-master')]"], + "sku": { + "name": "[parameters('executorVmSize')]", + "tier": "Standard", + "capacity": "[parameters('executorCount')]" + }, + "properties": { + "overprovision": "true", + 
"upgradePolicy": { + "mode": "[variables('upgradeMode')]" + }, + "singlePlacementGroup": "[variables('singlePlacementGroup')]", + "virtualMachineProfile": { + "storageProfile": { + "imageReference": "[variables('dremioImage')]", + "osDisk": { + "createOption": "FromImage", + "caching": "ReadWrite" + } + }, + "priority": "[variables('priority')]", + "osProfile": { + "computerNamePrefix": "[variables('namingInfix')]", + "adminUsername": "[parameters('sshUsername')]", + "adminPassword": "[parameters('sshPasswordOrKey')]" + }, + "networkProfile": { + "networkInterfaceConfigurations": [ + { + "name": "[concat(parameters('dremioClusterName'), 'Nic')]", + "properties": { + "primary": "true", + "enableAcceleratedNetworking": "[variables('enableAcceleratedNetworking')]", + "ipConfigurations": [ + { + "name": "[concat(parameters('dremioClusterName'), 'ipconfig-executors')]", + "properties": { + "subnet": { + "id": "[parameters('subnetId')]" + } + } + } + ] + } + } + ] + }, + "extensionProfile": { + "extensions": [ + { + "name": "updatescriptextension", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.0", + "autoUpgradeMinorVersion": true, + "settings": { + "fileUris": [ + "[variables('scriptURL')]" + ], + "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' executor ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" + } + } + } + ] + } + } + } + + } + ], + "outputs": { + "dremioHost": { + "type": "string", + "value": "[if(parameters('usePrivateIP'), first(reference(variables('nicName')).ipConfigurations).properties.privateIPAddress, reference(parameters('publicIpName'), variables('computeApiVersion')).dnsSettings.fqdn)]" + } + } +} diff --git a/azure/arm-templates/nested/dremioState.json b/azure/arm-templates/nested/dremioState.json new file mode 
100644 index 00000000..1defac4b --- /dev/null +++ b/azure/arm-templates/nested/dremioState.json @@ -0,0 +1,408 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "dremioClusterName": { + "type": "string", + "defaultValue": "mydremio", + "metadata": { + "description": "Name for the Dremio Cluster" + } + }, + "dataDiskName": { + "type": "string", + "defaultValue": "[concat(parameters('dremioClusterName'), '-data-disk')]", + "metadata": { + "description": "Name for the Dremio Master Data Disk" + } + }, + "dataDiskSize": { + "type": "int", + "defaultValue": 20, + "metadata": { + "description": "Size of the Dremio Master Data Disk" + } + }, + "storageNewOrExisting": { + "type": "string", + "defaultValue": "new", + "metadata": { + "description": "Determines whether or not a new storage account should be provisioned." + } + }, + "storageAccountName": { + "type": "string", + "defaultValue": "[concat('storage', uniqueString(resourceGroup().id))]", + "metadata": { + "description": "Name of the storage account" + } + }, + "storageAccountType": { + "type": "string", + "defaultValue": "Standard_LRS", + "metadata": { + "description": "Storage account type" + } + }, + "virtualNetworkNewOrExisting": { + "type": "string", + "defaultValue": "new", + "metadata": { + "description": "Determines whether or not a new virtual network should be provisioned." 
+ } + }, + "addressPrefixes": { + "type": "array", + "defaultValue": [ + "10.0.0.0/16" + ], + "metadata": { + "description": "Address prefix of the virtual network" + } + }, + "subnetName": { + "type": "string", + "defaultValue": "default", + "metadata": { + "description": "Name of the subnet" + } + }, + "subnetPrefix": { + "type": "string", + "defaultValue": "10.0.0.0/24", + "metadata": { + "description": "Subnet prefix of the virtual network" + } + }, + "publicIpNewOrExisting": { + "type": "string", + "defaultValue": "new", + "metadata": { + "description": "Determines whether or not a new public ip should be provisioned." + } + }, + "publicIpName": { + "type": "string", + "defaultValue": "[concat(parameters('dremioClusterName'), '-webui-publicip')]", + "metadata": { + "description": "Name of the public ip address" + } + }, + "publicIpDns": { + "type": "string", + "defaultValue": "[concat(parameters('dremioClusterName'), '-dremio-', uniqueString(resourceGroup().id, deployment().name))]", + "metadata": { + "description": "DNS of the public ip address for the VM" + } + }, + "publicIpResourceGroupName": { + "type": "string", + "defaultValue": "[resourceGroup().name]", + "metadata": { + "description": "Name of the resource group for the public ip address" + } + }, + "publicIpAllocationMethod": { + "type": "string", + "defaultValue": "Static", + "allowedValues": [ + "Dynamic", + "Static" + ], + "metadata": { + "description": "Allocation method for the public ip address" + } + }, + "publicIpSku": { + "type": "string", + "defaultValue": "Basic", + "allowedValues": [ + "Basic", + "Standard" + ], + "metadata": { + "description": "SKU of the public ip address" + } + }, + "externalLoadBalancer": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Create an external load balancer" + } + }, + "existingSubnet": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "(Optional) - existing subnet" + } + } + 
}, + "variables": { + "computeApiVersion": "2018-06-01", + "storageApiVersion": "2018-07-01", + "location": "[resourceGroup().location]", + "virtualNetworkName": "[concat(parameters('dremioClusterName'), '-vnet')]", + "publicIpAddressId": "[resourceId(parameters('publicIpResourceGroupName'), 'Microsoft.Network/publicIPAddresses', parameters('publicIpName'))]", + "networkSecurityGroupName": "dremio-nsg", + "singlePlacementGroup": "true", + "enableAcceleratedNetworking": "false", + "priority": "Regular", + "ipAllocationMethod": "[parameters('publicIpAllocationMethod')]", + "upgradeMode": "Manual", + "namingInfix": "[toLower(substring(concat(parameters('dremioClusterName'), uniqueString(resourceGroup().id)), 0, 9))]", + "loadBalancerName": "[concat(variables('namingInfix'), '-lb')]", + "lbID": "[resourceId('Microsoft.Network/loadBalancers',variables('loadBalancerName'))]", + "natPoolName": "[concat(variables('namingInfix'), 'natpool')]", + "bePoolName": "[concat(variables('namingInfix'), 'bepool')]", + "natStartPort": 50000, + "natEndPort": 50119, + "natBackendPort": 9047, + "frontEndIPConfigId": "[concat(variables('lbID'),'/frontendIPConfigurations/loadBalancerFrontEnd')]", + "backendAddressPoolId": "[concat(variables('lbID'),'/backendAddressPools/', variables('bePoolName'))]", + "externallb": "[or(parameters('externalLoadBalancer'), equals(trim(parameters('existingSubnet')), ''))]", + "lbfrontEndIPConfig": "[if(variables('externallb'), variables('externallbFronEndIpConfig'), variables('internallbFrontEndIpConfig'))]", + "externallbFronEndIpConfig": { + "publicIPAddress": { + "id": "[variables('publicIpAddressId')]" + } + }, + "internallbFrontEndIpConfig": { + "subnet": { + "privateIPAllocationMethod": "Dynamic", + "id": "[parameters('existingSubnet')]" + } + }, + "install": false + }, + "resources": [ + { + "type": "Microsoft.Compute/disks", + "sku": { + "name": "StandardSSD_LRS", + "tier": "Standard" + }, + "name": "[parameters('dataDiskName')]", + "apiVersion": 
"[variables('computeApiVersion')]", + "location": "[variables('location')]", + "scale": null, + "properties": { + "creationData": { + "createOption": "Empty" + }, + "diskSizeGB": "[parameters('dataDiskSize')]" + } + }, + { + "condition": "[equals(parameters('virtualNetworkNewOrExisting'), 'new')]", + "type": "Microsoft.Network/virtualNetworks", + "apiVersion": "[variables('computeApiVersion')]", + "name": "[variables('virtualNetworkName')]", + "location": "[variables('location')]", + "properties": { + "addressSpace": { + "addressPrefixes": "[parameters('addressPrefixes')]" + }, + "subnets": [ + { + "name": "[parameters('subnetName')]", + "properties": { + "addressPrefix": "[parameters('subnetPrefix')]" + } + } + ] + } + }, + { + "name": "[variables('networkSecurityGroupName')]", + "type": "Microsoft.Network/networkSecurityGroups", + "apiVersion": "[variables('computeApiVersion')]", + "location": "[variables('location')]", + "properties": { + "securityRules": [ + { + "name": "default-allow-ssh", + "properties": { + "priority": 1000, + "sourceAddressPrefix": "*", + "protocol": "Tcp", + "destinationPortRange": "22", + "access": "Allow", + "direction": "Inbound", + "sourcePortRange": "*", + "destinationAddressPrefix": "*" + } + }, + { + "name": "default-allow-dremio-ui", + "properties": { + "priority": 100, + "sourceAddressPrefix": "*", + "protocol": "Tcp", + "destinationPortRange": "9047", + "access": "Allow", + "direction": "Inbound", + "sourcePortRange": "*", + "destinationAddressPrefix": "*" + } + }, + { + "name": "default-allow-dremio-client", + "properties": { + "priority": 110, + "sourceAddressPrefix": "*", + "protocol": "Tcp", + "destinationPortRange": "31010", + "access": "Allow", + "direction": "Inbound", + "sourcePortRange": "*", + "destinationAddressPrefix": "*" + } + } + ] + } + }, + { + "condition": "[and(variables('install'), variables('externallb'))]", + "type": "Microsoft.Network/publicIPAddresses", + "apiVersion": "[variables('computeApiVersion')]", + 
"name": "[parameters('publicIpName')]", + "location": "[variables('location')]", + "sku": { + "name": "Standard" + }, + "properties": { + "publicIPAllocationMethod": "[parameters('publicIpAllocationMethod')]", + "dnsSettings": { + "domainNameLabel": "[parameters('publicIpDns')]" + } + } + }, + { + "condition": "[variables('install')]", + "type": "Microsoft.Network/loadBalancers", + "name": "[variables('loadBalancerName')]", + "location": "[variables('location')]", + "apiVersion": "[variables('computeApiVersion')]", + "sku": { + "name": "Standard" + }, + "dependsOn": [ + "[concat('Microsoft.Network/virtualNetworks/', variables('virtualNetworkName'))]", + "[concat('Microsoft.Network/publicIPAddresses/', parameters('publicIpName'))]" + ], + "properties": { + "frontendIPConfigurations": [ + { + "name": "LoadBalancerFrontEnd", + "properties": "[variables('lbfrontEndIPConfig')]" + } + ], + "backendAddressPools": [ + { + "name": "[variables('bePoolName')]" + } + ], + "loadBalancingRules": [ + { + "name": "dremio-ui", + "properties": { + "frontendIPConfiguration": { + "id": "[variables('frontEndIPConfigId')]" + }, + "frontendPort": 9047, + "backendPort": 9047, + "enableFloatingIP": false, + "idleTimeoutInMinutes": 4, + "protocol": "Tcp", + "enableTcpReset": false, + "loadDistribution": "SourceIP", + "disableOutboundSnat": false, + "backendAddressPool": { + "id": "[variables('backendAddressPoolId')]" + }, + "probe": { + "id": "[concat(variables('lbID'), '/probes/dremio-ui')]" + } + } + }, + { + "name": "dremio-client", + "properties": { + "frontendIPConfiguration": { + "id": "[variables('frontEndIPConfigId')]" + }, + "frontendPort": 31010, + "backendPort": 31010, + "enableFloatingIP": false, + "idleTimeoutInMinutes": 4, + "protocol": "Tcp", + "enableTcpReset": false, + "loadDistribution": "SourceIP", + "disableOutboundSnat": false, + "backendAddressPool": { + "id": "[variables('backendAddressPoolId')]" + }, + "probe": { + "id": "[concat(variables('lbID'), 
'/probes/dremio-ui')]" + } + } + } + ], + "probes": [ + { + "name": "dremio-ui", + "properties": { + "protocol": "Tcp", + "port": 9047, + "intervalInSeconds": 15, + "numberOfProbes": 2 + } + } + ], + "inboundNatRules": [], + "outboundRules": [], + "inboundNatPools": [ + { + "name": "[variables('natPoolName')]", + "properties": { + "frontendIPConfiguration": { + "id": "[variables('frontEndIPConfigID')]" + }, + "protocol": "tcp", + "idleTimeoutInMinutes": 4, + "enableFloatingIP": false, + "enableTcpReset": false, + "frontendPortRangeStart": "[variables('natStartPort')]", + "frontendPortRangeEnd": "[variables('natEndPort')]", + "backendPort": "[variables('natBackendPort')]" + } + } + ] + } + } + ], + "outputs": { + "dataDiskId": { + "type": "string", + "value": "[resourceId('Microsoft.Compute/disks/', parameters('dataDiskName'))]" + }, + "subnetId": { + "type": "string", + "value": "[if(equals(parameters('virtualNetworkNewOrExisting'), 'new'), resourceId(resourceGroup().name, 'Microsoft.Network/virtualNetworks/subnets/', variables('virtualNetworkName'), parameters('subnetName')), '')]" + }, + "loadBalancerId": { + "type": "string", + "value": "[if(variables('install'), resourceId('Microsoft.Network/loadBalancers/', variables('loadBalancerName')), '')]" + }, + "nsgId": { + "type": "string", + "value": "[resourceId('Microsoft.Network/networkSecurityGroups/', variables('networkSecurityGroupName'))]" + }, + "dremioUIAddress": { + "type": "string", + "value": "[if(variables('install'), if(variables('externallb'), reference(concat('Microsoft.Network/publicIPAddresses/', parameters('publicIpName')), variables('computeApiVersion')).dnsSettings.fqdn, first(reference(variables('loadBalancerName')).frontendIPConfigurations).properties.privateIPAddress), '')]" + } + } +} diff --git a/azure/arm-templates/scripts/setupDremio.sh b/azure/arm-templates/scripts/setupDremio.sh new file mode 100644 index 00000000..d4cb5f18 --- /dev/null +++ b/azure/arm-templates/scripts/setupDremio.sh @@ 
-0,0 +1,101 @@ +#/bin/bash -e + +[ -z $DOWNLOAD_URL ] && DOWNLOAD_URL=http://download.dremio.com/community-server/dremio-community-LATEST.noarch.rpm +if [ ! -f /opt/dremio/bin/dremio ]; then + command -v yum >/dev/null 2>&1 || { echo >&2 "This script works only on Centos or Red Hat. Aborting."; exit 1; } + yum install -y java-1.8.0-openjdk + wget $DOWNLOAD_URL -O dremio-download.rpm + yum -y localinstall dremio-download.rpm +fi + +service=$1 +if [ -z "$service" ]; then + echo "Require the service to start - master, coordinator or executor" + exit 1 +fi + +# In Azure, /dev/sdb is ephemeral storage mapped to /mnt/resource. +# Additional disks are mounted after that... +DISK_NAME=/dev/sdc +DISK_PART=${DISK_NAME}1 +DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf +DREMIO_DATA_DIR=/var/lib/dremio +# Azure Linux VMs have ephemeral/temporary disk +# always mounted on /mnt/resource/dremio +SPILL_DIR=/mnt/resource/dremio + +function partition_disk { + parted $DISK_NAME mklabel msdos + parted -s $DISK_NAME mkpart primary ext4 0% 100% + mkfs -t ext4 $DISK_PART +} + +if [ "$service" == "master" ]; then + lsblk -no FSTYPE $DISK_NAME | grep ext4 || partition_disk + mount $DISK_PART $DREMIO_DATA_DIR + chown dremio:dremio $DREMIO_DATA_DIR + echo "$DISK_PART $DREMIO_DATA_DIR ext4 defaults 0 0" >> /etc/fstab +else + zookeeper=$2 + if [ -z "$zookeeper" ]; then + echo "Non-master node requires zookeeper host" + exit 2 + fi +fi + +function setup_spill { + chmod +w /etc/sysconfig/dremio + cat >> /etc/sysconfig/dremio < /dev/null; do echo waiting for dremio master; sleep 2; done; + sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled: false/; \ + s/executor.enabled: true/executor.enabled: false/" \ + $DREMIO_CONFIG_FILE + echo "zookeeper: \"$zookeeper:2181\"" >> $DREMIO_CONFIG_FILE +} + +function setup_executor { + setup_spill + sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled: false/; \ + s/coordinator.enabled: true/coordinator.enabled: false/; \ + 
/local:/a \ \ spilling: [\"$SPILL_DIR/spill\"]" \ + $DREMIO_CONFIG_FILE + echo "zookeeper: \"$zookeeper:2181\"" >> $DREMIO_CONFIG_FILE +} + +setup_$service +service dremio start +chkconfig dremio on diff --git a/charts/dremio/config/logback-admin.xml b/charts/dremio/config/logback-admin.xml new file mode 100644 index 00000000..b393d02b --- /dev/null +++ b/charts/dremio/config/logback-admin.xml @@ -0,0 +1,67 @@ + + + + + + %msg%n%ex{0}%n + + + + + + + ${dremio.admin.log.verbosity:-OFF} + + + %date{ISO8601} [%thread] %-5level %logger{30} - %msg%n + + + + + + + + + ${dremio.admin.log.verbosity:-OFF} + + ${dremio.admin.log.path} + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + + + + + + + + + + + + + From 72bd56d6af1e83669cfd7116771ec78b9ccf3ee2 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Mon, 13 May 2019 14:17:04 -0700 Subject: [PATCH 07/24] DX-16307: change the message for password chars Change-Id: I83a272457e977ea54dc35812a6d28b68dda666ee --- azure/arm-templates/azuredeploy.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure/arm-templates/azuredeploy.json b/azure/arm-templates/azuredeploy.json index e9102fc8..b008183a 100644 --- a/azure/arm-templates/azuredeploy.json +++ b/azure/arm-templates/azuredeploy.json @@ -38,7 +38,7 @@ "PasswordOrSSHPublicKey": { "type": "securestring", "metadata": { - "description": "Password or ssh public key for the virtual machines. If password, password must be minimum 12 characters with at least 1 upper case letter, 1 lower case letter and 1 number." + "description": "Password or ssh public key for the virtual machines. If password, password must be minimum 8 characters with at least 1 upper case letter, 1 lower case letter and 1 number." } }, "useExistingSubnet": { From ac9811c249e0629582e2bc99a43778058797301d Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Mon, 13 May 2019 14:25:23 -0700 Subject: [PATCH 08/24] rebase to 3.2 release 1. Pull in the latest config files 2. 
Update memory, cpu in values.yaml so that they work with the recommended machine types in the docs Change-Id: I47970acda6fc776171f1970ddacb7e0d78dbff58 --- charts/dremio/README.md | 2 + charts/dremio/config/logback-access.xml | 5 +- charts/dremio/config/logback.xml | 68 +++++++++++++++++++++++-- charts/dremio/values.yaml | 8 +-- 4 files changed, 74 insertions(+), 9 deletions(-) diff --git a/charts/dremio/README.md b/charts/dremio/README.md index b5e620a5..6d8ecd0e 100644 --- a/charts/dremio/README.md +++ b/charts/dremio/README.md @@ -18,6 +18,8 @@ An appropriate distributed file store (S3, ADLS, HDFS, etc) should be used for p This assumes you already have kubernetes cluster setup, kubectl configured to talk to your kubernetes cluster and helm setup in your cluster. Review and update values.yaml to reflect values for your environment before installing the helm chart. This is specially important for for the memory and cpu values - your kubernetes cluster should have sufficient resources to provision the pods with those values. If your kubernetes installation does not support serviceType LoadBalancer, it is recommended to comment the serviceType value in values.yaml file before deploying. #### Installing the helm chart +Review charts/dremio/values.yaml and adjust the values as per your requirements. Note that the values for cpu and memory for the coordinator and the executors are set to work with AKS on Azure with worker nodes setup with machine types Standard_E16s_v3. 
+ Run this from the charts directory ```bash cd charts diff --git a/charts/dremio/config/logback-access.xml b/charts/dremio/config/logback-access.xml index c0f2ed28..a00ae338 100644 --- a/charts/dremio/config/logback-access.xml +++ b/charts/dremio/config/logback-access.xml @@ -24,8 +24,11 @@ ${dremio.log.path}/access.log - ${dremio.log.path}/archive/access.%d{yyyy-MM-dd}.log.gz + ${dremio.log.path}/archive/access.%d{yyyy-MM-dd}.%i.log.gz 30 + + 100MB + diff --git a/charts/dremio/config/logback.xml b/charts/dremio/config/logback.xml index 8999c3bc..0ab3528b 100644 --- a/charts/dremio/config/logback.xml +++ b/charts/dremio/config/logback.xml @@ -30,7 +30,22 @@ ${dremio.log.path}/server.log - ${dremio.log.path}/archive/server.%d{yyyy-MM-dd}.log.gz + ${dremio.log.path}/archive/server.%d{yyyy-MM-dd}.%i.log.gz + 30 + + 100MB + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + ${dremio.log.path}/metadata_refresh.log + + ${dremio.log.path}/archive/metadata_refresh.%d{yyyy-MM-dd}.log.gz 30 @@ -42,8 +57,11 @@ ${dremio.log.path}/json/server.json - ${dremio.log.path}/json/archive/server.%d{yyyy-MM-dd}.json.gz + ${dremio.log.path}/json/archive/server.%d{yyyy-MM-dd}.%i.json.gz 30 + + 100MB + @@ -56,15 +74,18 @@ message - + ${dremio.log.path}/queries.json - ${dremio.log.path}/archive/queries.%d{yyyy-MM-dd}.json.gz + ${dremio.log.path}/archive/queries.%d{yyyy-MM-dd}.%i.json.gz 30 + + 100MB + @@ -87,6 +108,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + ${dremio.log.path}/hive.deprecated.function.warning.log + + ${dremio.log.path}/archive/hive.deprecated.function.warning.%d{yyyy-MM-dd}.%i.log.gz + 30 + + 100MB + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index a4400a5e..d2505714 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -7,8 +7,8 @@ image: dremio/dremio-oss:latest # the coordinators and the executors. 
# The value of memory should be in MB. CPU is in no of cores. coordinator: - memory: 16384 - cpu: 8 + memory: 122880 + cpu: 15 # This count is for slave coordinators only. # The chart will always create one master coordinator - you are # not required to have more than one master coordinator. @@ -19,8 +19,8 @@ coordinator: port: 31010 volumeSize: 100Gi executor: - memory: 16384 - cpu: 4 + memory: 122880 + cpu: 15 count: 3 volumeSize: 100Gi zookeeper: From 8f40a515642c716008d85bcc04e2179b3905728f Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Tue, 21 May 2019 10:14:28 -0700 Subject: [PATCH 09/24] DX-16588: Remove unnecessary space Plus change README layout and two label changes. Change-Id: Ia5e05f13910db5c03361c6166411704b40bf4a8e --- azure/arm-templates/README.md | 39 ++++++++++++++-------------- azure/arm-templates/azuredeploy.json | 6 ++--- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/azure/arm-templates/README.md b/azure/arm-templates/README.md index 210a9f1c..59094555 100644 --- a/azure/arm-templates/README.md +++ b/azure/arm-templates/README.md @@ -1,6 +1,8 @@ # Deploying Dremio to Azure +You can try it out: [![Azure ARM Template](http://azuredeploy.net/deploybutton.png)](https://portal.azure.com/#create/microsoft.template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fdremio%2Fdremio-cloud-tools%2Fmaster%2Fazure%2Farm-templates%2Fazuredeploy.json) + This deploys a Dremio cluster on Azure VMs. The deployment creates a master coordinator node and number of executor nodes depending on the size of the cluster chosen. The table below provides the machine type and number of executor nodes for the different sizes of Dremio clusters. | Cluster size | Coordinator VM Type | Executor VM Type | No. of Executors | @@ -11,6 +13,24 @@ This deploys a Dremio cluster on Azure VMs.
The deployment creates a master coor | Large | Standard_D8_v3 | Standard_E16s_v3 | 25 | | X-Large | Standard_D8_v3 | Standard_E16s_v3 | 50 | +The inputs required during deployment are: + +|Input Parameter|Description | +|---|---| +| Subscription |Azure subscription where the cluster should be deployed. | +| Resource Group |The Azure Resource group where the cluster should be deployed. You can create a new one too. It is recommended to create a new one as all resources are created in that group and deleting the group will delete all resources created. | +| Location |The Azure location where the cluster resources will be deployed. | +| Cluster Name |A name for your cluster.| +| Cluster Size |Pick a size based on your needs.| +| SSH Username |The username that can be used to login to your nodes.| +| Authentication Type |Password or Key based authentication for ssh.| +| Password or SSH Public Key |The password or ssh public key | +| Use Existing Subnet | (Optional) id of an existing subnet. The subnet must be in the same region as the Dremio cluster resource group. It is of the form /subscriptions/xxxx/resourceGroups/xxxx/providers/Microsoft.Network/virtualNetworks/xxxx/subnets/xxxx| +| Use Private IP | Select true if you are using existing subnet and you want to use an internal ip from the subnet to access Dremio. | +| Dremio Binary | Publicly accessible URL to a Dremio installation rpm | + +Once the deployment is successful, you will find the URL to Dremio UI in the output section of the deployment. 
+ The deployment resources are: ``` ┌───────────────────────────┐ @@ -40,22 +60,3 @@ The deployment resources are: │ └──────────────────────────────────────────────────────────────┘ │ └──────────────────────────────────────────────────────────────────┘ ``` -You can try it out: [![Azure ARM Template](http://azuredeploy.net/deploybutton.png)](https://portal.azure.com/#create/microsoft.template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fdremio%2Fdremio-cloud-tools%2Fmaster%2Fazure%2Farm-templates%2Fazuredeploy.json) - -The inputs required during deployment are: - -|Input Parameter|Description | -|---|---| -| Subscription |Azure subscription where the cluster should be deployed. | -| Resource Group |The Azure Resource group where the cluster should be deployed. You can create a new one too. It is recommended to create a new one as all resources are created in that group and deleting the group will delete all resources created. | -| Location |The Azure location where the cluster resources will be deployed. | -| Cluster Name |A name for your cluster.| -| Cluster Size |Pick a size based on your needs.| -| SSH Username |The username that can be used to login to your nodes.| -| Authentication Type |Password or Key based authentication for ssh.| -| Password or SSH Public Key |The password or ssh public key | -| Use Existing Subnet | (Optional) id of an existing subnet. The subnet must be in the same region as the Dremio cluster resource group. It is of the form /subscriptions/xxxx/resourceGroups/xxxx/providers/Microsoft.Network/virtualNetworks/xxxx/subnets/xxxx| -| Use Private IP | Select true if you are using existing subnet and you want to use an internal ip from the subnet to access Dremio. | -| Dremio Binary | Publicly accessible URL to a Dremio installation rpm | - -Once the deployment is successful, you will find the URL to Dremio UI in the output section of the deployment. 
diff --git a/azure/arm-templates/azuredeploy.json b/azure/arm-templates/azuredeploy.json index b008183a..5f353e3c 100644 --- a/azure/arm-templates/azuredeploy.json +++ b/azure/arm-templates/azuredeploy.json @@ -12,7 +12,7 @@ }, "clusterSize": { "type": "string", - "allowedValues": ["X-Small (1 executor)", "Small (5 executors)", "Medium (10 executors )", "Large (25 executors)", "X-Large (50 executors)"], + "allowedValues": ["X-Small (1 executor)", "Small (5 executors)", "Medium (10 executors)", "Large (25 executors)", "X-Large (50 executors)"], "metadata": { "description": "The type and number of machines are chosen based on the size selected." } @@ -21,7 +21,7 @@ "type": "string", "defaultValue": "azuser", "metadata": { - "description": "SSH username for the virtual machines. You need it if you want to login to the machines." + "description": "SSH username for the virtual machines. (Can be used to SSH into machines for changing configuration, reviewing logs, etc.)" } }, "authenticationType": { @@ -32,7 +32,7 @@ "SSHPublicKey" ], "metadata": { - "description": "Type of authentication to use on the virtual machines." + "description": "Type of authentication to use for SSH." } }, "PasswordOrSSHPublicKey": { From 0238c7aa0deabcd22f39643d604986005607041f Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Wed, 22 May 2019 11:53:05 -0700 Subject: [PATCH 10/24] DX-16610: Remove the strategy from templates DX-16619: Fix the syntax in values.yaml The StatefulSet attribute is updateStrategy (not strategy), and its default is RollingUpdate. So deleting those lines is sufficient and does not lose any functionality. Kubernetes client libraries as of v1.14 seem to be stricter than earlier versions and throw an error when those lines are present. Dropping the lines works with earlier versions of helm as well.
Change-Id: I727adc50a883a4801e737aec0be6b84788f6f357 --- charts/dremio/templates/dremio-coordinator.yaml | 5 ----- charts/dremio/templates/dremio-executor.yaml | 5 ----- charts/dremio/values.yaml | 2 +- 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index 2e4c1eaa..3bffa687 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -8,11 +8,6 @@ spec: replicas: {{.Values.coordinator.count}} podManagementPolicy: "Parallel" revisionHistoryLimit: 1 - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 selector: matchLabels: app: dremio-coordinator diff --git a/charts/dremio/templates/dremio-executor.yaml b/charts/dremio/templates/dremio-executor.yaml index 78ddc7af..e511e1d3 100644 --- a/charts/dremio/templates/dremio-executor.yaml +++ b/charts/dremio/templates/dremio-executor.yaml @@ -8,11 +8,6 @@ spec: replicas: {{.Values.executor.count}} podManagementPolicy: "Parallel" revisionHistoryLimit: 1 - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 selector: matchLabels: app: dremio-executor diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index d2505714..624a11bc 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -51,7 +51,7 @@ serviceType: LoadBalancer # For private and protected docker image repository, you should store # the credentials in a kubernetes secret and provide the secret name here. # For more information, see https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod -#imagePullSecrets=secretname +#imagePullSecrets: secretname # Target pods to nodes based on labels set on the nodes. 
# For more information, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector From 28a244354686a7e2fc4471b9d44b7459e00d4256 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Tue, 21 May 2019 11:28:12 -0700 Subject: [PATCH 11/24] DX-16550: Support for Cloudformation in AWS Change-Id: Ic0aa8058ffbcd46144e451621312554b72dc4a67 --- aws/cloudformation/README.md | 34 +++ aws/cloudformation/dremio_cf.yaml | 363 ++++++++++++++++++++++++++++++ 2 files changed, 397 insertions(+) create mode 100644 aws/cloudformation/README.md create mode 100644 aws/cloudformation/dremio_cf.yaml diff --git a/aws/cloudformation/README.md b/aws/cloudformation/README.md new file mode 100644 index 00000000..dc837269 --- /dev/null +++ b/aws/cloudformation/README.md @@ -0,0 +1,34 @@ + +# Deploying Dremio to AWS + +_Note:_ To try on AWS, you should have: +* Permission to create Security Groups +* An AWS key pair created +* (Optional) A VPC and subnet created if you want to install to a non-default VPC + +Try it out [![AWS Cloudformation](https://s3.amazonaws.com/cloudformation-examples/cloudformation-launch-stack.png)](https://us-east-2.console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/new?templateURL=https://s3-us-west-2.amazonaws.com/aws-cloudformation.dremio.com/dremio_cf.yaml&stackName=myDremio) + +This deploys a Dremio cluster on EC2 instances. The deployment creates a master coordinator node and number of executor nodes depending on the size of the cluster chosen. The table below provides the machine type and number of executor nodes for the different sizes of Dremio clusters. + +| Cluster size | Coordinator VM Type | Executor VM Type | No. 
of Executors | +|--------------|---------------------|------------------|------------------| +| X-Small | m5.2xlarge | r5d.4xlarge | 1 | +| Small | m5.2xlarge | r5d.4xlarge | 5 | +| Medium | m5.4xlarge | r5d.4xlarge | 10 | +| Large | m5.4xlarge | r5d.4xlarge | 25 | +| X-Large | m5.4xlarge | r5d.4xlarge | 50 | + +Make sure you are in the AWS region you are planning to deploy your cluster in. + +The inputs required during deployment are: + +|Input Parameter|Description | +|---|---| +| Stack name |Name of the stack. | +| Cluster Size |Pick a size based on your needs.| +| Deploy to VPC |VPC to deploy the cluster into.| +| Deploy to Subnet |Subnet to deploy the cluster into. Must be in the selected VPC.| +| Dremio Binary | Publicly accessible URL to a Dremio installation RPM | +| AWS keypair | AWS key pair to use to SSH to the VMs. SSH username for the VMs are centos (has sudo privilege). SSH into machines for changing configuration, reviewing logs, etc. | + +Once the deployment is successful, you will find the URL to Dremio UI in the output section of the deployment. diff --git a/aws/cloudformation/dremio_cf.yaml b/aws/cloudformation/dremio_cf.yaml new file mode 100644 index 00000000..f7b1d21c --- /dev/null +++ b/aws/cloudformation/dremio_cf.yaml @@ -0,0 +1,363 @@ +--- +AWSTemplateFormatVersion: '2010-09-09' +Description: 'Setup a Dremio cluster.' +Parameters: + keyName: + Type: AWS::EC2::KeyPair::KeyName + AllowedPattern: ".+" + ConstraintDescription: Must select an existing EC2 KeyPair + Description: "AWS key pair to use to SSH to the VMs. SSH username for the VMs are centos (has sudo privilege). SSH into machines for changing configuration, reviewing logs, etc." + clusterSize: + Type: String + Description: "The type and number of machines are chosen based on the size selected."
+ AllowedValues: + - "X-Small--1-executor" + - "Small--5-executors" + - "Medium--10-executors" + - "Large--25-executors" + - "X-Large--50-executors" + Default: "Small--5-executors" + useVPC: + Type: AWS::EC2::VPC::Id + Description: "VPC to deploy the cluster into." + useSubnet: + Type: AWS::EC2::Subnet::Id + Description: "Subnet to deploy the cluster into. Must be in the selected VPC." + dremioDownloadURL: + Type: String + Description: "(Optional) HTTP or HTTPS URL to a Dremio RPM. Leave empty to install the latest Dremio CE release." + Default: "" +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - + Label: + default: Dremio Cluster + Parameters: + - coordinatorInstanceType + - coordinatorCount + - executorInstanceType + - executorCount + - clusterSize + - useVPC + - useSubnet + - dremioDownloadURL + - + Label: + default: AWS resource configuration + Parameters: + - keyName + ParameterLabels: + keyName: + default : "AWS keypair" + clusterSize: + default: "Cluster size" + useSubnet: + default: "Deploy to Subnet" + useVPC: + default: "Deploy to VPC" + dremioDownloadURL: + default: "Dremio download URL" +Mappings: + Custom: + Variables: + URL: https://download.dremio.com/community-server/dremio-community-LATEST.noarch.rpm + ClusterSizes: + X-Small--1-executor: + coordinatorInstanceType: m5.2xlarge + coordinatorDiskSize: 10 + executorInstanceType: r5d.4xlarge + executorCount: 1 + executorDiskSize: 10 + Small--5-executors: + coordinatorInstanceType: m5.2xlarge + coordinatorDiskSize: 50 + executorInstanceType: r5d.4xlarge + executorCount: 5 + executorDiskSize: 50 + Medium--10-executors: + coordinatorInstanceType: m5.4xlarge + coordinatorDiskSize: 100 + executorInstanceType: r5d.4xlarge + executorCount: 10 + executorDiskSize: 100 + Large--25-executors: + coordinatorInstanceType: m5.4xlarge + coordinatorDiskSize: 100 + executorInstanceType: r5d.4xlarge + executorCount: 25 + executorDiskSize: 100 + X-Large--50-executors: + coordinatorInstanceType:
m5.4xlarge + coordinatorDiskSize: 100 + executorInstanceType: r5d.4xlarge + executorCount: 50 + executorDiskSize: 100 + RegionMap: + # Centos 7 Images + us-east-1: # N Virginia + AMI: ami-02eac2c0129f6376b + us-east-2: # Ohio + AMI: ami-0f2b4fc905b0bd1f1 + us-west-1: # California + AMI: ami-074e2d6769f445be5 + us-west-2: # Oregon + AMI: ami-01ed306a12b7d1c96 + ca-central-1: # Québec + AMI: ami-033e6106180a626d0 + eu-central-1: # Frankfurt + AMI: ami-04cf43aca3e6f3de3 + eu-west-1: # Ireland + AMI: ami-0ff760d16d9497662 + eu-west-2: # London + AMI: ami-0eab3a90fc693af19 + ap-southeast-1: # Singapore + AMI: ami-0b4dd9d65556cac22 + ap-southeast-2: # Sydney + AMI: ami-08bd00d7713a39e7d + ap-south-1 : # Mumbai + AMI: ami-02e60be79e78fef21 + ap-northeast-1: # Tokyo + AMI: ami-045f38c93733dd48d + ap-northeast-2: # Seoul + AMI: ami-06cf2a72dadf92410 + sa-east-1: # São Paulo + AMI: ami-0b8d86d4bf91850af + SubnetConfig: + VPC: + CIDR: 10.0.0.0/16 + Public: + CIDR: 10.0.0.0/24 +Conditions: + CreateVPC: !Equals [!Ref useSubnet, ""] # true only when no subnet is supplied +Resources: + VPC: + Condition: CreateVPC + Type: AWS::EC2::VPC + Properties: + EnableDnsSupport: 'true' + EnableDnsHostnames: 'true' + CidrBlock: !FindInMap [SubnetConfig, VPC, CIDR] + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "net"]] + + PublicSubnet: + Condition: CreateVPC + Type: AWS::EC2::Subnet + Properties: + VpcId: !Ref VPC + CidrBlock: !FindInMap [SubnetConfig, Public, CIDR] + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "public"]] + - Key: Network + Value: Public + + InternetGateway: + Condition: CreateVPC + Type: AWS::EC2::InternetGateway + Properties: + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "ig"]] + + VPCGatewayAttachment: + Condition: CreateVPC + Type: AWS::EC2::VPCGatewayAttachment + Properties: + VpcId: !Ref VPC + InternetGatewayId: !Ref InternetGateway + + PublicRouteTable: + Condition: CreateVPC + Type: AWS::EC2::RouteTable + Properties: + VpcId: !Ref VPC + Tags: +
- Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "public"]] + + PublicSubnetRouteTableAssociation: + Condition: CreateVPC + Type: AWS::EC2::SubnetRouteTableAssociation + Properties: + SubnetId: !Ref PublicSubnet + RouteTableId: !Ref PublicRouteTable + + PublicRoute: + Condition: CreateVPC + Type: AWS::EC2::Route + DependsOn: VPCGatewayAttachment + Properties: + RouteTableId: !Ref PublicRouteTable + DestinationCidrBlock: 0.0.0.0/0 + GatewayId: !Ref InternetGateway + + DremioSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupName: "Dremio Access" + GroupDescription: "Dremio Access" + VpcId: !If [CreateVPC, !Ref VPC, !Ref useVPC] + SecurityGroupIngress: + - IpProtocol: tcp + FromPort: '9047' + ToPort: '9047' + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: '31010' + ToPort: '31010' + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: '22' + ToPort: '22' + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: '80' + ToPort: '80' + CidrIp: 0.0.0.0/0 + - IpProtocol: -1 + SourceSecurityGroupName: "Dremio Access" + AvailabilityWaitHandle: + Type: AWS::CloudFormation::WaitConditionHandle + AvailabilityWaitCondition: # satisfied when the master's user-data script PUTs its status to the wait handle + Type: AWS::CloudFormation::WaitCondition + DependsOn: DremioMaster + Properties: + Handle: !Ref "AvailabilityWaitHandle" + Timeout: "600" + + DremioMaster: + Type: AWS::EC2::Instance + Properties: + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "DremioMaster"]] + ImageId: !FindInMap [RegionMap, !Ref "AWS::Region", AMI] + KeyName: !Ref keyName + InstanceType: !FindInMap [ClusterSizes, !Ref clusterSize, coordinatorInstanceType] + NetworkInterfaces: + - DeleteOnTermination: "true" + AssociatePublicIpAddress: "true" + DeviceIndex: 0 + SubnetId: !If [CreateVPC, !Ref PublicSubnet, !Ref useSubnet] + GroupSet: [!Ref DremioSecurityGroup] + BlockDeviceMappings: + - DeviceName: /dev/sda1 + Ebs: + VolumeSize: !FindInMap [ClusterSizes, !Ref clusterSize, coordinatorDiskSize] + DeleteOnTermination: true + VolumeType: gp2 + UserData:
+ Fn::Base64: !Sub + - | + #!/bin/bash -x + statusFile=/tmp/statusfile + + if [ ! -d /opt/dremio ]; then + url=${dremioDownloadURL} + [ -z $url ] && url=${DOWNLOAD_URL} + yum -y install java-1.8.0-openjdk-devel $url + if [ $? != 0 ]; then + echo "{ \"Status\" : \"FAILURE\", \"UniqueId\" : \"${AWS::StackName}\", \"Data\" : \"Failed\", \"Reason\" : \"Unable to download Dremio\" }" > $statusFile + curl -T $statusFile '${AvailabilityWaitHandle}' + exit 1 + fi + fi + + DREMIO_HOME=/opt/dremio + DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf + + sed -i "s/executor.enabled: true/executor.enabled: false/" $DREMIO_CONFIG_FILE + + cp $DREMIO_HOME/share/dremio/dremio.service /etc/systemd/system + systemctl daemon-reload + systemctl start dremio + systemctl enable dremio + + until curl -Iks http://localhost:9047; do + echo waiting for website availability + sleep 2 + done + echo "{ \"Status\" : \"SUCCESS\", \"UniqueId\" : \"${AWS::StackName}\", \"Data\" : \"Ready\", \"Reason\" : \"Website Available\" }" > $statusFile + curl -T $statusFile '${AvailabilityWaitHandle}' + - DOWNLOAD_URL: !FindInMap [ Custom, Variables, "URL"] + + DremioExecutorLC: + Type: AWS::AutoScaling::LaunchConfiguration + DependsOn: DremioMaster + Properties: + AssociatePublicIpAddress: true + #EbsOptimized: true + ImageId: + Fn::FindInMap: + - RegionMap + - !Ref AWS::Region + - AMI + InstanceMonitoring: true + InstanceType: !FindInMap [ClusterSizes, !Ref clusterSize, executorInstanceType] + KeyName: !Ref keyName + SecurityGroups: [!Ref DremioSecurityGroup] + BlockDeviceMappings: + - DeviceName: /dev/sda1 + Ebs: + VolumeSize: !FindInMap [ClusterSizes, !Ref clusterSize, executorDiskSize] + DeleteOnTermination: true + VolumeType: gp2 + UserData: + Fn::Base64: !Sub + - | + #!/bin/bash -x + + if [ !
-d /opt/dremio ]; then + url=${dremioDownloadURL} + [ -z $url ] && url=${DOWNLOAD_URL} + yum -y install java-1.8.0-openjdk-devel $url + fi + + mkdir /var/ephemeral + # Set up the ephemeral disk for spill space - the device name below assumes r5d-class executors + NVME=nvme1n1 + file -s /dev/$NVME | grep "/dev/$NVME: data" && mkfs -t xfs /dev/$NVME && \ + UUID=$(blkid | grep $NVME | awk -F'"' '{ print $2 }') && \ + echo "UUID=$UUID /var/ephemeral xfs defaults,nofail 0 2" >> /etc/fstab && \ + mount -a + chmod 777 /var/ephemeral + + SPILL_DIR=/var/ephemeral/dremio_spill + DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf + + sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled: false/; \ + s/coordinator.enabled: true/coordinator.enabled: false/; \ + /local:/a \ \ spilling: [\"$SPILL_DIR\"]" \ + $DREMIO_CONFIG_FILE + echo "zookeeper: \"${ZK}:2181\"" >> $DREMIO_CONFIG_FILE + cp /opt/dremio/share/dremio/dremio.service /etc/systemd/system # literal install root: DREMIO_HOME is never set in this script + systemctl daemon-reload + systemctl start dremio + systemctl enable dremio + + - ZK: !GetAtt DremioMaster.PrivateIp + DOWNLOAD_URL: !FindInMap [ Custom, Variables, "URL"] + + DremioExecutorASG: + Type: AWS::AutoScaling::AutoScalingGroup + DependsOn: DremioExecutorLC + Properties: + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", DremioExecutor]] + PropagateAtLaunch: true + ResourceType: "auto-scaling-group" + ResourceId: !Ref "AWS::StackName" + LaunchConfigurationName: !Ref DremioExecutorLC + VPCZoneIdentifier: [!If [CreateVPC, !Ref PublicSubnet, !Ref useSubnet]] + DesiredCapacity: !FindInMap [ClusterSizes, !Ref clusterSize, executorCount] + MaxSize: !FindInMap [ClusterSizes, !Ref clusterSize, executorCount] + MinSize: !FindInMap [ClusterSizes, !Ref clusterSize, executorCount] + +Outputs: + DremioUI: + Description: Dremio UI.
+ Value: !Join [ "", ["http://", !GetAtt DremioMaster.PublicIp, ":9047"]] From 4b85995343b9f74d17f4b1e39dca71dbdcf06395 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Wed, 29 May 2019 13:11:55 -0700 Subject: [PATCH 12/24] DX-16739: open port 80 Certbot uses port 80 to handshake with LetsEncrypt to generate SSL certificates. So, port 80 is being opened up by default. Change-Id: I565bd37b6b2657c67751efd1eee2f4cc45be4ba8 --- azure/arm-templates/nested/dremioState.json | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/azure/arm-templates/nested/dremioState.json b/azure/arm-templates/nested/dremioState.json index 1defac4b..bed09915 100644 --- a/azure/arm-templates/nested/dremioState.json +++ b/azure/arm-templates/nested/dremioState.json @@ -259,6 +259,19 @@ "sourcePortRange": "*", "destinationAddressPrefix": "*" } + }, + { + "name": "default-allow-for-letsencrypt", + "properties": { + "priority": 1100, + "sourceAddressPrefix": "*", + "protocol": "Tcp", + "destinationPortRange": "80", + "access": "Allow", + "direction": "Inbound", + "sourcePortRange": "*", + "destinationAddressPrefix": "*" + } } ] } From 7e4c51641e017492d4e08d9bfa9e6f65c4800775 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Tue, 4 Jun 2019 10:39:27 -0700 Subject: [PATCH 13/24] DX-16813: Add attribute for reverse DNS Azure VMs do not automatically do reverse DNS lookup of the VM's public ip address. Reverse DNS needs to be specifically enabled.
Change-Id: Ie92fe9ca6c6358a3704947f201dae4e209602c66 --- azure/arm-templates/nested/dremioCluster.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/azure/arm-templates/nested/dremioCluster.json b/azure/arm-templates/nested/dremioCluster.json index 527e9346..ff4291a6 100644 --- a/azure/arm-templates/nested/dremioCluster.json +++ b/azure/arm-templates/nested/dremioCluster.json @@ -195,7 +195,8 @@ "properties": { "publicIPAllocationMethod": "[parameters('publicIpAllocationMethod')]", "dnsSettings": { - "domainNameLabel": "[parameters('publicIpDns')]" + "domainNameLabel": "[parameters('publicIpDns')]", + "reverseFqdn": "[concat(parameters('publicIpDns'), '.', variables('location'), '.cloudapp.azure.com')]" } } }, From b2fc9c00a452893d37c013774079c4c3c2affab4 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Thu, 6 Jun 2019 11:14:40 -0700 Subject: [PATCH 14/24] DX-16712: Use Azure Storage for uploads and accelerator data 1. The URL for path.dist values for S3, ADLS are different from pre-3.2.0 and 3.2.0+. Handle it. 2. Add support for Azure Storage v2 for path.dist Change-Id: I7730ed4caac22240e579e41641e720bdedf05ec0 --- charts/dremio/config/core-site.xml | 42 +++++++++++++++++++++++++----- charts/dremio/config/dremio.conf | 23 ++++++++++++---- charts/dremio/values.yaml | 16 ++++++++---- 3 files changed, 64 insertions(+), 17 deletions(-) diff --git a/charts/dremio/config/core-site.xml b/charts/dremio/config/core-site.xml index 1c2f3237..8d91d757 100644 --- a/charts/dremio/config/core-site.xml +++ b/charts/dremio/config/core-site.xml @@ -2,20 +2,20 @@ - {{- if and .Values.uploads.type (eq .Values.uploads.type "aws") }} + {{- if and .Values.distStorage.type (eq .Values.distStorage.type "aws") }} fs.s3a.access.key AWS access key ID. - {{ required "AWS access key required" .Values.uploads.aws.accessKey}} + {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} fs.s3a.secret.key AWS secret key. 
- {{ required "AWS secret required" .Values.uploads.aws.secret}} + {{ required "AWS secret required" .Values.distStorage.aws.secret}} {{- end }} - {{- if and .Values.uploads.type (eq .Values.uploads.type "azure") }} + {{- if and .Values.distStorage.type (eq .Values.distStorage.type "azure") }} @@ -26,17 +26,17 @@ dfs.adls.oauth2.client.id Application ID of the registered application under Azure Active Directory - {{required "Azure application ID required" .Values.uploads.azure.applicationId}} + {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} dfs.adls.oauth2.credential Generated password value for the registered application - {{required "Azure secret value required" .Values.uploads.azure.secret}} + {{required "Azure secret value required" .Values.distStorage.azure.secret}} dfs.adls.oauth2.refresh.url Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. - {{required "Azure OAuth2 token endpoint required" .Values.uploads.azure.oauth2EndPoint}} + {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} dfs.adls.oauth2.access.token.provider.type @@ -49,4 +49,32 @@ false {{- end }} + + {{- if and .Values.dremioVersion (ge .Values.dremioVersion "3.2.0") .Values.distStorage.type (eq .Values.distStorage.type "azureStorage") }} + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. + {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} + + + dremio.azure.key + The shared access key for the storage account. + {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} + + + dremio.azure.mode + The storage account type. 
Value: STORAGE_V2 + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. Value: True/False + True + + {{- end }} diff --git a/charts/dremio/config/dremio.conf b/charts/dremio/config/dremio.conf index 4e754236..2b626a5c 100644 --- a/charts/dremio/config/dremio.conf +++ b/charts/dremio/config/dremio.conf @@ -22,12 +22,25 @@ paths: { #dist: "pdfs://"${paths.local}"/pdfs" # If you are editing the uploads value in this file, please delete all the lines starting with double curly braces - {{- if .Values.uploads.type }} - {{- if eq .Values.uploads.type "aws" }} - uploads: "s3a://{{required "AWS bucketname required" .Values.uploads.aws.bucketName}}{{required "Path required" .Values.uploads.aws.path}}" + {{- if .Values.distStorage.type }} + {{- if and .Values.dremioVersion (lt .Values.dremioVersion "3.2.0") }} + {{- if eq .Values.distStorage.type "aws" }} + uploads: "s3a://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" + {{- end }} + {{- if eq .Values.distStorage.type "azure" }} + uploads: "adl://{{required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" + {{- end }} + {{- else }} # dremio_version > 3.2.0 + {{- if eq .Values.distStorage.type "aws" }} + uploads: "dremioS3://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" + {{- end }} + {{- if eq .Values.distStorage.type "azure" }} + uploads: "dremioAdl://{{required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" + {{- end }} + {{- if eq .Values.distStorage.type "azureStorage" }} + uploads: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" 
.Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.uploadsPath}}" + accelerator: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.acceleratorPath}}" {{- end }} - {{- if eq .Values.uploads.type "azure" }} - uploads: "adl://{{required "Azure Datalake store name required" .Values.uploads.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.uploads.azure.path}}" {{- end }} {{- end }} } diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index 624a11bc..72e8bbf4 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -2,7 +2,6 @@ # version tag to the version that you are using. This will ensure that all # the pods are using the same version of the software. image: dremio/dremio-oss:latest - # Check out Dremio documentation for memory and cpu requirements for # the coordinators and the executors. # The value of memory should be in MB. CPU is in no of cores. @@ -60,17 +59,24 @@ serviceType: LoadBalancer # Control where uploaded files are stored. # See https://docs.dremio.com/deployment/distributed-storage.html for more information -uploads: - # Valid values are local, aws or azure. aws and azure choice requires additional configuration data. +dremioVersion: "3.2.0" # Dremio Version 3.2.0 or greater +distStorage: + # Valid values are local, aws, azure or azureStorage. aws and azure choice requires additional configuration data. 
type: "local" - aws: + aws: #S3 - used for only uploads bucketName: "Your_AWS_bucket_name" path: "/" accessKey: "Your_AWS_Access_Key" secret: "Your_AWS_Secret" - azure: + azure: #ADLS v1 - used for only uploads datalakeStoreName: "Your_Azure_DataLake_Storage_name" path: "/" applicationId: "Your_Azure_Application_Id" secret: "Your_Azure_Secret" oauth2EndPoint: "Azure_OAuth2_Endpoint" + azureStorage: #AzureStorage v2 - supported in Dremio version 3.2.0+ - used for uploads and accelerator + accountName: "Azure_storage_v2_account_name" + accessKey: "Access_key_for_the_storage_account" + filesystem: "Filesystem_in_storage_account" + uploadsPath: "Path_for_uploads" + acceleratorPath: "Path_for_accelerator" From 6ac0ad9da8b92ca6d71bde2adc663655e4f0c6b7 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Wed, 5 Jun 2019 15:37:26 -0700 Subject: [PATCH 15/24] DX-16710: Configure accelerator and uploads to Storage V2 Create an Azure Storage V2 account, create a filesystem and directories in it, and configure Dremio to use them for accelerator and uploads.
Change-Id: Ic1ce87d3ed8f2bfc6ac5a8d822af75f742b61d1e --- azure/arm-templates/azuredeploy.json | 2 +- azure/arm-templates/nested/dremioCluster.json | 53 ++++++++- azure/arm-templates/nested/dremioState.json | 26 +---- azure/arm-templates/scripts/setupDremio.sh | 110 +++++++++++++++++- 4 files changed, 157 insertions(+), 34 deletions(-) diff --git a/azure/arm-templates/azuredeploy.json b/azure/arm-templates/azuredeploy.json index 5f353e3c..89593a09 100644 --- a/azure/arm-templates/azuredeploy.json +++ b/azure/arm-templates/azuredeploy.json @@ -170,7 +170,7 @@ "dataDiskId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.dataDiskId.value]"}, "sshUsername": {"value": "[parameters('SSHUsername')]"}, "sshPasswordOrKey": {"value": "[parameters('PasswordOrSSHPublicKey')]"}, - "subnetId": {"value": "[if(equals(trim(parameters('useExistingSubnet')), ''), reference(concat(variables('shortName'), '-state-deployment')).outputs.subnetId.value, parameters('useExistingSubnet'))]"}, + "subnetId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.subnetId.value]"}, "loadBalancerId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.loadBalancerId.value]"}, "nsgId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.nsgId.value]"}, "usePrivateIP": {"value": "[parameters('usePrivateIP')]"} diff --git a/azure/arm-templates/nested/dremioCluster.json b/azure/arm-templates/nested/dremioCluster.json index ff4291a6..18832f6d 100644 --- a/azure/arm-templates/nested/dremioCluster.json +++ b/azure/arm-templates/nested/dremioCluster.json @@ -144,10 +144,39 @@ "metadata": { "description": "Select to use the private ip address of the subnet for Dremio access." 
} + }, + "storageAccountName": { + "type": "string", + "defaultValue": "[concat('dremiometa',uniqueString(resourceGroup().id, deployment().name))]", + "metadata": { + "description": "Name of the storage account" + } + }, + "storageAccountType": { + "type": "string", + "defaultValue": "Standard_LRS", + "metadata": { + "description": "Storage account type" + } + }, + "storageKind": { + "type": "string", + "defaultValue": "StorageV2", + "metadata": { + "description": "Storage account kind" + } + }, + "storageAccessTier": { + "type": "string", + "defaultValue": "Hot", + "metadata": { + "description": "Storage access tier" + } } }, "variables": { "computeApiVersion": "2018-06-01", + "storageApiVersion": "2018-07-01", "location": "[resourceGroup().location]", "nicName": "[concat(parameters('dremioClusterName'), '-nic')]", "dremioImage": { @@ -183,6 +212,21 @@ "install": false }, "resources": [ + { + "name": "[parameters('storageAccountName')]", + "type": "Microsoft.Storage/storageAccounts", + "apiVersion": "[variables('storageApiVersion')]", + "location": "[variables('location')]", + "properties": { + "accessTier": "[parameters('storageAccessTier')]", + "supportsHttpsTrafficOnly": true, + "isHnsEnabled": true + }, + "sku": { + "name": "[parameters('storageAccountType')]" + }, + "kind": "[parameters('storageKind')]" + }, { "condition": "[and(not(parameters('usePrivateIP')), equals(parameters('publicIpNewOrExisting'), 'new'))]", "type": "Microsoft.Network/publicIPAddresses", @@ -232,7 +276,8 @@ "name": "[concat(parameters('dremioClusterName'), '-master')]", "location": "[variables('location')]", "dependsOn": [ - "[variables('nicName')]" + "[variables('nicName')]", + "[parameters('storageAccountName')]" ], "properties": { "hardwareProfile": { @@ -289,7 +334,7 @@ ] }, "protectedSettings": { - "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' master ')]" + "commandToExecute": "[concat('DOWNLOAD_URL=\"', 
parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' master ', parameters('storageAccountName'), ' ', listKeys(resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), providers('Microsoft.Storage', 'storageAccounts').apiVersions[0]).keys[0].value)]" } } } @@ -366,7 +411,7 @@ "fileUris": [ "[variables('scriptURL')]" ], - "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' coordinator ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" + "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' coordinator ', parameters('storageAccountName'), ' ', listKeys(resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), providers('Microsoft.Storage', 'storageAccounts').apiVersions[0]).keys[0].value, ' ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" } } } @@ -441,7 +486,7 @@ "fileUris": [ "[variables('scriptURL')]" ], - "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' executor ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" + "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' executor ', parameters('storageAccountName'), ' ', listKeys(resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), providers('Microsoft.Storage', 'storageAccounts').apiVersions[0]).keys[0].value, ' ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" 
} } } diff --git a/azure/arm-templates/nested/dremioState.json b/azure/arm-templates/nested/dremioState.json index bed09915..47359950 100644 --- a/azure/arm-templates/nested/dremioState.json +++ b/azure/arm-templates/nested/dremioState.json @@ -23,27 +23,6 @@ "description": "Size of the Dremio Master Data Disk" } }, - "storageNewOrExisting": { - "type": "string", - "defaultValue": "new", - "metadata": { - "description": "Determines whether or not a new storage account should be provisioned." - } - }, - "storageAccountName": { - "type": "string", - "defaultValue": "[concat('storage', uniqueString(resourceGroup().id))]", - "metadata": { - "description": "Name of the storage account" - } - }, - "storageAccountType": { - "type": "string", - "defaultValue": "Standard_LRS", - "metadata": { - "description": "Storage account type" - } - }, "virtualNetworkNewOrExisting": { "type": "string", "defaultValue": "new", @@ -141,7 +120,6 @@ }, "variables": { "computeApiVersion": "2018-06-01", - "storageApiVersion": "2018-07-01", "location": "[resourceGroup().location]", "virtualNetworkName": "[concat(parameters('dremioClusterName'), '-vnet')]", "publicIpAddressId": "[resourceId(parameters('publicIpResourceGroupName'), 'Microsoft.Network/publicIPAddresses', parameters('publicIpName'))]", @@ -174,6 +152,8 @@ "id": "[parameters('existingSubnet')]" } }, + "networkAclsBypass": "AzureServices", + "networkAclsDefaultAction": "Deny", "install": false }, "resources": [ @@ -403,7 +383,7 @@ }, "subnetId": { "type": "string", - "value": "[if(equals(parameters('virtualNetworkNewOrExisting'), 'new'), resourceId(resourceGroup().name, 'Microsoft.Network/virtualNetworks/subnets/', variables('virtualNetworkName'), parameters('subnetName')), '')]" + "value": "[if(equals(parameters('virtualNetworkNewOrExisting'), 'new'), resourceId(resourceGroup().name, 'Microsoft.Network/virtualNetworks/subnets/', variables('virtualNetworkName'), parameters('subnetName')), parameters('existingSubnet'))]" }, 
"loadBalancerId": { "type": "string", diff --git a/azure/arm-templates/scripts/setupDremio.sh b/azure/arm-templates/scripts/setupDremio.sh index d4cb5f18..dc4fd877 100644 --- a/azure/arm-templates/scripts/setupDremio.sh +++ b/azure/arm-templates/scripts/setupDremio.sh @@ -3,9 +3,7 @@ [ -z $DOWNLOAD_URL ] && DOWNLOAD_URL=http://download.dremio.com/community-server/dremio-community-LATEST.noarch.rpm if [ ! -f /opt/dremio/bin/dremio ]; then command -v yum >/dev/null 2>&1 || { echo >&2 "This script works only on Centos or Red Hat. Aborting."; exit 1; } - yum install -y java-1.8.0-openjdk - wget $DOWNLOAD_URL -O dremio-download.rpm - yum -y localinstall dremio-download.rpm + yum install -y java-1.8.0-openjdk-devel $DOWNLOAD_URL fi service=$1 @@ -13,12 +11,20 @@ if [ -z "$service" ]; then echo "Require the service to start - master, coordinator or executor" exit 1 fi +storage_account=$2 +access_key=$3 + +if [ -n "$storage_account" -a -n "$access_key" ]; then + use_azure_storage=1 +fi # In Azure, /dev/sdb is ephemeral storage mapped to /mnt/resource. # Additional disks are mounted after that... 
DISK_NAME=/dev/sdc DISK_PART=${DISK_NAME}1 -DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf +DREMIO_HOME=/opt/dremio +DREMIO_CONFIG_DIR=/etc/dremio +DREMIO_CONFIG_FILE=$DREMIO_CONFIG_DIR/dremio.conf DREMIO_DATA_DIR=/var/lib/dremio # Azure Linux VMs have ephemeral/temporary disk # always mounted on /mnt/resource/dremio @@ -36,7 +42,11 @@ if [ "$service" == "master" ]; then chown dremio:dremio $DREMIO_DATA_DIR echo "$DISK_PART $DREMIO_DATA_DIR ext4 defaults 0 0" >> /etc/fstab else - zookeeper=$2 + if [ -n "$use_azure_storage" ]; then + zookeeper=$4 + else + zookeeper=$2 + fi if [ -z "$zookeeper" ]; then echo "Non-master node requires zookeeper host" exit 2 @@ -69,11 +79,18 @@ function upgrade_master { cd $DREMIO_DATA_DIR if [ -d db ]; then tar -zcvf dremio_db_$(date '+%Y-%m-%d_%H-%M').tar.gz db - /opt/dremio/bin/dremio-admin upgrade + sudo -u dremio /opt/dremio/bin/dremio-admin upgrade fi } function setup_master { + if [ -n "$use_azure_storage" ]; then + storage_create_action "dremiodata" filesystem && \ + storage_create_action "dremiodata/accelerator" directory && \ + storage_create_action "dremiodata/uploads" directory + fi + + configure_dremio_dist sed -i "s/executor.enabled: true/executor.enabled: false/" $DREMIO_CONFIG_FILE upgrade_master } @@ -81,6 +98,7 @@ function setup_master { function setup_coordinator { yum install -y nc until nc -z $zookeeper 9047 > /dev/null; do echo waiting for dremio master; sleep 2; done; + configure_dremio_dist sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled: false/; \ s/executor.enabled: true/executor.enabled: false/" \ $DREMIO_CONFIG_FILE @@ -88,6 +106,7 @@ function setup_coordinator { } function setup_executor { + configure_dremio_dist setup_spill sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled: false/; \ s/coordinator.enabled: true/coordinator.enabled: false/; \ @@ -96,6 +115,85 @@ function setup_executor { echo "zookeeper: \"$zookeeper:2181\"" >> $DREMIO_CONFIG_FILE } +function
storage_create_action { + resource=$1 + resource_type=$2 + blob_store_url="dfs.core.windows.net" + authorization="SharedKey" + request_method="PUT" + request_date=$(TZ=GMT date "+%a, %d %h %Y %H:%M:%S %Z") + storage_service_version="2018-11-09" + # HTTP Request headers + x_ms_date_h="x-ms-date:$request_date" + x_ms_version_h="x-ms-version:$storage_service_version" + content_length_h="Content-Length: 0" + # Build the signature string + canonicalized_headers="${x_ms_date_h}\n${x_ms_version_h}" + canonicalized_resource="/${storage_account}/${resource}\nresource:${resource_type}" + string_to_sign="${request_method}\n\n\n\n\n\n\n\n\n\n\n\n${canonicalized_headers}\n${canonicalized_resource}" + # Decode the Base64 encoded access key, convert to Hex. + decoded_hex_key="$(echo -n $access_key | base64 -d -w0 | xxd -p -c256)" + # Create the HMAC signature for the Authorization header + signature=$(printf "$string_to_sign" | openssl dgst -sha256 -mac HMAC -macopt "hexkey:$decoded_hex_key" -binary | base64 -w0) + authorization_header="Authorization: $authorization $storage_account:$signature" + curl \ + -X $request_method \ + -H "$content_length_h" \ + -H "$x_ms_date_h" \ + -H "$x_ms_version_h" \ + -H "$authorization_header" \ + "https://${storage_account}.${blob_store_url}/${resource}?resource=${resource_type}" + return $? +} + +function write_coresite_xml { +cat > $DREMIO_CONFIG_DIR/core-site.xml < + + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. + $storage_account + + + dremio.azure.key + The shared access key for the storage account. + $access_key + + + dremio.azure.mode + The storage account type. Value: STORAGE_V2 + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. 
Value: True/False + True + + +EOF +} + +function update_dremio_config { +cat >> $DREMIO_CONFIG_FILE < Date: Mon, 10 Jun 2019 14:21:04 -0700 Subject: [PATCH 16/24] DX-16819: Add TLS support in Helm chart. Change-Id: I17e70a423356849c0d2368648a72241b97d5afeb --- charts/dremio/config/dremio.conf | 15 ++++ .../dremio/templates/dremio-coordinator.yaml | 72 +++++++++++++++++ charts/dremio/templates/dremio-master.yaml | 78 ++++++++++++++++++- charts/dremio/values.yaml | 15 ++++ 4 files changed, 179 insertions(+), 1 deletion(-) diff --git a/charts/dremio/config/dremio.conf b/charts/dremio/config/dremio.conf index 2b626a5c..12252069 100644 --- a/charts/dremio/config/dremio.conf +++ b/charts/dremio/config/dremio.conf @@ -55,3 +55,18 @@ services: { # # Other service parameters can be customized via this file. } + +{{- if .Values.tls.ui.enabled }} +services.coordinator.web.ssl.enabled: true +services.coordinator.web.ssl.auto-certificate.enabled: false + +services.coordinator.web.ssl.keyStore: "/opt/dremio/tls/ui.pkcs12" +{{- end }} + +{{- if .Values.tls.client.enabled }} +# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in Dremio Enterprise Edition. 
+services.coordinator.client-endpoint.ssl.enabled: true +services.coordinator.client-endpoint.ssl.auto-certificate.enabled: false + +services.coordinator.client-endpoint.ssl.keyStore: "/opt/dremio/tls/client.pkcs12" +{{- end }} \ No newline at end of file diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index 3bffa687..86d5f2ac 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -37,6 +37,10 @@ spec: volumeMounts: - name: dremio-config mountPath: /opt/dremio/conf + {{- if or .Values.tls.ui.enabled .Values.tls.client.enabled }} + - name: dremio-tls + mountPath: /opt/dremio/tls + {{- end }} env: - name: DREMIO_MAX_HEAP_MEMORY_SIZE_MB value: "{{ template "HeapMemory" .Values.coordinator.memory }}" @@ -64,10 +68,78 @@ spec: - name: wait-for-zk image: busybox command: ["sh", "-c", "until nc -z dremio-client {{ .Values.coordinator.web.port | default 9047 }} > /dev/null; do echo waiting for dremio master; sleep 2; done;"] + {{- if .Values.tls.ui.enabled }} + - name: generate-ui-keystore + image: {{.Values.image}} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-ui + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: + - "pkcs12" + - "-export" + - "-inkey" + - "/dremio-tls-secret/tls.key" + - "-in" + - "/dremio-tls-secret/tls.crt" + - "-out" + - "/opt/dremio/tls/ui.pkcs12" + - "-passout" + - "pass:" + {{- end }} + {{- if .Values.tls.client.enabled }} + - name: generate-client-keystore + image: {{.Values.image}} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-client + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: + - "pkcs12" + - "-export" + - "-inkey" + - "/dremio-tls-secret/tls.key" + - "-in" + - "/dremio-tls-secret/tls.crt" + - "-out" + - 
"/opt/dremio/tls/client.pkcs12" + - "-passout" + - "pass:" + {{- end }} volumes: - name: dremio-config configMap: name: dremio-config + {{- if or .Values.tls.ui.enabled .Values.tls.client.enabled }} + - name: dremio-tls + emptyDir: {} + {{- end }} + {{- if .Values.tls.ui.enabled }} + - name: dremio-tls-secret-ui + secret: + secretName: {{ .Values.tls.ui.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- if .Values.tls.client.enabled }} + - name: dremio-tls-secret-client + secret: + secretName: {{ .Values.tls.client.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} {{- if .Values.imagePullSecrets }} imagePullSecrets: - name: {{ .Values.imagePullSecrets }} diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index 423ce78f..f272eb32 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -47,6 +47,10 @@ spec: mountPath: /opt/dremio/data - name: dremio-config mountPath: /opt/dremio/conf + {{- if or .Values.tls.ui.enabled .Values.tls.client.enabled }} + - name: dremio-tls + mountPath: /opt/dremio/tls + {{- end }} env: - name: DREMIO_MAX_HEAP_MEMORY_SIZE_MB value: "{{ template "HeapMemory" .Values.coordinator.memory }}" @@ -71,7 +75,11 @@ spec: - containerPort: 45678 name: server readinessProbe: - tcpSocket: + httpGet: + path: / + {{- if .Values.tls.ui.enabled }} + scheme: HTTPS + {{- end }} port: 9047 initialDelaySeconds: 5 periodSeconds: 5 @@ -104,10 +112,78 @@ spec: command: ["/opt/dremio/bin/dremio-admin"] args: - "upgrade" + {{- if .Values.tls.ui.enabled }} + - name: generate-ui-keystore + image: {{.Values.image}} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-ui + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: + - "pkcs12" + - "-export" + - "-inkey" + - 
"/dremio-tls-secret/tls.key" + - "-in" + - "/dremio-tls-secret/tls.crt" + - "-out" + - "/opt/dremio/tls/ui.pkcs12" + - "-passout" + - "pass:" + {{- end }} + {{- if .Values.tls.client.enabled }} + - name: generate-client-keystore + image: {{.Values.image}} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-client + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: + - "pkcs12" + - "-export" + - "-inkey" + - "/dremio-tls-secret/tls.key" + - "-in" + - "/dremio-tls-secret/tls.crt" + - "-out" + - "/opt/dremio/tls/client.pkcs12" + - "-passout" + - "pass:" + {{- end }} volumes: - name: dremio-config configMap: name: dremio-config + {{- if or .Values.tls.ui.enabled .Values.tls.client.enabled }} + - name: dremio-tls + emptyDir: {} + {{- end }} + {{- if .Values.tls.ui.enabled }} + - name: dremio-tls-secret-ui + secret: + secretName: {{ .Values.tls.ui.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- if .Values.tls.client.enabled }} + - name: dremio-tls-secret-client + secret: + secretName: {{ .Values.tls.client.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} {{- if .Values.imagePullSecrets }} imagePullSecrets: - name: {{ .Values.imagePullSecrets }} diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index 72e8bbf4..2601ebba 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -28,6 +28,21 @@ zookeeper: count: 3 volumeSize: 10Gi +# To create a TLS secret, use the following command: +# kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE} +tls: + ui: + # To enable TLS for the web UI, set the enabled flag to true and provide + # the appropriate Kubernetes TLS secret. 
+ enabled: false + secret: dremio-tls-secret-ui + client: + # To enable TLS for the client endpoints, set the enabled flag to true and provide + # the appropriate Kubernetes TLS secret. Client endpoint encryption is available only on + # Dremio Enterprise Edition and should not be enabled otherwise. + enabled: false + secret: dremio-tls-secret-client + # If your Kubernetes cluster does not support LoadBalancer, # comment out the line below for the helm chart to succeed or add # the correct serviceType for your cluster. From 6519a4f368556b67cbf3d3eeeafa94063540ee61 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Thu, 11 Jul 2019 10:31:50 -0700 Subject: [PATCH 17/24] DX-17385: Dockerfile to build dremio/cloud-tools The image includes useful cloud tools. See the Dockerfile for the list of tools/clients installed. Change-Id: I6edd554ffc37b353813b8c351a9d505332267930 --- utils/Dockerfile | 38 ++++++++++++++++++++++++++++++++++++++ utils/helm-init.sh | 14 ++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 utils/Dockerfile create mode 100755 utils/helm-init.sh diff --git a/utils/Dockerfile b/utils/Dockerfile new file mode 100644 index 00000000..da700d1d --- /dev/null +++ b/utils/Dockerfile @@ -0,0 +1,38 @@ +################################################################ +# Image with useful cloud tools installed: +# - aws cli +# - eksctl +# - azure cli +# - gcloud +# - kubectl +# - helm +# - git +# +# An image built with this is published in Dockerhub as +# dremio/cloud-tools +# +################################################################ +FROM centos + +ADD helm-init.sh /usr/local/bin + +RUN \ + yum install -y epel-release && \ + yum install -y which openssl git python-pip && \ + pip install --upgrade pip && \ + pip install awscli && \ + curl --silent --location "https://github.com/weaveworks/eksctl/releases/download/latest_release/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp && \ + mv /tmp/eksctl /usr/local/bin && \ + curl -s -o 
aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.13.7/2019-06-11/bin/linux/amd64/aws-iam-authenticator && \ + chmod +x ./aws-iam-authenticator && \ + mv aws-iam-authenticator /usr/local/bin && \ + curl -s -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl && \ + chmod +x kubectl && \ + mv kubectl /usr/local/bin && \ + curl -s -L https://git.io/get_helm.sh | bash && \ + rpm --import https://packages.microsoft.com/keys/microsoft.asc && \ + sh -c 'echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" > /etc/yum.repos.d/azure-cli.repo' && \ + yum install -y azure-cli && \ + curl https://sdk.cloud.google.com | bash + +WORKDIR /root diff --git a/utils/helm-init.sh b/utils/helm-init.sh new file mode 100755 index 00000000..4029ff5d --- /dev/null +++ b/utils/helm-init.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +if ! command -v helm > /dev/null 2>&1; then + echo "Helm not found. Installing helm..." + curl -L https://git.io/get_helm.sh | bash + if ! command -v helm > /dev/null 2>&1; then + echo "Failed installation of Helm. Please check the script and debug. " + exit 1 + fi + echo "Helm successfully installed on your machine." +fi +kubectl create serviceaccount -n kube-system tiller +kubectl create clusterrolebinding tiller-binding --clusterrole=cluster-admin --serviceaccount kube-system:tiller +helm init --service-account tiller --wait From 0ed2e686ff728a07bca0973cab6092a40985e2b0 Mon Sep 17 00:00:00 2001 From: Mikhail Stolpner Date: Thu, 8 Aug 2019 17:23:24 -0700 Subject: [PATCH 18/24] DX-18003: Fixed CloudFormation template for security group ingress.
Change-Id: I90e2515ebc230fa8da5940498516f9ae313a019e --- aws/cloudformation/dremio_cf.yaml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/aws/cloudformation/dremio_cf.yaml b/aws/cloudformation/dremio_cf.yaml index f7b1d21c..9b0e15c0 100644 --- a/aws/cloudformation/dremio_cf.yaml +++ b/aws/cloudformation/dremio_cf.yaml @@ -217,8 +217,6 @@ Resources: FromPort: '80' ToPort: '80' CidrIp: 0.0.0.0/0 - - IpProtocol: -1 - SourceSecurityGroupName: "Dremio Access" AvailabilityWaitHandle: Type: AWS::CloudFormation::WaitConditionHandle AvailabilityWaitCondition: @@ -228,6 +226,13 @@ Resources: Handle: !Ref "AvailabilityWaitHandle" Timeout: "600" + DremioSecurityGroupSelfIngress: + Type: AWS::EC2::SecurityGroupIngress + Properties: + GroupId: !Ref DremioSecurityGroup + IpProtocol: -1 + SourceSecurityGroupId: !Ref DremioSecurityGroup + DremioMaster: Type: AWS::EC2::Instance Properties: From 228380acfa54ec3177431ae2f77035fde1c953da Mon Sep 17 00:00:00 2001 From: J C Lawrence Date: Wed, 18 Sep 2019 15:46:48 -0700 Subject: [PATCH 19/24] DX-18724: Scale memory sizes with instance sizes Change-Id: If8b01fcc3b7d71563a07f8115ce0c4ae03bd3f9a --- aws/cloudformation/dremio_cf.yaml | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/aws/cloudformation/dremio_cf.yaml b/aws/cloudformation/dremio_cf.yaml index 9b0e15c0..0e7d3e1c 100644 --- a/aws/cloudformation/dremio_cf.yaml +++ b/aws/cloudformation/dremio_cf.yaml @@ -66,33 +66,43 @@ Mappings: X-Small--1-executor: coordinatorInstanceType: m5.2xlarge coordinatorDiskSize: 10 + coordinatorMaxMemory: 28672 executorInstanceType: r5d.4xlarge executorCount: 1 executorDiskSize: 10 + executorMaxMemory: 122880 Small--5-executors: coordinatorInstanceType: m5.2xlarge coordinatorDiskSize: 50 + coordinatorMaxMemory: 28672 executorInstanceType: r5d.4xlarge executorCount: 5 executorDiskSize: 50 + executorMaxMemory: 122880 Medium--10-executors: coordinatorInstanceType: m5.4xlarge 
coordinatorDiskSize: 100 + coordinatorMaxMemory: 61440 executorInstanceType: r5d.4xlarge executorCount: 10 executorDiskSize: 100 + executorMaxMemory: 122880 Large--25-executors: coordinatorInstanceType: m5.4xlarge coordinatorDiskSize: 100 + coordinatorMaxMemory: 61440 executorInstanceType: r5d.4xlarge executorCount: 25 executorDiskSize: 100 + executorMaxMemory: 122880 X-Large--50-executors: coordinatorInstanceType: m5.4xlarge coordinatorDiskSize: 100 + coordinatorMaxMemory: 61440 executorInstanceType: r5d.4xlarge executorCount: 50 executorDiskSize: 100 + executorMaxMemory: 122880 RegionMap: # Centos 7 Images us-east-1: # N Virginia @@ -273,8 +283,12 @@ Resources: DREMIO_HOME=/opt/dremio DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf + DREMIO_ENV_FILE=/etc/dremio/dremio-env - sed -i "s/executor.enabled: true/executor.enabled: false/" $DREMIO_CONFIG_FILE + sed -i -e "s/executor.enabled: true/executor.enabled: false/" \ + $DREMIO_CONFIG_FILE + sed -i -e "s/#DREMIO_MAX_MEMORY_SIZE_MB=/DREMIO_MAX_MEMORY_SIZE_MB=${MEMORY_SIZE}/" \ + $DREMIO_ENV_FILE cp $DREMIO_HOME/share/dremio/dremio.service /etc/systemd/system systemctl daemon-reload @@ -288,6 +302,7 @@ Resources: echo "{ \"Status\" : \"SUCCESS\", \"UniqueId\" : \"${AWS::StackName}\", \"Data\" : \"Ready\", \"Reason\" : \"Website Available\" }" > $statusFile curl -T $statusFile '${AvailabilityWaitHandle}' - DOWNLOAD_URL: !FindInMap [ Custom, Variables, "URL"] + MEMORY_SIZE: !FindInMap [ClusterSizes, !Ref clusterSize, coordinatorMaxMemory] DremioExecutorLC: Type: AWS::AutoScaling::LaunchConfiguration @@ -332,19 +347,23 @@ Resources: SPILL_DIR=/var/ephemeral/dremio_spill DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf + DREMIO_ENV_FILE=/etc/dremio/dremio-env - sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled: false/; \ - s/coordinator.enabled: true/coordinator.enabled: false/; \ - /local:/a \ \ spilling: [\"$SPILL_DIR\"]" \ - $DREMIO_CONFIG_FILE + sed -i -e "s/coordinator.master.enabled: 
true/coordinator.master.enabled: false/" \ + -e "s/coordinator.enabled: true/coordinator.enabled: false/" \ + -e "/local:/a \ \ spilling: [\"$SPILL_DIR\"]" \ + $DREMIO_CONFIG_FILE + sed -i -e "s/#DREMIO_MAX_MEMORY_SIZE_MB=/DREMIO_MAX_MEMORY_SIZE_MB=${MEMORY_SIZE}/" \ + $DREMIO_ENV_FILE echo "zookeeper: \"${ZK}:2181\"" >> $DREMIO_CONFIG_FILE + cp $DREMIO_HOME/share/dremio/dremio.service /etc/systemd/system systemctl daemon-reload systemctl start dremio systemctl enable dremio - - ZK: !GetAtt DremioMaster.PrivateIp DOWNLOAD_URL: !FindInMap [ Custom, Variables, "URL"] + MEMORY_SIZE: !FindInMap [ClusterSizes, !Ref clusterSize, executorMaxMemory] DremioExecutorASG: Type: AWS::AutoScaling::AutoScalingGroup From fd8e42534d77847f2e5f858953bf17465760fd73 Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Wed, 18 Sep 2019 23:17:17 -0700 Subject: [PATCH 20/24] DX-18733: Enable C3 on Dremio executors for CFTs. - Additionally fixes deploying multiple instances of the CFT in the same subnet by removing the GroupName and allowing AWS to dynamically generate one since GroupName(s) must be unique in a subnet. 
Change-Id: I1fb6101528b857a40e09f41aebd11a9843ecb257 --- aws/cloudformation/dremio_cf.yaml | 37 ++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/aws/cloudformation/dremio_cf.yaml b/aws/cloudformation/dremio_cf.yaml index 0e7d3e1c..f30be27b 100644 --- a/aws/cloudformation/dremio_cf.yaml +++ b/aws/cloudformation/dremio_cf.yaml @@ -207,7 +207,6 @@ Resources: DremioSecurityGroup: Type: AWS::EC2::SecurityGroup Properties: - GroupName: "Dremio Access" GroupDescription: "Dremio Access" VpcId: !If [CreateVPC, !Ref VPC, !Ref useVPC] SecurityGroupIngress: @@ -336,16 +335,28 @@ Resources: yum -y install java-1.8.0-openjdk-devel $url fi - mkdir /var/ephemeral - # Setup ephemeral disk - this is based on executors are r5d class machines - NVME=nvme1n1 - file -s /dev/$NVME | grep "/dev/$NVME: data" && mkfs -t xfs /dev/$NVME && \ - UUID=$(blkid | grep $NVME | awk -F'"' '{ print $2 }') && \ - echo "UUID=$UUID /var/ephemeral xfs defaults,nofail 0 2" >> /etc/fstab && \ + # Setup ephemeral disk for spill - this is based on executors are r5d class machines + SPILL_DIR=/var/ephemeral/spill + NVME_SPILL=nvme1n1 + + mkdir -p $SPILL_DIR + file -s /dev/$NVME_SPILL | grep "/dev/$NVME_SPILL: data" && mkfs -t xfs /dev/$NVME_SPILL && \ + UUID=$(blkid | grep $NVME_SPILL | awk -F'"' '{ print $2 }') && \ + echo "UUID=$UUID $SPILL_DIR xfs defaults,nofail 0 2" >> /etc/fstab && \ + mount -a + chown dremio:dremio $SPILL_DIR + + # Setup ephemeral disk for C3 - this is based on executors are r5d class machines + CLOUDCACHE_DIR=/var/ephemeral/cloudcache + NVME_CLOUDCACHE=nvme2n1 + + mkdir -p $CLOUDCACHE_DIR + file -s /dev/$NVME_CLOUDCACHE | grep "/dev/$NVME_CLOUDCACHE: data" && mkfs -t xfs /dev/$NVME_CLOUDCACHE && \ + UUID=$(blkid | grep $NVME_CLOUDCACHE | awk -F'"' '{ print $2 }') && \ + echo "UUID=$UUID $CLOUDCACHE_DIR xfs defaults,nofail 0 2" >> /etc/fstab && \ mount -a - chmod 777 /var/ephemeral + chown dremio:dremio $CLOUDCACHE_DIR - 
SPILL_DIR=/var/ephemeral/dremio_spill DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf DREMIO_ENV_FILE=/etc/dremio/dremio-env @@ -355,7 +366,13 @@ Resources: $DREMIO_CONFIG_FILE sed -i -e "s/#DREMIO_MAX_MEMORY_SIZE_MB=/DREMIO_MAX_MEMORY_SIZE_MB=${MEMORY_SIZE}/" \ $DREMIO_ENV_FILE - echo "zookeeper: \"${ZK}:2181\"" >> $DREMIO_CONFIG_FILE + cat <<EOF >> $DREMIO_CONFIG_FILE + zookeeper: "${ZK}:2181" + services.executor.cache.path.db: "$CLOUDCACHE_DIR" + services.executor.cache.path.fs: ["$CLOUDCACHE_DIR"] + services.executor.cache.pctquota.db: 10 + services.executor.cache.pctquota.fs: [100] + EOF cp $DREMIO_HOME/share/dremio/dremio.service /etc/systemd/system systemctl daemon-reload From d199978f1eaf648ec014e5e171e481698b6bb554 Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Thu, 19 Sep 2019 01:03:37 -0700 Subject: [PATCH 21/24] Add Helm C3 executor and dist store caching - Dremio 4.0.0 or later required. - Adds the concept of an imageTag to expose features that are introduced only in newer versions of Dremio. - Removes the dremioVersion value that needs to be manually set to reference the same version that is used by the image. - Adds optional Cloud Cache support. Dist is split between PDFS and cloud storage.
Change-Id: Idccb0acbd82a8f3ed50b249599c0b04e93641406 --- charts/dremio/Chart.yaml | 2 +- charts/dremio/README.md | 168 +++++++++++++----- charts/dremio/config/core-site.xml | 163 +++++++++-------- charts/dremio/config/dremio.conf | 68 ++++--- charts/dremio/templates/dremio-admin.yaml | 2 +- .../dremio/templates/dremio-coordinator.yaml | 6 +- charts/dremio/templates/dremio-executor.yaml | 7 +- charts/dremio/templates/dremio-master.yaml | 10 +- charts/dremio/values.yaml | 51 ++++-- 9 files changed, 298 insertions(+), 179 deletions(-) diff --git a/charts/dremio/Chart.yaml b/charts/dremio/Chart.yaml index 0457134e..7a40ce28 100644 --- a/charts/dremio/Chart.yaml +++ b/charts/dremio/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v1" name: "dremio" -version: "0.0.7" +version: "0.1.0" keywords: - dremio - data diff --git a/charts/dremio/README.md b/charts/dremio/README.md index 6d8ecd0e..8bab30c8 100644 --- a/charts/dremio/README.md +++ b/charts/dremio/README.md @@ -2,7 +2,10 @@ ## Overview -This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses a persistent volume for the master node to store the metadata for the cluster. The default configuration uses the default persistent storage supported by the kubernetes platform. For example, +This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses +a persistent volume for the master node to store the metadata for the +cluster. The default configuration uses the default persistent storage +supported by the kubernetes platform. For example, | Kubernetes platform | Persistent store | |---------------------|------------------| @@ -11,26 +14,48 @@ This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses a persist | Google GKE | Persistent Disk | | Local K8S on Docker | Hostpath | -If you want to use a different storage class available in your kubernetes environment, add the storageClass in values.yaml. 
- -An appropriate distributed file store (S3, ADLS, HDFS, etc) should be used for paths.dist as this deployment will lose locally persisted reflections and uploads. You can update config/dremio.conf. Dremio [documentation](https://docs.dremio.com/deployment/distributed-storage.html) provides more information on this. - -This assumes you already have kubernetes cluster setup, kubectl configured to talk to your kubernetes cluster and helm setup in your cluster. Review and update values.yaml to reflect values for your environment before installing the helm chart. This is specially important for for the memory and cpu values - your kubernetes cluster should have sufficient resources to provision the pods with those values. If your kubernetes installation does not support serviceType LoadBalancer, it is recommended to comment the serviceType value in values.yaml file before deploying. +If you want to use a different storage class available in your +kubernetes environment, add the storageClass in values.yaml. + +An appropriate distributed file store (S3, ADLS, HDFS, etc) should be +used for paths.dist as this deployment will lose locally persisted +reflections and uploads. You can update config/dremio.conf. Dremio +[documentation](https://docs.dremio.com/deployment/distributed-storage.html) +provides more information on this. + +This assumes you already have kubernetes cluster setup, kubectl +configured to talk to your kubernetes cluster and helm setup in your +cluster. Review and update values.yaml to reflect values for your +environment before installing the helm chart. This is especially +important for the memory and cpu values - your kubernetes cluster +should have sufficient resources to provision the pods with those +values. If your kubernetes installation does not support serviceType +LoadBalancer, it is recommended to comment out the serviceType value in +values.yaml file before deploying.
#### Installing the helm chart -Review charts/dremio/values.yaml and adjust the values as per your requirements. Note that the values for cpu and memory for the coordinator and the executors are set to work with AKS on Azure with worker nodes setup with machine types Standard_E16s_v3. + +Review charts/dremio/values.yaml and adjust the values as per your +requirements. Note that the values for cpu and memory for the +coordinator and the executors are set to work with AKS on Azure with +worker nodes setup with machine types Standard_E16s_v3. Run this from the charts directory + ```bash -cd charts -helm install --wait dremio -``` -If it takes longer than a couple of minutes to complete, check the status of the pods to see where they are waiting. If they are pending scheduling due to limited memory or cpu, either adjust the values in values.yaml and restart the process or add more resources to your kubernetes cluster. +cd charts helm install --wait dremio ``` + +If it takes longer than a couple of minutes to complete, check the +status of the pods to see where they are waiting. If they are pending +scheduling due to limited memory or cpu, either adjust the values in +values.yaml and restart the process or add more resources to your +kubernetes cluster. #### Connect to the Dremio UI -If your kubernetes supports serviceType LoadBalancer, you can get to the Dremio UI on the load balancer external ip. -For example, if your service output is: +If your kubernetes supports serviceType LoadBalancer, you can get to +the Dremio UI on the load balancer external IP. 
For example, if your +service output is: ```bash kubectl get services dremio-client @@ -38,24 +63,32 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) dremio-client LoadBalancer 10.99.227.180 35.226.31.211 31010:32260/TCP,9047:30620/TCP 2d ``` -you can get to the Dremio UI using the value under column EXTERNAL-IP: +You can get to the Dremio UI using the value under column EXTERNAL-IP: http://35.226.31.211:9047 -If your kubernetes does not have support of serviceType LoadBalancer, you can access the Dremio UI on the port exposed on the node. For example, if the service output is: +If your kubernetes does not have support of serviceType LoadBalancer, +you can access the Dremio UI on the port exposed on the node. For +example, if the service output is: ```bash kubectl get services dremio-client NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE dremio-client NodePort 10.110.65.97 31010:32390/TCP,9047:30670/TCP 1h ``` -where there is no external ip and the Dremio master is running on node "localhost", you can get to Dremio UI using: -http://localhost:30670 +Where there is no external IP and the Dremio master is running on node +"localhost", you can get to Dremio UI using: +http://localhost:30670 #### Dremio Client Port -The port 31010 is used for ODBC and JDBC connections. You can look up service dremio-client in kubernetes to find the host to use for ODBC or JDBC connections. Depending on your kubernetes cluster supporting serviceType LoadBalancer, you will use the load balancer external-ip or the node on which a coordinator is running. + +The port 31010 is used for ODBC and JDBC connections. You can look up +service dremio-client in kubernetes to find the host to use for ODBC +or JDBC connections. Depending on your kubernetes cluster supporting +serviceType LoadBalancer, you will use the load balancer external-ip +or the node on which a coordinator is running. 
```bash kubectl get services dremio-client @@ -63,32 +96,37 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) dremio-client LoadBalancer 10.99.227.180 35.226.31.211 31010:32260/TCP,9047:30620/TCP 2d ``` -For example, in the above output, the service is exposed on an external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC connections. +For example, in the above output, the service is exposed on an +external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC +connections. #### Viewing logs -Logs are written to the container's console. All the logs - server.log, server.out, server.gc and access.log - are written into the console simultaneously. You can view the logs using kubectl. -``` -kubectl logs -``` -You can also tail the logs using the -f parameter. -``` -kubectl logs -f -``` + +Logs are written to the container's console. All the logs - +server.log, server.out, server.gc and access.log - are written into +the console simultaneously. You can view the logs using kubectl. ``` +kubectl logs ``` You can also tail the logs using the +-f parameter. ``` kubectl logs -f ``` #### Scale by adding additional Coordinators or Executors (optional) -Get the name of the helm release. In the example below, the release name is plundering-alpaca. + +Get the name of the helm release. In the example below, the release +name is plundering-alpaca: + ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 default ``` -Add additional coordinators +Add additional coordinators: + ```bash helm upgrade dremio --set coordinator.count=3 ``` -Add additional executors +Add additional executors: + ```bash helm upgrade dremio --set executor.count=5 ``` @@ -96,33 +134,44 @@ helm upgrade dremio --set executor.count=5 You can also scale down the same way. 
### Running offline dremio-admin commands -Administration commands restore, cleanup and set-password in dremio-admin needs to be run when -the Dremio cluster is not running. So, before running these commands, you need to shutdown -the Dremio cluster. Use the helm delete command to delete the helm release. -(Kubernetes does not delete the persistent store volumes when you delete statefulset pods and -when you install the cluster again using helm, the existing persistent store will be used and -you will get your Dremio cluster running again.) - -After Dremio cluster is shutdown, start the dremio-admin pod using + +Administration commands restore, cleanup and set-password in +dremio-admin needs to be run when the Dremio cluster is not +running. So, before running these commands, you need to shutdown the +Dremio cluster. Use the helm delete command to delete the helm +release. (Kubernetes does not delete the persistent store volumes +when you delete statefulset pods and when you install the cluster +again using helm, the existing persistent store will be used and you +will get your Dremio cluster running again.) + +After Dremio cluster is shutdown, start the dremio-admin pod using: + ```bash helm install --wait dremio --set DremioAdmin=true ``` -Once the pod is running, you can connect to the pod using +Once the pod is running, you can connect to the pod using: + ```bash kubectl exec -it dremio-admin -- bash ``` Now, you have a bash shell from where you can run the dremio-admin commands. -Once you are done, you can delete the helm release for the dremio-admin and start your Dremio cluster. +Once you are done, you can delete the helm release for the +dremio-admin and start your Dremio cluster. #### Upgrading Dremio -You should attempt upgrade when no queries are running on the cluster. Update the Dremio image tag in your values.yaml file. E.g. + +You should attempt upgrade when no queries are running on the +cluster. Update the Dremio image tag in your values.yaml file. 
E.g: + ```bash image: dremio/dremio-oss:3.0.0 ... ``` -Get the name of the helm release. In the example below, the release name is plundering-alpaca. +Get the name of the helm release. In the example below, the release +name is plundering-alpaca. + ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE @@ -130,20 +179,45 @@ plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 defaul ``` Upgrade the deployment via helm upgrade command: + ``` helm upgrade . ``` -Existing pods will be terminated and new pods will be created with the new image. You can +Existing pods will be terminated and new pods will be created with the +new image. You can + monitor the status of the pods by running: ``` kubectl get pods ``` -Once all the pods are restarted and running, your Dremio cluster is upgraded. +Once all the pods are restarted and running, your Dremio cluster is +upgraded. #### Customizing Dremio configuration -Dremio configuration files used by the deployment are in the config directory. These files are propagated to all the pods in the cluster. Updating the configuration and upgrading the helm release - just like doing an upgrade - would refresh all the pods with the new configuration. [Dremio documentation](https://docs.dremio.com/deployment/README-config.html) covers the configuration capabilities in Dremio. - -If you need to add a core-site.xml, you can add the file to the config directory and it will be propagated to all the pods on install or upgrade of the deployment. +Dremio configuration files used by the deployment are in the config +directory. These files are propagated to all the pods in the +cluster. Updating the configuration and upgrading the helm release - +just like doing an upgrade - would refresh all the pods with the new +configuration. [Dremio +documentation](https://docs.dremio.com/deployment/README-config.html) +covers the configuration capabilities in Dremio. 
+ +If you need to add a core-site.xml, you can add the file to the config +directory and it will be propagated to all the pods on install or +upgrade of the deployment. + +#### Important Changes + +2019-09-19 (v0.1.0): BREAKING CHANGE. + + Dremio versions before 4.0.0 are no longer supported by this Helm + chart. Dremio image specifier was split into an imageName and + imageTag parts to follow best practices. "dist" value in + dremio.conf moved to cloud storage where possible (otherwise + defaults to pdfs) -- this will lose any previously extant + reflections materialisations, user uploads, scratch files, etc. + Also added Cloud Cache support (new in Dremio 4.0). Please see + values.yaml for details on this new configuration. diff --git a/charts/dremio/config/core-site.xml b/charts/dremio/config/core-site.xml index 8d91d757..3283eb32 100644 --- a/charts/dremio/config/core-site.xml +++ b/charts/dremio/config/core-site.xml @@ -1,80 +1,93 @@ +{{- if and .Values.distStorage.type (ne .Values.distStorage.type "local") }} - {{- if and .Values.distStorage.type (eq .Values.distStorage.type "aws") }} - - fs.s3a.access.key - AWS access key ID. - {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} - - - fs.s3a.secret.key - AWS secret key. 
- {{ required "AWS secret required" .Values.distStorage.aws.secret}} - - {{- end }} - - {{- if and .Values.distStorage.type (eq .Values.distStorage.type "azure") }} - - - fs.adl.impl - Must be set to org.apache.hadoop.fs.adl.AdlFileSystem - org.apache.hadoop.fs.adl.AdlFileSystem - - - dfs.adls.oauth2.client.id - Application ID of the registered application under Azure Active Directory - {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} - - - dfs.adls.oauth2.credential - Generated password value for the registered application - {{required "Azure secret value required" .Values.distStorage.azure.secret}} - - - dfs.adls.oauth2.refresh.url - Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. - {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} - - - dfs.adls.oauth2.access.token.provider.type - Must be set to ClientCredential - ClientCredential - - - fs.adl.impl.disable.cache - Only include this property AFTER validating the ADLS connection. - false - - {{- end }} - - {{- if and .Values.dremioVersion (ge .Values.dremioVersion "3.2.0") .Values.distStorage.type (eq .Values.distStorage.type "azureStorage") }} - - fs.dremioAzureStorage.impl - FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem - com.dremio.plugins.azure.AzureStorageFileSystem - - - dremio.azure.account - The name of the storage account. - {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} - - - dremio.azure.key - The shared access key for the storage account. - {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} - - - dremio.azure.mode - The storage account type. Value: STORAGE_V2 - STORAGE_V2 - - - dremio.azure.secure - Boolean option to enable SSL connections. 
Value: True/False - True - - {{- end }} + {{- if eq .Values.distStorage.type "aws" }} + + fs.dremioS3.impl + The FileSystem implementation. Must be set to com.dremio.plugins.s3.store.S3FileSystem + com.dremio.plugins.s3.store.S3FileSystem + + + fs.s3a.aws.credentials.provider + The credential provider type. + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider + + + fs.s3a.access.key + AWS access key ID. + {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} + + + fs.s3a.secret.key + AWS secret key. + {{ required "AWS secret required" .Values.distStorage.aws.secret}} + + {{- end }} + + {{- if eq .Values.distStorage.type "azure" }} + + + fs.adl.impl + Must be set to org.apache.hadoop.fs.adl.AdlFileSystem + org.apache.hadoop.fs.adl.AdlFileSystem + + + dfs.adls.oauth2.client.id + Application ID of the registered application under Azure Active Directory + {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} + + + dfs.adls.oauth2.credential + Generated password value for the registered application + {{required "Azure secret value required" .Values.distStorage.azure.secret}} + + + dfs.adls.oauth2.refresh.url + Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. + {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} + + + dfs.adls.oauth2.access.token.provider.type + Must be set to ClientCredential + ClientCredential + + + fs.adl.impl.disable.cache + Only include this property AFTER validating the ADLS connection. + false + + {{- end }} + + {{- if eq .Values.distStorage.type "azureStorage" }} + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. 
+ {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} + + + dremio.azure.key + The shared access key for the storage account. + {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} + + + dremio.azure.mode + The storage account type. Value: STORAGE_V2 + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. Value: True/False + True + + {{- end }} +{{- end }} diff --git a/charts/dremio/config/dremio.conf b/charts/dremio/config/dremio.conf index 12252069..042b7640 100644 --- a/charts/dremio/config/dremio.conf +++ b/charts/dremio/config/dremio.conf @@ -15,33 +15,21 @@ # paths: { - # the local path for dremio to store data. + # Local path for dremio to store data. local: ${DREMIO_HOME}"/data" - - # the distributed path Dremio data including job results, downloads, uploads, etc - #dist: "pdfs://"${paths.local}"/pdfs" - - # If you are editing the uploads value in this file, please delete all the lines starting with double curly braces - {{- if .Values.distStorage.type }} - {{- if and .Values.dremioVersion (lt .Values.dremioVersion "3.2.0") }} - {{- if eq .Values.distStorage.type "aws" }} - uploads: "s3a://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" - {{- end }} - {{- if eq .Values.distStorage.type "azure" }} - uploads: "adl://{{required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" - {{- end }} - {{- else }} # dremio_version > 3.2.0 - {{- if eq .Values.distStorage.type "aws" }} - uploads: "dremioS3://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" - {{- end }} - {{- if eq .Values.distStorage.type "azure" }} - uploads: "dremioAdl://{{required "Azure Datalake 
store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" - {{- end }} - {{- if eq .Values.distStorage.type "azureStorage" }} - uploads: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.uploadsPath}}" - accelerator: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.acceleratorPath}}" - {{- end }} - {{- end }} + # Distributed path Dremio data including job results, downloads, + # uploads, etc + {{- if ne .Values.distStorage.type "local" }} + results: "pdfs://"${paths.local}"/pdfs" + {{- if eq .Values.distStorage.type "aws" }} + dist: "dremioS3:///{{ required "AWS bucketname required" .Values.distStorage.aws.bucketName }}{{ required "Path required" .Values.distStorage.aws.path }}" + {{- else if eq .Values.distStorage.type "azure" }} + dist: "dremioAdl://{{ required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName }}.azuredatalakestore.net{{ required "Path required" .Values.distStorage.azure.path }}" + {{- else if eq .Values.distStorage.type "azureStorage" }} + dist: "dremioAzureStorage://:///{{ required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem }}/{{ required "Path for uploads required" .Values.distStorage.azureStorage.uploadsPath }}" + {{- end }} + {{- else }} + dist: "pdfs://"${paths.local}"/pdfs" {{- end }} } @@ -54,8 +42,30 @@ services: { # executor.enabled: true # # Other service parameters can be customized via this file. + + # Cloud Cache is supported in Dremio 4.0.0+. 
+ {{- if and .Values.executor.cloudCache.enabled (or (ge .Values.imageTag "4.0.0") (eq .Values.imageTag "latest")) }} + executor: { + cache: { + path.db: "/var/lib/dremio", + path.fs: ["/var/lib/dremio"], + pctquota.db: {{ .Values.executor.cloudCache.quota.db_pct }}, + pctquota.fs: [{{ .Values.executor.cloudCache.quota.fs_pct }}] + } + } + {{- end }} } +{{- if and .Values.executor.cloudCache.enabled (ne .Values.distStorage.type "local") }} +debug: { + # Enable caching for distributed storage, it is turned off by default + dist.caching.enabled: true, + # Max percent of total available cache space to use when possible + # for distributed storage + dist.max.cache.space.percent: {{ .Values.executor.cloudCache.quota.cache_pct }} +} +{{- end }} + {{- if .Values.tls.ui.enabled }} services.coordinator.web.ssl.enabled: true services.coordinator.web.ssl.auto-certificate.enabled: false @@ -64,9 +74,9 @@ services.coordinator.web.ssl.keyStore: "/opt/dremio/tls/ui.pkcs12" {{- end }} {{- if .Values.tls.client.enabled }} -# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in Dremio Enterprise Edition. +# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in +# Dremio Enterprise Edition. 
services.coordinator.client-endpoint.ssl.enabled: true services.coordinator.client-endpoint.ssl.auto-certificate.enabled: false - services.coordinator.client-endpoint.ssl.keyStore: "/opt/dremio/tls/client.pkcs12" -{{- end }} \ No newline at end of file +{{- end }} diff --git a/charts/dremio/templates/dremio-admin.yaml b/charts/dremio/templates/dremio-admin.yaml index 3de91c43..feda9d9b 100644 --- a/charts/dremio/templates/dremio-admin.yaml +++ b/charts/dremio/templates/dremio-admin.yaml @@ -12,7 +12,7 @@ metadata: spec: containers: - name: dremio-admin - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent stdin: true tty: true diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index 86d5f2ac..3089c1ad 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -28,7 +28,7 @@ spec: {{- end }} containers: - name: dremio-coordinator - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -70,7 +70,7 @@ spec: command: ["sh", "-c", "until nc -z dremio-client {{ .Values.coordinator.web.port | default 9047 }} > /dev/null; do echo waiting for dremio master; sleep 2; done;"] {{- if .Values.tls.ui.enabled }} - name: generate-ui-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls @@ -92,7 +92,7 @@ spec: {{- end }} {{- if .Values.tls.client.enabled }} - name: generate-client-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls diff --git a/charts/dremio/templates/dremio-executor.yaml b/charts/dremio/templates/dremio-executor.yaml index e511e1d3..150e1fe1 100644 --- a/charts/dremio/templates/dremio-executor.yaml +++ 
b/charts/dremio/templates/dremio-executor.yaml @@ -28,7 +28,7 @@ spec: {{- end }} containers: - name: dremio-executor - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -61,9 +61,10 @@ spec: - name: wait-for-zk image: busybox command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] - # since we're mounting a separate volume, reset permission to dremio uid/gid + # since we're mounting a separate volume, reset permission to + # dremio uid/gid - name: chown-data-directory - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index f272eb32..51619193 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -36,7 +36,7 @@ spec: {{- end }} containers: - name: dremio-master-coordinator - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -92,7 +92,7 @@ spec: command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] # since we're mounting a separate volume, reset permission to dremio uid/gid - name: chown-data-directory - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 @@ -104,7 +104,7 @@ spec: - "dremio:dremio" - "/opt/dremio/data" - name: upgrade-task - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-master-volume @@ -114,7 +114,7 @@ spec: - "upgrade" {{- if .Values.tls.ui.enabled }} - name: generate-ui-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: 
IfNotPresent volumeMounts: - name: dremio-tls @@ -136,7 +136,7 @@ spec: {{- end }} {{- if .Values.tls.client.enabled }} - name: generate-client-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index 2601ebba..a8a415fc 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -1,7 +1,9 @@ # The image used to build the Dremio cluster. It is recommended to update the # version tag to the version that you are using. This will ensure that all # the pods are using the same version of the software. -image: dremio/dremio-oss:latest +image: dremio/dremio-oss +imageTag: latest + # Check out Dremio documentation for memory and cpu requirements for # the coordinators and the executors. # The value of memory should be in MB. CPU is in no of cores. @@ -22,6 +24,20 @@ executor: cpu: 15 count: 3 volumeSize: 100Gi + cloudCache: + # Requires Dremio version 4.0.0 or later + enabled: false + quota: + # Percentage of the diskspace for the running Kubernetes node + # that can be used for Cloud Cache files. + fs_pct: 70 + # Percentage of that space that can be used for the internal + # Cloud Cache database. + db_pct: 70 + # Percentage of that space that can be used for cacheing + # materialised reflections. This is an upper-bound, not a + # reservation. + cache_pct: 100 zookeeper: memory: 1024 cpu: 0.5 @@ -37,9 +53,10 @@ tls: enabled: false secret: dremio-tls-secret-ui client: - # To enable TLS for the client endpoints, set the enabled flag to true and provide - # the appropriate Kubernetes TLS secret. Client endpoint encryption is available only on - # Dremio Enterprise Edition and should not be enabled otherwise. + # To enable TLS for the client endpoints, set the enabled flag to + # true and provide the appropriate Kubernetes TLS secret. 
Client + # endpoint encryption is available only on Dremio Enterprise + # Edition and should not be enabled otherwise. enabled: false secret: dremio-tls-secret-client @@ -63,35 +80,39 @@ serviceType: LoadBalancer #storageClass: managed-premium # For private and protected docker image repository, you should store -# the credentials in a kubernetes secret and provide the secret name here. -# For more information, see https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod +# the credentials in a kubernetes secret and provide the secret name +# here. For more information, see +# https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod #imagePullSecrets: secretname -# Target pods to nodes based on labels set on the nodes. -# For more information, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +# Target pods to nodes based on labels set on the nodes. For more +# information, see +# https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector #nodeSelector: # key: value -# Control where uploaded files are stored. -# See https://docs.dremio.com/deployment/distributed-storage.html for more information -dremioVersion: "3.2.0" # Dremio Version 3.2.0 or greater +# Control where uploaded files are stored. See +# https://docs.dremio.com/deployment/distributed-storage.html for more +# information distStorage: - # Valid values are local, aws, azure or azureStorage. aws and azure choice requires additional configuration data. + # Valid values are local, aws, azure or azureStorage. aws and azure + # choice requires additional configuration data. 
type: "local" - aws: #S3 - used for only uploads + aws: # S3 bucketName: "Your_AWS_bucket_name" path: "/" accessKey: "Your_AWS_Access_Key" secret: "Your_AWS_Secret" - azure: #ADLS v1 - used for only uploads + azure: # ADLS gen1 datalakeStoreName: "Your_Azure_DataLake_Storage_name" path: "/" applicationId: "Your_Azure_Application_Id" secret: "Your_Azure_Secret" oauth2EndPoint: "Azure_OAuth2_Endpoint" - azureStorage: #AzureStorage v2 - supported in Dremio version 3.2.0+ - used for uploads and accelerator + azureStorage: # AzureStorage gen2v2 accountName: "Azure_storage_v2_account_name" accessKey: "Access_key_for_the_storage_account" + filesystem: "Filesystem_in_storage_account" uploadsPath: "Path_for_uploads" acceleratorPath: "Path_for_accelerator" From 45a04846d6d54712bd7eab846351665e66736dfc Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Fri, 27 Sep 2019 17:45:21 +0000 Subject: [PATCH 22/24] Revert "Add Helm C3 executor and dist store caching" This reverts commit d199978f1eaf648ec014e5e171e481698b6bb554. 
Change-Id: If484452e8608f2dbdfa2713ca33bb13b4c92c4f0 --- charts/dremio/Chart.yaml | 2 +- charts/dremio/README.md | 168 +++++------------- charts/dremio/config/core-site.xml | 163 ++++++++--------- charts/dremio/config/dremio.conf | 68 +++---- charts/dremio/templates/dremio-admin.yaml | 2 +- .../dremio/templates/dremio-coordinator.yaml | 6 +- charts/dremio/templates/dremio-executor.yaml | 7 +- charts/dremio/templates/dremio-master.yaml | 10 +- charts/dremio/values.yaml | 51 ++---- 9 files changed, 179 insertions(+), 298 deletions(-) diff --git a/charts/dremio/Chart.yaml b/charts/dremio/Chart.yaml index 7a40ce28..0457134e 100644 --- a/charts/dremio/Chart.yaml +++ b/charts/dremio/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v1" name: "dremio" -version: "0.1.0" +version: "0.0.7" keywords: - dremio - data diff --git a/charts/dremio/README.md b/charts/dremio/README.md index 8bab30c8..6d8ecd0e 100644 --- a/charts/dremio/README.md +++ b/charts/dremio/README.md @@ -2,10 +2,7 @@ ## Overview -This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses -a persistent volume for the master node to store the metadata for the -cluster. The default configuration uses the default persistent storage -supported by the kubernetes platform. For example, +This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses a persistent volume for the master node to store the metadata for the cluster. The default configuration uses the default persistent storage supported by the kubernetes platform. For example, | Kubernetes platform | Persistent store | |---------------------|------------------| @@ -14,48 +11,26 @@ supported by the kubernetes platform. For example, | Google GKE | Persistent Disk | | Local K8S on Docker | Hostpath | -If you want to use a different storage class available in your -kubernetes environment, add the storageClass in values.yaml. 
- -An appropriate distributed file store (S3, ADLS, HDFS, etc) should be -used for paths.dist as this deployment will lose locally persisted -reflections and uploads. You can update config/dremio.conf. Dremio -[documentation](https://docs.dremio.com/deployment/distributed-storage.html) -provides more information on this. - -This assumes you already have kubernetes cluster setup, kubectl -configured to talk to your kubernetes cluster and helm setup in your -cluster. Review and update values.yaml to reflect values for your -environment before installing the helm chart. This is specially -important for for the memory and cpu values - your kubernetes cluster -should have sufficient resources to provision the pods with those -values. If your kubernetes installation does not support serviceType -LoadBalancer, it is recommended to comment the serviceType value in -values.yaml file before deploying. +If you want to use a different storage class available in your kubernetes environment, add the storageClass in values.yaml. -#### Installing the helm chart +An appropriate distributed file store (S3, ADLS, HDFS, etc) should be used for paths.dist as this deployment will lose locally persisted reflections and uploads. You can update config/dremio.conf. Dremio [documentation](https://docs.dremio.com/deployment/distributed-storage.html) provides more information on this. -Review charts/dremio/values.yaml and adjust the values as per your -requirements. Note that the values for cpu and memory for the -coordinator and the executors are set to work with AKS on Azure with -worker nodes setup with machine types Standard_E16s_v3. +This assumes you already have kubernetes cluster setup, kubectl configured to talk to your kubernetes cluster and helm setup in your cluster. Review and update values.yaml to reflect values for your environment before installing the helm chart. 
This is specially important for for the memory and cpu values - your kubernetes cluster should have sufficient resources to provision the pods with those values. If your kubernetes installation does not support serviceType LoadBalancer, it is recommended to comment the serviceType value in values.yaml file before deploying. -Run this from the charts directory +#### Installing the helm chart +Review charts/dremio/values.yaml and adjust the values as per your requirements. Note that the values for cpu and memory for the coordinator and the executors are set to work with AKS on Azure with worker nodes setup with machine types Standard_E16s_v3. +Run this from the charts directory ```bash -cd charts helm install --wait dremio ``` - -If it takes longer than a couple of minutes to complete, check the -status of the pods to see where they are waiting. If they are pending -scheduling due to limited memory or cpu, either adjust the values in -values.yaml and restart the process or add more resources to your -kubernetes cluster. +cd charts +helm install --wait dremio +``` +If it takes longer than a couple of minutes to complete, check the status of the pods to see where they are waiting. If they are pending scheduling due to limited memory or cpu, either adjust the values in values.yaml and restart the process or add more resources to your kubernetes cluster. #### Connect to the Dremio UI +If your kubernetes supports serviceType LoadBalancer, you can get to the Dremio UI on the load balancer external ip. -If your kubernetes supports serviceType LoadBalancer, you can get to -the Dremio UI on the load balancer external IP. 
For example, if your -service output is: +For example, if your service output is: ```bash kubectl get services dremio-client @@ -63,32 +38,24 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) dremio-client LoadBalancer 10.99.227.180 35.226.31.211 31010:32260/TCP,9047:30620/TCP 2d ``` -You can get to the Dremio UI using the value under column EXTERNAL-IP: +you can get to the Dremio UI using the value under column EXTERNAL-IP: http://35.226.31.211:9047 -If your kubernetes does not have support of serviceType LoadBalancer, -you can access the Dremio UI on the port exposed on the node. For -example, if the service output is: +If your kubernetes does not have support of serviceType LoadBalancer, you can access the Dremio UI on the port exposed on the node. For example, if the service output is: ```bash kubectl get services dremio-client NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE dremio-client NodePort 10.110.65.97 31010:32390/TCP,9047:30670/TCP 1h ``` - -Where there is no external IP and the Dremio master is running on node -"localhost", you can get to Dremio UI using: +where there is no external ip and the Dremio master is running on node "localhost", you can get to Dremio UI using: http://localhost:30670 -#### Dremio Client Port -The port 31010 is used for ODBC and JDBC connections. You can look up -service dremio-client in kubernetes to find the host to use for ODBC -or JDBC connections. Depending on your kubernetes cluster supporting -serviceType LoadBalancer, you will use the load balancer external-ip -or the node on which a coordinator is running. +#### Dremio Client Port +The port 31010 is used for ODBC and JDBC connections. You can look up service dremio-client in kubernetes to find the host to use for ODBC or JDBC connections. Depending on your kubernetes cluster supporting serviceType LoadBalancer, you will use the load balancer external-ip or the node on which a coordinator is running. 
```bash kubectl get services dremio-client @@ -96,37 +63,32 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) dremio-client LoadBalancer 10.99.227.180 35.226.31.211 31010:32260/TCP,9047:30620/TCP 2d ``` -For example, in the above output, the service is exposed on an -external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC -connections. +For example, in the above output, the service is exposed on an external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC connections. #### Viewing logs - -Logs are written to the container's console. All the logs - -server.log, server.out, server.gc and access.log - are written into -the console simultaneously. You can view the logs using kubectl. ``` -kubectl logs ``` You can also tail the logs using the --f parameter. ``` kubectl logs -f ``` +Logs are written to the container's console. All the logs - server.log, server.out, server.gc and access.log - are written into the console simultaneously. You can view the logs using kubectl. +``` +kubectl logs +``` +You can also tail the logs using the -f parameter. +``` +kubectl logs -f +``` #### Scale by adding additional Coordinators or Executors (optional) - -Get the name of the helm release. In the example below, the release -name is plundering-alpaca: - +Get the name of the helm release. In the example below, the release name is plundering-alpaca. ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 default ``` -Add additional coordinators: - +Add additional coordinators ```bash helm upgrade dremio --set coordinator.count=3 ``` -Add additional executors: - +Add additional executors ```bash helm upgrade dremio --set executor.count=5 ``` @@ -134,44 +96,33 @@ helm upgrade dremio --set executor.count=5 You can also scale down the same way. 
### Running offline dremio-admin commands - -Administration commands restore, cleanup and set-password in -dremio-admin needs to be run when the Dremio cluster is not -running. So, before running these commands, you need to shutdown the -Dremio cluster. Use the helm delete command to delete the helm -release. (Kubernetes does not delete the persistent store volumes -when you delete statefulset pods and when you install the cluster -again using helm, the existing persistent store will be used and you -will get your Dremio cluster running again.) - -After Dremio cluster is shutdown, start the dremio-admin pod using: - +Administration commands restore, cleanup and set-password in dremio-admin needs to be run when +the Dremio cluster is not running. So, before running these commands, you need to shutdown +the Dremio cluster. Use the helm delete command to delete the helm release. +(Kubernetes does not delete the persistent store volumes when you delete statefulset pods and +when you install the cluster again using helm, the existing persistent store will be used and +you will get your Dremio cluster running again.) + +After Dremio cluster is shutdown, start the dremio-admin pod using ```bash helm install --wait dremio --set DremioAdmin=true ``` -Once the pod is running, you can connect to the pod using: - +Once the pod is running, you can connect to the pod using ```bash kubectl exec -it dremio-admin -- bash ``` Now, you have a bash shell from where you can run the dremio-admin commands. -Once you are done, you can delete the helm release for the -dremio-admin and start your Dremio cluster. +Once you are done, you can delete the helm release for the dremio-admin and start your Dremio cluster. #### Upgrading Dremio - -You should attempt upgrade when no queries are running on the -cluster. Update the Dremio image tag in your values.yaml file. E.g: - +You should attempt upgrade when no queries are running on the cluster. 
Update the Dremio image tag in your values.yaml file. E.g. ```bash image: dremio/dremio-oss:3.0.0 ... ``` -Get the name of the helm release. In the example below, the release -name is plundering-alpaca. - +Get the name of the helm release. In the example below, the release name is plundering-alpaca. ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE @@ -179,45 +130,20 @@ plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 defaul ``` Upgrade the deployment via helm upgrade command: - ``` helm upgrade . ``` -Existing pods will be terminated and new pods will be created with the -new image. You can - +Existing pods will be terminated and new pods will be created with the new image. You can monitor the status of the pods by running: ``` kubectl get pods ``` -Once all the pods are restarted and running, your Dremio cluster is -upgraded. +Once all the pods are restarted and running, your Dremio cluster is upgraded. #### Customizing Dremio configuration -Dremio configuration files used by the deployment are in the config -directory. These files are propagated to all the pods in the -cluster. Updating the configuration and upgrading the helm release - -just like doing an upgrade - would refresh all the pods with the new -configuration. [Dremio -documentation](https://docs.dremio.com/deployment/README-config.html) -covers the configuration capabilities in Dremio. - -If you need to add a core-site.xml, you can add the file to the config -directory and it will be propagated to all the pods on install or -upgrade of the deployment. - -#### Important Changes - -2019-09-19 (v0.1.0): BREAKING CHANGE. - - Dremio versions before 4.0.0 are no longer supported by this Helm - chart. Dremio image specifier was split into an imageName and - imageTag parts to follow best practices. 
"dist" value in - dremio.conf moved to cloud storage where possible (otherwise - defaults to pdfs) -- this will lose any previously extant - reflections materialisations, user uploads, scratch files, etc. - Also added Cloud Cache support (new in Dremio 4.0). Please see - values.yaml for details on this new configuration. +Dremio configuration files used by the deployment are in the config directory. These files are propagated to all the pods in the cluster. Updating the configuration and upgrading the helm release - just like doing an upgrade - would refresh all the pods with the new configuration. [Dremio documentation](https://docs.dremio.com/deployment/README-config.html) covers the configuration capabilities in Dremio. + +If you need to add a core-site.xml, you can add the file to the config directory and it will be propagated to all the pods on install or upgrade of the deployment. diff --git a/charts/dremio/config/core-site.xml b/charts/dremio/config/core-site.xml index 3283eb32..8d91d757 100644 --- a/charts/dremio/config/core-site.xml +++ b/charts/dremio/config/core-site.xml @@ -1,93 +1,80 @@ -{{- if and .Values.distStorage.type (ne .Values.distStorage.type "local") }} - {{- if eq .Values.distStorage.type "aws" }} - - fs.dremioS3.impl - The FileSystem implementation. Must be set to com.dremio.plugins.s3.store.S3FileSystem - com.dremio.plugins.s3.store.S3FileSystem - - - fs.s3a.aws.credentials.provider - The credential provider type. - org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider - - - fs.s3a.access.key - AWS access key ID. - {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} - - - fs.s3a.secret.key - AWS secret key. 
- {{ required "AWS secret required" .Values.distStorage.aws.secret}} - - {{- end }} - - {{- if eq .Values.distStorage.type "azure" }} - - - fs.adl.impl - Must be set to org.apache.hadoop.fs.adl.AdlFileSystem - org.apache.hadoop.fs.adl.AdlFileSystem - - - dfs.adls.oauth2.client.id - Application ID of the registered application under Azure Active Directory - {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} - - - dfs.adls.oauth2.credential - Generated password value for the registered application - {{required "Azure secret value required" .Values.distStorage.azure.secret}} - - - dfs.adls.oauth2.refresh.url - Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. - {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} - - - dfs.adls.oauth2.access.token.provider.type - Must be set to ClientCredential - ClientCredential - - - fs.adl.impl.disable.cache - Only include this property AFTER validating the ADLS connection. - false - - {{- end }} - - {{- if eq .Values.distStorage.type "azureStorage" }} - - fs.dremioAzureStorage.impl - FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem - com.dremio.plugins.azure.AzureStorageFileSystem - - - dremio.azure.account - The name of the storage account. - {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} - - - dremio.azure.key - The shared access key for the storage account. - {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} - - - dremio.azure.mode - The storage account type. Value: STORAGE_V2 - STORAGE_V2 - - - dremio.azure.secure - Boolean option to enable SSL connections. Value: True/False - True - - {{- end }} -{{- end }} + {{- if and .Values.distStorage.type (eq .Values.distStorage.type "aws") }} + + fs.s3a.access.key + AWS access key ID. 
+ {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} + + + fs.s3a.secret.key + AWS secret key. + {{ required "AWS secret required" .Values.distStorage.aws.secret}} + + {{- end }} + + {{- if and .Values.distStorage.type (eq .Values.distStorage.type "azure") }} + + + fs.adl.impl + Must be set to org.apache.hadoop.fs.adl.AdlFileSystem + org.apache.hadoop.fs.adl.AdlFileSystem + + + dfs.adls.oauth2.client.id + Application ID of the registered application under Azure Active Directory + {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} + + + dfs.adls.oauth2.credential + Generated password value for the registered application + {{required "Azure secret value required" .Values.distStorage.azure.secret}} + + + dfs.adls.oauth2.refresh.url + Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. + {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} + + + dfs.adls.oauth2.access.token.provider.type + Must be set to ClientCredential + ClientCredential + + + fs.adl.impl.disable.cache + Only include this property AFTER validating the ADLS connection. + false + + {{- end }} + + {{- if and .Values.dremioVersion (ge .Values.dremioVersion "3.2.0") .Values.distStorage.type (eq .Values.distStorage.type "azureStorage") }} + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. + {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} + + + dremio.azure.key + The shared access key for the storage account. + {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} + + + dremio.azure.mode + The storage account type. 
Value: STORAGE_V2 + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. Value: True/False + True + + {{- end }} diff --git a/charts/dremio/config/dremio.conf b/charts/dremio/config/dremio.conf index 042b7640..12252069 100644 --- a/charts/dremio/config/dremio.conf +++ b/charts/dremio/config/dremio.conf @@ -15,21 +15,33 @@ # paths: { - # Local path for dremio to store data. + # the local path for dremio to store data. local: ${DREMIO_HOME}"/data" - # Distributed path Dremio data including job results, downloads, - # uploads, etc - {{- if ne .Values.distStorage.type "local" }} - results: "pdfs://"${paths.local}"/pdfs" - {{- if eq .Values.distStorage.type "aws" }} - dist: "dremioS3:///{{ required "AWS bucketname required" .Values.distStorage.aws.bucketName }}{{ required "Path required" .Values.distStorage.aws.path }}" - {{- else if eq .Values.distStorage.type "azure" }} - dist: "dremioAdl://{{ required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName }}.azuredatalakestore.net{{ required "Path required" .Values.distStorage.azure.path }}" - {{- else if eq .Values.distStorage.type "azureStorage" }} - dist: "dremioAzureStorage://:///{{ required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem }}/{{ required "Path for uploads required" .Values.distStorage.azureStorage.uploadsPath }}" - {{- end }} - {{- else }} - dist: "pdfs://"${paths.local}"/pdfs" + + # the distributed path Dremio data including job results, downloads, uploads, etc + #dist: "pdfs://"${paths.local}"/pdfs" + + # If you are editing the uploads value in this file, please delete all the lines starting with double curly braces + {{- if .Values.distStorage.type }} + {{- if and .Values.dremioVersion (lt .Values.dremioVersion "3.2.0") }} + {{- if eq .Values.distStorage.type "aws" }} + uploads: "s3a://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" 
.Values.distStorage.aws.path}}" + {{- end }} + {{- if eq .Values.distStorage.type "azure" }} + uploads: "adl://{{required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" + {{- end }} + {{- else }} # dremio_version > 3.2.0 + {{- if eq .Values.distStorage.type "aws" }} + uploads: "dremioS3://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" + {{- end }} + {{- if eq .Values.distStorage.type "azure" }} + uploads: "dremioAdl://{{required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" + {{- end }} + {{- if eq .Values.distStorage.type "azureStorage" }} + uploads: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.uploadsPath}}" + accelerator: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.acceleratorPath}}" + {{- end }} + {{- end }} {{- end }} } @@ -42,30 +54,8 @@ services: { # executor.enabled: true # # Other service parameters can be customized via this file. - - # Cloud Cache is supported in Dremio 4.0.0+. 
- {{- if and .Values.executor.cloudCache.enabled (or (ge .Values.imageTag "4.0.0") (eq .Values.imageTag "latest")) }} - executor: { - cache: { - path.db: "/var/lib/dremio", - path.fs: ["/var/lib/dremio"], - pctquota.db: {{ .Values.executor.cloudCache.quota.db_pct }}, - pctquota.fs: [{{ .Values.executor.cloudCache.quota.fs_pct }}] - } - } - {{- end }} } -{{- if and .Values.executor.cloudCache.enabled (ne .Values.distStorage.type "local") }} -debug: { - # Enable caching for distributed storage, it is turned off by default - dist.caching.enabled: true, - # Max percent of total available cache space to use when possible - # for distributed storage - dist.max.cache.space.percent: {{ .Values.executor.cloudCache.quota.cache_pct }} -} -{{- end }} - {{- if .Values.tls.ui.enabled }} services.coordinator.web.ssl.enabled: true services.coordinator.web.ssl.auto-certificate.enabled: false @@ -74,9 +64,9 @@ services.coordinator.web.ssl.keyStore: "/opt/dremio/tls/ui.pkcs12" {{- end }} {{- if .Values.tls.client.enabled }} -# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in -# Dremio Enterprise Edition. +# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in Dremio Enterprise Edition. 
services.coordinator.client-endpoint.ssl.enabled: true services.coordinator.client-endpoint.ssl.auto-certificate.enabled: false + services.coordinator.client-endpoint.ssl.keyStore: "/opt/dremio/tls/client.pkcs12" -{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/dremio/templates/dremio-admin.yaml b/charts/dremio/templates/dremio-admin.yaml index feda9d9b..3de91c43 100644 --- a/charts/dremio/templates/dremio-admin.yaml +++ b/charts/dremio/templates/dremio-admin.yaml @@ -12,7 +12,7 @@ metadata: spec: containers: - name: dremio-admin - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent stdin: true tty: true diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index 3089c1ad..86d5f2ac 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -28,7 +28,7 @@ spec: {{- end }} containers: - name: dremio-coordinator - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent resources: requests: @@ -70,7 +70,7 @@ spec: command: ["sh", "-c", "until nc -z dremio-client {{ .Values.coordinator.web.port | default 9047 }} > /dev/null; do echo waiting for dremio master; sleep 2; done;"] {{- if .Values.tls.ui.enabled }} - name: generate-ui-keystore - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls @@ -92,7 +92,7 @@ spec: {{- end }} {{- if .Values.tls.client.enabled }} - name: generate-client-keystore - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls diff --git a/charts/dremio/templates/dremio-executor.yaml b/charts/dremio/templates/dremio-executor.yaml index 150e1fe1..e511e1d3 100644 --- a/charts/dremio/templates/dremio-executor.yaml +++ 
b/charts/dremio/templates/dremio-executor.yaml @@ -28,7 +28,7 @@ spec: {{- end }} containers: - name: dremio-executor - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent resources: requests: @@ -61,10 +61,9 @@ spec: - name: wait-for-zk image: busybox command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] - # since we're mounting a separate volume, reset permission to - # dremio uid/gid + # since we're mounting a separate volume, reset permission to dremio uid/gid - name: chown-data-directory - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index 51619193..f272eb32 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -36,7 +36,7 @@ spec: {{- end }} containers: - name: dremio-master-coordinator - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent resources: requests: @@ -92,7 +92,7 @@ spec: command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] # since we're mounting a separate volume, reset permission to dremio uid/gid - name: chown-data-directory - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 @@ -104,7 +104,7 @@ spec: - "dremio:dremio" - "/opt/dremio/data" - name: upgrade-task - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-master-volume @@ -114,7 +114,7 @@ spec: - "upgrade" {{- if .Values.tls.ui.enabled }} - name: generate-ui-keystore - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: 
IfNotPresent volumeMounts: - name: dremio-tls @@ -136,7 +136,7 @@ spec: {{- end }} {{- if .Values.tls.client.enabled }} - name: generate-client-keystore - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index a8a415fc..2601ebba 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -1,9 +1,7 @@ # The image used to build the Dremio cluster. It is recommended to update the # version tag to the version that you are using. This will ensure that all # the pods are using the same version of the software. -image: dremio/dremio-oss -imageTag: latest - +image: dremio/dremio-oss:latest # Check out Dremio documentation for memory and cpu requirements for # the coordinators and the executors. # The value of memory should be in MB. CPU is in no of cores. @@ -24,20 +22,6 @@ executor: cpu: 15 count: 3 volumeSize: 100Gi - cloudCache: - # Requires Dremio version 4.0.0 or later - enabled: false - quota: - # Percentage of the diskspace for the running Kubernetes node - # that can be used for Cloud Cache files. - fs_pct: 70 - # Percentage of that space that can be used for the internal - # Cloud Cache database. - db_pct: 70 - # Percentage of that space that can be used for cacheing - # materialised reflections. This is an upper-bound, not a - # reservation. - cache_pct: 100 zookeeper: memory: 1024 cpu: 0.5 @@ -53,10 +37,9 @@ tls: enabled: false secret: dremio-tls-secret-ui client: - # To enable TLS for the client endpoints, set the enabled flag to - # true and provide the appropriate Kubernetes TLS secret. Client - # endpoint encryption is available only on Dremio Enterprise - # Edition and should not be enabled otherwise. + # To enable TLS for the client endpoints, set the enabled flag to true and provide + # the appropriate Kubernetes TLS secret. 
Client endpoint encryption is available only on + # Dremio Enterprise Edition and should not be enabled otherwise. enabled: false secret: dremio-tls-secret-client @@ -80,39 +63,35 @@ serviceType: LoadBalancer #storageClass: managed-premium # For private and protected docker image repository, you should store -# the credentials in a kubernetes secret and provide the secret name -# here. For more information, see -# https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod +# the credentials in a kubernetes secret and provide the secret name here. +# For more information, see https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod #imagePullSecrets: secretname -# Target pods to nodes based on labels set on the nodes. For more -# information, see -# https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +# Target pods to nodes based on labels set on the nodes. +# For more information, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector #nodeSelector: # key: value -# Control where uploaded files are stored. See -# https://docs.dremio.com/deployment/distributed-storage.html for more -# information +# Control where uploaded files are stored. +# See https://docs.dremio.com/deployment/distributed-storage.html for more information +dremioVersion: "3.2.0" # Dremio Version 3.2.0 or greater distStorage: - # Valid values are local, aws, azure or azureStorage. aws and azure - # choice requires additional configuration data. + # Valid values are local, aws, azure or azureStorage. aws and azure choice requires additional configuration data. 
type: "local" - aws: # S3 + aws: #S3 - used for only uploads bucketName: "Your_AWS_bucket_name" path: "/" accessKey: "Your_AWS_Access_Key" secret: "Your_AWS_Secret" - azure: # ADLS gen1 + azure: #ADLS v1 - used for only uploads datalakeStoreName: "Your_Azure_DataLake_Storage_name" path: "/" applicationId: "Your_Azure_Application_Id" secret: "Your_Azure_Secret" oauth2EndPoint: "Azure_OAuth2_Endpoint" - azureStorage: # AzureStorage gen2v2 + azureStorage: #AzureStorage v2 - supported in Dremio version 3.2.0+ - used for uploads and accelerator accountName: "Azure_storage_v2_account_name" accessKey: "Access_key_for_the_storage_account" - filesystem: "Filesystem_in_storage_account" uploadsPath: "Path_for_uploads" acceleratorPath: "Path_for_accelerator" From c8124e9cc68ba49b30c85147f7059ed2fe9843c2 Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Fri, 27 Sep 2019 13:53:55 -0700 Subject: [PATCH 23/24] DX-18737: Add Helm C3 executor and dist store caching - Dremio 4.0.0 or later required. - Adds the concept of an imageTag to expose features that are introduced only in newer versions of Dremio. - Removes the dremioVersion value that needs to be manually set to reference the same version that is used by the image. - Adds optional Cloud Cache support. Dist is split between PDFS and cloud storage. 
Change-Id: I645c53bb772c0d52362052ef77925c08b30cc494 --- charts/dremio/Chart.yaml | 2 +- charts/dremio/README.md | 168 +++++++++++++----- charts/dremio/config/core-site.xml | 163 +++++++++-------- charts/dremio/config/dremio.conf | 68 ++++--- charts/dremio/templates/dremio-admin.yaml | 2 +- .../dremio/templates/dremio-coordinator.yaml | 6 +- charts/dremio/templates/dremio-executor.yaml | 7 +- charts/dremio/templates/dremio-master.yaml | 10 +- charts/dremio/values.yaml | 54 ++++-- 9 files changed, 299 insertions(+), 181 deletions(-) diff --git a/charts/dremio/Chart.yaml b/charts/dremio/Chart.yaml index 0457134e..7a40ce28 100644 --- a/charts/dremio/Chart.yaml +++ b/charts/dremio/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v1" name: "dremio" -version: "0.0.7" +version: "0.1.0" keywords: - dremio - data diff --git a/charts/dremio/README.md b/charts/dremio/README.md index 6d8ecd0e..8bab30c8 100644 --- a/charts/dremio/README.md +++ b/charts/dremio/README.md @@ -2,7 +2,10 @@ ## Overview -This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses a persistent volume for the master node to store the metadata for the cluster. The default configuration uses the default persistent storage supported by the kubernetes platform. For example, +This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses +a persistent volume for the master node to store the metadata for the +cluster. The default configuration uses the default persistent storage +supported by the kubernetes platform. For example, | Kubernetes platform | Persistent store | |---------------------|------------------| @@ -11,26 +14,48 @@ This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses a persist | Google GKE | Persistent Disk | | Local K8S on Docker | Hostpath | -If you want to use a different storage class available in your kubernetes environment, add the storageClass in values.yaml. 
- -An appropriate distributed file store (S3, ADLS, HDFS, etc) should be used for paths.dist as this deployment will lose locally persisted reflections and uploads. You can update config/dremio.conf. Dremio [documentation](https://docs.dremio.com/deployment/distributed-storage.html) provides more information on this. - -This assumes you already have kubernetes cluster setup, kubectl configured to talk to your kubernetes cluster and helm setup in your cluster. Review and update values.yaml to reflect values for your environment before installing the helm chart. This is specially important for for the memory and cpu values - your kubernetes cluster should have sufficient resources to provision the pods with those values. If your kubernetes installation does not support serviceType LoadBalancer, it is recommended to comment the serviceType value in values.yaml file before deploying. +If you want to use a different storage class available in your +kubernetes environment, add the storageClass in values.yaml. + +An appropriate distributed file store (S3, ADLS, HDFS, etc) should be +used for paths.dist as this deployment will lose locally persisted +reflections and uploads. You can update config/dremio.conf. Dremio +[documentation](https://docs.dremio.com/deployment/distributed-storage.html) +provides more information on this. + +This assumes you already have kubernetes cluster setup, kubectl +configured to talk to your kubernetes cluster and helm setup in your +cluster. Review and update values.yaml to reflect values for your +environment before installing the helm chart. This is specially +important for for the memory and cpu values - your kubernetes cluster +should have sufficient resources to provision the pods with those +values. If your kubernetes installation does not support serviceType +LoadBalancer, it is recommended to comment the serviceType value in +values.yaml file before deploying. 
#### Installing the helm chart -Review charts/dremio/values.yaml and adjust the values as per your requirements. Note that the values for cpu and memory for the coordinator and the executors are set to work with AKS on Azure with worker nodes setup with machine types Standard_E16s_v3. + +Review charts/dremio/values.yaml and adjust the values as per your +requirements. Note that the values for cpu and memory for the +coordinator and the executors are set to work with AKS on Azure with +worker nodes setup with machine types Standard_E16s_v3. Run this from the charts directory + ```bash -cd charts -helm install --wait dremio -``` -If it takes longer than a couple of minutes to complete, check the status of the pods to see where they are waiting. If they are pending scheduling due to limited memory or cpu, either adjust the values in values.yaml and restart the process or add more resources to your kubernetes cluster. +cd charts helm install --wait dremio ``` + +If it takes longer than a couple of minutes to complete, check the +status of the pods to see where they are waiting. If they are pending +scheduling due to limited memory or cpu, either adjust the values in +values.yaml and restart the process or add more resources to your +kubernetes cluster. #### Connect to the Dremio UI -If your kubernetes supports serviceType LoadBalancer, you can get to the Dremio UI on the load balancer external ip. -For example, if your service output is: +If your kubernetes supports serviceType LoadBalancer, you can get to +the Dremio UI on the load balancer external IP. 
For example, if your +service output is: ```bash kubectl get services dremio-client @@ -38,24 +63,32 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) dremio-client LoadBalancer 10.99.227.180 35.226.31.211 31010:32260/TCP,9047:30620/TCP 2d ``` -you can get to the Dremio UI using the value under column EXTERNAL-IP: +You can get to the Dremio UI using the value under column EXTERNAL-IP: http://35.226.31.211:9047 -If your kubernetes does not have support of serviceType LoadBalancer, you can access the Dremio UI on the port exposed on the node. For example, if the service output is: +If your kubernetes does not have support of serviceType LoadBalancer, +you can access the Dremio UI on the port exposed on the node. For +example, if the service output is: ```bash kubectl get services dremio-client NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE dremio-client NodePort 10.110.65.97 31010:32390/TCP,9047:30670/TCP 1h ``` -where there is no external ip and the Dremio master is running on node "localhost", you can get to Dremio UI using: -http://localhost:30670 +Where there is no external IP and the Dremio master is running on node +"localhost", you can get to Dremio UI using: +http://localhost:30670 #### Dremio Client Port -The port 31010 is used for ODBC and JDBC connections. You can look up service dremio-client in kubernetes to find the host to use for ODBC or JDBC connections. Depending on your kubernetes cluster supporting serviceType LoadBalancer, you will use the load balancer external-ip or the node on which a coordinator is running. + +The port 31010 is used for ODBC and JDBC connections. You can look up +service dremio-client in kubernetes to find the host to use for ODBC +or JDBC connections. Depending on your kubernetes cluster supporting +serviceType LoadBalancer, you will use the load balancer external-ip +or the node on which a coordinator is running. 
```bash kubectl get services dremio-client @@ -63,32 +96,37 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) dremio-client LoadBalancer 10.99.227.180 35.226.31.211 31010:32260/TCP,9047:30620/TCP 2d ``` -For example, in the above output, the service is exposed on an external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC connections. +For example, in the above output, the service is exposed on an +external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC +connections. #### Viewing logs -Logs are written to the container's console. All the logs - server.log, server.out, server.gc and access.log - are written into the console simultaneously. You can view the logs using kubectl. -``` -kubectl logs -``` -You can also tail the logs using the -f parameter. -``` -kubectl logs -f -``` + +Logs are written to the container's console. All the logs - +server.log, server.out, server.gc and access.log - are written into +the console simultaneously. You can view the logs using kubectl. ``` +kubectl logs ``` You can also tail the logs using the +-f parameter. ``` kubectl logs -f ``` #### Scale by adding additional Coordinators or Executors (optional) -Get the name of the helm release. In the example below, the release name is plundering-alpaca. + +Get the name of the helm release. In the example below, the release +name is plundering-alpaca: + ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 default ``` -Add additional coordinators +Add additional coordinators: + ```bash helm upgrade dremio --set coordinator.count=3 ``` -Add additional executors +Add additional executors: + ```bash helm upgrade dremio --set executor.count=5 ``` @@ -96,33 +134,44 @@ helm upgrade dremio --set executor.count=5 You can also scale down the same way. 
### Running offline dremio-admin commands -Administration commands restore, cleanup and set-password in dremio-admin needs to be run when -the Dremio cluster is not running. So, before running these commands, you need to shutdown -the Dremio cluster. Use the helm delete command to delete the helm release. -(Kubernetes does not delete the persistent store volumes when you delete statefulset pods and -when you install the cluster again using helm, the existing persistent store will be used and -you will get your Dremio cluster running again.) - -After Dremio cluster is shutdown, start the dremio-admin pod using + +Administration commands restore, cleanup and set-password in +dremio-admin need to be run when the Dremio cluster is not +running. So, before running these commands, you need to shut down the +Dremio cluster. Use the helm delete command to delete the helm +release. (Kubernetes does not delete the persistent store volumes +when you delete statefulset pods and when you install the cluster +again using helm, the existing persistent store will be used and you +will get your Dremio cluster running again.) + +After the Dremio cluster is shut down, start the dremio-admin pod using: + ```bash helm install --wait dremio --set DremioAdmin=true ``` -Once the pod is running, you can connect to the pod using +Once the pod is running, you can connect to the pod using: + ```bash kubectl exec -it dremio-admin -- bash ``` Now, you have a bash shell from where you can run the dremio-admin commands. -Once you are done, you can delete the helm release for the dremio-admin and start your Dremio cluster. +Once you are done, you can delete the helm release for the +dremio-admin and start your Dremio cluster. #### Upgrading Dremio -You should attempt upgrade when no queries are running on the cluster. Update the Dremio image tag in your values.yaml file. E.g. + +You should attempt an upgrade when no queries are running on the +cluster. Update the Dremio image tag in your values.yaml file.
E.g: + ```bash image: dremio/dremio-oss:3.0.0 ... ``` -Get the name of the helm release. In the example below, the release name is plundering-alpaca. +Get the name of the helm release. In the example below, the release +name is plundering-alpaca. + ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE @@ -130,20 +179,45 @@ plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 defaul ``` Upgrade the deployment via helm upgrade command: + ``` helm upgrade . ``` -Existing pods will be terminated and new pods will be created with the new image. You can +Existing pods will be terminated and new pods will be created with the +new image. You can + monitor the status of the pods by running: ``` kubectl get pods ``` -Once all the pods are restarted and running, your Dremio cluster is upgraded. +Once all the pods are restarted and running, your Dremio cluster is +upgraded. #### Customizing Dremio configuration -Dremio configuration files used by the deployment are in the config directory. These files are propagated to all the pods in the cluster. Updating the configuration and upgrading the helm release - just like doing an upgrade - would refresh all the pods with the new configuration. [Dremio documentation](https://docs.dremio.com/deployment/README-config.html) covers the configuration capabilities in Dremio. - -If you need to add a core-site.xml, you can add the file to the config directory and it will be propagated to all the pods on install or upgrade of the deployment. +Dremio configuration files used by the deployment are in the config +directory. These files are propagated to all the pods in the +cluster. Updating the configuration and upgrading the helm release - +just like doing an upgrade - would refresh all the pods with the new +configuration. [Dremio +documentation](https://docs.dremio.com/deployment/README-config.html) +covers the configuration capabilities in Dremio. 
+ +If you need to add a core-site.xml, you can add the file to the config +directory and it will be propagated to all the pods on install or +upgrade of the deployment. + +#### Important Changes + +2019-09-19 (v0.1.0): BREAKING CHANGE. + + Dremio versions before 4.0.0 are no longer supported by this Helm + chart. Dremio image specifier was split into separate image and + imageTag values to follow best practices. "dist" value in + dremio.conf moved to cloud storage where possible (otherwise + defaults to pdfs) -- this will lose any previously extant + reflection materialisations, user uploads, scratch files, etc. + Also added Cloud Cache support (new in Dremio 4.0). Please see + values.yaml for details on this new configuration. diff --git a/charts/dremio/config/core-site.xml b/charts/dremio/config/core-site.xml index 8d91d757..3283eb32 100644 --- a/charts/dremio/config/core-site.xml +++ b/charts/dremio/config/core-site.xml @@ -1,80 +1,93 @@ +{{- if and .Values.distStorage.type (ne .Values.distStorage.type "local") }} - {{- if and .Values.distStorage.type (eq .Values.distStorage.type "aws") }} - - fs.s3a.access.key - AWS access key ID. - {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} - - - fs.s3a.secret.key - AWS secret key.
- {{ required "AWS secret required" .Values.distStorage.aws.secret}} - - {{- end }} - - {{- if and .Values.distStorage.type (eq .Values.distStorage.type "azure") }} - - - fs.adl.impl - Must be set to org.apache.hadoop.fs.adl.AdlFileSystem - org.apache.hadoop.fs.adl.AdlFileSystem - - - dfs.adls.oauth2.client.id - Application ID of the registered application under Azure Active Directory - {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} - - - dfs.adls.oauth2.credential - Generated password value for the registered application - {{required "Azure secret value required" .Values.distStorage.azure.secret}} - - - dfs.adls.oauth2.refresh.url - Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. - {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} - - - dfs.adls.oauth2.access.token.provider.type - Must be set to ClientCredential - ClientCredential - - - fs.adl.impl.disable.cache - Only include this property AFTER validating the ADLS connection. - false - - {{- end }} - - {{- if and .Values.dremioVersion (ge .Values.dremioVersion "3.2.0") .Values.distStorage.type (eq .Values.distStorage.type "azureStorage") }} - - fs.dremioAzureStorage.impl - FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem - com.dremio.plugins.azure.AzureStorageFileSystem - - - dremio.azure.account - The name of the storage account. - {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} - - - dremio.azure.key - The shared access key for the storage account. - {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} - - - dremio.azure.mode - The storage account type. Value: STORAGE_V2 - STORAGE_V2 - - - dremio.azure.secure - Boolean option to enable SSL connections. 
Value: True/False - True - - {{- end }} + {{- if eq .Values.distStorage.type "aws" }} + + fs.dremioS3.impl + The FileSystem implementation. Must be set to com.dremio.plugins.s3.store.S3FileSystem + com.dremio.plugins.s3.store.S3FileSystem + + + fs.s3a.aws.credentials.provider + The credential provider type. + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider + + + fs.s3a.access.key + AWS access key ID. + {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} + + + fs.s3a.secret.key + AWS secret key. + {{ required "AWS secret required" .Values.distStorage.aws.secret}} + + {{- end }} + + {{- if eq .Values.distStorage.type "azure" }} + + + fs.adl.impl + Must be set to org.apache.hadoop.fs.adl.AdlFileSystem + org.apache.hadoop.fs.adl.AdlFileSystem + + + dfs.adls.oauth2.client.id + Application ID of the registered application under Azure Active Directory + {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} + + + dfs.adls.oauth2.credential + Generated password value for the registered application + {{required "Azure secret value required" .Values.distStorage.azure.secret}} + + + dfs.adls.oauth2.refresh.url + Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. + {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} + + + dfs.adls.oauth2.access.token.provider.type + Must be set to ClientCredential + ClientCredential + + + fs.adl.impl.disable.cache + Only include this property AFTER validating the ADLS connection. + false + + {{- end }} + + {{- if eq .Values.distStorage.type "azureStorage" }} + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. 
+ {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} + + + dremio.azure.key + The shared access key for the storage account. + {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} + + + dremio.azure.mode + The storage account type. Value: STORAGE_V2 + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. Value: True/False + True + + {{- end }} +{{- end }} diff --git a/charts/dremio/config/dremio.conf b/charts/dremio/config/dremio.conf index 12252069..6dbe4f9a 100644 --- a/charts/dremio/config/dremio.conf +++ b/charts/dremio/config/dremio.conf @@ -15,33 +15,21 @@ # paths: { - # the local path for dremio to store data. + # Local path for dremio to store data. local: ${DREMIO_HOME}"/data" - - # the distributed path Dremio data including job results, downloads, uploads, etc - #dist: "pdfs://"${paths.local}"/pdfs" - - # If you are editing the uploads value in this file, please delete all the lines starting with double curly braces - {{- if .Values.distStorage.type }} - {{- if and .Values.dremioVersion (lt .Values.dremioVersion "3.2.0") }} - {{- if eq .Values.distStorage.type "aws" }} - uploads: "s3a://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" - {{- end }} - {{- if eq .Values.distStorage.type "azure" }} - uploads: "adl://{{required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" - {{- end }} - {{- else }} # dremio_version > 3.2.0 - {{- if eq .Values.distStorage.type "aws" }} - uploads: "dremioS3://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" - {{- end }} - {{- if eq .Values.distStorage.type "azure" }} - uploads: "dremioAdl://{{required "Azure Datalake 
store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" - {{- end }} - {{- if eq .Values.distStorage.type "azureStorage" }} - uploads: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.uploadsPath}}" - accelerator: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.acceleratorPath}}" - {{- end }} - {{- end }} + # Distributed path Dremio data including job results, downloads, + # uploads, etc + {{- if ne .Values.distStorage.type "local" }} + results: "pdfs://"${paths.local}"/results" + {{- if eq .Values.distStorage.type "aws" }} + dist: "dremioS3:///{{ required "AWS bucketname required" .Values.distStorage.aws.bucketName }}{{ required "Path required" .Values.distStorage.aws.path }}" + {{- else if eq .Values.distStorage.type "azure" }} + dist: "dremioAdl://{{ required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName }}.azuredatalakestore.net{{ required "Path required" .Values.distStorage.azure.path }}" + {{- else if eq .Values.distStorage.type "azureStorage" }} + dist: "dremioAzureStorage://:///{{ required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem }}{{ required "Path required" .Values.distStorage.azureStorage.path }}" + {{- end }} + {{- else }} + dist: "pdfs://"${paths.local}"/pdfs" {{- end }} } @@ -54,8 +42,30 @@ services: { # executor.enabled: true # # Other service parameters can be customized via this file. + + # Cloud Cache is supported in Dremio 4.0.0+. 
+ {{- if and .Values.executor.cloudCache.enabled (or (ge .Values.imageTag "4.0.0") (eq .Values.imageTag "latest")) }} + executor: { + cache: { + path.db: "/var/lib/dremio", + path.fs: ["/var/lib/dremio"], + pctquota.db: {{ .Values.executor.cloudCache.quota.db_pct }}, + pctquota.fs: [{{ .Values.executor.cloudCache.quota.fs_pct }}] + } + } + {{- end }} } +{{- if and .Values.executor.cloudCache.enabled (ne .Values.distStorage.type "local") }} +debug: { + # Enable caching for distributed storage, it is turned off by default + dist.caching.enabled: true, + # Max percent of total available cache space to use when possible + # for distributed storage + dist.max.cache.space.percent: {{ .Values.executor.cloudCache.quota.cache_pct }} +} +{{- end }} + {{- if .Values.tls.ui.enabled }} services.coordinator.web.ssl.enabled: true services.coordinator.web.ssl.auto-certificate.enabled: false @@ -64,9 +74,9 @@ services.coordinator.web.ssl.keyStore: "/opt/dremio/tls/ui.pkcs12" {{- end }} {{- if .Values.tls.client.enabled }} -# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in Dremio Enterprise Edition. +# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in +# Dremio Enterprise Edition. 
services.coordinator.client-endpoint.ssl.enabled: true services.coordinator.client-endpoint.ssl.auto-certificate.enabled: false - services.coordinator.client-endpoint.ssl.keyStore: "/opt/dremio/tls/client.pkcs12" -{{- end }} \ No newline at end of file +{{- end }} diff --git a/charts/dremio/templates/dremio-admin.yaml b/charts/dremio/templates/dremio-admin.yaml index 3de91c43..feda9d9b 100644 --- a/charts/dremio/templates/dremio-admin.yaml +++ b/charts/dremio/templates/dremio-admin.yaml @@ -12,7 +12,7 @@ metadata: spec: containers: - name: dremio-admin - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent stdin: true tty: true diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index 86d5f2ac..3089c1ad 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -28,7 +28,7 @@ spec: {{- end }} containers: - name: dremio-coordinator - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -70,7 +70,7 @@ spec: command: ["sh", "-c", "until nc -z dremio-client {{ .Values.coordinator.web.port | default 9047 }} > /dev/null; do echo waiting for dremio master; sleep 2; done;"] {{- if .Values.tls.ui.enabled }} - name: generate-ui-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls @@ -92,7 +92,7 @@ spec: {{- end }} {{- if .Values.tls.client.enabled }} - name: generate-client-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls diff --git a/charts/dremio/templates/dremio-executor.yaml b/charts/dremio/templates/dremio-executor.yaml index e511e1d3..150e1fe1 100644 --- a/charts/dremio/templates/dremio-executor.yaml +++ 
b/charts/dremio/templates/dremio-executor.yaml @@ -28,7 +28,7 @@ spec: {{- end }} containers: - name: dremio-executor - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -61,9 +61,10 @@ spec: - name: wait-for-zk image: busybox command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] - # since we're mounting a separate volume, reset permission to dremio uid/gid + # since we're mounting a separate volume, reset permission to + # dremio uid/gid - name: chown-data-directory - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index f272eb32..51619193 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -36,7 +36,7 @@ spec: {{- end }} containers: - name: dremio-master-coordinator - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -92,7 +92,7 @@ spec: command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] # since we're mounting a separate volume, reset permission to dremio uid/gid - name: chown-data-directory - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 @@ -104,7 +104,7 @@ spec: - "dremio:dremio" - "/opt/dremio/data" - name: upgrade-task - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-master-volume @@ -114,7 +114,7 @@ spec: - "upgrade" {{- if .Values.tls.ui.enabled }} - name: generate-ui-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: 
IfNotPresent volumeMounts: - name: dremio-tls @@ -136,7 +136,7 @@ spec: {{- end }} {{- if .Values.tls.client.enabled }} - name: generate-client-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index 2601ebba..925b88a3 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -1,7 +1,9 @@ # The image used to build the Dremio cluster. It is recommended to update the # version tag to the version that you are using. This will ensure that all # the pods are using the same version of the software. -image: dremio/dremio-oss:latest +image: dremio/dremio-oss +imageTag: latest + # Check out Dremio documentation for memory and cpu requirements for # the coordinators and the executors. # The value of memory should be in MB. CPU is in no of cores. @@ -22,6 +24,20 @@ executor: cpu: 15 count: 3 volumeSize: 100Gi + cloudCache: + # Requires Dremio version 4.0.0 or later + enabled: true + quota: + # Percentage of the disk space for the running Kubernetes node + # that can be used for Cloud Cache files. + fs_pct: 70 + # Percentage of that space that can be used for the internal + # Cloud Cache database. + db_pct: 70 + # Percentage of that space that can be used for caching + # materialised reflections. This is an upper bound, not a + # reservation. + cache_pct: 100 zookeeper: memory: 1024 cpu: 0.5 @@ -37,9 +53,10 @@ tls: enabled: false secret: dremio-tls-secret-ui client: - # To enable TLS for the client endpoints, set the enabled flag to true and provide - # the appropriate Kubernetes TLS secret. Client endpoint encryption is available only on - # Dremio Enterprise Edition and should not be enabled otherwise. + # To enable TLS for the client endpoints, set the enabled flag to + # true and provide the appropriate Kubernetes TLS secret.
Client + # endpoint encryption is available only on Dremio Enterprise + # Edition and should not be enabled otherwise. enabled: false secret: dremio-tls-secret-client @@ -63,35 +80,38 @@ serviceType: LoadBalancer #storageClass: managed-premium # For private and protected docker image repository, you should store -# the credentials in a kubernetes secret and provide the secret name here. -# For more information, see https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod +# the credentials in a kubernetes secret and provide the secret name +# here. For more information, see +# https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod #imagePullSecrets: secretname -# Target pods to nodes based on labels set on the nodes. -# For more information, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +# Target pods to nodes based on labels set on the nodes. For more +# information, see +# https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector #nodeSelector: # key: value -# Control where uploaded files are stored. -# See https://docs.dremio.com/deployment/distributed-storage.html for more information -dremioVersion: "3.2.0" # Dremio Version 3.2.0 or greater +# Control where uploaded files are stored. See +# https://docs.dremio.com/deployment/distributed-storage.html for more +# information distStorage: - # Valid values are local, aws, azure or azureStorage. aws and azure choice requires additional configuration data. + # Valid values are local, aws, azure or azureStorage. aws and azure + # choice requires additional configuration data. 
type: "local" - aws: #S3 - used for only uploads + aws: # S3 bucketName: "Your_AWS_bucket_name" path: "/" accessKey: "Your_AWS_Access_Key" secret: "Your_AWS_Secret" - azure: #ADLS v1 - used for only uploads + azure: # ADLS gen1 datalakeStoreName: "Your_Azure_DataLake_Storage_name" path: "/" applicationId: "Your_Azure_Application_Id" secret: "Your_Azure_Secret" oauth2EndPoint: "Azure_OAuth2_Endpoint" - azureStorage: #AzureStorage v2 - supported in Dremio version 3.2.0+ - used for uploads and accelerator + azureStorage: # AzureStorage gen2v2 accountName: "Azure_storage_v2_account_name" accessKey: "Access_key_for_the_storage_account" + filesystem: "Filesystem_in_storage_account" - uploadsPath: "Path_for_uploads" - acceleratorPath: "Path_for_accelerator" + path: "/" From a21a26a285f611ef1985f13f5cabc31a66f000ee Mon Sep 17 00:00:00 2001 From: Naren <41924335+naren-dremio@users.noreply.github.com> Date: Fri, 14 Feb 2020 09:33:43 -0500 Subject: [PATCH 24/24] bump version --- images/dremio-oss/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/images/dremio-oss/Dockerfile b/images/dremio-oss/Dockerfile index 2c1cfec0..792ad517 100644 --- a/images/dremio-oss/Dockerfile +++ b/images/dremio-oss/Dockerfile @@ -21,7 +21,7 @@ MAINTAINER Dremio LABEL org.label-schema.name='dremio/dremio-oss' LABEL org.label-schema.description='Dremio OSS.' -ARG DOWNLOAD_URL=https://download.dremio.com/community-server/3.0.1-201811132128360291-804fe82/dremio-community-3.0.1-201811132128360291-804fe82.tar.gz +ARG DOWNLOAD_URL=https://download.dremio.com/community-server/4.1.4-202001240912140359-a90eb503/dremio-community-4.1.4-202001240912140359-a90eb503.tar.gz RUN \ mkdir -p /opt/dremio \ && mkdir -p /var/lib/dremio \