forked from vdesabou/kafka-docker-playground
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathazure-data-lake-storage-gen2.sh
executable file
·103 lines (80 loc) · 4.22 KB
/
azure-data-lake-storage-gen2.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/bin/bash
#
# End-to-end example for the Azure Data Lake Storage Gen2 sink connector:
#   1. log in to Azure with the `az` CLI,
#   2. create a resource group, an AD app with its service principal, and a
#      storage account with hierarchical namespace enabled,
#   3. start the plaintext environment and create the sink connector,
#   4. produce 10 Avro records to datalake_topic and read one file back,
#   5. delete the resource group and the AD app.
#
# Optional environment:
#   AZ_USER / AZ_PASS   credentials for a non-interactive `az login`; when
#                       either one is empty a browser login is used.
#   TRAVIS_JOB_NUMBER   appended to the generated Azure resource name.
#
# External tools invoked: az, jq, curl, docker.

# Abort on the first failing command.
set -e

# Absolute directory of this script, so relative paths work from any cwd.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"

# Quoted so that a checkout path containing spaces is not word-split.
# NOTE(review): `log`, used throughout this script, is not defined in this
# file; presumably it comes from utils.sh.
source "${DIR}/../../scripts/utils.sh"
# Authenticate the az CLI. The environment credentials are used only when
# both AZ_USER and AZ_PASS are non-empty; otherwise fall back to the
# interactive browser flow.
if [[ -z "$AZ_USER" || -z "$AZ_PASS" ]]; then
  log "Logging to Azure using browser"
  az login
else
  log "Logging to Azure using environment variables AZ_USER and AZ_PASS"
  # Drop any existing session first. errexit is switched off around the
  # logout so that a failing logout does not stop the script.
  set +e
  az logout
  set -e
  az login -u "$AZ_USER" -p "$AZ_PASS"
fi
# Region in which the Azure resources are created.
AZURE_REGION=westeurope

# One shared base name: "playground" + user + CI job number, with every
# '-', '.' and '_' removed.
AZURE_NAME="playground${USER}${TRAVIS_JOB_NUMBER}"
AZURE_NAME="${AZURE_NAME//[._-]/}"

# The AD app, the storage account and the resource group all reuse it.
AZURE_AD_APP_NAME="${AZURE_NAME}"
AZURE_DATALAKE_ACCOUNT_NAME="${AZURE_NAME}"
AZURE_RESOURCE_GROUP="${AZURE_NAME}"
# Best-effort removal of leftovers from a previous run. errexit is disabled
# because these commands may fail when there is nothing to delete.
set +e
az group delete --name "$AZURE_RESOURCE_GROUP" --yes

# Delete every AD app whose display name is exactly $AZURE_AD_APP_NAME, one
# id at a time: the listing can return zero or several entries, and an empty
# or multi-word value for --id is rejected by the CLI. The object id is read
# from .objectId with a fallback to .id.
# NOTE(review): the fallback is there because some az CLI releases appear to
# name this field "id" instead of "objectId" - confirm against the CLI
# version in use.
az ad app list --display-name "$AZURE_AD_APP_NAME" \
  | jq -r --arg name "$AZURE_AD_APP_NAME" \
      '.[] | select(.displayName == $name) | (.objectId // .id // empty)' \
  | while IFS= read -r stale_app_id; do
      # stdin is redirected so az cannot consume the remaining ids.
      az ad app delete --id "$stale_app_id" </dev/null
    done
set -e
# Provision everything the connector needs on the Azure side: resource
# group, AD app + service principal, storage account and role assignment.

log "Add the CLI extension for Azure Data Lake Gen 2"
az extension add --name storage-preview

log "Creating resource $AZURE_RESOURCE_GROUP in $AZURE_REGION"
az group create \
  --name "$AZURE_RESOURCE_GROUP" \
  --location "$AZURE_REGION"

log "Registering active directory App $AZURE_AD_APP_NAME"
# --query appId -o tsv makes the CLI print only the application id. The
# password set here is the client key the connector is configured with.
AZURE_DATALAKE_CLIENT_ID=$(az ad app create \
  --display-name "$AZURE_AD_APP_NAME" \
  --password mypassword \
  --native-app false \
  --available-to-other-tenants false \
  --query appId -o tsv)

log "Creating Service Principal associated to the App"
# Without pipefail a failing `az` is hidden behind jq's exit status, so the
# result is validated explicitly instead of continuing with an empty or
# "null" id.
SERVICE_PRINCIPAL_ID=$(az ad sp create --id "$AZURE_DATALAKE_CLIENT_ID" \
  | jq -r '.objectId // .id // empty')
if [[ -z "$SERVICE_PRINCIPAL_ID" ]]; then
  log "Could not determine the Service Principal object id"
  exit 1
fi

# Tenant of the active subscription only. Listing every account yields one
# tenant id per subscription, which would put newlines into the URL below.
AZURE_TENANT_ID=$(az account show --query tenantId -o tsv)
AZURE_DATALAKE_TOKEN_ENDPOINT="https://login.microsoftonline.com/$AZURE_TENANT_ID/oauth2/token"

log "Creating data lake $AZURE_DATALAKE_ACCOUNT_NAME in resource $AZURE_RESOURCE_GROUP"
az storage account create \
  --name "$AZURE_DATALAKE_ACCOUNT_NAME" \
  --resource-group "$AZURE_RESOURCE_GROUP" \
  --location "$AZURE_REGION" \
  --sku Standard_LRS \
  --kind StorageV2 \
  --hierarchical-namespace true

# NOTE(review): presumably gives the new account / principal time to become
# visible before the role assignment - confirm.
sleep 20

log "Assigning Storage Blob Data Owner role to Service Principal $SERVICE_PRINCIPAL_ID"
# No --scope is passed, so the CLI's default scope applies.
az role assignment create \
  --assignee "$SERVICE_PRINCIPAL_ID" \
  --role "Storage Blob Data Owner"
# Bring up the plaintext environment with this example's compose file.
# Both paths are quoted so directories containing spaces are not word-split.
"${DIR}/../../environment/plaintext/start.sh" "${PWD}/docker-compose.plaintext.yml"

log "Creating Data Lake Storage Gen2 Sink connector"
# PUT the connector configuration to the Connect REST endpoint on
# localhost:8083 and pretty-print the response with jq. Shell variables are
# spliced into the JSON by closing and reopening the single-quoted string.
# The client key is the password the AD app was registered with.
curl -X PUT \
  -H "Content-Type: application/json" \
  --data '{
    "connector.class": "io.confluent.connect.azure.datalake.gen2.AzureDataLakeGen2SinkConnector",
    "tasks.max": "1",
    "topics": "datalake_topic",
    "flush.size": "3",
    "azure.datalake.gen2.client.id": "'"$AZURE_DATALAKE_CLIENT_ID"'",
    "azure.datalake.gen2.client.key": "mypassword",
    "azure.datalake.gen2.account.name": "'"$AZURE_DATALAKE_ACCOUNT_NAME"'",
    "azure.datalake.gen2.token.endpoint": "'"$AZURE_DATALAKE_TOKEN_ENDPOINT"'",
    "format.class": "io.confluent.connect.azure.storage.format.avro.AvroFormat",
    "confluent.license": "",
    "confluent.topic.bootstrap.servers": "broker:9092",
    "confluent.topic.replication.factor": "1"
  }' \
  http://localhost:8083/connectors/azure-datalake-gen2-sink/config | jq .
log "Sending messages to topic datalake_topic"
# seq emits ten JSON lines ({"f1": "value1"} ... {"f1": "value10"}) that are
# piped into the Avro console producer running in the connect container.
value_schema='{"type":"record","name":"myrecord","fields":[{"name":"f1","type":"string"}]}'
seq -f "{\"f1\": \"value%g\"}" 10 \
  | docker exec -i connect kafka-avro-console-producer \
      --broker-list broker:9092 \
      --property schema.registry.url=http://schema-registry:8081 \
      --topic datalake_topic \
      --property value.schema="${value_schema}"

# NOTE(review): presumably leaves the connector time to write its files
# before they are listed - confirm.
sleep 20

log "Listing ${AZURE_DATALAKE_ACCOUNT_NAME} in Azure Data Lake"
az storage blob list \
  --account-name "${AZURE_DATALAKE_ACCOUNT_NAME}" \
  --container-name topics

log "Getting one of the avro files locally and displaying content with avro-tools"
# The blob is downloaded under /tmp, which is also mounted into the
# avro-tools container so the same path is valid on both sides.
avro_file='datalake_topic+0+0000000000.avro'
az storage blob download \
  --container-name topics \
  --name "datalake_topic/partition=0/${avro_file}" \
  --file "/tmp/${avro_file}" \
  --account-name "${AZURE_DATALAKE_ACCOUNT_NAME}"
docker run -v /tmp:/tmp actions/avro-tools tojson "/tmp/${avro_file}"
# Tear down the Azure resources created by this run. Both deletions are
# always attempted: under errexit a failed resource-group deletion would
# otherwise stop the script before the AD app is removed and leak the app.
# The last non-zero status is remembered and reported once both have run.
cleanup_status=0

log "Deleting resource group"
az group delete --name "$AZURE_RESOURCE_GROUP" --yes || cleanup_status=$?

log "Deleting active directory app"
az ad app delete --id "$AZURE_DATALAKE_CLIENT_ID" || cleanup_status=$?

if (( cleanup_status != 0 )); then
  log "Cleanup finished with errors (exit status $cleanup_status)"
  exit "$cleanup_status"
fi