Modified the demo files for DAIS and eventhub
1 parent 221918b, commit 3a9b41a
Showing 8 changed files with 185 additions and 57 deletions.
demo/notebooks/afam_eventhub_runners/init_dlt_meta_pipeline.py (10 additions, 0 deletions)
@@ -0,0 +1,10 @@
# Databricks notebook source
dlt_meta_whl = spark.conf.get("dlt_meta_whl")
%pip install $dlt_meta_whl  # noqa: E999

# COMMAND ----------

layer = spark.conf.get("layer", None)

from src.dataflow_pipeline import DataflowPipeline
DataflowPipeline.invoke_dlt_pipeline(spark, layer)
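
For context: this init notebook expects dlt_meta_whl and layer to arrive through the DLT pipeline's configuration map, which is what spark.conf.get() resolves inside a pipeline. Below is a minimal sketch of such pipeline settings expressed as a Python dict; the pipeline name, wheel path, and notebook path are placeholders, not values from this commit.

import json

# Hypothetical pipeline settings; keys under "configuration" surface in the
# notebook via spark.conf.get("dlt_meta_whl") and spark.conf.get("layer")
pipeline_settings = {
    "name": "dlt-meta-bronze-demo",  # placeholder name
    "libraries": [
        {"notebook": {"path": "/demo/notebooks/afam_eventhub_runners/init_dlt_meta_pipeline"}}
    ],
    "configuration": {
        "dlt_meta_whl": "/dbfs/path/to/dlt_meta-x.y.z-py3-none-any.whl",  # placeholder
        "layer": "bronze",
    },
}
print(json.dumps(pipeline_settings, indent=2))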
@@ -0,0 +1,76 @@
# Databricks notebook source
# MAGIC %md
# MAGIC ## Install azure-eventhub

# COMMAND ----------

# MAGIC %sh pip install azure-eventhub

# COMMAND ----------

dbutils.library.restartPython()

# COMMAND ----------

dbutils.widgets.text("eventhub_name", "eventhub_name", "")
dbutils.widgets.text("eventhub_name_append_flow", "eventhub_name_append_flow", "")
dbutils.widgets.text("eventhub_namespace", "eventhub_namespace", "")
dbutils.widgets.text("eventhub_secrets_scope_name", "eventhub_secrets_scope_name", "")
dbutils.widgets.text("eventhub_accesskey_name", "eventhub_accesskey_name", "")
dbutils.widgets.text("eventhub_input_data", "eventhub_input_data", "")
dbutils.widgets.text("eventhub_append_flow_input_data", "eventhub_append_flow_input_data", "")

# COMMAND ----------

eventhub_name = dbutils.widgets.get("eventhub_name")
eventhub_name_append_flow = dbutils.widgets.get("eventhub_name_append_flow")
eventhub_namespace = dbutils.widgets.get("eventhub_namespace")
eventhub_secrets_scope_name = dbutils.widgets.get("eventhub_secrets_scope_name")
eventhub_accesskey_name = dbutils.widgets.get("eventhub_accesskey_name")
eventhub_input_data = dbutils.widgets.get("eventhub_input_data")
eventhub_append_flow_input_data = dbutils.widgets.get("eventhub_append_flow_input_data")

# COMMAND ----------

print(f"eventhub_name={eventhub_name}, eventhub_name_append_flow={eventhub_name_append_flow}, "
      f"eventhub_namespace={eventhub_namespace}, eventhub_secrets_scope_name={eventhub_secrets_scope_name}, "
      f"eventhub_accesskey_name={eventhub_accesskey_name}, eventhub_input_data={eventhub_input_data}, "
      f"eventhub_append_flow_input_data={eventhub_append_flow_input_data}")

# COMMAND ----------

import json
from azure.eventhub import EventHubProducerClient, EventData

# Build the connection string from the secret scope and create the producer
eventhub_shared_access_value = dbutils.secrets.get(scope=eventhub_secrets_scope_name, key=eventhub_accesskey_name)
eventhub_conn = f"Endpoint=sb://{eventhub_namespace}.servicebus.windows.net/;SharedAccessKeyName={eventhub_accesskey_name};SharedAccessKey={eventhub_shared_access_value}"

client = EventHubProducerClient.from_connection_string(eventhub_conn, eventhub_name=eventhub_name)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Publish IoT data to the Event Hub

# COMMAND ----------

with open(eventhub_input_data) as f:
    data = json.load(f)

# Keep the producer open across the whole loop; one small batch is sent per event
with client:
    for event in data:
        event_data_batch = client.create_batch()
        event_data_batch.add(EventData(json.dumps(event)))
        client.send_batch(event_data_batch)

# COMMAND ----------

append_flow_client = EventHubProducerClient.from_connection_string(eventhub_conn, eventhub_name=eventhub_name_append_flow)

with open(eventhub_append_flow_input_data) as f:
    af_data = json.load(f)

# Build and send the append-flow batches from the append-flow producer
with append_flow_client:
    for event in af_data:
        event_data_batch = append_flow_client.create_batch()
        event_data_batch.add(EventData(json.dumps(event)))
        append_flow_client.send_batch(event_data_batch)
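
A note on batching: sending one event per batch is fine for a demo-sized payload, but the azure-eventhub client is designed to pack many events into a batch, and EventDataBatch.add() raises ValueError once a batch would exceed the size limit. A minimal sketch of that pattern; send_events is a hypothetical helper, while client, data, json, and EventData are the names defined above.

# Pack events into batches up to the size limit instead of one batch per event
def send_events(producer, events):
    batch = producer.create_batch()
    batch_count = 0
    for event in events:
        payload = EventData(json.dumps(event))
        try:
            batch.add(payload)
            batch_count += 1
        except ValueError:
            # Batch is full: flush it and start a new one with this event
            producer.send_batch(batch)
            batch = producer.create_batch()
            batch.add(payload)
            batch_count = 1
    if batch_count:
        producer.send_batch(batch)  # flush the final partial batch

with client:
    send_events(client, data)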
@@ -0,0 +1,38 @@
# Databricks notebook source
import pandas as pd

run_id = dbutils.widgets.get("run_id")
# The widget arrives as a string; treat "true"/"True" as enabled
uc_enabled = dbutils.widgets.get("uc_enabled").lower() == "true"
uc_catalog_name = dbutils.widgets.get("uc_catalog_name")
output_file_path = dbutils.widgets.get("output_file_path")
bronze_schema = dbutils.widgets.get("bronze_schema")
log_list = []

# If this notebook was reached, the Bronze and Silver pipelines are assumed to have completed successfully
log_list.append("Completed Bronze Eventhub DLT Pipeline.")

UC_TABLES = {
    f"{uc_catalog_name}.{bronze_schema}.bronze_{run_id}_iot": 20,
    f"{uc_catalog_name}.{bronze_schema}.bronze_{run_id}_iot_quarantine": 2
}

# Note: both maps currently resolve to the same fully qualified table names
NON_UC_TABLES = {
    f"{uc_catalog_name}.{bronze_schema}.bronze_{run_id}_iot": 20,
    f"{uc_catalog_name}.{bronze_schema}.bronze_{run_id}_iot_quarantine": 2
}
log_list.append("Validating DLT EVenthub Bronze Table Counts...") | ||
tables = UC_TABLES if uc_enabled else NON_UC_TABLES | ||
for table, counts in tables.items(): | ||
query = spark.sql(f"SELECT count(*) as cnt FROM {table}") | ||
cnt = query.collect()[0].cnt | ||
|
||
log_list.append(f"Validating Counts for Table {table}.") | ||
try: | ||
assert int(cnt) >= counts | ||
log_list.append(f"Expected >= {counts} Actual: {cnt}. Passed!") | ||
except AssertionError: | ||
log_list.append(f"Expected > {counts} Actual: {cnt}. Failed!") | ||
|
||
pd_df = pd.DataFrame(log_list) | ||
pd_df.to_csv(output_file_path) |
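
The same checks can be written against the DataFrame API instead of SQL strings, which avoids hand-building queries from table names. A minimal equivalent sketch, reusing the tables dict and log_list from above:

# Equivalent count validation via spark.table() instead of spark.sql()
for table, expected in tables.items():
    actual = spark.table(table).count()
    status = "Passed!" if actual >= expected else "Failed!"
    log_list.append(f"Table {table}: expected >= {expected}, actual {actual}. {status}")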
@@ -0,0 +1,10 @@
# Databricks notebook source
dlt_meta_whl = spark.conf.get("dlt_meta_whl")
%pip install $dlt_meta_whl  # noqa: E999

# COMMAND ----------

layer = spark.conf.get("layer", None)

from src.dataflow_pipeline import DataflowPipeline
DataflowPipeline.invoke_dlt_pipeline(spark, layer)
@@ -0,0 +1,10 @@
# Databricks notebook source
dbfs_tmp_path = dbutils.widgets.get("dbfs_tmp_path")

# COMMAND ----------

# Copy each domain's incremental files into the main demo data folder (recursive)
source_base_path = f"{dbfs_tmp_path}/demo/resources/incremental_data/"
target_base_path = f"{dbfs_tmp_path}/demo/resources/data/"
domains = ["customers", "transactions", "stores", "products"]
for domain in domains:
    dbutils.fs.cp(f"{source_base_path}{domain}/", f"{target_base_path}{domain}/", True)
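
An optional sanity check after the copy is to list each target folder; dbutils.fs.ls raises an error if a path is missing, so this also surfaces a failed copy. A minimal sketch reusing the names above:

# Confirm each domain folder landed under the target path
for domain in domains:
    files = dbutils.fs.ls(f"{target_base_path}{domain}/")
    print(f"{domain}: {len(files)} entries under {target_base_path}{domain}/")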