-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #19 from great-expectations/add-demo-scripts
Add demo scripts for gx-1.0.0a4
- Loading branch information
Showing
17 changed files
with
205 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
37 changes: 37 additions & 0 deletions
37
gx-1.0.0a4/demos/scripts/01-create-expectations-interactively.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
"""Demo: create Expectations interactively against a sample batch.

Fetches the ``project_name`` suite (adding it when the lookup fails), reads one
parquet file into a batch through the default pandas data source, then
validates each Expectation against that batch, prints the result, and adds the
Expectation to the suite.
"""

# TODO: will become from great_expectations import get_context, ExpectationSuite
import great_expectations.expectations as gxe
from great_expectations import get_context
from great_expectations.core import ExpectationSuite
from great_expectations.exceptions import DataContextError

context = get_context(project_root_dir="./")


def _get_or_create_suite(name):
    """Return the suite called *name*, adding a new one if the lookup fails."""
    try:
        return context.suites.get(name)
    # TODO: error will change to ResourceNotFoundError
    except DataContextError:
        # TODO: will change to:
        # suite = context.suites.add(name="project_name")
        return context.suites.add(ExpectationSuite(name=name))


def _try_expectation(expectation):
    """Validate *expectation* on the batch, print the result, add it to the suite."""
    print(batch.validate(expectation))
    suite.add_expectation(expectation)


suite = _get_or_create_suite("project_name")

batch = context.data_sources.pandas_default.read_parquet(
    "s3://nyc-tlc/trip data/yellow_tripdata_2019-01.parquet"
)

# TODO: column_index will not be required
_try_expectation(gxe.ExpectColumnToExist(column="VendorID", column_index=None))

_try_expectation(
    gxe.ExpectColumnValuesToMatchRegex(column="VendorID", regex="^[123456]$")
)

_try_expectation(gxe.ExpectColumnValuesToBeUnique(column="VendorID"))

print(suite)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
"""Demo: set up an S3 parquet data source, asset, and monthly batch definition.

Each lookup falls back to creating the resource when the lookup raises, after
which one batch is fetched and validated to confirm the setup works.
"""

import re

import great_expectations.expectations as gxe
from great_expectations import get_context

context = get_context(project_root_dir="./")

try:
    data_source = context.datasources["project_name"]
# TODO: this will be updated to become
# data_source = context.data_sources.get("project_name")
# TODO: instead of KeyError will be ResourceNotFoundError
except KeyError:
    data_source = context.data_sources.add_pandas_s3(
        name="project_name",
        bucket="nyc-tlc",
    )

try:
    asset = data_source.get_asset("my_project")
# TODO: instead of LookupError will be ResourceNotFoundError
except LookupError:
    asset = data_source.add_parquet_asset("my_project", s3_prefix="trip data/")

try:
    batch_definition = asset.get_batch_definition("monthly")
except KeyError:
    # The dot before "parquet" is escaped so that it matches only a literal
    # "." instead of any single character.
    pattern = re.compile(
        r"yellow_tripdata_(?P<year>[0-9]{4})-(?P<month>[0-9]{2})\.parquet"
    )
    batch_definition = asset.add_batch_definition_monthly("monthly", regex=pattern)

# To verify that things worked...
batch = batch_definition.get_batch(batch_parameters={"year": "2020", "month": "04"})

# column_index is passed explicitly for consistency with the other demos.
# TODO: column_index will not be required
print(batch.validate(gxe.ExpectColumnToExist(column="VendorID", column_index=None)))
30 changes: 30 additions & 0 deletions
30
gx-1.0.0a4/demos/scripts/03-create-expectations-programatically.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
"""Demo: define a suite's Expectations in code and save the suite.

Fetches the ``project_name`` suite (adding it when the lookup fails), replaces
its expectations with the list built below, and saves it.
"""

import great_expectations.expectations as gxe
from great_expectations import get_context

# TODO: will become from great_expectations import get_context, ExpectationSuite
from great_expectations.core import ExpectationSuite
from great_expectations.exceptions import DataContextError

context = get_context(project_root_dir="./")

# Create Expectation Suite
try:
    suite = context.suites.get("project_name")
# TODO: instead of DataContextError will be ResourceNotFoundError
except DataContextError:
    new_suite = ExpectationSuite(name="project_name")
    suite = context.suites.add(new_suite)

# Build each Expectation under a descriptive name, in the order they are stored.
vendor_column_exists = gxe.ExpectColumnToExist(column="VendorID", column_index=None)
vendor_ids_in_set = gxe.ExpectColumnDistinctValuesToBeInSet(
    column="VendorID", value_set=[1, 2, 3, 4, 5, 6]
)
passenger_count_mostly_1_to_5 = gxe.ExpectColumnValuesToBeBetween(
    column="passenger_count", min_value=1, max_value=5, mostly=0.95
)
passenger_count_0_to_10 = gxe.ExpectColumnValuesToBeBetween(
    column="passenger_count", min_value=0, max_value=10
)

suite.expectations = [
    vendor_column_exists,
    vendor_ids_in_set,
    passenger_count_mostly_1_to_5,
    passenger_count_0_to_10,
]

suite.save()
40 changes: 40 additions & 0 deletions
40
gx-1.0.0a4/demos/scripts/04-create-validation-definition.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
"""Demo: create a validation definition and run it.

Looks up the ``monthly`` batch definition and the ``project_name`` suite,
fetches the ``my_project`` validation definition (adding it when the lookup
fails), then runs it with explicit batch parameters and prints the result.
"""

from great_expectations import get_context

# TODO: will become from great_expectations import ValidationDefinition
from great_expectations.core import ValidationDefinition
from great_expectations.exceptions import DataContextError

context = get_context(project_root_dir="./")

# TODO: will become
# batch_definition = context.data_sources.get("project_name").get_asset("my_project").get_batch_definition("monthly")
data_source = context.datasources["project_name"]
asset = data_source.get_asset("my_project")
batch_definition = asset.get_batch_definition("monthly")
suite = context.suites.get("project_name")

try:
    validation_definition = context.validation_definitions.get("my_project")
# TODO: will become except ResourceNotFoundError:
except DataContextError:
    # TODO: will become
    # validation_definition = context.validation_definitions.add(
    #     name="my_project",
    #     data=batch_definition,
    #     suite=suite
    # )
    new_definition = ValidationDefinition(
        name="my_project", data=batch_definition, suite=suite
    )
    validation_definition = context.validation_definitions.add(new_definition)

# To run this in your project it is critical to provide batch_parameters
stored_definition = context.validation_definitions.get("my_project")
result = stored_definition.run(batch_parameters={"year": "2020", "month": "04"})

# TODO: This should only run on the latest batch, or it should fail entirely with an error that
# batch parameters are missing
# result = context.validation_definitions.get("my_project").run()
print(result)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"""Demo: run the ``my_project`` validation definition and print the result."""

from great_expectations import get_context

context = get_context(project_root_dir="./")
# NOTE: It is critical to pass batch_parameters to the run method; otherwise the
# validation will stall by trying to read all the data. We will have a fix in
# place before the final release.
# TODO: Implement fix for above issue
validation_definition = context.validation_definitions.get("my_project")
result = validation_definition.run(batch_parameters={"year": "2020", "month": "04"})
# Print the outcome so the run is not silent.
print(result)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
"""Demo: create a checkpoint with a Slack notification action and run it.

Fetches ``project_integration_checkpoint``; when the lookup fails, builds it
from the ``my_project`` validation definition plus one Slack action and adds it
to the context. The checkpoint is then run and its result printed.
"""

from great_expectations import get_context

# TODO: will become from great_expectations import Checkpoint
# TODO: will become from great_expectations.actions import SlackNotificationAction
from great_expectations.checkpoint import Checkpoint, SlackNotificationAction

# TODO: will become from great_expectations.exceptions import ResourceNotFoundError
from great_expectations.exceptions import DataContextError

context = get_context(project_root_dir="./")

try:
    checkpoint = context.checkpoints.get("project_integration_checkpoint")
# TODO: Will become ResourceNotFoundError
except DataContextError:
    validation_definition = context.validation_definitions.get("my_project")
    slack_action = SlackNotificationAction(
        name="slack_notification",
        # TODO: config variable substitution not working
        slack_token="${SLACK_NOTIFICATION_TOKEN}",
        slack_channel="#alerts-timber-test",
    )
    new_checkpoint = Checkpoint(
        name="project_integration_checkpoint",
        validation_definitions=[validation_definition],
        actions=[slack_action],
    )
    checkpoint = context.checkpoints.add(new_checkpoint)

result = checkpoint.run(batch_parameters={"year": "2020", "month": "04"})
print(result)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
"""Demo: run the ``project_integration_checkpoint`` checkpoint and print the result."""

from great_expectations import get_context

context = get_context(project_root_dir="./")
checkpoint = context.checkpoints.get("project_integration_checkpoint")

# Year and month of the batch the checkpoint is run against.
batch_parameters = {"year": "2020", "month": "04"}
print(checkpoint.run(batch_parameters=batch_parameters))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,9 @@ | ||
boto3 | ||
fastparquet | ||
great-expectations==1.0.0a4 | ||
jupyter | ||
pandas | ||
psycopg2 | ||
pyarrow | ||
s3fs | ||
sqlalchemy |