Remove gcs validation flag (#162)
* removed gcs validation flag

* updated the flag description

* updated the flag description

* updated the flag description

* merged main
AmandeepSinghCS authored Apr 5, 2022
1 parent 54a576b commit 50fb4f6
Showing 6 changed files with 12 additions and 34 deletions.
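After this commit, Dataplex GCS resources are always validated using BigQuery external tables, and --enable_experimental_dataplex_gcs_validation is kept only as a deprecated no-op. Existing invocations can simply drop the flag; for instance, the direct-connection test from scripts/test_pip_install.sh (shown in the diff below) becomes:

python3 -m clouddq ALL configs \
  --gcp_project_id="${GOOGLE_CLOUD_PROJECT}" \
  --gcp_bq_dataset_id="${CLOUDDQ_BIGQUERY_DATASET}" \
  --gcp_region_id="${CLOUDDQ_BIGQUERY_REGION}" \
  --debug \
  --dry_run

When the flag was the last argument of a multi-line command, the trailing line continuation goes with it: "--dry_run \" becomes "--dry_run".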
10 changes: 0 additions & 10 deletions clouddq/classes/dq_configs_cache.py
@@ -230,7 +230,6 @@ def resolve_dataplex_entity_uris( # noqa: C901
         bigquery_client: BigQueryClient,
         target_rule_binding_ids: list[str],
         default_configs: dict | None = None,
-        enable_experimental_dataplex_gcs_validation: bool = True,
     ) -> None:
         logger.debug(
             f"Using Dataplex default configs for resolving entity_uris:\n{pformat(default_configs)}"
@@ -260,7 +259,6 @@ def resolve_dataplex_entity_uris( # noqa: C901
                     entity_uri=entity_uri,
                     dataplex_client=dataplex_client,
                     bigquery_client=bigquery_client,
-                    enable_experimental_dataplex_gcs_validation=enable_experimental_dataplex_gcs_validation,
                 )
             elif entity_uri.scheme == "BIGQUERY":
                 clouddq_entity = self._resolve_bigquery_entity_uri(
@@ -399,7 +397,6 @@ def _resolve_dataplex_entity_uri(
         entity_uri: dq_entity_uri.EntityUri,
         dataplex_client: clouddq_dataplex.CloudDqDataplexClient,
         bigquery_client: BigQueryClient,
-        enable_experimental_dataplex_gcs_validation: bool = True,
     ) -> dq_entity.DqEntity:
         dataplex_entity = dataplex_client.get_dataplex_entity(
             gcp_project_id=entity_uri.get_configs("projects"),
@@ -408,13 +405,6 @@ def _resolve_dataplex_entity_uri(
             zone_id=entity_uri.get_configs("zones"),
             entity_id=entity_uri.get_entity_id(),
         )
-        if dataplex_entity.system == "CLOUD_STORAGE":
-            if not enable_experimental_dataplex_gcs_validation:
-                raise NotImplementedError(
-                    "Use CLI flag --enable_experimental_dataplex_gcs_validation "
-                    "to enable validating Dataplex GCS resources using BigQuery "
-                    "External Tables"
-                )
         clouddq_entity = dq_entity.DqEntity.from_dataplex_entity(
             entity_id=entity_uri.get_db_primary_key(),
             dataplex_entity=dataplex_entity,
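With the CLOUD_STORAGE guard removed, _resolve_dataplex_entity_uri converts every Dataplex entity the same way regardless of its storage system. A minimal, self-contained sketch of the behavior change, where DataplexEntity and convert are hypothetical stand-ins for the real classes in this repo:

from dataclasses import dataclass

@dataclass
class DataplexEntity:
    entity_id: str
    system: str  # e.g. "BIGQUERY" or "CLOUD_STORAGE"

def convert(entity: DataplexEntity) -> str:
    # Stand-in for dq_entity.DqEntity.from_dataplex_entity(...).
    return f"DqEntity({entity.entity_id})"

def resolve_before(entity: DataplexEntity, gcs_validation_enabled: bool = False) -> str:
    # Old behavior: GCS-backed entities were rejected unless the
    # experimental flag was passed on the CLI.
    if entity.system == "CLOUD_STORAGE" and not gcs_validation_enabled:
        raise NotImplementedError("GCS validation requires the experimental flag")
    return convert(entity)

def resolve_after(entity: DataplexEntity) -> str:
    # New behavior: no guard; GCS entities are validated via BigQuery
    # external tables like any other entity.
    return convert(entity)

print(resolve_after(DataplexEntity("example_entity", "CLOUD_STORAGE")))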
4 changes: 0 additions & 4 deletions clouddq/integration/dataplex/clouddq_dataplex.py
@@ -90,7 +90,6 @@ def create_clouddq_task( # noqa: C901
         clouddq_executable_checksum_path: str | None = None,
         validate_only: bool = False,
         clouddq_pyspark_driver_filename: str = "clouddq_pyspark_driver.py",
-        enable_experimental_dataplex_gcs_validation: bool = True,
     ) -> Response:
         # Set default CloudDQ PySpark driver path if not manually overridden
         clouddq_pyspark_driver_path = self._validate_clouddq_artifact_path(
@@ -146,9 +145,6 @@ def create_clouddq_task( # noqa: C901
             f"{target_bq_result_dataset_name}."
             f'{target_bq_result_table_name}",'
         )
-        # Set experimental flags
-        if enable_experimental_dataplex_gcs_validation:
-            execution_arguments += "--enable_experimental_dataplex_gcs_validation,"
         # Prepare Dataplex Task message body for CloudDQ Job
         clouddq_post_body = {
             "spark": {
10 changes: 5 additions & 5 deletions clouddq/main.py
@@ -199,10 +199,11 @@
 )
 @click.option(
     "--enable_experimental_dataplex_gcs_validation",
-    help="If True, allows validating Dataplex GCS resources using "
-    "BigQuery External Tables",
+    help="This flag has no effect and is now deprecated. GCS resource validation via BigQuery "
+    "External Tables is supported regardless of the value of this flag. "
+    "Please remove it from your script.",
     is_flag=True,
-    default=False,
+    default=True,
 )
 @click.option(
     "--enable_experimental_bigquery_entity_uris",
@@ -232,7 +233,7 @@ def main( # noqa: C901
     skip_sql_validation: bool = False,
     summary_to_stdout: bool = False,
     enable_experimental_bigquery_entity_uris: bool = True,
-    enable_experimental_dataplex_gcs_validation: bool = False,
+    enable_experimental_dataplex_gcs_validation: bool = True,
 ) -> None:
     """Run RULE_BINDING_IDS from a RULE_BINDING_CONFIG_PATH.
@@ -479,7 +480,6 @@ def main( # noqa: C901
             bigquery_client=bigquery_client,
             default_configs=dataplex_registry_defaults,
             target_rule_binding_ids=target_rule_binding_ids,
-            enable_experimental_dataplex_gcs_validation=enable_experimental_dataplex_gcs_validation,
         )
         # Get Entities for entity-level summary views
         target_entity_summary_configs: dict = (
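The clouddq/main.py change is a small example of retiring a CLI flag without breaking callers: keep the click option, default it to the always-on behavior, and say so in the help text. A runnable, stripped-down sketch of that pattern, using a hypothetical single-option command (the real CLI declares many more options):

import click

@click.command()
@click.option(
    "--enable_experimental_dataplex_gcs_validation",
    help="This flag has no effect and is now deprecated. "
    "Please remove it from your script.",
    is_flag=True,
    default=True,
)
def main(enable_experimental_dataplex_gcs_validation: bool) -> None:
    # The parameter is still accepted so existing scripts keep working,
    # but its value is never consulted.
    click.echo("Dataplex GCS validation is always enabled.")

if __name__ == "__main__":
    main()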
18 changes: 6 additions & 12 deletions scripts/test_pip_install.sh
@@ -112,22 +112,19 @@ sed -i s/\<my-gcp-dataplex-zone-id\>/"${DATAPLEX_ZONE_ID}"/g "$TEST_DIR"/configs
 # run with --dbt_profiles_dir
 python3 -m clouddq T1_DQ_1_VALUE_NOT_NULL,T2_DQ_1_EMAIL,T3_DQ_1_EMAIL_DUPLICATE configs --dbt_profiles_dir="$TEST_DIR" \
   --debug \
-  --dry_run \
-  --enable_experimental_dataplex_gcs_validation
+  --dry_run
 python3 -m clouddq T1_DQ_1_VALUE_NOT_NULL,T2_DQ_1_EMAIL,T3_DQ_1_EMAIL_DUPLICATE configs --dbt_profiles_dir="$TEST_DIR" \
   --dbt_path="$TEST_DIR" \
   --debug \
-  --dry_run \
-  --enable_experimental_dataplex_gcs_validation
+  --dry_run
 
 # test clouddq with direct connection profiles
 python3 -m clouddq ALL configs \
   --gcp_project_id="${GOOGLE_CLOUD_PROJECT}" \
   --gcp_bq_dataset_id="${CLOUDDQ_BIGQUERY_DATASET}" \
   --gcp_region_id="${CLOUDDQ_BIGQUERY_REGION}" \
   --debug \
-  --dry_run \
-  --enable_experimental_dataplex_gcs_validation
+  --dry_run
 
 if [[ -f "${GOOGLE_APPLICATION_CREDENTIALS:-}" ]]; then
   # test clouddq with exported service account key
@@ -137,8 +134,7 @@ if [[ -f "${GOOGLE_APPLICATION_CREDENTIALS:-}" ]]; then
     --gcp_region_id="${CLOUDDQ_BIGQUERY_REGION}" \
     --gcp_service_account_key_path="${GOOGLE_APPLICATION_CREDENTIALS}" \
     --debug \
-    --dry_run \
-    --enable_experimental_dataplex_gcs_validation
+    --dry_run
   if [[ -f "${IMPERSONATION_SERVICE_ACCOUNT:-}" ]]; then
     # test clouddq with exported service account key
     python3 -m clouddq T1_DQ_1_VALUE_NOT_NULL,T2_DQ_1_EMAIL,T3_DQ_1_EMAIL_DUPLICATE configs \
@@ -148,8 +144,7 @@ if [[ -f "${GOOGLE_APPLICATION_CREDENTIALS:-}" ]]; then
       --gcp_service_account_key_path="${GOOGLE_APPLICATION_CREDENTIALS}" \
       --gcp_impersonation_credentials="${IMPERSONATION_SERVICE_ACCOUNT}" \
       --debug \
-      --dry_run \
-      --enable_experimental_dataplex_gcs_validation
+      --dry_run
   fi
 fi
 
@@ -161,6 +156,5 @@ if [[ -f "${IMPERSONATION_SERVICE_ACCOUNT:-}" ]]; then
     --gcp_region_id="${CLOUDDQ_BIGQUERY_REGION}" \
     --gcp_impersonation_credentials="${IMPERSONATION_SERVICE_ACCOUNT}" \
     --debug \
-    --dry_run \
-    --enable_experimental_dataplex_gcs_validation
+    --dry_run
 fi
1 change: 0 additions & 1 deletion tests/integration/test_dataplex_integration_performance.py
@@ -67,7 +67,6 @@ def test_cli_16_rb_10_rules(
             f"--gcp_project_id={gcp_project_id}",
             f"--gcp_bq_dataset_id={gcp_bq_dataset}",
             f"--target_bigquery_summary_table={target_table}",
-            "--enable_experimental_dataplex_gcs_validation",
             "--debug",
         ]
         logger.info(f"Args: {' '.join(args)}")
3 changes: 1 addition & 2 deletions tests/integration/test_dataplex_task.py
@@ -369,8 +369,7 @@ def test_create_bq_dataplex_task(self,
             clouddq_executable_path=clouddq_executable_path,
             clouddq_executable_checksum_path=clouddq_executable_checksum_path,
             clouddq_pyspark_driver_path=clouddq_pyspark_driver_path,
-            clouddq_pyspark_driver_filename=clouddq_pyspark_driver_filename,
-            enable_experimental_dataplex_gcs_validation=True,)
+            clouddq_pyspark_driver_filename=clouddq_pyspark_driver_filename,)
         # Check that the task has been created successfully
         print(f"CloudDQ task creation response is {response.text}")
         assert response.status_code == 200
