From fa0dc96b0d1517893b6299a865ae32840fb6631e Mon Sep 17 00:00:00 2001 From: Adam Sachs Date: Mon, 27 Jan 2025 14:53:00 -0500 Subject: [PATCH] add website connector type --- ...961528edfc6_add_website_connection_type.py | 108 ++++++++++++++++++ src/fides/api/models/connectionconfig.py | 3 + .../connection_configuration/__init__.py | 5 + .../connection_secrets_website.py | 14 +++ .../enums/system_type.py | 1 + .../service/connectors/website_connector.py | 81 +++++++++++++ 6 files changed, 212 insertions(+) create mode 100644 src/fides/api/alembic/migrations/versions/c961528edfc6_add_website_connection_type.py create mode 100644 src/fides/api/schemas/connection_configuration/connection_secrets_website.py create mode 100644 src/fides/api/service/connectors/website_connector.py diff --git a/src/fides/api/alembic/migrations/versions/c961528edfc6_add_website_connection_type.py b/src/fides/api/alembic/migrations/versions/c961528edfc6_add_website_connection_type.py new file mode 100644 index 0000000000..ebdcaafa7c --- /dev/null +++ b/src/fides/api/alembic/migrations/versions/c961528edfc6_add_website_connection_type.py @@ -0,0 +1,108 @@ +"""add website connection type + +Revision ID: c961528edfc6 +Revises: 021166731846 +Create Date: 2025-01-27 19:20:20.236548 + +""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "c961528edfc6"
+down_revision = "021166731846"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # Add 'website' to ConnectionType enum by recreating the type
+    op.execute("ALTER TYPE connectiontype RENAME TO connectiontype_old")
+    op.execute(
+        """
+        CREATE TYPE connectiontype AS ENUM (
+            'mongodb',
+            'mysql',
+            'https',
+            'snowflake',
+            'redshift',
+            'mssql',
+            'mariadb',
+            'bigquery',
+            'saas',
+            'manual',
+            'manual_webhook',
+            'timescale',
+            'fides',
+            'sovrn',
+            'attentive_email',
+            'dynamodb',
+            'postgres',
+            'generic_consent_email',
+            'generic_erasure_email',
+            'scylla',
+            's3',
+            'google_cloud_sql_mysql',
+            'google_cloud_sql_postgres',
+            'dynamic_erasure_email',
+            'rds_mysql',
+            'rds_postgres',
+            'datahub',
+            'website'
+        )
+        """
+    )
+    op.execute(
+        """
+        ALTER TABLE connectionconfig ALTER COLUMN connection_type TYPE connectiontype USING
+        connection_type::text::connectiontype
+        """
+    )
+    op.execute("DROP TYPE connectiontype_old")
+
+
+def downgrade():
+    # Remove 'website' from ConnectionType enum; its rows go first or the cast fails
+    op.execute("DELETE FROM connectionconfig WHERE connection_type IN ('website')")
+    op.execute("ALTER TYPE connectiontype RENAME TO connectiontype_old")
+    op.execute(
+        """
+        CREATE TYPE connectiontype AS ENUM (
+            'mongodb',
+            'mysql',
+            'https',
+            'snowflake',
+            'redshift',
+            'mssql',
+            'mariadb',
+            'bigquery',
+            'saas',
+            'manual',
+            'manual_webhook',
+            'timescale',
+            'fides',
+            'sovrn',
+            'attentive_email',
+            'dynamodb',
+            'postgres',
+            'generic_consent_email',
+            'generic_erasure_email',
+            'scylla',
+            's3',
+            'google_cloud_sql_mysql',
+            'google_cloud_sql_postgres',
+            'dynamic_erasure_email',
+            'rds_mysql',
+            'rds_postgres',
+            'datahub'
+        )
+        """
+    )
+    op.execute(
+        """
+        ALTER TABLE connectionconfig ALTER COLUMN connection_type TYPE connectiontype USING
+        connection_type::text::connectiontype
+        """
+    )
+    op.execute("DROP TYPE connectiontype_old")
diff --git a/src/fides/api/models/connectionconfig.py b/src/fides/api/models/connectionconfig.py
index 
6775396734..f04ff41ab7 100644
--- a/src/fides/api/models/connectionconfig.py
+++ b/src/fides/api/models/connectionconfig.py
@@ -67,6 +67,7 @@ class ConnectionType(enum.Enum):
     snowflake = "snowflake"
     sovrn = "sovrn"
     timescale = "timescale"
+    website = "website"
 
     @property
     def human_readable(self) -> str:
@@ -101,6 +102,7 @@ def human_readable(self) -> str:
             ConnectionType.snowflake.value: "Snowflake",
             ConnectionType.sovrn.value: "Sovrn",
             ConnectionType.timescale.value: "TimescaleDB",
+            ConnectionType.website.value: "Website",
         }
         try:
             return readable_mapping[self.value]
@@ -143,6 +145,7 @@ def system_type(self) -> "SystemType":
             ConnectionType.snowflake.value: SystemType.database,
             ConnectionType.sovrn.value: SystemType.email,
             ConnectionType.timescale.value: SystemType.database,
+            ConnectionType.website.value: SystemType.website,
         }
 
         try:
diff --git a/src/fides/api/schemas/connection_configuration/__init__.py b/src/fides/api/schemas/connection_configuration/__init__.py
index a8e0f94d67..672e02eaf2 100644
--- a/src/fides/api/schemas/connection_configuration/__init__.py
+++ b/src/fides/api/schemas/connection_configuration/__init__.py
@@ -148,6 +148,9 @@
 from fides.api.schemas.connection_configuration.connection_secrets_timescale import (
     TimescaleSchema as TimescaleSchema,
 )
+from fides.api.schemas.connection_configuration.connection_secrets_website import (
+    WebsiteSchema as WebsiteSchema,
+)
 from fides.api.schemas.connection_configuration.connections_secrets_https import (
     HttpsSchema as HttpsSchema,
 )
@@ -180,6 +183,7 @@
     ConnectionType.snowflake.value: SnowflakeSchema,
     ConnectionType.sovrn.value: SovrnSchema,
     ConnectionType.timescale.value: TimescaleSchema,
+    ConnectionType.website.value: WebsiteSchema,
 }
 
 
@@ -237,4 +241,5 @@ def get_connection_secrets_schema(
     SnowflakeDocsSchema,
     SovrnDocsSchema,
     TimescaleDocsSchema,
+    WebsiteSchema,
 ]
diff --git a/src/fides/api/schemas/connection_configuration/connection_secrets_website.py 
b/src/fides/api/schemas/connection_configuration/connection_secrets_website.py
new file mode 100644
index 0000000000..fb42557695
--- /dev/null
+++ b/src/fides/api/schemas/connection_configuration/connection_secrets_website.py
@@ -0,0 +1,18 @@
+from fideslang.validation import AnyHttpUrlString
+from pydantic import BaseModel
+
+from fides.api.schemas.base_class import NoValidationSchema
+
+
+class WebsiteSchema(BaseModel):
+    """Schema to validate the secrets needed for a generic website connector"""
+
+    url: AnyHttpUrlString
+
+
+class WebsiteDocsSchema(WebsiteSchema, NoValidationSchema):
+    """Website Secrets Schema for API Docs"""
+
+
+# Alias preserving the original (misspelled) class name for any existing importers.
+WebsiteDocsScehma = WebsiteDocsSchema
diff --git a/src/fides/api/schemas/connection_configuration/enums/system_type.py b/src/fides/api/schemas/connection_configuration/enums/system_type.py
index dad517822e..9d7430e58c 100644
--- a/src/fides/api/schemas/connection_configuration/enums/system_type.py
+++ b/src/fides/api/schemas/connection_configuration/enums/system_type.py
@@ -7,3 +7,4 @@ class SystemType(Enum):
     email = "email"
     manual = "manual"
     saas = "saas"
+    website = "website"
diff --git a/src/fides/api/service/connectors/website_connector.py b/src/fides/api/service/connectors/website_connector.py
new file mode 100644
index 0000000000..4108ba7b93
--- /dev/null
+++ b/src/fides/api/service/connectors/website_connector.py
@@ -0,0 +1,89 @@
+from typing import Any, Dict, List, Optional
+
+from loguru import logger
+from requests import head
+
+from fides.api.common_exceptions import ConnectionException
+from fides.api.graph.execution import ExecutionNode
+from fides.api.models.connectionconfig import ConnectionTestStatus
+from fides.api.models.policy import Policy
+from fides.api.models.privacy_request import PrivacyRequest, RequestTask
+from fides.api.service.connectors.base_connector import BaseConnector
+from fides.api.service.connectors.query_configs.query_config import QueryConfig
+from fides.api.util.collection_util import Row
+
+# Upper bound, in seconds, on the connection-test request; `requests` applies no
+# timeout by default, so an unresponsive site would otherwise block indefinitely.
+TEST_CONNECTION_TIMEOUT_SECONDS = 10
+
+
+class WebsiteConnector(BaseConnector):
+    """
+    Website connector, used currently for Website 'monitoring' - this class is used to test basic connections to the website
+
+    NOTE: No DSR processing is supported for Website connectors.
+    """
+
+    def create_client(self) -> Any:  # type: ignore
+        """No client is needed for Website connectors, so this returns None"""
+
+    def query_config(self, node: ExecutionNode) -> QueryConfig[Any]:
+        """DSR execution not supported for Website connectors"""
+        raise NotImplementedError()
+
+    def test_connection(self) -> Optional[ConnectionTestStatus]:
+        """
+        Validates the connection to the website by executing a `HEAD` request against the provided URL.
+
+        Returns `ConnectionTestStatus.succeeded` when the response status code is below 400.
+        Raises `ConnectionException` if the request itself fails or the status code is 400 or above.
+
+        TODO: can we perform a better validation by pinging the website from the web monitor proxy?
+        TODO: can we validate credentials somehow when we support them?
+        """
+        website_url = self.configuration.secrets.get("url")
+        logger.info(
+            "Starting test connection to connector '{}', at URL '{}'",
+            self.configuration.key,
+            website_url,
+        )
+        try:
+            response = head(website_url, timeout=TEST_CONNECTION_TIMEOUT_SECONDS)
+        except Exception as error:
+            # chain the cause so the underlying transport error stays in the traceback
+            raise ConnectionException(str(error)) from error
+
+        if response.status_code >= 400:
+            raise ConnectionException(
+                f"HEAD request to '{website_url}' resulted in error status code: '{response.status_code}'"
+            )
+
+        logger.info("Connection to '{}' succeeded", website_url)
+
+        return ConnectionTestStatus.succeeded
+
+    def retrieve_data(
+        self,
+        node: ExecutionNode,
+        policy: Policy,
+        privacy_request: PrivacyRequest,
+        request_task: RequestTask,
+        input_data: Dict[str, List[Any]],
+    ) -> List[Row]:
+        """DSR execution not supported for website connector"""
+        return []
+
+    def mask_data(
+        self,
+        node: ExecutionNode,
+        policy: Policy,
+        privacy_request: PrivacyRequest,
+        request_task: RequestTask,
+        rows: List[Row],
+    ) -> int:
+        """DSR execution not supported for website connector"""
+        return 0
+
+    def close(self) -> None:
+        """Close any held resources"""
+        # no held resources for website connector