feat: Large overhaul adding a more complete view of all the different entities in Pulumi Cloud #185

Open. Wants to merge 44 commits into base: main.

Commits (44)
6aa29db  Split streams into orgs/stacks (sicarul, Aug 12, 2024)
9b0dba4  Merge pull request #1 from pulumi/branch-pablo (sicarul, Aug 12, 2024)
3a0f156  Add stack details and resources (sicarul, Aug 12, 2024)
4688cae  Merge pull request #2 from pulumi/update-stacks (sicarul, Aug 12, 2024)
62350d8  Add stack policy groups and policy packs (sicarul, Aug 12, 2024)
8e7b490  Merge pull request #3 from pulumi/add-stack-policies (sicarul, Aug 12, 2024)
d402558  add policy group streams (lucascrespo88, Aug 12, 2024)
c798a46  rename policy group streams to remove organization prefix (lucascrespo88, Aug 12, 2024)
d36ced2  Merge pull request #4 from pulumi/add_policy_pack_streams (lucascrespo88, Aug 12, 2024)
3903044  Add stack previews and change updates to service format (sicarul, Aug 13, 2024)
9033232  add policy pack and enviroment streams (lucascrespo88, Aug 13, 2024)
5db6bfe  Merge pull request #5 from pulumi/add-stack-updates-previews (sicarul, Aug 13, 2024)
2116f7b  fix environments description (lucascrespo88, Aug 13, 2024)
08a1387  Merge pull request #6 from pulumi/add_policy_pack_streams (lucascrespo88, Aug 13, 2024)
6fc7770  Add deployments and set default items per page to 100 (sicarul, Aug 13, 2024)
82c19a8  Merge pull request #7 from pulumi/add-deployments (sicarul, Aug 13, 2024)
1a729f7  Add team details (sicarul, Aug 13, 2024)
a542b2b  Add team access tokens (sicarul, Aug 13, 2024)
e3d53dd  Merge pull request #8 from pulumi/add-team-details (sicarul, Aug 13, 2024)
815e406  Add webhooks (sicarul, Aug 13, 2024)
16feaff  added org name to envs and OIDC Issuers and RUM usage streams added (lucascrespo88, Aug 13, 2024)
8147b48  fix commenting (lucascrespo88, Aug 13, 2024)
91f5a68  fix space (lucascrespo88, Aug 13, 2024)
c078a14  Merge pull request #9 from pulumi/add-webhooks (sicarul, Aug 13, 2024)
71409e1  Merge branch 'main' into add_issuers_streams (lucascrespo88, Aug 13, 2024)
1252510  Merge pull request #10 from pulumi/add_issuers_streams (lucascrespo88, Aug 13, 2024)
2407fd1  Add audit logs (sicarul, Aug 13, 2024)
e7a213e  Agent Pool Stream Added (lucascrespo88, Aug 13, 2024)
6cd0126  Merge pull request #11 from pulumi/add-audit-logs (sicarul, Aug 13, 2024)
60a95e0  Merge pull request #12 from pulumi/add_agent_pool_stream (lucascrespo88, Aug 13, 2024)
924c78b  Integrity fixes needed to target-duckdb to work (sicarul, Aug 13, 2024)
a5dad6a  Merge pull request #13 from pulumi/fixes-for-integrity (sicarul, Aug 13, 2024)
e02ca2d  Handle 504 gateway timeout error for stack previews endpoint (sicarul, Aug 13, 2024)
9b5c86e  Merge pull request #14 from pulumi/handle-504-on-stack-previews (sicarul, Aug 13, 2024)
7b61b21  add Stack Schedules and Stack Schedules deployment history Streams (lucascrespo88, Aug 14, 2024)
e3e6627  Merge pull request #15 from pulumi/add_schedule_streams (lucascrespo88, Aug 14, 2024)
b3d7845  Merge branch 'main' into main (sicarul, Aug 14, 2024)
4a003e8  Fix linting (sicarul, Aug 14, 2024)
e12f1c0  Merge pull request #16 from pulumi/fix-linting (sicarul, Aug 14, 2024)
4c3bbc4  Start date is now required (sicarul, Aug 14, 2024)
afa2a83  Let tests run (edgarrmondragon, Aug 14, 2024)
f5d43f7  Merge pull request #17 from MeltanoLabs/pr-185-suggestions (sicarul, Aug 15, 2024)
857d88a  fix resource hours field name (lucascrespo88, Aug 15, 2024)
3109ef0  Merge pull request #19 from pulumi/fix_resource_hours_field (sicarul, Aug 15, 2024)
2 changes: 1 addition & 1 deletion .flake8
@@ -1,5 +1,5 @@
 [flake8]
-ignore = DAR
+ignore = DAR,W503
 max-line-length = 88
 docstring-convention = google
 per-file-ignores =
12 changes: 6 additions & 6 deletions .github/workflows/test.yml
@@ -18,11 +18,11 @@ jobs:
       matrix:
         python-version:
         - "3.8"
-        - "3.9"
-        - "3.10"
-        - "3.11"
-        - "3.12"
-        - "3.13"
+        # - "3.9"
+        # - "3.10"
+        # - "3.11"
+        # - "3.12"
+        # - "3.13"

     steps:
       - name: Checkout code
@@ -58,6 +58,6 @@ jobs:
         env:
           TAP_PULUMI_CLOUD_TOKEN: ${{ secrets.TAP_PULUMI_CLOUD_TOKEN }}
           TAP_PULUMI_CLOUD_ORGANIZATIONS: ${{ secrets.TAP_PULUMI_CLOUD_ORGANIZATIONS }}
-          TAP_PULUMI_CLOUD_START_DATE: ${{ secrets.TAP_PULUMI_CLOUD_START_DATE }}
+          TAP_PULUMI_CLOUD_START_DATE: "2023-01-01T00:00:00Z"
         run: |
           nox
8 changes: 7 additions & 1 deletion meltano.yml
@@ -21,6 +21,7 @@ plugins:
       kind: password
       label: API Token
       description: API Token for Pulumi Cloud
+      sensitive: true
     - name: requests_cache.enabled
       kind: boolean
       label: Enable Requests Cache
@@ -29,9 +30,14 @@
       kind: object
       label: Requests Cache Config
       description: Configuration for requests cache
-  repository: https://github.com/edgarrmondragon/tap-pulumi-cloud
+    - name: start_date
+      kind: date_iso8601
+      value: 2024-01-01T00:00:00+00:00
+      label: Start Date
+      description: Start date
   config:
     organizations: [meltano]
+  repository: https://github.com/edgarrmondragon/tap-pulumi-cloud
 loaders:
 - name: target-jsonl
   variant: andyh1203
217 changes: 217 additions & 0 deletions tap_pulumi_cloud/audit_logs.py
@@ -0,0 +1,217 @@
"""Stream type classes for tap-pulumi-cloud."""

from __future__ import annotations

import typing as t
from datetime import datetime, timezone
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from requests import Response
    from singer_sdk.helpers.types import Context
from singer_sdk import metrics
from singer_sdk import typing as th
from singer_sdk.helpers.jsonpath import extract_jsonpath
from singer_sdk.pagination import BaseAPIPaginator

from tap_pulumi_cloud.client import _OrgPartitionedStream

class AuditLogsPaginator(BaseAPIPaginator[t.Optional[str]]):
    """Paginator class for APIs returning a pagination token in the response body."""

    def __init__(
        self,
        jsonpath: str,
        since: int,
        *args: t.Any,
        **kwargs: t.Any,
    ) -> None:
        """Create a new paginator.

        Args:
            jsonpath: A JSONPath expression.
            since: Start date for the audit logs.
            args: Paginator positional arguments for base class.
            kwargs: Paginator keyword arguments for base class.
        """
        super().__init__(None, *args, **kwargs)
        self._jsonpath = jsonpath
        self._since = since

    def get_next(self, response: Response) -> str | None:
        """Get the next page token.

        Args:
            response: API response object.

        Returns:
            The next page token.
        """
        all_matches = extract_jsonpath(self._jsonpath, response.json())
        matched = next(all_matches, None)
        if matched is None or int(matched) < self._since:
            return None
        return matched
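The stop condition in `get_next` can be illustrated with a standalone sketch (a hypothetical helper, not code from the PR): pagination continues only while the API returns a continuation token, and that token, a Unix timestamp, has not yet paged back past the configured start date.

```python
# Hypothetical stand-in for AuditLogsPaginator.get_next, operating on a plain
# dict instead of a requests.Response / JSONPath expression.
def next_token(body, since, key="continuationToken"):
    token = body.get(key)
    # Stop when the API returns no token, or the token (epoch seconds)
    # is already older than the start of the sync window.
    if token is None or int(token) < since:
        return None
    return token

print(next_token({"continuationToken": "1700000000"}, since=1600000000))  # "1700000000"
print(next_token({"continuationToken": "1500000000"}, since=1600000000))  # None
```

Because the audit-log endpoint pages from newest to oldest, comparing the token against `since` is what bounds an incremental sync.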


class AuditLogs(_OrgPartitionedStream):
    """Stream Audit Logs."""

    name = "audit_logs"
    path = "/api/orgs/{org_name}/auditlogs"
    primary_keys: t.Sequence[str] = ["org_name", "timestamp", "event", "description"]
    records_jsonpath = "$.auditLogEvents[*]"
    replication_key = "timestamp"
    is_sorted = False

    schema = th.PropertiesList(
        th.Property(
            "org_name", th.StringType, description="The name of the organization."
        ),
        th.Property(
            "timestamp",
            th.DateTimeType,
            description="The timestamp of the audit log event.",
        ),
        th.Property(
            "source_ip",
            th.StringType,
            description="The source IP of the audit log event.",
        ),
        th.Property(
            "event", th.StringType, description="The event of the audit log event."
        ),
        th.Property(
            "description",
            th.StringType,
            description="The description of the audit log event.",
        ),
        th.Property(
            "user",
            th.ObjectType(
                th.Property("name", th.StringType, description="The name of the user."),
                th.Property(
                    "github_login",
                    th.StringType,
                    description="The GitHub login of the user.",
                ),
                th.Property(
                    "avatar_url",
                    th.StringType,
                    description="The avatar URL of the user.",
                ),
            ),
            description="The user of the audit log event.",
        ),
        th.Property(
            "token_id",
            th.StringType,
            description="The token id associated with this event.",
        ),
        th.Property(
            "token_name",
            th.StringType,
            description="The token name associated with this event.",
        ),
        th.Property(
            "req_org_admin",
            th.BooleanType,
            description="Required organization admin role.",
        ),
        th.Property(
            "req_stack_admin", th.BooleanType, description="Required stack admin role."
        ),
        th.Property(
            "auth_failure",
            th.BooleanType,
            description="Event was the result of an authentication check failure.",
        ),
    ).to_dict()

    def get_new_paginator(self, context: Context | None) -> BaseAPIPaginator:
        """Get a fresh paginator for this API endpoint.

        Returns:
            A paginator instance.
        """
        return AuditLogsPaginator(
            self.next_page_token_jsonpath,
            self.get_starting_timestamp(context).timestamp(),
        )

    def request_records(self, context: Context | None) -> t.Iterable[dict]:
        """Request records from REST endpoint(s), returning response records.

        If pagination is detected, pages will be recursed automatically.

        Args:
            context: Stream partition or context dictionary.

        Yields:
            An item for every record in the response.
        """
        paginator = self.get_new_paginator(context)
        decorated_request = self.request_decorator(self._request)
        pages = 0

        with metrics.http_request_counter(self.name, self.path) as request_counter:
            request_counter.context = context

            while not paginator.finished:
                prepared_request = self.prepare_request(
                    context,
                    next_page_token=paginator.current_value,
                )
                resp = decorated_request(prepared_request, context)
                request_counter.increment()
                self.update_sync_costs(prepared_request, resp, context)
                records = iter(self.parse_response(resp))
                try:
                    first_record = next(records)
                except StopIteration:
                    self.logger.info(
                        "Pagination stopped after %d pages because no records were "
                        "found in the last response",
                        pages,
                    )
                    break
                yield first_record
                yield from records
                pages += 1

                paginator.advance(resp)
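The shape of that request loop, fetch a page, bail out early if it is empty, otherwise yield its records and advance, can be sketched without any HTTP machinery (`drain_pages` and `fetch_page` are hypothetical names, not part of the tap):

```python
# Generic stand-in for the loop above: fetch_page(token) returns
# (records, next_token); iteration stops on an empty page or a None token.
def drain_pages(fetch_page, first_token=None):
    token = first_token
    while True:
        records, token = fetch_page(token)
        if not records:
            break  # mirrors the early exit when a response has no records
        yield from records
        if token is None:
            break  # mirrors paginator.finished

def fake_fetch(token):
    # Two fake pages, then a terminal None token.
    pages = {None: ([1, 2], "next"), "next": ([3], None)}
    return pages[token]

print(list(drain_pages(fake_fetch)))  # [1, 2, 3]
```

The empty-page check matters here because the audit-log API can keep returning continuation tokens; stopping on the first empty response avoids an infinite loop.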

    def get_url_params(
        self,
        context: dict | None,
        next_page_token: str | None,
    ) -> dict[str, t.Any]:
        """Return a dictionary of URL query parameters.

        Args:
            context: The stream sync context.
            next_page_token: A token for the next page of results.

        Returns:
            A dictionary of URL query parameters.
        """
        params = {"pageSize": 100}
        since = round(self.get_starting_timestamp(context).timestamp())
        if next_page_token:
            until = int(next_page_token)
        else:
            until = round(self.get_replication_key_signpost(context).timestamp())
        params["startTime"] = since
        params["endTime"] = until
        return params
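The query-window arithmetic above is easy to verify in isolation. A hedged sketch (`window_params` is a hypothetical standalone helper, with datetimes passed in where the stream would call `get_starting_timestamp` and `get_replication_key_signpost`): the first request spans the full `[startTime, endTime]` window, and each follow-up request reuses the continuation token as the new upper bound, walking backwards in time.

```python
from datetime import datetime, timezone

def window_params(start, signpost, next_page_token=None):
    # startTime is always the configured start date (epoch seconds).
    params = {"pageSize": 100, "startTime": round(start.timestamp())}
    if next_page_token:
        # Subsequent pages: the token is the new upper bound.
        params["endTime"] = int(next_page_token)
    else:
        # First page: the replication-key signpost (roughly "now").
        params["endTime"] = round(signpost.timestamp())
    return params

start = datetime(2024, 1, 1, tzinfo=timezone.utc)
now = datetime(2024, 2, 1, tzinfo=timezone.utc)
print(window_params(start, now))
print(window_params(start, now, next_page_token="1706000000"))
```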

    def post_process(
        self,
        row: dict,
        context: dict | None = None,
    ) -> dict | None:
        """Post-process a row of data."""
        row = super().post_process(row, context) or {}
        row["timestamp"] = datetime.fromtimestamp(row["timestamp"], tz=timezone.utc)
        return row
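That last step converts the API's epoch-seconds `timestamp` into a timezone-aware datetime so the replication key compares correctly. A minimal stand-in (`to_utc` is a hypothetical name for illustration):

```python
from datetime import datetime, timezone

def to_utc(row):
    # Epoch seconds -> aware UTC datetime, as in AuditLogs.post_process.
    row["timestamp"] = datetime.fromtimestamp(row["timestamp"], tz=timezone.utc)
    return row

row = to_utc({"timestamp": 0})
print(row["timestamp"].isoformat())  # 1970-01-01T00:00:00+00:00
```

Passing `tz=timezone.utc` matters: without it, `fromtimestamp` would return a naive datetime in the local timezone of whatever machine runs the tap.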
83 changes: 81 additions & 2 deletions tap_pulumi_cloud/client.py
@@ -2,11 +2,16 @@

 from __future__ import annotations

-from typing import Any
+import typing as t
+from http import HTTPStatus
+from typing import TYPE_CHECKING, Any

+if TYPE_CHECKING:
+    import requests
 import humps
 from singer_sdk import RESTStream
 from singer_sdk.authenticators import APIKeyAuthenticator
+from singer_sdk.exceptions import FatalAPIError, RetriableAPIError
 from singer_sdk.helpers._typing import TypeConformanceLevel


@@ -15,6 +20,7 @@ class PulumiCloudStream(RESTStream):

     url_base = "https://api.pulumi.com"
     next_page_token_jsonpath = "$.continuationToken"  # noqa: S105
+    tolerated_http_errors: t.Sequence[int] = []

     TYPE_CONFORMANCE_LEVEL = TypeConformanceLevel.ROOT_ONLY

@@ -60,7 +66,7 @@ def get_url_params(
         Returns:
             Mapping of URL query parameters.
         """
-        params: dict = {}
+        params: dict = {"pageSize": 100}
         if next_page_token:
             params["continuationToken"] = next_page_token
         return params
@@ -72,3 +78,76 @@ def post_process(
     ) -> dict | None:
         """Post-process a row of data."""
         return humps.decamelize(row)
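`humps.decamelize` rewrites the API's camelCase keys into the snake_case names used in the stream schemas. A rough re-implementation for flat dicts (the real pyhumps library also recurses into nested structures; `decamelize_keys` here is a hypothetical sketch):

```python
import re

def decamelize_keys(row):
    def snake(name):
        # Insert "_" before each interior capital, then lowercase.
        return re.sub(r"(?<!^)(?=[A-Z])", "_", name).lower()
    return {snake(k): v for k, v in row.items()}

print(decamelize_keys({"orgName": "meltano", "tokenId": "abc"}))
# {'org_name': 'meltano', 'token_id': 'abc'}
```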

+    def parse_response(self, response: requests.Response) -> t.Iterable[dict]:
+        """Parse the response and return an iterator of result records.
+
+        Args:
+            response: A raw :class:`requests.Response`
+
+        Yields:
+            One item for every item found in the response.
+        """
+        if response.status_code in self.tolerated_http_errors:
+            return []
+        return super().parse_response(response)
+
+    def validate_response(self, response: requests.Response) -> None:
+        """Validate HTTP response.
+
+        Checks for error status codes and whether they are fatal or retriable.
+
+        In case an error is deemed transient and can be safely retried, then this
+        method should raise an :class:`singer_sdk.exceptions.RetriableAPIError`.
+        By default this applies to 5xx error codes, along with values set in:
+        :attr:`~singer_sdk.RESTStream.extra_retry_statuses`
+
+        In case an error is unrecoverable raises a
+        :class:`singer_sdk.exceptions.FatalAPIError`. By default, this applies to
+        4xx errors, excluding values found in:
+        :attr:`~singer_sdk.RESTStream.extra_retry_statuses`
+
+        Tap developers are encouraged to override this method if their APIs use HTTP
+        status codes in non-conventional ways, or if they communicate errors
+        differently (e.g. in the response body).
+
+        .. image:: ../images/200.png
+
+        Args:
+            response: A :class:`requests.Response` object.
+
+        Raises:
+            FatalAPIError: If the request is not retriable.
+            RetriableAPIError: If the request is retriable.
+        """
+        if response.status_code in self.tolerated_http_errors:
+            msg = (
+                f"{response.status_code} Tolerated Status Code "
+                f"(Reason: {response.reason}) for path: {response.request.url}"
+            )
+            self.logger.info(msg)
+            return
+
+        if (
+            response.status_code in self.extra_retry_statuses
+            or response.status_code >= HTTPStatus.INTERNAL_SERVER_ERROR
+        ):
+            msg = self.response_error_message(response)
+            raise RetriableAPIError(msg, response)
+
+        if (
+            HTTPStatus.BAD_REQUEST
+            <= response.status_code
+            < HTTPStatus.INTERNAL_SERVER_ERROR
+        ):
+            msg = self.response_error_message(response)
+            raise FatalAPIError(msg)
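The triage order in `validate_response` can be captured in a standalone sketch (`classify` is a hypothetical stand-in; it returns labels where the stream logs, raises `RetriableAPIError`, or raises `FatalAPIError`): tolerated codes win first, then 5xx and extra retry statuses, then the remaining 4xx range. The `504` default below only mirrors the PR's motivating case of tolerating gateway timeouts on stack previews, and is an assumption of this sketch.

```python
from http import HTTPStatus

def classify(status, tolerated=(504,), extra_retry=()):
    if status in tolerated:
        return "tolerate"  # logged and skipped; parse_response yields nothing
    if status in extra_retry or status >= HTTPStatus.INTERNAL_SERVER_ERROR:
        return "retry"     # -> RetriableAPIError
    if HTTPStatus.BAD_REQUEST <= status < HTTPStatus.INTERNAL_SERVER_ERROR:
        return "fatal"     # -> FatalAPIError
    return "ok"

print(classify(504))                 # tolerate
print(classify(500, tolerated=()))   # retry
print(classify(404, tolerated=()))   # fatal
```

Checking `tolerated_http_errors` before the 5xx branch is the key design choice: a 504 would otherwise be retried forever by the 5xx rule.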


+class _OrgPartitionedStream(PulumiCloudStream):
+    """Base class for streams that are partitioned by organization."""
+
+    @property
+    def partitions(self) -> list[dict] | None:
+        """List of organizations to sync."""
+        return [{"org_name": org} for org in self.config["organizations"]]
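The `partitions` property fans a single configured tap out into one sync partition per organization, which is how every `{org_name}` path template above gets filled in. The same expression, isolated (`org_partitions` is a hypothetical name for illustration):

```python
def org_partitions(config):
    # One partition dict per configured organization.
    return [{"org_name": org} for org in config["organizations"]]

print(org_partitions({"organizations": ["meltano", "pulumi"]}))
# [{'org_name': 'meltano'}, {'org_name': 'pulumi'}]
```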