[AAP-39884] - Improve code coverage and fix some minor issues in analytics collection (#1207)

https://issues.redhat.com/browse/AAP-39884

This PR covers:

1. Add more Pytest cases to improve code coverage for analytics collection
2. Fix errors when the audit_rule table is empty
```
Traceback (most recent call last):
  File "/home/runner/.cache/pypoetry/virtualenvs/aap-eda-mUpJWeoH-py3.11/lib/python3.11/site-packages/insights_analytics_collector/collection.py", line 59, in gather
    result = self.fnc_collecting(
             ^^^^^^^^^^^^^^^^^^^^
  File "/home/runner/work/eda-server/eda-server/src/aap_eda/analytics/analytics_collectors.py", line 289, in audit_rules_table
    audit_rules = _get_audit_rule_qs(since, until).values(
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/runner/.cache/pypoetry/virtualenvs/aap-eda-mUpJWeoH-py3.11/lib/python3.11/site-packages/django/db/models/query.py", line 1313, in values
    clone = self._values(*fields, **expressions)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/runner/.cache/pypoetry/virtualenvs/aap-eda-mUpJWeoH-py3.11/lib/python3.11/site-packages/django/db/models/query.py", line 1308, in _values
    clone.query.set_values(fields)
  File "/home/runner/.cache/pypoetry/virtualenvs/aap-eda-mUpJWeoH-py3.11/lib/python3.11/site-packages/django/db/models/sql/query.py", line 2463, in set_values
    self.add_fields(field_names, True)
  File "/home/runner/.cache/pypoetry/virtualenvs/aap-eda-mUpJWeoH-py3.11/lib/python3.11/site-packages/django/db/models/sql/query.py", line 2195, in add_fields
    raise FieldError(
django.core.exceptions.FieldError: Cannot resolve keyword 'created_at' into field. Choices are: activation, activation_id, activation_pod_id, activationinstancejobinstance, auditrule, ended_at, git_hash, id, log_read_at, name, organization, organization_id, parent_type, rulebookprocesslog, rulebookprocessqueue, started_at, status, status_message, updated_at
```
3. Fix the previous two failed Pytest cases:
```
FAILED tests/integration/analytics/test_gather_analytics.py::test_gather_analytics_invalid_settings[https://url-True-Analytics collection is done] - requests.exceptions.ConnectionError: HTTPSConnectionPool(host='url', port=443): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f9acaca9950>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))
FAILED tests/integration/analytics/test_gather_analytics.py::test_gather_analytics_command[--ship-INFO-Analytics collection is done] - requests.exceptions.ConnectionError: HTTPSConnectionPool(host='url', port=443): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f9acb3f0a90>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))
====== 2 failed, 922 passed, 24 skipped, 15 warnings in 391.17s (0:06:31) ======
```
ansible · Feb 13, 2025 · f71b9f8 · f71b9f8
1 parent 58ce34c
commit f71b9f8
Show file tree

Hide file tree

Showing 13 changed files with 822 additions and 165 deletions.
diff --git a/src/aap_eda/analytics/analytics_collectors.py b/src/aap_eda/analytics/analytics_collectors.py
@@ -58,6 +58,7 @@
     config=True,
 )
 def config(**kwargs) -> dict:
+    os_info = f"{distro.name(pretty=True)} {distro.version(pretty=True)}"
     install_type = "traditional"
     if os.environ.get("container") == "oci":
         install_type = "openshift"
@@ -67,7 +68,7 @@ def config(**kwargs) -> dict:
         "install_uuid": service_id(),
         "platform": {
             "system": platform.system(),
-            "dist": distro.linux_distribution(),
+            "dist": os_info,
             "release": platform.release(),
             "type": install_type,
         },
@@ -832,7 +833,7 @@ def _get_audit_rule_qs(since: datetime, until: datetime):
     )
 
     if len(activation_instance_ids) == 0:
-        return models.RulebookProcess.objects.none()
+        return models.AuditRule.objects.none()
 
     if len(activation_instance_ids) == 1:
         audit_rules = models.AuditRule.objects.filter(

diff --git a/src/aap_eda/analytics/collector.py b/src/aap_eda/analytics/collector.py
@@ -47,26 +47,26 @@ def _is_valid_license(self) -> bool:
         # ignore license information checking for now
         return True
 
-    def _last_gathering(self) -> Optional[str]:
-        self.logger.info(
-            "Last gather: "
-            f"{application_settings.AUTOMATION_ANALYTICS_LAST_GATHER}"
-        )
-
-        return (
-            datetime.fromisoformat(
-                application_settings.AUTOMATION_ANALYTICS_LAST_GATHER
+    def _last_gathering(self) -> Optional[datetime]:
+        last_gather = application_settings.AUTOMATION_ANALYTICS_LAST_GATHER
+        if not last_gather:
+            return None
+
+        self.logger.info(f"Last gather: {last_gather}")
+        return datetime.fromisoformat(last_gather)
+
+    def _load_last_gathered_entries(self) -> dict:
+        try:
+            last_entries = (
+                application_settings.AUTOMATION_ANALYTICS_LAST_ENTRIES
             )
-            if bool(application_settings.AUTOMATION_ANALYTICS_LAST_GATHER)
-            else None
-        )
-
-    def _load_last_gathered_entries(self) -> str:
-        last_entries = application_settings.AUTOMATION_ANALYTICS_LAST_ENTRIES
-        last_entries = last_entries.replace("'", '"')
-        self.logger.info(f"Last collect entries: {last_entries}")
+            last_entries = last_entries.replace("'", '"')
+            self.logger.info(f"Last collect entries: {last_entries}")
 
-        return json.loads(last_entries, object_hook=utils.datetime_hook)
+            return json.loads(last_entries, object_hook=utils.datetime_hook)
+        except (json.JSONDecodeError, TypeError) as e:
+            self.logger.error(f"Failed to load last entries: {str(e)}")
+            return {}
 
     def _save_last_gathered_entries(self, last_gathered_entries: dict) -> None:
         application_settings.AUTOMATION_ANALYTICS_LAST_ENTRIES = json.dumps(

diff --git a/src/aap_eda/analytics/package.py b/src/aap_eda/analytics/package.py
@@ -11,69 +11,96 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
+import logging
+
 from django.conf import settings
 from insights_analytics_collector import Package as InsightsAnalyticsPackage
 
 from aap_eda.conf import application_settings
 
+logger = logging.getLogger(__name__)
+
 
 class MissingUserPasswordError(Exception):
+    """Raised when required user credentials are missing."""
+
     pass
 
 
 class Package(InsightsAnalyticsPackage):
+    """Handles packaging and shipping analytics data to Red Hat services.
+
+    Attributes:
+        PAYLOAD_CONTENT_TYPE: MIME type for the analytics payload
+        USER_AGENT: Identifier for the analytics client
+        CREDENTIAL_SOURCES: Priority list of credential configurations
+    """
+
     PAYLOAD_CONTENT_TYPE = (
         "application/vnd.redhat.aap-event-driven-ansible.filename+tgz"
     )
+    USER_AGENT = "EDA-metrics-agent"
     CERT_PATH = settings.INSIGHTS_CERT_PATH
+    CREDENTIAL_SOURCES = [
+        ("REDHAT", ("REDHAT_USERNAME", "REDHAT_PASSWORD")),
+        (
+            "SUBSCRIPTIONS",
+            ("SUBSCRIPTIONS_USERNAME", "SUBSCRIPTIONS_PASSWORD"),
+        ),
+    ]
 
     def _tarname_base(self) -> str:
         timestamp = self.collector.gather_until
         return f'eda-analytics-{timestamp.strftime("%Y-%m-%d-%H%M%S%z")}'
 
     def get_ingress_url(self) -> str:
-        return application_settings.AUTOMATION_ANALYTICS_URL
+        return (
+            application_settings.AUTOMATION_ANALYTICS_URL
+            or settings.AUTOMATION_ANALYTICS_URL
+        )
 
     def shipping_auth_mode(self) -> str:
         return settings.AUTOMATION_AUTH_METHOD
 
     def _get_rh_user(self) -> str:
         self._check_users()
-        user_name = (
+        return settings.REDHAT_USERNAME or (
             application_settings.REDHAT_USERNAME
             or application_settings.SUBSCRIPTIONS_USERNAME
         )
 
-        return user_name
-
     def _get_rh_password(self) -> str:
         self._check_users()
-        user_password = (
+        return settings.REDHAT_PASSWORD or (
             application_settings.REDHAT_PASSWORD
             or application_settings.SUBSCRIPTIONS_PASSWORD
         )
 
-        return user_password
-
     def _get_http_request_headers(self) -> dict:
-        return {
+        headers = {
             "Content-Type": self.PAYLOAD_CONTENT_TYPE,
-            "User-Agent": "EDA-metrics-agent",
+            "User-Agent": self.USER_AGENT,
         }
+        if hasattr(settings, "EDA_VERSION"):
+            headers["X-EDA-Version"] = settings.EDA_VERSION
+        return headers
 
     def _check_users(self) -> None:
-        if (
-            application_settings.REDHAT_USERNAME
-            and application_settings.REDHAT_PASSWORD
-        ):
-            return
-
-        if (
-            application_settings.SUBSCRIPTIONS_USERNAME
-            and application_settings.SUBSCRIPTIONS_PASSWORD
-        ):
-            return
-
-        raise MissingUserPasswordError(
-            "User information is missing in application settings"
+        """Validate at least one set of credentials is fully configured.
+
+        Raises:
+            MissingUserPasswordError: If no complete credential pairs are found
+        """
+        has_valid_creds = any(
+            getattr(source, user_key, None) and getattr(source, pass_key, None)
+            for source in (application_settings, settings)
+            for _, (user_key, pass_key) in self.CREDENTIAL_SOURCES
         )
+
+        if not has_valid_creds:
+            logger.error(
+                "Missing required credentials in application settings"
+            )
+            raise MissingUserPasswordError(
+                "Valid user credentials not found in settings"
+            )
diff --git a/src/aap_eda/analytics/utils.py b/src/aap_eda/analytics/utils.py
@@ -12,84 +12,118 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
-import base64
 import logging
 import re
 from typing import Optional, Tuple
 
 import requests
 import yaml
 from django.utils.dateparse import parse_datetime
+from requests.auth import AuthBase, HTTPBasicAuth
 
 from aap_eda.core import enums, models
-from aap_eda.utils import str_to_bool
 
 logger = logging.getLogger("aap_eda.analytics")
 
 
+class TokenAuth(AuthBase):
+    def __init__(self, token: str):
+        self.token = token
+
+    def __call__(self, r):
+        r.headers["Authorization"] = f"Bearer {self.token}"
+        return r
+
+
 def datetime_hook(dt: dict) -> dict:
     new_dt = {}
     for key, value in dt.items():
         try:
-            new_dt[key] = parse_datetime(value)
-        except TypeError:
+            new_dt[key] = parse_datetime(value) or value
+        except (TypeError, ValueError):
             new_dt[key] = value
     return new_dt
 
 
 def collect_controllers_info() -> dict:
-    aap_credentia_type = models.CredentialType.objects.get(
+    aap_credential_type = models.CredentialType.objects.get(
         name=enums.DefaultCredentialType.AAP
     )
     credentials = models.EdaCredential.objects.filter(
-        credential_type=aap_credentia_type
+        credential_type=aap_credential_type
     )
     info = {}
-    for credential in credentials:
-        controller_info = {}
-        inputs = yaml.safe_load(credential.inputs.get_secret_value())
-        host = inputs["host"]
-        url = f"{host}/api/v2/ping/"
-        verify = inputs.get("verify_ssl", False)
-        if isinstance(verify, str):
-            verify = str_to_bool(verify)
-
-        token = inputs.get("oauth_token")
-
-        controller_info["credential_id"] = credential.id
-        controller_info["inputs"] = inputs
-        if token:
-            headers = {"Authorization": f"Bearer {token}"}
-            logger.info("Use Bearer token to ping the controller.")
-        else:
-            user_pass = f"{inputs.get('username')}:{inputs.get('password')}"
-            auth_value = (
-                f"Basic {base64.b64encode(user_pass.encode()).decode()}"
-            )
-            headers = {"Authorization": f"{auth_value}"}
-            logger.info("Use Basic authentication to ping the controller.")
 
+    for credential in credentials:
         try:
-            resp = requests.get(url, headers=headers, verify=verify)
-            resp_json = resp.json()
-            controller_info["install_uuid"] = resp_json["install_uuid"]
-
-            info[host] = controller_info
+            inputs = yaml.safe_load(credential.inputs.get_secret_value())
+            host = inputs["host"].removesuffix("/api/controller/")
+            if not info.get(host):
+                url = f"{host}/api/v2/ping/"
+                auth = _get_auth(inputs)
+                verify = inputs.get("verify_ssl", False)
+
+                controller_info = {
+                    "credential_id": credential.id,
+                    "inputs": inputs,
+                }
+
+                # Short timeout (3s): fail fast if the controller is slow.
+                resp = requests.get(url, auth=auth, verify=verify, timeout=3)
+                resp.raise_for_status()
+                controller_info["install_uuid"] = resp.json()["install_uuid"]
+                info[host] = controller_info
+
+        except KeyError as e:
+            logger.error(f"Missing key in credential inputs: {e}")
+            continue
+        except yaml.YAMLError as e:
+            logger.error(
+                f"YAML parsing error for credential {credential.id}: {e}"
+            )
+            continue
         except requests.exceptions.RequestException as e:
             logger.warning(
-                "Failed to connect with controller using credential "
-                f"{credential.name}: {e}"
+                f"Controller connection failed for {credential.name}: {e}"
             )
+            continue
+        except Exception as e:
+            logger.exception(
+                f"Unexpected error processing credential {credential.id}: {e}"
+            )
+            continue
 
     return info
 
 
+def _get_auth(inputs: dict) -> AuthBase:
+    # Priority: OAuth token > basic auth (username/password)
+    if token := inputs.get("oauth_token"):
+        logger.debug("Use Bearer authentication")
+        return TokenAuth(token)
+
+    username = inputs.get("username")
+    password = inputs.get("password")
+    if username and password:
+        logger.debug("Use Basic authentication")
+        return HTTPBasicAuth(username, password)
+
+    raise ValueError(
+        "Invalid authentication configuration, must provide "
+        "Token or username/password"
+    )
+
+
 def extract_job_details(
     url: str,
     controllers_info: dict,
 ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
     for host, info in controllers_info.items():
-        if not url.startswith(host):
+        if not url.lower().startswith(host.lower()):
+            continue
+
+        install_uuid = info.get("install_uuid")
+        if not install_uuid:
             continue
 
         pattern = r"/jobs/([a-zA-Z]+)/(\d+)/"
@@ -104,6 +138,6 @@ def extract_job_details(
                 else "run_workflow_template"
             )
             job_number = match.group(2)
-            return job_type, str(job_number), info["install_uuid"]
+            return job_type, str(job_number), install_uuid
 
     return None, None, None
diff --git a/src/aap_eda/settings/default.py b/src/aap_eda/settings/default.py
@@ -845,3 +845,6 @@ def get_rulebook_process_log_level() -> RulebookProcessLogLevel:
 # Available methods:
 # https://github.com/RedHatInsights/insights-analytics-collector/blob/main/insights_analytics_collector/package.py#L27
 AUTOMATION_AUTH_METHOD = settings.get("AUTOMATION_AUTH_METHOD", "user-pass")
+INSIGHTS_TRACKING_STATE = settings.get("INSIGHTS_TRACKING_STATE", True)
+REDHAT_USERNAME = settings.get("REDHAT_USERNAME", "")
+REDHAT_PASSWORD = settings.get("REDHAT_PASSWORD", "")