Merge pull request #95 from vishnuchalla/telco-splunk

Integrating Splunk for Telco KPIs
cloud-bulldozer · May 23, 2024 · e3c39d5 · e3c39d5
2 parents 309a5a1 + a1f404f
commit e3c39d5
Show file tree

Hide file tree

Showing 16 changed files with 983 additions and 504 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # OpenShift Performance Dashboard
 
-## Elasticsearch configuration
+## Backend configuration
 
 ### Requires
 
@@ -15,6 +15,13 @@ indice=
 username=
 password=
 
+[<product>.splunk]
+host=
+port=
+indice=
+username=
+password=
+
 [ocp-server]
 port=8000
 
@@ -30,9 +37,9 @@ password=
 
 [TOML](https://toml.io/en/) is used above, but it also accepts YAML.
 
-The elasticsearch configuration should be set up by product, that way each product can configure their own ES server.
+The backend configuration should be set up per product and data store type, so that each product can configure its own backend server.
 
-As an example for `OCP` the configuration looks like this:
+As an example, the ES configuration for `OCP` looks like this:
 
 ```toml
 [ocp.elasticsearch]
@@ -41,6 +48,7 @@ indice=
 username=
 password=
 ```
+**Note: The following applies only to Elasticsearch at the moment.**  
 If you also have an archived internal instance that keeps track of older data, it can be specified with '.internal' suffix. Example of our `OCP` internal archived instance's configuration.
 ```toml
 [ocp.elasticsearch.internal]
@@ -136,7 +144,7 @@ $ podman run \
     ocpp-front
 ```
 
-## Integrating to the dashboard
+## ES Integration to the dashboard
 
 To integrate into our dashboard we provide a default set of fields that teams should adhere to. That set would be the one used to display a high level Homepage for all the teams.
 

diff --git a/backend/app/api/api.py b/backend/app/api/api.py
@@ -7,6 +7,7 @@
 from app.api.v1.endpoints.jira import jira
 from app.api.v1.endpoints.quay import quayJobs
 from app.api.v1.endpoints.quay import quayGraphs
+from app.api.v1.endpoints.telco import telcoJobs
 
 router = APIRouter()
 
@@ -22,5 +23,8 @@
 router.include_router(quayJobs.router, tags=['quay'])
 router.include_router(quayGraphs.router, tags=['quay'])
 
+# Telco endpoints
+router.include_router(telcoJobs.router, tags=['telco'])
+
 # Jira endpoints
 router.include_router(jira.router, tags=['jira'])
diff --git a/backend/app/api/v1/commons/example_responses.py b/backend/app/api/v1/commons/example_responses.py
@@ -119,12 +119,35 @@ def response_422():
     ]
 }
 
+# Example payload shown in the OpenAPI docs for the telco jobs endpoint.
+# "encryptedData" matches the key emitted by commons/telco.getData.
+telco_response_example = {
+    "startDate": "2023-09-20",
+    "endDate": "2023-09-20",
+    "results": [
+        {
+            "ciSystem": "Jenkins",
+            "uuid": "2cc5d4ca895ca5d84cab0fd7923db93b",
+            "encryptedData": "gAAAAABmQALtP0g5UPMsOjQw46tZ-aBz77yl-8QNI4jwLfIEV1POnOlA1ny89cp3Nrik3OzpNwXrWO3K4ZwtOliTfk0SO5NkNZHY8reJhvOVJBGFEw2enyjRaHp9hIaJdE0Vrfuqt_NjiYX-vOZo0Sjc84R76LvxjAC6f_urceGGZICH36IkT2g=",
+            "releaseStream": "Release Candidate",
+            "jobStatus": "success",
+            "buildUrl": "https://ci-jenkins-xxx.com/job/your-tests/532",
+            "startDate": "2024-05-09 14:10:51+00:00",
+            "endDate": "2024-05-09 14:43:51+00:00",
+            "product": "telco",
+            "version": "4.16",
+            "testName": "reboot"
+        },
+    ]
+}
+
 def ocp_200_response():
     # Passes the OCP example payload through response_200 and returns the result.
     return response_200(ocp_response_example)
 
 def quay_200_response():
     # Passes the Quay example payload through response_200 and returns the result.
     return response_200(quay_response_example)
 
+def telco_200_response():
+    """Return the response_200 wrapper built from the telco example payload."""
+    example_payload = telco_response_example
+    return response_200(example_payload)
+
 cpt_response_example ={
   "startDate": "2023-11-18",
   "endDate": "2023-11-23",

diff --git a/backend/app/api/v1/commons/hasher.py b/backend/app/api/v1/commons/hasher.py
@@ -0,0 +1,41 @@
+import zlib
+import hashlib
+from cryptography.fernet import Fernet
+
+# Key handed to Fernet() by both helpers in this module.
+# NOTE(review): the key is hard-coded in source control, so anyone who can
+# read the repository can decrypt (and forge) these tokens. Consider loading
+# it from configuration or the environment instead.
+symmetric_encryptor = b'k3tGwuK6O59c0SEMmnIeJUEpTN5kuxibPy8Q8VfYC6A='
+
+def hash_encrypt_json(json_data):
+    """Return an MD5 digest and a Fernet token for ``json_data``.
+
+    The payload is turned into text with ``str()`` (a Python repr rather
+    than strict JSON). The digest is computed over that text, and the same
+    text is zlib-compressed and then encrypted with the module-level key.
+
+    Returns:
+        A ``(hex_digest, encrypted_bytes)`` tuple.
+    """
+    payload = str(json_data).encode()
+    digest = hashlib.md5(payload).hexdigest()
+    token = Fernet(symmetric_encryptor).encrypt(zlib.compress(payload))
+    return digest, token
+
+def decrypt_unhash_json(hash_digest, encrypted_data):
+    """Decrypt a token from ``hash_encrypt_json`` and verify its digest.
+
+    Args:
+        hash_digest: Hex MD5 digest expected for the decrypted text.
+        encrypted_data: Fernet token holding the zlib-compressed text.
+
+    Returns:
+        The Python object rebuilt from the decrypted text.
+
+    Raises:
+        ValueError: If the digest does not match, or if the text is not a
+            plain Python literal.
+    """
+    # Local import so this function does not depend on a file-level change.
+    import ast
+
+    cipher = Fernet(symmetric_encryptor)
+
+    # Decrypting yields the still-compressed payload.
+    compressed_data = cipher.decrypt(encrypted_data)
+    payload_str = zlib.decompress(compressed_data).decode()
+
+    # Verify hash digest
+    calculated_hash = hashlib.md5(payload_str.encode()).hexdigest()
+    if calculated_hash != hash_digest:
+        raise ValueError("Hash digest does not match")
+
+    # The text was produced with str(), i.e. a Python literal. literal_eval
+    # parses such literals without executing code, unlike eval(), which would
+    # run whatever expression a forged token carried.
+    return ast.literal_eval(payload_str)
diff --git a/backend/app/api/v1/commons/telco.py b/backend/app/api/v1/commons/telco.py
@@ -0,0 +1,61 @@
+from datetime import date
+import pandas as pd
+from app import config
+from app.services.splunk import SplunkService
+import app.api.v1.commons.hasher as hasher
+from datetime import datetime, timezone
+
+
+async def getData(start_datetime: date, end_datetime: date, configpath: str):
+    """Query Splunk for telco test results and map them to job records.
+
+    Args:
+        start_datetime: First day of the search window (searched from 00:00:00).
+        end_datetime: Last day of the search window (searched until 23:59:59).
+        configpath: Configuration path handed to ``SplunkService``.
+
+    Returns:
+        A pandas DataFrame with one row per Splunk result; it is empty when
+        the query returns nothing.
+    """
+    test_types = ["oslat", "cyclictest", "cpu_util", "deployment", "ptp", "reboot", "rfc-2544"]
+    cfg = config.get_config()
+    try:
+        # `or ""` also covers a lookup that yields None instead of raising.
+        jenkins_url = cfg.get('telco.config.job_url') or ""
+    except Exception as e:
+        print(f"Error reading telco configuration: {e}")
+        # Keep going with an empty base URL; previously the name stayed
+        # unbound and building "buildUrl" below raised NameError.
+        jenkins_url = ""
+    # Fixed duration in seconds per test type, used to estimate a start time
+    # because only a single timestamp is read from each result.
+    test_type_execution_times = {
+        "oslat": 3720,
+        "cyclictest": 3720,
+        "cpu_util": 6600,
+        "deployment": 3720,
+        "ptp": 4200,
+        "reboot": 1980,
+        "rfc-2544": 5580,
+    }
+    query = {
+        "earliest_time": "{}T00:00:00".format(start_datetime.strftime('%Y-%m-%d')),
+        "latest_time": "{}T23:59:59".format(end_datetime.strftime('%Y-%m-%d')),
+        "output_mode": "json"
+    }
+    searchList = ' OR '.join(['test_type="{}"'.format(test_type) for test_type in test_types])
+    splunk = SplunkService(configpath=configpath)
+    response = await splunk.query(query=query, searchList=searchList)
+    mapped_list = []
+
+    for each_response in response:
+        end_timestamp = int(each_response['timestamp'])
+        test_data = each_response['data']
+        # The digest of the whole result doubles as the record's uuid.
+        hash_digest, encrypted_data = hasher.hash_encrypt_json(each_response)
+        # Unknown test types get a zero duration, so start == end.
+        execution_time_seconds = test_type_execution_times.get(test_data['test_type'], 0)
+        start_timestamp = end_timestamp - execution_time_seconds
+        start_time_utc = datetime.fromtimestamp(start_timestamp, tz=timezone.utc)
+        end_time_utc = datetime.fromtimestamp(end_timestamp, tz=timezone.utc)
+
+        mapped_list.append({
+            "uuid": hash_digest,
+            "encryptedData": encrypted_data.decode('utf-8'),
+            "ciSystem": "Jenkins",
+            "testName": test_data['test_type'],
+            "version": test_data['ocp_version'],
+            "releaseStream": test_data['ocp_build'],
+            "startDate": str(start_time_utc),
+            "endDate": str(end_time_utc),
+            "buildUrl": jenkins_url + "/" + str(test_data['cluster_artifacts']['ref']['jenkins_build']),
+            "jobStatus": "success"
+        })
+
+    # json_normalize([]) already yields an empty DataFrame, so no special
+    # case is needed for an empty result set.
+    return pd.json_normalize(mapped_list)
diff --git a/backend/app/api/v1/commons/utils.py b/backend/app/api/v1/commons/utils.py
@@ -49,4 +49,23 @@ def clasifyAWSJobs(job):
 def getBuild(job):
     # Removes the "<releaseStream>-" text from the job's ocpVersion. Note that
     # str.replace drops every occurrence, not only a leading one.
     releaseStream = job["releaseStream"] + "-"
     ocpVersion = job["ocpVersion"]
-    return ocpVersion.replace(releaseStream, "")
+    return ocpVersion.replace(releaseStream, "")
+
+def getReleaseStream(row):
+    """Map a row's raw ``releaseStream`` text to a display label.
+
+    Markers are tested in the order listed and the first substring hit wins,
+    so earlier entries take precedence (for example "rc" before "ci").
+    Rows that match no marker are labelled "Stable".
+
+    Args:
+        row: Mapping (such as a DataFrame row) with a "releaseStream" entry.
+
+    Returns:
+        The label string for the first matching marker, or "Stable".
+    """
+    labels_by_marker = (
+        ("fast", "Fast"),
+        ("stable", "Stable"),
+        ("eus", "EUS"),
+        ("candidate", "Release Candidate"),
+        ("rc", "Release Candidate"),
+        ("nightly", "Nightly"),
+        ("ci", "ci"),
+        ("ec", "Engineering Candidate"),
+    )
+    raw_stream = row["releaseStream"]
+    for marker, label in labels_by_marker:
+        if marker in raw_stream:
+            return label
+    return "Stable"
diff --git a/backend/app/api/v1/endpoints/cpt/cptJobs.py b/backend/app/api/v1/endpoints/cpt/cptJobs.py
@@ -6,6 +6,7 @@
 from .maps.ocp import ocpMapper
 from .maps.quay import quayMapper
 from .maps.hce import hceMapper
+from .maps.telco import telcoMapper
 from ...commons.example_responses import cpt_200_response, response_422
 from fastapi.param_functions import Query
 
@@ -14,7 +15,8 @@
 products = {
             "ocp": ocpMapper,
             "quay": quayMapper,
-            "hce": hceMapper
+            "hce": hceMapper,
+            "telco": telcoMapper,
            }
 
 @router.get('/api/v1/cpt/jobs',
@@ -43,7 +45,7 @@ async def jobs(start_date: date = Query(None, description="Start date for search
     results = pd.DataFrame()
     for product in products:
         try:
-            df = await products[product](start_date, end_date, f'{product}.elasticsearch')
+            df = await products[product](start_date, end_date)
             results = pd.concat([results, df.loc[:, ["ciSystem", "uuid", "releaseStream", "jobStatus", "buildUrl", "startDate", "endDate", "product", "version", "testName"]]])
         except ConnectionError:
             print("Connection Error in mapper for product " + product)

diff --git a/backend/app/api/v1/endpoints/cpt/maps/hce.py b/backend/app/api/v1/endpoints/cpt/maps/hce.py
@@ -15,8 +15,8 @@
 #   "version"
 #   "testName"
 ################################################################
-async def hceMapper(start_datetime: date, end_datetime: date, configpath: str):
-    df = await getData(start_datetime, end_datetime, configpath)
+async def hceMapper(start_datetime: date, end_datetime: date):
+    df = await getData(start_datetime, end_datetime, f'hce.elasticsearch')
     df["releaseStream"] = "Nightly"
     df["ciSystem"] = "Jenkins"
     df["testName"] = df["product"] + ":" + df["test"]

diff --git a/backend/app/api/v1/endpoints/cpt/maps/ocp.py b/backend/app/api/v1/endpoints/cpt/maps/ocp.py
@@ -1,34 +1,15 @@
 from ....commons.ocp import getData
+from ....commons.utils import getReleaseStream
 from datetime import date
 
 
 ################################################################
 # This will return a DataFrame from OCP required by the CPT endpoint
 ################################################################
-async def ocpMapper(start_datetime: date, end_datetime: date, configpath: str):
-    df = await getData(start_datetime, end_datetime, configpath)
+async def ocpMapper(start_datetime: date, end_datetime: date):
+    df = await getData(start_datetime, end_datetime, f'ocp.elasticsearch')
     # Append a constant "product" column as the last column.
     df.insert(len(df.columns), "product", "ocp")
     # Replace the raw release stream with the label chosen by getReleaseStream, row by row.
     df["releaseStream"] = df.apply(getReleaseStream, axis=1)
     # Copy existing columns under the "version" and "testName" names.
     df["version"] = df["shortVersion"]
     df["testName"] = df["benchmark"]
     return df
-
-
-def getReleaseStream(row):
-    if row["releaseStream"].__contains__("fast"):
-        return "Fast"
-    elif row["releaseStream"].__contains__("stable"):
-        return "Stable"
-    elif row["releaseStream"].__contains__("eus"):
-        return "EUS"
-    elif row["releaseStream"].__contains__("candidate"):
-        return "Release Candidate"
-    elif row["releaseStream"].__contains__("rc"):
-        return "Release Candidate"
-    elif row["releaseStream"].__contains__("nightly"):
-        return "Nightly"
-    elif row["releaseStream"].__contains__("ci"):
-        return "ci"
-    elif row["releaseStream"].__contains__("ec"):
-        return "Engineering Candidate"
-    return "Stable"
diff --git a/backend/app/api/v1/endpoints/cpt/maps/quay.py b/backend/app/api/v1/endpoints/cpt/maps/quay.py
@@ -5,9 +5,9 @@
 #####################################################################
 # This will return a DataFrame from Quay required by the CPT endpoint
 #####################################################################
-async def quayMapper(start_datetime: date, end_datetime: date, configpath: str):
-    df = await getData(start_datetime, end_datetime, configpath)
+async def quayMapper(start_datetime: date, end_datetime: date):
+    df = await getData(start_datetime, end_datetime, f'quay.elasticsearch')
     # Append a constant "product" column as the last column.
     df.insert(len(df.columns), "product", "quay")
     # Copy existing columns under the "version" and "testName" names.
     df["version"] = df["releaseStream"]
     df["testName"] = df["benchmark"]
-    return df
+    return df
diff --git a/backend/app/api/v1/endpoints/cpt/maps/telco.py b/backend/app/api/v1/endpoints/cpt/maps/telco.py
@@ -0,0 +1,13 @@
+from ....commons.telco import getData
+from ....commons.utils import getReleaseStream
+from datetime import date
+
+
+#####################################################################
+# This will return a DataFrame from Telco required by the CPT endpoint
+#####################################################################
+async def telcoMapper(start_datetime: date, end_datetime: date):
+    """Fetch telco jobs via getData and add the product and release-stream columns."""
+    telco_jobs = await getData(start_datetime, end_datetime, 'telco.splunk')
+    # Append the constant product tag as the last column.
+    product_position = len(telco_jobs.columns)
+    telco_jobs.insert(product_position, "product", "telco")
+    # Replace each raw release stream with its display label.
+    telco_jobs["releaseStream"] = telco_jobs.apply(getReleaseStream, axis=1)
+    return telco_jobs
diff --git a/backend/app/api/v1/endpoints/telco/telcoJobs.py b/backend/app/api/v1/endpoints/telco/telcoJobs.py
@@ -0,0 +1,56 @@
+import json
+from fastapi import Response
+from datetime import datetime, timedelta, date
+from fastapi import APIRouter
+from ...commons.telco import getData
+from ...commons.example_responses import telco_200_response, response_422
+from fastapi.param_functions import Query
+
+router = APIRouter()
+
+
+
+@router.get('/api/v1/telco/jobs',
+            summary="Returns a job list",
+            description="Returns a list of jobs in the specified dates. \
+            If no dates are provided the API will default the values. \
+            `startDate`: will be set to the day of the request minus 7 days.\
+            `endDate`: will be set to the day of the request.",
+            responses={
+                200: telco_200_response(),
+                422: response_422(),
+        },)
+async def jobs(start_date: date = Query(None, description="Start date for searching jobs, format: 'YYYY-MM-DD'", examples=["2020-11-10"]),
+                end_date: date = Query(None, description="End date for searching jobs, format: 'YYYY-MM-DD'", examples=["2020-11-15"]),
+                pretty: bool = Query(False, description="Output content in pretty format.")):
+    """List telco jobs between ``start_date`` and ``end_date`` (inclusive).
+
+    Missing dates default to today (UTC) for the end and seven days earlier
+    for the start. A start date after the end date yields a 422 response.
+    """
+    if start_date is None:
+        start_date = datetime.utcnow().date() - timedelta(days=7)
+
+    if end_date is None:
+        end_date = datetime.utcnow().date()
+
+    if start_date > end_date:
+        return Response(content=json.dumps({'error': "invalid date format, start_date must be less than end_date"}), status_code=422)
+
+    results = await getData(start_date, end_date, 'telco.splunk')
+
+    # Both the populated and the empty case share the same envelope; only
+    # the "results" list differs.
+    response = {
+        'startDate': str(start_date),
+        'endDate': str(end_date),
+        'results': results.to_dict('records') if len(results) >= 1 else [],
+    }
+
+    if pretty:
+        json_str = json.dumps(response, indent=4)
+        return Response(content=json_str, media_type='application/json')
+
+    # NOTE(review): a str is returned here rather than the dict, so the
+    # framework will presumably JSON-encode it a second time. Kept as-is for
+    # existing clients; confirm they expect it.
+    jsonstring = json.dumps(response)
+    return jsonstring