From 0e054d7bf3d3994f2651fcd8d258c7d58b294073 Mon Sep 17 00:00:00 2001
From: "Joe Talerico (rook)"
Date: Thu, 11 Jan 2024 15:42:01 -0500
Subject: [PATCH] Create pylint action and cleanup

Cleaning up
---
 .github/workflows/pylint.yml |  23 ++++
 fmatch/matcher.py            | 232 +++++++++++++++++++++--------------
 fmatch/test_fmatch.py        |  68 +++++-----
 3 files changed, 205 insertions(+), 118 deletions(-)
 create mode 100644 .github/workflows/pylint.yml

diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
new file mode 100644
index 0000000..11d3344
--- /dev/null
+++ b/.github/workflows/pylint.yml
@@ -0,0 +1,23 @@
+name: Pylint
+
+on: [push]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install pylint
+    - name: Analysing the code with pylint
+      run: |
+        pylint -d C0103 $(git ls-files '*.py')
diff --git a/fmatch/matcher.py b/fmatch/matcher.py
index 37d03a2..e71e51f 100644
--- a/fmatch/matcher.py
+++ b/fmatch/matcher.py
@@ -1,70 +1,84 @@
+""" metadata matcher
+"""
+import os
+# pylint: disable=import-error
 from elasticsearch7 import Elasticsearch
+# pylint: disable=import-error
 from elasticsearch.exceptions import NotFoundError
+# pylint: disable=import-error
 import pandas as pd
-import os
-import csv
-import json
-
-ES_URL=os.getenv("ES_SERVER")
+ES_URL = os.getenv("ES_SERVER")
 
 
 class Matcher:
+    """ Matcher
+    """
     def __init__(self, index="perf_scale_ci"):
-        self.index=index
-        self.es_url=ES_URL
-        self.es=Elasticsearch([self.es_url],http_auth=["username","password"])
-        self.data=None
+        self.index = index
+        self.es_url = ES_URL
+        self.es = Elasticsearch([self.es_url], http_auth=[
+                                "username", "password"])
+        self.data = None
 
-    def get_metadata_by_uuid(self,uuid,index=None):
-        if index==None:
-            index=self.index
+    def get_metadata_by_uuid(self, uuid, index=None):
+        """ get_metadata_by_uuid
+        """
+        if index is None:
+            index = self.index
         query = {
-            "query": {
-                "match": {
-                    "uuid": uuid
+            "query": {
+                "match": {
+                    "uuid": uuid
                 }
            }
        }
+        result = {}
         try:
             result = self.es.search(index=index, body=query)
             hits = result.get('hits', {}).get('hits', [])
             if hits:
-                return dict(hits[0]['_source'])
-            else:
-                return None
+                result = dict(hits[0]['_source'])
         except NotFoundError:
             print(f"UUID {uuid} not found in index {index}")
-            return None
+        return result
 
-
-    def get_uuid_by_metadata(self,meta,index=None):
-        if index==None:
-            index=self.index
-        version=meta["ocpVersion"][:4]
+    def get_uuid_by_metadata(self, meta, index=None):
+        """ get_uuid_by_metadata
+        """
+        if index is None:
+            index = self.index
+        version = meta["ocpVersion"][:4]
         query = {
-            "query": {
-                "bool": {
-                    "must": [
-                        {
-                            "query_string": {
-                                "query": ' AND '.join([
-                                    f'{field}: "{value}"' if isinstance(value, str) else f'{field}: {value}'
-                                    for field, value in meta.items() if field!="ocpVersion"
-                                ]) +
-                                f' AND ocpVersion: {version}* AND jobStatus: success'
+            "query": {
+                "bool": {
+                    "must": [
+                        {
+                            "query_string": {
+                                "query": ' AND '.join([
+                                    f'{field}: "{value}"' if isinstance(
+                                        value, str) else f'{field}: {value}'
+                                    for field, value in meta.items() if field != "ocpVersion"
+                                ]) +
+                                f' AND ocpVersion: {version}* AND jobStatus: success'
+                            }
                             }
-                        }
-                    ]
-                }
-            },
-            "size": 10000
+                    ]
+                }
+            },
+            "size": 10000
         }
         result = self.es.search(index=index, body=query)
         hits = result.get('hits', {}).get('hits', [])
-        uuids=[hit['_source']['uuid'] for hit in hits]
+        uuids = [hit['_source']['uuid'] for hit in hits]
         return uuids
 
-
-    def match_kube_burner(self,uuids):
+
+    def match_kube_burner(self, uuids):
+        """ match kube burner runs
+        Args:
+            uuids (list): list of uuids
+        Returns:
+            list : list of runs
+        """
         index = "ripsaw-kube-burner*"
         ids = "\" OR uuid: \"".join(uuids)
         query = {
@@ -73,30 +87,45 @@ def match_kube_burner(self,uuids):
                     "query": (
                         f'( uuid: \"{ids}\" )'
                         f' AND metricName: "jobSummary"'
-                    )
+                    )
                 }
             },
-            "size":10000
+            "size": 10000
         }
-        result=self.es.search(index=index,body=query)
+        result = self.es.search(index=index, body=query)
         runs = [item['_source'] for item in result["hits"]["hits"]]
         return runs
 
-    def filter_runs(self,pdata,data):
-        columns = ['uuid','jobConfig.jobIterations']
+    def filter_runs(self, pdata, data):
+        """ filter out runs with different jobIterations
+        Args:
+            pdata (_type_): _description_
+            data (_type_): _description_
+        Returns:
+            _type_: _description_
+        """
+        columns = ['uuid', 'jobConfig.jobIterations']
         pdf = pd.json_normalize(pdata)
         pick_df = pd.DataFrame(pdf, columns=columns)
         iterations = pick_df.iloc[0]['jobConfig.jobIterations']
         df = pd.json_normalize(data)
         ndf = pd.DataFrame(df, columns=columns)
-        ids_df = ndf.loc[df['jobConfig.jobIterations'] == iterations ]
+        ids_df = ndf.loc[df['jobConfig.jobIterations'] == iterations]
         return ids_df['uuid'].to_list()
 
-
-    def burner_results(self,uuid,uuids,index):
-        if len(uuids) > 1 :
-            if len(uuid) > 0 :
+
+    def burner_results(self, uuid, uuids, index):
+        """ kube burner podReadyLatency
+        Args:
+            uuid (_type_): _description_
+            uuids (_type_): _description_
+            index (_type_): _description_
+        Returns:
+            _type_: _description_
+        """
+        if len(uuids) > 1:
+            if len(uuid) > 0:
                 uuids.remove(uuid)
-        if len(uuids) < 1 :
+        if len(uuids) < 1:
             return []
         ids = "\" OR uuid: \"".join(uuids)
         query = {
@@ -106,40 +135,48 @@ def burner_results(self,uuid,uuids,index):
                 "query_string": {
                     "query": (
                         f'( uuid: \"{ids}\" )'
                         f' AND metricName: "podLatencyQuantilesMeasurement"'
                         f' AND quantileName: "Ready"'
-                    )
+                    )
                 }
             },
-            "size":10000
+            "size": 10000
         }
-        result=self.es.search(index=index,body=query)
+        result = self.es.search(index=index, body=query)
         runs = [item['_source'] for item in result["hits"]["hits"]]
         return runs
 
-
-    def burner_cpu_results(self,uuids,namespace,index):
+
+    def burner_cpu_results(self, uuids, namespace, index):
+        """ kube burner CPU aggregated results for a namespace
+        Args:
+            uuids (_type_): _description_
+            namespace (_type_): _description_
+            index (_type_): _description_
+        Returns:
+            _type_: _description_
+        """
         ids = "\" OR uuid: \"".join(uuids)
         query = {
             "aggs": {
                 "time": {
-                "terms": {
-                    "field": "uuid.keyword",
-                    "size":10000
-                },
-                "aggs": {
-                    "time": {
-                        "avg": {
-                            "field": "timestamp"}
+                    "terms": {
+                        "field": "uuid.keyword",
+                        "size": 10000
+                    },
+                    "aggs": {
+                        "time": {
+                            "avg": {
+                                "field": "timestamp"}
+                        }
                     }
-                    }
-                },
-                "uuid": {
-                    "terms": {
-                        "field": "uuid.keyword",
-                        "size":10000
                     },
-                    "aggs": {
-                        "cpu": {
-                            "avg": {
-                                "field": "value"
+                "uuid": {
+                    "terms": {
+                        "field": "uuid.keyword",
+                        "size": 10000
+                    },
+                    "aggs": {
+                        "cpu": {
+                            "avg": {
+                                "field": "value"
                             }
                         }
                     }
@@ -158,17 +195,23 @@ def burner_cpu_results(self,uuids,namespace,index):
                     }]
                 }
             },
-            "size":10000
+            "size": 10000
         }
-        runs=self.es.search(index=index,body=query)
-        data=self.parse_burner_cpu_results(runs)
+        runs = self.es.search(index=index, body=query)
+        data = self.parse_burner_cpu_results(runs)
         return data
 
-
-    def parse_burner_cpu_results(self,data: dict):
+
+    def parse_burner_cpu_results(self, data: dict):
+        """ parse out CPU data from kube-burner query
+        Args:
+            data (dict): _description_
+        Returns:
+            _type_: _description_
+        """
         res = []
         stamps = data['aggregations']['time']['buckets']
         cpu = data['aggregations']['uuid']['buckets']
-        for stamp in stamps :
+        for stamp in stamps:
             dat = {}
             dat['uuid'] = stamp['key']
             dat['timestamp'] = stamp['time']['value_as_string']
@@ -176,17 +219,28 @@ def parse_burner_cpu_results(self,data: dict):
             dat['cpu_avg'] = acpu['cpu']['value']
             res.append(dat)
         return res
 
-
-    def convert_to_df(self,data,columns=None):
+
+    def convert_to_df(self, data, columns=None):
+        """ convert to a dataframe
+        Args:
+            data (_type_): _description_
+            columns (_type_, optional): _description_. Defaults to None.
+        Returns:
+            _type_: _description_
+        """
         odf = pd.json_normalize(data)
-        if columns!=None:
+        if columns is not None:
             odf = pd.DataFrame(odf, columns=columns)
         odf = odf.sort_values(by=['timestamp'])
         return odf
 
-
-    def save_results(self,df,csv_file_path="output.csv",columns=None):
-        if columns!=None:
+    def save_results(self, df, csv_file_path="output.csv", columns=None):
+        """ write results to CSV
+        Args:
+            df (_type_): _description_
+            csv_file_path (str, optional): _description_. Defaults to "output.csv".
+            columns (_type_, optional): _description_. Defaults to None.
+        """
+        if columns is not None:
             df = pd.DataFrame(df, columns=columns)
         df.to_csv(csv_file_path)
-
diff --git a/fmatch/test_fmatch.py b/fmatch/test_fmatch.py
index 4697b03..985c338 100644
--- a/fmatch/test_fmatch.py
+++ b/fmatch/test_fmatch.py
@@ -1,42 +1,52 @@
-from matcher import Matcher
+"""
+test_fmatch
+"""
+import sys
+# pylint: disable=import-error
 import pandas as pd
-import json
+# pylint: disable=import-error
+from matcher import Matcher
 
-match=Matcher(index='perf_scale_ci')
+match = Matcher(index='perf_scale_ci')
 
-meta={}
-meta['benchmark']="cluster-density-v2"
+meta = {}
+meta['benchmark'] = "cluster-density-v2"
 meta['masterNodesType'] = "m6a.xlarge"
 meta['workerNodesType'] = "m6a.xlarge"
-meta['platform']="AWS"
-meta['masterNodesCount']=3
-meta['workerNodesCount']=24
-meta['jobStatus']="success"
-meta['ocpVersion']='4.15'
-meta['networkType']="OVNKubernetes"
+meta['platform'] = "AWS"
+meta['masterNodesCount'] = 3
+meta['workerNodesCount'] = 24
+meta['jobStatus'] = "success"
+meta['ocpVersion'] = '4.15'
+meta['networkType'] = "OVNKubernetes"
 
-uuids=match.get_uuid_by_metadata(meta)
-if len(uuids)==0:
+uuids = match.get_uuid_by_metadata(meta)
+if len(uuids) == 0:
     print("No UUID present for given metadata")
-    exit()
-runs=match.match_kube_burner(uuids)
-ids=match.filter_runs(runs,runs)
-podl=match.burner_results("",ids,"ripsaw-kube-burner*")
+    sys.exit()
+runs = match.match_kube_burner(uuids)
+ids = match.filter_runs(runs, runs)
+podl = match.burner_results("", ids, "ripsaw-kube-burner*")
 
-kapi_cpu=match.burner_cpu_results(ids,"openshift-kube-apiserver","ripsaw-kube-burner*")
-ovn_cpu=match.burner_cpu_results(ids,"openshift-ovn-kubernetes","ripsaw-kube-burner*")
-etcd_cpu=match.burner_cpu_results(ids,"openshift-etcd","ripsaw-kube-burner*")
+kapi_cpu = match.burner_cpu_results(
+    ids, "openshift-kube-apiserver", "ripsaw-kube-burner*")
+ovn_cpu = match.burner_cpu_results(
+    ids, "openshift-ovn-kubernetes", "ripsaw-kube-burner*")
+etcd_cpu = match.burner_cpu_results(
+    ids, "openshift-etcd", "ripsaw-kube-burner*")
 
-podl_df=match.convert_to_df(podl,columns=['uuid','timestamp', 'quantileName', 'P99'])
-kapi_cpu_df=match.convert_to_df(kapi_cpu)
-merge_df=pd.merge(kapi_cpu_df,podl_df,on="uuid")
-match.save_results(merge_df,"merged.csv",["uuid","timestamp_x","cpu_avg","P99"])
-match.save_results(kapi_cpu_df,"CPUavg24.csv")
-match.save_results(podl_df,"podlatency24.csv")
+podl_df = match.convert_to_df(
+    podl, columns=['uuid', 'timestamp', 'quantileName', 'P99'])
+kapi_cpu_df = match.convert_to_df(kapi_cpu)
+merge_df = pd.merge(kapi_cpu_df, podl_df, on="uuid")
+match.save_results(merge_df, "merged.csv", [
+                   "uuid", "timestamp_x", "cpu_avg", "P99"])
+match.save_results(kapi_cpu_df, "CPUavg24.csv")
+match.save_results(podl_df, "podlatency24.csv")
 
-df=pd.read_csv("merged.csv")
-ls=df["uuid"].to_list()
+df = pd.read_csv("merged.csv")
+ls = df["uuid"].to_list()
 # Check merged csv data - Debug
 for i in ls:
     # Debug - Ensure they are all using the same networkType
-    print(match.get_metadata_by_uuid(i)["networkType"])
\ No newline at end of file
+    print(match.get_metadata_by_uuid(i)["networkType"])
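
Note: the new workflow runs the same pylint invocation on every push. A minimal way to reproduce the check locally before pushing (assuming a Python 3.8+ environment and the repository root as the working directory; these are the exact commands from the workflow's run steps):

    # install the linter, then lint all tracked Python files, ignoring naming-style messages (C0103)
    pip install pylint
    pylint -d C0103 $(git ls-files '*.py')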