From 6a0a00c93facf4dee7a765b3e1cf82ac6eb59a21 Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally
Date: Mon, 8 Jan 2024 16:15:02 -0500
Subject: [PATCH 1/5] added matcher class and functions

---
 .DS_Store          | Bin 0 -> 6148 bytes
 README.md          |   2 +-
 fmatch/__init__.py |   0
 fmatch/matcher.py  | 127 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 128 insertions(+), 1 deletion(-)
 create mode 100644 .DS_Store
 create mode 100644 fmatch/__init__.py
 create mode 100644 fmatch/matcher.py

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..42ed45ebc9fc05e89fc760b351262b1fe79f07de
GIT binary patch
literal 6148
zcmeHKJ8l9o5S>X7T8V}-rLVvZtQDMq3xE;<1uRmOP;ZrUF!x>p
zSS$dniG3g5W|%t#gO_8Vmt!of9KU%|
aiGvE;K6eXa%l1JQali

literal 0
HcmV?d00001

diff --git a/README.md b/README.md
index 3b5e7f1..f3fdc10 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,2 @@
 # py-commons
-Common Python Library
+This common python library is dedicated to metadata matching and finding regressions.
diff --git a/fmatch/__init__.py b/fmatch/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fmatch/matcher.py b/fmatch/matcher.py
new file mode 100644
index 0000000..9e9d7fa
--- /dev/null
+++ b/fmatch/matcher.py
@@ -0,0 +1,127 @@
+from elasticsearch7 import Elasticsearch
+from elasticsearch.exceptions import NotFoundError
+import pandas as pd
+
+import os
+import csv
+
+ES_URL=os.getenv("ES_SERVER")
+
+class Matcher:
+    def __init__(self, index="perf_scale_ci"):
+        self.index=index
+        self.es_url=ES_URL
+        self.es=Elasticsearch([self.es_url],http_auth=["username","password"])
+        self.data=None
+
+    def get_metadata_by_uuid(self,uuid,index=None):
+        if index==None:
+            index=self.index
+        query = {
+            "query": {
+                "match": {
+                    "uuid": uuid
+                }
+            }
+        }
+        try:
+            result = self.es.search(index=index, body=query)
+            hits = result.get('hits', {}).get('hits', [])
+            if hits:
+                return dict(hits[0]['_source'])
+            else:
+                return None
+        except NotFoundError:
+            print(f"UUID {uuid} not found in index {index}")
+            return None
+
+
+    def get_uuid_by_metadata(self,meta,index=None):
+        if index==None:
+            index=self.index
+        version=meta["ocpVersion"][:4]
+        query = {
+            "query": {
+                "bool": {
+                    "must": [
+                        {
+                            "query_string": {
+                                "query": ' AND '.join([
+                                    f'{field}: "{value}"' if isinstance(value, str) else f'{field}: {value}'
+                                    for field, value in meta.items() if field!="ocpVersion"
+                                ]) +
+                                f' AND ocpVersion: {version}* AND jobStatus: success'
+                            }
+                        }
+                    ]
+                }
+            },
+            "size": 10000
+        }
+        result = self.es.search(index=index, body=query)
+        hits = result.get('hits', {}).get('hits', [])
+        uuids=[hit['_source']['uuid'] for hit in hits]
+        return uuids
+
+    def match_kube_burner(self,uuids):
+        index = "ripsaw-kube-burner*"
+        ids = "\" OR uuid: \"".join(uuids)
+        query = {
+            "query": {
+                "query_string": {
+                    "query": (
+                        f'( uuid: \"{ids}\" )'
+                        f' AND metricName: "jobSummary"'
+                    )
+                }
+            },
+            "size":10000
+        }
+        result=self.es.search(index=index,body=query)
+        runs = [item['_source'] for item in result["hits"]["hits"]]
+        return runs
+
+    def filter_runs(self,pdata,data):
+        columns = ['uuid','jobConfig.jobIterations']
+        pdf = pd.json_normalize(pdata)
+        pdf.to_csv("check.csv")
+        pick_df = pd.DataFrame(pdf, columns=columns)
+        iterations = pick_df.iloc[0]['jobConfig.jobIterations']
+        df = pd.json_normalize(data)
+        ndf = pd.DataFrame(df, columns=columns)
+        ids_df = ndf.loc[df['jobConfig.jobIterations'] == iterations ]
+        return ids_df['uuid'].to_list()
+
+    def burner_results(self,uuid,uuids,index):
+        if len(uuids) > 1 :
+            if len(uuid) > 0 :
+                uuids.remove(uuid)
+        if len(uuids) < 1 :
+            return []
+        ids = "\" OR uuid: \"".join(uuids)
+        query = {
+            "query": {
+                "query_string": {
+                    "query": (
+                        f'( uuid: \"{ids}\" )'
+                        f' AND metricName: "podLatencyQuantilesMeasurement"'
+                        f' AND quantileName: "Ready"'
+                    )
+                }
+            },
+            "size":10000
+        }
+        #print(query)
+        result=self.es.search(index=index,body=query)
+        runs = [item['_source'] for item in result["hits"]["hits"]]
+        self.data=runs
+        return runs
+
+    def saveResults(self,csv_file_path="output.csv"):
+        odf = pd.json_normalize(self.data)
+        odf.to_csv("check.csv")
+        columns = ['uuid','timestamp', 'quantileName','metricName', 'P99']
+        odf = pd.DataFrame(odf, columns=columns)
+        odf = odf.sort_values(by=['timestamp'])
+        odf.to_csv(csv_file_path)
+

From ca7083241ab7df12f8c75331130c626a227e3012 Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally
Date: Wed, 10 Jan 2024 09:48:41 -0500
Subject: [PATCH 2/5] added readme for fmatch

---
 .DS_Store         | Bin 6148 -> 0 bytes
 .gitignore        |   3 +++
 README.md         |   2 +-
 fmatch/README.md  |   2 ++
 fmatch/matcher.py |   3 +--
 5 files changed, 7 insertions(+), 3 deletions(-)
 delete mode 100644 .DS_Store
 create mode 100644 fmatch/README.md

diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index 42ed45ebc9fc05e89fc760b351262b1fe79f07de..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHKJ8l9o5S>X7T8V}-rLVvZtQDMq3xE;<1uRmOP;ZrUF!x>p
zSS$dniG3g5W|%t#gO_8Vmt!of9KU%|
aiGvE;K6eXa%l1JQali

diff --git a/.gitignore b/.gitignore
index 68bc17f..9b495d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -158,3 +158,6 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+.DS_Store
+fmatch/main.py
diff --git a/README.md b/README.md
index f3fdc10..3b5e7f1 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,2 @@
 # py-commons
-This common python library is dedicated to metadata matching and finding regressions.
+Common Python Library
diff --git a/fmatch/README.md b/fmatch/README.md
new file mode 100644
index 0000000..c4d1e96
--- /dev/null
+++ b/fmatch/README.md
@@ -0,0 +1,2 @@
+# fmatch
+This common python library is dedicated to metadata matching and finding regressions.
\ No newline at end of file
diff --git a/fmatch/matcher.py b/fmatch/matcher.py
index 9e9d7fa..1d1a53a 100644
--- a/fmatch/matcher.py
+++ b/fmatch/matcher.py
@@ -84,7 +84,7 @@ def match_kube_burner(self,uuids):
     def filter_runs(self,pdata,data):
         columns = ['uuid','jobConfig.jobIterations']
         pdf = pd.json_normalize(pdata)
-        pdf.to_csv("check.csv")
+        #print(pdf)
         pick_df = pd.DataFrame(pdf, columns=columns)
         iterations = pick_df.iloc[0]['jobConfig.jobIterations']
         df = pd.json_normalize(data)
@@ -119,7 +119,6 @@ def burner_results(self,uuid,uuids,index):
 
     def saveResults(self,csv_file_path="output.csv"):
         odf = pd.json_normalize(self.data)
-        odf.to_csv("check.csv")
         columns = ['uuid','timestamp', 'quantileName','metricName', 'P99']
         odf = pd.DataFrame(odf, columns=columns)
         odf = odf.sort_values(by=['timestamp'])

From 557ae7ca32f8916b14af5da841204f898ca10c1d Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally
Date: Thu, 11 Jan 2024 10:41:46 -0500
Subject: [PATCH 3/5] added cpu_avg

---
 fmatch/matcher.py | 86 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 76 insertions(+), 10 deletions(-)

diff --git a/fmatch/matcher.py b/fmatch/matcher.py
index 1d1a53a..37d03a2 100644
--- a/fmatch/matcher.py
+++ b/fmatch/matcher.py
@@ -4,6 +4,7 @@
 
 import os
 import csv
+import json
 
 ES_URL=os.getenv("ES_SERVER")
 
@@ -57,7 +58,7 @@ def get_uuid_by_metadata(self,meta,index=None):
                 }
             },
             "size": 10000
-        } 
+        }
         result = self.es.search(index=index, body=query)
         hits = result.get('hits', {}).get('hits', [])
         uuids=[hit['_source']['uuid'] for hit in hits]
@@ -84,7 +85,6 @@ def match_kube_burner(self,uuids):
     def filter_runs(self,pdata,data):
         columns = ['uuid','jobConfig.jobIterations']
         pdf = pd.json_normalize(pdata)
-        #print(pdf)
         pick_df = pd.DataFrame(pdf, columns=columns)
         iterations = pick_df.iloc[0]['jobConfig.jobIterations']
         df = pd.json_normalize(data)
@@ -111,16 +111,82 @@ def burner_results(self,uuid,uuids,index):
             },
             "size":10000
         }
-        #print(query)
         result=self.es.search(index=index,body=query)
         runs = [item['_source'] for item in result["hits"]["hits"]]
-        self.data=runs
         return runs
-
-    def saveResults(self,csv_file_path="output.csv"):
-        odf = pd.json_normalize(self.data)
-        columns = ['uuid','timestamp', 'quantileName','metricName', 'P99']
-        odf = pd.DataFrame(odf, columns=columns)
+
+    def burner_cpu_results(self,uuids,namespace,index):
+        ids = "\" OR uuid: \"".join(uuids)
+        query = {
+            "aggs": {
+                "time": {
+                    "terms": {
+                        "field": "uuid.keyword",
+                        "size":10000
+                    },
+                    "aggs": {
+                        "time": {
+                            "avg": {
+                                "field": "timestamp"}
+                        }
+                    }
+                },
+                "uuid": {
+                    "terms": {
+                        "field": "uuid.keyword",
+                        "size":10000
+                    },
+                    "aggs": {
+                        "cpu": {
+                            "avg": {
+                                "field": "value"
+                            }
+                        }
+                    }
+                }
+            },
+            "query": {
+                "bool": {
+                    "must": [{
+                        "query_string": {
+                            "query": (
+                                f'( uuid: \"{ids}\" )'
+                                f' AND metricName: "containerCPU"'
+                                f' AND labels.namespace.keyword: {namespace}'
+                            )
+                        }
+                    }]
+                }
+            },
+            "size":10000
+        }
+        runs=self.es.search(index=index,body=query)
+        data=self.parse_burner_cpu_results(runs)
+        return data
+
+    def parse_burner_cpu_results(self,data: dict):
+        res = []
+        stamps = data['aggregations']['time']['buckets']
+        cpu = data['aggregations']['uuid']['buckets']
+        for stamp in stamps :
+            dat = {}
+            dat['uuid'] = stamp['key']
+            dat['timestamp'] = stamp['time']['value_as_string']
+            acpu = next(item for item in cpu if item["key"] == stamp['key'])
+            dat['cpu_avg'] = acpu['cpu']['value']
+            res.append(dat)
+        return res
+
+    def convert_to_df(self,data,columns=None):
+        odf = pd.json_normalize(data)
+        if columns!=None:
+            odf = pd.DataFrame(odf, columns=columns)
         odf = odf.sort_values(by=['timestamp'])
-        odf.to_csv(csv_file_path)
+        return odf
+
+
+    def save_results(self,df,csv_file_path="output.csv",columns=None):
+        if columns!=None:
+            df = pd.DataFrame(df, columns=columns)
+        df.to_csv(csv_file_path)
 

From a72da8d143ef35380442a5ef173777526f9ad6c1 Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally
Date: Thu, 11 Jan 2024 13:11:30 -0500
Subject: [PATCH 4/5] added test and requirements

---
 fmatch/requirements.txt | 11 ++++++++
 fmatch/test_fmatch.py   | 61 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 fmatch/requirements.txt
 create mode 100644 fmatch/test_fmatch.py

diff --git a/fmatch/requirements.txt b/fmatch/requirements.txt
new file mode 100644
index 0000000..75bae53
--- /dev/null
+++ b/fmatch/requirements.txt
@@ -0,0 +1,11 @@
+certifi==2023.11.17
+elastic-transport==8.11.0
+elasticsearch==8.11.1
+elasticsearch7==7.13.0
+numpy==1.26.3
+pandas==2.1.4
+python-dateutil==2.8.2
+pytz==2023.3.post1
+six==1.16.0
+tzdata==2023.4
+urllib3==1.26.18
diff --git a/fmatch/test_fmatch.py b/fmatch/test_fmatch.py
new file mode 100644
index 0000000..f3260f0
--- /dev/null
+++ b/fmatch/test_fmatch.py
@@ -0,0 +1,61 @@
+from matcher import Matcher
+import pandas as pd
+import json
+
+
+
+match=Matcher(index='perf_scale_ci')
+
+df=pd.read_csv("merged.csv")
+ls=df["uuid"].to_list()
+
+for i in ls:
+    print(match.get_metadata_by_uuid(i)["networkType"])
+    #print(json.dumps(match.get_metadata_by_uuid(i),sort_keys=False, indent=4))
+
+meta={}
+meta['benchmark']="cluster-density-v2"
+#meta['masterNodesType'] = "m6a.4xlarge"
+meta['masterNodesType'] = "m6a.xlarge"
+meta['workerNodesType'] = "m6a.xlarge"
+meta['platform']="AWS"
+meta['masterNodesCount']=3
+meta['workerNodesCount']=24
+meta['jobStatus']="success"
+meta['ocpVersion']='4.15'
+meta['networkType']="OVNKubernetes"
+
+
+uuids=match.get_uuid_by_metadata(meta)
+if len(uuids)==0:
+    print("No UUID present for given metadata")
+    exit()
+#print(uuids)
+#print("5eb93cb1-5db1-41cd-997d-4a35741e3236" in uuids)
+runs=match.match_kube_burner(uuids)
+#print("ef1b328b-1843-43f4-8529-5f4b6ceaadaf" in uuids)
+#print(runs)
+ids=match.filter_runs(runs,runs)
+podl=match.burner_results("",ids,"ripsaw-kube-burner*")
+
+kapi_cpu=match.burner_cpu_results(ids,"openshift-kube-apiserver","ripsaw-kube-burner*")
+ovn_cpu=match.burner_cpu_results(ids,"openshift-ovn-kubernetes","ripsaw-kube-burner*")
+etcd_cpu=match.burner_cpu_results(ids,"openshift-etcd","ripsaw-kube-burner*")
+
+
+podl_df=match.convert_to_df(podl,columns=['uuid','timestamp', 'quantileName', 'P99'])
+kapi_cpu_df=match.convert_to_df(kapi_cpu)
+merge_df=pd.merge(kapi_cpu_df,podl_df,on="uuid")
+match.save_results(merge_df,"merged.csv",["uuid","timestamp_x","cpu_avg","P99"])
+match.save_results(kapi_cpu_df,"CPUavg24.csv")
+match.save_results(podl_df,"podlatency24.csv")
+# cdf = pd.json_normalize(cdf)
+# cdf = cdf.sort_values(by=['timestamp'])
+# cdf.to_csv("output2.csv")
+#match.saveResults(nrs)
+#match.saveResults(nrs2,"output2.csv")
+
+# #print(json.dumps(runs[0],sort_keys=False, indent=4))
+# for run in runs:
+#     print(json.dumps(run,indent=4))
+

From b9799e580270a468ae6b78db5102d1cdb0e404ab Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally
Date: Thu, 11 Jan 2024 14:23:08 -0500
Subject: [PATCH 5/5] cleaned up test_fmatch

---
 fmatch/test_fmatch.py | 31 ++++++------------------------
 1 file changed, 6 insertions(+), 25 deletions(-)

diff --git a/fmatch/test_fmatch.py b/fmatch/test_fmatch.py
index f3260f0..4697b03 100644
--- a/fmatch/test_fmatch.py
+++ b/fmatch/test_fmatch.py
@@ -2,20 +2,10 @@
 import pandas as pd
 import json
 
-
-
 match=Matcher(index='perf_scale_ci')
 
-df=pd.read_csv("merged.csv")
-ls=df["uuid"].to_list()
-
-for i in ls:
-    print(match.get_metadata_by_uuid(i)["networkType"])
-    #print(json.dumps(match.get_metadata_by_uuid(i),sort_keys=False, indent=4))
-
 meta={}
 meta['benchmark']="cluster-density-v2"
-#meta['masterNodesType'] = "m6a.4xlarge"
 meta['masterNodesType'] = "m6a.xlarge"
 meta['workerNodesType'] = "m6a.xlarge"
 meta['platform']="AWS"
@@ -25,16 +15,11 @@
 meta['ocpVersion']='4.15'
 meta['networkType']="OVNKubernetes"
 
-
 uuids=match.get_uuid_by_metadata(meta)
 if len(uuids)==0:
     print("No UUID present for given metadata")
     exit()
-#print(uuids)
-#print("5eb93cb1-5db1-41cd-997d-4a35741e3236" in uuids)
 runs=match.match_kube_burner(uuids)
-#print("ef1b328b-1843-43f4-8529-5f4b6ceaadaf" in uuids)
-#print(runs)
 ids=match.filter_runs(runs,runs)
 podl=match.burner_results("",ids,"ripsaw-kube-burner*")
 
@@ -42,20 +27,16 @@
 ovn_cpu=match.burner_cpu_results(ids,"openshift-ovn-kubernetes","ripsaw-kube-burner*")
 etcd_cpu=match.burner_cpu_results(ids,"openshift-etcd","ripsaw-kube-burner*")
 
-
 podl_df=match.convert_to_df(podl,columns=['uuid','timestamp', 'quantileName', 'P99'])
 kapi_cpu_df=match.convert_to_df(kapi_cpu)
 merge_df=pd.merge(kapi_cpu_df,podl_df,on="uuid")
 match.save_results(merge_df,"merged.csv",["uuid","timestamp_x","cpu_avg","P99"])
 match.save_results(kapi_cpu_df,"CPUavg24.csv")
 match.save_results(podl_df,"podlatency24.csv")
-# cdf = pd.json_normalize(cdf)
-# cdf = cdf.sort_values(by=['timestamp'])
-# cdf.to_csv("output2.csv")
-#match.saveResults(nrs)
-#match.saveResults(nrs2,"output2.csv")
-
-# #print(json.dumps(runs[0],sort_keys=False, indent=4))
-# for run in runs:
-#     print(json.dumps(run,indent=4))
+df=pd.read_csv("merged.csv")
+ls=df["uuid"].to_list()
+# Check merged csv data - Debug
+for i in ls:
+    # Debug - Ensure they are all using the same networkType
+    print(match.get_metadata_by_uuid(i)["networkType"])
 
\ No newline at end of file
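
Example usage (not part of the series): a minimal sketch of the Matcher flow these patches introduce, in the same order as fmatch/test_fmatch.py after patch 5. It assumes ES_SERVER is exported to a reachable Elasticsearch endpoint before the import (matcher.py reads it at import time), that the placeholder http_auth credentials in Matcher.__init__ have been replaced, and that the perf_scale_ci and ripsaw-kube-burner* indexes exist on that cluster; the metadata values below are illustrative and copied from the test script, and the file name is hypothetical.

# example_usage.py (hypothetical) -- sketch of the Matcher API from patches 1-3.
# Export ES_SERVER before running; matcher.py reads it at import time, and the
# hard-coded http_auth placeholder in Matcher.__init__ must be replaced first.
import pandas as pd

from fmatch.matcher import Matcher   # or `from matcher import Matcher` inside fmatch/

match = Matcher(index="perf_scale_ci")

# Metadata to match on; values are illustrative, taken from fmatch/test_fmatch.py.
meta = {
    "benchmark": "cluster-density-v2",
    "masterNodesType": "m6a.xlarge",
    "workerNodesType": "m6a.xlarge",
    "platform": "AWS",
    "masterNodesCount": 3,
    "workerNodesCount": 24,
    "jobStatus": "success",
    "ocpVersion": "4.15",
    "networkType": "OVNKubernetes",
}

uuids = match.get_uuid_by_metadata(meta)    # runs whose metadata matches
runs = match.match_kube_burner(uuids)       # their kube-burner jobSummary documents
ids = match.filter_runs(runs, runs)         # keep runs with the same jobIterations

# Passing "" as the first argument keeps every uuid (nothing is removed).
podl = match.burner_results("", ids, "ripsaw-kube-burner*")
kapi_cpu = match.burner_cpu_results(ids, "openshift-kube-apiserver", "ripsaw-kube-burner*")

podl_df = match.convert_to_df(podl, columns=["uuid", "timestamp", "quantileName", "P99"])
kapi_cpu_df = match.convert_to_df(kapi_cpu)

# Both frames carry a timestamp column, so pd.merge suffixes them _x/_y;
# that is why the merged CSV selects "timestamp_x".
merge_df = pd.merge(kapi_cpu_df, podl_df, on="uuid")
match.save_results(merge_df, "merged.csv", ["uuid", "timestamp_x", "cpu_avg", "P99"])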