diff --git a/.gitignore b/.gitignore
index 68bc17f..9b495d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -158,3 +158,6 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+.DS_Store
+fmatch/main.py
diff --git a/fmatch/README.md b/fmatch/README.md
new file mode 100644
index 0000000..c4d1e96
--- /dev/null
+++ b/fmatch/README.md
@@ -0,0 +1,2 @@
+# fmatch
+This common python library is dedicated to metadata matching and finding regressions.
\ No newline at end of file
diff --git a/fmatch/__init__.py b/fmatch/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fmatch/matcher.py b/fmatch/matcher.py
new file mode 100644
index 0000000..37d03a2
--- /dev/null
+++ b/fmatch/matcher.py
@@ -0,0 +1,192 @@
+from elasticsearch7 import Elasticsearch
+from elasticsearch.exceptions import NotFoundError
+import pandas as pd
+
+import os
+import csv
+import json
+
+ES_URL=os.getenv("ES_SERVER")
+
+class Matcher:
+    def __init__(self, index="perf_scale_ci"):
+        self.index=index
+        self.es_url=ES_URL
+        self.es=Elasticsearch([self.es_url],http_auth=["username","password"])
+        self.data=None
+
+    def get_metadata_by_uuid(self,uuid,index=None):
+        if index==None:
+            index=self.index
+        query = {
+            "query": {
+                "match": {
+                    "uuid": uuid
+                }
+            }
+        }
+        try:
+            result = self.es.search(index=index, body=query)
+            hits = result.get('hits', {}).get('hits', [])
+            if hits:
+                return dict(hits[0]['_source'])
+            else:
+                return None
+        except NotFoundError:
+            print(f"UUID {uuid} not found in index {index}")
+            return None
+
+
+    def get_uuid_by_metadata(self,meta,index=None):
+        if index==None:
+            index=self.index
+        version=meta["ocpVersion"][:4]
+        query = {
+            "query": {
+                "bool": {
+                    "must": [
+                        {
+                            "query_string": {
+                                "query": ' AND '.join([
+                                    f'{field}: "{value}"' if isinstance(value, str) else f'{field}: {value}'
+                                    for field, value in meta.items() if field!="ocpVersion"
+                                ]) +
+                                f' AND ocpVersion: {version}* AND jobStatus: success'
+                            }
+                        }
+                    ]
+                }
+            },
+            "size": 10000
+        }
+        result = self.es.search(index=index, body=query)
+        hits = result.get('hits', {}).get('hits', [])
+        uuids=[hit['_source']['uuid'] for hit in hits]
+        return uuids
+
+    def match_kube_burner(self,uuids):
+        index = "ripsaw-kube-burner*"
+        ids = "\" OR uuid: \"".join(uuids)
+        query = {
+            "query": {
+                "query_string": {
+                    "query": (
+                        f'( uuid: \"{ids}\" )'
+                        f' AND metricName: "jobSummary"'
+                    )
+                }
+            },
+            "size":10000
+        }
+        result=self.es.search(index=index,body=query)
+        runs = [item['_source'] for item in result["hits"]["hits"]]
+        return runs
+
+    def filter_runs(self,pdata,data):
+        columns = ['uuid','jobConfig.jobIterations']
+        pdf = pd.json_normalize(pdata)
+        pick_df = pd.DataFrame(pdf, columns=columns)
+        iterations = pick_df.iloc[0]['jobConfig.jobIterations']
+        df = pd.json_normalize(data)
+        ndf = pd.DataFrame(df, columns=columns)
+        ids_df = ndf.loc[df['jobConfig.jobIterations'] == iterations ]
+        return ids_df['uuid'].to_list()
+
+    def burner_results(self,uuid,uuids,index):
+        if len(uuids) > 1 :
+            if len(uuid) > 0 :
+                uuids.remove(uuid)
+        if len(uuids) < 1 :
+            return []
+        ids = "\" OR uuid: \"".join(uuids)
+        query = {
+            "query": {
+                "query_string": {
+                    "query": (
+                        f'( uuid: \"{ids}\" )'
+                        f' AND metricName: "podLatencyQuantilesMeasurement"'
+                        f' AND quantileName: "Ready"'
+                    )
+                }
+            },
+            "size":10000
+        }
+        result=self.es.search(index=index,body=query)
+        runs = [item['_source'] for item in result["hits"]["hits"]]
+        return runs
+
+    def burner_cpu_results(self,uuids,namespace,index):
+        ids = "\" OR uuid: \"".join(uuids)
+        query = {
+            "aggs": {
+                "time": {
+                    "terms": {
+                        "field": "uuid.keyword",
+                        "size":10000
+                    },
+                    "aggs": {
+                        "time": {
+                            "avg": {
+                                "field": "timestamp"}
+                        }
+                    }
+                },
+                "uuid": {
+                    "terms": {
+                        "field": "uuid.keyword",
+                        "size":10000
+                    },
+                    "aggs": {
+                        "cpu": {
+                            "avg": {
+                                "field": "value"
+                            }
+                        }
+                    }
+                }
+            },
+            "query": {
+                "bool": {
+                    "must": [{
+                        "query_string": {
+                            "query": (
+                                f'( uuid: \"{ids}\" )'
+                                f' AND metricName: "containerCPU"'
+                                f' AND labels.namespace.keyword: {namespace}'
+                            )
+                        }
+                    }]
+                }
+            },
+            "size":10000
+        }
+        runs=self.es.search(index=index,body=query)
+        data=self.parse_burner_cpu_results(runs)
+        return data
+
+    def parse_burner_cpu_results(self,data: dict):
+        res = []
+        stamps = data['aggregations']['time']['buckets']
+        cpu = data['aggregations']['uuid']['buckets']
+        for stamp in stamps :
+            dat = {}
+            dat['uuid'] = stamp['key']
+            dat['timestamp'] = stamp['time']['value_as_string']
+            acpu = next(item for item in cpu if item["key"] == stamp['key'])
+            dat['cpu_avg'] = acpu['cpu']['value']
+            res.append(dat)
+        return res
+
+    def convert_to_df(self,data,columns=None):
+        odf = pd.json_normalize(data)
+        if columns!=None:
+            odf = pd.DataFrame(odf, columns=columns)
+        odf = odf.sort_values(by=['timestamp'])
+        return odf
+
+
+    def save_results(self,df,csv_file_path="output.csv",columns=None):
+        if columns!=None:
+            df = pd.DataFrame(df, columns=columns)
+        df.to_csv(csv_file_path)
+
diff --git a/fmatch/requirements.txt b/fmatch/requirements.txt
new file mode 100644
index 0000000..75bae53
--- /dev/null
+++ b/fmatch/requirements.txt
@@ -0,0 +1,11 @@
+certifi==2023.11.17
+elastic-transport==8.11.0
+elasticsearch==8.11.1
+elasticsearch7==7.13.0
+numpy==1.26.3
+pandas==2.1.4
+python-dateutil==2.8.2
+pytz==2023.3.post1
+six==1.16.0
+tzdata==2023.4
+urllib3==1.26.18
diff --git a/fmatch/test_fmatch.py b/fmatch/test_fmatch.py
new file mode 100644
index 0000000..4697b03
--- /dev/null
+++ b/fmatch/test_fmatch.py
@@ -0,0 +1,42 @@
+from matcher import Matcher
+import pandas as pd
+import json
+
+match=Matcher(index='perf_scale_ci')
+
+meta={}
+meta['benchmark']="cluster-density-v2"
+meta['masterNodesType'] = "m6a.xlarge"
+meta['workerNodesType'] = "m6a.xlarge"
+meta['platform']="AWS"
+meta['masterNodesCount']=3
+meta['workerNodesCount']=24
+meta['jobStatus']="success"
+meta['ocpVersion']='4.15'
+meta['networkType']="OVNKubernetes"
+
+uuids=match.get_uuid_by_metadata(meta)
+if len(uuids)==0:
+    print("No UUID present for given metadata")
+    exit()
+runs=match.match_kube_burner(uuids)
+ids=match.filter_runs(runs,runs)
+podl=match.burner_results("",ids,"ripsaw-kube-burner*")
+
+kapi_cpu=match.burner_cpu_results(ids,"openshift-kube-apiserver","ripsaw-kube-burner*")
+ovn_cpu=match.burner_cpu_results(ids,"openshift-ovn-kubernetes","ripsaw-kube-burner*")
+etcd_cpu=match.burner_cpu_results(ids,"openshift-etcd","ripsaw-kube-burner*")
+
+podl_df=match.convert_to_df(podl,columns=['uuid','timestamp', 'quantileName', 'P99'])
+kapi_cpu_df=match.convert_to_df(kapi_cpu)
+merge_df=pd.merge(kapi_cpu_df,podl_df,on="uuid")
+match.save_results(merge_df,"merged.csv",["uuid","timestamp_x","cpu_avg","P99"])
+match.save_results(kapi_cpu_df,"CPUavg24.csv")
+match.save_results(podl_df,"podlatency24.csv")
+
+df=pd.read_csv("merged.csv")
+ls=df["uuid"].to_list()
+# Check merged csv data - Debug
+for i in ls:
+    # Debug - Ensure they are all using the same networkType
+    print(match.get_metadata_by_uuid(i)["networkType"])
\ No newline at end of file
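
For orientation, a minimal usage sketch of the `Matcher` API introduced above: look up run UUIDs by metadata, then fetch the stored metadata document for one of them. This is illustrative only; it assumes `ES_SERVER` is exported and points at a reachable Elasticsearch instance holding the `perf_scale_ci` index, that the hard-coded `http_auth` credentials in `Matcher.__init__` have been adjusted, and that it is run from the repository root so `fmatch.matcher` is importable. The example metadata values are taken from `test_fmatch.py`.

```python
import os

from fmatch.matcher import Matcher

# ES_SERVER must be set before a Matcher is created, e.g.
#   export ES_SERVER="https://es.example.com:9200"   (placeholder URL)
assert os.getenv("ES_SERVER"), "ES_SERVER is not set"

match = Matcher(index="perf_scale_ci")

# Each key/value pair is ANDed into the query_string built by
# get_uuid_by_metadata(); ocpVersion is prefix-matched (4.15*) and
# "jobStatus: success" is appended automatically.
meta = {
    "benchmark": "cluster-density-v2",
    "platform": "AWS",
    "workerNodesCount": 24,
    "networkType": "OVNKubernetes",
    "ocpVersion": "4.15",
}

uuids = match.get_uuid_by_metadata(meta)
print(f"{len(uuids)} matching runs")

# Round-trip: pull the full metadata document for one matching run.
if uuids:
    doc = match.get_metadata_by_uuid(uuids[0])
    print(doc["ocpVersion"], doc["networkType"])
```

From there, `match_kube_burner()`, `filter_runs()`, `burner_results()` and `burner_cpu_results()` can be chained as `test_fmatch.py` does to produce the merged latency/CPU CSVs.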