From 9550782bf0a9edddd58fbc6f1651dbeae7ff8f58 Mon Sep 17 00:00:00 2001 From: Derek Weitzel Date: Tue, 9 May 2023 15:12:07 -0400 Subject: [PATCH 1/3] Adding initial hepscore changes --- opensciencegrid/gracc-apel/Dockerfile | 2 +- opensciencegrid/gracc-apel/apel_report.py | 101 ++++++++++++--- .../gracc-apel/test_apel_report.py | 116 ++++++++++++++++++ 3 files changed, 200 insertions(+), 19 deletions(-) create mode 100644 opensciencegrid/gracc-apel/test_apel_report.py diff --git a/opensciencegrid/gracc-apel/Dockerfile b/opensciencegrid/gracc-apel/Dockerfile index 9dfd0db4..f2e19991 100644 --- a/opensciencegrid/gracc-apel/Dockerfile +++ b/opensciencegrid/gracc-apel/Dockerfile @@ -9,7 +9,7 @@ ARG BASE_OS # install dependencies RUN yum -y install python3 python3-pip && \ - pip3 install elasticsearch-dsl && \ + pip3 install elasticsearch-dsl requests && \ pip3 install argo-ams-library RUN yum -y install http://rpm-repo.argo.grnet.gr/ARGO/devel/centos7/python-argo-ams-library-0.5.5-20210415071520.ff0c536.$BASE_OS.noarch.rpm diff --git a/opensciencegrid/gracc-apel/apel_report.py b/opensciencegrid/gracc-apel/apel_report.py index cf9fa3ca..d4bd7d89 100755 --- a/opensciencegrid/gracc-apel/apel_report.py +++ b/opensciencegrid/gracc-apel/apel_report.py @@ -10,6 +10,10 @@ import operator import sys import os +import requests +import json +from statistics import mean +from math import isclose #logging.basicConfig(level=logging.WARN) @@ -26,6 +30,44 @@ MAXSZ=2**30 MISSING='__MISSING__' +resource_group_map = None + +def get_hs23_portion(resource_group) -> float: + """ + Download the HS23 portion of the OSG site info from OIM. + + :param resource_group: The Topology resource group name. + :return: The HS23 portion of the site, or 0.0 if not found. + """ + global resource_group_map + if resource_group_map == None: + # Download the map from Topology + resp = requests.get("https://topology.opensciencegrid.org/api/resource_group_summary") + if resp.status_code != 200: + #print("Error downloading resource group summary from Topology: {}".format(resp.status_code)) + #return 0.0 + raise Exception("Error downloading resource group summary from Topology: {}".format(resp.status_code)) + + raw_json = resp.json() + # Parse the JSON response + resource_group_map = {} + for resource_group_name in raw_json: + hep_spec_percentages = [] + for resource in raw_json[resource_group_name]["Resources"]['Resource']: + if 'HEPScore23Percentage' in resource['WLCGInformation']: + hep_spec_percentages.append(float(resource['WLCGInformation']['HEPScore23Percentage'])) + if resource_group_name == "Nebraska": + print(hep_spec_percentages) + if len(hep_spec_percentages) > 0: + resource_group_map[resource_group_name] = mean(hep_spec_percentages) + else: + resource_group_map[resource_group_name] = 0.0 + + return resource_group_map.get(resource_group, 0.0) + + + + def add_bkt_metrics(bkt): bkt = bkt.metric('NormalFactor','terms', field='OIM_WLCGAPELNormalFactor') bkt = bkt.metric('CpuDuration_system', 'sum', field='CpuDuration_system') @@ -71,7 +113,7 @@ def gracc_query_apel(year, month): return response # Fixed entries: -fixed_header = "APEL-summary-job-message: v0.3" +fixed_header = "APEL-summary-job-message: v0.4" fixed_separator = "%%" fixed_infrastructure = "Gratia-OSG" fixed_nodecount = 1 @@ -186,29 +228,52 @@ def add_record(recs, vo, site, cores, dn, bkt): def print_header(): print(fixed_header) -def print_rk_recr(year, month, rk, rec): +def print_rk_recr(year, month, rk, rec, output_file=sys.stdout): if rk.dn == "N/A": dn = "generic %s user" % rk.vo else: dn = rk.dn - print("Site:", rk.site) - print("VO:", rk.vo) - print("EarliestEndTime:", rec.mintime) - print("LatestEndTime:", rec.maxtime + 60*60*24 - 1) - print("Month:", "%02d" % month) - print("Year:", year) - print("Infrastructure:", fixed_infrastructure) - print("GlobalUserName:", dn) - print("Processors:", rk.cores) - print("NodeCount:", fixed_nodecount) - print("WallDuration:", rec.walldur) - print("CpuDuration:", rec.cpudur) - print("NormalisedWallDuration:", int(rec.walldur * rec.nf)) - print("NormalisedCpuDuration:", int(rec.cpudur * rec.nf)) - print("NumberOfJobs:", rec.njobs) - print(fixed_separator) + # With no hs23 portion, the submit host is just "hepspec-hosts" + # With hs23 portion, it's both "hepspec-hosts" and "hepscore-hosts" + submit_hosts = ["hepspec-hosts"] + # Check the site name for the HS23 portion + hs23_portion = get_hs23_portion(rk.site) + if not isclose(hs23_portion, 0.0): + submit_hosts.append("hepscore-hosts") + + # Quick lambda to write the lines + write = lambda *line: print(*line, file=output_file) + + for submit_host in range(len(submit_hosts)): + # Index 0 is hepspec-hosts, index 1 is hepscore-hosts + # Do some clever math to get the portion + + if submit_host == 0: + portion = 1.0 - hs23_portion + elif submit_host == 1: + portion = hs23_portion + else: + raise ValueError(f"Invalid submit_host: {submit_host}") + + write("Site:", rk.site) + write("SubmitHost:", submit_hosts[submit_host]) + write("VO:", rk.vo) + write("EarliestEndTime:", rec.mintime) + write("LatestEndTime:", rec.maxtime + 60*60*24 - 1) + write("Month:", "%02d" % month) + write("Year:", year) + write("Infrastructure:", fixed_infrastructure) + write("GlobalUserName:", dn) + write("Processors:", rk.cores) + write("NodeCount:", fixed_nodecount) + write("WallDuration:", int(rec.walldur * portion)) + write("CpuDuration:", int(rec.cpudur * portion)) + write("NormalisedWallDuration:", int(rec.walldur * rec.nf * portion)) + write("NormalisedCpuDuration:", int(rec.cpudur * rec.nf * portion)) + write("NumberOfJobs:", int(rec.njobs * portion)) + write(fixed_separator) def bkt_key_lower(bkt): return bkt.key.lower() diff --git a/opensciencegrid/gracc-apel/test_apel_report.py b/opensciencegrid/gracc-apel/test_apel_report.py new file mode 100644 index 00000000..b488b54a --- /dev/null +++ b/opensciencegrid/gracc-apel/test_apel_report.py @@ -0,0 +1,116 @@ +""" +Testing the apel_report.py script + +""" + +import unittest +from io import StringIO +import apel_report + + +class TestApelReport(unittest.TestCase): + """ + Test the apel_report.py script + """ + + @staticmethod + def parse_reports(reports: str) -> list[dict]: + """ + Parse the reports string into a list of lines. Example report: + + Site: Nebraska + SubmitHost: hepspec-hosts + VO: cms + EarliestEndTime: 0 + LatestEndTime: 86499 + Month: 05 + Year: 2023 + Infrastructure: Gratia-OSG + GlobalUserName: cms + Processors: 1 + NodeCount: 1 + WallDuration: 90 + CpuDuration: 90 + NormalisedWallDuration: 90 + NormalisedCpuDuration: 90 + NumberOfJobs: 0 + %% + Site: Nebraska + SubmitHost: hepscore-hosts + VO: cms + EarliestEndTime: 0 + LatestEndTime: 86499 + Month: 05 + Year: 2023 + Infrastructure: Gratia-OSG + GlobalUserName: cms + Processors: 1 + NodeCount: 1 + WallDuration: 10 + CpuDuration: 10 + NormalisedWallDuration: 10 + NormalisedCpuDuration: 10 + NumberOfJobs: 0 + %% + + """ + # First spit by the separator, %% + reports = reports.strip().split("%%") + to_return = [] + + # Each line is in the form of : + # Split each line by the colon and return the value + for report in reports: + report_dict = {} + for line in report.split("\n"): + if line.strip() == "": + continue + kv = line.split(":") + try: + report_dict[kv[0].strip()] = kv[1].strip() + except IndexError as ie: + print("Failure on line:", line) + raise + to_return.append(report_dict) + + return to_return + + def test_print_rk_recr(self): + """ + Test the print_rk_recr function + """ + # First create the data structures we need + class rk: + site = "Nebraska" + vo = "cms" + cores = 1 + dn = "cms" + + class rec: + mintime = 0 + maxtime = 100 + walldur = 100 + cpudur = 100 + nf = 1 + njobs = 1 + + # Create a textio object to capture the output + # We need to use StringIO because print_rk_recr uses print + # and we want to capture the output + to_write = StringIO() + + # Now call the function + apel_report.print_rk_recr(2023, 5, rk, rec, to_write) + + # Now check the output + reports = self.parse_reports(to_write.getvalue()) + + self.assertEqual("Nebraska", reports[0]['Site']) + self.assertEqual("hepspec-hosts", reports[0]['SubmitHost']) + self.assertEqual("hepscore-hosts", reports[1]['SubmitHost']) + + + +if __name__ == '__main__': + unittest.main() + From 04436d9dcf93cfe707c96a0361dcfcbc70863173 Mon Sep 17 00:00:00 2001 From: Derek Weitzel Date: Thu, 7 Dec 2023 15:57:10 -0600 Subject: [PATCH 2/3] Change file writer from stdout to an argument --- opensciencegrid/gracc-apel/apel_report.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/opensciencegrid/gracc-apel/apel_report.py b/opensciencegrid/gracc-apel/apel_report.py index d4bd7d89..140c6d29 100755 --- a/opensciencegrid/gracc-apel/apel_report.py +++ b/opensciencegrid/gracc-apel/apel_report.py @@ -225,8 +225,8 @@ def add_record(recs, vo, site, cores, dn, bkt): recs[rk] += rec -def print_header(): - print(fixed_header) +def print_header(output_file = sys.stdout): + print(fixed_header, file=output_file) def print_rk_recr(year, month, rk, rec, output_file=sys.stdout): @@ -300,16 +300,15 @@ def main(): print("usage: %s [YEAR MONTH]" % os.path.basename(__file__), file=sys.stderr) sys.exit(0) - orig_stdout = sys.stdout - outfile = "%02d_%d.apel" % (month, year) - sys.stdout = open(outfile, "w") + outfile_name = "%02d_%d.apel" % (month, year) + outfile = open(outfile_name, "w") resp = gracc_query_apel(year, month) aggs = resp.aggregations recs = autodict() - print_header() + print_header(outfile) for cores_bkt in sorted_buckets(aggs.Cores): cores = cores_bkt.key for vo_bkt in sorted_buckets(cores_bkt.VO): @@ -327,10 +326,9 @@ def main(): add_record(recs, vo, site, cores, dn, site_bkt) for rk,rec in sorted(recs.items()): - print_rk_recr(year, month, rk, rec) + print_rk_recr(year, month, rk, rec, outfile) - sys.stdout = orig_stdout - print("wrote: %s" % outfile) + print("wrote: %s" % outfile_name) if __name__ == '__main__': main() From 8b8f3ecfddd43e9e63985f1da3a1359d8967fd90 Mon Sep 17 00:00:00 2001 From: Derek Weitzel Date: Thu, 8 Feb 2024 09:43:42 -0600 Subject: [PATCH 3/3] Adding new format for gracc-apel hepscore23 output --- opensciencegrid/gracc-apel/apel_report.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/opensciencegrid/gracc-apel/apel_report.py b/opensciencegrid/gracc-apel/apel_report.py index 140c6d29..7dad9477 100755 --- a/opensciencegrid/gracc-apel/apel_report.py +++ b/opensciencegrid/gracc-apel/apel_report.py @@ -56,8 +56,6 @@ def get_hs23_portion(resource_group) -> float: for resource in raw_json[resource_group_name]["Resources"]['Resource']: if 'HEPScore23Percentage' in resource['WLCGInformation']: hep_spec_percentages.append(float(resource['WLCGInformation']['HEPScore23Percentage'])) - if resource_group_name == "Nebraska": - print(hep_spec_percentages) if len(hep_spec_percentages) > 0: resource_group_map[resource_group_name] = mean(hep_spec_percentages) else: @@ -252,8 +250,10 @@ def print_rk_recr(year, month, rk, rec, output_file=sys.stdout): if submit_host == 0: portion = 1.0 - hs23_portion + metric_name = "hepspec" elif submit_host == 1: portion = hs23_portion + metric_name = "HEPscore23" else: raise ValueError(f"Invalid submit_host: {submit_host}") @@ -270,8 +270,8 @@ def print_rk_recr(year, month, rk, rec, output_file=sys.stdout): write("NodeCount:", fixed_nodecount) write("WallDuration:", int(rec.walldur * portion)) write("CpuDuration:", int(rec.cpudur * portion)) - write("NormalisedWallDuration:", int(rec.walldur * rec.nf * portion)) - write("NormalisedCpuDuration:", int(rec.cpudur * rec.nf * portion)) + write("NormalisedWallDuration:", "{" + metric_name + ": " + str(int(rec.walldur * rec.nf * portion)) + "}") + write("NormalisedCpuDuration:", "{" + metric_name + ": " + str(int(rec.cpudur * rec.nf * portion)) + "}") write("NumberOfJobs:", int(rec.njobs * portion)) write(fixed_separator)