diff --git a/explorer/dxt.py b/explorer/dxt.py
index 5c52378..10eaf88 100644
--- a/explorer/dxt.py
+++ b/explorer/dxt.py
@@ -36,9 +36,16 @@
 import pyarrow.feather as feather
 
 # import darshan.backend.cffi_backend as darshanll
+from bs4 import BeautifulSoup
 from explorer import version as dxt_version
 from packaging import version
+from recorder_utils import RecorderReader
+from recorder_utils.build_offset_intervals import build_offset_intervals
+
+LOG_TYPE_DARSHAN = 0
+LOG_TYPE_RECORDER = 1
+
 
 class Explorer:
     def __init__(self, args):
@@ -73,82 +80,85 @@ def configure_log(self):
     def run(self):
         self.explorer_start_time = time.time()
 
-        self.is_darshan_file(self.args.darshan)
+        log_type = self.check_log_type(self.args.log_path)
 
         if not self.args.prefix:
             self.prefix = os.getcwd()
         else:
             self.prefix = self.args.prefix
 
-        # log = darshanll.log_open(self.args.darshan)
-        # information = darshanll.log_get_job(log)
-
-        # log_version = information["metadata"]["lib_ver"]
-        # library_version = darshanll.darshan.backend.cffi_backend.get_lib_version()
-
-        filename = self.args.darshan
-        # filename = self.check_log_version(
-        #     self.args.darshan, log_version, library_version
-        # )
-
-        report = darshan.DarshanReport(filename, read_all=True)
-        if "DXT_POSIX" not in report.records and "DXT_MPIIO" not in report.records:
-            self.logger.info("No DXT trace data found in file: {}".format(filename))
-            exit()
+        report = None
+        filename = self.args.log_path
+        if log_type == LOG_TYPE_DARSHAN:
+            # log = darshanll.log_open(self.args.log_path)
+            # information = darshanll.log_get_job(log)
+            # log_version = information["metadata"]["lib_ver"]
+            # library_version = darshanll.darshan.backend.cffi_backend.get_lib_version()
+            # filename = self.check_log_version(
+            #     self.args.log_path, log_version, library_version
+            # )
+            report = darshan.DarshanReport(filename, read_all=True)
+            if "DXT_POSIX" not in report.records and "DXT_MPIIO" not in report.records:
+                self.logger.info("No DXT trace data found in file: {}".format(filename))
+                exit()
+        elif log_type == LOG_TYPE_RECORDER:
+            report = RecorderReader(filename)
 
         if self.args.list_files:
-            self.list_files(report)
+            self.list_files(report, log_type)
 
             exit()
 
-        self.generate_plot(filename, report)
+        self.generate_plot(filename, report, log_type)
 
         if self.args.transfer:
-            self.generate_transfer_plot(filename, report)
+            self.generate_transfer_plot(filename, report, log_type)
 
         if self.args.spatiality:
-            self.generate_spatiality_plot(filename, report)
+            self.generate_spatiality_plot(filename, report, log_type)
 
         if self.args.io_phase:
-            self.generate_phase_plot(filename, report)
+            self.generate_phase_plot(filename, report, log_type)
 
         if self.args.ost_usage_operation:
-            self.generate_ost_usage_operation_plot(filename, report)
+            self.generate_ost_usage_operation_plot(filename, report, log_type)
 
         if self.args.ost_usage_transfer:
-            self.generate_ost_usage_transfer_plot(filename, report)
-
-        self.generate_index(filename, report)
-
-    def get_directory(self):
-        """Determine the install path to find the execution scripts."""
-        try:
-            root = __file__
-            if os.path.islink(root):
-                root = os.path.realpath(root)
+            self.generate_ost_usage_transfer_plot(filename, report, log_type)
 
-            return os.path.dirname(os.path.abspath(root))
-        except Exception:
-            return None
+        self.generate_index(filename, report, log_type)
 
-    def is_darshan_file(self, file):
-        """Check if the provided file exists and is a .darshan file."""
-        if not os.path.exists(self.args.darshan):
-            self.logger.error("{}: NOT FOUND".format(file))
-
-            exit(-1)
-
-        if not self.args.darshan.endswith(".darshan"):
-            self.logger.error("{} is not a .darshan file".format(file))
-
-            exit(-1)
+    def check_log_type(self, path):
+        if path.endswith(".darshan"):
+            if not os.path.isfile(path):
+                self.logger.error('Unable to open .darshan file.')
+                sys.exit(os.EX_NOINPUT)
+            else:
+                return LOG_TYPE_DARSHAN
+        else:
+            # check whether the path is a valid recorder log folder
+            if not os.path.isdir(path):
+                self.logger.error('Unable to open recorder folder.')
+                sys.exit(os.EX_NOINPUT)
+            else:
+                return LOG_TYPE_RECORDER
 
-    def list_files(self, report, display=True):
+    def list_files(self, report, log_type, display=True):
         """Create a dictionary of file id as key and file name as value."""
         total = 0
 
-        file_ids = report.log["name_records"]
-        for key, value in dict(file_ids).items():
-            if value == "":
-                del file_ids[key]
-            if value == "":
-                del file_ids[key]
+        file_ids = {}
+
+        if log_type == LOG_TYPE_DARSHAN:
+            file_ids = report.log["name_records"]
+            for key, value in dict(file_ids).items():
+                if value == "":
+                    del file_ids[key]
+
+        elif log_type == LOG_TYPE_RECORDER:
+            ranks = report.GM.total_ranks
+            for rank in range(ranks):
+                file_ids.update(report.LMs[rank].filemap)
 
         for file_id, file_name in file_ids.items():
             total += 1
@@ -156,11 +166,13 @@
                 self.logger.info("FILE: {} (ID {})".format(file_name, file_id))
 
         if total == 0:
-            self.logger.critical("No DXT records found in {}".format(self.args.darshan))
-            self.logger.critical(
-                "To enable Darshan DXT, set this before your application runs:"
-            )
-            self.logger.critical("$ export DXT_ENABLE_IO_TRACE=1")
+            self.logger.critical("No DXT records found in {}".format(self.args.log_path))
+
+            if log_type == LOG_TYPE_DARSHAN:
+                self.logger.critical(
+                    "To enable Darshan DXT, set this before your application runs:"
+                )
+                self.logger.critical("$ export DXT_ENABLE_IO_TRACE=1")
 
             exit()
@@ -253,7 +265,7 @@ def dxt_record_attach_osts_inplace(self, report, record, lustre_records_by_id):
         return rec
 
     def create_dataframe(
-        self, file_id, subset_dataset_file, df_posix=None, df_mpiio=None
+        self, file_id, subset_dataset_file, log_type, df_posix=None, df_mpiio=None, df_hdf5=None
     ):
         """Create a dataframe from parsed records."""
@@ -273,68 +285,90 @@ def create_dataframe(
         runtime = 0
 
         df = []
+        result = pd.DataFrame()
 
-        if not df_posix.empty:
-            df_posix_temp = df_posix.loc[df_posix["id"] == file_id]
-            for index, row in df_posix_temp.iterrows():
-                write_segments = row["write_segments"]
-                write_segments["operation"] = "write"
-                read_segments = row["read_segments"]
-                read_segments["operation"] = "read"
-
-                temp_result = pd.concat([write_segments, read_segments])
-                temp_result["file_id"] = file_id
-                temp_result["rank"] = row["rank"]
-                temp_result["api"] = "POSIX"
-
-                temp_result = temp_result.rename(
-                    columns={"length": "size", "start_time": "start", "end_time": "end"}
-                )
+        if log_type == LOG_TYPE_DARSHAN:
+            if not df_posix.empty:
+                df_posix_temp = df_posix.loc[df_posix["id"] == file_id]
+                for index, row in df_posix_temp.iterrows():
+                    write_segments = row["write_segments"]
+                    write_segments["operation"] = "write"
+                    read_segments = row["read_segments"]
+                    read_segments["operation"] = "read"
+
+                    temp_result = pd.concat([write_segments, read_segments])
+                    temp_result["file_id"] = file_id
+                    temp_result["rank"] = row["rank"]
+                    temp_result["api"] = "POSIX"
+
+                    temp_result = temp_result.rename(
+                        columns={"length": "size", "start_time": "start", "end_time": "end"}
+                    )
 
-                total_logs = total_logs + len(temp_result)
-                runtime = max(runtime, temp_result["end"].max())
+                    total_logs = total_logs + len(temp_result)
+                    runtime = max(runtime, temp_result["end"].max())
 
-                temp_result["start"] = temp_result["start"].round(decimals=4)
-                temp_result["end"] = temp_result["end"].round(decimals=4)
+                    temp_result["start"] = temp_result["start"].round(decimals=4)
+                    temp_result["end"] = temp_result["end"].round(decimals=4)
 
-                temp_result.index.name = "segment"
-                temp_result.reset_index(inplace=True)
-                temp_result = temp_result.reindex(columns=column_names)
+                    temp_result.index.name = "segment"
+                    temp_result.reset_index(inplace=True)
+                    temp_result = temp_result.reindex(columns=column_names)
 
-                df.append(temp_result)
+                    df.append(temp_result)
 
-        if not df_mpiio.empty:
-            df_mpiio_temp = df_mpiio.loc[df_mpiio["id"] == file_id]
-            for index, row in df_mpiio_temp.iterrows():
-                write_segments = row["write_segments"]
-                write_segments["operation"] = "write"
-                read_segments = row["read_segments"]
-                read_segments["operation"] = "read"
+            if not df_mpiio.empty:
+                df_mpiio_temp = df_mpiio.loc[df_mpiio["id"] == file_id]
+                for index, row in df_mpiio_temp.iterrows():
+                    write_segments = row["write_segments"]
+                    write_segments["operation"] = "write"
+                    read_segments = row["read_segments"]
+                    read_segments["operation"] = "read"
 
-                temp_result = pd.concat([write_segments, read_segments])
-                temp_result["file_id"] = file_id
-                temp_result["rank"] = row["rank"]
-                temp_result["api"] = "MPIIO"
+                    temp_result = pd.concat([write_segments, read_segments])
+                    temp_result["file_id"] = file_id
+                    temp_result["rank"] = row["rank"]
+                    temp_result["api"] = "MPIIO"
 
-                temp_result = temp_result.rename(
-                    columns={"length": "size", "start_time": "start", "end_time": "end"}
-                )
+                    temp_result = temp_result.rename(
+                        columns={"length": "size", "start_time": "start", "end_time": "end"}
+                    )
 
-                total_logs = total_logs + len(temp_result)
-                runtime = max(runtime, temp_result["end"].max())
+                    total_logs = total_logs + len(temp_result)
+                    runtime = max(runtime, temp_result["end"].max())
 
-                temp_result["start"] = temp_result["start"].round(decimals=4)
-                temp_result["end"] = temp_result["end"].round(decimals=4)
+                    temp_result["start"] = temp_result["start"].round(decimals=4)
+                    temp_result["end"] = temp_result["end"].round(decimals=4)
 
-                temp_result.index.name = "segment"
-                temp_result.reset_index(inplace=True)
-                temp_result = temp_result.reindex(columns=column_names)
+                    temp_result.index.name = "segment"
+                    temp_result.reset_index(inplace=True)
+                    temp_result = temp_result.reindex(columns=column_names)
 
-                df.append(temp_result)
+                    df.append(temp_result)
 
-        result = pd.DataFrame()
-        if df:
-            result = pd.concat(df, axis=0, ignore_index=True)
+            if df:
+                result = pd.concat(df, axis=0, ignore_index=True)
+
+        elif log_type == LOG_TYPE_RECORDER:
+            df_posix_temp, df_mpiio_temp, df_hdf5_temp = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
+
+            if not df_posix.empty:
+                df_posix_temp = df_posix.loc[df_posix["file_id"] == file_id]
+            if not df_mpiio.empty:
+                df_mpiio_temp = df_mpiio.loc[df_mpiio["file_id"] == file_id]
+            if not df_hdf5.empty:
+                df_hdf5_temp = df_hdf5.loc[df_hdf5["file_id"] == file_id]
+
+            if not df_posix_temp.empty or not df_mpiio_temp.empty or not df_hdf5_temp.empty:
+                result = pd.concat([df_posix_temp, df_mpiio_temp, df_hdf5_temp], ignore_index=True)
+                result = result.reindex(columns=column_names)
+                total_logs = len(result)
+                runtime = result['end'].max()
+                # The next 4 lines add demo HDF5 rows for the plot (debug only)
+                if self.args.debug:
+                    df_hdf5_temp = {"api": "H5F"}
+                    for _ in range(3):
+                        result.loc[len(result.index)] = df_hdf5_temp
 
         feather.write_feather(
             result, subset_dataset_file + ".dxt", compression="uncompressed"
@@ -342,8 +376,9 @@ def create_dataframe(
         if self.args.csv:
             result.to_csv(
-                subset_dataset_file + ".dxt.csv", mode="a", index=False, header=True
+                subset_dataset_file + ".dxt.csv", mode="w", index=False, header=True
             )
+
         column_names = ["total_logs", "runtime"]
         result = pd.DataFrame(columns=column_names)
@@ -353,53 +388,84 @@
         result.to_csv(
             subset_dataset_file + ".summary.dxt.csv", mode="w", index=False, header=True
         )
 
-    def subset_dataset(self, file, file_ids, report):
+    def subset_dataset(self, file, file_ids, report, log_type):
         """Subset the dataset based on file id and save to a csv file."""
         self.logger.info("generating dataframes")
-        lustre_records_by_id = self.get_id_to_record_mapping(report, "LUSTRE")
 
-        if lustre_records_by_id:
+        if log_type == LOG_TYPE_DARSHAN:
+            df_posix, df_mpiio = [], []
+            lustre_records_by_id = self.get_id_to_record_mapping(report, "LUSTRE")
 
-            def graceful_wrapper(r, rec, lustre_records_by_id):
-                try:
-                    self.dxt_record_attach_osts_inplace(
-                        report, rec, lustre_records_by_id
-                    )
-                except Exception:
-                    pass
+            if lustre_records_by_id:
 
-            list(
-                map(
-                    lambda rec: graceful_wrapper(report, rec, lustre_records_by_id),
-                    report.records["DXT_POSIX"],
+                def graceful_wrapper(r, rec, lustre_records_by_id):
+                    try:
+                        self.dxt_record_attach_osts_inplace(
+                            report, rec, lustre_records_by_id
+                        )
+                    except Exception:
+                        pass
+
+                list(
+                    map(
+                        lambda rec: graceful_wrapper(report, rec, lustre_records_by_id),
+                        report.records["DXT_POSIX"],
+                    )
                 )
-            )
-            list(
-                map(
-                    lambda rec: graceful_wrapper(report, rec, lustre_records_by_id),
-                    report.records["DXT_MPIIO"],
+                list(
+                    map(
+                        lambda rec: graceful_wrapper(report, rec, lustre_records_by_id),
+                        report.records["DXT_MPIIO"],
+                    )
                 )
-            )
 
-        df_posix = []
-        if "DXT_POSIX" in report.records:
-            df_posix = report.records["DXT_POSIX"].to_df()
+            if "DXT_POSIX" in report.records:
+                df_posix = report.records["DXT_POSIX"].to_df()
 
-        df_mpiio = []
-        if "DXT_MPIIO" in report.records:
-            df_mpiio = report.records["DXT_MPIIO"].to_df()
+            if "DXT_MPIIO" in report.records:
+                df_mpiio = report.records["DXT_MPIIO"].to_df()
 
-        df_posix = pd.DataFrame(df_posix)
-        df_mpiio = pd.DataFrame(df_mpiio)
+            df_posix = pd.DataFrame(df_posix)
+            df_mpiio = pd.DataFrame(df_mpiio)
 
-        for file_id in file_ids:
-            subset_dataset_file = "{}.{}".format(file, file_id)
+            for file_id in file_ids:
+                subset_dataset_file = "{}.{}".format(file, file_id)
 
-            if os.path.exists(subset_dataset_file + ".dxt"):
-                self.logger.debug("using existing parsed Darshan file")
-                continue
+                if os.path.exists(subset_dataset_file + ".dxt"):
+                    self.logger.debug("using existing parsed log file")
+                    continue
 
-            self.create_dataframe(file_id, subset_dataset_file, df_posix, df_mpiio)
+                self.create_dataframe(file_id, subset_dataset_file, log_type, df_posix, df_mpiio)
+
+        elif log_type == LOG_TYPE_RECORDER:
+            def add_api(row):
+                if 'MPI' in row['function']:
+                    return 'MPIIO'
+                elif 'H5' in row['function']:
+                    return 'H5F'
+                else:
+                    return 'POSIX'
+
+            def add_operation(row):
+                if 'read' in row['function']:
+                    return 'read'
+                else:
+                    return 'write'
+
+            df_intervals = build_offset_intervals(report)
+            df_intervals['api'] = df_intervals.apply(add_api, axis=1)
+            df_intervals['operation'] = df_intervals.apply(add_operation, axis=1)
+            df_posix = df_intervals[(df_intervals['api'] == 'POSIX')]
+            df_mpiio = df_intervals[(df_intervals['api'] == 'MPIIO')]
+            df_hdf5 = df_intervals[(df_intervals['api'] == 'H5F')]
+
+            for file_id in file_ids:
+                subset_dataset_file = "{}.{}".format(file, file_id)
+
+                if os.path.exists(subset_dataset_file + ".dxt"):
+                    self.logger.debug("using existing parsed log file")
+                    continue
+
+                self.create_dataframe(file_id, subset_dataset_file, log_type, df_posix, df_mpiio, df_hdf5)
 
     def merge_overlapping_io_phases(self, overlapping_df, df, module):
         io_phases_df = pd.DataFrame(
@@ -564,7 +630,18 @@
                 overlapping_MPIIO, df_mpiio, "MPIIO"
             )
 
-            frames = [io_phases_df_posix, io_phases_df_mpiio]
+            df_hdf5 = df[df["api"] == "H5F"]
+            df_hdf5 = df_hdf5.sort_values("start")
+
+            overlapping_HDF5 = overlapping[
+                overlapping["Chromosome"] == "H5F"
+            ]
+
+            io_phases_df_hdf5 = self.merge_overlapping_io_phases(
+                overlapping_HDF5, df_hdf5, "H5F"
+            )
+
+            frames = [io_phases_df_posix, io_phases_df_mpiio, io_phases_df_hdf5]
             result = pd.concat(frames)
             feather.write_feather(result, phases_file)
         else:
@@ -619,14 +696,25 @@ def calculate_io_phases(
                 overlapping_MPIIO, df_mpiio, "MPIIO"
             )
 
-            frames = [io_phases_df_posix, io_phases_df_mpiio]
+            df_hdf5 = df[df["api"] == "H5F"]
+            df_hdf5 = df_hdf5.sort_values("start")
+
+            overlapping_HDF5 = overlapping[
+                overlapping["Chromosome"] == "H5F"
+            ]
+
+            io_phases_df_hdf5 = self.merge_overlapping_io_phases(
+                overlapping_HDF5, df_hdf5, "H5F"
+            )
+
+            frames = [io_phases_df_posix, io_phases_df_mpiio, io_phases_df_hdf5]
             result = pd.concat(frames)
             feather.write_feather(result, phases_file)
         else:
             result = pd.DataFrame()
             feather.write_feather(result, phases_file)
 
-    def generate_plot(self, file, report):
+    def generate_plot(self, file, report, log_type):
         """Generate an interactive operation plot."""
         limits = ""
         insights = ""
@@ -649,12 +737,12 @@
         if self.args.unbalanced_workload:
             insights += " -1 {} ".format(self.args.unbalanced_workload)
 
-        file_ids = self.list_files(report)
+        file_ids = self.list_files(report, log_type)
 
         if len(file_ids) == 0:
             self.logger.info("No data to generate plots")
         else:
-            self.subset_dataset(file, file_ids, report)
+            self.subset_dataset(file, file_ids, report, log_type)
 
             if self.args.stragglers:
                 insights += " -2 {} ".format(self.args.stragglers)
@@ -732,9 +820,10 @@
                         path = "plots/operation.py"
                         script = pkg_resources.resource_filename(__name__, path)
 
-                        command = "python3 {} -f {}.{}.{}-{}.dxt -i {}.{}.{}-{}.io_phases {} {} -o {} -x {} -t {} -r {}".format(
+                        command = "python3 {} -p {} -f {}.{}.{}-{}.dxt -i {}.{}.{}-{}.io_phases {} {} -o {} -x {} -t {} -r {}".format(
                             script,
                             file,
+                            file,
                            file_id,
                             "snapshot",
                             snapshot,
@@ -760,17 +849,20 @@
                         s = subprocess.run(args)
 
                         if s.returncode == 0:
+                            if file_id not in self.generated_files:
+                                self.generated_files[file_id] = []
+
                             if os.path.exists(output_file):
                                 self.logger.info("SUCCESS: {}".format(output_file))
+
+                                if self.args.browser:
+                                    webbrowser.open("file://{}".format(output_file), new=2)
+
+                                self.generated_files[file_id].append(output_file)
+
                             else:
-                                self.logger.warning(
-                                    "no data to generate interactive plots"
-                                )
-
-                            if self.args.browser:
-                                webbrowser.open(
-                                    "file://{}".format(output_file), new=2
-                                )
+                                self.logger.warning("no data to generate interactive plots")
+
                         else:
                             self.logger.error(
                                 "failed to generate the interactive plots (error %s)",
@@ -794,9 +886,10 @@
                 path = "plots/operation.py"
                 script = pkg_resources.resource_filename(__name__, path)
 
-                command = "python3 {} -f {}.{}.dxt -i {}.{}.io_phases{} {} -o {} -x {}".format(
+                command = "python3 {} -p {} -f {}.{}.dxt -i {}.{}.io_phases{} {} -o {} -x {}".format(
                     script,
                     file,
+                    file,
                     file_id,
                     file,
                     file_id,
@@ -815,18 +908,20 @@
                 s = subprocess.run(args)
 
                 if s.returncode == 0:
+                    if file_id not in self.generated_files:
+                        self.generated_files[file_id] = []
+
                     if os.path.exists(output_file):
                         self.logger.info("SUCCESS: {}".format(output_file))
-                    else:
-                        self.logger.warning("no data to generate interactive plots")
 
-                    if self.args.browser:
-                        webbrowser.open("file://{}".format(output_file), new=2)
+                        if self.args.browser:
+                            webbrowser.open("file://{}".format(output_file), new=2)
 
-                    if file_id not in self.generated_files:
-                        self.generated_files[file_id] = []
+                        self.generated_files[file_id].append(output_file)
+
+                    else:
+                        self.logger.warning("no data to generate interactive plots")
 
-                    self.generated_files[file_id].append(output_file)
                 else:
                     self.logger.error(
                         "failed to generate the interactive plots (error %s)",
@@ -835,7 +930,7 @@
 
                     sys.exit(os.EX_SOFTWARE)
 
-    def generate_transfer_plot(self, file, report):
+    def generate_transfer_plot(self, file, report, log_type):
         """Generate an interactive transfer plot."""
         limits = ""
@@ -851,14 +946,13 @@
         if self.args.end_rank:
             limits += " -m {} ".format(self.args.end_rank)
 
-        file_ids = self.list_files(report)
+        file_ids = self.list_files(report, log_type)
 
         if len(file_ids) == 0:
             self.logger.info("No data to generate plots")
         else:
             # Generated the CSV files for each plot
-
-            self.subset_dataset(file, file_ids, report)
+            self.subset_dataset(file, file_ids, report, log_type)
 
             for file_id, file_name in file_ids.items():
                 output_file = "{}/{}-{}.html".format(self.prefix, file_id, "transfer")
@@ -880,33 +974,37 @@
                 s = subprocess.run(args)
 
                 if s.returncode == 0:
-                    self.logger.info("SUCCESS: {}".format(output_file))
-
-                    if self.args.browser:
-                        webbrowser.open(
-                            "file://{}.{}.transfer.html".format(file, file_id), new=2
-                        )
-
                     if file_id not in self.generated_files:
                         self.generated_files[file_id] = []
 
-                    self.generated_files[file_id].append(output_file)
+                    if os.path.exists(output_file):
+                        self.logger.info("SUCCESS: {}".format(output_file))
+
+                        if self.args.browser:
+                            webbrowser.open("file://{}".format(output_file), new=2)
+
+                        self.generated_files[file_id].append(output_file)
+
+                    else:
+                        self.logger.warning("no data to generate transfer plots")
+
                 else:
                     self.logger.error(
-                        "failed to generate the interactive plots (error %s)",
+                        "failed to generate the transfer plots (error %s)",
                         s.returncode,
                     )
 
                     sys.exit(os.EX_SOFTWARE)
 
-    def generate_spatiality_plot(self, file, report):
+    def generate_spatiality_plot(self, file, report, log_type):
         """Generate an interactive spatiality plot."""
-        file_ids = self.list_files(report)
+        file_ids = self.list_files(report, log_type)
+
         if len(file_ids) == 0:
             self.logger.info("No data to generate plots")
         else:
             # Generated the CSV files for each plot
-            self.subset_dataset(file, file_ids, report)
+            self.subset_dataset(file, file_ids, report, log_type)
 
             for file_id, file_name in file_ids.items():
                 output_file = "{}/{}-{}.html".format(self.prefix, file_id, "spatiality")
@@ -926,18 +1024,19 @@
                 s = subprocess.run(args)
 
                 if s.returncode == 0:
+                    if file_id not in self.generated_files:
+                        self.generated_files[file_id] = []
+
                     if os.path.exists(output_file):
                         self.logger.info("SUCCESS: {}".format(output_file))
+
+                        if self.args.browser:
+                            webbrowser.open("file://{}".format(output_file), new=2)
+
+                        self.generated_files[file_id].append(output_file)
                     else:
                         self.logger.warning("no data to generate spatiality plots")
 
-                    if self.args.browser:
-                        webbrowser.open("file://{}".format(output_file), new=2)
-
-                    if file_id not in self.generated_files:
-                        self.generated_files[file_id] = []
-
-                    self.generated_files[file_id].append(output_file)
                 else:
                     self.logger.error(
                         "failed to generate the spatiality plots (error %s)",
@@ -946,14 +1045,14 @@
 
                     sys.exit(os.EX_SOFTWARE)
 
-    def generate_phase_plot(self, file, report):
+    def generate_phase_plot(self, file, report, log_type):
         """Generate an interactive I/O phase plot."""
-        file_ids = self.list_files(report)
+        file_ids = self.list_files(report, log_type)
 
         if len(file_ids) == 0:
             self.logger.info("No data to generate plots")
         else:
-            self.subset_dataset(file, file_ids, report)
+            self.subset_dataset(file, file_ids, report, log_type)
             self.calculate_io_phases(file, file_ids)
 
             for file_id, file_name in file_ids.items():
@@ -973,34 +1072,35 @@
                 s = subprocess.run(args)
 
                 if s.returncode == 0:
+                    if file_id not in self.generated_files:
+                        self.generated_files[file_id] = []
+
                     if os.path.exists(output_file):
                         self.logger.info("SUCCESS: {}".format(output_file))
+
+                        if self.args.browser:
+                            webbrowser.open("file://{}".format(output_file), new=2)
+
+                        self.generated_files[file_id].append(output_file)
                     else:
                         self.logger.warning("no data to generate interactive plots")
 
-                    if self.args.browser:
-                        webbrowser.open("file://{}".format(output_file), new=2)
-
-                    if file_id not in self.generated_files:
-                        self.generated_files[file_id] = []
-
-                    self.generated_files[file_id].append(output_file)
                 else:
                     self.logger.error(
-                        "failed to generate the interactive plots (error %s)",
+                        "failed to generate I/O phase plots (error %s)",
                         s.returncode,
                     )
 
                     sys.exit(os.EX_SOFTWARE)
 
-    def generate_ost_usage_operation_plot(self, file, report):
+    def generate_ost_usage_operation_plot(self, file, report, log_type):
         """Generate an interactive OST usage operation plot."""
-        file_ids = self.list_files(report)
+        file_ids = self.list_files(report, log_type)
 
         if len(file_ids) == 0:
             self.logger.info("No data to generate plots")
         else:
-            self.subset_dataset(file, file_ids, report)
+            self.subset_dataset(file, file_ids, report, log_type)
 
             for file_id, file_name in file_ids.items():
                 output_file = "{}/{}-{}.html".format(
@@ -1023,34 +1123,36 @@
                 s = subprocess.run(args)
 
                 if s.returncode == 0:
+                    if file_id not in self.generated_files:
+                        self.generated_files[file_id] = []
+
                     if os.path.exists(output_file):
                         self.logger.info("SUCCESS: {}".format(output_file))
+
+                        if self.args.browser:
+                            webbrowser.open("file://{}".format(output_file), new=2)
+
+                        self.generated_files[file_id].append(output_file)
                     else:
-                        self.logger.warning("no data to generate interactive plots")
+                        self.logger.warning("no data to generate interactive OST usage operation plots")
 
-                    if self.args.browser:
-                        webbrowser.open("file://{}".format(output_file), new=2)
-
-                    if file_id not in self.generated_files:
-                        self.generated_files[file_id] = []
-
-                    self.generated_files[file_id].append(output_file)
                 else:
                     self.logger.error(
-                        "failed to generate the interactive plots (error %s)",
+                        "failed to generate interactive OST usage operation plots (error %s)",
                         s.returncode,
                     )
 
                     sys.exit(os.EX_SOFTWARE)
 
-    def generate_ost_usage_transfer_plot(self, file, report):
+    def generate_ost_usage_transfer_plot(self, file, report, log_type):
         """Generate an interactive OST usage data transfer plot."""
-        file_ids = self.list_files(report)
+        file_ids = self.list_files(report, log_type)
 
         if len(file_ids) == 0:
             self.logger.info("No data to generate plots")
         else:
-            self.subset_dataset(file, file_ids, report)
+            self.subset_dataset(file, file_ids, report, log_type)
 
             for file_id, file_name in file_ids.items():
                 output_file = "{}/{}-{}.html".format(
@@ -1073,29 +1175,31 @@
                 s = subprocess.run(args)
 
                 if s.returncode == 0:
+                    if file_id not in self.generated_files:
+                        self.generated_files[file_id] = []
+
                     if os.path.exists(output_file):
                         self.logger.info("SUCCESS: {}".format(output_file))
-                    else:
-                        self.logger.warning("no data to generate interactive plots")
 
-                    if self.args.browser:
-                        webbrowser.open("file://{}".format(output_file), new=2)
+                        if self.args.browser:
+                            webbrowser.open("file://{}".format(output_file), new=2)
 
-                    if file_id not in self.generated_files:
-                        self.generated_files[file_id] = []
+                        self.generated_files[file_id].append(output_file)
+
+                    else:
+                        self.logger.warning("no data to generate interactive OST usage transfer plots")
 
-                    self.generated_files[file_id].append(output_file)
                 else:
                     self.logger.error(
-                        "failed to generate the interactive plots (error %s)",
+                        "failed to generate interactive OST usage transfer plots (error %s)",
                         s.returncode,
                     )
 
                     sys.exit(os.EX_SOFTWARE)
 
-    def generate_index(self, file, report):
+    def generate_index(self, filename, report, log_type):
         """Generate index file with all the plots."""
-        file_ids = self.list_files(report, False)
+        file_ids = self.list_files(report, log_type, False)
 
         file = open(os.path.join(self.ROOT, "plots/index.html"), mode="r")
         template = file.read()
@@ -1150,7 +1254,7 @@
 
         self.explorer_end_time = time.time()
 
-        template = template.replace("DXT_DARSHAN_FILE", self.args.darshan)
+        template = template.replace("DXT_LOG_PATH", self.args.log_path)
         template = template.replace("DXT_EXPLORER_FILES", file_index)
         template = template.replace("DXT_EXPLORER_VERSION", dxt_version.__version__)
         template = template.replace("DXT_EXPLORER_DATE", str(datetime.datetime.now()))
@@ -1159,6 +1263,24 @@
             "{:03f}".format(self.explorer_end_time - self.explorer_start_time),
         )
 
+        size = 176
+        command = "drishti --html --light --size {} {}".format(size, filename)
+
+        args = shlex.split(command)
+        s = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        sOutput, sError = s.communicate()
+
+        if s.returncode == 0:
+            drishti_output = open(filename + ".drishti", "w")
+            drishti_output.write(sOutput.decode())
+
+            with open(filename + '.html', "r") as html_file:
+                template = template.replace("DRISHTI_CODE", str(BeautifulSoup(html_file.read(), "html.parser").code))
+            with open(filename + '.html', "r") as html_file:
+                template = template.replace("DRISHTI_STYLE", str(BeautifulSoup(html_file.read(), "html.parser").style.decode_contents()))
+        else:
+            sys.exit(os.EX_SOFTWARE)
+
         output_file = "{}/{}.html".format(self.prefix, "index")
 
         file = open(output_file, mode="w")
@@ -1196,7 +1318,7 @@ def check_log_version(self, file, log_version, library_version):
 def main():
     PARSER = argparse.ArgumentParser(description="DXT Explorer: ")
 
-    PARSER.add_argument("darshan", help="Input .darshan file")
+    PARSER.add_argument("log_path", help="Input .darshan file or recorder folder")
folder") PARSER.add_argument( "-o", diff --git a/explorer/plots/index.html b/explorer/plots/index.html index 53a90c0..454fd9f 100644 --- a/explorer/plots/index.html +++ b/explorer/plots/index.html @@ -80,6 +80,8 @@ padding: 0; margin: 0; } + + DRISHTI_STYLE @@ -88,13 +90,17 @@

- DARSHAN: + PATH:

- DXT_DARSHAN_FILE + DXT_LOG_PATH

+ + DRISHTI_CODE + +
    DXT_EXPLORER_FILES
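
Note on the two new index.html placeholders: a minimal sketch (helper name hypothetical) of the substitution the generate_index() change above performs. drishti's HTML report is parsed with BeautifulSoup; its first <code> element replaces DRISHTI_CODE and its stylesheet text replaces DRISHTI_STYLE:

    from bs4 import BeautifulSoup

    def fill_drishti_placeholders(template, filename):
        # drishti (invoked just before this) writes its report to filename + ".html"
        with open(filename + ".html") as html_file:
            soup = BeautifulSoup(html_file.read(), "html.parser")
        # splice the report's findings and CSS into the index template
        template = template.replace("DRISHTI_CODE", str(soup.code))
        return template.replace("DRISHTI_STYLE", soup.style.decode_contents())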
diff --git a/explorer/plots/operation.py b/explorer/plots/operation.py
index e68e5e9..cfe222a 100644
--- a/explorer/plots/operation.py
+++ b/explorer/plots/operation.py
@@ -113,6 +113,15 @@ def determine_legend(fig, column):
 
 parser = OptionParser()
 
+parser.add_option(
+    "-p",
+    "--file0",
+    type="string",
+    default=None,
+    help="Traces path",
+    metavar="FILE",
+)
+
 parser.add_option(
     "-f",
     "--file1",
@@ -267,13 +276,14 @@
 else:
     maximum_limit = options["runtime"]
 
-if ("POSIX" in df["api"].unique()) & ("MPIIO" in df["api"].unique()):
-    facet_row = "api"
-    category_orders = {"api": ["MPIIO", "POSIX"]}
-else:
-    facet_row = None
-    category_orders = None
-
+facet_row = "api"
+category_orders = {"api": []}
+if "H5F" in df["api"].unique():
+    category_orders["api"].append("H5F")
+if "MPIIO" in df["api"].unique():
+    category_orders["api"].append("MPIIO")
+if "POSIX" in df["api"].unique():
+    category_orders["api"].append("POSIX")
 
 dxt_issues = []
@@ -797,6 +807,8 @@
             annotation.text = "POSIX"
         elif "MPIIO" in annotation.text:
             annotation.text = "MPIIO"
+        elif "H5F" in annotation.text:
+            annotation.text = "HDF5"
 
 if any_bottleneck:
     fig_annotations = fig.layout.annotations
@@ -870,46 +882,51 @@
 
 fig.write_html(options["output"])
 
-json_data = {}
-json_data["dxt"] = dxt_issues
-json_file_name = options["file1"].split(".dxt")[0] + ".json"
-with open(json_file_name, "w") as outfile:
-    json.dump(json_data, outfile)
-json_file_path = os.path.abspath(json_file_name)
+if ".darshan" not in options["file1"]:
+    json_data = {}
+    json_data["dxt"] = dxt_issues
+    json_file_name = options["file1"].split(".dxt")[0] + ".json"
+    with open(json_file_name, "w") as outfile:
+        json.dump(json_data, outfile)
+    json_file_path = os.path.abspath(json_file_name)
 
-if any_bottleneck:
-    size = 159
-else:
-    size = 176
+    if any_bottleneck:
+        size = 159
+    else:
+        size = 176
 
-file = options["file1"].split(".darshan")[0]
-command = "drishti --html --light --size {} --json {} {}.darshan".format(
-    size, json_file_path, file
-)
-args = shlex.split(command)
-s = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-sOutput, sError = s.communicate()
+    file = options["file0"]
+    drishti_file = options["file1"].split(".dxt")[0]
 
-if s.returncode == 0:
-    drishti_output = open(file + ".drishti", "w")
-    drishti_output.write(sOutput.decode())
+    command = "drishti --html --split --light --size {} --json {} {}".format(size, json_file_path, file)
 
-    output_doc = BeautifulSoup()
-    output_doc.append(output_doc.new_tag("body"))
-    output_doc.append(output_doc.new_tag("head"))
+    s = None
 
-    with open(options["output"], "r") as html_file:
-        output_doc.body.extend(BeautifulSoup(html_file.read(), "html.parser").body)
+    if not os.path.exists(drishti_file + '.html'):
+        args = shlex.split(command)
+        s = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        sOutput, sError = s.communicate()
 
-    with open(file + ".darshan.html", "r") as html_file:
-        output_doc.head.extend(BeautifulSoup(html_file.read(), "html.parser").head)
+    if s is None or s.returncode == 0:
+        if s is not None:
+            # only write drishti's output when it actually ran this time
+            drishti_output = open(file + ".drishti", "w")
+            drishti_output.write(sOutput.decode())
 
-    with open(file + ".darshan.html", "r") as html_file:
-        output_doc.body.extend(BeautifulSoup(html_file.read(), "html.parser").body)
+        output_doc = BeautifulSoup()
+        output_doc.append(output_doc.new_tag("body"))
+        output_doc.append(output_doc.new_tag("head"))
 
-    output_doc.style.append(BeautifulSoup("pre { padding-left: 60px;}", "html.parser"))
+        with open(options["output"], "r") as html_file:
+            output_doc.body.extend(BeautifulSoup(html_file.read(), "html.parser").body)
 
-    with open(options["output"], "w") as output_file:
-        output_file.write(str(output_doc))
-else:
-    sys.exit(os.EX_SOFTWARE)
+        with open(file + ".darshan.html", "r") as html_file:
+            output_doc.head.extend(BeautifulSoup(html_file.read(), "html.parser").head)
+
+        with open(file + ".darshan.html", "r") as html_file:
+            output_doc.body.extend(BeautifulSoup(html_file.read(), "html.parser").body)
+
+        output_doc.style.append(BeautifulSoup("pre { padding-left: 60px;}", "html.parser"))
+
+        with open(options["output"], "w") as output_file:
+            output_file.write(str(output_doc))
+    else:
+        sys.exit(os.EX_SOFTWARE)
diff --git a/explorer/plots/spatiality.py b/explorer/plots/spatiality.py
index ac303aa..79f471f 100644
--- a/explorer/plots/spatiality.py
+++ b/explorer/plots/spatiality.py
@@ -39,6 +39,9 @@
 
 df = feather.read_feather(options["file"])
 
+if df.empty:
+    quit()
+
 df["duration"] = df["end"] - df["start"]
 rank_gap = max(df["rank"]) * 0.075
 maximum_rank = max(df["rank"])
diff --git a/explorer/plots/transfer.py b/explorer/plots/transfer.py
index f2e9b73..1f8771a 100644
--- a/explorer/plots/transfer.py
+++ b/explorer/plots/transfer.py
@@ -66,6 +66,9 @@
 
 df = feather.read_feather(options["file"])
 
+if df.empty:
+    quit()
+
 df["duration"] = df["end"] - df["start"]
 duration = max(df["end"]) - min(df["start"])
diff --git a/requirements.txt b/requirements.txt
index 534efcc..2857307 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ darshan
 pyarrow>=10.0.1
 bs4>=0.0.1
 drishti-io>=0.5
+recorder-utils
\ No newline at end of file
diff --git a/setup.py b/setup.py
index f11762e..ba17eaf 100644
--- a/setup.py
+++ b/setup.py
@@ -27,6 +27,7 @@
         "pyarrow>=10.0.1",
         "bs4>=0.0.1",
         "drishti-io>=0.5",
+        "recorder-utils"
     ],
     include_package_data=True,
     entry_points={"console_scripts": ["dxt-explorer=explorer.dxt:main"]},
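
Note: a minimal sketch of the log-type dispatch this patch introduces (paths hypothetical; RecorderReader and build_offset_intervals come from the recorder-utils dependency added in requirements.txt and setup.py above). A path ending in ".darshan" is parsed with pydarshan, while any other path is treated as a Recorder trace folder:

    import darshan
    from recorder_utils import RecorderReader
    from recorder_utils.build_offset_intervals import build_offset_intervals

    path = "/tmp/recorder_logs"  # hypothetical Recorder trace folder
    if path.endswith(".darshan"):
        report = darshan.DarshanReport(path, read_all=True)  # Darshan DXT log
    else:
        report = RecorderReader(path)
        df = build_offset_intervals(report)  # one interval row per traced I/O call

With the renamed positional argument, both "dxt-explorer application.darshan" and "dxt-explorer recorder_logs/" therefore resolve to the same plotting pipeline.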