From e19e61ab29668b1e822afa6e7a71a54fa3b10150 Mon Sep 17 00:00:00 2001 From: Fei Sun Date: Mon, 1 May 2023 14:07:08 -0700 Subject: [PATCH 1/2] monsoon aibench integration Summary: Enable AIBench to use the Monsoon power monitor to measure power/energy on any device. To make the measurement more accurate, the battery needs to be taken out of the device. Differential Revision: https://internalfb.com/D45022173 fbshipit-source-id: 5d6c3591c83af8bceecfe814abac7c6b02d75f81 --- benchmarking/frameworks/framework_base.py | 38 +- .../platforms/android/android_platform.py | 14 +- benchmarking/utils/monsoon_power.py | 393 ++++++++++++------ 3 files changed, 320 insertions(+), 125 deletions(-) diff --git a/benchmarking/frameworks/framework_base.py b/benchmarking/frameworks/framework_base.py index 9c828b81..0e60770f 100644 --- a/benchmarking/frameworks/framework_base.py +++ b/benchmarking/frameworks/framework_base.py @@ -228,7 +228,7 @@ def runBenchmark(self, info, benchmark, platform): program = programs["program"] if "program" in programs else "" if test["metric"] == "power": - platform_args["power"] = True + platform_args["non_blocking"] = True method = test.get("method") platform_args["method"] = method @@ -240,6 +240,15 @@ def runBenchmark(self, info, benchmark, platform): # FIXME "Monsoon" was unimportable from utils.monsoon_power import collectPowerData + if method == "monsoon_with_usb": + # enter root mode in case we need to disable usb access + if hasattr(platform, "root"): + platform.root() + platform.util.run("wait-for-device") + # disable usb power line if test desires + if "disable_usb" in test: + platform.util.shell(f"echo 1 > {test['disable_usb']}") + # in power metric, the output is ignored total_num = 0 platform.killProgram(program) @@ -290,17 +299,35 @@ def runBenchmark(self, info, benchmark, platform): test["collection_time"] if "collection_time" in test else 180 ) voltage = float(test["voltage"]) if "voltage" in test else 4.0 + threshold = float(test["threshold"] if 
"threshold" in test else 300) + window_size_in_ms = float( + test["window_size"] if "window_size" in test else 1000 + ) + # each sample is 200us + window_size = int(window_size_in_ms / 0.2) output = collectPowerData( platform.platform_hash, collection_time, voltage, test["iter"], - self.args.monsoon_map, + method=test["method"] if "method" in test else "monsoon", + monsoon_map=self.args.monsoon_map, + threshold=threshold, + window_size=window_size, ) platform.waitForDevice(20) # kill the process if exists platform.killProgram(program) + if method == "monsoon_with_usb": + # re-enable usb power line + if "disable_usb" in test: + platform.util.shell(f"echo 0 > {test['disable_usb']}") + # exit root mode in case we need to disable usb access + if hasattr(platform, "unroot"): + platform.unroot() + platform.util.run("wait-for-device") + # remove the files before copying out the output files # this will save some time in ios platform, since in ios # all files are copied back to the host system @@ -589,6 +616,11 @@ def _runCommands( profiling_args.setdefault("types", [default_type]) profiling_args.setdefault("options", {}) platform_args["model_name"] = getModelName(model) + # we only run non_blocking on the last command. Previous commands + # may be set ups for the last command, and should be blocking. + non_blocking = platform_args.get("non_blocking", False) + if non_blocking: + del platform_args["non_blocking"] for idx, cmd in enumerate(cmds): # note that we only enable profiling for the last command # of the main commands. 
@@ -598,6 +630,8 @@ def _runCommands( else {"enabled": False} ) platform_args["model_files"] = model_files + if non_blocking and idx == len(cmds) - 1: + platform_args["non_blocking"] = True one_output = self.runOnPlatform( total_num, cmd, platform, platform_args, converter ) diff --git a/benchmarking/platforms/android/android_platform.py b/benchmarking/platforms/android/android_platform.py index 3a11fba3..bc7ceca5 100644 --- a/benchmarking/platforms/android/android_platform.py +++ b/benchmarking/platforms/android/android_platform.py @@ -197,8 +197,7 @@ def runAppBenchmark(self, cmd, *args, **kwargs): platform_args = {} if "platform_args" in kwargs: platform_args = kwargs["platform_args"] - if "power" in platform_args and platform_args["power"]: - platform_args["non_blocking"] = True + if "non_blocking" in platform_args and platform_args["non_blocking"]: self.util.shell(["am", "start", "-S", activity]) return [] if platform_args.get("profiling_args", {}).get("enabled", False): @@ -236,18 +235,19 @@ def runBinaryBenchmark(self, cmd, *args, **kwargs): sleep_before_run = str(platform_args["sleep_before_run"]) cmd = ["sleep", sleep_before_run, "&&"] + cmd del platform_args["sleep_before_run"] - if "power" in platform_args and platform_args["power"]: + if "non_blocking" in platform_args and platform_args["non_blocking"]: # launch settings page to prevent the phone # to go into sleep mode - self.util.shell(["am", "start", "-a", "android.settings.SETTINGS"]) + self.util.shell( + ["am", "start", "-a", "android.settings.SETTINGS"], + ignore_status=True, + ) time.sleep(1) cmd = ( ["nohup"] + ["sh", "-c", "'" + " ".join(cmd) + "'"] + [">", "/dev/null", "2>&1"] ) - platform_args["non_blocking"] = True - del platform_args["power"] enable_profiling = platform_args.get("profiling_args", {}).get( "enabled", False ) @@ -354,7 +354,7 @@ def killProgram(self, program): # if the program doesn't exist, the grep may fail # do not update status code success = getRunStatus() - res = 
self.util.shell(["ps", "|", "grep", basename]) + res = self.util.shell(["ps", "|", "grep", basename], ignore_status=True) setRunStatus(success, overwrite=True) if len(res) == 0: return diff --git a/benchmarking/utils/monsoon_power.py b/benchmarking/utils/monsoon_power.py index 3e198fc8..c396f1d0 100644 --- a/benchmarking/utils/monsoon_power.py +++ b/benchmarking/utils/monsoon_power.py @@ -11,19 +11,29 @@ from __future__ import absolute_import, division, print_function, unicode_literals import json -import re +import os import tempfile from time import sleep -# pyre-fixme[21]: Could not find module `Monsoon.HVPM`. import Monsoon.HVPM as HVPM +import Monsoon.Operations as op -# pyre-fixme[21]: Could not find module `Monsoon.sampleEngine`. import Monsoon.sampleEngine as sampleEngine +from bridge.file_storage.upload_files.file_uploader import FileUploader from utils.custom_logger import getLogger -def collectPowerData(hash, sample_time, voltage, num_iters, monsoon_map=None): +def collectPowerData( + hash, + sample_time, + voltage, + num_iters, + method="monsoon", + monsoon_map=None, + threshold=300, + window_size=1000, +): + has_usb = method == "monsoon_with_usb" serialno = _getSerialno(hash, monsoon_map) if serialno is not None: getLogger().info( @@ -34,183 +44,334 @@ def collectPowerData(hash, sample_time, voltage, num_iters, monsoon_map=None): Mon = HVPM.Monsoon() Mon.setup_usb(serialno) # Need to sleep to be functional correctly - sleep(0.2) - getLogger().info("Setup Vout") + # there may be a race condition, so we need to sleep sufficiently long. 
+ sleep(0.5) + getLogger().info(f"Setup Vout: {voltage}") Mon.setVout(voltage) - getLogger().info("Setup setPowerupTime") + getLogger().info("Setup setPowerupTime: 60") Mon.setPowerupTime(60) - getLogger().info("Setup setPowerUpCurrentLimit") + getLogger().info("Setup setPowerUpCurrentLimit: 14") Mon.setPowerUpCurrentLimit(14) - getLogger().info("Setup setRunTimeCurrentLimit") + getLogger().info("Setup setRunTimeCurrentLimit: 14") Mon.setRunTimeCurrentLimit(14) + Mon.fillStatusPacket() # main channel - getLogger().info("Setup setVoltageChannel") + getLogger().info("Setup setVoltageChannel: 0") Mon.setVoltageChannel(0) engine = sampleEngine.SampleEngine(Mon) - getLogger().info("Setup enableCSVOutput") - # we may leak the file content - f = tempfile.NamedTemporaryFile(delete=False) - f.close() - filename = f.name - engine.enableCSVOutput(filename) - getLogger().info("Setup ConsoleOutput") + engine.disableCSVOutput() + getLogger().info("Disable ConsoleOutput") engine.ConsoleOutput(False) + getLogger().info("Enable main current") + engine.enableChannel(sampleEngine.channels.MainCurrent) + getLogger().info("Enable main voltage") + engine.enableChannel(sampleEngine.channels.MainVoltage) + if has_usb: + getLogger().info("Enable usb current") + engine.enableChannel(sampleEngine.channels.USBCurrent) + getLogger().info("Enable usb voltage") + engine.enableChannel(sampleEngine.channels.USBVoltage) + Mon.setUSBPassthroughMode(op.USB_Passthrough.On) + else: + getLogger().info("Disable usb current") + engine.disableChannel(sampleEngine.channels.USBCurrent) + getLogger().info("Disable usb voltage") + engine.disableChannel(sampleEngine.channels.USBVoltage) + Mon.setUSBPassthroughMode(op.USB_Passthrough.Auto) sleep(1) # 200 us per sample num_samples = sample_time / 0.0002 - getLogger().info("startSampling on {}".format(filename)) + getLogger().info("startSampling") engine.startSampling(num_samples) - engine.disableCSVOutput() - getLogger().info("Written power data to file: 
{}".format(filename)) + samples = engine.getSamples() + + getLogger().info("Closing device") + Mon.closeDevice() + + power_data, url = _extract_samples(samples, has_usb) - # retrieve statistics from the power data - getLogger().info("Reading data from CSV file") - power_data = _getPowerData(filename) getLogger().info( "Calculating the benchmark data range from " "{} data points".format(len(power_data)) ) - start_idx, end_idx = _calculatePowerDataRange(power_data) - getLogger().info("Collecting data from " "{} to {}".format(start_idx, end_idx)) + max_range, max_low_range = _calculatePowerDataRange( + power_data, threshold, window_size + ) + if max_range is None or max_low_range is None: + getLogger().error("Metric collection failed") + return {} + + getLogger().info( + "Collecting baseline data from " + "{} to {}".format(max_low_range["start"], max_low_range["end"]) + ) + getLogger().info( + "Collecting data from " "{} to {}".format(max_range["start"], max_range["end"]) + ) getLogger().info( "Benchmark time: " - "{} - {} s".format(power_data[start_idx]["time"], power_data[end_idx]["time"]) + "{} - {} s".format( + power_data[max_range["start"]]["time"], power_data[max_range["end"]]["time"] + ) ) - data = _retrievePowerData(power_data, start_idx, end_idx, num_iters) - data["power_data"] = filename + data = _retrievePowerData(power_data, max_range, max_low_range, num_iters) + data["power_trace"] = url return data -def _getPowerData(filename): - lines = [] - with open(filename, "r") as f: - # skip the first line since it is the title - line = f.readline() - while line != "": - line = f.readline() - # only the main output channel is enabled - pattern = re.compile(r"^([\d|\.]+),([\d|\.]+),([\d|\.]+),") - match = pattern.match(line) - if match: - new_line = { - "time": float(match.group(1)), - "current": float(match.group(2)), - "voltage": float(match.group(3)), - } - lines.append(new_line) - return lines +def _extract_samples(samples, has_usb): + power_data = [] + + 
prev_time_stamp = -1 + for i in range(len(samples[sampleEngine.channels.timeStamp])): + time_stamp = samples[sampleEngine.channels.timeStamp][i] + current = samples[sampleEngine.channels.MainCurrent][i] + voltage = samples[sampleEngine.channels.MainVoltage][i] + if has_usb: + usb_current = samples[sampleEngine.channels.USBCurrent][i] + usb_voltage = samples[sampleEngine.channels.USBVoltage][i] + # there is a bug that two consecutive time stamps may be identical + # patch it by evenly dividing the timestamps + if i >= 2 and prev_time_stamp == time_stamp: + power_data[-1]["time"] = (power_data[-2]["time"] + time_stamp) / 2 + prev_time_stamp = time_stamp + data = { + "time": time_stamp, + "current": current, + "voltage": voltage, + "usb_current": 0, + "usb_voltage": 0, + } + if has_usb: + data["usb_current"] = usb_current + data["usb_voltage"] = usb_voltage + + power_data.append(data) + + with tempfile.NamedTemporaryFile( + mode="w", delete=False, prefix="power_data_", suffix=".csv" + ) as f: + filename = f.name + getLogger().info("Writing power data to file: {}".format(f.name)) + f.write("time, current, voltage, usb_current, usb_voltage\n") + for i in range(len(power_data)): + entry = power_data[i] + f.write( + f"{entry['time']}, {entry['current']}, {entry['voltage']}, {entry['usb_current']}, {entry['usb_voltage']}\n" + ) + + getLogger().info(f"Uploading power file {filename}") + output_file_uploader = FileUploader("output_files").get_uploader() + url = output_file_uploader.upload_file(filename) + getLogger().info(f"Uploaded power url {url}") + os.unlink(filename) + return power_data, url + + +def _get_sum_current(power_data, start, end, window_size): + # Get the total current in the window + sum = 0 + for i in range(start, min(start + window_size, end)): + sum += power_data[i]["current"] + sum += power_data[i]["usb_current"] + return i, sum + + +def _find_first_window_below_threshold( + power_data, initial_sum, start, end, window_size, threshold +): + return 
_find_first_window( + power_data, initial_sum, start, end, window_size, threshold, False + ) + + +def _find_first_window_above_threshold( + power_data, initial_sum, start, end, window_size, threshold +): + return _find_first_window( + power_data, initial_sum, start, end, window_size, threshold, True + ) + + +def _find_first_window( + power_data, initial_sum, start, end, window_size, threshold, above=True +): + assert start >= window_size - 1 + i = start + sum = initial_sum + while i < end - 1 and ( + (sum / window_size) < threshold if above else (sum / window_size) > threshold + ): + # moving average advance one step + i = i + 1 + sum = ( + sum + - power_data[i - window_size]["current"] + - power_data[i - window_size]["usb_current"] + + power_data[i]["current"] + + power_data[i]["usb_current"] + ) + return i, sum + + +def _calculateOnePowerDataRange( + power_data, num, i, sum, threshold=300, window_size=1000 +): + # first find the average current is less than the threshold + i, sum = _find_first_window_below_threshold( + power_data, sum, i, num, window_size, threshold + ) + + # find the first window whose average current is above the threshold + i, sum = _find_first_window_above_threshold( + power_data, sum, i, num, window_size, threshold + ) + + window_i = i + + # find the last entry below threshold + while ( + i > 0 and (power_data[i]["current"] + power_data[i]["usb_current"]) > threshold + ): + i = i - 1 + # find the min of the constant decreasing current + while i > 0 and ( + power_data[i - 1]["current"] + power_data[i - 1]["usb_current"] + ) < (power_data[i]["current"] + power_data[i]["usb_current"]): + i = i - 1 + + # have found a possible start of the benchmark + start = i + + # find the first window whose current is below threshold again + i, sum = _find_first_window_below_threshold( + power_data, sum, window_i, num, window_size, threshold + ) + ii = max(0, i - window_size) + + # get the first entry below threshold + while ( + ii < num + and 
(power_data[ii]["current"] + power_data[ii]["usb_current"]) > threshold + ): + ii = ii + 1 + # get the min of the constant decreasing current + while ii < num - 1 and ( + power_data[ii]["current"] + power_data[ii]["usb_current"] + ) > (power_data[ii + 1]["current"] + power_data[ii + 1]["usb_current"]): + ii = ii + 1 + + # found a possible end of the benchmark + end = ii - 1 + + return start, end, i, sum # This only works in one specific scenario: # In the beginning, the current is low and below threshold # Then there is a sudden jump in current and the current keeps high -# After the test, the current restores back to below threshold for some time +# After the benchmark, the current restores back to below threshold for some time # All other scenarios are not caught -def _calculatePowerDataRange(power_data): +def _calculatePowerDataRange(power_data, threshold=300, window_size=1000): num = len(power_data) - WINDOW_SIZE = 500 - THRESHOLD = 150 - if num <= WINDOW_SIZE: - return -1, -1 + if num <= window_size: + getLogger().error( + f"Collected {num} samples from monsoon, which is less than the window size of {window_size}" + ) + return None, None # first get the sum of the window size values - sum = 0 - for i in range(WINDOW_SIZE): - sum += power_data[i]["current"] + i, sum = _get_sum_current(power_data, 0, num, window_size) ranges = [] - i = WINDOW_SIZE - 1 while i < num - 1: - # first find the average current is less than the threshold - while i < num - 1 and (sum / WINDOW_SIZE) > THRESHOLD: - i = i + 1 - sum = ( - sum - power_data[i - WINDOW_SIZE]["current"] + power_data[i]["current"] - ) - # find the first item with sudden jump in current - while ( - i < num - 1 - and ((sum / WINDOW_SIZE) <= THRESHOLD) - and ( - (power_data[i]["current"] < THRESHOLD) - or (power_data[i]["current"] < 2 * (sum / WINDOW_SIZE)) - ) - ): - i = i + 1 - sum = ( - sum - power_data[i - WINDOW_SIZE]["current"] + power_data[i]["current"] - ) - # find the last entry below threshold - while i > 
0 and power_data[i]["current"] > THRESHOLD: - i = i - 1 - start = i - # find the last item whose current is above THRESHOLD but - # all later items are below THRESHOLD - sum = 0 - while i < num - 1 and i < start + WINDOW_SIZE: - i = i + 1 - sum += power_data[i]["current"] - # wait till the average of the current is below threshold - while i < num - 1 and ((sum / WINDOW_SIZE) > THRESHOLD): - i = i + 1 - sum = ( - sum - power_data[i - WINDOW_SIZE]["current"] + power_data[i]["current"] - ) - # get the last entry below threshold - end = i - while end > i - WINDOW_SIZE and (power_data[end - 1]["current"] < THRESHOLD): - end = end - 1 - if start < num and end < num: + start, end, i, sum = _calculateOnePowerDataRange( + power_data, num, i, sum, threshold, window_size + ) + if (start < num) and (end <= num) and (start < end): ranges.append({"start": start, "end": end}) if len(ranges) == 0: - return -1, -1 + getLogger().error( + "Cannot collect any useful metric from the monsoon data. Please examine the benchmark setting." + ) + return None, None + # get the max range of all collected ranges max_range = ranges[0] + r_start = 0 for r in ranges: + assert r["end"] >= r["start"] + assert r["start"] >= r_start + r_start = r["end"] if r["end"] - r["start"] > max_range["end"] - max_range["start"]: max_range = r - return max_range["start"], max_range["end"] + # get the range below the threshold + low_ranges = [{"start": 0, "end": -1}] + for r in ranges: + low_ranges[-1]["end"] = max(r["start"] - 1, low_ranges[-1]["start"]) + low_ranges.append({"start": r["end"] + 1, "end": -1}) + low_ranges[-1]["end"] = num - 1 + + # get the max range that is below the threshold + max_low_range = low_ranges[0] + for r in low_ranges: + if r["end"] - r["start"] > max_low_range["end"] - max_low_range["start"]: + max_low_range = r + getLogger().info(ranges) + getLogger().info(low_ranges) + # the test needs to be designed in a way that more than half of the collected + # data is executing the model. 
+ """ + assert ( + max_range["end"] - max_range["start"] >= num / 2 + ), f"Test needs to be designed that over half of the collected data is model execution. " + """ + return max_range, max_low_range -def _retrievePowerData(power_data, start_idx, end_idx, num_iters): + +def _retrievePowerData(power_data, high_range, low_range, num_iters): data = {} - if start_idx < 0 or end_idx < 0: + if high_range["start"] < 0 or high_range["end"] < 0: return data # get base current. It is just an approximation - THRESHOLD = 150 - num = len(power_data) - i = end_idx - sum = 0 + + total_current = 0 + total_usb_current = 0 count = 0 - for i in range(end_idx, num): - if power_data[i]["current"] < THRESHOLD: - sum += power_data[i]["current"] - count += 1 - base_current = sum / count if count > 0 else 0 + for i in range(low_range["start"], low_range["end"]): + total_current += power_data[i]["current"] + total_usb_current += power_data[i]["usb_current"] + count += 1 + base_current = total_current / count if count > 0 else 0 + base_usb_current = total_usb_current / count if count > 0 else 0 energy = 0 - prev_time = power_data[start_idx - 1]["time"] - for i in range(start_idx, end_idx): + prev_time = power_data[max(0, high_range["start"] - 1)]["time"] + for i in range(high_range["start"], high_range["end"]): entry = power_data[i] curr_time = entry["time"] energy += ( - entry["voltage"] - * (entry["current"] - base_current) - * (curr_time - prev_time) - ) + entry["voltage"] * (entry["current"] - base_current) + + entry["usb_voltage"] * (entry["usb_current"] - base_usb_current) + ) * (curr_time - prev_time) prev_time = curr_time - total_time = power_data[end_idx]["time"] - power_data[start_idx]["time"] + total_time = ( + power_data[high_range["end"]]["time"] - power_data[high_range["start"]]["time"] + ) power = energy / total_time energy_per_inference = energy / num_iters latency = total_time * 1000 * 1000 / num_iters data["energy"] = _composeStructuredData(energy_per_inference, "energy", 
"mJ") data["power"] = _composeStructuredData(power, "power", "mW") data["latency"] = _composeStructuredData(latency, "latency", "uS") + + getLogger().info(f"Number of iterations: {num_iters}") getLogger().info("Base current: {} mA".format(base_current)) getLogger().info("Energy per inference: {} mJ".format(energy_per_inference)) getLogger().info("Power: {} mW".format(power)) From 9ad6982e3f5aad6ac1f5691de98a2925a8f0d568 Mon Sep 17 00:00:00 2001 From: Fei Sun Date: Mon, 1 May 2023 14:07:42 -0700 Subject: [PATCH 2/2] Add the script to enable Monsoon power monitor Summary: Execute this python script after resetting the Monsoon power monitor. It enables the monitor to send power to the connected device. python3 monsoon_startup.py Reviewed By: vmpuri Differential Revision: D45403286 fbshipit-source-id: 72f6ec21e307082b0f206987fdabfa37c621533c --- scripts/monsoon_startup.py | 48 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 scripts/monsoon_startup.py diff --git a/scripts/monsoon_startup.py b/scripts/monsoon_startup.py new file mode 100644 index 00000000..134e21fb --- /dev/null +++ b/scripts/monsoon_startup.py @@ -0,0 +1,48 @@ +import Monsoon.HVPM as HVPM +import Monsoon.pmapi as pmapi +import Monsoon.sampleEngine as sampleEngine + + +def testHVPM(serialno, Protocol): + HVMON = HVPM.Monsoon() + HVMON.setup_usb(serialno, Protocol) + print("HVPM Serial Number: " + repr(HVMON.getSerialNumber())) + HVMON.setPowerUpCurrentLimit(13) + HVMON.setRunTimeCurrentLimit(13) + HVMON.fillStatusPacket() + HVMON.setVout(4) + HVengine = sampleEngine.SampleEngine(HVMON) + # Output to CSV + # Turning on periodic console outputs. 
+ HVengine.ConsoleOutput(True) + + # Enable the main current and main voltage channels + HVengine.enableChannel(sampleEngine.channels.MainCurrent) + HVengine.enableChannel(sampleEngine.channels.MainVoltage) + + # Setting trigger conditions + # numSamples=sampleEngine.triggers.SAMPLECOUNT_INFINITE + numSamples = 100 + + HVengine.setStartTrigger(sampleEngine.triggers.GREATER_THAN, 0) + HVengine.setStopTrigger(sampleEngine.triggers.GREATER_THAN, 60) + HVengine.setTriggerChannel(sampleEngine.channels.timeStamp) + + # Actually start collecting samples + HVengine.startSampling(numSamples, 1) + + print("dacCalHigh: ", HVMON.statusPacket.dacCalHigh) + print("dacCalLow: ", HVMON.statusPacket.dacCalLow) + print("powerupCurrentLimit: ", HVMON.statusPacket.powerupCurrentLimit) + print("runtimeCurrentLimit: ", HVMON.statusPacket.runtimeCurrentLimit) + print("serialNumber: ", HVMON.statusPacket.serialNumber) + HVMON.closeDevice() + + +def main(): + HVPMSerialNo = None + testHVPM(HVPMSerialNo, pmapi.USB_protocol()) + + +if __name__ == "__main__": + main()