diff --git a/torchao/utils.py b/torchao/utils.py
new file mode 100644
index 0000000000..c414843da1
--- /dev/null
+++ b/torchao/utils.py
@@ -0,0 +1,26 @@
+import torch
+
+
+def benchmark_model(model, num_runs, input_tensor):
+    torch.cuda.synchronize()
+    start_event = torch.cuda.Event(enable_timing=True)
+    end_event = torch.cuda.Event(enable_timing=True)
+    start_event.record()
+
+    # benchmark
+    for _ in range(num_runs):
+        with torch.autograd.profiler.record_function("timed region"):
+            model(input_tensor)
+
+    end_event.record()
+    torch.cuda.synchronize()
+    return start_event.elapsed_time(end_event) / num_runs
+
+def profiler_runner(path, fn, *args, **kwargs):
+    with torch.profiler.profile(
+            activities=[torch.profiler.ProfilerActivity.CPU,
+                        torch.profiler.ProfilerActivity.CUDA],
+            record_shapes=True) as prof:
+        result = fn(*args, **kwargs)
+    prof.export_chrome_trace(path)
+    return result
diff --git a/tutorials/quantize_vit/run_vit_b.py b/tutorials/quantize_vit/run_vit_b.py
index ab19f7ba28..a7fd78f9b2 100644
--- a/tutorials/quantize_vit/run_vit_b.py
+++ b/tutorials/quantize_vit/run_vit_b.py
@@ -1,6 +1,8 @@
 import torch
 import torchvision.models.vision_transformer as models
 
+from torchao.utils import benchmark_model, profiler_runner
+torch.set_float32_matmul_precision("high")
 
 # Load Vision Transformer model
 model = models.vit_b_16(pretrained=True)
@@ -12,30 +14,6 @@
 
 model = torch.compile(model, mode='max-autotune')
 
-def benchmark_model(model, num_runs, input_tensor):
-    torch.cuda.synchronize()
-    start_event = torch.cuda.Event(enable_timing=True)
-    end_event = torch.cuda.Event(enable_timing=True)
-    start_event.record()
-
-    # benchmark
-    for _ in range(num_runs):
-        with torch.autograd.profiler.record_function("timed region"):
-            model(input_tensor)
-
-    end_event.record()
-    torch.cuda.synchronize()
-    return start_event.elapsed_time(end_event) / num_runs
-
-def profiler_runner(path, fn, *args, **kwargs):
-    with torch.profiler.profile(
-            activities=[torch.profiler.ProfilerActivity.CPU,
-                        torch.profiler.ProfilerActivity.CUDA],
-            record_shapes=True) as prof:
-        result = fn(*args, **kwargs)
-    prof.export_chrome_trace(path)
-    return result
-
 # Must run with no_grad when optimizing for inference
 with torch.no_grad():
     # warmup
diff --git a/tutorials/quantize_vit/run_vit_b_quant.py b/tutorials/quantize_vit/run_vit_b_quant.py
index c329c28d0c..0396a9dffd 100644
--- a/tutorials/quantize_vit/run_vit_b_quant.py
+++ b/tutorials/quantize_vit/run_vit_b_quant.py
@@ -2,6 +2,8 @@
 import torchao
 import torchvision.models.vision_transformer as models
 
+from torchao.utils import benchmark_model, profiler_runner
+torch.set_float32_matmul_precision("high")
 
 # Load Vision Transformer model
 model = models.vit_b_16(pretrained=True)
@@ -19,30 +21,6 @@
 
 model = torch.compile(model, mode='max-autotune')
 
-def benchmark_model(model, num_runs, input_tensor):
-    torch.cuda.synchronize()
-    start_event = torch.cuda.Event(enable_timing=True)
-    end_event = torch.cuda.Event(enable_timing=True)
-    start_event.record()
-
-    # benchmark
-    for _ in range(num_runs):
-        with torch.autograd.profiler.record_function("timed region"):
-            model(input_tensor)
-
-    end_event.record()
-    torch.cuda.synchronize()
-    return start_event.elapsed_time(end_event) / num_runs
-
-def profiler_runner(path, fn, *args, **kwargs):
-    with torch.profiler.profile(
-            activities=[torch.profiler.ProfilerActivity.CPU,
-                        torch.profiler.ProfilerActivity.CUDA],
-            record_shapes=True) as prof:
-        result = fn(*args, **kwargs)
-    prof.export_chrome_trace(path)
-    return result
-
 # Must run with no_grad when optimizing for inference
 with torch.no_grad():
     # warmup
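Usage note (illustrative, not part of the patch): a minimal sketch of how the relocated helpers are intended to be called, mirroring the warmup/benchmark/profile flow of the tutorial scripts this diff edits. The model setup and the "trace.json" output path are assumptions for the example, not taken from the diff.

import torch
import torchvision.models.vision_transformer as models

from torchao.utils import benchmark_model, profiler_runner

torch.set_float32_matmul_precision("high")

# Example model/input; any CUDA model with a matching input works the same way.
model = models.vit_b_16(pretrained=True).eval().cuda().to(torch.bfloat16)
input_tensor = torch.randn(1, 3, 224, 224, dtype=torch.bfloat16, device="cuda")

with torch.no_grad():
    # warmup, so lazy initialization/compilation is excluded from timing
    benchmark_model(model, 5, input_tensor)
    # average latency per iteration in milliseconds (CUDA-event timing)
    print("elapsed time, ms:", benchmark_model(model, 100, input_tensor))
    # capture a Chrome trace (open in chrome://tracing or Perfetto);
    # "trace.json" is an illustrative output path
    profiler_runner("trace.json", benchmark_model, model, 5, input_tensor)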