Commit
sync processes before each measure to avoid hanging
IlyasMoutawwakil committed Feb 19, 2024
1 parent 37a0b32 commit 8e0ad46
Showing 3 changed files with 23 additions and 9 deletions.
10 changes: 7 additions & 3 deletions optimum_benchmark/trackers/energy.py
@@ -80,6 +80,7 @@ class EnergyTracker:
     def __init__(self, device: str, device_ids: Optional[str] = None):
         self.device = device
         self.device_ids = device_ids
+        self.distributed = is_torch_distributed_available() and torch.distributed.is_initialized()
 
         if self.device == "cuda":
             if self.device_ids is None:
@@ -89,9 +90,6 @@ def __init__(self, device: str, device_ids: Optional[str] = None):
                 self.device_ids = list(map(int, self.device_ids.split(",")))
             LOGGER.info(f"\t+ Tracking GPU energy on devices {self.device_ids}")
 
-        if is_torch_distributed_available() and torch.distributed.is_initialized():
-            torch.distributed.barrier()
-
         self.reset()
 
     def reset(self):
@@ -135,10 +133,16 @@ def track(self, interval=1, file_prefix="method"):
             country_iso_code=os.environ.get("COUNTRY_ISO_CODE", "FRA"),
         )
 
+        if self.distributed:
+            torch.distributed.barrier()
+
         self.emission_tracker.start()
         yield
         self.emission_tracker.stop()
 
+        if self.distributed:
+            torch.distributed.barrier()
+
         self.cpu_energy = self.emission_tracker._total_cpu_energy.kWh
         self.gpu_energy = self.emission_tracker._total_gpu_energy.kWh
         self.ram_energy = self.emission_tracker._total_ram_energy.kWh
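
The change is the same in all three trackers: every rank hits a barrier right before the measurement starts and again right after it stops, so no rank begins or finalizes a measurement while another rank is still inside the previous one. Below is a minimal sketch of that pattern, assuming a torch.distributed process group may or may not be initialized; the helper name sync_tracked is illustrative and not part of the repository.

    # Minimal sketch of the barrier-synchronized measurement pattern (illustrative names).
    from contextlib import contextmanager

    import torch.distributed as dist


    @contextmanager
    def sync_tracked():
        distributed = dist.is_available() and dist.is_initialized()

        if distributed:
            dist.barrier()  # wait until every rank is ready to start measuring

        yield  # the measured region runs here on every rank

        if distributed:
            dist.barrier()  # wait until every rank has finished before reading results

Guarding on is_initialized() keeps the same code path usable in single-process runs, where the barriers are simply skipped.
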
10 changes: 7 additions & 3 deletions optimum_benchmark/trackers/latency.py
@@ -92,15 +92,13 @@ class LatencyTracker:
     def __init__(self, device: str, backend: str):
         self.device = device
         self.backend = backend
+        self.distributed = is_torch_distributed_available() and torch.distributed.is_initialized()
 
         if self.backend == "pytorch" and self.device == "cuda":
             LOGGER.info("\t+ Tracking Pytorch CUDA latency")
         else:
             LOGGER.info("\t+ Tracking CPU latency")
 
-        if is_torch_distributed_available() and torch.distributed.is_initialized():
-            torch.distributed.barrier()
-
         self.reset()
 
     def reset(self):
@@ -110,11 +108,17 @@ def reset(self):
 
     @contextmanager
     def track(self):
+        if self.distributed:
+            torch.distributed.barrier()
+
         if self.backend == "pytorch" and self.device == "cuda":
             yield from self._pytorch_cuda_latency()
         else:
             yield from self._cpu_latency()
 
+        if self.distributed:
+            torch.distributed.barrier()
+
     def _pytorch_cuda_latency(self):
         start = torch.cuda.Event(enable_timing=True)
         start.record()
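
Note that track() delegates to a backend-specific generator with yield from, so the barriers live in track() itself and wrap whichever timing helper is chosen. A rough sketch of that composition using CUDA events follows; the class and attribute names are made up for illustration and the repository's helpers differ in detail.

    # Rough sketch of yield-from delegation with CUDA-event timing (illustrative only).
    from contextlib import contextmanager

    import torch


    class TinyLatencyTracker:
        def __init__(self):
            self.latencies = []

        @contextmanager
        def track(self):
            # in the real trackers, the distributed barriers wrap this delegation
            yield from self._cuda_latency()

        def _cuda_latency(self):
            # requires a CUDA-capable device
            start = torch.cuda.Event(enable_timing=True)
            end = torch.cuda.Event(enable_timing=True)
            start.record()
            yield  # the timed region runs between the two events
            end.record()
            torch.cuda.synchronize()  # make sure both events have completed
            self.latencies.append(start.elapsed_time(end) / 1e3)  # milliseconds -> seconds

Because the barrier is taken before delegation and again after the generator resumes, every rank enters and leaves the timed region together regardless of which helper is used.
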
12 changes: 9 additions & 3 deletions optimum_benchmark/trackers/memory.py
@@ -71,6 +71,7 @@ def __init__(self, device: str, backend: str, device_ids: Optional[str] = None):
         self.device = device
         self.backend = backend
         self.device_ids = device_ids
+        self.distributed = is_torch_distributed_available() and torch.distributed.is_initialized()
 
         LOGGER.info("\t+ Tracking RAM memory")
 
@@ -91,9 +92,6 @@ def __init__(self, device: str, backend: str, device_ids: Optional[str] = None):
                 )
             LOGGER.info(f"\t+ Tracking Allocated/Reserved memory of {num_pytorch_devices} Pytorch CUDA devices")
 
-        if is_torch_distributed_available() and torch.distributed.is_initialized():
-            torch.distributed.barrier()
-
         self.reset()
 
     def reset(self):
@@ -104,13 +102,19 @@ def reset(self):
 
     @contextmanager
     def track(self):
+        if self.distributed:
+            torch.distributed.barrier()
+
         if self.device == "cuda" and self.backend == "pytorch":
             yield from self._cuda_pytorch_memory()
         elif self.device == "cuda":
             yield from self._cuda_memory()
         else:
             yield from self._cpu_memory()
 
+        if self.distributed:
+            torch.distributed.barrier()
+
     def _cuda_pytorch_memory(self):
         torch.cuda.empty_cache()
 
@@ -129,6 +133,8 @@ def _cuda_pytorch_memory(self):
             torch.cuda.max_memory_reserved(device=device) / 1e6 for device in range(torch.cuda.device_count())
         )
 
+        torch.cuda.empty_cache()
+
     def _cuda_memory(self):
         child_connection, parent_connection = Pipe()
         memory_process = Process(
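
To see why the barriers matter, consider two ranks where one finishes its workload earlier than the other: without synchronization, the faster rank can reset counters or read results for the next measurement while the slower rank is still working, and any collective issued in that window can hang. The toy usage sketch below runs under torchrun with made-up names and a dummy workload, assuming one CUDA device per rank.

    # Toy illustration of per-rank peak CUDA memory measurement with barrier syncing.
    # Run with: torchrun --nproc_per_node=2 this_script.py  (names and workload are illustrative)
    import os
    from contextlib import contextmanager

    import torch
    import torch.distributed as dist


    @contextmanager
    def tracked_peak_memory(device: int):
        if dist.is_initialized():
            dist.barrier()  # align ranks before resetting the counters
        torch.cuda.reset_peak_memory_stats(device)
        yield
        if dist.is_initialized():
            dist.barrier()  # make sure every rank finished its workload
        peak_mb = torch.cuda.max_memory_allocated(device) / 1e6
        print(f"rank {dist.get_rank()}: peak allocated {peak_mb:.1f} MB")


    if __name__ == "__main__":
        local_rank = int(os.environ.get("LOCAL_RANK", 0))
        torch.cuda.set_device(local_rank)
        dist.init_process_group(backend="nccl")

        with tracked_peak_memory(local_rank):
            x = torch.randn(1024, 1024, device=local_rank)
            y = x @ x  # some work whose memory footprint we want to measure

        dist.destroy_process_group()
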
