diff --git a/fltk/__init__.py b/fltk/__init__.py index aa6e546b..d1eb1a0f 100644 --- a/fltk/__init__.py +++ b/fltk/__init__.py @@ -1,2 +1,2 @@ -__version__ = '0.3.1' \ No newline at end of file +__version__ = '0.3.2' \ No newline at end of file diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index f15cdedf..a5a3b74b 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -1,3 +1,5 @@ +from datetime import datetime + import torch import json @@ -109,6 +111,8 @@ def merge_yaml(self, cfg = {}): self.dataset_name = cfg['dataset'] if 'experiment_prefix' in cfg: self.experiment_prefix = cfg['experiment_prefix'] + else: + self.experiment_prefix = f'{datetime.now()}' if 'output_location' in cfg: self.output_location = cfg['output_location'] if 'tensor_board_active' in cfg: diff --git a/fltk/util/remote.py b/fltk/util/remote.py new file mode 100644 index 00000000..0202f92f --- /dev/null +++ b/fltk/util/remote.py @@ -0,0 +1,66 @@ +import time +from typing import Any, List + +from torch.distributed import rpc +from dataclasses import dataclass, field +from torch.futures import Future + +def _call_method(method, rref, *args, **kwargs): + return method(rref.local_value(), *args, **kwargs) + +def _remote_method(method, rref, *args, **kwargs): + args = [method, rref] + list(args) + return rpc.rpc_sync(rref.owner(), _call_method, args=args, kwargs=kwargs) + +def _remote_method_async(method, rref, *args, **kwargs): + args = [method, rref] + list(args) + return rpc.rpc_async(rref.owner(), _call_method, args=args, kwargs=kwargs) + +@dataclass +class TimingRecord: + client_id: str + metric: str + value: Any + epoch: int = None + timestamp: float = field(default_factory=time.time) + + +class ClientRef: + ref = None + name = "" + data_size = 0 + tb_writer = None + timing_data: List[TimingRecord] = [] + + def __init__(self, name, ref, tensorboard_writer): + self.name = name + self.ref = ref + self.tb_writer = tensorboard_writer + self.timing_data = [] + + def __repr__(self): + return self.name + +@dataclass +class AsyncCall: + future: Future + client: ClientRef + start_time: float = 0 + end_time: float = 0 + + def duration(self): + return self.end_time - self.start_time + + +def bind_timing_cb(response_obj: AsyncCall): + def callback(fut): + stop_time = time.time() + response_obj.end_time = stop_time + response_obj.future.then(callback) + +def timed_remote_async_call(client, method, rref, *args, **kwargs): + start_time = time.time() + fut = _remote_method_async(method, rref, *args, **kwargs) + response = AsyncCall(fut, client, start_time=start_time) + bind_timing_cb(response) + return response \ No newline at end of file diff --git a/fltk/util/results.py b/fltk/util/results.py index af560479..cf762b8a 100644 --- a/fltk/util/results.py +++ b/fltk/util/results.py @@ -4,8 +4,9 @@ @dataclass class EpochData: epoch_id: int - duration_train: int - duration_test: int + num_epochs: int + duration_train: float + duration_test: float loss_train: float accuracy: float loss: float