diff --git a/qlib/contrib/meta/data_selection/dataset.py b/qlib/contrib/meta/data_selection/dataset.py
index 6f3ed878b6..58e160f110 100644
--- a/qlib/contrib/meta/data_selection/dataset.py
+++ b/qlib/contrib/meta/data_selection/dataset.py
@@ -388,11 +388,11 @@ def _prepare_seg(self, segment: Text) -> List[MetaTask]:
             train_task_n = int(len(self.meta_task_l) * self.segments)
             if segment == "train":
                 train_tasks = self.meta_task_l[:train_task_n]
-                get_module_logger("MetaDatasetDS").info(f"The first train meta task: {train_tasks[0]}")
+                get_module_logger("MetaDatasetDS").info(f"The first train meta task: {train_tasks[0]}")
                 return train_tasks
             elif segment == "test":
                 test_tasks = self.meta_task_l[train_task_n:]
-                get_module_logger("MetaDatasetDS").info(f"The first test meta task: {test_tasks[0]}")
+                get_module_logger("MetaDatasetDS").info(f"The first test meta task: {test_tasks[0]}")
                 return test_tasks
             else:
                 raise NotImplementedError(f"This type of input is not supported")
@@ -405,8 +405,8 @@ def _prepare_seg(self, segment: Text) -> List[MetaTask]:
                     train_tasks.append(t)
                 else:
                     test_tasks.append(t)
-            get_module_logger("MetaDatasetDS").info(f"The first train meta task: {train_tasks[0]}")
-            get_module_logger("MetaDatasetDS").info(f"The first test meta task: {test_tasks[0]}")
+            get_module_logger("MetaDatasetDS").info(f"The first train meta task: {train_tasks[0]}")
+            get_module_logger("MetaDatasetDS").info(f"The first test meta task: {test_tasks[0]}")
             if segment == "train":
                 return train_tasks
             elif segment == "test":
diff --git a/qlib/contrib/meta/data_selection/utils.py b/qlib/contrib/meta/data_selection/utils.py
index bf9158e577..2fddb00963 100644
--- a/qlib/contrib/meta/data_selection/utils.py
+++ b/qlib/contrib/meta/data_selection/utils.py
@@ -54,7 +54,7 @@ def forward(self, pred, y, idx):
             )
             ic_all += ic_day
         if len(diff_point) - 1 - skip_n <= 0:
-            __import__('ipdb').set_trace()
+            __import__("ipdb").set_trace()
             raise ValueError("No enough data for calculating IC")
         if skip_n > 0:
             get_module_logger("ICLoss").info(
diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py
index dd7242d88d..e0f883f094 100755
--- a/qlib/contrib/model/pytorch_gru.py
+++ b/qlib/contrib/model/pytorch_gru.py
@@ -37,21 +37,23 @@ class GRU(Model):
         the GPU ID(s) used for training
     """
 
-    def __init__(self,
-                 d_feat=6,
-                 hidden_size=64,
-                 num_layers=2,
-                 dropout=0.0,
-                 n_epochs=200,
-                 lr=0.001,
-                 metric="",
-                 batch_size=2000,
-                 early_stop=20,
-                 loss="mse",
-                 optimizer="adam",
-                 GPU=0,
-                 seed=None,
-                 **kwargs):
+    def __init__(
+        self,
+        d_feat=6,
+        hidden_size=64,
+        num_layers=2,
+        dropout=0.0,
+        n_epochs=200,
+        lr=0.001,
+        metric="",
+        batch_size=2000,
+        early_stop=20,
+        loss="mse",
+        optimizer="adam",
+        GPU=0,
+        seed=None,
+        **kwargs
+    ):
         # Set logger.
         self.logger = get_module_logger("GRU")
         self.logger.info("GRU pytorch version...")
@@ -71,36 +73,38 @@ def __init__(self,
         self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
         self.seed = seed
 
-        self.logger.info("GRU parameters setting:"
-                         "\nd_feat : {}"
-                         "\nhidden_size : {}"
-                         "\nnum_layers : {}"
-                         "\ndropout : {}"
-                         "\nn_epochs : {}"
-                         "\nlr : {}"
-                         "\nmetric : {}"
-                         "\nbatch_size : {}"
-                         "\nearly_stop : {}"
-                         "\noptimizer : {}"
-                         "\nloss_type : {}"
-                         "\nvisible_GPU : {}"
-                         "\nuse_GPU : {}"
-                         "\nseed : {}".format(
-                             d_feat,
-                             hidden_size,
-                             num_layers,
-                             dropout,
-                             n_epochs,
-                             lr,
-                             metric,
-                             batch_size,
-                             early_stop,
-                             optimizer.lower(),
-                             loss,
-                             GPU,
-                             self.use_gpu,
-                             seed,
-                         ))
+        self.logger.info(
+            "GRU parameters setting:"
+            "\nd_feat : {}"
+            "\nhidden_size : {}"
+            "\nnum_layers : {}"
+            "\ndropout : {}"
+            "\nn_epochs : {}"
+            "\nlr : {}"
+            "\nmetric : {}"
+            "\nbatch_size : {}"
+            "\nearly_stop : {}"
+            "\noptimizer : {}"
+            "\nloss_type : {}"
+            "\nvisible_GPU : {}"
+            "\nuse_GPU : {}"
+            "\nseed : {}".format(
+                d_feat,
+                hidden_size,
+                num_layers,
+                dropout,
+                n_epochs,
+                lr,
+                metric,
+                batch_size,
+                early_stop,
+                optimizer.lower(),
+                loss,
+                GPU,
+                self.use_gpu,
+                seed,
+            )
+        )
 
         if self.seed is not None:
             np.random.seed(self.seed)
@@ -130,7 +134,7 @@ def use_gpu(self):
         return self.device != torch.device("cpu")
 
     def mse(self, pred, label):
-        loss = (pred - label)**2
+        loss = (pred - label) ** 2
         return torch.mean(loss)
 
     def loss_fn(self, pred, label):
@@ -158,12 +162,12 @@ def train_epoch(self, x_train, y_train):
         indices = np.arange(len(x_train_values))
         np.random.shuffle(indices)
 
-        for i in range(len(indices))[::self.batch_size]:
+        for i in range(len(indices))[:: self.batch_size]:
             if len(indices) - i < self.batch_size:
                 break
 
-            feature = torch.from_numpy(x_train_values[indices[i:i + self.batch_size]]).float().to(self.device)
-            label = torch.from_numpy(y_train_values[indices[i:i + self.batch_size]]).float().to(self.device)
+            feature = torch.from_numpy(x_train_values[indices[i : i + self.batch_size]]).float().to(self.device)
+            label = torch.from_numpy(y_train_values[indices[i : i + self.batch_size]]).float().to(self.device)
 
             pred = self.gru_model(feature)
             loss = self.loss_fn(pred, label)
@@ -185,12 +189,12 @@ def test_epoch(self, data_x, data_y):
 
         indices = np.arange(len(x_values))
 
-        for i in range(len(indices))[::self.batch_size]:
+        for i in range(len(indices))[:: self.batch_size]:
             if len(indices) - i < self.batch_size:
                 break
 
-            feature = torch.from_numpy(x_values[indices[i:i + self.batch_size]]).float().to(self.device)
-            label = torch.from_numpy(y_values[indices[i:i + self.batch_size]]).float().to(self.device)
+            feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float().to(self.device)
+            label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float().to(self.device)
 
             with torch.no_grad():
                 pred = self.gru_model(feature)
@@ -203,10 +207,10 @@ def test_epoch(self, data_x, data_y):
         return np.mean(losses), np.mean(scores)
 
     def fit(
-            self,
-            dataset: DatasetH,
-            evals_result=dict(),
-            save_path=None,
+        self,
+        dataset: DatasetH,
+        evals_result=dict(),
+        save_path=None,
     ):
         # prepare training and validation data
         dfs = {
@@ -214,7 +218,9 @@ def fit(
                 k,
                 col_set=["feature", "label"],
                 data_key=DataHandlerLP.DK_L,
-            ) for k in ["train", "valid"] if k in dataset.segments
+            )
+            for k in ["train", "valid"]
+            if k in dataset.segments
         }
         df_train, df_valid = dfs.get("train", pd.DataFrame()), dfs.get("valid", pd.DataFrame())
 
@@ -294,7 +300,7 @@ def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
         sample_num = x_values.shape[0]
         preds = []
 
-        for begin in range(sample_num)[::self.batch_size]:
+        for begin in range(sample_num)[:: self.batch_size]:
             if sample_num - begin < self.batch_size:
                 end = sample_num
             else:
diff --git a/qlib/contrib/report/data/ana.py b/qlib/contrib/report/data/ana.py
index 27df0c6f79..d01e852cee 100644
--- a/qlib/contrib/report/data/ana.py
+++ b/qlib/contrib/report/data/ana.py
@@ -184,14 +184,14 @@ def plot_single(self, col, ax):
         ax.set_xlabel("")
         ax.set_ylabel("mean")
         ax.legend()
-        ax.tick_params(axis='x', rotation=90)
+        ax.tick_params(axis="x", rotation=90)
 
         right_ax = ax.twinx()
 
         self._std[col].plot(ax=right_ax, label="std", color="green")
         right_ax.set_xlabel("")
         right_ax.set_ylabel("std")
-        right_ax.tick_params(axis='x', rotation=90)
+        right_ax.tick_params(axis="x", rotation=90)
         right_ax.grid(None)  # set the grid to None to avoid two layer of grid
 
         h1, l1 = ax.get_legend_handles_labels()
diff --git a/qlib/contrib/report/data/base.py b/qlib/contrib/report/data/base.py
index 2935d4aa7f..0861233b6d 100644
--- a/qlib/contrib/report/data/base.py
+++ b/qlib/contrib/report/data/base.py
@@ -20,15 +20,15 @@ def __init__(self, dataset: pd.DataFrame):
         ----------
         dataset : pd.DataFrame
 
-            We often have multiple columns for dataset. Each column corresponds to one sub figure.
+            We often have multiple columns for dataset. Each column corresponds to one sub figure.
             There will be a datatime column in the index levels.
             Aggretation will be used for more summarized metrics overtime.
             Here is an example of data:
 
-            .. code-block::
+            .. code-block::
 
                                        return
-                datetime instrument
+                datetime instrument
                 2007-02-06 equity_tpx  0.010087
                            equity_spx  0.000786
         """
diff --git a/qlib/contrib/rolling/ddgda.py b/qlib/contrib/rolling/ddgda.py
index 5b96861e8c..b62820ccea 100644
--- a/qlib/contrib/rolling/ddgda.py
+++ b/qlib/contrib/rolling/ddgda.py
@@ -83,7 +83,7 @@ def __init__(
         loss_skip_thresh: int = 50,
         fea_imp_n: Optional[int] = 30,
         meta_data_proc: Optional[str] = "V01",
-        segments: Union[float, str]= 0.62,
+        segments: Union[float, str] = 0.62,
         hist_step_n: int = 30,
         working_dir: Optional[Union[str, Path]] = None,
         **kwargs,
@@ -258,7 +258,7 @@ def _train_meta_model(self, fill_method="max"):
         # 1) leverage the simplified proxy forecasting model to train meta model.
         # - Only the dataset part is important, in current version of meta model will integrate the
 
-        # NOTE:
+        # NOTE:
         # - The train_start for training meta model does not necessarily align with final rolling
         #   But please select a right time to make sure the finnal rolling tasks are not leaked in the training data.
         # - The test_start is automatically aligned to the next day of test_end. Validation is ignored.
@@ -306,7 +306,13 @@ def _train_meta_model(self, fill_method="max"):
         with R.start(experiment_name=self.meta_exp_name):
             R.log_params(**kwargs)
             mm = MetaModelDS(
-                step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=30, seed=43, alpha=self.alpha, loss_skip_thresh=self.loss_skip_thresh,
+                step=self.step,
+                hist_step_n=kwargs["hist_step_n"],
+                lr=0.001,
+                max_epoch=30,
+                seed=43,
+                alpha=self.alpha,
+                loss_skip_thresh=self.loss_skip_thresh,
             )
             mm.fit(md)
             R.save_objects(model=mm)
diff --git a/qlib/workflow/cli.py b/qlib/workflow/cli.py
index d376daef58..70c206b598 100644
--- a/qlib/workflow/cli.py
+++ b/qlib/workflow/cli.py
@@ -117,8 +117,10 @@ def workflow(config_path, experiment_name="workflow", uri_folder="mlruns"):
         if base_config_path.exists():
             path = base_config_path
         else:
-            logger.info(f"Can't find BASE_CONFIG_PATH base on: {Path.cwd()}, "
-                        f"try using relative path to config path: {Path(config_path).absolute()}")
+            logger.info(
+                f"Can't find BASE_CONFIG_PATH base on: {Path.cwd()}, "
+                f"try using relative path to config path: {Path(config_path).absolute()}"
+            )
             relative_path = Path(config_path).absolute().parent.joinpath(base_config_path)
             if relative_path.exists():
                 path = relative_path