@@ -4,8 +4,10 @@
 import logging
 import warnings
 import pandas as pd
+import numpy as np
+from tqdm import trange
 from pprint import pprint
-from typing import Union, List, Optional
+from typing import Union, List, Optional, Dict

 from qlib.utils.exceptions import LoadObjectError
 from ..contrib.evaluate import risk_analysis, indicator_analysis
@@ -17,6 +19,7 @@
 from ..utils import fill_placeholder, flatten_dict, class_casting, get_date_by_shift
 from ..utils.time import Freq
 from ..utils.data import deepcopy_basic_type
+from ..utils.exceptions import QlibException
 from ..contrib.eva.alpha import calc_ic, calc_long_short_return, calc_long_short_prec

@@ -230,9 +233,16 @@ def generate(self, *args, **kwargs):
         except FileNotFoundError:
             logger.warning("The dependent data does not exist. Generation skipped.")
             return
-        return self._generate(*args, **kwargs)
+        artifact_dict = self._generate(*args, **kwargs)
+        if isinstance(artifact_dict, dict):
+            self.save(**artifact_dict)
+        return artifact_dict

-    def _generate(self, *args, **kwargs):
+    def _generate(self, *args, **kwargs) -> Dict[str, object]:
+        """
+        Run the concrete generating task and return a dictionary of the generated results.
+        The caller, ``generate``, saves the returned results to the recorder.
+        """
         raise NotImplementedError(f"Please implement the `_generate` method")

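Note on the contract change above: `_generate` implementations no longer persist artifacts themselves; they return a `{filename: object}` dict and the base `generate` saves it. A minimal runnable sketch of the pattern (the `ToyRecordBase`/`ToyRecord` names and the in-memory `save` are stand-ins, not qlib API):

```python
from typing import Dict

class ToyRecordBase:
    """Stand-in for the base record template: generate() saves whatever _generate() returns."""

    def __init__(self):
        self.saved = {}  # in-memory stand-in for recorder artifacts

    def save(self, **objects):
        self.saved.update(objects)

    def generate(self, *args, **kwargs):
        artifact_dict = self._generate(*args, **kwargs)
        if isinstance(artifact_dict, dict):  # subclasses may also return None
            self.save(**artifact_dict)
        return artifact_dict

    def _generate(self, *args, **kwargs) -> Dict[str, object]:
        raise NotImplementedError("Please implement the `_generate` method")

class ToyRecord(ToyRecordBase):
    def _generate(self, **kwargs) -> Dict[str, object]:
        # Build the artifacts instead of saving them one by one.
        return {"pred.pkl": [0.1, 0.2], "label.pkl": [0.0, 1.0]}

r = ToyRecord()
r.generate()
print(sorted(r.saved))  # ['label.pkl', 'pred.pkl']
```

Deferring the save to the caller is what lets a subclass such as `MultiPassPortAnaRecord` invoke `super()._generate()` repeatedly and aggregate the per-pass dicts without writing intermediate artifacts.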
@@ -336,8 +346,8 @@ def _generate(self, label: Optional[pd.DataFrame] = None, **kwargs):
             }
         )
         self.recorder.log_metrics(**metrics)
-        self.save(**objects)
         pprint(metrics)
+        return objects

     def list(self):
         paths = ["ic.pkl", "ric.pkl"]
@@ -468,17 +478,18 @@ def _generate(self, **kwargs):
         if self.backtest_config["end_time"] is None:
             self.backtest_config["end_time"] = get_date_by_shift(dt_values.max(), 1)

+        artifact_objects = {}
         # custom strategy and get backtest
         portfolio_metric_dict, indicator_dict = normal_backtest(
             executor=self.executor_config, strategy=self.strategy_config, **self.backtest_config
         )
         for _freq, (report_normal, positions_normal) in portfolio_metric_dict.items():
-            self.save(**{f"report_normal_{_freq}.pkl": report_normal})
-            self.save(**{f"positions_normal_{_freq}.pkl": positions_normal})
+            artifact_objects.update({f"report_normal_{_freq}.pkl": report_normal})
+            artifact_objects.update({f"positions_normal_{_freq}.pkl": positions_normal})

         for _freq, indicators_normal in indicator_dict.items():
-            self.save(**{f"indicators_normal_{_freq}.pkl": indicators_normal[0]})
-            self.save(**{f"indicators_normal_{_freq}_obj.pkl": indicators_normal[1]})
+            artifact_objects.update({f"indicators_normal_{_freq}.pkl": indicators_normal[0]})
+            artifact_objects.update({f"indicators_normal_{_freq}_obj.pkl": indicators_normal[1]})

         for _analysis_freq in self.risk_analysis_freq:
             if _analysis_freq not in portfolio_metric_dict:
@@ -500,7 +511,7 @@ def _generate(self, **kwargs):
                 analysis_dict = flatten_dict(analysis_df["risk"].unstack().T.to_dict())
                 self.recorder.log_metrics(**{f"{_analysis_freq}.{k}": v for k, v in analysis_dict.items()})
                 # save results
-                self.save(**{f"port_analysis_{_analysis_freq}.pkl": analysis_df})
+                artifact_objects.update({f"port_analysis_{_analysis_freq}.pkl": analysis_df})
                 logger.info(
                     f"Portfolio analysis record 'port_analysis_{_analysis_freq}.pkl' has been saved as the artifact of the Experiment {self.recorder.experiment_id}"
                 )
@@ -525,12 +536,13 @@ def _generate(self, **kwargs):
                 analysis_dict = analysis_df["value"].to_dict()
                 self.recorder.log_metrics(**{f"{_analysis_freq}.{k}": v for k, v in analysis_dict.items()})
                 # save results
-                self.save(**{f"indicator_analysis_{_analysis_freq}.pkl": analysis_df})
+                artifact_objects.update({f"indicator_analysis_{_analysis_freq}.pkl": analysis_df})
                 logger.info(
                     f"Indicator analysis record 'indicator_analysis_{_analysis_freq}.pkl' has been saved as the artifact of the Experiment {self.recorder.experiment_id}"
                 )
                 pprint(f"The following are analysis results of indicators({_analysis_freq}).")
                 pprint(analysis_df)
+        return artifact_objects

     def list(self):
         list_path = []
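`PortAnaRecord._generate` now accumulates everything in `artifact_objects` instead of calling `self.save` eagerly; the base `generate` persists the dict in one shot. A toy illustration of the accumulate-then-save pattern (the DataFrame contents are fabricated):

```python
import pandas as pd

# Fabricated stand-ins for one frequency's backtest outputs.
report_normal = pd.DataFrame({"return": [0.01, -0.02], "cost": [0.001, 0.001]})
positions_normal = {"2020-01-02": {"SH600000": 100.0}}

artifact_objects = {}
for _freq in ["1day"]:
    artifact_objects.update({f"report_normal_{_freq}.pkl": report_normal})
    artifact_objects.update({f"positions_normal_{_freq}.pkl": positions_normal})

# generate() ends with self.save(**artifact_objects): each key becomes
# an artifact file name in the recorder.
print(sorted(artifact_objects))  # ['positions_normal_1day.pkl', 'report_normal_1day.pkl']
```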
@@ -553,3 +565,124 @@ def list(self):
             else:
                 warnings.warn(f"indicator_analysis freq {_analysis_freq} is not found")
         return list_path
+
+
+class MultiPassPortAnaRecord(PortAnaRecord):
+    """
+    This is the Multiple-Pass Portfolio Analysis Record class, which runs the backtest multiple times and generates aggregated analysis results (e.g., the mean and std of the risk metrics across passes). This class inherits the ``PortAnaRecord`` class.
+
+    If shuffle_init_score is enabled, the prediction scores of the first backtest date are shuffled, so that the initial positions are random.
+    shuffle_init_score only works when the strategy signal is specified as the <PRED> placeholder; the placeholder is replaced by the pred.pkl saved in the recorder.
+
+    Parameters
+    ----------
+    recorder : Recorder
+        The recorder used to save the backtest results.
+    pass_num : int
+        The number of backtest passes.
+    shuffle_init_score : bool
+        Whether to shuffle the prediction scores of the first backtest date.
+    """
+
+    depend_cls = SignalRecord
+
+    def __init__(self, recorder, pass_num=10, shuffle_init_score=True, **kwargs):
+        """
+        Parameters
+        ----------
+        recorder : Recorder
+            The recorder used to save the backtest results.
+        pass_num : int
+            The number of backtest passes.
+        shuffle_init_score : bool
+            Whether to shuffle the prediction scores of the first backtest date.
+        """
+        self.pass_num = pass_num
+        self.shuffle_init_score = shuffle_init_score
+
+        super().__init__(recorder, **kwargs)
+
+        # Save the original strategy so that the pred df can be replaced on the next generate
+        self.original_strategy = deepcopy_basic_type(self.strategy_config)
+        if not isinstance(self.original_strategy, dict):
+            raise QlibException("MultiPassPortAnaRecord requires the passed-in strategy to be a dict")
+        if "signal" not in self.original_strategy.get("kwargs", {}):
+            raise QlibException("MultiPassPortAnaRecord requires the passed-in strategy to have a signal parameter")
+
+    def random_init(self):
+        pred_df = self.load("pred.pkl")
+
+        all_pred_dates = pred_df.index.get_level_values("datetime")
+        bt_start_date = pd.to_datetime(self.backtest_config.get("start_time"))
+        if bt_start_date is None:
+            first_bt_pred_date = all_pred_dates.min()
+        else:
+            first_bt_pred_date = all_pred_dates[all_pred_dates >= bt_start_date].min()
+
+        # Shuffle the first backtest date's pred scores
+        first_date_score = pred_df.loc[first_bt_pred_date]["score"]
+        np.random.shuffle(first_date_score.values)
+
+        # Use the shuffled signal as the strategy signal
+        self.strategy_config = deepcopy_basic_type(self.original_strategy)
+        self.strategy_config["kwargs"]["signal"] = pred_df
+
+    def _generate(self, **kwargs):
+        risk_analysis_df_map = {}
+
+        # Collect each frequency's analysis dfs as a df list
+        for i in trange(self.pass_num):
+            if self.shuffle_init_score:
+                self.random_init()
+
+            # Call the parent _generate directly (no cache-file check)
+            single_run_artifacts = super()._generate(**kwargs)
+
+            for _analysis_freq in self.risk_analysis_freq:
+                risk_analysis_df_list = risk_analysis_df_map.get(_analysis_freq, [])
+                risk_analysis_df_map[_analysis_freq] = risk_analysis_df_list
+
+                analysis_df = single_run_artifacts[f"port_analysis_{_analysis_freq}.pkl"]
+                analysis_df["run_id"] = i
+                risk_analysis_df_list.append(analysis_df)
+
+        result_artifacts = {}
+        # Concat each frequency's df list
+        for _analysis_freq in self.risk_analysis_freq:
+            combined_df = pd.concat(risk_analysis_df_map[_analysis_freq])
+
+            # Calculate the mean, std and mean/std of the risk metrics across passes
+            multi_pass_port_analysis_df = combined_df.groupby(level=[0, 1]).apply(
+                lambda x: pd.Series(
+                    {"mean": x["risk"].mean(), "std": x["risk"].std(), "mean_std": x["risk"].mean() / x["risk"].std()}
+                )
+            )
+
+            # Only keep "annualized_return" and "information_ratio"
+            multi_pass_port_analysis_df = multi_pass_port_analysis_df.loc[
+                (slice(None), ["annualized_return", "information_ratio"]), :
+            ]
+            pprint(multi_pass_port_analysis_df)
+
+            # Save the aggregated df
+            result_artifacts.update({f"multi_pass_port_analysis_{_analysis_freq}.pkl": multi_pass_port_analysis_df})
+
+            # Log metrics
+            metrics = flatten_dict(
+                {
+                    "mean": multi_pass_port_analysis_df["mean"].unstack().T.to_dict(),
+                    "std": multi_pass_port_analysis_df["std"].unstack().T.to_dict(),
+                    "mean_std": multi_pass_port_analysis_df["mean_std"].unstack().T.to_dict(),
+                }
+            )
+            self.recorder.log_metrics(**metrics)
+        return result_artifacts
+
+    def list(self):
+        list_path = []
+        for _analysis_freq in self.risk_analysis_freq:
+            if _analysis_freq in self.all_freq:
+                list_path.append(f"multi_pass_port_analysis_{_analysis_freq}.pkl")
+            else:
+                warnings.warn(f"risk_analysis freq {_analysis_freq} is not found")
+        return list_path
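The effect of `random_init` can be reproduced in isolation: find the first prediction date at or after the backtest start and shuffle that date's scores, leaving later dates untouched. A runnable sketch with fabricated data (it writes the permutation back explicitly rather than relying on `np.random.shuffle` mutating a `.loc` cross-section in place, which pandas does not guarantee):

```python
import numpy as np
import pandas as pd

# Fabricated prediction scores with a qlib-style (datetime, instrument) MultiIndex.
idx = pd.MultiIndex.from_product(
    [pd.to_datetime(["2020-01-02", "2020-01-03"]), ["SH600000", "SH600004", "SH600009"]],
    names=["datetime", "instrument"],
)
pred_df = pd.DataFrame({"score": [0.3, 0.1, 0.2, 0.6, 0.4, 0.5]}, index=idx)

# First prediction date at or after the backtest start date.
all_pred_dates = pred_df.index.get_level_values("datetime")
first_bt_pred_date = all_pred_dates[all_pred_dates >= pd.Timestamp("2020-01-02")].min()

# Shuffle only that date's scores; later dates keep their original ranking,
# so only the initial positions are randomized.
first_mask = all_pred_dates == first_bt_pred_date
pred_df.loc[first_mask, "score"] = np.random.permutation(pred_df.loc[first_mask, "score"].values)
print(pred_df)
```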
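The aggregation step in `_generate` is plain pandas: each pass contributes a risk frame with a two-level (analysis, metric) index and a single `risk` column, tagged with `run_id`; after `pd.concat`, `groupby(level=[0, 1])` collapses the passes into mean, std and mean/std. A toy reproduction with fabricated numbers:

```python
import pandas as pd

# Fabricated per-pass risk frames mirroring PortAnaRecord's analysis output shape.
def fake_pass(run_id, ret, ir):
    idx = pd.MultiIndex.from_tuples(
        [("excess_return_with_cost", "annualized_return"), ("excess_return_with_cost", "information_ratio")]
    )
    df = pd.DataFrame({"risk": [ret, ir]}, index=idx)
    df["run_id"] = run_id  # same tag each pass gets in MultiPassPortAnaRecord._generate
    return df

combined_df = pd.concat([fake_pass(0, 0.10, 1.2), fake_pass(1, 0.12, 1.4), fake_pass(2, 0.08, 1.0)])

# Mean, std and mean/std of each metric across the passes.
multi_pass_df = combined_df.groupby(level=[0, 1]).apply(
    lambda x: pd.Series(
        {"mean": x["risk"].mean(), "std": x["risk"].std(), "mean_std": x["risk"].mean() / x["risk"].std()}
    )
)
print(multi_pass_df)
```

In use, the class is constructed like `PortAnaRecord`, presumably `MultiPassPortAnaRecord(recorder, pass_num=10, shuffle_init_score=True, config=port_analysis_config)` after a `SignalRecord` has saved `pred.pkl`; `generate()` then runs the passes and stores `multi_pass_port_analysis_{freq}.pkl` along with the logged mean/std metrics.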