forked from microsoft/qlib
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfuture_calendar_collector.py
121 lines (98 loc) · 4.07 KB
/
future_calendar_collector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import abc
import importlib
from pathlib import Path
from typing import Union, Iterable, List
import fire
import numpy as np
import pandas as pd
# pip install baostock
import baostock as bs
from loguru import logger
class CollectorFutureCalendar:
    """Base collector that extends an existing qlib day calendar with future dates.

    The existing calendar is read from ``<qlib_dir>/calendars/day.txt``; the
    merged calendar (existing + collected dates) is written to
    ``<qlib_dir>/calendars/day_future.txt``. Subclasses provide the future
    dates by implementing ``collector``.
    """

    # strftime pattern used both for the output file and for remote queries
    calendar_format = "%Y-%m-%d"

    def __init__(self, qlib_dir: Union[str, Path], start_date: str = None, end_date: str = None):
        """
        Parameters
        ----------
        qlib_dir:
            qlib data directory
        start_date
            start date; defaults to the last date of the existing calendar
        end_date
            end date; defaults to 730 days (365 * 2) after the last date of
            the existing calendar

        Raises
        ------
        ValueError
            if ``calendars/day.txt`` does not exist under ``qlib_dir``
        """
        self.qlib_dir = Path(qlib_dir).expanduser().absolute()
        self.calendar_path = self.qlib_dir.joinpath("calendars/day.txt")
        self.future_path = self.qlib_dir.joinpath("calendars/day_future.txt")
        # Snapshot of the calendar at construction time; the `calendar_list`
        # property itself re-reads the file on every access.
        self._calendar_list = self.calendar_list
        _latest_date = self._calendar_list[-1]
        self.start_date = _latest_date if start_date is None else pd.Timestamp(start_date)
        self.end_date = _latest_date + pd.Timedelta(days=365 * 2) if end_date is None else pd.Timestamp(end_date)

    @property
    def calendar_list(self) -> List[pd.Timestamp]:
        """Load the existing calendar file and return its dates as timestamps.

        Raises
        ------
        ValueError
            if the calendar file does not exist
        """
        # load old calendar
        if not self.calendar_path.exists():
            raise ValueError(f"calendar does not exist: {self.calendar_path}")
        calendar_df = pd.read_csv(self.calendar_path, header=None)
        calendar_df.columns = ["date"]
        calendar_df["date"] = pd.to_datetime(calendar_df["date"])
        return calendar_df["date"].to_list()

    def _format_datetime(self, datetime_d: Union[str, pd.Timestamp]) -> str:
        """Return ``datetime_d`` rendered with ``calendar_format``."""
        datetime_d = pd.Timestamp(datetime_d)
        return datetime_d.strftime(self.calendar_format)

    def write_calendar(self, calendar: Iterable):
        """Merge ``calendar`` with the existing calendar and write the future calendar file.

        Parameters
        ----------
        calendar:
            any iterable (list, tuple, generator, ...) of dates; each element
            may be anything ``pd.Timestamp`` accepts, e.g. a timestamp or a
            ``YYYY-MM-DD`` string
        """
        # Normalise to timestamps so that string and Timestamp entries
        # de-duplicate and sort together; a set union also accepts
        # non-list iterables, which `list + calendar` did not.
        merged = set(self.calendar_list) | {pd.Timestamp(_date) for _date in calendar}
        calendars_list = [self._format_datetime(_date) for _date in sorted(merged)]
        np.savetxt(self.future_path, calendars_list, fmt="%s", encoding="utf-8")

    # NOTE: the class does not use an ABC metaclass, so this decorator alone
    # does not block instantiation; the NotImplementedError below is what
    # callers hit when a subclass does not override the method.
    @abc.abstractmethod
    def collector(self) -> Iterable[pd.Timestamp]:
        """Collect the future trading dates.

        Returns
        -------
        Iterable[pd.Timestamp]
            the collected dates; must be provided by subclasses
        """
        raise NotImplementedError("Please implement the `collector` method")
class CollectorFutureCalendarCN(CollectorFutureCalendar):
    """Future-calendar collector for the CN region, backed by baostock."""

    def collector(self) -> Iterable[pd.Timestamp]:
        """Query baostock for trade dates between ``start_date`` and ``end_date``.

        Returns
        -------
        Iterable[pd.Timestamp]
            the dates whose ``is_trading_day`` flag equals 1

        Raises
        ------
        ValueError
            if the baostock login or the trade-date query reports an error code
        """
        lg = bs.login()
        if lg.error_code != "0":
            raise ValueError(f"login respond error_msg: {lg.error_msg}")
        try:
            rs = bs.query_trade_dates(
                start_date=self._format_datetime(self.start_date), end_date=self._format_datetime(self.end_date)
            )
            if rs.error_code != "0":
                raise ValueError(f"query_trade_dates respond error_msg: {rs.error_msg}")
            data_list = []
            # `and` short-circuits, so `next()` is not called once an error is reported
            while rs.error_code == "0" and rs.next():
                data_list.append(rs.get_row_data())
        finally:
            # Always release the session opened by `bs.login()` above,
            # including when the query fails.
            bs.logout()
        calendar = pd.DataFrame(data_list, columns=rs.fields)
        # the flag is compared as an integer below, so cast it first
        calendar["is_trading_day"] = calendar["is_trading_day"].astype(int)
        return pd.to_datetime(calendar[calendar["is_trading_day"] == 1]["calendar_date"]).to_list()
class CollectorFutureCalendarUS(CollectorFutureCalendar):
    """Placeholder collector for the US region; collection is not implemented."""

    def collector(self) -> Iterable[pd.Timestamp]:
        """Always fail: there is no US future-calendar source yet.

        Raises
        ------
        ValueError
            unconditionally
        """
        # TODO: US future calendar
        message = "Us calendar is not supported"
        raise ValueError(message)
def run(qlib_dir: Union[str, Path], region: str = "cn", start_date: str = None, end_date: str = None):
    """Collect future calendar(day)

    Resolves the ``CollectorFutureCalendar<REGION>`` class defined in this
    module, collects the future dates and writes the merged calendar file.

    Parameters
    ----------
    qlib_dir:
        qlib data directory
    region:
        cn/CN or us/US
    start_date
        start date
    end_date
        end date

    Raises
    ------
    AttributeError
        if no collector class exists for ``region``

    Examples
    -------
        # get cn future calendar
        $ python future_calendar_collector.py --qlib_dir <user data dir> --region cn
    """
    logger.info(f"collector future calendar: region={region}")
    # Look the class up in this module's own namespace instead of re-importing
    # the module by a hard-coded name, which only works when the script's
    # directory is importable under exactly that name.
    _class_name = f"CollectorFutureCalendar{region.upper()}"
    _class = globals().get(_class_name)
    if _class is None:
        # AttributeError matches what the previous getattr-based lookup raised
        raise AttributeError(f"unsupported region: {region!r} (no collector class named {_class_name})")
    collector = _class(qlib_dir=qlib_dir, start_date=start_date, end_date=end_date)
    collector.write_calendar(collector.collector())
# Script entry point: hand `run` to python-fire so that its parameters
# can be supplied as command-line flags.
if __name__ == "__main__":
    fire.Fire(run)