Skip to content

Commit

Permalink
improve docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
John Lyu committed Oct 26, 2023
1 parent 3ed3f17 commit 61c31ca
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 65 deletions.
12 changes: 11 additions & 1 deletion qlib/data/storage/file_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def write(self, data_array: np.ndarray, index: int = None) -> None:
Args:
data_array: Structured array containing the date, period, value and _next fields
index: _description_. Defaults to None.
index: target index to start writing. Defaults to None.
"""

if len(data_array) == 0:
Expand All @@ -509,6 +509,8 @@ def write(self, data_array: np.ndarray, index: int = None) -> None:
with self.uri.open("wb") as fp:
data_array.tofile(self.uri)
else:
if index is None or index > self.end_index:
index = self.end_index + 1
with self.uri.open("rb+") as fp:
fp.seek(index * self.itemsize)
data_array.tofile(fp)
Expand All @@ -525,6 +527,14 @@ def end_index(self) -> Union[int, None]:
return self.start_index + len(self) - 1

def np_data(self, i: Union[int, slice] = None) -> np.ndarray:
"""return numpy structured array
Args:
i: index or slice. Defaults to None.
Returns:
np.ndarray
"""
if not self.uri.exists():
if isinstance(i, int):
return None, None
Expand Down
102 changes: 69 additions & 33 deletions qlib/data/storage/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,10 +495,36 @@ def __len__(self) -> int:


class PITStorage(FeatureStorage):
"""PIT data is a special case of Feature data. It looks like:
date period value _next
0 20070428 200701 0.090219 4294967295
1 20070817 200702 0.139330 4294967295
2 20071023 200703 0.245863 4294967295
3 20080301 200704 0.347900 80
4 20080313 200704 0.395989 4294967295
It is sorted by [date, period].
The `_next` field is currently unused; it is kept only for forward compatibility.
"""

@property
def storage_name(self) -> str:
    """Return the storage name, which is fixed to ``"financial"``."""
    # This particular value is kept for compatibility.
    name = "financial"
    return name

def np_data(self, i: Union[int, slice] = None) -> np.ndarray:
    """Return the data as a numpy structured array.

    Args:
        i: index or slice. Defaults to None.

    Returns:
        np.ndarray

    Raises:
        NotImplementedError: always; this implementation only raises and
            must be overridden by a subclass.
    """
    # The message used to name `write`, which pointed implementers at the
    # wrong method; it now names the method that is actually missing.
    raise NotImplementedError("Subclass of FeatureStorage must implement `np_data` method")

@property
def data(self) -> pd.DataFrame:
"""get all data
Expand All @@ -511,7 +537,7 @@ def data(self) -> pd.DataFrame:
"""
raise NotImplementedError("Subclass of FeatureStorage must implement `data` method")

def write(self, data_array: Union[List, np.ndarray, Tuple], index: int = None):
def write(self, data_array: np.ndarray, index: int = None):
"""Write data_array to FeatureStorage starting from index.
Notes
Expand All @@ -526,42 +552,24 @@ def write(self, data_array: Union[List, np.ndarray, Tuple], index: int = None):
---------
.. code-block::
feature:
3 4
4 5
5 6
pit data:
date period value _next
0 20070428 200701 0.090219 4294967295
1 20070817 200702 0.139330 4294967295
2 20071023 200703 0.245863 4294967295
3 20080301 200704 0.347900 80
4 20080313 200704 0.395989 4294967295
>>> self.write([6, 7], index=6)
>>> s.write(np.array([(20070917, 200703, 0.239330, 0)], dtype=s.raw_dtype), 1)
feature:
3 4
4 5
5 6
6 6
7 7
>>> self.write([8], index=9)
feature:
3 4
4 5
5 6
6 6
7 7
8 np.nan
9 8
>>> self.write([1, np.nan], index=3)
feature:
3 1
4 np.nan
5 6
6 6
7 7
8 np.nan
9 8
date period value _next
0 20070428 200701 0.090219 4294967295
1 20070917 200703 0.239330 0
2 20071023 200703 0.245863 4294967295
3 20080301 200704 0.347900 80
4 20080313 200704 0.395989 4294967295
"""
raise NotImplementedError("Subclass of FeatureStorage must implement `write` method")
Expand All @@ -579,6 +587,34 @@ def rewrite(self, data: Union[List, np.ndarray, Tuple], index: int):
self.clear()
self.write(data, index)

def update(self, data_array: np.ndarray) -> None:
    """Update the storage with ``data_array``.

    The current data from start_date to end_date is replaced with the given
    ``data_array``.

    Args:
        data_array: Structured array containing date, period, value and next,
            with the same dtype as ``self.raw_dtype``.

    Raises:
        NotImplementedError: always; this implementation only raises and
            must be overridden by a subclass.

    Examples
    ---------

    .. code-block::

        pit data:

                date  period     value       _next
            0  20070428  200701  0.090219  4294967295
            1  20070817  200702  0.139330  4294967295
            2  20071023  200703  0.245863  4294967295
            3  20080301  200704  0.347900          80
            4  20080313  200704  0.395989  4294967295

        >>> s.update(np.array([(20070917, 200703, 0.111111, 0), (20100314, 200703, 0.111111, 0)], dtype=s.raw_dtype))

                date  period     value       _next
            0  20070428  200701  0.090219  4294967295
            1  20070817  200702  0.139330  4294967295
            2  20070917  200703  0.111111           0
            3  20100314  200703  0.111111           0

    """
    message = "Subclass of FeatureStorage must implement `update` method"
    raise NotImplementedError(message)

@overload
def __getitem__(self, s: slice) -> pd.Series:
"""x.__getitem__(slice(start: int, stop: int, step: int)) <==> x[start:stop:step]
Expand Down
62 changes: 31 additions & 31 deletions tests/test_pit.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,37 +35,37 @@


class TestPIT(unittest.TestCase):
# @classmethod
# def tearDownClass(cls) -> None:
# shutil.rmtree(str(DATA_DIR.resolve()))

# @classmethod
# def setUpClass(cls) -> None:
# cn_data_dir = str(QLIB_DIR.joinpath("cn_data").resolve())
# pit_dir = str(SOURCE_DIR.joinpath("pit").resolve())
# pit_normalized_dir = str(SOURCE_DIR.joinpath("pit_normalized").resolve())
# GetData().qlib_data(
# name="qlib_data_simple", target_dir=cn_data_dir, region="cn", delete_old=False, exists_skip=True
# )
# GetData().qlib_data(name="qlib_data", target_dir=pit_dir, region="pit", delete_old=False, exists_skip=True)

# # NOTE: This code does the same thing as line 43, but since baostock is not stable in downloading data, we have chosen to download offline data.
# # bs.login()
# # Run(
# # source_dir=pit_dir,
# # interval="quarterly",
# # ).download_data(start="2000-01-01", end="2020-01-01", symbol_regex="^(600519|000725).*")
# # bs.logout()

# Run(
# source_dir=pit_dir,
# normalize_dir=pit_normalized_dir,
# interval="quarterly",
# ).normalize_data()
# DumpPitData(
# csv_path=pit_normalized_dir,
# qlib_dir=cn_data_dir,
# ).dump(interval="quarterly")
@classmethod
def tearDownClass(cls) -> None:
shutil.rmtree(str(DATA_DIR.resolve()))

@classmethod
def setUpClass(cls) -> None:
cn_data_dir = str(QLIB_DIR.joinpath("cn_data").resolve())
pit_dir = str(SOURCE_DIR.joinpath("pit").resolve())
pit_normalized_dir = str(SOURCE_DIR.joinpath("pit_normalized").resolve())
GetData().qlib_data(
name="qlib_data_simple", target_dir=cn_data_dir, region="cn", delete_old=False, exists_skip=True
)
GetData().qlib_data(name="qlib_data", target_dir=pit_dir, region="pit", delete_old=False, exists_skip=True)

# NOTE: This code does the same thing as line 43, but since baostock is not stable in downloading data, we have chosen to download offline data.
# bs.login()
# Run(
# source_dir=pit_dir,
# interval="quarterly",
# ).download_data(start="2000-01-01", end="2020-01-01", symbol_regex="^(600519|000725).*")
# bs.logout()

Run(
source_dir=pit_dir,
normalize_dir=pit_normalized_dir,
interval="quarterly",
).normalize_data()
DumpPitData(
csv_path=pit_normalized_dir,
qlib_dir=cn_data_dir,
).dump(interval="quarterly")

def setUp(self):
# qlib.init(kernels=1) # NOTE: set kernel to 1 to make it debug easier
Expand Down

0 comments on commit 61c31ca

Please sign in to comment.