From 6ed83f7c04175767165f7891cc0a80218d25d70f Mon Sep 17 00:00:00 2001 From: fei long Date: Fri, 17 May 2024 18:43:12 +0800 Subject: [PATCH 1/9] data_collector: cn_index: fix missing dependencies package in requirements.txt (#1770) add yahooquery and openpyxl in requirements.txt Signed-off-by: YuLong Yao Co-authored-by: Linlang Lv (iSoftStone Information) --- scripts/data_collector/cn_index/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/data_collector/cn_index/requirements.txt b/scripts/data_collector/cn_index/requirements.txt index bff59525cd..87933e9d9d 100644 --- a/scripts/data_collector/cn_index/requirements.txt +++ b/scripts/data_collector/cn_index/requirements.txt @@ -5,3 +5,5 @@ pandas lxml loguru tqdm +yahooquery +openpyxl From 2ae4be426a6645e58e50764deabd3ac0d770e15f Mon Sep 17 00:00:00 2001 From: playfund Date: Fri, 17 May 2024 18:45:07 +0800 Subject: [PATCH 2/9] Delete redundant copy() code to speed up (#1732) Delete redundant copy() code to speed up Co-authored-by: Linlang Lv (iSoftStone Information) --- qlib/data/data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qlib/data/data.py b/qlib/data/data.py index 116827f232..1b1353ee4e 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -536,7 +536,6 @@ def get_column_names(fields): """ if len(fields) == 0: raise ValueError("fields cannot be empty") - fields = fields.copy() column_names = [str(f) for f in fields] return column_names From 8a087d0db9d9aec2e4a5685536e79ee75a4b45d7 Mon Sep 17 00:00:00 2001 From: Linlang <30293408+SunsetWolf@users.noreply.github.com> Date: Fri, 17 May 2024 19:19:45 +0800 Subject: [PATCH 3/9] fix docs (#1721) * fix docs * modify file extension * modify file extension --------- Co-authored-by: Linlang Lv (iSoftStone Information) --- .readthedocs.yml => .readthedocs.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) rename .readthedocs.yml => .readthedocs.yaml (80%) diff --git a/.readthedocs.yml b/.readthedocs.yaml similarity index 80% rename from .readthedocs.yml rename to .readthedocs.yaml index 7d4cb854ae..71b29a2279 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yaml @@ -5,6 +5,12 @@ # Required version: 2 +# Set the version of Python and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.7" + # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py @@ -14,7 +20,6 @@ formats: all # Optionally set the version of Python and requirements required to build your docs python: - version: 3.7 install: - requirements: docs/requirements.txt - method: pip From f79a0eeaffe6da63a5967aaf2e9f85652f9eec68 Mon Sep 17 00:00:00 2001 From: Linlang <30293408+SunsetWolf@users.noreply.github.com> Date: Tue, 21 May 2024 04:23:55 +0800 Subject: [PATCH 4/9] fix docs (#1788) Co-authored-by: Linlang Lv (iSoftStone Information) --- docs/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index c10a86d4ee..9444c55737 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,3 +5,4 @@ scipy scikit-learn pandas tianshou +sphinx_rtd_theme From 63021018d680b55d65f03c919c86332e8bad23d0 Mon Sep 17 00:00:00 2001 From: you-n-g Date: Tue, 21 May 2024 08:15:18 +0800 Subject: [PATCH 5/9] Update README.md's dataset --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index a9d5e4cc23..65c4420e6b 100644 --- a/README.md +++ b/README.md @@ -175,6 +175,20 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor **Tips for Mac**: If you are using Mac with M1, you might encounter issues in building the wheel for LightGBM, which is due to missing dependencies from OpenMP. To solve the problem, install openmp first with ``brew install libomp`` and then run ``pip install .`` to build it successfully. ## Data Preparation +❗ Due to more restrict data security policy. The offical dataset is disabled temporarily. You can try [this data source](https://github.com/chenditc/investment_data/releases) contributed by the community. +Here is an example to download the data updated on 20220720. +```bash +wget https://github.com/chenditc/investment_data/releases/download/20220720/qlib_bin.tar.gz +mkdir -p ~/.qlib/qlib_data/cn_data +tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2 +rm -f qlib_bin.tar.gz +``` + +The official dataset below will resume in short future. + + +---- + Load and prepare data by running the following code: ### Get with module From 155f80323c6fc09e2f19a22767e20f569989d0cd Mon Sep 17 00:00:00 2001 From: Linlang <30293408+SunsetWolf@users.noreply.github.com> Date: Fri, 24 May 2024 12:59:50 +0800 Subject: [PATCH 6/9] fix get data error (#1793) * fix get data error * fix get v0 data error * optimize get_data code * fix pylint error * add comments --- examples/orderbook_data/README.md | 5 ++- qlib/tests/data.py | 68 ++++++++++++++++++++----------- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/examples/orderbook_data/README.md b/examples/orderbook_data/README.md index 890e11f41e..53fd523d7f 100644 --- a/examples/orderbook_data/README.md +++ b/examples/orderbook_data/README.md @@ -16,7 +16,7 @@ Current version of script with default value tries to connect localhost **via de Run following command to install necessary libraries ``` -pip install pytest coverage +pip install pytest coverage gdown pip install arctic # NOTE: pip may fail to resolve the right package dependency !!! Please make sure the dependency are satisfied. ``` @@ -27,7 +27,8 @@ pip install arctic # NOTE: pip may fail to resolve the right package dependency 2. Please follow following steps to download example data ```bash cd examples/orderbook_data/ -python ../../scripts/get_data.py download_data --target_dir . --file_name highfreq_orderbook_example_data.zip +gdown https://drive.google.com/uc?id=15nZF7tFT_eKVZAcMFL1qPS4jGyJflH7e # Proxies may be necessary here. +python ../../scripts/get_data.py _unzip --file_path highfreq_orderbook_example_data.zip --target_dir . ``` 3. Please import the example data to your mongo db diff --git a/qlib/tests/data.py b/qlib/tests/data.py index f6bd780905..2fa76855b5 100644 --- a/qlib/tests/data.py +++ b/qlib/tests/data.py @@ -12,15 +12,11 @@ from tqdm import tqdm from pathlib import Path from loguru import logger -from cryptography.fernet import Fernet from qlib.utils import exists_qlib_data class GetData: - REMOTE_URL = "https://qlibpublic.blob.core.windows.net/data/default/stock_data" - # "?" is not included in the token. - TOKEN = b"gAAAAABkmDhojHc0VSCDdNK1MqmRzNLeDFXe5hy8obHpa6SDQh4de6nW5gtzuD-fa6O_WZb0yyqYOL7ndOfJX_751W3xN5YB4-n-P22jK-t6ucoZqhT70KPD0Lf0_P328QPJVZ1gDnjIdjhi2YLOcP4BFTHLNYO0mvzszR8TKm9iT5AKRvuysWnpi8bbYwGU9zAcJK3x9EPL43hOGtxliFHcPNGMBoJW4g_ercdhi0-Qgv5_JLsV-29_MV-_AhuaYvJuN2dEywBy" - KEY = "EYcA8cgorA8X9OhyMwVfuFxn_1W3jGk6jCbs3L2oPoA=" + REMOTE_URL = "https://github.com/SunsetWolf/qlib_dataset/releases/download" def __init__(self, delete_zip_file=False): """ @@ -33,9 +29,45 @@ def __init__(self, delete_zip_file=False): self.delete_zip_file = delete_zip_file def merge_remote_url(self, file_name: str): - fernet = Fernet(self.KEY) - token = fernet.decrypt(self.TOKEN).decode() - return f"{self.REMOTE_URL}/{file_name}?{token}" + """ + Generate download links. + + Parameters + ---------- + file_name: str + The name of the file to be downloaded. + The file name can be accompanied by a version number, (e.g.: v2/qlib_data_simple_cn_1d_latest.zip), + if no version number is attached, it will be downloaded from v0 by default. + """ + return f"{self.REMOTE_URL}/{file_name}" if "/" in file_name else f"{self.REMOTE_URL}/v0/{file_name}" + + def download(self, url: str, target_path: [Path, str]): + """ + Download a file from the specified url. + + Parameters + ---------- + url: str + The url of the data. + target_path: str + The location where the data is saved, including the file name. + """ + file_name = str(target_path).rsplit("/", maxsplit=1)[-1] + resp = requests.get(url, stream=True, timeout=60) + resp.raise_for_status() + if resp.status_code != 200: + raise requests.exceptions.HTTPError() + + chunk_size = 1024 + logger.warning( + f"The data for the example is collected from Yahoo Finance. Please be aware that the quality of the data might not be perfect. (You can refer to the original data source: https://finance.yahoo.com/lookup.)" + ) + logger.info(f"{os.path.basename(file_name)} downloading......") + with tqdm(total=int(resp.headers.get("Content-Length", 0))) as p_bar: + with target_path.open("wb") as fp: + for chunk in resp.iter_content(chunk_size=chunk_size): + fp.write(chunk) + p_bar.update(chunk_size) def download_data(self, file_name: str, target_dir: [Path, str], delete_old: bool = True): """ @@ -70,21 +102,7 @@ def download_data(self, file_name: str, target_dir: [Path, str], delete_old: boo target_path = target_dir.joinpath(_target_file_name) url = self.merge_remote_url(file_name) - resp = requests.get(url, stream=True, timeout=60) - resp.raise_for_status() - if resp.status_code != 200: - raise requests.exceptions.HTTPError() - - chunk_size = 1024 - logger.warning( - f"The data for the example is collected from Yahoo Finance. Please be aware that the quality of the data might not be perfect. (You can refer to the original data source: https://finance.yahoo.com/lookup.)" - ) - logger.info(f"{os.path.basename(file_name)} downloading......") - with tqdm(total=int(resp.headers.get("Content-Length", 0))) as p_bar: - with target_path.open("wb") as fp: - for chunk in resp.iter_content(chunk_size=chunk_size): - fp.write(chunk) - p_bar.update(chunk_size) + self.download(url=url, target_path=target_path) self._unzip(target_path, target_dir, delete_old) if self.delete_zip_file: @@ -99,7 +117,9 @@ def check_dataset(self, file_name: str): return status @staticmethod - def _unzip(file_path: Path, target_dir: Path, delete_old: bool = True): + def _unzip(file_path: [Path, str], target_dir: [Path, str], delete_old: bool = True): + file_path = Path(file_path) + target_dir = Path(target_dir) if delete_old: logger.warning( f"will delete the old qlib data directory(features, instruments, calendars, features_cache, dataset_cache): {target_dir}" From b892b21045df4cfc65fde9f7b9714e21bbbf41af Mon Sep 17 00:00:00 2001 From: Linlang Date: Fri, 24 May 2024 15:14:49 +0800 Subject: [PATCH 7/9] update version --- qlib/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/__init__.py b/qlib/__init__.py index 98ba7f95ce..39935fd3c4 100644 --- a/qlib/__init__.py +++ b/qlib/__init__.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. from pathlib import Path -__version__ = "0.9.4.99" +__version__ = "0.9.5" __version__bak = __version__ # This version is backup for QlibConfig.reset_qlib_version import os from typing import Union From 02fe6b6974573a730add42036d13301eb346e3e8 Mon Sep 17 00:00:00 2001 From: Linlang Date: Fri, 24 May 2024 16:38:48 +0800 Subject: [PATCH 8/9] bump verison --- qlib/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/__init__.py b/qlib/__init__.py index 39935fd3c4..fca74e4567 100644 --- a/qlib/__init__.py +++ b/qlib/__init__.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. from pathlib import Path -__version__ = "0.9.5" +__version__ = "0.9.5.99" __version__bak = __version__ # This version is backup for QlibConfig.reset_qlib_version import os from typing import Union From 907c888c23a5e40fa13ceccb6028b29d85aedfc1 Mon Sep 17 00:00:00 2001 From: igeni Date: Tue, 28 May 2024 07:13:12 +0300 Subject: [PATCH 9/9] changed concat of strings to f-strings and redundant type conversion was removed (#1767) Co-authored-by: Linlang --- qlib/data/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/data/client.py b/qlib/data/client.py index b6733fd3ad..a9b4b2edf7 100644 --- a/qlib/data/client.py +++ b/qlib/data/client.py @@ -35,7 +35,7 @@ def __init__(self, host, port): def connect_server(self): """Connect to server.""" try: - self.sio.connect("ws://" + self.server_host + ":" + str(self.server_port)) + self.sio.connect(f"ws://{self.server_host}:{self.server_port}") except socketio.exceptions.ConnectionError: self.logger.error("Cannot connect to server - check your network or server status")