Skip to content

Commit

Permalink
Merge branch 'microsoft:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
YeewahChan authored Jun 14, 2024
2 parents 62d6720 + 155c17f commit 31af793
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 15 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/test_qlib_from_pip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ jobs:
- name: Qlib installation test
run: |
# 2024-05-30 scs has released a new version: 3.2.4.post2,
# This will cause the CI to fail, so we have limited the version of scs for now.
python -m pip install "scs<=3.2.4"
python -m pip install pyqlib
- name: Install Lightgbm for MacOS
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Recent released features
Features released before 2021 are not listed here.

<p align="center">
<img src="http://fintech.msra.cn/images_v070/logo/1.png" />
<img src="docs/_static/img/logo/1.png" />
</p>

Qlib is an open-source, AI-oriented quantitative investment platform that aims to realize the potential, empower research, and create value using AI technologies in quantitative investment, from exploring ideas to implementing productions. Qlib supports diverse machine learning modeling paradigms, including supervised learning, market dynamics modeling, and reinforcement learning.
Expand Down Expand Up @@ -166,7 +166,7 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor
* Clone the repository and install ``Qlib`` as follows.
```bash
git clone https://github.com/microsoft/qlib.git && cd qlib
pip install .
pip install . # `pip install -e .[dev]` is recommended for development. check details in docs/developer/code_standard_and_dev_guide.rst
```
**Note**: You can install Qlib with `python setup.py install` as well. But it is not the recommended approach. It will skip `pip` and cause obscure problems. For example, **only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, while the command ``python setup.py install`` **can't**.
Expand Down
2 changes: 1 addition & 1 deletion qlib/data/dataset/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from qlib.data.dataset import DataHandler


def get_level_index(df: pd.DataFrame, level=Union[str, int]) -> int:
def get_level_index(df: pd.DataFrame, level: Union[str, int]) -> int:
"""
get the level index of `df` given `level`
Expand Down
1 change: 1 addition & 0 deletions scripts/data_collector/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ def _executor(self, file_path: Path):
na_values={col: symbol_na if col == self._symbol_field_name else default_na for col in columns},
)

# NOTE: It has been reported that there may be some problems here, and the specific issues will be dealt with when they are identified.
df = self._normalize_obj.normalize(df)
if df is not None and not df.empty:
if self._end_date is not None:
Expand Down
49 changes: 37 additions & 12 deletions scripts/data_collector/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import numpy as np
import pandas as pd
from lxml import etree
from loguru import logger
from yahooquery import Ticker
from tqdm import tqdm
Expand Down Expand Up @@ -190,17 +189,43 @@ def get_hs_stock_symbols() -> list:
global _HS_SYMBOLS # pylint: disable=W0603

def _get_symbol():
_res = set()
for _k, _v in (("ha", "ss"), ("sa", "sz"), ("gem", "sz")):
resp = requests.get(HS_SYMBOLS_URL.format(s_type=_k), timeout=None)
_res |= set(
map(
lambda x: "{}.{}".format(re.findall(r"\d+", x)[0], _v), # pylint: disable=W0640
etree.HTML(resp.text).xpath("//div[@class='result']/ul//li/a/text()"), # pylint: disable=I1101
)
)
time.sleep(3)
return _res
"""
Get the stock pool from a web page and process it into the format required by yahooquery.
Format of data retrieved from the web page: 600519, 000001
The data format required by yahooquery: 600519.ss, 000001.sz
Returns
-------
set: Returns the set of symbol codes.
Examples:
-------
{600000.ss, 600001.ss, 600002.ss, 600003.ss, ...}
"""
url = "http://99.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=10000&po=1&np=1&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f12"
try:
resp = requests.get(url, timeout=None)
resp.raise_for_status()
except requests.exceptions.HTTPError as e:
raise requests.exceptions.HTTPError(f"Request to {url} failed with status code {resp.status_code}") from e

try:
_symbols = [_v["f12"] for _v in resp.json()["data"]["diff"]]
except Exception as e:
logger.warning("An error occurred while extracting data from the response.")
raise

if len(_symbols) < 3900:
raise ValueError("The complete list of stocks is not available.")

# Add suffix after the stock code to conform to yahooquery standard, otherwise the data will not be fetched.
_symbols = [
_symbol + ".ss" if _symbol.startswith("6") else _symbol + ".sz" if _symbol.startswith(("0", "3")) else None
for _symbol in _symbols
]
_symbols = [_symbol for _symbol in _symbols if _symbol is not None]

return set(_symbols)

if _HS_SYMBOLS is None:
symbols = set()
Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ def get_version(rel_path: str) -> str:
"lxml",
"baostock",
"yahooquery",
# 2024-05-30 scs has released a new version: 3.2.4.post2,
# this version, causes qlib installation to fail, so we've limited the scs version a bit for now.
"scs<=3.2.4",
"beautifulsoup4",
# In version 0.4.11 of tianshou, the code:
# logits, hidden = self.actor(batch.obs, state=state, info=batch.info)
Expand Down

0 comments on commit 31af793

Please sign in to comment.