diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..3136294 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,14 @@ +name: pre-commit + +on: + pull_request: + push: + branches: [ main, master ] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 + - uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..2eae05d --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,30 @@ +name: Publish + +on: + release: + types: [created] + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.8' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + pip install -r requirements.txt + - name: Build and Publish package + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python setup.py build_ext --inplace + python setup.py sdist bdist_wheel + twine upload dist/* diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..09e0029 --- /dev/null +++ b/.gitignore @@ -0,0 +1,163 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# VSCode +.vscode/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..13e27ef --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-docstring-first + - id: check-yaml + + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e2930bd --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 philsv + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be 
included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index d409071..9ffc092 100644 --- a/README.md +++ b/README.md @@ -1 +1,51 @@ -# bojpy \ No newline at end of file +# bojpy + +[![PyPI version](https://d25lcipzij17d.cloudfront.net/badge.svg?id=py&r=r&ts=1683906897&type=6e&v=0.0.1&x2=0)](https://badge.fury.io/py/bojpy) +[![License: MIT](https://img.shields.io/badge/License-MIT-red.svg)](https://github.com/philsv/bojpy/blob/main/LICENSE) +[![Weekly Downloads](https://static.pepy.tech/personalized-badge/bojpy?period=week&units=international_system&left_color=grey&right_color=blue&left_text=downloads/week)](https://pepy.tech/project/bojpy) +[![Monthly Downloads](https://static.pepy.tech/personalized-badge/bojpy?period=month&units=international_system&left_color=grey&right_color=blue&left_text=downloads/month)](https://pepy.tech/project/bojpy) +[![Downloads](https://static.pepy.tech/personalized-badge/bojpy?period=total&units=international_system&left_color=grey&right_color=blue&left_text=downloads)](https://pepy.tech/project/bojpy) + +bojpy is a Python package that provides a simple interface to the [BOJ Time-Series Data Search](https://www.stat-search.boj.or.jp/index_en.html). 
+ +## Installation + +```ini +pip install bojpy +``` + +## Requirements + +* beautifulsoup4 +* pandas +* requests + +## How to use + +```python +from bojpy import boj + +# By data series id +df = boj.get_data_series(series="BS01'MABJMTA") + +# By Time-series data html url +url = "https://www.stat-search.boj.or.jp/ssi/html/nme_R020MM.3576038.20240826070325.02.html" +df = boj.get_data_html(url) +``` + +## Output Example + +```ini +Date BS01'MABJMTA Bank of Japan Accounts/Assets/Total(Assets, or Liabilities and Net Assets)(s) + +2024-07-01 7617141.0 +2024-06-01 7536709.0 +2024-05-01 7610851.0 +2024-04-01 7583199.0 +2024-03-01 7564231.0 +... ... +``` + +## Disclaimer + +This package is neither endorsed by nor affiliated with the [Bank of Japan](https://www.boj.or.jp/en/). Please make sure not to abuse the BOJ servers by sending unnecessary requests. diff --git a/bojpy/boj.py b/bojpy/boj.py new file mode 100644 index 0000000..e0f21d7 --- /dev/null +++ b/bojpy/boj.py @@ -0,0 +1,86 @@ +import warnings +from urllib.parse import urlencode + +import numpy as np +import pandas as pd +import requests +from bs4 import BeautifulSoup + +warnings.filterwarnings("ignore", category=UserWarning) + + +def get_data_series( + series: str, + skiprows: int = 0, +) -> pd.DataFrame: + """ + Returns data series from the Bank of Japan (BOJ) Time-Series Data Search. 
+ + Example: + >>> get_data_series(series="BS01'MABJMTA") + """ + base_url = "https://www.stat-search.boj.or.jp/ssi/" + search_path = "cgi-bin/famecgi2?cgi=%24nme_r030_en&chkfrq=MM&rdoheader=SIMPLE&rdodelimitar=COMMA&hdnYyyyFrom=&hdnYyyyTo=&sw_freq=NONE&sw_yearend=NONE&sw_observed=NONE&" + series_encoded = urlencode({"hdncode": series}) + url = f"{base_url}{search_path}{series_encoded}" + + response = requests.get(url) + response.raise_for_status() + + page_content = response.content + soup = BeautifulSoup(page_content, "lxml") + nodes = soup.select("a[href*=csv]") + + if not nodes: + raise ValueError(f"Could not find .csv file in {url}") + + url = f"https://www.stat-search.boj.or.jp/{nodes[0]['href']}" + df = pd.read_csv(url, skiprows=skiprows) + + first_row = df.iloc[0] + new_columns = df.columns + " " + first_row + df.columns = new_columns + df = df.drop(index=0) + + df[df.columns[0]] = pd.to_datetime(df[df.columns[0]]) + df = df.replace({"ND": np.nan}, regex=True) + + df = df.rename(columns={df.columns[0]: ""}) + df = df.rename_axis("Date", axis=1) + df = df.set_index(df.columns[0]) + df = df.astype(float) + + if df.index.is_monotonic_increasing: + df = df.sort_index(ascending=False) + + df = df.dropna() + return df + + +def get_data_html( + url: str, + skiprows: int = 0, +) -> pd.DataFrame: + """ + Returns the HTML content as a DataFrame of a given Time-series data URL. 
+ + Example: + >>> get_data_html(url="https://www.stat-search.boj.or.jp/ssi/html/nme_R000.3576779.20240826071135.02.html") + """ + df = pd.read_html(url, skiprows=skiprows)[0] + df.columns = df.iloc[0] # type: ignore + df = df.drop(index=0) + + df[df.columns[0]] = pd.to_datetime(df[df.columns[0]]) + df = df.replace({"ND": np.nan}, regex=True) + + df = df.rename(columns={df.columns[0]: ""}) + df = df.rename_axis("Date", axis=1) + df = df.set_index(df.columns[0]) + df = df.astype(float) + + if df.index.is_monotonic_increasing: + df = df.sort_index(ascending=False) + + df = df.dropna() + return df diff --git a/bojpy/version.py b/bojpy/version.py new file mode 100644 index 0000000..b3c06d4 --- /dev/null +++ b/bojpy/version.py @@ -0,0 +1 @@ +__version__ = "0.0.1" \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..2cdab2a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,4 @@ +[tool.pytest.ini_options] +pythonpath = [ + ".", "bojpy", "tests", "src", +] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..de85057 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +beautifulsoup4>=4.12.3 +pandas>=2.2.2 +requests>=2.32.3 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..b8a1655 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,3 @@ +[bdist_wheel] +universal = 1 + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ccb5167 --- /dev/null +++ b/setup.py @@ -0,0 +1,36 @@ +import setuptools + +from bojpy.version import __version__ + +with open("README.md", "r") as ld: + long_description = ld.read() + +setuptools.setup( + name="bojpy", + version=__version__, + packages=["bojpy"], + include_package_data=True, + install_requires=["pandas", "requests", "beautifulsoup4"], + url="https://github.com/philsv/bojpy", + license="MIT", + author="philsv", + author_email="frphsv@gmail.com", + description="Python Wrapper for the Bank of Japan (BOJ) 
Time-Series Data Search", + long_description=long_description, + long_description_content_type="text/markdown", + keywords=["boj", "bank of japan", "central bank", "statistical data"], + classifiers=[ + "Development Status :: 2 - Pre-Alpha", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Operating System :: Microsoft :: Windows", + "Operating System :: Unix", + "Operating System :: MacOS :: MacOS X", + "License :: OSI Approved :: MIT License", + ], +) diff --git a/tests/test_boj.py b/tests/test_boj.py new file mode 100644 index 0000000..0169f3f --- /dev/null +++ b/tests/test_boj.py @@ -0,0 +1,30 @@ +import pandas as pd +import pytest + +from bojpy import boj + + +@pytest.mark.parametrize( + "series", + [ + "BS01'MABJMTA", + "CO'TK99F0000201HCQ00000", + ], +) +def test_get_data_series(series): + """Test get_data_series method.""" + df = boj.get_data_series(series) + assert isinstance(df, pd.DataFrame) + + +@pytest.mark.parametrize( + "url", + [ + "https://www.stat-search.boj.or.jp/ssi/html/nme_R020MM.3576038.20240826070325.02.html", + "https://www.stat-search.boj.or.jp/ssi/html/nme_R020QQ.3579502.20240826074512.02.html", + ], +) +def test_get_data_html(url): + """Test get_data_html method.""" + df = boj.get_data_html(url) + assert isinstance(df, pd.DataFrame)