diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..abbd0dc --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 88 +ignore = E203, W503 diff --git a/.pylintrc b/.pylintrc index b520d24..3d370f4 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,7 +1,6 @@ [MESSAGES CONTROL] -disable=print-statement, - singleton-comparison, +disable=singleton-comparison, no-member, too-few-public-methods, protected-access, @@ -10,7 +9,8 @@ disable=print-statement, duplicate-code, import-error, nan-comparison, - consider-using-set-comprehension + consider-using-set-comprehension, + consider-using-f-string, [BASIC] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8463744 --- /dev/null +++ b/Makefile @@ -0,0 +1,24 @@ +PACKAGE_NAME = meteostat + +.PHONY: all help lint tests format + +all: help + +help: ## Show this help + @echo 'Usage: make COMMAND' + @echo + @echo "Commands:" + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}' + +lint: ## Run black, pylint and flake8 + black --check $(PACKAGE_NAME) ./tests + pylint $(PACKAGE_NAME) + flake8 $(PACKAGE_NAME) + +tests: ## Run tests with coverage + pytest --version + pytest tests/ --log-cli-level=INFO --cov-branch --cov=$(PACKAGE_NAME) --cov-report xml + +format: ## Format the Python code using black + black $(PACKAGE_NAME) + black tests/ \ No newline at end of file diff --git a/README.md b/README.md index caa0852..f2a7ffa 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,13 @@ The Meteostat Python library is divided into multiple classes which provide acce * [Data Sources](https://dev.meteostat.net/sources.html) * [Terms & License](https://dev.meteostat.net/terms.html) +### Class Diagram +![Classes](classes.png) + + +### Package Diagram +![Packages](packages.png) + ## Example Let's plot 2018 temperature data for Vancouver, BC: @@ -69,6 +76,16 @@ Take a look at the expected output: Instructions 
on building and testing the Meteostat Python package can be found in the [documentation](https://dev.meteostat.net/python/contributing.html). More information about the Meteostat bulk data interface is available [here](https://dev.meteostat.net/bulk/). +## Developer setup for contributions + +- Fork the repository +- Create a new Python virtual environment +- Activate the virtual environment +- Run `pip install -U pip` to get the latest version of pip +- Run `pip install -r requirements_dev.in` to install all packages required to work on the code +- Use the Makefile (`make help` lists the commands) to run the tests with coverage, check linting and format the code +- Hack away + ## Donating If you want to support the project financially, you can make a donation using one of the following services: diff --git a/classes.png b/classes.png new file mode 100644 index 0000000..c7927bf Binary files /dev/null and b/classes.png differ diff --git a/meteostat/__init__.py b/meteostat/__init__.py index e693792..86da3d6 100644 --- a/meteostat/__init__.py +++ b/meteostat/__init__.py @@ -14,11 +14,11 @@ __appname__ = "meteostat" __version__ = "1.6.7" -from .interface.base import Base -from .interface.timeseries import TimeSeries -from .interface.stations import Stations -from .interface.point import Point -from .interface.hourly import Hourly -from .interface.daily import Daily -from .interface.monthly import Monthly -from .interface.normals import Normals +from .interface.base import Base # noqa +from .interface.timeseries import TimeSeries # noqa +from .interface.stations import Stations # noqa +from .interface.point import Point # noqa +from .interface.hourly import Hourly # noqa +from .interface.daily import Daily # noqa +from .interface.monthly import Monthly # noqa +from .interface.normals import Normals # noqa diff --git a/meteostat/core/cache.py b/meteostat/core/cache.py index b43b708..8a40e52 100644 --- a/meteostat/core/cache.py +++ b/meteostat/core/cache.py @@ -53,7 +53,6 @@ def 
clear_cache(cls, max_age: int = None) -> None: """ if os.path.exists(cls.cache_dir + os.sep + cls.cache_subdir): - # Set max_age if max_age is None: max_age = cls.max_age @@ -63,7 +62,6 @@ def clear_cache(cls, max_age: int = None) -> None: # Go through all files for file in os.listdir(cls.cache_dir + os.sep + cls.cache_subdir): - # Get full path path = os.path.join(cls.cache_dir + os.sep + cls.cache_subdir, file) diff --git a/meteostat/core/loader.py b/meteostat/core/loader.py index f552740..c47c945 100644 --- a/meteostat/core/loader.py +++ b/meteostat/core/loader.py @@ -28,10 +28,8 @@ def processing_handler( # Multi-core processing if cores > 1 and len(datasets) > 1: - # Create process pool with Pool(cores) as pool: - # Process datasets in pool output = pool.starmap(load, datasets) @@ -41,10 +39,8 @@ def processing_handler( # Multi-thread processing elif threads > 1 and len(datasets) > 1: - # Create process pool with ThreadPool(threads) as pool: - # Process datasets in pool output = pool.starmap(load, datasets) @@ -54,7 +50,6 @@ def processing_handler( # Single-thread processing else: - for dataset in datasets: output.append(load(*dataset)) @@ -77,7 +72,6 @@ def load_handler( """ try: - # Read CSV file from Meteostat endpoint df = pd.read_csv( endpoint + path, @@ -94,7 +88,6 @@ def load_handler( ) except (FileNotFoundError, HTTPError): - # Create empty DataFrane df = pd.DataFrame(columns=[*types]) diff --git a/meteostat/interface/daily.py b/meteostat/interface/daily.py index cadcc06..05a7d9b 100644 --- a/meteostat/interface/daily.py +++ b/meteostat/interface/daily.py @@ -93,7 +93,6 @@ def __init__( model: bool = True, # Include model data? flags: bool = False, # Load source flags? 
) -> None: - # Initialize time series self._init_time_series(loc, start, end, model, flags) diff --git a/meteostat/interface/hourly.py b/meteostat/interface/hourly.py index 7687ae2..35d091c 100644 --- a/meteostat/interface/hourly.py +++ b/meteostat/interface/hourly.py @@ -105,7 +105,7 @@ def _set_time( """ # Don't use chunks if full dataset is requested - if start == None: + if start is None: self.chunked = False if timezone: @@ -113,7 +113,6 @@ def _set_time( self._timezone = timezone if start and end: - # Initialize time zone timezone = pytz.timezone(self._timezone) @@ -124,7 +123,9 @@ def _set_time( end = timezone.localize(end, is_dst=None).astimezone(pytz.utc) if self.chunked: - self._annual_steps = [start.year + i for i in range(end.year - start.year + 1)] + self._annual_steps = [ + start.year + i for i in range(end.year - start.year + 1) + ] self._start = start self._end = end @@ -138,7 +139,6 @@ def __init__( model: bool = True, # Include model data? flags: bool = False, # Load source flags? 
) -> None: - # Set time zone and adapt period self._set_time(start, end, timezone) diff --git a/meteostat/interface/meteodata.py b/meteostat/interface/meteodata.py index e6c372c..01676dd 100644 --- a/meteostat/interface/meteodata.py +++ b/meteostat/interface/meteodata.py @@ -49,12 +49,10 @@ def _load_data(self, station: str, year: Union[int, None] = None) -> None: # Check if file in cache if self.max_age > 0 and file_in_cache(path, self.max_age): - # Read cached data df = pd.read_pickle(path) else: - # Get data from Meteostat df = load_handler( self.endpoint, file, self._columns, self._types, self._parse_dates @@ -119,7 +117,6 @@ def _get_data(self) -> None: """ if len(self._stations) > 0: - # Get list of datasets datasets = self._get_datasets() @@ -143,9 +140,7 @@ def _resolve_point( return None if method == "nearest": - if adapt_temp: - # Join elevation of involved weather stations data = self._data.join(stations["elevation"], on="station") @@ -156,7 +151,6 @@ def _resolve_point( data = data.drop("elevation", axis=1).round(1) else: - data = self._data if self.granularity == Granularity.NORMALS: @@ -168,7 +162,6 @@ def _resolve_point( ).agg("first") else: - # Join score and elevation of involved weather stations data = self._data.join(stations[["score", "elevation"]], on="station") diff --git a/meteostat/interface/monthly.py b/meteostat/interface/monthly.py index f63642a..5569460 100644 --- a/meteostat/interface/monthly.py +++ b/meteostat/interface/monthly.py @@ -84,7 +84,6 @@ def __init__( model: bool = True, # Include model data? flags: bool = False, # Load source flags? 
) -> None: - # Set start date if start is not None: start = start.replace(day=1) diff --git a/meteostat/interface/normals.py b/meteostat/interface/normals.py index 43b8a8f..2eb19bf 100644 --- a/meteostat/interface/normals.py +++ b/meteostat/interface/normals.py @@ -79,7 +79,6 @@ def __init__( start: int = None, end: int = None, ) -> None: - # Set list of weather stations if isinstance(loc, pd.DataFrame): self._stations = loc.index diff --git a/meteostat/interface/point.py b/meteostat/interface/point.py index 7bb7876..dfa3653 100644 --- a/meteostat/interface/point.py +++ b/meteostat/interface/point.py @@ -53,7 +53,6 @@ class Point: _alt: int = None def __init__(self, lat: float, lon: float, alt: int = None) -> None: - self._lat = lat self._lon = lon self._alt = alt @@ -90,7 +89,7 @@ def get_stations( # Apply inventory filter if freq and start and end: age = (datetime.now() - end).days - if model == False or age > 180: + if model is False or age > 180: stations = stations.inventory(freq, (start, end)) # Apply altitude filter @@ -110,7 +109,6 @@ def get_stations( # Score values if self.radius: - # Calculate score values stations["score"] = ( (1 - (stations["distance"] / self.radius)) * self.weight_dist diff --git a/meteostat/interface/stations.py b/meteostat/interface/stations.py index 801e37b..90c1763 100644 --- a/meteostat/interface/stations.py +++ b/meteostat/interface/stations.py @@ -80,12 +80,10 @@ def _load(self) -> None: # Check if file in cache if self.max_age > 0 and file_in_cache(path, self.max_age): - # Read cached data df = pd.read_pickle(path) else: - # Get data from Meteostat df = load_handler( self.endpoint, file, self._columns, self._types, self._parse_dates, True @@ -102,7 +100,6 @@ def _load(self) -> None: self._data = df def __init__(self) -> None: - # Get all weather stations self._load() @@ -179,12 +176,12 @@ def inventory( if required is True: # Make sure data exists at all - temp._data = temp._data[(pd.isna(temp._data[freq + "_start"]) == 
False)] + temp._data = temp._data[pd.notna(temp._data[freq + "_start"])] elif isinstance(required, tuple): # Make sure data exists across period temp._data = temp._data[ - (pd.isna(temp._data[freq + "_start"]) == False) + pd.notna(temp._data[freq + "_start"]) & (temp._data[freq + "_start"] <= required[0]) & ( temp._data[freq + "_end"] + timedelta(seconds=temp.max_age) @@ -195,7 +192,7 @@ def inventory( else: # Make sure data exists on a certain day temp._data = temp._data[ - (pd.isna(temp._data[freq + "_start"]) == False) + pd.notna(temp._data[freq + "_start"]) & (temp._data[freq + "_start"] <= required) & ( temp._data[freq + "_end"] + timedelta(seconds=temp.max_age) diff --git a/meteostat/interface/timeseries.py b/meteostat/interface/timeseries.py index e1967d2..a5ca375 100644 --- a/meteostat/interface/timeseries.py +++ b/meteostat/interface/timeseries.py @@ -57,12 +57,10 @@ def _load_flags(self, station: str, year: Union[int, None] = None) -> None: # Check if file in cache if self.max_age > 0 and file_in_cache(path, self.max_age): - # Read cached data df = pd.read_pickle(path) else: - # Get data from Meteostat df = load_handler( self.endpoint, @@ -99,7 +97,6 @@ def _get_flags(self) -> None: """ if len(self._stations) > 0: - # Get list of datasets datasets = self._get_datasets() diff --git a/meteostat/series/aggregate.py b/meteostat/series/aggregate.py index 34fcd37..8fd3c7e 100644 --- a/meteostat/series/aggregate.py +++ b/meteostat/series/aggregate.py @@ -19,7 +19,6 @@ def aggregate(self, freq: str = None, spatial: bool = False): """ if self.count() > 0 and not self._data.isnull().values.all(): - # Create temporal instance temp = copy(self) diff --git a/meteostat/series/interpolate.py b/meteostat/series/interpolate.py index 85bf089..b8212ab 100644 --- a/meteostat/series/interpolate.py +++ b/meteostat/series/interpolate.py @@ -18,7 +18,6 @@ def interpolate(self, limit: int = 3): """ if self.count() > 0 and not 
self._data.isnull().values.all(): - # Create temporal instance temp = copy(self) diff --git a/meteostat/series/normalize.py b/meteostat/series/normalize.py index fc2d54e..fa45d8c 100644 --- a/meteostat/series/normalize.py +++ b/meteostat/series/normalize.py @@ -27,7 +27,6 @@ def normalize(self): temp = copy(self) if temp._start and temp._end and temp.coverage() < 1: - # Create result DataFrame result = pd.DataFrame(columns=temp._columns[temp._first_met_col :]) diff --git a/packages.png b/packages.png new file mode 100644 index 0000000..e8a49a1 Binary files /dev/null and b/packages.png differ diff --git a/requirements.in b/requirements.in new file mode 100644 index 0000000..b79fdd1 --- /dev/null +++ b/requirements.in @@ -0,0 +1,6 @@ +pandas>=1.1 +pytz +numpy +matplotlib +wheel +twine \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index cb6a63a..fa63cf7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,106 @@ -pandas>=1.1 -pytz -numpy -matplotlib -pylint -pytest -black -wheel -twine \ No newline at end of file +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --no-emit-index-url +# +certifi==2023.7.22 + # via requests +cffi==1.16.0 + # via cryptography +charset-normalizer==3.2.0 + # via requests +contourpy==1.1.1 + # via matplotlib +cryptography==41.0.4 + # via secretstorage +cycler==0.12.0 + # via matplotlib +docutils==0.20.1 + # via readme-renderer +fonttools==4.43.0 + # via matplotlib +idna==3.4 + # via requests +importlib-metadata==6.8.0 + # via + # keyring + # twine +jaraco-classes==3.3.0 + # via keyring +jeepney==0.8.0 + # via + # keyring + # secretstorage +keyring==24.2.0 + # via twine +kiwisolver==1.4.5 + # via matplotlib +markdown-it-py==3.0.0 + # via rich +matplotlib==3.8.0 + # via -r requirements.in +mdurl==0.1.2 + # via markdown-it-py +more-itertools==10.1.0 + # via jaraco-classes +nh3==0.2.14 + # via readme-renderer +numpy==1.26.0 + # via + # -r 
requirements.in + # contourpy + # matplotlib + # pandas +packaging==23.1 + # via matplotlib +pandas==2.1.1 + # via -r requirements.in +pillow==10.0.1 + # via matplotlib +pkginfo==1.9.6 + # via twine +pycparser==2.21 + # via cffi +pygments==2.16.1 + # via + # readme-renderer + # rich +pyparsing==3.1.1 + # via matplotlib +python-dateutil==2.8.2 + # via + # matplotlib + # pandas +pytz==2023.3.post1 + # via + # -r requirements.in + # pandas +readme-renderer==42.0 + # via twine +requests==2.31.0 + # via + # requests-toolbelt + # twine +requests-toolbelt==1.0.0 + # via twine +rfc3986==2.0.0 + # via twine +rich==13.5.3 + # via twine +secretstorage==3.3.3 + # via keyring +six==1.16.0 + # via python-dateutil +twine==4.0.2 + # via -r requirements.in +tzdata==2023.3 + # via pandas +urllib3==2.0.5 + # via + # requests + # twine +wheel==0.41.2 + # via -r requirements.in +zipp==3.17.0 + # via importlib-metadata diff --git a/requirements_dev.in b/requirements_dev.in new file mode 100644 index 0000000..1da0e82 --- /dev/null +++ b/requirements_dev.in @@ -0,0 +1,9 @@ +black +pytest +pytest-cov +mock +flake8 +pylint +pip-tools +meteostat +-r requirements.in \ No newline at end of file