Skip to content

Commit

Permalink
Implement code changes
Browse files Browse the repository at this point in the history
  • Loading branch information
FL550 committed Sep 11, 2024
1 parent 76857b0 commit 2df14cc
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 110 deletions.
75 changes: 0 additions & 75 deletions custom_test.py

This file was deleted.

100 changes: 74 additions & 26 deletions simple_dwd_weatherforecast/dwdforecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from enum import Enum
from io import BytesIO
from zipfile import ZipFile
import httpx
from stream_unzip import stream_unzip

import arrow
import requests
Expand Down Expand Up @@ -677,7 +679,7 @@ def get_weather_type(self, kmlTree, weatherDataType: WeatherDataType):

items = []
result = kmlTree.xpath(
'./kml:ExtendedData/dwd:Forecast[@dwd:elementName="{}"]/dwd:value'.format(
'./kml:Document/kml:Placemark/kml:ExtendedData/dwd:Forecast[@dwd:elementName="{}"]/dwd:value'.format(
weatherDataType.value[0]
),
namespaces=self.namespaces,
Expand All @@ -694,14 +696,11 @@ def get_weather_type(self, kmlTree, weatherDataType: WeatherDataType):
items.append(None)
return items

def parse_kml(self, kml, force_hourly=False):
stream = etree.iterparse(kml)
(_, tree) = next(stream)
def parse_kml(self, kml):
tree = etree.fromstring(kml) # type: ignore
timesteps = self.parse_timesteps(tree)
issue_time_new = self.parse_issue_time(tree)
tree.clear()

tree = self.parse_placemark(stream)
self.issue_time = issue_time_new

self.loaded_station_name = self.parse_station_name(tree)
Expand Down Expand Up @@ -748,17 +747,6 @@ def value(wdt):
for (i, t) in enumerate(timesteps)
)

def parse_placemark(self, stream):
for _, tree in stream:
for placemark in tree.findall(
".//kml:Placemark", namespaces=self.namespaces
):
item = placemark.find(".//kml:name", namespaces=self.namespaces)

if item is not None and item.text == self.station_id:
return placemark
# placemark.clear()

def parse_issue_time(self, tree):
issue_time_new = arrow.get(
tree.xpath("//dwd:IssueTime", namespaces=self.namespaces)[0].text,
Expand All @@ -767,7 +755,9 @@ def parse_issue_time(self, tree):
return issue_time_new

def parse_station_name(self, tree):
return tree.xpath("./kml:description", namespaces=self.namespaces)[0].text
return tree.xpath(
"./kml:Document/kml:Placemark/kml:description", namespaces=self.namespaces
)[0].text

def parse_timesteps(self, tree):
return [
Expand All @@ -781,7 +771,7 @@ def parse_condition(self, tree):
return [
elem.split(".")[0]
for elem in tree.xpath(
'./kml:ExtendedData/dwd:Forecast[@dwd:elementName="ww"]/dwd:value',
'./kml:Document/kml:Placemark/kml:ExtendedData/dwd:Forecast[@dwd:elementName="ww"]/dwd:value',
namespaces=self.namespaces,
)[0].text.split()
]
Expand Down Expand Up @@ -945,11 +935,8 @@ def download_weather_report(self, region_code):
except Exception as error:
print(f"Error in download_weather_report: {type(error)} args: {error.args}")

def download_latest_kml(self, stationid, force_hourly=False):
if force_hourly:
url = "https://opendata.dwd.de/weather/local_forecasts/mos/MOSMIX_S/all_stations/kml/MOSMIX_S_LATEST_240.kmz"
else:
url = f"https://opendata.dwd.de/weather/local_forecasts/mos/MOSMIX_L/single_stations/{stationid}/kml/MOSMIX_L_LATEST_{stationid}.kmz"
def download_small_kml(self, stationid) -> bytes | None:
url = f"https://opendata.dwd.de/weather/local_forecasts/mos/MOSMIX_L/single_stations/{stationid}/kml/MOSMIX_L_LATEST_{stationid}.kmz"
headers = {"If-None-Match": self.etags[url] if url in self.etags else ""} # type: ignore
try:
request = requests.get(url, headers=headers, timeout=30)
Expand All @@ -958,12 +945,73 @@ def download_latest_kml(self, stationid, force_hourly=False):
return
self.etags[url] = request.headers["ETag"] # type: ignore
with ZipFile(BytesIO(request.content), "r") as kmz:
# large RAM allocation
with kmz.open(kmz.namelist()[0], "r") as kml:
self.parse_kml(kml, force_hourly)
return kml.read()

except Exception as error:
print(f"Error in download_latest_kml: {type(error)} args: {error.args}")

def get_chunks(self):
def zipped_chunks():
# Iterable that yields the bytes of a zip file
with httpx.stream(
"GET",
"https://opendata.dwd.de/weather/local_forecasts/mos/MOSMIX_S/all_stations/kml/MOSMIX_S_LATEST_240.kmz",
) as r:
yield from r.iter_bytes(chunk_size=131072)

return stream_unzip(zipped_chunks())

def download_large_kml(self, stationid):
placemark = b""

for file_name, file_size, unzipped_chunks in self.get_chunks():
chunk1 = b""
chunk2 = b""
first_chunk = None

save_next = False
save_next_next = False
stop = False
# unzipped_chunks must be iterated to completion or UnfinishedIterationError will be raised
for chunk in unzipped_chunks:
if stop:
continue
if not first_chunk:
first_chunk = chunk
if save_next_next:
placemark = chunk1 + chunk2 + chunk
save_next_next = False
stop = True
if save_next:
chunk2 = chunk
save_next_next = True
save_next = False

if stationid.encode() in chunk:
chunk1 = chunk
save_next = True
if first_chunk:
start = placemark.find(b"<kml:Placemark>\n")

result = (
first_chunk[: first_chunk.find(b"<kml:Placemark>")]
+ placemark[
start : placemark.find(b"</kml:Placemark>\n", start) + 17
]
+ b"</kml:Document></kml:kml>"
)
return result

def download_latest_kml(self, stationid, force_hourly=False):
kml = (
self.download_large_kml(stationid)
if force_hourly
else self.download_small_kml(stationid)
)
if kml is not None:
self.parse_kml(kml)

def download_latest_report(self):
station_id = self.station_id
if len(station_id) == 4:
Expand Down
35 changes: 27 additions & 8 deletions tests/test_parsekml.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datetime import datetime, timezone
import unittest
from unittest.mock import patch

from simple_dwd_weatherforecast import dwdforecast
import dummy_data
Expand All @@ -14,9 +15,24 @@ def setUp(self):

def test_parse_kml(self):
with open(self.FILE_NAME, "rb") as kml:
self.dwd_weather.parse_kml(kml)
self.dwd_weather.parse_kml(kml.read())
self.assertEqual(self.dwd_weather.forecast_data, dummy_data.parsed_data)
self.assertEqual(self.dwd_weather.issue_time, datetime(2020, 11, 6, 3, 0, tzinfo=timezone.utc))
self.assertEqual(
self.dwd_weather.issue_time,
datetime(2020, 11, 6, 3, 0, tzinfo=timezone.utc),
)


def helper():
result = []
read_size = 131072
# Iterable that yields the bytes of a zip file
with open("development/MOSMIX_L_2023100809_stripped.kml", "rb") as kml:
content = kml.read(read_size)
while len(content) > 0:
result.append(content)
content = kml.read(read_size)
return zip([0], [0], [result])


class KMLParseFullTestCase(unittest.TestCase):
Expand All @@ -25,9 +41,12 @@ class KMLParseFullTestCase(unittest.TestCase):
def setUp(self):
self.dwd_weather = dwdforecast.Weather("L511")

def test_parse_kml(self):
with open(self.FILE_NAME, "rb") as kml:
self.dwd_weather.parse_kml(kml)
self.assertEqual(
self.dwd_weather.forecast_data, dummy_data_full.parsed_data
)
@patch(
"simple_dwd_weatherforecast.dwdforecast.Weather.get_chunks",
return_value=helper(),
)
def test_parse_kml(self, _):
self.dwd_weather.download_latest_kml(
self.dwd_weather.station_id, force_hourly=True
)
self.assertEqual(self.dwd_weather.forecast_data, dummy_data_full.parsed_data)
5 changes: 4 additions & 1 deletion tests/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ def setUp(self):
def test_download(self, _1, _2):
self.dwd_weather.update()
self.assertIsNotNone(self.dwd_weather.forecast_data)
self.assertIsNotNone(self.dwd_weather.forecast_data)
self.assertEqual(self.dwd_weather.station_id, "H889")
self.assertEqual(self.dwd_weather.issue_time.date(), datetime.now().date()) # type: ignore

@patch(
"simple_dwd_weatherforecast.dwdforecast.Weather.download_latest_report",
Expand Down Expand Up @@ -55,7 +58,7 @@ def test_issue_time_old(self, mock_function, _1, _2):
self.dwd_weather.issue_time = datetime(
*(time.strptime("2020-11-06T03:00:00.000Z", "%Y-%m-%dT%H:%M:%S.%fZ")[0:6]),
0,
timezone.utc
timezone.utc,
)
self.dwd_weather.update()
mock_function.assert_called()
Expand Down
2 changes: 2 additions & 0 deletions tests/test_update_hourly.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ def setUp(self):
def test_download(self):
self.dwd_weather.update(force_hourly=True)
self.assertIsNotNone(self.dwd_weather.forecast_data)
self.assertEqual(self.dwd_weather.station_id, "H889")
self.assertEqual(self.dwd_weather.issue_time.date(), datetime.now().date()) # type: ignore

@patch(
"simple_dwd_weatherforecast.dwdforecast.Weather.download_latest_kml",
Expand Down

0 comments on commit 2df14cc

Please sign in to comment.