Skip to content

Commit

Permalink
Remove stations cataloge due to erroneous data
Browse files Browse the repository at this point in the history
  • Loading branch information
FL550 committed Oct 9, 2023
1 parent f77296c commit a66f98d
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 78 deletions.
88 changes: 11 additions & 77 deletions development/generate_stations.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,59 +28,6 @@
poi_links = [link.text[0:-9].replace("_", "") for link in soup.find_all("a")]
print("Done.")

print("Retrieving station name catalogue...")
while True:
request = requests.get(
"https://www.dwd.de/DE/leistungen/klimadatendeutschland/statliste/statlex_html.html?view=nasPublication"
)
if request.status_code == 200:
break
print("Parsing...")
soup = BeautifulSoup(request.text, "html.parser")
print("Done.")
stations_catalogue = []
now = datetime.datetime.now()
for row in soup.table.find_all("tr"):
cols = row.find_all("td")
if len(cols) > 5 and "." in cols[10].text:
end_year = int(cols[10].text.split(".")[2])
if end_year >= now.year:
stations_catalogue.append(
{
"id": cols[1].text.strip(),
"kennung": cols[3].text.strip(),
"name": cols[0].text.strip(),
"lat": cols[4].text.strip(),
"lon": cols[5].text.strip(),
"elev": cols[6].text.strip(),
"bundesland": cols[8].text.strip(),
"report_available": 1 if cols[3].text.strip() in poi_links else 0,
"date": datetime.datetime.strptime(
cols[10].text.strip(), "%d.%m.%Y"
),
}
)


stations_catalogue = pd.DataFrame(
stations_catalogue,
columns=[
"id",
"kennung",
"name",
"lat",
"lon",
"elev",
"bundesland",
"report_available",
"date",
],
)
stations_catalogue = stations_catalogue[
stations_catalogue["date"]
== stations_catalogue.groupby("id")["date"].transform("max")
].drop_duplicates()
print(f"Found {len(stations_catalogue)} active stations.")
range_iter = iter(range(len(mosmix_data) - 1))
first_run = True
stations = {}
Expand All @@ -103,30 +50,17 @@
"report_available": 1 if group_id in poi_links else 0,
"bundesland": "",
}
try:
station.update(
[
(
k,
stations_catalogue[stations_catalogue["kennung"] == group("id")][
k
].values[0],
)
for k in ("name", "lat", "lon", "elev", "bundesland")
]
)
except IndexError or KeyError:
station.update(
{
"name": group("name").title(),
"lat": group("lat"),
"lon": group("lon"),
"elev": group("elev"),
"bundesland": bundeslaender[f'{group("lat")};{group("lon")}']
if f'{group("lat")};{group("lon")}' in bundeslaender
else "",
}
)
station.update(
{
"name": group("name").title(),
"lat": group("lat"),
"lon": group("lon"),
"elev": group("elev"),
"bundesland": bundeslaender[f'{group("lat")};{group("lon")}']
if f'{group("lat")};{group("lon")}' in bundeslaender
else "",
}
)
stations[group_id] = station

with open("./stations.json", "w", encoding="utf-8") as f:
Expand Down
2 changes: 1 addition & 1 deletion stations.json

Large diffs are not rendered by default.

0 comments on commit a66f98d

Please sign in to comment.