diff --git a/scripts/build-site.sh b/scripts/build-site.sh
index 88f76e2f..1a3ae00b 100755
--- a/scripts/build-site.sh
+++ b/scripts/build-site.sh
@@ -18,8 +18,8 @@
 svn checkout --username $SVN_USER --password $SVN_PASSWORD --no-auth-cache --dep \
     https://svn.vsp.tu-berlin.de/repos/public-svn/matsim/scenarios/countries/de/episim/original-data/Abwasser/
 
-pip install html_to_json
-python3 scripts/sewage-data-parser.py
-cp *abwassersurveillance* Abwasser/
+pip install requests beautifulsoup4 bs2json
+python3 scripts/sewage_data_parser.py Köln
+cp *sewage_data* Abwasser/
 
 svn commit --username $SVN_USER --password $SVN_PASSWORD --no-auth-cache -m "autobuild: $TIMESTAMP" Abwasser
diff --git a/scripts/sewage-data-parser.py b/scripts/sewage-data-parser.py
deleted file mode 100644
index 4ce6ca59..00000000
--- a/scripts/sewage-data-parser.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import urllib.request
-import html_to_json
-import json
-import csv
-
-# download and parse the Abwassersurveillance from the rki
-opener = urllib.request.FancyURLopener({})
-url = "https://www.rki.de/DE/Content/Institut/OrgEinheiten/Abt3/FG32/Abwassersurveillance/Bericht_Abwassersurveillance.html?__blob=publicationFile"
-f = opener.open(url)
-content = f.read()
-
-# convert the html-string to json, get the data, convert the data to json and get the x- and y-data
-output_json = html_to_json.convert(content)
-current = output_json["html"][0]["body"][0]["div"][0]["script"][1]["_value"]
-current = json.loads(current)["x"]["data"]
-dates = current[3]["x"]
-viruslast = current[3]["y"]
-
-# write the data to a .csv file
-with open('abwassersurveillance.csv', 'w', newline='') as file:
-    writer = csv.writer(file)
-    field = ["date", "viruslast"]
-
-    writer.writerow(field)
-
-    for i in range(len(dates)):
-        writer.writerow([dates[i], viruslast[i]])
\ No newline at end of file
diff --git a/scripts/sewage_data_parser.py b/scripts/sewage_data_parser.py
new file mode 100644
index 00000000..f50e43dc
--- /dev/null
+++ b/scripts/sewage_data_parser.py
@@ -0,0 +1,131 @@
+import argparse
+import json
+import requests
+from bs4 import BeautifulSoup
+from bs2json import BS2Json
+import csv
+# import pandas as pd
+# import matplotlib.pyplot as plt
+
+def fetch_data_from_website(url):
+    # Send an HTTP request to the website
+    response = requests.get(url)
+    response.raise_for_status()  # Check for any HTTP errors
+    return response.text
+
+def parse_html(html_content):
+    # Parse HTML content using BeautifulSoup
+    soup = BeautifulSoup(html_content, 'html.parser')
+    return soup
+
+def find_matching_scripts(soup, text_to_find):
+    # Find all <script> tags in the parsed HTML
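Note: the new-file hunk above is cut off after the first line of find_matching_scripts, so roughly 110 of the file's 131 lines are not shown. The sketch below is a hypothetical reconstruction of the rest of scripts/sewage_data_parser.py, inferred only from the visible imports, the deleted parser, and the new call site in build-site.sh (python3 scripts/sewage_data_parser.py Köln, followed by cp *sewage_data* Abwasser/). The extract_city_series and save_to_csv names, the sewage_data_{city}.csv file name, and the '"data"' marker text are assumptions, and it decodes the embedded Plotly payload with json directly rather than going through BS2Json:

# Hypothetical sketch -- everything below find_matching_scripts is inferred,
# not the committed code, which is truncated in the diff above.
import argparse
import csv
import json

import requests
from bs4 import BeautifulSoup


def fetch_data_from_website(url):
    # Download the report page; raise on HTTP errors
    response = requests.get(url)
    response.raise_for_status()
    return response.text


def parse_html(html_content):
    return BeautifulSoup(html_content, "html.parser")


def find_matching_scripts(soup, text_to_find):
    # Keep only the <script> tags whose text contains the marker,
    # i.e. the tag that embeds the figure data
    return [s for s in soup.find_all("script") if text_to_find in s.get_text()]


def extract_city_series(script_text, city):
    # Assumption: the page embeds a Plotly payload of the form
    # {"x": {"data": [trace, ...]}}, the same structure the deleted parser
    # indexed via html_to_json. Each trace carries a "name" (the sampling
    # site) plus parallel "x"/"y" arrays of dates and viral loads.
    payload = json.loads(script_text)
    for trace in payload["x"]["data"]:
        if trace.get("name") == city:
            return trace["x"], trace["y"]
    raise ValueError(f"no trace named {city!r} in the report data")


def save_to_csv(dates, loads, city):
    # File name must match the `cp *sewage_data* Abwasser/` glob in build-site.sh
    with open(f"sewage_data_{city}.csv", "w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(["date", "viruslast"])
        writer.writerows(zip(dates, loads))


def main():
    parser = argparse.ArgumentParser(
        description="Parse RKI sewage surveillance data for one sampling site")
    parser.add_argument("city", help='sampling site to extract, e.g. "Köln"')
    args = parser.parse_args()

    url = ("https://www.rki.de/DE/Content/Institut/OrgEinheiten/Abt3/FG32/"
           "Abwassersurveillance/Bericht_Abwassersurveillance.html?__blob=publicationFile")
    soup = parse_html(fetch_data_from_website(url))
    scripts = find_matching_scripts(soup, '"data"')
    if not scripts:
        raise RuntimeError("no script tag with figure data found")
    dates, loads = extract_city_series(scripts[0].get_text(), args.city)
    save_to_csv(dates, loads, args.city)


if __name__ == "__main__":
    main()

Under these assumptions the build step stays a single call: python3 scripts/sewage_data_parser.py Köln writes sewage_data_Köln.csv, which the cp glob then moves into the SVN checkout for the commit.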