Skip to content

Commit

Permalink
Merge branch 'main' into chore/data-update
Browse files Browse the repository at this point in the history
  • Loading branch information
CommanderStorm authored Apr 26, 2024
2 parents c0a1dfe + d256e52 commit e58e623
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 29 deletions.
42 changes: 20 additions & 22 deletions data/external/scrapers/tumonline.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def scrape_rooms() -> None:

logging.info("Scraping the rooms of tumonline")
room_index = {}
for building in buildings:
for building in tqdm(buildings, desc="Downloading the roomlist per building", unit="building")):
b_rooms = _retrieve_roomlist(
f_type="building",
f_name="pGebaeude",
Expand All @@ -125,8 +125,10 @@ def scrape_rooms() -> None:
# Only a few usage types are named in the filter; however, other usage types can also be
# filtered by their id. That's why we try every id in turn.
rooms = []
usage_id = 1 # Observed: usage ids go up to 223, the limit below is for safety
while usage_id <= 300 and len(rooms) < len(room_index):
usage_ids = list(range(1, 301)) # Observed: usage ids go up to 223, the limit below is for safety
for usage_id in tqdm(usage_ids, desc="Extend the rooms by their usage", unit="usage"):
if len(rooms) >= len(room_index):
break;
u_rooms = _retrieve_roomlist(f_type="usage", f_name="pVerwendung", f_value=usage_id, area_id=0)
for room in u_rooms:
roomcode = room["roomcode"]
Expand Down Expand Up @@ -264,25 +266,21 @@ def _retrieve_roomlist(f_type: str, f_name: str, f_value: int, area_id: int = 0)
"""Retrieve all rooms from the TUMonline room search list (multipage)"""
scraped_rooms = ParsedRoomsList(rooms=[], num_pages=1, current_page=0)

with tqdm(desc=f"Searching Rooms for {f_type} {f_value}", total=scraped_rooms.num_pages, leave=False) as prog:
while scraped_rooms.current_page < scraped_rooms.num_pages:
search_params = {
"pStart": len(scraped_rooms.rooms) + 1, # 1 + current_page * 30,
"pSuchbegriff": "",
"pGebaeudebereich": area_id, # 0 for all areas
"pGebaeude": 0,
"pVerwendung": 0,
"pVerwalter": 1,
f_name: f_value,
}
req = requests.post(f"{TUMONLINE_URL}/wbSuche.raumSuche", data=search_params, timeout=30)
rooms_list = _parse_rooms_list(BeautifulSoup(req.text, "lxml"))
scraped_rooms = scraped_rooms.merge(rooms_list)

if prog.total != rooms_list.num_pages:
prog.reset(rooms_list.num_pages)
prog.update(1)
maybe_sleep(1.5)
while scraped_rooms.current_page < scraped_rooms.num_pages:
search_params = {
"pStart": len(scraped_rooms.rooms) + 1, # 1 + current_page * 30,
"pSuchbegriff": "",
"pGebaeudebereich": area_id, # 0 for all areas
"pGebaeude": 0,
"pVerwendung": 0,
"pVerwalter": 1,
f_name: f_value,
}
req = requests.post(f"{TUMONLINE_URL}/wbSuche.raumSuche", data=search_params, timeout=30)
rooms_list = _parse_rooms_list(BeautifulSoup(req.text, "lxml"))
scraped_rooms = scraped_rooms.merge(rooms_list)

maybe_sleep(1.5)
return scraped_rooms.rooms


Expand Down
6 changes: 6 additions & 0 deletions data/processors/areatree/config.areatree
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
0510:Verwaltungsbau (Z10):
0511:Elektro/Werkstatt/Lösungsmittel (Z11):
0512:Garagen (Z12):
0597:Vorplatz Audi-Max:
15,17:Klinikum rechts der Isar (MRI)|MRI:mri[site]
1501:Chirurgische Klinik (Bau 501):
1502:Radiologie, Orthopädie, Sportorthopädie, klinische Toxikologie (Bau 502):
Expand Down Expand Up @@ -171,6 +172,11 @@
2941:Campus D, Georg-Brauchle-Ring 60/62:
2353:Eislaufstadion Ost (Olympia):
2354:Regattaanlage Oberschleißheim (Olympia):
-2364:Wetterschutzhaus / WSH 1:
-2365:Wetterschutzhaus / WSH 2:
-2367:Wetterschutzhaus / WSH 4:
-2368:Wetterschutzhaus / WSH 5:
-2369:Wetterschutzhaus / WSH 6:
240:Holzforschung:holzforschung[site]
2401:Altbau Winzererstr. 45:
2402:Neubau Winzererstr. 45:
Expand Down
16 changes: 16 additions & 0 deletions data/sources/01_areas-extended.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,10 @@ zentralgelaende:
#"0512": # Garagen (Z12)
# not on OSM

"0597":
osm: [ "node/390320112" ] # osm is only an artwork
coords: { lat: 48.1489079, lon: 11.5673019 }

# Missing coords:
#mri
#biederstein
Expand Down Expand Up @@ -522,6 +526,18 @@ heilbronn:
# These are actually the coordinates of the newly built one
coords: { lat: 48.18232, lon: 11.54485 }

# The coordinates for 2364-2369 are unknown and it is unclear where these buildings are => the address is taken as the coordinate
"2364":
coords: { lat: 48.1797454, lon: 11.5464106 }
"2365":
coords: { lat: 48.1797454, lon: 11.5464106 }
"2367":
coords: { lat: 48.1797454, lon: 11.5464106 }
"2368":
coords: { lat: 48.1797454, lon: 11.5464106 }
"2369":
coords: { lat: 48.1797454, lon: 11.5464106 }

2803:
coords: { lat: 48.13140, lon: 11.58564 }
osm: [ "way/30046693" ]
Expand Down
2 changes: 1 addition & 1 deletion webclient/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"@heroicons/vue": "2.1.3",
"@nuxt/content": "2.12.1",
"@nuxt/image": "1.6.0",
"@nuxtjs/color-mode": "3.4.0",
"@nuxtjs/color-mode": "3.4.1",
"@nuxtjs/partytown": "1.5.0",
"@vueuse/core": "10.9.0",
"@vueuse/nuxt": "10.9.0",
Expand Down
26 changes: 20 additions & 6 deletions webclient/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e58e623

Please sign in to comment.