Skip to content

Commit

Permalink
Merge branch 'main' into chore/inline-feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
CommanderStorm authored Apr 28, 2024
2 parents 8637fc7 + 4d328ca commit 330639f
Show file tree
Hide file tree
Showing 22 changed files with 6,973 additions and 5,182 deletions.
23 changes: 13 additions & 10 deletions data/external/scrapers/nat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,19 @@
from multiprocessing.pool import ThreadPool
from pathlib import Path

import backoff
import requests
from external.scraping_utils import _download_file, CACHE_PATH
from tqdm import tqdm
from tqdm.contrib.concurrent import thread_map

from external.scraping_utils import _download_file, CACHE_PATH
from utils import TranslatableStr as _

NAT_API_URL = "https://api.srv.nat.tum.de/api/v1/rom"
NAT_CACHE_DIR = CACHE_PATH / "nat"


@backoff.on_exception(backoff.expo, requests.exceptions.RequestException)
def scrape_buildings():
"""Retrieve the buildings as in the NAT roomfinder."""
logging.info("Scraping the buildings of the NAT")
Expand Down Expand Up @@ -183,8 +186,9 @@ def _download_and_merge_room(base):
"""Download the room information and merge it with the base information."""
room_code = base["room_code"]
target_filepath = NAT_CACHE_DIR / f"room_{room_code}.json"
downloaded_file = _download_file(f"{NAT_API_URL}/{room_code}", target_filepath, quiet=True)
if not downloaded_file:
try:
downloaded_file = _download_file(f"{NAT_API_URL}/{room_code}", target_filepath)
except requests.exceptions.RequestException:
return None
content = json.loads(downloaded_file.read_text(encoding="utf-8"))
for useless_key in ["events_end", "events_start"]:
Expand Down Expand Up @@ -230,13 +234,12 @@ def _get_base_room_infos():


def _try_download_room_base_info(start: int, batch: int) -> tuple[tuple[int, int], Path | None]:
    """Try to download one batch of base room infos from the NAT API.

    Args:
        start: offset of the first room in this batch
        batch: number of rooms requested in this batch

    Returns:
        The requested ``(start, batch)`` window together with the path of the
        cached json file, or ``None`` in place of the path if the download
        failed with a request error (the caller retries smaller batches).
    """
    try:
        url = f"{NAT_API_URL}/?limit={batch}&offset={start}"
        file_path = NAT_CACHE_DIR / f"rooms_base_{start}_to_{start + batch - 1}.json"
        return (start, batch), _download_file(url, file_path)
    except requests.exceptions.RequestException:
        # failure is expected for too-large windows; signalled via None, not raised
        return (start, batch), None


def _report_undownloadable(undownloadable: list[int]) -> None:
Expand Down
13 changes: 10 additions & 3 deletions data/external/scrapers/roomfinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pathlib import Path
from typing import Literal, TypedDict

import requests
import utm
from defusedxml import ElementTree as ET
from tqdm import tqdm
Expand Down Expand Up @@ -203,7 +204,7 @@ def _download_maps(used_maps):
# Download as file
url = f"{ROOMFINDER_API_URL}/getMapImage?m_id={_map[1].removeprefix('rf')}"
filepath = CACHE_PATH / "maps" / "roomfinder" / f"{_map[1]}.gif"
_download_file(url, filepath, quiet=True)
_download_file(url, filepath)
convert_to_webp(filepath)

map_data = {
Expand Down Expand Up @@ -244,9 +245,15 @@ def _download_map(_map_id: str, e_id: str, e_type: Literal["room", "building"])
if e_type == "room":
base_url = "https://portal.mytum.de/campus/roomfinder/getRoomPlacemark"
url = f"{base_url}?roomid={urllib.parse.quote_plus(e_id)}&mapid={_map_id.removeprefix('rf')}"
return _download_file(url, filepath, quiet=True)
try:
_download_file(url, filepath)
except requests.exceptions.RequestException:
return None
if e_type == "building":
base_url = "https://portal.mytum.de/campus/roomfinder/getBuildingPlacemark"
url = f"{base_url}?b_id={e_id}&mapid={_map_id.removeprefix('rf')}"
return _download_file(url, filepath, quiet=True)
try:
_download_file(url, filepath)
except requests.exceptions.RequestException:
return None
raise RuntimeError(f"Unknown entity type: {e_type}")
13 changes: 11 additions & 2 deletions data/external/scrapers/tumonline.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import typing
from pathlib import Path

import backoff
import requests
from bs4 import BeautifulSoup, element
from defusedxml import ElementTree as ET
Expand Down Expand Up @@ -193,6 +194,7 @@ def scrape_usages() -> None:
json.dump(usages, file, indent=2, sort_keys=True)


@backoff.on_exception(backoff.expo, requests.exceptions.RequestException)
def scrape_orgs(lang: typing.Literal["de", "en"]) -> None:
"""
Retrieve all organisations in TUMonline, that may operate rooms.
Expand Down Expand Up @@ -261,6 +263,12 @@ def merge(self, other: "ParsedRoomsList") -> "ParsedRoomsList":
)


@backoff.on_exception(backoff.expo, requests.exceptions.RequestException)
def _tumonline_roomsearch(search_params) -> ParsedRoomsList:
    """Run one TUMonline room search and parse the resulting rooms list.

    Transient request errors are retried with exponential backoff.
    """
    response = requests.post(f"{TUMONLINE_URL}/wbSuche.raumSuche", data=search_params, timeout=30)
    soup = BeautifulSoup(response.text, "lxml")
    return _parse_rooms_list(soup)


@functools.cache
def _retrieve_roomlist(f_type: str, f_name: str, f_value: int, area_id: int = 0) -> list[ParsedRoom]:
"""Retrieve all rooms from the TUMonline room search list (multipage)"""
Expand All @@ -276,8 +284,7 @@ def _retrieve_roomlist(f_type: str, f_name: str, f_value: int, area_id: int = 0)
"pVerwalter": 1,
f_name: f_value,
}
req = requests.post(f"{TUMONLINE_URL}/wbSuche.raumSuche", data=search_params, timeout=30)
rooms_list = _parse_rooms_list(BeautifulSoup(req.text, "lxml"))
rooms_list = _tumonline_roomsearch(search_params)
scraped_rooms = scraped_rooms.merge(rooms_list)

maybe_sleep(1.5)
Expand Down Expand Up @@ -412,6 +419,7 @@ def _get_roomsearch_xml(url: str, params: dict[str, str | int], cache_fname: str
return BeautifulSoup(elem.text, "lxml")


@backoff.on_exception(backoff.expo, requests.exceptions.RequestException)
def _get_xml(url: str, params: dict[str, str | int], cache_fname: str) -> ET:
cache_path = CACHE_PATH / cache_fname
if cache_path.exists():
Expand All @@ -425,6 +433,7 @@ def _get_xml(url: str, params: dict[str, str | int], cache_fname: str) -> ET:
return ET.fromstring(req.text)


@backoff.on_exception(backoff.expo, requests.exceptions.RequestException)
def _get_html(url: str, cached_xml_file: Path) -> BeautifulSoup:
if cached_xml_file.exists():
with open(cached_xml_file, encoding="utf-8") as file:
Expand Down
30 changes: 14 additions & 16 deletions data/external/scraping_utils.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,26 @@
import logging
import time
import urllib.request
from pathlib import Path
from urllib.error import HTTPError

import backoff
import requests

CACHE_PATH = Path(__file__).parent / "results"


def maybe_sleep(duration: float) -> None:
    """Sleep for the given duration, but only if the script was called during a workday and working hours.

    Outside Sat/Sun (tm_wday 5/6) and between 07:00 and 20:59 UTC the upstream
    servers see real traffic, so we rate-limit ourselves; at night and on
    weekends we skip the sleep to speed up scraping.
    """
    now = time.gmtime()
    if now.tm_wday not in (5, 6) and 7 <= now.tm_hour <= 20:
        time.sleep(duration)


def _download_file(url: str, target_cache_file: Path, quiet: bool = False, quiet_errors: bool = False) -> Path | None:
if not target_cache_file.exists():
# url parameter does not allow path traversal, because we build it further up in the callstack
try:
urllib.request.urlretrieve(url, target_cache_file) # nosec: B310
except HTTPError as error:
if not quiet_errors:
logging.warning(f"GET {url} -> Failed to retrieve because: {error}")
return None
if not quiet:
logging.info(f"GET {url}")

@backoff.on_exception(backoff.expo, requests.exceptions.RequestException)
def _download_file(url: str, target_cache_file: Path) -> Path | None:
    """Download ``url`` to ``target_cache_file``, retrying request errors with backoff.

    Any pre-existing file is removed first so every call fetches fresh data.
    The payload is streamed into a temporary ``.part`` file and only renamed to
    the final path once the download completed — otherwise a mid-stream
    RequestException would leave a truncated file that later callers would
    mistake for a valid cache entry.

    Returns:
        The path of the downloaded file.
    """
    if target_cache_file.exists():
        target_cache_file.unlink()
    # url parameter does not allow path traversal, because we build it further up in the callstack
    partial_file = target_cache_file.with_suffix(target_cache_file.suffix + ".part")
    with requests.get(url, stream=True, timeout=10) as r:
        r.raise_for_status()
        with open(partial_file, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    # atomic on POSIX: the final path either has the old state (absent) or the full file
    partial_file.replace(target_cache_file)
    return target_cache_file
27 changes: 15 additions & 12 deletions data/processors/sitemap.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from pathlib import Path
from typing import Literal, TypedDict

import backoff
import requests
from defusedxml import ElementTree as defusedET

from utils import DEBUG_MODE

OLD_DATA_URL = "https://nav.tum.de/cdn/api_data.json"
Expand Down Expand Up @@ -48,7 +50,11 @@ def generate_sitemap() -> None:
# sitemaps name. In case there aren't, we assume this sitemap is new,
# and all entries will be marked as changed
old_sitemaps = _download_online_sitemaps()
old_data = _download_old_data()
try:
old_data = _download_old_data()
except requests.exceptions.RequestException as error:
logging.warning(f"Could not download online data because of {error}. Assuming all entries are new.")
old_data = []

sitemaps: Sitemaps = _extract_sitemap_data(new_data, old_data, old_sitemaps)

Expand All @@ -58,20 +64,17 @@ def generate_sitemap() -> None:
_write_sitemapindex_xml(OUTPUT_DIR / "sitemap.xml", sitemaps)


@backoff.on_exception(backoff.expo, requests.exceptions.RequestException)
def _download_old_data() -> list:
    """Download the currently online data from the server.

    Transient request errors are retried with exponential backoff; callers
    handle a final RequestException themselves.

    Returns:
        The deployed API data as a list (a dict payload is flattened to its
        values), or an empty list if the server did not answer with HTTP 200.
    """
    req = requests.get(OLD_DATA_URL, headers={"Accept-Encoding": "gzip"}, timeout=120)
    if req.status_code != 200:
        logging.warning(f"Could not download online data because of {req.status_code=}. Assuming all are new")
        return []
    old_data = req.json()
    if isinstance(old_data, dict):
        # the API may serve a keyed mapping; downstream diffing expects a flat list
        old_data = list(old_data.values())
    return old_data


def _extract_sitemap_data(new_data: list, old_data: list, old_sitemaps: SimplifiedSitemaps) -> Sitemaps:
Expand Down
3 changes: 2 additions & 1 deletion data/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
backoff~=2.2.1
beautifulsoup4~=4.12.2
defusedxml~=0.7.1
lxml~=5.2.0
numba~=0.59.0rc1
Pillow~=10.3.0
pydantic~=2.7.0
pytest~=8.2.0
pyyaml~=6.0
requests~=2.31.0
ruamel.yaml~=0.18.5
Expand Down
71 changes: 0 additions & 71 deletions webclient/.eslintrc.cjs

This file was deleted.

5 changes: 0 additions & 5 deletions webclient/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,3 @@ coverage
*.njsproj
*.sln
*.sw?
/cdn/

# lockfiles
pnpm-lock.yaml
package-lock.json
5 changes: 2 additions & 3 deletions webclient/components/AppSearchBar.vue
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ watchEffect(() => {
<ul v-for="s in data.sections" v-cloak :key="s.facet" class="flex flex-col gap-2">
<div class="flex items-center">
<span class="text-md text-zinc-800 me-4 flex-shrink">{{ t(`sections.${s.facet}`) }}</span>
<div class="border-zinc-800 flex-grow border-t"></div>
<div class="border-zinc-800 flex-grow border-t" />
</div>

<template v-for="(e, i) in s.entries" :key="e.id">
Expand All @@ -181,8 +181,7 @@ watchEffect(() => {
@click="searchBarFocused = false"
@mousedown="keep_focus = true"
@mouseover="highlighted = null"
>
</SearchResultItem>
/>
</template>
<li class="-mt-2">
<Btn
Expand Down
3 changes: 1 addition & 2 deletions webclient/components/DetailsInteractiveMap.vue
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ function loadInteractiveMap(fromUi = false) {
if (document.getElementById("interactive-map")?.classList.contains("maplibregl-map")) {
marker.value?.remove();
} else {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore recursive calls are not supported by ts
// @ts-expect-error recursive calls are not supported by ts
map.value = initMap("interactive-map");
document.getElementById("interactive-map")?.classList.remove("loading");
Expand Down
3 changes: 1 addition & 2 deletions webclient/components/FeedbackModal.vue
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@ const feedback = useFeedback();
class="focusable bg-zinc-200 border-zinc-400 resize-y rounded border px-2 py-1"
:placeholder="t('message')"
rows="6"
>
</textarea>
/>
<p class="text-zinc-500 text-xs">
{{
{
Expand Down
2 changes: 1 addition & 1 deletion webclient/components/RoomfinderImageLocation.vue
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ onMounted(draw);
'max-w-2xl': map.height <= map.width,
}"
>
<canvas :id="props.id" class="w-full" :width="map.width" :height="map.height"></canvas>
<canvas :id="props.id" class="w-full" :width="map.width" :height="map.height" />
</div>
</template>

Expand Down
4 changes: 2 additions & 2 deletions webclient/components/SearchResultItem.vue
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ type RoomEntry = components["schemas"]["RoomEntry"];
<small>
{{ item.subtext }}
<template v-if="item.type === 'room' || item.type === 'virtual_room' || item.type === 'poi'"
>, <b v-html="item.subtext_bold"></b
></template>
>, <b v-html="item.subtext_bold"
/></template>
</small>
</div>
</NuxtLink>
Expand Down
Loading

0 comments on commit 330639f

Please sign in to comment.