Merge branch 'main' into calendar_frontend
CommanderStorm authored Aug 10, 2023
2 parents 8bfacc9 + e47abf6 commit 45a5f7c
Showing 72 changed files with 88,414 additions and 442 deletions.
4 changes: 4 additions & 0 deletions .github/FUNDING.yml
@@ -0,0 +1,4 @@
# These are supported funding model platforms

github: TUM-Dev
open_collective: tum-dev
2 changes: 1 addition & 1 deletion .github/workflows/data-cicd.yml
@@ -38,7 +38,7 @@ jobs:
# - name: Install dependencies
# run: |
# python -m pip install --upgrade pip
# pip install -r data/requirements.txt -r requirements_dev.txt -r server/main-api/test/requirements.txt
# pip install -r data/requirements.txt -r requirements-dev.txt -r server/main-api/test/requirements.txt
# - name: Run mypy
# run: |
# mypy --strict data
2 changes: 1 addition & 1 deletion .github/workflows/linting.yml
@@ -30,6 +30,6 @@ jobs:
- name: Install python dependencies
run: |
python -m pip install --upgrade pip
pip install -r data/requirements.txt -r requirements_dev.txt -r server/main-api/test/requirements.txt
pip install -r data/requirements.txt -r requirements-dev.txt -r server/main-api/test/requirements.txt
- name: Run pre-commit
uses: pre-commit/action@v3.0.0
37 changes: 37 additions & 0 deletions .github/workflows/webclient-cicd.yml
@@ -9,8 +9,45 @@ on:
workflow_dispatch:

jobs:
webclient-ui-test:
strategy:
matrix:
browser:
- chrome
- firefox
runs-on: ubuntu-latest
container:
image: cypress/browsers:node16.16.0-chrome107-ff107
options: --user 1001
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
with:
cache: 'npm'
cache-dependency-path: 'webclient/package-lock.json'
- name: Cypress run
uses: cypress-io/github-action@v5
with:
start: npm run dev
wait-on: "http://localhost:8000"
browser: ${{ matrix.browser }}
working-directory: webclient
webclient-linting:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
with:
cache: 'npm'
cache-dependency-path: 'webclient/package.json'
- run: npm install --prefix webclient
- run: npm run lint --prefix webclient
#- run: npm run type-check --prefix webclient
webclient-build:
uses: ./.github/workflows/_docker-build.yml
needs:
- webclient-linting
- webclient-ui-test
with:
image_suffix: webclient
context: ./webclient
2 changes: 1 addition & 1 deletion README.md
@@ -96,7 +96,7 @@ since we use [pre-commit](https://pre-commit.com/) to format our code, you can i
```bash
python3 -m venv venv
source venv/bin/activate
pip install -r data/requirements.txt -r server/test/requirements.txt -r requirements_dev.txt # for mypy the server and data requirements are needed
pip install -r data/requirements.txt -r server/test/requirements.txt -r requirements-dev.txt # for mypy the server and data requirements are needed
```

To format all files, run the following command:
2 changes: 1 addition & 1 deletion data/README.md
@@ -29,7 +29,7 @@ From the root of the project, run:
```bash
python3 -m venv venv
source venv/bin/activate
pip install -r data/requirements.txt -r requirements_dev.txt
pip install -r data/requirements.txt -r requirements-dev.txt
```

## Getting external data
2 changes: 1 addition & 1 deletion data/compile.py
@@ -24,7 +24,7 @@


# pylint: disable=too-many-locals,too-many-statements
def main():
def main() -> None:
"""Main function"""
# --- Read base data ---
logging.info("-- 00 areatree")
12 changes: 5 additions & 7 deletions data/external/scrapers/public_transport.py
@@ -49,9 +49,8 @@ def _load_bus_stations(stations: dict) -> None:
for sub in repeat_later:
if parent := stations.get(sub["parent"]):
parent["sub_stations"].append(sub)
else:
if sub["station_id"]:
logging.warning(f"{sub['name']} with id {sub['station_id']} has no parent in our data")
elif sub["station_id"]:
logging.warning(f"{sub['name']} with id {sub['station_id']} has no parent in our data")


def _load_train_stations(stations: dict) -> None:
@@ -89,13 +88,12 @@ def _load_train_stations(stations: dict) -> None:
for sub in repeat_later:
if parent := stations.get(sub["parent"]):
parent["sub_stations"].append(sub)
else:
if sub["station_id"]:
logging.warning(f"{sub['name']} with id {sub['station_id']} has no parent in our data")
elif sub["station_id"]:
logging.warning(f"{sub['name']} with id {sub['station_id']} has no parent in our data")


@cached_json("public_transport.json")
def scrape_stations():
def scrape_stations() -> list[dict]:
"""Scrape the stations from the MVV GTFS data and return them as a list of dicts"""
stations = {}
_load_train_stations(stations)
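For context, a minimal sketch of the sub-station linking this hunk simplifies. The station records are invented; only the control flow mirrors the scraper:

```python
import logging

# Invented, shortened station records; the real scraper builds these from the MVV GTFS data.
stations = {"de:09162:6": {"name": "Hauptbahnhof", "sub_stations": []}}
repeat_later = [
    {"station_id": "de:09162:6:1", "name": "Hauptbahnhof Gleis 1", "parent": "de:09162:6"},
    {"station_id": "de:09162:9999:1", "name": "Orphaned stop", "parent": "de:09162:9999"},
]

for sub in repeat_later:
    if parent := stations.get(sub["parent"]):
        parent["sub_stations"].append(sub)  # attach the stop to its known parent station
    elif sub["station_id"]:  # only warn for real stops; entries without an id are silently skipped
        logging.warning(f"{sub['name']} with id {sub['station_id']} has no parent in our data")
```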
17 changes: 9 additions & 8 deletions data/external/scrapers/roomfinder.py
@@ -4,7 +4,8 @@
import urllib.parse
import xmlrpc.client
import zipfile
from typing import TypedDict
from pathlib import Path
from typing import Iterator, Literal, TypedDict

from defusedxml import ElementTree as ET
from external.scraping_utils import _download_file, CACHE_PATH, cached_json, maybe_sleep
@@ -102,9 +103,9 @@ def scrape_rooms():
return sorted(rooms, key=lambda r: (r["b_id"], r["r_id"]))


def _guess_queries(rooms, n_rooms):
def _guess_queries(rooms: list, n_rooms: int) -> Iterator[str]:
"""
Iterates through all single/double digit/ascii_lowercase strings to find successfull queries
Iterates through all single/double character strings consisting of digit/ascii_lowercase to find successful queries
Ordering because of number of entries:
- single before double
@@ -176,7 +177,7 @@ def _download_maps(used_maps):
if _map[1] == 9:
continue

f_path = _download_map(_map, e_id, e_type)
f_path = _download_map(_map[1], e_id, e_type)

with zipfile.ZipFile(f_path, "r") as zip_f, zip_f.open("RoomFinder.kml") as file:
root = ET.fromstring(file.read())
@@ -194,14 +195,14 @@
return maps


def _download_map(_map, e_id, e_type):
filepath = CACHE_PATH / "maps" / "roomfinder" / "kmz" / f"{_map[1]}.kmz"
def _download_map(_map_id: int, e_id: str, e_type: Literal["room", "building"]) -> Path | None:
filepath = CACHE_PATH / "maps" / "roomfinder" / "kmz" / f"{_map_id}.kmz"
if e_type == "room":
base_url = "https://portal.mytum.de/campus/roomfinder/getRoomPlacemark"
url = f"{base_url}?roomid={urllib.parse.quote_plus(e_id)}&mapid={_map[1]}"
url = f"{base_url}?roomid={urllib.parse.quote_plus(e_id)}&mapid={_map_id}"
return _download_file(url, filepath)
if e_type == "building":
base_url = "https://portal.mytum.de/campus/roomfinder/getBuildingPlacemark"
url = f"{base_url}?b_id={e_id}&mapid={_map[1]}"
url = f"{base_url}?b_id={e_id}&mapid={_map_id}"
return _download_file(url, filepath)
raise RuntimeError(f"Unknown entity type: {e_type}")
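A hedged usage sketch of the fixed helper: after this change only the numeric map id is passed, not the whole map tuple. The room and map ids below are invented:

```python
# e_type selects which Roomfinder endpoint is queried; the result is the cached .kmz path
# (the new return annotation Path | None suggests None when no file could be downloaded).
f_path = _download_map(_map_id=142, e_id="5602.EG.001", e_type="room")
# builds: https://portal.mytum.de/campus/roomfinder/getRoomPlacemark?roomid=5602.EG.001&mapid=142
```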
24 changes: 12 additions & 12 deletions data/external/scrapers/tumonline.py
@@ -16,7 +16,7 @@
TUMONLINE_URL = "https://campus.tum.de/tumonline"


def scrape_areas():
def scrape_areas() -> list[dict[str, typing.Any]]:
"""
Retrieve the building areas as in TUMonline.
@@ -31,7 +31,7 @@ def scrape_areas():
return [{"id": int(attrs), "name": text} for (attrs, text) in _parse_filter_options(filters, "pGebaeudebereich")]


def scrape_usages_filter():
def scrape_usages_filter() -> list[dict[str, typing.Any]]:
"""
Retrieve the room usage types that are available as a filter in TUMonline.
These are not all usage types known to TUMonline!
@@ -48,7 +48,7 @@ def scrape_usages_filter():


@cached_json("buildings_tumonline.json")
def scrape_buildings():
def scrape_buildings() -> list[dict[str, typing.Any]]:
"""
Retrieve the buildings as in TUMonline with their assigned TUMonline area.
This may retrieve TUMonline areas.
@@ -65,7 +65,7 @@ def scrape_buildings():
)
all_buildings = _parse_filter_options(filters, "pGebaeude")

buildings = []
buildings: list[typing.Any] = []
for area in areas:
filters_area = _get_roomsearch_xml(
_get_tumonline_api_url("wbSuche.cbRaumForm"),
@@ -89,7 +89,7 @@ def scrape_buildings():


@cached_json("rooms_tumonline.json")
def scrape_rooms():
def scrape_rooms() -> list[dict[str, typing.Any]]:
"""
Retrieve the rooms as in TUMonline including building and usage type.
For some room types (e.g. lecture halls) additional information is retrieved.
@@ -153,7 +153,7 @@ class Usage(typing.TypedDict):


@cached_json("usages_tumonline.json")
def scrape_usages():
def scrape_usages() -> list[Usage]:
"""
Retrieve all usage types available in TUMonline.
This may retrieve TUMonline rooms.
@@ -164,7 +164,7 @@ def scrape_usages():

logging.info("Scraping the room-usages of tumonline")

used_usage_types: dict[str,] = {}
used_usage_types: dict[str, typing.Any] = {}
for room in rooms:
if room["usage"] not in used_usage_types:
used_usage_types[room["usage"]] = room
@@ -193,7 +193,7 @@ def scrape_usages():


@cached_json("orgs-{lang}_tumonline.json")
def scrape_orgs(lang):
def scrape_orgs(lang: typing.Literal["de", "en"]) -> dict[str, typing.Any]:
"""
Retrieve all organisations in TUMonline, that may operate rooms.
@@ -255,7 +255,7 @@ class ParsedRoomsList(typing.NamedTuple):

@cached_json("tumonline/{f_value}.{area_id}.json")
def _retrieve_roomlist(f_type: str, f_name: str, f_value: int, area_id: int = 0) -> list[ParsedRoom]:
"""Retrieve all rooms (multi-page) from the TUMonline room search list"""
"""Retrieve all rooms from the TUMonline room search list (multipage)"""

scraped_rooms = ParsedRoomsList()

@@ -435,8 +435,8 @@ def _get_html(url: str, cached_xml_file: Path) -> BeautifulSoup:
return BeautifulSoup(result, "lxml")


def _get_tumonline_api_url(base_target):
# I have no idea, what this magic_string is, or why it exists..
# Usage is the same as from TUMonline..
def _get_tumonline_api_url(base_target: str) -> str:
# I have no idea, what this magic_string is, or why it exists
# Usage is the same as from TUMonline
magic_string = f"NC_{str(random.randint(0, 9999)).zfill(4)}" # nosec: random is not used security/crypto purposes
return f"{TUMONLINE_URL}/{base_target}/{magic_string}"
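For illustration, the helper only appends a random `NC_xxxx` suffix to the requested API path; the four digits below are arbitrary:

```python
_get_tumonline_api_url("wbSuche.cbRaumForm")
# -> "https://campus.tum.de/tumonline/wbSuche.cbRaumForm/NC_0817"  (last four digits are random)
```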
17 changes: 11 additions & 6 deletions data/external/scraping_utils.py
@@ -4,31 +4,36 @@
import time
import urllib.request
from pathlib import Path
from typing import Callable, ParamSpec, TypeVar
from urllib.error import HTTPError

CACHE_PATH = Path(__file__).parent / "cache"


def maybe_sleep(duration):
def maybe_sleep(duration: float) -> None:
"""
Sleep for the given duration, but only if the script was called during a workday and working hours.
"""
if time.gmtime().tm_wday not in [5, 6] and 5 <= time.gmtime().tm_hour <= 22:
time.sleep(duration)


def cached_json(filename: str):
P = ParamSpec("P")
R = TypeVar("R")


def cached_json(filename: str) -> Callable[[Callable[P, R]], Callable[P, R]]:
"""
Decorator which caches the functions' returned results in json format
:filename: where to store the file
"""

def decorator(func): # needed, as we want to pass filename to the annotation
decorator_filename = filename # needed, as otherwise this context would be lost
def decorator(func: Callable[[], R]) -> Callable[P, R]: # needed, as we want to pass filename to the annotation
decorator_filename: str = filename # needed, as otherwise this context would be lost

@functools.wraps(func)
def wrapper(*args, **kwargs):
def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
# prepare the filepath
wrapper_filename = decorator_filename
if args or kwargs:
@@ -37,7 +42,7 @@ def wrapper(*args, **kwargs):
# get already existing file
if path.exists():
with open(path, encoding="utf-8") as file:
return json.load(file)
return json.load(file) # type: ignore
# produce new file
result = func(*args, **kwargs)
with open(CACHE_PATH / wrapper_filename, "w", encoding="utf-8") as file:
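A minimal sketch of how the now fully typed decorator is used; the scraper function and filename are invented, but `scrape_stations` above follows the same pattern. Thanks to `ParamSpec`/`TypeVar`, the wrapped function keeps its signature and return type for mypy:

```python
@cached_json("example_stations.json")
def scrape_example_stations() -> list[dict]:
    """Expensive scrape whose result is stored as JSON under CACHE_PATH on the first call."""
    return [{"station_id": "de:09162:6", "name": "Hauptbahnhof"}]

first = scrape_example_stations()   # runs the scraper and writes CACHE_PATH / "example_stations.json"
second = scrape_example_stations()  # replayed from the cached file; still typed as list[dict]
```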
1 change: 1 addition & 0 deletions data/processors/areatree/models.py
@@ -28,3 +28,4 @@ class AreatreeBuidling(TypedDict):
type: str
name: str
short_name: NotRequired[str]
parents: list[str]
12 changes: 7 additions & 5 deletions data/processors/areatree/process.py
@@ -1,12 +1,13 @@
import logging
from pathlib import Path
from typing import Iterator

from processors.areatree import models

AREATREE_FILE = Path(__file__).parent / "config.areatree"


def read_areatree():
def read_areatree() -> dict[str, models.AreatreeBuidling]:
"""Reads the areatree file and the basic data, gained from the areatree"""

parent_stack: list[str] = []
@@ -21,13 +22,13 @@ def read_areatree():
elif (indent // 2) < len(parent_stack):
parent_stack = parent_stack[: indent // 2]

building_data = _parse_areatree_line(line)
building_data = _parse_areatree_line(line, parent_stack[:])
data[building_data["id"]] = building_data
last_element = building_data["id"]
data[building_data["id"]] = {"parents": parent_stack[:], **building_data}
return data


def _areatree_lines():
def _areatree_lines() -> Iterator[str]:
"""
Generator that yields lines from the areatree file
@@ -59,7 +60,7 @@ def _split_line(line: str) -> tuple[str, str, str]:
return building_ids.strip(), raw_names.strip(), internal_id.strip()


def _parse_areatree_line(line: str) -> models.AreatreeBuidling:
def _parse_areatree_line(line: str, parents: list[str]) -> models.AreatreeBuidling:
"""Parses a line from the areatree file to reveal the correct parent and children"""
(building_ids, raw_names, internal_id) = _split_line(line)

@@ -71,6 +72,7 @@ def _parse_areatree_line(line: str) -> models.AreatreeBuidling:
"id": id_and_type["id"],
"type": id_and_type["type"],
"name": names["name"],
"parents": parents,
}
if "data_quality" in building_data:
result["data_quality"] = building_data["data_quality"]
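To make the new `parents` field concrete, a small illustrative sketch of the indentation handling. The areatree entries are invented, and the first branch assumes the unchanged upper half of the loop pushes the previous entry when the indentation grows:

```python
# Invented, simplified areatree lines: two spaces of indentation per nesting level.
lines = ["garching", "  mi", "    5602"]

parent_stack: list[str] = []
last_element = ""
data: dict[str, dict] = {}
for line in lines:
    indent = len(line) - len(line.lstrip(" "))
    if (indent // 2) > len(parent_stack):
        parent_stack.append(last_element)           # one level deeper: the previous entry becomes the parent
    elif (indent // 2) < len(parent_stack):
        parent_stack = parent_stack[: indent // 2]  # dedent: drop the parents we have left
    entry_id = line.strip()
    data[entry_id] = {"id": entry_id, "parents": parent_stack[:]}  # snapshot of the current stack
    last_element = entry_id

assert data["5602"]["parents"] == ["garching", "mi"]
```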