diff --git a/README.md b/README.md index b5cffd4ed..812f98d33 100644 --- a/README.md +++ b/README.md @@ -64,12 +64,7 @@ cd Navigatum ### Data Processing In case you do not want to work on the data processing, you can instead -download the latest compiled files: - -```bash -wget -P data/output https://nav.tum.de/cdn/api_data.json -wget -P data/output https://nav.tum.de/cdn/search_data.json -``` +download the latest compiled files by running the server. Else you can follow the steps in the [data documentation](data/README.md). @@ -84,9 +79,11 @@ docker compose -f docker-compose.local.yml up --build ``` > [!NOTE] -> While most of the setup is simple, we need to download data (only Oberbayern is needed) for the initial setup. This takes 1-2 minutes. -> Please first bring up a [postgis](https://postgis.net/) instance (for example via `docker compose -f docker-compose.local.yml up --build`) and then run: -> +> While most of the setup is simple, we need to download data (only Oberbayern is needed) for the initial setup. This +> takes 1-2 minutes. +> Please first bring up a [postgis](https://postgis.net/) instance (for example +> via `docker compose -f docker-compose.local.yml up --build`) and then run: +> > ```bash > wget -O data.pbf https://download.geofabrik.de/europe/germany/bayern/oberbayern-latest.osm.pbf > docker run -it -v $(pwd):/data -e PGPASSWORD=CHANGE_ME --network="host" iboates/osm2pgsql:latest osm2pgsql --create --slim --database postgres --user postgres --host 127.0.0.1 --port 5432 /data/data.pbf --hstore --hstore-add-index --hstore-column raw diff --git a/data/README.md b/data/README.md index 939099caa..b57606d4e 100644 --- a/data/README.md +++ b/data/README.md @@ -9,16 +9,21 @@ This folder contains: The code to retrieve external data, as well as externally retrieved data is located under `external`. > [!WARNING] -> A lot of this code is more a work in progress than finished. 
Especially features such as POIs, custom maps or other data types such as events are drafted but not yet fully implemented. +> A lot of this code is more a work-in-progress than finished. +> Especially features such as POIs, custom maps or other data types such as events are drafted but not yet fully implemented. > -> New external data might break the scripts from time to time, as either rooms or buildings are removed, the external data has errors or we make assumptions here that turn out to be wrong. +> New external data might break the scripts from time to time, +> - as either rooms or buildings are removed, +> - the external data has errors, +> - or we make assumptions here that turn out to be wrong. ## Getting started ### Prerequisites -For getting started, there are some system dependencys which you will need. -Please follow the [system dependencys docs](/resources/documentation/Dependencys.md) before trying to run this part of our project. +For getting started, there are some system dependencies which you will need. +Please follow the [system dependencies docs](/resources/documentation/Dependencys.md) before trying to run this part of +our project. ### Dependencies @@ -63,7 +68,8 @@ python3 tumonline.py python3 compile.py ``` -The exported datasets will be stored in `output/` as JSON files. +The exported datasets will be stored in `output/` +as [JSON](https://www.json.org/json-de.html)/[Parquet](https://wikipedia.org/wiki/Apache_Parquet) files. ### Directory structure @@ -92,18 +98,33 @@ data ```json { - "entry-id": { - "id": "entry-id", - "type": "room", - ... data as specified in `data-format.yaml` - }, - ... all other entries in the same form + "entry-id": { + "id": "entry-id", + "type": "room", + ... + data + as + specified + in + ` + data-format.yaml + ` + }, + ... 
+ all + other + entries + in + the + same + form } ``` ## Compilation process -The data compilation is made of indiviual processing steps, where each step adds new or modifies the current data. The basic structure of the data however stays the same from the beginning on and is specified in `data-format_*.yaml`. +The data compilation is made of individual processing steps, where each step adds new data or modifies the current data. The +basic structure of the data however stays the same from the beginning on and is specified in `data-format_*.yaml`. - **Step 00**: The first step reads the base root node, areas, buildings etc. from the `sources/00_areatree` file and creates an object collection (python dictionary) @@ -111,18 +132,18 @@ The data compilation is made of indiviual processing steps, where each step adds - **Steps 01-29**: Within these steps, new rooms or POIs might be added, however no new areas or buildings, since all areas and buildings have to be defined in the _areatree_. After them, no new entries are being added to the data. - - **Steps 0x**: Supplement the base data with extended custom data. - - **Steps 1x**: Import rooms and building information from external sources - - **Steps 2x**: Import POIs + - **Steps 0x**: Supplement the base data with extended custom data. + - **Steps 1x**: Import rooms and building information from external sources + - **Steps 2x**: Import POIs - **Steps 30-89**: Later steps are intended to augment the entries with even more information and to ensure a consistent format. After them, no new (external or custom) information should be added to the data. 
- - **Steps 3x**: Make data more coherent & structural stuff - - **Steps 4x**: Coordinates and maps - - **Steps 5x**: Add images - - **Steps 6x**: - - - **Steps 7x**: - - - **Steps 8x**: Generate properties and sections (such as overview sections) + - **Steps 3x**: Make data more coherent & structural stuff + - **Steps 4x**: Coordinates and maps + - **Steps 5x**: Add images + - **Steps 6x**: - + - **Steps 7x**: - + - **Steps 8x**: Generate properties and sections (such as overview sections) - **Steps 90-99**: Process and export for search. - **Step 100**: Export final data (for use in the API). Some temporary data fields might be removed at this point. @@ -136,12 +157,16 @@ Details about the formatting are given at the head of the file. ## License -The source data (i.e. all files located in `sources/` that are not images) is made available under the Open Database License: . -Any rights in individual contents of the database are licensed under the Database Contents License: . +The source data (i.e. all files located in `sources/` that are not images) is made available under the Open Database +License: . +Any rights in individual contents of the database are licensed under the Database Contents +License: . > [!WARNING] -> The images in `sources/img/` are subject to their own licensing terms, which are stated in the file `sources/img/img-sources.yaml`. -> The compiled database may contain contents from external sources (i.e. all files in `external/`) that do have different license terms. +> The images in `sources/img/` are subject to their own licensing terms, which are stated in the +> file `sources/img/img-sources.yaml`. +> The compiled database may contain contents from external sources (i.e. all files in `external/`) that do have +> different license terms. 
--- diff --git a/server/Cargo.lock b/server/Cargo.lock index e89300e2f..8e321a5db 100644 --- a/server/Cargo.lock +++ b/server/Cargo.lock @@ -76,7 +76,7 @@ dependencies = [ "ahash 0.8.11", "base64 0.22.1", "bitflags 2.6.0", - "brotli", + "brotli 6.0.0", "bytes", "bytestring", "derive_more", @@ -109,7 +109,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb" dependencies = [ "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -223,7 +223,7 @@ dependencies = [ "actix-router", "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -366,9 +366,24 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] +[[package]] +name = "argminmax" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52424b59d69d69d5056d508b260553afd91c57e21849579cd1f50ee8b8b88eaa" +dependencies = [ + "num-traits", +] + +[[package]] +name = "array-init-cursor" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" + [[package]] name = "arrayvec" version = "0.7.4" @@ -381,7 +396,7 @@ version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fec134f64e2bc57411226dfc4e52dec859ddfc7e711fc5e07b612584f000e4aa" dependencies = [ - "brotli", + "brotli 6.0.0", "flate2", "futures-core", "memchr", @@ -391,6 +406,28 @@ dependencies = [ "zstd-safe", ] +[[package]] +name = "async-stream" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", +] + [[package]] name = "async-trait" version = "0.1.81" @@ -399,7 +436,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -411,6 +448,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atoi_simd" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae037714f313c1353189ead58ef9eec30a8e8dc101b2622d461418fd59e28a9" + [[package]] name = "atomic-waker" version = "1.1.2" @@ -537,7 +580,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn", + "syn 2.0.72", "which", ] @@ -627,6 +670,17 @@ dependencies = [ "serde_with", ] +[[package]] +name = "brotli" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19483b140a7ac7174d34b5a581b406c64f84da5409d3e09cf4fff604f9270e67" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + [[package]] name = "brotli" version = "6.0.0" @@ -665,6 +719,20 @@ name = "bytemuck" version = "1.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", +] [[package]] name = "byteorder" @@ -724,7 +792,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -783,6 +851,28 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "chrono-tz" +version = "0.8.6" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -809,6 +899,18 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" +[[package]] +name = "comfy-table" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +dependencies = [ + "crossterm", + "strum", + "strum_macros", + "unicode-width", +] + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -926,6 +1028,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -960,6 +1071,28 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +[[package]] +name = "crossterm" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +dependencies = [ + "bitflags 2.6.0", + "crossterm_winapi", + "libc", + "parking_lot 0.12.3", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + [[package]] name = "crunchy" version = "0.2.2" @@ -997,7 +1130,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn", + "syn 2.0.72", ] [[package]] @@ -1008,7 +1141,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -1071,7 +1204,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn", + "syn 2.0.72", ] [[package]] @@ -1145,6 +1278,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" +[[package]] +name = "dyn-clone" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" + [[package]] name = "either" version = "1.13.0" @@ -1178,7 +1317,19 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn", + "syn 2.0.72", +] + +[[package]] +name = "enum_dispatch" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" +dependencies = [ + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.72", ] [[package]] @@ -1208,6 +1359,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "ethnum" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" + [[package]] name = "event-listener" version = "5.3.1" @@ -1235,6 +1392,18 @@ dependencies = [ "zune-inflate", ] +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "fast-float" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" + [[package]] name = "fastrand" version = "2.1.0" @@ -1304,6 +1473,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" +[[package]] +name = "foreign_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1396,7 +1571,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -1574,6 +1749,8 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash 0.8.11", "allocator-api2", + "rayon", + "serde", ] [[package]] @@ -2096,7 +2273,7 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -2150,6 +2327,12 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "itoap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" + [[package]] name = "jobserver" version = "0.1.32" @@ -2336,7 +2519,7 @@ dependencies = [ "proc-macro2", "quote", "regex-syntax 0.8.4", - "syn", + "syn 2.0.72", ] [[package]] @@ -2375,6 +2558,26 @@ dependencies = [ "linked-hash-map", ] +[[package]] +name = "lz4" +version = "1.26.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "958b4caa893816eea05507c20cfe47574a43d9a697138a7872990bba8a0ece68" +dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "109de74d5d2353660401699a4174a4ff23fcc649caf553df71933c7fb45ad868" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "match_cfg" version = "0.1.0" @@ -2429,7 +2632,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta", - "syn", + "syn 2.0.72", ] [[package]] @@ -2465,6 +2668,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memmap2" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" +dependencies = [ + "libc", +] + [[package]] name = "mime" version = "0.3.17" @@ -2517,6 +2729,28 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1" +[[package]] +name = "multiversion" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4851161a11d3ad0bf9402d90ffc3967bf231768bfd7aeb61755ad06dbf1a142" +dependencies = [ + "multiversion-macros", + "target-features", +] + +[[package]] +name = "multiversion-macros" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79a74ddee9e0c27d2578323c13905793e91622148f138ba29738f9dddb835e90" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", + "target-features", +] + [[package]] name = "mutually_exclusive_features" version = "0.0.3" @@ -2577,6 +2811,7 @@ dependencies = [ "meilisearch-sdk", "oauth2", "octocrab", + "polars", "pretty_assertions", "rand", "regex", @@ -2635,6 
+2870,24 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8" +[[package]] +name = "now" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d89e9874397a1f0a52fc1f197a8effd9735223cb2390e9dcc83ac6cd02923d0" +dependencies = [ + "chrono", +] + +[[package]] +name = "ntapi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +dependencies = [ + "winapi", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -2709,7 +2962,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -2847,7 +3100,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -2954,6 +3207,16 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "parquet-format-safe" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1131c54b167dd4e4799ce762e1ab01549ebb94d5bdd13e6ec1b467491c378e1f" +dependencies = [ + "async-trait", + "futures", +] + [[package]] name = "parse-display" version = "0.9.1" @@ -2976,7 +3239,16 @@ dependencies = [ "regex", "regex-syntax 0.8.4", "structmeta", - "syn", + "syn 2.0.72", +] + +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", ] [[package]] @@ -3041,7 +3313,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -3055,6 +3327,44 @@ dependencies = [ "sha2", ] +[[package]] +name = "phf" +version = "0.11.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.5" @@ -3072,7 +3382,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -3114,6 +3424,15 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +[[package]] +name = "planus" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1691dd09e82f428ce8d6310bd6d5da2557c82ff17694d2a32cad7242aea89f" +dependencies = [ + "array-init-cursor", +] + [[package]] name = "png" version = "0.17.13" @@ -3128,34 +3447,444 @@ dependencies = [ ] [[package]] -name = "portable-atomic" -version = "1.7.0" +name = "polars" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265" +checksum = "8e3351ea4570e54cd556e6755b78fe7a2c85368d820c0307cca73c96e796a7ba" 
+dependencies = [ + "getrandom", + "polars-arrow", + "polars-core", + "polars-error", + "polars-io", + "polars-lazy", + "polars-ops", + "polars-parquet", + "polars-sql", + "polars-time", + "polars-utils", + "version_check", +] [[package]] -name = "powerfmt" -version = "0.2.0" +name = "polars-arrow" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +checksum = "ba65fc4bcabbd64fca01fd30e759f8b2043f0963c57619e331d4b534576c0b47" +dependencies = [ + "ahash 0.8.11", + "atoi", + "atoi_simd", + "bytemuck", + "chrono", + "chrono-tz", + "dyn-clone", + "either", + "ethnum", + "fast-float", + "foreign_vec", + "futures", + "getrandom", + "hashbrown 0.14.5", + "itoa", + "itoap", + "lz4", + "multiversion", + "num-traits", + "polars-arrow-format", + "polars-error", + "polars-utils", + "ryu", + "simdutf8", + "streaming-iterator", + "strength_reduce", + "version_check", + "zstd", +] [[package]] -name = "ppv-lite86" -version = "0.2.20" +name = "polars-arrow-format" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "19b0ef2474af9396b19025b189d96e992311e6a47f90c53cd998b36c4c64b84c" dependencies = [ - "zerocopy", + "planus", + "serde", ] [[package]] -name = "pretty_assertions" -version = "1.4.0" +name = "polars-compute" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +checksum = "9f099516af30ac9ae4b4480f4ad02aa017d624f2f37b7a16ad4e9ba52f7e5269" dependencies = [ - "diff", - "yansi", + "bytemuck", + "either", + "num-traits", + "polars-arrow", + "polars-error", + "polars-utils", + "strength_reduce", + "version_check", +] + +[[package]] +name = "polars-core" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "b2439484be228b8c302328e2f953e64cfd93930636e5c7ceed90339ece7fef6c" +dependencies = [ + "ahash 0.8.11", + "bitflags 2.6.0", + "bytemuck", + "chrono", + "chrono-tz", + "comfy-table", + "either", + "hashbrown 0.14.5", + "indexmap 2.3.0", + "num-traits", + "once_cell", + "polars-arrow", + "polars-compute", + "polars-error", + "polars-row", + "polars-utils", + "rand", + "rand_distr", + "rayon", + "regex", + "smartstring", + "thiserror", + "version_check", + "xxhash-rust", +] + +[[package]] +name = "polars-error" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c9b06dfbe79cabe50a7f0a90396864b5ee2c0e0f8d6a9353b2343c29c56e937" +dependencies = [ + "polars-arrow-format", + "regex", + "simdutf8", + "thiserror", +] + +[[package]] +name = "polars-expr" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c630385a56a867c410a20f30772d088f90ec3d004864562b84250b35268f97" +dependencies = [ + "ahash 0.8.11", + "bitflags 2.6.0", + "once_cell", + "polars-arrow", + "polars-core", + "polars-io", + "polars-ops", + "polars-plan", + "polars-time", + "polars-utils", + "rayon", + "smartstring", +] + +[[package]] +name = "polars-io" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d7363cd14e4696a28b334a56bd11013ff49cc96064818ab3f91a126e453462d" +dependencies = [ + "ahash 0.8.11", + "async-trait", + "atoi_simd", + "bytes", + "chrono", + "fast-float", + "futures", + "home", + "itoa", + "memchr", + "memmap2", + "num-traits", + "once_cell", + "percent-encoding", + "polars-arrow", + "polars-core", + "polars-error", + "polars-parquet", + "polars-time", + "polars-utils", + "rayon", + "regex", + "ryu", + "simdutf8", + "smartstring", + "tokio", + "tokio-util", +] + +[[package]] +name = "polars-lazy" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"03877e74e42b5340ae52ded705f6d5d14563d90554c9177b01b91ed2412a56ed" +dependencies = [ + "ahash 0.8.11", + "bitflags 2.6.0", + "glob", + "memchr", + "once_cell", + "polars-arrow", + "polars-core", + "polars-expr", + "polars-io", + "polars-mem-engine", + "polars-ops", + "polars-pipe", + "polars-plan", + "polars-time", + "polars-utils", + "rayon", + "smartstring", + "version_check", +] + +[[package]] +name = "polars-mem-engine" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea9e17771af750c94bf959885e4b3f5b14149576c62ef3ec1c9ef5827b2a30f" +dependencies = [ + "polars-arrow", + "polars-core", + "polars-error", + "polars-expr", + "polars-io", + "polars-ops", + "polars-plan", + "polars-time", + "polars-utils", + "rayon", +] + +[[package]] +name = "polars-ops" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6066552eb577d43b307027fb38096910b643ffb2c89a21628c7e41caf57848d0" +dependencies = [ + "ahash 0.8.11", + "argminmax", + "base64 0.22.1", + "bytemuck", + "chrono", + "chrono-tz", + "either", + "hashbrown 0.14.5", + "hex", + "indexmap 2.3.0", + "memchr", + "num-traits", + "polars-arrow", + "polars-compute", + "polars-core", + "polars-error", + "polars-utils", + "rayon", + "regex", + "smartstring", + "unicode-reverse", + "version_check", +] + +[[package]] +name = "polars-parquet" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b35b2592a2e7ef7ce9942dc2120dc4576142626c0e661668e4c6b805042e461" +dependencies = [ + "ahash 0.8.11", + "async-stream", + "base64 0.22.1", + "brotli 5.0.0", + "ethnum", + "flate2", + "futures", + "lz4", + "num-traits", + "parquet-format-safe", + "polars-arrow", + "polars-compute", + "polars-error", + "polars-utils", + "simdutf8", + "snap", + "streaming-decompression", + "zstd", +] + +[[package]] +name = "polars-pipe" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "021bce7768c330687d735340395a77453aa18dd70d57c184cbb302311e87c1b9" +dependencies = [ + "crossbeam-channel", + "crossbeam-queue", + "enum_dispatch", + "hashbrown 0.14.5", + "num-traits", + "polars-arrow", + "polars-compute", + "polars-core", + "polars-expr", + "polars-io", + "polars-ops", + "polars-plan", + "polars-row", + "polars-utils", + "rayon", + "smartstring", + "uuid", + "version_check", +] + +[[package]] +name = "polars-plan" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "220d0d7c02d1c4375802b2813dbedcd1a184df39c43b74689e729ede8d5c2921" +dependencies = [ + "ahash 0.8.11", + "bytemuck", + "chrono-tz", + "either", + "hashbrown 0.14.5", + "once_cell", + "percent-encoding", + "polars-arrow", + "polars-core", + "polars-io", + "polars-ops", + "polars-parquet", + "polars-time", + "polars-utils", + "rayon", + "recursive", + "regex", + "smartstring", + "strum_macros", + "version_check", +] + +[[package]] +name = "polars-row" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1d70d87a2882a64a43b431aea1329cb9a2c4100547c95c417cc426bb82408b3" +dependencies = [ + "bytemuck", + "polars-arrow", + "polars-error", + "polars-utils", +] + +[[package]] +name = "polars-sql" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6fc1c9b778862f09f4a347f768dfdd3d0ba9957499d306d83c7103e0fa8dc5b" +dependencies = [ + "hex", + "once_cell", + "polars-arrow", + "polars-core", + "polars-error", + "polars-lazy", + "polars-ops", + "polars-plan", + "polars-time", + "rand", + "serde", + "serde_json", + "sqlparser", +] + +[[package]] +name = "polars-time" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "179f98313a15c0bfdbc8cc0f1d3076d08d567485b9952d46439f94fbc3085df5" +dependencies = [ + "atoi", + "bytemuck", + "chrono", + "chrono-tz", + "now", + "once_cell", + "polars-arrow", + 
"polars-core", + "polars-error", + "polars-ops", + "polars-utils", + "regex", + "smartstring", +] + +[[package]] +name = "polars-utils" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53e6dd89fcccb1ec1a62f752c9a9f2d482a85e9255153f46efecc617b4996d50" +dependencies = [ + "ahash 0.8.11", + "bytemuck", + "hashbrown 0.14.5", + "indexmap 2.3.0", + "num-traits", + "once_cell", + "polars-error", + "raw-cpuid", + "rayon", + "smartstring", + "stacker", + "sysinfo", + "version_check", +] + +[[package]] +name = "portable-atomic" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "pretty_assertions" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +dependencies = [ + "diff", + "yansi", ] [[package]] @@ -3165,7 +3894,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.72", ] [[package]] @@ -3193,7 +3922,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd" dependencies = [ "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -3210,6 +3939,15 @@ dependencies = [ "thiserror", ] +[[package]] +name = "psm" +version = "0.1.21" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + [[package]] name = "qoi" version = "0.4.1" @@ -3425,6 +4163,26 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.72", +] + [[package]] name = "redox_syscall" version = "0.2.16" @@ -3819,6 +4577,12 @@ dependencies = [ "untrusted", ] +[[package]] +name = "rustversion" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + [[package]] name = "ryu" version = "1.0.18" @@ -4037,7 +4801,7 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -4070,7 +4834,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -4121,7 +4885,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -4231,6 +4995,12 @@ dependencies = [ "quote", ] +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + [[package]] name = "similar" version = "2.6.0" @@ -4249,6 +5019,12 @@ dependencies = [ "time", ] +[[package]] +name 
= "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "slab" version = "0.4.9" @@ -4283,6 +5059,17 @@ dependencies = [ "serde", ] +[[package]] +name = "smartstring" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" +dependencies = [ + "autocfg", + "static_assertions", + "version_check", +] + [[package]] name = "snafu" version = "0.8.4" @@ -4301,9 +5088,15 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + [[package]] name = "socket2" version = "0.5.7" @@ -4352,6 +5145,15 @@ dependencies = [ "unicode_categories", ] +[[package]] +name = "sqlparser" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "295e9930cd7a97e58ca2a070541a3ca502b17f5d1fa7157376d0fabd85324f25" +dependencies = [ + "log", +] + [[package]] name = "sqlx" version = "0.8.0" @@ -4418,7 +5220,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn", + "syn 2.0.72", ] [[package]] @@ -4441,7 +5243,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn", + "syn 2.0.72", "tempfile", "tokio", "url", @@ -4553,6 +5355,46 @@ dependencies = [ "url", ] +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "streaming-decompression" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf6cc3b19bfb128a8ad11026086e31d3ce9ad23f8ea37354b31383a187c44cf3" +dependencies = [ + "fallible-streaming-iterator", +] + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "strength_reduce" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" + [[package]] name = "strfmt" version = "0.2.4" @@ -4585,7 +5427,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive", - "syn", + "syn 2.0.72", ] [[package]] @@ -4596,7 +5438,26 @@ checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", +] + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.72", ] [[package]] @@ -4605,6 +5466,17 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + 
"proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.72" @@ -4628,6 +5500,20 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +[[package]] +name = "sysinfo" +version = "0.30.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3" +dependencies = [ + "cfg-if", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "windows", +] + [[package]] name = "system-configuration" version = "0.5.1" @@ -4662,6 +5548,12 @@ dependencies = [ "version-compare", ] +[[package]] +name = "target-features" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" + [[package]] name = "target-lexicon" version = "0.12.16" @@ -4738,7 +5630,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -4834,7 +5726,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -5016,7 +5908,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -5091,7 +5983,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568" dependencies = [ "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -5154,6 +6046,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4259d9d4425d9f0661581b804cb85fe66a4c631cadd8f490d1c13a35d5d9291" +[[package]] +name = "unicode-reverse" +version = "1.0.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b6f4888ebc23094adfb574fdca9fdc891826287a6397d2cd28802ffd6f20c76" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "unicode-segmentation" version = "1.11.0" @@ -5311,7 +6212,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.72", "wasm-bindgen-shared", ] @@ -5345,7 +6246,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -5657,6 +6558,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "xxhash-rust" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a5cbf750400958819fb6178eaa83bee5cd9c29a26a40cc241df8c70fdd46984" + [[package]] name = "yansi" version = "0.5.1" @@ -5692,7 +6599,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] @@ -5712,7 +6619,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.72", ] [[package]] diff --git a/server/main-api/Cargo.toml b/server/main-api/Cargo.toml index 4d9aa5778..30cbc7e05 100644 --- a/server/main-api/Cargo.toml +++ b/server/main-api/Cargo.toml @@ -69,6 +69,8 @@ actix-governor = { version = "0.5.0", features = ["logger"] } tempfile = "3.10.1" base64 = "0.22.1" time = "0.3.36" +polars = { version = "0.41.3", features = ["parquet", "dtype-struct"] } +#polars = { git = "https://github.com/CommanderStorm/polars.git", branch = "serialisation-experiment", features = ["parquet", "serde", "dtype-full"] } [dev-dependencies] insta = { version = "1.39.0", features = ["yaml", "json", "redactions"] } diff --git a/server/main-api/src/setup/database/alias.rs b/server/main-api/src/setup/database/alias.rs index 
02726788f..047c1cf07 100644 --- a/server/main-api/src/setup/database/alias.rs +++ b/server/main-api/src/setup/database/alias.rs @@ -1,6 +1,7 @@ -use serde::Deserialize; - use crate::limited::vec::LimitedVec; +use polars::prelude::*; +use std::io::Write; +use tempfile::tempfile; #[derive(Debug)] pub(super) struct Alias { @@ -10,76 +11,6 @@ pub(super) struct Alias { visible_id: String, } -#[derive(Debug, Deserialize)] -struct AliasData { - id: String, - visible_id: Option, - aliases: Vec, - r#type: String, // what we display in the url -} -struct AliasIterator { - data: AliasData, - state: AliasIteratorState, -} -#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] -enum AliasIteratorState { - #[default] - Key, - VisibleId, - Alias(usize), - Done, -} -impl AliasIteratorState { - fn next_state(&mut self) -> Self { - match self { - Self::Key => Self::VisibleId, - Self::VisibleId => Self::Alias(0), - Self::Alias(i) => Self::Alias(*i + 1), - Self::Done => Self::Done, - } - } -} - -impl From for AliasIterator { - fn from(alias_data: AliasData) -> Self { - Self { - data: alias_data, - state: AliasIteratorState::default(), - } - } -} -impl Iterator for AliasIterator { - type Item = Alias; - fn next(&mut self) -> Option { - use AliasIteratorState as State; - let visible_id = self.data.visible_id.clone().unwrap_or(self.data.id.clone()); - let alias_len = self.data.aliases.len(); - let state = self.state; - self.state = self.state.next_state(); - match state { - State::Key => Some(Alias { - alias: self.data.id.clone(), - key: self.data.id.clone(), - r#type: self.data.r#type.clone(), - visible_id, - }), - State::VisibleId => Some(Alias { - alias: visible_id.clone(), - key: self.data.id.clone(), - r#type: self.data.r#type.clone(), - visible_id, - }), - State::Alias(index) if index < alias_len => Some(Alias { - alias: self.data.aliases[index].clone(), - key: self.data.id.clone(), - r#type: self.data.r#type.clone(), - visible_id, - }), - State::Alias(_) | State::Done => None, - } 
- } -} - impl Alias { async fn store( self, @@ -102,24 +33,74 @@ impl Alias { } } #[tracing::instrument] -pub async fn download_updates( - keys_which_need_updating: &LimitedVec, -) -> Result, crate::BoxedError> { +pub async fn download_updates() -> Result, crate::BoxedError> { let cdn_url = std::env::var("CDN_URL").unwrap_or_else(|_| "https://nav.tum.de/cdn".to_string()); - let aliase = reqwest::get(format!("{cdn_url}/api_data.json")) + let body = reqwest::get(format!("{cdn_url}/api_data.parquet")) .await? - .json::>() - .await? - .into_iter() - .filter(|d| { - keys_which_need_updating.is_empty() || keys_which_need_updating.0.contains(&d.id) - }) - .map(AliasIterator::from); - Ok(LimitedVec( - aliase - .flat_map(IntoIterator::into_iter) - .collect::>(), - )) + .error_for_status()? + .bytes() + .await?; + let mut aliase = Vec::::new(); + let mut file = tempfile()?; + file.write_all(&body)?; + let df = ParquetReader::new(&mut file) + .with_columns(Some(vec![ + "id".to_string(), + "type".to_string(), + "visible_id".to_string(), + "aliases".to_string(), + ])) + .finish() + .unwrap(); + let id_col = df.column("id")?.str()?; + let type_col = df.column("type")?.str()?; + let visible_id_col = df.column("visible_id")?.str()?; + for index in 0..id_col.len() { + let id = id_col.get(index).unwrap(); + let r#type = type_col.get(index).unwrap(); + let visible_id = visible_id_col.get(index); + let visible_id = match visible_id { + Some(v) => v.to_string(), + None => id.to_string(), + }; + aliase.push(Alias { + alias: id.to_string(), + key: id.to_string(), + r#type: r#type.to_string(), + visible_id: visible_id.clone(), + }); + aliase.push(Alias { + alias: visible_id.clone(), + key: id.to_string(), + r#type: r#type.to_string(), + visible_id: visible_id.clone(), + }); + } + + let df_expanded = df.explode(["aliases"])?; + let mask = df_expanded.column("aliases")?.is_not_null(); + let df_expanded = df_expanded.filter(&mask)?; + let id_col = df_expanded.column("id")?.str()?; + let 
type_col = df_expanded.column("type")?.str()?; + let visible_id_col = df_expanded.column("visible_id")?.str()?; + let aliases_col = df_expanded.column("aliases")?.str()?; + for index in 0..id_col.len() { + let alias = aliases_col.get(index).unwrap(); + let id = id_col.get(index).unwrap(); + let r#type = type_col.get(index).unwrap(); + let visible_id = visible_id_col.get(index); + let visible_id = match visible_id { + Some(v) => v.to_string(), + None => id.to_string(), + }; + aliase.push(Alias { + alias: alias.to_string(), + key: id.to_string(), + r#type: r#type.to_string(), + visible_id, + }); + } + Ok(LimitedVec(aliase)) } #[tracing::instrument(skip(tx))] pub async fn load_all_to_db( diff --git a/server/main-api/src/setup/database/data.rs b/server/main-api/src/setup/database/data.rs index 2e07bbc65..789922091 100644 --- a/server/main-api/src/setup/database/data.rs +++ b/server/main-api/src/setup/database/data.rs @@ -1,10 +1,12 @@ +use crate::limited::vec::LimitedVec; +use polars::prelude::ParquetReader; +use polars::prelude::*; +use serde_json::Value; use std::collections::HashMap; use std::fmt; use std::hash::{Hash, Hasher}; - -use serde_json::Value; - -use crate::limited::vec::LimitedVec; +use std::io::Write; +use tempfile::tempfile; #[derive(Clone)] pub(super) struct DelocalisedValues { @@ -148,11 +150,22 @@ pub(super) async fn load_all_to_db( Ok(()) } #[tracing::instrument] -pub async fn download_status() -> Result, crate::BoxedError> { +pub async fn download_status() -> Result<(LimitedVec, LimitedVec), crate::BoxedError> { let cdn_url = std::env::var("CDN_URL").unwrap_or_else(|_| "https://nav.tum.de/cdn".to_string()); - let tasks = reqwest::get(format!("{cdn_url}/status_data.json")) + let body = reqwest::get(format!("{cdn_url}/status_data.parquet")) .await? - .json::>() + .error_for_status()? 
+ .bytes() .await?; - Ok(LimitedVec(tasks)) + let mut file = tempfile()?; + file.write_all(&body)?; + let df = ParquetReader::new(&mut file).finish().unwrap(); + let id_col = Vec::from(df.column("id")?.str()?); + let id_col = id_col + .into_iter() + .filter_map(|s| s.map(String::from)) + .collect(); + let hash_col = Vec::from(df.column("hash")?.i64()?); + let hash_col = hash_col.into_iter().flatten().collect(); + Ok((LimitedVec(id_col), LimitedVec(hash_col))) } diff --git a/server/main-api/src/setup/database/mod.rs b/server/main-api/src/setup/database/mod.rs index 4b9c80629..aa763b4e1 100644 --- a/server/main-api/src/setup/database/mod.rs +++ b/server/main-api/src/setup/database/mod.rs @@ -14,16 +14,7 @@ pub async fn setup(pool: &sqlx::PgPool) -> Result<(), crate::BoxedError> { } #[tracing::instrument(skip(pool))] pub async fn load_data(pool: &sqlx::PgPool) -> Result<(), crate::BoxedError> { - let status = data::download_status().await?.0; - let new_keys = status - .clone() - .into_iter() - .map(|(k, _)| k) - .collect::>(); - let new_hashes = status - .into_iter() - .map(|(_, h)| h) - .collect::>(); + let (new_keys, new_hashes) = data::download_status().await?; { let _ = info_span!("deleting old data").enter(); let mut tx = pool.begin().await?; @@ -39,10 +30,8 @@ pub async fn load_data(pool: &sqlx::PgPool) -> Result<(), crate::BoxedError> { data::load_all_to_db(data, &mut tx).await?; tx.commit().await?; } - - if !keys_which_need_updating.is_empty() { - let _ = info_span!("loading new aliases").enter(); - let aliases = alias::download_updates(&keys_which_need_updating).await?; + { + let aliases = alias::download_updates().await?; let mut tx = pool.begin().await?; alias::load_all_to_db(aliases, &mut tx).await?; tx.commit().await?; diff --git a/server/main-api/src/setup/meilisearch.rs b/server/main-api/src/setup/meilisearch.rs index c5673998a..d46927b91 100644 --- a/server/main-api/src/setup/meilisearch.rs +++ b/server/main-api/src/setup/meilisearch.rs @@ -105,6 
+105,7 @@ pub async fn load_data(client: &Client) -> Result<(), crate::BoxedError> { let cdn_url = std::env::var("CDN_URL").unwrap_or_else(|_| "https://nav.tum.de/cdn".to_string()); let documents = reqwest::get(format!("{cdn_url}/search_data.json")) .await? + .error_for_status()? .json::>() .await?; let res = entries diff --git a/server/main-api/src/setup/transportation.rs b/server/main-api/src/setup/transportation.rs index a51c87cbd..ed47f95e9 100644 --- a/server/main-api/src/setup/transportation.rs +++ b/server/main-api/src/setup/transportation.rs @@ -54,7 +54,11 @@ impl DBStation { #[tracing::instrument(skip(pool))] pub async fn setup(pool: &sqlx::PgPool) -> Result<(), crate::BoxedError> { let url = "https://raw.githubusercontent.com/TUM-Dev/NavigaTUM/main/data/external/results/public_transport.json"; - let transportations: Vec = reqwest::get(url).await?.json().await?; + let transportations = reqwest::get(url) + .await? + .error_for_status()? + .json::>() + .await?; let transportations = transportations.into_iter().flat_map(|s| { let id = s.station.station_id.clone(); let mut stations = vec![DBStation::from_station(s.station, None)];