From 0543f03a661732609da9b788a4e8b78bb31232be Mon Sep 17 00:00:00 2001 From: Frank Elsinga Date: Sun, 12 Nov 2023 20:19:06 +0100 Subject: [PATCH] tmp --- ...b53f88c6c86d53816936311bd26142e000306.json | 18 +++ ...aa5f62fea14e6071bc590546b3aec9274e713.json | 34 +++++ ...f91a95272231c6aac78c0207fddc062e8dd13.json | 16 ++ ...a83339d69e3e4c4cbe67b44b94871754cdf13.json | 144 ++++++++++++++++++ ...02c98ee5202b6d0dadd5be5c894d03b9e1354.json | 46 ++++++ ...88f82b10327b0ecca14f0954057ffc514629a.json | 14 ++ ...ed3c8f876f1974161d94dbd27646143b8411.json} | 4 +- server/Cargo.lock | 50 +----- server/calendar/Cargo.toml | 2 +- server/calendar/diesel.toml | 5 - server/calendar/src/calendar.rs | 68 +++++---- server/calendar/src/main.rs | 14 +- server/calendar/src/models.rs | 8 +- server/calendar/src/schema.rs | 39 ----- .../src/scrape_task/main_api_connector.rs | 62 ++++---- server/calendar/src/scrape_task/mod.rs | 34 ++--- .../tumonline_calendar_connector.rs | 52 ++++--- server/calendar/src/scraper.rs | 15 +- server/calendar/src/utils.rs | 6 +- server/main-api/src/setup/database/alias.rs | 3 +- 20 files changed, 429 insertions(+), 205 deletions(-) create mode 100644 server/.sqlx/query-0a72a5f6754447b2fa56208083ab53f88c6c86d53816936311bd26142e000306.json create mode 100644 server/.sqlx/query-16078c7014e218f54a65b8a3e0eaa5f62fea14e6071bc590546b3aec9274e713.json create mode 100644 server/.sqlx/query-334f5e3016bbe104ba8f360d61ef91a95272231c6aac78c0207fddc062e8dd13.json create mode 100644 server/.sqlx/query-7ad82242b6dc392f71b63b4513da83339d69e3e4c4cbe67b44b94871754cdf13.json create mode 100644 server/.sqlx/query-a975b979fb570d9cc18fb71425102c98ee5202b6d0dadd5be5c894d03b9e1354.json create mode 100644 server/.sqlx/query-f142eb3b0a7027d50af7f60df5288f82b10327b0ecca14f0954057ffc514629a.json rename server/.sqlx/{query-153c0940344b3504b9f03696b29d5138f3ebcdd7b2b41dad6bb698e7248b5f18.json => query-fe797b7bbbe890d6aeb9878dc793ed3c8f876f1974161d94dbd27646143b8411.json} (63%) delete mode 100644 server/calendar/diesel.toml delete mode 100644 server/calendar/src/schema.rs diff --git a/server/.sqlx/query-0a72a5f6754447b2fa56208083ab53f88c6c86d53816936311bd26142e000306.json b/server/.sqlx/query-0a72a5f6754447b2fa56208083ab53f88c6c86d53816936311bd26142e000306.json new file mode 100644 index 000000000..84129c14b --- /dev/null +++ b/server/.sqlx/query-0a72a5f6754447b2fa56208083ab53f88c6c86d53816936311bd26142e000306.json @@ -0,0 +1,18 @@ +{ + "db_name": "PostgreSQL", + "query": "\n INSERT INTO rooms(key,tumonline_org_id,tumonline_calendar_id,tumonline_room_id,last_scrape)\n VALUES ($1,$2,$3,$4,$5)\n ON CONFLICT (key) DO UPDATE SET\n tumonline_org_id=$2,\n tumonline_calendar_id=$3,\n tumonline_room_id=$4,\n last_scrape=$5", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Text", + "Int4", + "Int4", + "Int4", + "Timestamp" + ] + }, + "nullable": [] + }, + "hash": "0a72a5f6754447b2fa56208083ab53f88c6c86d53816936311bd26142e000306" +} diff --git a/server/.sqlx/query-16078c7014e218f54a65b8a3e0eaa5f62fea14e6071bc590546b3aec9274e713.json b/server/.sqlx/query-16078c7014e218f54a65b8a3e0eaa5f62fea14e6071bc590546b3aec9274e713.json new file mode 100644 index 000000000..3fa667766 --- /dev/null +++ b/server/.sqlx/query-16078c7014e218f54a65b8a3e0eaa5f62fea14e6071bc590546b3aec9274e713.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "\n INSERT INTO calendar(key, dtstart, dtend, dtstamp, event_id, event_title, single_event_id, single_event_type_id, single_event_type_name, event_type_id, event_type_name, course_type_name, course_type, course_code, course_semester_hours, group_id, xgroup, status_id, status, comment, last_scrape)\n VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21)\n ON CONFLICT (key, dtstart, dtend) DO UPDATE SET\n dtstamp=$4,\n event_id=$5,\n event_title=$6,\n single_event_id=$7,\n single_event_type_id=$8,\n single_event_type_name=$9,\n event_type_id=$10,\n event_type_name=$11,\n course_type_name=$12,\n course_type=$13,\n course_code=$14,\n course_semester_hours=$15,\n group_id=$16,\n xgroup=$17,\n status_id=$18,\n status=$19,\n comment=$20,\n last_scrape=$21", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Varchar", + "Timestamp", + "Timestamp", + "Timestamp", + "Int4", + "Text", + "Int4", + "Text", + "Text", + "Text", + "Text", + "Text", + "Text", + "Text", + "Int4", + "Text", + "Text", + "Text", + "Text", + "Text", + "Timestamp" + ] + }, + "nullable": [] + }, + "hash": "16078c7014e218f54a65b8a3e0eaa5f62fea14e6071bc590546b3aec9274e713" +} diff --git a/server/.sqlx/query-334f5e3016bbe104ba8f360d61ef91a95272231c6aac78c0207fddc062e8dd13.json b/server/.sqlx/query-334f5e3016bbe104ba8f360d61ef91a95272231c6aac78c0207fddc062e8dd13.json new file mode 100644 index 000000000..ed335486f --- /dev/null +++ b/server/.sqlx/query-334f5e3016bbe104ba8f360d61ef91a95272231c6aac78c0207fddc062e8dd13.json @@ -0,0 +1,16 @@ +{ + "db_name": "PostgreSQL", + "query": "DELETE FROM calendar WHERE dtstart > $1 AND dtend < $2 AND last_scrape < $3", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Timestamp", + "Timestamp", + "Timestamp" + ] + }, + "nullable": [] + }, + "hash": "334f5e3016bbe104ba8f360d61ef91a95272231c6aac78c0207fddc062e8dd13" +} diff --git a/server/.sqlx/query-7ad82242b6dc392f71b63b4513da83339d69e3e4c4cbe67b44b94871754cdf13.json b/server/.sqlx/query-7ad82242b6dc392f71b63b4513da83339d69e3e4c4cbe67b44b94871754cdf13.json new file mode 100644 index 000000000..e9d15fa1a --- /dev/null +++ b/server/.sqlx/query-7ad82242b6dc392f71b63b4513da83339d69e3e4c4cbe67b44b94871754cdf13.json @@ -0,0 +1,144 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT *\n FROM calendar\n WHERE key = $1 AND dtstart >= $2 AND dtend <= $3\n ORDER BY dtstart", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "key", + "type_info": "Varchar" + }, + { + "ordinal": 1, + "name": "dtstart", + "type_info": "Timestamp" + }, + { + "ordinal": 2, + "name": "dtend", + "type_info": "Timestamp" + }, + { + "ordinal": 3, + "name": "dtstamp", + "type_info": "Timestamp" + }, + { + "ordinal": 4, + "name": "event_id", + "type_info": "Int4" + }, + { + "ordinal": 5, + "name": "event_title", + "type_info": "Text" + }, + { + "ordinal": 6, + "name": "single_event_id", + "type_info": "Int4" + }, + { + "ordinal": 7, + "name": "single_event_type_id", + "type_info": "Text" + }, + { + "ordinal": 8, + "name": "single_event_type_name", + "type_info": "Text" + }, + { + "ordinal": 9, + "name": "event_type_id", + "type_info": "Text" + }, + { + "ordinal": 10, + "name": "event_type_name", + "type_info": "Text" + }, + { + "ordinal": 11, + "name": "course_type_name", + "type_info": "Text" + }, + { + "ordinal": 12, + "name": "course_type", + "type_info": "Text" + }, + { + "ordinal": 13, + "name": "course_code", + "type_info": "Text" + }, + { + "ordinal": 14, + "name": "course_semester_hours", + "type_info": "Int4" + }, + { + "ordinal": 15, + "name": "group_id", + "type_info": "Text" + }, + { + "ordinal": 16, + "name": "xgroup", + "type_info": "Text" + }, + { + "ordinal": 17, + "name": "status_id", + "type_info": "Text" + }, + { + "ordinal": 18, + "name": "status", + "type_info": "Text" + }, + { + "ordinal": 19, + "name": "comment", + "type_info": "Text" + }, + { + "ordinal": 20, + "name": "last_scrape", + "type_info": "Timestamp" + } + ], + "parameters": { + "Left": [ + "Text", + "Timestamp", + "Timestamp" + ] + }, + "nullable": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + true, + true, + true, + true, + true, + true, + true, + false, + false, + false, + false + ] + }, + "hash": "7ad82242b6dc392f71b63b4513da83339d69e3e4c4cbe67b44b94871754cdf13" +} diff --git a/server/.sqlx/query-a975b979fb570d9cc18fb71425102c98ee5202b6d0dadd5be5c894d03b9e1354.json b/server/.sqlx/query-a975b979fb570d9cc18fb71425102c98ee5202b6d0dadd5be5c894d03b9e1354.json new file mode 100644 index 000000000..a9e71f898 --- /dev/null +++ b/server/.sqlx/query-a975b979fb570d9cc18fb71425102c98ee5202b6d0dadd5be5c894d03b9e1354.json @@ -0,0 +1,46 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT * FROM rooms WHERE key = $1", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "key", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "tumonline_org_id", + "type_info": "Int4" + }, + { + "ordinal": 2, + "name": "tumonline_calendar_id", + "type_info": "Int4" + }, + { + "ordinal": 3, + "name": "tumonline_room_id", + "type_info": "Int4" + }, + { + "ordinal": 4, + "name": "last_scrape", + "type_info": "Timestamp" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + false, + false, + false, + false, + false + ] + }, + "hash": "a975b979fb570d9cc18fb71425102c98ee5202b6d0dadd5be5c894d03b9e1354" +} diff --git a/server/.sqlx/query-f142eb3b0a7027d50af7f60df5288f82b10327b0ecca14f0954057ffc514629a.json b/server/.sqlx/query-f142eb3b0a7027d50af7f60df5288f82b10327b0ecca14f0954057ffc514629a.json new file mode 100644 index 000000000..eddb287d7 --- /dev/null +++ b/server/.sqlx/query-f142eb3b0a7027d50af7f60df5288f82b10327b0ecca14f0954057ffc514629a.json @@ -0,0 +1,14 @@ +{ + "db_name": "PostgreSQL", + "query": "DELETE FROM rooms WHERE last_scrape < $1", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Timestamp" + ] + }, + "nullable": [] + }, + "hash": "f142eb3b0a7027d50af7f60df5288f82b10327b0ecca14f0954057ffc514629a" +} diff --git a/server/.sqlx/query-153c0940344b3504b9f03696b29d5138f3ebcdd7b2b41dad6bb698e7248b5f18.json b/server/.sqlx/query-fe797b7bbbe890d6aeb9878dc793ed3c8f876f1974161d94dbd27646143b8411.json similarity index 63% rename from server/.sqlx/query-153c0940344b3504b9f03696b29d5138f3ebcdd7b2b41dad6bb698e7248b5f18.json rename to server/.sqlx/query-fe797b7bbbe890d6aeb9878dc793ed3c8f876f1974161d94dbd27646143b8411.json index 237d747ca..839dde1d2 100644 --- a/server/.sqlx/query-153c0940344b3504b9f03696b29d5138f3ebcdd7b2b41dad6bb698e7248b5f18.json +++ b/server/.sqlx/query-fe797b7bbbe890d6aeb9878dc793ed3c8f876f1974161d94dbd27646143b8411.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "INSERT INTO aliases (alias, key, type, visible_id)\n VALUES ($1, $2, $3, $4)\n ON CONFLICT (alias,key) DO UPDATE\n SET\n key = $2,\n type = $3,\n visible_id = $4", + "query": "INSERT INTO aliases (alias, key, type, visible_id)\n VALUES ($1, $2, $3, $4)\n ON CONFLICT (alias,key) DO UPDATE SET\n key = $2,\n type = $3,\n visible_id = $4", "describe": { "columns": [], "parameters": { @@ -13,5 +13,5 @@ }, "nullable": [] }, - "hash": "153c0940344b3504b9f03696b29d5138f3ebcdd7b2b41dad6bb698e7248b5f18" + "hash": "fe797b7bbbe890d6aeb9878dc793ed3c8f876f1974161d94dbd27646143b8411" } diff --git a/server/Cargo.lock b/server/Cargo.lock index 243090bc2..7fe9731db 100644 --- a/server/Cargo.lock +++ b/server/Cargo.lock @@ -807,41 +807,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "diesel" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2268a214a6f118fce1838edba3d1561cf0e78d8de785475957a580a7f8c69d33" -dependencies = [ - "bitflags 2.4.0", - "byteorder", - "chrono", - "diesel_derives", - "itoa", - "pq-sys", -] - -[[package]] -name = "diesel_derives" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e054665eaf6d97d1e7125512bb2d35d07c73ac86cc6920174cb42d1ab697a554" -dependencies = [ - "diesel_table_macro_syntax", - "proc-macro2", - "quote", - "syn 2.0.31", -] - -[[package]] -name = "diesel_table_macro_syntax" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc5557efc453706fed5e4fa85006fe9817c224c3f480a34c7e5959fd700921c5" -dependencies = [ - "syn 2.0.31", -] - [[package]] name = "diff" version = "0.1.13" @@ -1980,7 +1945,6 @@ dependencies = [ "actix-web", "actix-web-prom", "chrono", - "diesel", "futures", "lazy_static", "log", @@ -1993,6 +1957,7 @@ dependencies = [ "rustls", "serde", "serde_json", + "sqlx", "structured-logger", "tokio", ] @@ -2434,15 +2399,6 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" -[[package]] -name = "pq-sys" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31c0052426df997c0cbd30789eb44ca097e3541717a7b8fa36b1c464ee7edebd" -dependencies = [ - "vcpkg", -] - [[package]] name = "pretty_assertions" version = "1.4.0" @@ -3257,6 +3213,7 @@ dependencies = [ "atoi", "byteorder", "bytes", + "chrono", "crc", "crossbeam-queue", "dotenvy", @@ -3340,6 +3297,7 @@ dependencies = [ "bitflags 2.4.0", "byteorder", "bytes", + "chrono", "crc", "digest", "dotenvy", @@ -3381,6 +3339,7 @@ dependencies = [ "base64", "bitflags 2.4.0", "byteorder", + "chrono", "crc", "dotenvy", "etcetera", @@ -3417,6 +3376,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d59dc83cf45d89c555a577694534fcd1b55c545a816c816ce51f20bbe56a4f3f" dependencies = [ "atoi", + "chrono", "flume 0.11.0", "futures-channel", "futures-core", diff --git a/server/calendar/Cargo.toml b/server/calendar/Cargo.toml index a26fbf689..b9e94c097 100644 --- a/server/calendar/Cargo.toml +++ b/server/calendar/Cargo.toml @@ -31,7 +31,7 @@ serde_json.workspace = true chrono = { version = "0.4.31", default-features = false, features = ["serde"] } # database -diesel = { version = "2.1.3", default-features = false, features = ["default", "chrono", "postgres"] } +sqlx = { version = "0.7.2", features = ["postgres", "runtime-tokio-rustls", "migrate", "macros","chrono"] } # metrics lazy_static = "1.4.0" diff --git a/server/calendar/diesel.toml b/server/calendar/diesel.toml deleted file mode 100644 index 925e2ed30..000000000 --- a/server/calendar/diesel.toml +++ /dev/null @@ -1,5 +0,0 @@ -# For documentation on how to configure this file, -# see https://diesel.rs/guides/configuring-diesel-cli - -[print_schema] -file = "src/schema.rs" diff --git a/server/calendar/src/calendar.rs b/server/calendar/src/calendar.rs index 1d0dc6508..11b7da7c0 100644 --- a/server/calendar/src/calendar.rs +++ b/server/calendar/src/calendar.rs @@ -1,11 +1,10 @@ +use crate::models::Room; use crate::models::XMLEvent; -use crate::schema::rooms::dsl; -use crate::utils; use actix_web::{get, web, HttpResponse}; use chrono::NaiveDateTime; -use diesel::prelude::*; use log::error; use serde::{Deserialize, Serialize}; +use sqlx::PgPool; use std::fmt::Debug; #[derive(Deserialize, Debug)] @@ -14,45 +13,55 @@ pub struct QueryArguments { end: NaiveDateTime, // eg. 2022-01-07T00:00:00 } -fn get_room_information( +async fn get_room_information( requested_key: &str, - conn: &mut PgConnection, -) -> QueryResult<(String, NaiveDateTime)> { - let room = dsl::rooms - .filter(dsl::key.eq(requested_key)) - .first::(conn)?; - let calendar_url = format!( - "https://campus.tum.de/tumonline/wbKalender.wbRessource?pResNr={id}", - id = room.tumonline_calendar_id - ); - Ok((calendar_url, room.last_scrape)) + conn: &PgPool, +) -> Result, sqlx::Error> { + let room = sqlx::query_as!(Room, "SELECT * FROM rooms WHERE key = $1", requested_key) + .fetch_optional(conn) + .await?; + match room { + Some(r) => { + let calendar_url = format!( + "https://campus.tum.de/tumonline/wbKalender.wbRessource?pResNr={id}", + id = r.tumonline_calendar_id + ); + Ok(Some((calendar_url, r.last_scrape))) + } + None => Ok(None), + } } -fn get_entries( +async fn get_entries( requested_key: &str, args: &QueryArguments, - conn: &mut PgConnection, -) -> QueryResult> { - use crate::schema::calendar::dsl; - dsl::calendar - .filter(dsl::key.eq(&requested_key)) - .filter(dsl::dtstart.ge(&args.start)) - .filter(dsl::dtend.le(&args.end)) - .order(dsl::dtstart) - .load::(conn) + conn: &PgPool, +) -> Result, sqlx::Error> { + sqlx::query_as!( + XMLEvent, + r#"SELECT * + FROM calendar + WHERE key = $1 AND dtstart >= $2 AND dtend <= $3 + ORDER BY dtstart"#, + requested_key, + args.start, + args.end + ) + .fetch_all(conn) + .await } #[get("/api/calendar/{id}")] pub async fn calendar_handler( params: web::Path, web::Query(args): web::Query, + data: web::Data, ) -> HttpResponse { let id = params.into_inner(); - let conn = &mut utils::establish_connection(); - let results = get_entries(&id, &args, conn); - let room_information = get_room_information(&id, conn); + let results = get_entries(&id, &args, &data.db).await; + let room_information = get_room_information(&id, &data.db).await; match (results, room_information) { - (Ok(results), Ok((calendar_url, last_room_sync))) => { + (Ok(results), Ok(Some((calendar_url, last_room_sync)))) => { let last_calendar_sync = results.iter().map(|e| e.last_scrape).min(); let events = results.into_iter().map(Event::from).collect(); HttpResponse::Ok().json(Events { @@ -61,6 +70,9 @@ pub async fn calendar_handler( calendar_url, }) } + (_, Ok(None)) => HttpResponse::NotFound() + .content_type("text/plain") + .body("Room not found"), (Err(e), _) => { error!("Error loading calendar entries: {e:?}"); HttpResponse::InternalServerError() diff --git a/server/calendar/src/main.rs b/server/calendar/src/main.rs index d9d978584..49578b123 100644 --- a/server/calendar/src/main.rs +++ b/server/calendar/src/main.rs @@ -1,16 +1,22 @@ mod calendar; mod models; -mod schema; mod utils; use actix_cors::Cors; use actix_web::{get, middleware, web, App, HttpResponse, HttpServer}; use actix_web_prom::PrometheusMetricsBuilder; +use sqlx::postgres::PgPoolOptions; +use sqlx::PgPool; use std::collections::HashMap; use std::error::Error; use structured_logger::async_json::new_writer; use structured_logger::Builder; +#[derive(Clone, Debug)] +pub struct AppData { + db: PgPool, +} + const MAX_JSON_PAYLOAD: usize = 1024 * 1024; // 1 MB #[get("/api/calendar/status")] @@ -29,6 +35,11 @@ async fn main() -> Result<(), Box> { Builder::with_level("info") .with_target_writer("*", new_writer(tokio::io::stdout())) .init(); + let uri = utils::connection_string(); + let pool = PgPoolOptions::new() + .max_connections(20) + .connect(&uri) + .await?; // metrics let labels = HashMap::from([( @@ -54,6 +65,7 @@ async fn main() -> Result<(), Box> { .wrap(middleware::Logger::default().exclude("/api/calendar/status")) .wrap(middleware::Compress::default()) .app_data(web::JsonConfig::default().limit(MAX_JSON_PAYLOAD)) + .app_data(web::Data::new(AppData { db: pool.clone() })) .service(health_status_handler) .service(calendar::calendar_handler) }) diff --git a/server/calendar/src/models.rs b/server/calendar/src/models.rs index 4646d613e..96bf997ba 100644 --- a/server/calendar/src/models.rs +++ b/server/calendar/src/models.rs @@ -1,9 +1,6 @@ use chrono::NaiveDateTime; -use diesel::prelude::*; -use diesel::Insertable; -#[derive(Insertable, Queryable, AsChangeset)] -#[diesel(table_name = crate::schema::calendar)] +#[derive(Clone, Debug)] pub struct XMLEvent { pub key: String, pub dtstart: NaiveDateTime, @@ -28,8 +25,7 @@ pub struct XMLEvent { pub last_scrape: NaiveDateTime, } -#[derive(Insertable, Queryable, AsChangeset, Clone)] -#[diesel(table_name = crate::schema::rooms)] +#[derive(Clone, Debug)] pub struct Room { pub key: String, pub tumonline_org_id: i32, diff --git a/server/calendar/src/schema.rs b/server/calendar/src/schema.rs deleted file mode 100644 index ab463f4fd..000000000 --- a/server/calendar/src/schema.rs +++ /dev/null @@ -1,39 +0,0 @@ -// @generated automatically by Diesel CLI. - -diesel::table! { - calendar (single_event_id) { - key -> Varchar, - dtstart -> Timestamp, - dtend -> Timestamp, - dtstamp -> Timestamp, - event_id -> Int4, - event_title -> Text, - single_event_id -> Int4, - single_event_type_id -> Text, - single_event_type_name -> Text, - event_type_id -> Text, - event_type_name -> Nullable, - course_type_name -> Nullable, - course_type -> Nullable, - course_code -> Nullable, - course_semester_hours -> Nullable, - group_id -> Nullable, - xgroup -> Nullable, - status_id -> Text, - status -> Text, - comment -> Text, - last_scrape -> Timestamp, - } -} - -diesel::table! { - rooms (key) { - key -> Text, - tumonline_org_id -> Int4, - tumonline_calendar_id -> Int4, - tumonline_room_id -> Int4, - last_scrape -> Timestamp, - } -} - -diesel::allow_tables_to_appear_in_same_query!(calendar, rooms,); diff --git a/server/calendar/src/scrape_task/main_api_connector.rs b/server/calendar/src/scrape_task/main_api_connector.rs index c6d6ac16e..3fb3b5352 100644 --- a/server/calendar/src/scrape_task/main_api_connector.rs +++ b/server/calendar/src/scrape_task/main_api_connector.rs @@ -1,8 +1,8 @@ use chrono::{NaiveDateTime, Utc}; -use diesel::PgConnection; use log::{error, info}; use regex::Regex; use serde::Deserialize; +use sqlx::PgPool; fn api_url_from_env() -> Option { let main_api_addr = std::env::var("CDN_SVC_SERVICE_HOST").ok()?; @@ -47,7 +47,7 @@ impl Room { } } -pub async fn get_all_ids() -> Vec { +pub async fn get_all_ids(conn: &PgPool) -> Vec { let url = api_url_from_env().unwrap_or_else(|| "https://nav.tum.de/cdn/api_data.json".to_string()); let res = reqwest::get(&url).await; @@ -63,43 +63,49 @@ pub async fn get_all_ids() -> Vec { Err(e) => panic!("Failed to parse main-api response: {e:#?}"), }; let start_time = Utc::now().naive_utc(); - let conn = &mut crate::utils::establish_connection(); - store_in_db(conn, &rooms, &start_time); - delete_stale_results(conn, start_time); + store_in_db(conn, &rooms, &start_time).await; + delete_stale_results(conn, start_time).await; rooms } -fn store_in_db(conn: &mut PgConnection, rooms_to_store: &[Room], start_time: &NaiveDateTime) { - use crate::schema::rooms::dsl; - use diesel::prelude::*; - info!("Storing {} rooms in database", rooms_to_store.len()); - rooms_to_store - .iter() - .map(|room| crate::models::Room { +async fn store_in_db(conn: &PgPool, rooms_to_store: &[Room], start_time: &NaiveDateTime) { + info!( + "Storing {cnt} rooms in database", + cnt = rooms_to_store.len() + ); + for room in rooms_to_store { + let room = crate::models::Room { key: room.sap_id.clone(), tumonline_org_id: room.tumonline_org_id, tumonline_calendar_id: room.tumonline_calendar_id, tumonline_room_id: room.tumonline_room_id, last_scrape: *start_time, - }) - .for_each(|room| { - let res = diesel::insert_into(dsl::rooms) - .values(&room) - .on_conflict(dsl::key) - .do_update() - .set(&room) - .execute(conn); - if let Err(e) = res { + }; + if let Err(e) =sqlx::query!(r#" + INSERT INTO rooms(key,tumonline_org_id,tumonline_calendar_id,tumonline_room_id,last_scrape) + VALUES ($1,$2,$3,$4,$5) + ON CONFLICT (key) DO UPDATE SET + tumonline_org_id=$2, + tumonline_calendar_id=$3, + tumonline_room_id=$4, + last_scrape=$5"#, + room.key, + room.tumonline_org_id, + room.tumonline_calendar_id, + room.tumonline_room_id, + room.last_scrape) + .execute(conn) + .await { error!("Error inserting into database: {e:?}"); } - }); + } } -fn delete_stale_results(conn: &mut PgConnection, start_time: NaiveDateTime) { - use crate::schema::rooms::dsl; - use diesel::prelude::*; +async fn delete_stale_results(conn: &PgPool, start_time: NaiveDateTime) { info!("Deleting stale rooms from the database"); - diesel::delete(dsl::rooms) - .filter(dsl::last_scrape.lt(start_time)) + if let Err(e) = sqlx::query!("DELETE FROM rooms WHERE last_scrape < $1", start_time) .execute(conn) - .expect("Failed to delete stale rooms"); + .await + { + error!("Error deleting stale rooms from database: {e:?}"); + } } diff --git a/server/calendar/src/scrape_task/mod.rs b/server/calendar/src/scrape_task/mod.rs index 81fbf5bac..a47efa256 100644 --- a/server/calendar/src/scrape_task/mod.rs +++ b/server/calendar/src/scrape_task/mod.rs @@ -5,14 +5,13 @@ pub mod tumonline_calendar_connector; use crate::scrape_task::main_api_connector::{get_all_ids, Room}; use crate::scrape_task::scrape_room_task::ScrapeRoomTask; use crate::scrape_task::tumonline_calendar_connector::{Strategy, XMLEvents}; -use crate::utils; use chrono::{DateTime, NaiveDate, Utc}; -use diesel::prelude::*; use futures::stream::FuturesUnordered; use futures::StreamExt; use lazy_static::lazy_static; use log::{info, warn}; use prometheus::{register_counter, register_histogram, Counter, Histogram}; +use sqlx::PgPool; use std::time::{Duration, Instant}; use tokio::time::sleep; lazy_static! { @@ -44,8 +43,8 @@ pub struct ScrapeTask { const CONCURRENT_REQUESTS: usize = 2; impl ScrapeTask { - pub async fn new(time_window: chrono::Duration) -> Self { - let rooms_to_scrape = get_all_ids().await; + pub async fn new(conn: &PgPool, time_window: chrono::Duration) -> Self { + let rooms_to_scrape = get_all_ids(conn).await; let rooms_cnt = rooms_to_scrape.len(); Self { rooms_to_scrape, @@ -55,7 +54,7 @@ impl ScrapeTask { } } - pub async fn scrape_to_db(&mut self) { + pub async fn scrape_to_db(&mut self, conn: &sqlx::PgPool) { info!("Starting scraping calendar entries"); let mut work_queue = FuturesUnordered::new(); let start = self.scraping_start - self.time_window / 2; @@ -66,7 +65,7 @@ impl ScrapeTask { // It is critical for successfully scraping that we are not blocked. sleep(Duration::from_millis(50)).await; - work_queue.push(scrape(room, start.date_naive(), self.time_window)); + work_queue.push(scrape(conn, room, start.date_naive(), self.time_window)); } } work_queue.next().await; @@ -92,20 +91,21 @@ impl ScrapeTask { (Utc::now() - self.scraping_start).to_std().unwrap() } - pub fn delete_stale_results(&self) { - use crate::schema::calendar::dsl; + pub async fn delete_stale_results(&self, conn: &PgPool) { let start_time = Instant::now(); let scrape_interval = ( self.scraping_start - self.time_window / 2, self.scraping_start + self.time_window / 2, ); - let conn = &mut utils::establish_connection(); - diesel::delete(dsl::calendar) - .filter(dsl::dtstart.gt(scrape_interval.0.naive_local())) - .filter(dsl::dtend.le(scrape_interval.1.naive_local())) - .filter(dsl::last_scrape.le(self.scraping_start.naive_local())) - .execute(conn) - .expect("Failed to delete calendar"); + sqlx::query!( + "DELETE FROM calendar WHERE dtstart > $1 AND dtend < $2 AND last_scrape < $3", + scrape_interval.0.naive_local(), + scrape_interval.1.naive_local(), + self.scraping_start.naive_local() + ) + .execute(conn) + .await + .expect("Failed to delete calendar"); info!( "Finished deleting stale results ({time_window} in {passed:?})", @@ -115,7 +115,7 @@ impl ScrapeTask { } } -async fn scrape(room: Room, from: NaiveDate, duration: chrono::Duration) { +async fn scrape(conn: &PgPool, room: Room, from: NaiveDate, duration: chrono::Duration) { let _timer = REQ_TIME_HISTOGRAM.start_timer(); // drop as observe // request and parse the xml file @@ -130,7 +130,7 @@ async fn scrape(room: Room, from: NaiveDate, duration: chrono::Duration) { match events { Ok(events) => { success_cnt += events.len(); - events.store_in_db(); + events.store_in_db(conn).await; } Err(retry) => match retry { Strategy::NoRetry => {} diff --git a/server/calendar/src/scrape_task/tumonline_calendar_connector.rs b/server/calendar/src/scrape_task/tumonline_calendar_connector.rs index c5dccad29..0495d7aff 100644 --- a/server/calendar/src/scrape_task/tumonline_calendar_connector.rs +++ b/server/calendar/src/scrape_task/tumonline_calendar_connector.rs @@ -1,12 +1,11 @@ use crate::models::XMLEvent; use crate::scrape_task::main_api_connector::Room; use crate::scrape_task::scrape_room_task::ScrapeRoomTask; -use crate::{schema, utils}; use chrono::{NaiveDateTime, Utc}; -use diesel::prelude::*; use log::{debug, error, warn}; use minidom::Element; use rand::Rng; +use sqlx::PgPool; use std::collections::HashMap; use std::time::Duration; use tokio::time::sleep; @@ -149,26 +148,35 @@ impl XMLEvents { pub(crate) fn len(&self) -> usize { self.events.len() } - pub(crate) fn store_in_db(self) -> bool { - let conn = &mut utils::establish_connection(); - use schema::calendar::dsl; - self.events - .iter() - .map(|event| { - diesel::insert_into(dsl::calendar) - .values(event) - .on_conflict(dsl::single_event_id) - .do_update() - .set(event) - .execute(conn) - }) - .all(|res| match res { - Ok(_) => true, - Err(e) => { - error!("Error inserting into database: {e:?}"); - false - } - }) + pub(crate) async fn store_in_db(self, conn: &PgPool) { + for event in self.events { + if let Err(e) = sqlx::query!(r#" + INSERT INTO calendar(key, dtstart, dtend, dtstamp, event_id, event_title, single_event_id, single_event_type_id, single_event_type_name, event_type_id, event_type_name, course_type_name, course_type, course_code, course_semester_hours, group_id, xgroup, status_id, status, comment, last_scrape) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21) + ON CONFLICT (key, dtstart, dtend) DO UPDATE SET + dtstamp=$4, + event_id=$5, + event_title=$6, + single_event_id=$7, + single_event_type_id=$8, + single_event_type_name=$9, + event_type_id=$10, + event_type_name=$11, + course_type_name=$12, + course_type=$13, + course_code=$14, + course_semester_hours=$15, + group_id=$16, + xgroup=$17, + status_id=$18, + status=$19, + comment=$20, + last_scrape=$21"#, + event.key, event.dtstart, event.dtend, event.dtstamp, event.event_id, event.event_title, event.single_event_id, event.single_event_type_id, event.single_event_type_name, event.event_type_id, event.event_type_name, event.course_type_name, event.course_type, event.course_code, event.course_semester_hours, event.group_id, event.xgroup, event.status_id, event.status, event.comment, event.last_scrape) + .execute(conn).await { + error!("#Error inserting into database: {e:?}"); + } + } } fn new(requested_room: &Room, body: &str) -> Option { let root = match body.parse::() { diff --git a/server/calendar/src/scraper.rs b/server/calendar/src/scraper.rs index c82d83d14..595d757fc 100644 --- a/server/calendar/src/scraper.rs +++ b/server/calendar/src/scraper.rs @@ -5,8 +5,9 @@ use structured_logger::{async_json::new_writer, Builder}; use std::fmt; use prometheus::labels; +use sqlx::postgres::PgPoolOptions; + mod models; -mod schema; mod scrape_task; mod utils; @@ -45,11 +46,17 @@ async fn main() { .with_target_writer("*", new_writer(tokio::io::stdout())) .init(); + let uri = utils::connection_string(); + let pool = PgPoolOptions::new() + .max_connections(20) + .connect(&uri) + .await + .expect("Failed to connect to database"); let time_window = TimeWindow::init_from_env(); info!("Scraping time window: {time_window:?}"); - let mut scraper = ScrapeTask::new(time_window.duration).await; - scraper.scrape_to_db().await; - scraper.delete_stale_results(); + let mut scraper = ScrapeTask::new(&pool, time_window.duration).await; + scraper.scrape_to_db(&pool).await; + scraper.delete_stale_results(&pool).await; info!("Pushing metrics to the pushgateway"); tokio::task::spawn_blocking(move || { diff --git a/server/calendar/src/utils.rs b/server/calendar/src/utils.rs index 3ca0a4b3c..017a6612c 100644 --- a/server/calendar/src/utils.rs +++ b/server/calendar/src/utils.rs @@ -1,11 +1,7 @@ -use diesel::{Connection, PgConnection}; -fn connection_string() -> String { +pub fn connection_string() -> String { let username = std::env::var("POSTGRES_USER").unwrap_or_else(|_| "postgres".to_string()); let password = std::env::var("POSTGRES_PASSWORD").unwrap_or_else(|_| "password".to_string()); let url = std::env::var("POSTGRES_URL").unwrap_or_else(|_| "localhost".to_string()); let db = std::env::var("POSTGRES_DB").unwrap_or_else(|_| username.clone()); format!("postgres://{username}:{password}@{url}/{db}") } -pub fn establish_connection() -> PgConnection { - PgConnection::establish(&connection_string()).expect("Cannot open database") -} diff --git a/server/main-api/src/setup/database/alias.rs b/server/main-api/src/setup/database/alias.rs index 511d57bda..a1628df58 100644 --- a/server/main-api/src/setup/database/alias.rs +++ b/server/main-api/src/setup/database/alias.rs @@ -89,8 +89,7 @@ impl Alias { sqlx::query!( r#"INSERT INTO aliases (alias, key, type, visible_id) VALUES ($1, $2, $3, $4) - ON CONFLICT (alias,key) DO UPDATE - SET + ON CONFLICT (alias,key) DO UPDATE SET key = $2, type = $3, visible_id = $4"#,