diff --git a/engine/Cargo.lock b/engine/Cargo.lock index fec03a4..c5ef6d9 100644 --- a/engine/Cargo.lock +++ b/engine/Cargo.lock @@ -566,6 +566,29 @@ dependencies = [ "typenum", ] +[[package]] +name = "cssparser" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf", + "smallvec", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn 2.0.89", +] + [[package]] name = "ctr" version = "0.9.2" @@ -596,7 +619,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -607,7 +630,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -647,7 +670,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -674,6 +697,27 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "dtoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + +[[package]] +name = "ego-tree" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c6ba7d4eec39eaa9ab24d44a0e73a7949a1095a8b3f3abb11eddf27dbb56a53" + [[package]] name = "either" version = "1.13.0" @@ -821,6 +865,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures-channel" version = "0.3.30" @@ -901,7 +955,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -933,6 +987,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -943,6 +1006,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -1104,6 +1176,20 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "html5ever" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e15626aaf9c351bc696217cbe29cb9b5e86c43f8a46b5e2f5c6c5cf7cb904ce" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn 2.0.89", +] + [[package]] name = "http" version = "1.0.0" @@ -1422,6 +1508,26 @@ dependencies = [ "value-bag", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82c88c6129bd24319e62a0359cb6b958fa7e8be6e19bb1663bc396b90883aca5" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "md-5" version = "0.10.6" @@ -1516,6 +1622,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "nix" version = "0.29.0" @@ -1646,7 +1758,7 @@ dependencies = [ "reqwest", "serde", "serde_json", - "thiserror", + "thiserror 1.0.55", "url", "validator", ] @@ -1674,7 +1786,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -1757,6 +1869,77 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_macros", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared 0.11.2", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", + "proc-macro2", + "quote", + "syn 2.0.89", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.3" @@ -1774,7 +1957,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -1860,7 +2043,7 @@ dependencies = [ "smallvec", "sync_wrapper", "tempfile", - "thiserror", + "thiserror 1.0.55", "time", "tokio", "tokio-stream", @@ -1877,7 +2060,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -1903,7 +2086,7 @@ dependencies = [ "serde_urlencoded", "serde_yaml", "sqlx", - "thiserror", + "thiserror 1.0.55", "tokio", "url", "uuid", @@ -1922,8 +2105,8 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.72", - "thiserror", + "syn 2.0.89", + "thiserror 1.0.55", ] [[package]] @@ -1981,6 +2164,12 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro-crate" version = "3.1.0" @@ -2016,9 +2205,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -2083,9 +2272,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.5" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -2095,9 +2284,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -2106,9 +2295,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" @@ -2330,6 +2519,22 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0e749d29b2064585327af5038a5a8eb73aeebad4a3472e83531a436563f7208" +dependencies = [ + "ahash", + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "precomputed-hash", + "selectors", + "tendril", +] + [[package]] name = "sct" version = "0.7.1" @@ -2363,6 +2568,25 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" +dependencies = [ + "bitflags 2.6.0", + "cssparser", + "derive_more", + "fxhash", + "log", + "new_debug_unreachable", + "phf", + "phf_codegen", + "precomputed-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "semver" version = "1.0.23" @@ -2386,7 +2610,7 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -2440,7 +2664,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -2456,6 +2680,15 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "servo_arc" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae65c4249478a2647db249fb43e23cec56a2c8974a427e7bd8cb5a1d0964921a" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "sha1" version = "0.10.6" @@ -2497,6 +2730,12 @@ dependencies = [ "rand_core", ] +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "slab" version = "0.4.9" @@ -2624,7 +2863,7 @@ dependencies = [ "sha2", "smallvec", "sqlformat", - "thiserror", + "thiserror 1.0.55", "tracing", "url", "uuid", @@ -2708,7 +2947,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 1.0.55", "tracing", "uuid", "whoami", @@ -2749,7 +2988,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 1.0.55", "tracing", "uuid", "whoami", @@ -2780,6 +3019,38 @@ dependencies = [ "uuid", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared 0.10.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", + "proc-macro2", + "quote", +] + [[package]] name = "stringprep" version = "0.1.5" @@ -2816,9 +3087,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.72" +version = "2.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" dependencies = [ "proc-macro2", "quote", @@ -2867,6 +3138,17 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "terminal-banner" version = "0.4.1" @@ -2905,7 +3187,16 @@ version = "1.0.55" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e3de26b0965292219b4287ff031fcba86837900fe9cd2b34ea8ad893c0953d2" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.55", +] + +[[package]] +name = "thiserror" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +dependencies = [ + "thiserror-impl 2.0.3", ] [[package]] @@ -2916,7 +3207,18 @@ checksum = "268026685b2be38d7103e9e507c938a1fcb3d7e6eb15e87870b617bf37b6d581" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.89", ] [[package]] @@ -2999,7 +3301,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -3112,7 +3414,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -3281,6 +3583,12 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "uuid" version = "1.10.0" @@ -3301,7 +3609,7 @@ checksum = "ee1cd046f83ea2c4e920d6ee9f7c3537ef928d75dce5d84a87c2c5d6b3999a3a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -3319,13 +3627,16 @@ dependencies = [ "openid", "poem", "poem-openapi", + "regex", "reqwest", + "scraper", "serde", "serde_json", "serde_with", "sha2", "sqlx", "terminal-banner", + "thiserror 2.0.3", "tracing", "tracing-subscriber", "url", @@ -3359,7 +3670,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] @@ -3434,7 +3745,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", "wasm-bindgen-shared", ] @@ -3468,7 +3779,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3717,7 +4028,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.89", ] [[package]] diff --git a/engine/Cargo.toml b/engine/Cargo.toml index f709031..16a7166 100644 --- a/engine/Cargo.toml +++ b/engine/Cargo.toml @@ -26,7 +26,9 @@ poem-openapi = { version = "5", git = "https://github.com/poem-web/poem", branch "redoc", "static-files", ] } +regex = "1.11.1" reqwest = "0.12.5" +scraper = "0.21.0" serde = "1.0.204" serde_json = "1.0.120" serde_with = { version = "3.9.0", features = ["json", "chrono"] } @@ -41,6 +43,7 @@ sqlx = { version = "0.7.4", features = [ "ipnetwork", ] } terminal-banner = "0.4.1" +thiserror = "2.0.3" tracing = "0.1.40" tracing-subscriber = "0.3.18" url = { version = "2.5.2", features = ["serde"] } diff --git a/engine/src/ingress/mod.rs b/engine/src/ingress/mod.rs new file mode 100644 index 0000000..68024e1 --- /dev/null +++ b/engine/src/ingress/mod.rs @@ -0,0 +1 @@ +pub mod product; diff --git a/engine/src/ingress/product/1855004.example.json b/engine/src/ingress/product/1855004.example.json new file mode 100644 index 0000000..9299f1f --- /dev/null +++ b/engine/src/ingress/product/1855004.example.json @@ -0,0 +1,43 @@ +{ + "@type": "Product", + "@id": "https://tweakers.net/pricewatch/1855004/anker-737-power-bank-powercore-24k.html#Product-1855004", + "name": "Anker 737 Power Bank (PowerCore 24K)", + "@context": "https://schema.org", + "url": "https://tweakers.net/pricewatch/1855004/anker-737-power-bank-powercore-24k.html", + "brand": { + "@type": "Brand", + "name": "Anker", + "url": "https://tweakers.net/merk/2742/anker/" + }, + "image": [ + "https://tweakers.net/ext/i/2005317900.webp", + "https://tweakers.net/ext/i/2005565422.jpeg", + "https://tweakers.net/ext/i/2006644124.jpeg", + "https://tweakers.net/ext/i/2006644126.jpeg", + "https://tweakers.net/ext/i/2006644128.jpeg", + "https://tweakers.net/ext/i/2006644130.jpeg", + "https://tweakers.net/ext/i/2006644132.jpeg", + "https://tweakers.net/ext/i/2006644134.jpeg", + "https://tweakers.net/ext/i/2006644136.jpeg" + ], + "gtin13": [ + "0194644098728" + ], + "mpn": [ + "a1289", + "A1289011" + ], + "description": "1x USB A, 2x USB type-C", + "aggregateRating": { + "@type": "AggregateRating", + "ratingValue": 3.5, + "ratingCount": 4 + }, + "offers": { + "@type": "AggregateOffer", + "lowPrice": 84, + "highPrice": 153, + "offerCount": 15, + "priceCurrency": "EUR" + } +} diff --git a/engine/src/ingress/product/mod.rs b/engine/src/ingress/product/mod.rs new file mode 100644 index 0000000..0bf6eeb --- /dev/null +++ b/engine/src/ingress/product/mod.rs @@ -0,0 +1 @@ +pub mod tweakers; diff --git a/engine/src/ingress/product/tweakers.rs b/engine/src/ingress/product/tweakers.rs new file mode 100644 index 0000000..db3f9a3 --- /dev/null +++ b/engine/src/ingress/product/tweakers.rs @@ -0,0 +1,56 @@ +use scraper::{Html, Selector}; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +// "{\"@type\":\"Product\",\"@id\":\"https:\\/\\/tweakers.net\\/pricewatch\\/1855004\\/anker-737-power-bank-powercore-24k.html#Product-1855004\",\"name\":\"Anker 737 Power Bank (PowerCore 24K)\",\"@context\":\"https:\\/\\/schema.org\",\"url\":\"https:\\/\\/tweakers.net\\/pricewatch\\/1855004\\/anker-737-power-bank-powercore-24k.html\",\"brand\":{\"@type\":\"Brand\",\"name\":\"Anker\",\"url\":\"https:\\/\\/tweakers.net\\/merk\\/2742\\/anker\\/\"},\"image\":[\"https:\\/\\/tweakers.net\\/ext\\/i\\/2005317900.webp\",\"https:\\/\\/tweakers.net\\/ext\\/i\\/2005565422.jpeg\",\"https:\\/\\/tweakers.net\\/ext\\/i\\/2006644124.jpeg\",\"https:\\/\\/tweakers.net\\/ext\\/i\\/2006644126.jpeg\",\"https:\\/\\/tweakers.net\\/ext\\/i\\/2006644128.jpeg\",\"https:\\/\\/tweakers.net\\/ext\\/i\\/2006644130.jpeg\",\"https:\\/\\/tweakers.net\\/ext\\/i\\/2006644132.jpeg\",\"https:\\/\\/tweakers.net\\/ext\\/i\\/2006644134.jpeg\",\"https:\\/\\/tweakers.net\\/ext\\/i\\/2006644136.jpeg\"],\"gtin13\":[\"0194644098728\"],\"mpn\":[\"a1289\",\"A1289011\"],\"description\":\"1x USB A, 2x USB type-C\",\"aggregateRating\":{\"@type\":\"AggregateRating\",\"ratingValue\":3.5,\"ratingCount\":4},\"offers\":{\"@type\":\"AggregateOffer\",\"lowPrice\":84,\"highPrice\":153,\"offerCount\":15,\"priceCurrency\":\"EUR\"}}" + +#[derive(Debug, Serialize, Deserialize)] +pub struct TweakerProduct { + pub id: String, +} + +#[derive(Debug, Error)] +pub enum TweakerError { + #[error(transparent)] + Request(#[from] reqwest::Error), +} + +// Fetch https://tweakers.net/pricewatch/1855004/anker-737-power-bank-powercore-24k/specificaties/ +// and parse the application/ld+json +pub async fn get_by_tweaker_id(tweaker_id: String) -> Result { + + let response = reqwest::get(format!("https://tweakers.net/pricewatch/{}/specificaties/", tweaker_id)).await.unwrap(); + let body = response.text().await.unwrap(); + + // println!("{}", body); + // // regex match for + // let re = regex::Regex::new(r#""#).unwrap(); + // let captures = re.captures(&body).unwrap(); + + // for capture in captures.iter() { + // println!("{}", capture.unwrap().as_str()); + // } + + let document = Html::parse_document(&body); + + // Define a selector for