diff --git a/example/client/burl/Jamfile b/example/client/burl/Jamfile index 7753d0e..bffc34d 100644 --- a/example/client/burl/Jamfile +++ b/example/client/burl/Jamfile @@ -23,5 +23,6 @@ project ; exe burl : + cookie.cpp main.cpp ;
diff --git a/example/client/burl/cookie.cpp b/example/client/burl/cookie.cpp
new file mode 100644
index 0000000..45c3658
--- /dev/null
+++ b/example/client/burl/cookie.cpp
@@ -0,0 +1,481 @@
+//
+// Copyright (c) 2024 Mohammad Nejati
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Official repository: https://github.com/cppalliance/http_io
+//
+
+#include "cookie.hpp"
+
+// NOTE(review): the header name on the next line was lost when this patch
+// was extracted; restore it from the upstream repository.
+#include
+
+#include <algorithm>
+#include <charconv>
+#include <chrono>
+#include <cstddef>
+#include <ctime>
+#include <iomanip>
+#include <iterator>
+#include <limits>
+#include <sstream>
+#include <string>
+#include <system_error>
+#include <utility>
+
+namespace{
+
+// Predicate selecting the characters accepted in a cookie name.
+struct name_chars_t
+{
+    constexpr
+    bool
+    operator()(char c) const noexcept
+    {
+        //TODO: add a fast path
+
+        // non-printable chars and space
+        if(c <= 0x20)
+            return false;
+
+        // DEL
+        if(c == 0x7F)
+            return false;
+
+        // separators
+        return
+            c != '(' && c != ')' && c != '<' && c != '>' && c != '@' &&
+            c != ',' && c != ';' && c != ':' && c != '\\' && c != '"' &&
+            c != '/' && c != '[' && c != ']' && c != '?' && c != '=' &&
+            c != '{' && c != '}';
+    }
+};
+
+constexpr auto name_chars = name_chars_t{};
+
+// Predicate selecting the characters accepted in a cookie value:
+// 0x21..0x7E except the double quote, comma, semicolon and backslash.
+struct value_chars_t
+{
+    constexpr
+    bool
+    operator()(char c) const noexcept
+    {
+        return
+            (c == 0x21             ) ||
+            (c >= 0x23 && c <= 0x2B) ||
+            (c >= 0x2D && c <= 0x3A) ||
+            (c >= 0x3C && c <= 0x5B) ||
+            (c >= 0x5D && c <= 0x7E);
+    }
+};
+
+constexpr auto value_chars = value_chars_t{};
+
+// Characters accepted in an attribute name or value.
+// NOTE(review): only 0x1F, DEL and ';' are excluded here; confirm whether
+// the whole control range 0x00..0x1F was meant to be excluded.
+constexpr auto attr_chars =
+    urls::grammar::all_chars -
+    urls::grammar::lut_chars("\x1F\x7f;");
+
+// Converts a broken-down UTC time to seconds since the Unix epoch.
+// std::mktime is not usable for this because it interprets its
+// argument as local time.
+std::time_t
+utc_to_time_t(const std::tm& tm) noexcept
+{
+    // Days since 1970-01-01 in the proleptic Gregorian calendar, counting
+    // years from March so that the leap day falls at the end of the year.
+    long long y = static_cast<long long>(tm.tm_year) + 1900;
+    const long long m = tm.tm_mon + 1;
+    if(m <= 2)
+        --y;
+    const long long era = (y >= 0 ? y : y - 399) / 400;
+    const long long yoe = y - era * 400;
+    const long long doy =
+        (153 * (m > 2 ? m - 3 : m + 9) + 2) / 5 + tm.tm_mday - 1;
+    const long long doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
+    const long long days = era * 146097 + doe - 719468;
+
+    return static_cast<std::time_t>(
+        ((days * 24 + tm.tm_hour) * 60 + tm.tm_min) * 60 + tm.tm_sec);
+}
+
+// Returns true when the request host `r_domain` is covered by the cookie
+// domain `c_domain`. Without `subdomains` the two must be identical;
+// otherwise `r_domain` may also be a subdomain of `c_domain`.
+bool
+domain_match(
+    core::string_view r_domain,
+    core::string_view c_domain,
+    bool subdomains) noexcept
+{
+    if(!subdomains)
+        return r_domain == c_domain;
+
+    // a leading dot in the cookie domain carries no meaning
+    if(c_domain.starts_with('.'))
+        c_domain.remove_prefix(1);
+
+    if(!r_domain.ends_with(c_domain))
+        return false;
+
+    if(r_domain.size() == c_domain.size())
+        return true;
+
+    // the suffix must start at a label boundary
+    return r_domain[r_domain.size() - c_domain.size() - 1] == '.';
+}
+
+// Returns true when the request path `r_path` is covered by the cookie
+// path `c_path`: either both are identical, or `c_path` is a prefix of
+// `r_path` that ends at a '/' boundary.
+bool
+path_match(
+    core::string_view r_path,
+    core::string_view c_path) noexcept
+{
+    // an empty request path denotes the root
+    if(r_path.empty())
+        r_path = "/";
+
+    if(!r_path.starts_with(c_path))
+        return false;
+
+    if(r_path.size() == c_path.size())
+        return true;
+
+    if(c_path.ends_with('/'))
+        return true;
+
+    // the character right after the prefix must be a separator
+    return r_path[c_path.size()] == '/';
+}
+
+} // namespace
+
+// Parses a Set-Cookie field value into a `cookie`.
+//
+// Returns an error when the text does not match the grammar, when an
+// attribute that requires a value has none or has a malformed one, or
+// when a "__Secure-"/"__Host-" name prefix requirement is not met.
+// An Expires date that cannot be parsed is ignored.
+boost::system::result<cookie>
+parse_cookie(core::string_view sv)
+{
+    namespace ug = urls::grammar;
+    using sys_clock = std::chrono::system_clock;
+
+    const auto parse_rs = ug::parse(
+        sv,
+        ug::tuple_rule(
+            ug::token_rule(name_chars),
+            ug::squelch(ug::delim_rule('=')),
+            ug::optional_rule(ug::token_rule(value_chars)),
+            ug::range_rule(
+                ug::tuple_rule(
+                    ug::squelch(ug::delim_rule(';')),
+                    ug::squelch(ug::optional_rule(ug::delim_rule(' '))),
+                    ug::token_rule(attr_chars - ug::lut_chars('=')),
+                    ug::squelch(ug::optional_rule(ug::delim_rule('='))),
+                    ug::optional_rule(ug::token_rule(attr_chars))))));
+
+    if(parse_rs.has_error())
+        return parse_rs.error();
+
+    auto rs = cookie{};
+    rs.name = std::get<0>(parse_rs.value());
+    rs.value = std::get<1>(parse_rs.value());
+
+    for(auto&& [name, value] : std::get<2>(parse_rs.value()))
+    {
+        if(ug::ci_is_equal(name, "Expires"))
+        {
+            if(!value)
+                return ug::error::invalid;
+            // TODO: There are more date formats; we need a
+            // better parsing method.
+            auto tm = std::tm{};
+            auto ss = std::stringstream{ *value };
+            if(value->find('-') != core::string_view::npos)
+                ss >> std::get_time(&tm, "%a, %d-%b-%Y %H:%M:%S GMT");
+            else
+                ss >> std::get_time(&tm, "%a, %d %b %Y %H:%M:%S GMT");
+
+            // Ignore a date that failed to parse instead of storing
+            // a bogus time stamp.
+            if(ss.fail())
+                continue;
+
+            // The date is expressed in GMT, not in local time
+            rs.expires = sys_clock::from_time_t(utc_to_time_t(tm));
+        }
+        else if(ug::ci_is_equal(name, "Max-Age"))
+        {
+            if(!value)
+                return ug::error::invalid;
+
+            long long delta = 0;
+            const char* const first = value->data();
+            const char* const last = first + value->size();
+            const auto [ptr, ec] = std::from_chars(first, last, delta);
+            if(ptr != last)
+                return ug::error::invalid;
+
+            if(ec == std::errc::result_out_of_range)
+            {
+                delta = value->starts_with('-')
+                    ? -1
+                    : std::numeric_limits<long long>::max();
+            }
+            else if(ec != std::errc{})
+            {
+                return ug::error::invalid;
+            }
+
+            // Convert to expiry date
+            const auto now = sys_clock::now();
+            if(delta <= 0)
+            {
+                // expires immediately
+                rs.expires = now - std::chrono::seconds{ 1 };
+            }
+            else
+            {
+                // saturate instead of overflowing the clock
+                const auto room =
+                    std::chrono::duration_cast<std::chrono::seconds>(
+                        sys_clock::time_point::max() - now);
+                if(delta < room.count())
+                    rs.expires = now + std::chrono::seconds{ delta };
+                else
+                    rs.expires = sys_clock::time_point::max();
+            }
+        }
+        else if(ug::ci_is_equal(name, "Domain"))
+        {
+            if(!value)
+                return ug::error::invalid;
+
+            rs.domain = *value;
+        }
+        else if(ug::ci_is_equal(name, "Path"))
+        {
+            if(!value)
+                return ug::error::invalid;
+
+            rs.path = *value;
+        }
+        else if(ug::ci_is_equal(name, "SameSite"))
+        {
+            const auto policy = value.value_or("");
+            if(ug::ci_is_equal(policy, "Strict"))
+                rs.same_site = cookie::same_site_t::strict;
+            else if(ug::ci_is_equal(policy, "Lax"))
+                rs.same_site = cookie::same_site_t::lax;
+            else if(ug::ci_is_equal(policy, "None"))
+                rs.same_site = cookie::same_site_t::none;
+            else
+                return ug::error::invalid;
+        }
+        else if(ug::ci_is_equal(name, "Partitioned"))
+        {
+            rs.partitioned = true;
+        }
+        else if(ug::ci_is_equal(name, "Secure"))
+        {
+            rs.secure = true;
+        }
+        else if(ug::ci_is_equal(name, "HttpOnly"))
+        {
+            rs.http_only = true;
+        }
+    }
+
+    const auto cookie_name = core::string_view{ rs.name };
+
+    // "__Secure-" prefix requirements
+    if(cookie_name.starts_with("__Secure-"))
+    {
+        if(!rs.secure)
+            return ug::error::invalid;
+    }
+
+    // "__Host-" prefix requirements
+    if(cookie_name.starts_with("__Host-"))
+    {
+        if(!rs.secure)
+            return ug::error::invalid;
+
+        if(!rs.path || rs.path.value() != "/")
+            return ug::error::invalid;
+
+        if(rs.domain)
+            return ug::error::invalid;
+    }
+
+    return rs;
+}
+
+// Stores `c`, received in a response for `url`, replacing any stored
+// cookie that has the same name, domain and path. A cookie that names a
+// domain not covering the host of `url` is dropped, and an already
+// expired cookie only removes the stored one.
+void
+cookie_jar::add(urls::url_view url, cookie c)
+{
+    auto m = meta_t{};
+
+    if(c.domain.has_value())
+    {
+        // A server may only set cookies for its own domain
+        // TODO: Verify with the Public Suffix List
+        if(!domain_match(url.host(), c.domain.value(), true))
+            return;
+    }
+    else
+    {
+        // host-only cookie; use the same (decoded) host form that
+        // make_field() matches against
+        m.subdomains = false;
+        c.domain.emplace(url.host());
+    }
+
+    if(!c.path.has_value())
+    {
+        // default path: the request path without its last segment
+        c.path.emplace();
+        auto segs = url.encoded_segments();
+        auto end = std::prev(segs.end(), !segs.empty());
+        for(auto it = segs.begin(); it != end; ++it)
+        {
+            c.path->push_back('/');
+            c.path->append(*it);
+        }
+        if(c.path->empty())
+            c.path->push_back('/');
+    }
+
+    cookies_.remove_if(
+        [&](const pair_t& p)
+        {
+            return
+                c.name == p.c.name &&
+                c.path == p.c.path &&
+                c.domain == p.c.domain;
+        });
+
+    // Check expiry date last to allow servers to remove cookies
+    if(c.expires.has_value() &&
+        c.expires.value() < std::chrono::system_clock::now())
+    {
+        return;
+    }
+
+    cookies_.emplace_back(m, std::move(c));
+}
+
+// Builds the value of a Cookie request field for `url` from the stored
+// cookies whose domain, path and Secure flag match it. Expired cookies
+// encountered on the way are removed from the jar.
+std::string
+cookie_jar::make_field(urls::url_view url)
+{
+    const auto r_domain = url.host();
+    const auto r_path = url.encoded_path();
+    const auto r_is_secure = url.scheme_id() == urls::scheme::https;
+    const auto now = std::chrono::system_clock::now();
+
+    auto rs = std::string{};
+    for(auto it = cookies_.begin(); it != cookies_.end();)
+    {
+        if(it->c.expires.has_value() && it->c.expires <= now)
+        {
+            it = cookies_.erase(it);
+            continue;
+        }
+
+        if( domain_match(r_domain, it->c.domain.value(), it->m.subdomains) &&
+            path_match(r_path, it->c.path.value()) &&
+            (!it->c.secure || r_is_secure))
+        {
+            // pairs are separated by "; " with no trailing separator
+            if(!rs.empty())
+                rs.append("; ");
+
+            rs.append(it->c.name);
+            rs.push_back('=');
+            rs.append(it->c.value.value_or(""));
+        }
+
+        ++it;
+    }
+    return rs;
+}
+
+// Writes one line per stored cookie: the subdomains flag, a space, the
+// name/value pair and the Domain, Path, Secure, HttpOnly, Expires attributes.
+std::ostream&
+operator<<(std::ostream& os, const cookie_jar& cj)
+{
+    for(const auto& p : cj.cookies_)
+    {
+        os
+            << p.m.subdomains << ' '
+            << p.c.name << '=' << p.c.value.value_or("")
+            << "; Domain=" << p.c.domain.value()
+            << "; Path=" << p.c.path.value();
+
+        if(p.c.secure)
+            os << "; Secure";
+
+        if(p.c.http_only)
+            os << "; HttpOnly";
+
+        if(p.c.expires)
+        {
+            const auto tt = std::chrono::system_clock::to_time_t(*p.c.expires);
+            // std::gmtime returns null when the time is not representable
+            if(const std::tm* tm = std::gmtime(&tt))
+            {
+                os
+                    << "; Expires="
+                    << std::put_time(tm, "%a, %d %b %Y %H:%M:%S GMT");
+            }
+        }
+
+        // '\n' rather than std::endl: no flush per line
+        os << '\n';
+    }
+    return os;
+}
+
+// Reads the lines produced by operator<< and appends the cookies to `cj`.
+// Throws (through result::value()) when a line is not a valid cookie.
+// NOTE(review): operator<< always writes a Domain attribute, which
+// parse_cookie() rejects for "__Host-" cookies; such a line cannot be
+// read back - confirm and handle.
+std::istream&
+operator>>(std::istream& is, cookie_jar& cj)
+{
+    for(std::string line; std::getline(is, line);)
+    {
+        // tolerate blank lines
+        if(line.empty())
+            continue;
+
+        auto sv = core::string_view{ line };
+        auto meta = cookie_jar::meta_t{ .subdomains = sv.starts_with("1 ") };
+
+        // skip the flag and its separator without running past the end
+        sv.remove_prefix(std::min<std::size_t>(2, sv.size()));
+
+        auto parsed = parse_cookie(sv).value();
+        cj.cookies_.emplace_back(meta, std::move(parsed));
+    }
+    return is;
+}
diff --git a/example/client/burl/cookie.hpp b/example/client/burl/cookie.hpp
new file mode 100644
index 0000000..b3243a8
--- /dev/null
+++ b/example/client/burl/cookie.hpp
@@ -0,0 +1,95 @@
+//
+// Copyright (c) 2024 Mohammad Nejati
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Official repository: https://github.com/cppalliance/http_io
+//
+
+#ifndef BURL_COOKIES_HPP
+#define BURL_COOKIES_HPP
+
+// NOTE(review): the header names on the bare #include lines below were
+// lost when this patch was extracted; restore them from upstream.
+#include
+
+#include
+#include
+#include
+#include <utility>
+
+namespace core = boost::core;
+namespace urls = boost::urls;
+
+// A cookie as described by a Set-Cookie field: the name/value pair plus
+// the attributes recognized by parse_cookie().
+struct cookie
+{
+    enum same_site_t
+    {
+        strict,
+        lax,
+        none
+    };
+
+    std::string name;
+    boost::optional<std::string> value;
+    boost::optional<std::chrono::system_clock::time_point> expires;
+    boost::optional<std::string> domain;
+    boost::optional<std::string> path;
+    boost::optional<same_site_t> same_site;
+    bool partitioned = false;
+    bool secure = false;
+    bool http_only = false;
+};
+
+// Parses the value of a Set-Cookie field.
+boost::system::result<cookie>
+parse_cookie(core::string_view sv);
+
+// Stores cookies and produces the Cookie field for outgoing requests.
+class cookie_jar
+{
+    struct meta_t
+    {
+        // false for a cookie that is only sent to the exact host
+        bool subdomains = true;
+    };
+
+    struct pair_t
+    {
+        meta_t m;
+        cookie c;
+
+        pair_t(meta_t m_, cookie c_)
+            : m{ m_ }
+            , c{ std::move(c_) } // sink parameter: move, do not copy
+        {
+        }
+    };
+
+    std::list<pair_t> cookies_;
+
+public:
+    // Adds (or replaces, or removes when expired) a cookie received
+    // in a response for `url`.
+    void
+    add(urls::url_view url, cookie c);
+
+    // Returns the Cookie field value to send with a request for `url`.
+    std::string
+    make_field(urls::url_view url);
+
+    // Writes the stored cookies, one per line.
+    friend
+    std::ostream&
+    operator<<(std::ostream& os, const cookie_jar& cj);
+
+    // Reads cookies written by operator<<.
+    friend
+    std::istream&
+    operator>>(std::istream& is, cookie_jar& cj);
+};
+
+#endif
diff --git a/example/client/burl/main.cpp b/example/client/burl/main.cpp index 4863802..2325d92 100644 --- a/example/client/burl/main.cpp +++ b/example/client/burl/main.cpp @@ -7,6 +7,8 @@ // Official repository: https://github.com/cppalliance/http_io // +#include "cookie.hpp" + #include #include #include @@ -16,6 +18,7 @@ #include #include +#include #include #include @@ -53,17 +56,17 @@ mime_type(core::string_view path) noexcept return path.substr(pos); }(); - using ci_equal = urls::grammar::ci_equal; - if(ci_equal{}(ext, ".gif")) return "image/gif"; - if(ci_equal{}(ext, ".jpg")) return "image/jpeg"; - if(ci_equal{}(ext, ".jpeg")) return "image/jpeg"; - if(ci_equal{}(ext, ".png")) return "image/png"; - if(ci_equal{}(ext, ".svg")) return "image/svg+xml"; - if(ci_equal{}(ext, ".txt")) return "text/plain"; 
- if(ci_equal{}(ext, ".htm")) return "text/html"; - if(ci_equal{}(ext, ".html")) return "text/html"; - if(ci_equal{}(ext, ".pdf")) return "application/pdf"; - if(ci_equal{}(ext, ".xml")) return "application/xml"; + namespace ug = urls::grammar; + if(ug::ci_is_equal(ext, ".gif")) return "image/gif"; + if(ug::ci_is_equal(ext, ".jpg")) return "image/jpeg"; + if(ug::ci_is_equal(ext, ".jpeg")) return "image/jpeg"; + if(ug::ci_is_equal(ext, ".png")) return "image/png"; + if(ug::ci_is_equal(ext, ".svg")) return "image/svg+xml"; + if(ug::ci_is_equal(ext, ".txt")) return "text/plain"; + if(ug::ci_is_equal(ext, ".htm")) return "text/html"; + if(ug::ci_is_equal(ext, ".html")) return "text/html"; + if(ug::ci_is_equal(ext, ".pdf")) return "application/pdf"; + if(ug::ci_is_equal(ext, ".xml")) return "application/xml"; return "application/octet-stream"; } @@ -111,10 +114,10 @@ effective_port(urls::url_view url) if(url.has_port()) return url.port(); - if(url.scheme() == "https") + if(url.scheme_id() == urls::scheme::https) return "443"; - if(url.scheme() == "http") + if(url.scheme_id() == urls::scheme::http) return "80"; throw std::runtime_error{ @@ -887,7 +890,7 @@ connect_http_proxy( // Connect to the proxy server co_await asio::async_connect(stream, rresults); - using http_proto::field; + using field = http_proto::field; auto request = http_proto::request{}; auto host_port = [&]() { @@ -978,7 +981,7 @@ connect( } // TLS handshake - if(url.scheme() == "https") + if(url.scheme_id() == urls::scheme::https) { auto ssl_stream = ssl::stream{ std::move(stream), ssl_ctx }; @@ -1004,9 +1007,9 @@ create_request( const message& msg, urls::url_view url) { - using http_proto::field; - using http_proto::method; - using http_proto::version; + using field = http_proto::field; + using method = http_proto::method; + using version = http_proto::version; auto request = http_proto::request{}; @@ -1075,21 +1078,45 @@ request( const po::variables_map& vm, output_stream& output, message& msg, + 
std::optional& cookie_jar, + core::string_view explicit_cookies, ssl::context& ssl_ctx, http_proto::context& http_proto_ctx, http_proto::request request, urls::url_view url) { + using field = http_proto::field; auto stream = co_await connect(vm, ssl_ctx, http_proto_ctx, url); auto parser = http_proto::response_parser{ http_proto_ctx }; auto serializer = http_proto::serializer{ http_proto_ctx }; + auto set_cookies = [&](urls::url_view url) + { + auto field = cookie_jar ? cookie_jar->make_field(url) : std::string{}; + field.append(explicit_cookies); + if(!field.empty()) + request.set(field::cookie, field); + }; + + auto extract_cookies = [&]( + urls::url_view url, + http_proto::response_view response) + { + if(!cookie_jar) + return; + + for(auto sv : response.find_all(field::set_cookie)) + cookie_jar->add(url, parse_cookie(sv).value()); + }; + + set_cookies(url); msg.start_serializer(serializer, request); co_await http_io::async_write(stream, serializer); parser.reset(); parser.start(); co_await http_io::async_read_header(stream, parser); + extract_cookies(url, parser.get()); // handle redirects auto referer = urls::url{ url }; @@ -1102,10 +1129,10 @@ request( break; auto response = parser.get(); - if(auto it = response.find(http_proto::field::location); + if(auto it = response.find(field::location); it != response.end()) { - auto location = urls::parse_uri(it->value).value(); + urls::url location = urls::parse_uri(it->value).value(); // Consume the body co_await http_io::async_read(stream, parser); @@ -1124,12 +1151,16 @@ request( { request.set_method(http_proto::method::get); request.set_content_length(0); - request.erase(http_proto::field::content_type); + request.erase(field::content_type); msg = {}; // drop the body } request.set_target(target(location)); - request.set(http_proto::field::host, location.host()); - request.set(http_proto::field::referer, location); + request.set(field::host, location.host()); + request.set(field::referer, location); + + // Update 
the cookies for the new url + request.erase(field::cookie); + set_cookies(location); referer = location; @@ -1140,6 +1171,7 @@ request( parser.reset(); parser.start(); co_await http_io::async_read_header(stream, parser); + extract_cookies(location, parser.get()); } else { @@ -1193,6 +1225,12 @@ main(int argc, char* argv[]) ("continue-at,C", po::value()->value_name(""), "Resume transfer offset") + ("cookie,b", + po::value>()->value_name(""), + "Send cookies from string/file") + ("cookie-jar,c", + po::value()->value_name(""), + "Write cookies to after operation") ("data,d", po::value>()->value_name(""), "HTTP POST data") @@ -1349,12 +1387,39 @@ main(int argc, char* argv[]) msg = std::move(form); } + auto cookie_jar = std::optional<::cookie_jar>{}; + auto explicit_cookies = std::string{}; + + if(vm.count("cookie") || vm.count("cookie-jar")) + cookie_jar.emplace(); + + if(vm.count("cookie")) + { + for(auto& option : vm.at("cookie").as>()) + { + if(option.find('=') != std::string::npos) + { + if(!explicit_cookies.ends_with(';')) + explicit_cookies.push_back(';'); + explicit_cookies.append(option); + } + else + { + auto ifs = std::ifstream{ option }; + ifs.exceptions(std::ifstream::badbit); + ifs >> cookie_jar.value(); + } + } + } + asio::co_spawn( ioc, request( vm, output, msg, + cookie_jar, + explicit_cookies, ssl_ctx, http_proto_ctx, create_request(vm, msg, url.value()), @@ -1366,6 +1431,13 @@ main(int argc, char* argv[]) }); ioc.run(); + + if(vm.count("cookie-jar")) + { + auto ofs = std::ofstream{ vm.at("cookie-jar").as() }; + ofs.exceptions(std::ofstream::badbit); + ofs << cookie_jar.value(); + } } catch(std::exception const& e) {