diff --git a/doc/src/manual/cowboy_http.asciidoc b/doc/src/manual/cowboy_http.asciidoc
index 33d2888ce..e589758d4 100644
--- a/doc/src/manual/cowboy_http.asciidoc
+++ b/doc/src/manual/cowboy_http.asciidoc
@@ -20,6 +20,7 @@ opts() :: #{
     active_n                => pos_integer(),
     chunked                 => boolean(),
     connection_type         => worker | supervisor,
+    headers_raw             => boolean(),
     http10_keepalive        => boolean(),
     idle_timeout            => timeout(),
     inactivity_timeout      => timeout(),
@@ -121,6 +122,12 @@ max_headers (100)::
 
 Maximum number of headers allowed per request.
 
+headers_raw (false)::
+
+Populate `headers_raw` in the Req object, with a `binary()` (for HTTP/1.x)
+or `cow_http:headers()` (for HTTP/2) that retains the raw headers sent by
+the client; useful for passive user-agent identification.
+
 max_keepalive (1000)::
 
 Maximum number of requests allowed per connection.
diff --git a/src/cowboy_http.erl b/src/cowboy_http.erl
index c9bceed85..a6c19677f 100644
--- a/src/cowboy_http.erl
+++ b/src/cowboy_http.erl
@@ -27,6 +27,7 @@
 	compress_threshold => non_neg_integer(),
 	connection_type => worker | supervisor,
 	env => cowboy_middleware:env(),
+	headers_raw => boolean(),
 	http10_keepalive => boolean(),
 	idle_timeout => timeout(),
 	inactivity_timeout => timeout(),
@@ -69,6 +70,7 @@
 	qs = undefined :: binary(),
 	version = undefined :: cowboy:http_version(),
 	headers = undefined :: cowboy:http_headers() | undefined,
+	headers_raw = undefined :: binary() | undefined, % stays undefined unless the headers_raw option is true
 	name = undefined :: binary() | undefined
 }).
 
@@ -575,8 +577,9 @@ parse_version(_, State, _, _, _, _) ->
 		'Unsupported HTTP version. (RFC7230 2.6)'}).
 
 before_parse_headers(Rest, State, M, A, P, Q, V) ->
+	HR = case maps:get(headers_raw, State#state.opts, false) of true -> <<>>; _ -> undefined end, % empty accumulator only when enabled
 	parse_header(Rest, State#state{in_state=#ps_header{
-		method=M, authority=A, path=P, qs=Q, version=V}}, #{}).
+		method=M, authority=A, path=P, qs=Q, version=V, headers_raw=HR}}, #{}).
 
 %% Headers.
 
@@ -626,6 +629,9 @@ match_colon(<< _, Rest/bits >>, N) ->
 match_colon(_, _) ->
 	nomatch.
 
+parse_hd_name(<< $:, Rest/bits >>, State=#state{in_state=PS=#ps_header{headers_raw=HR0}}, H, SoFar) when is_binary(HR0) ->
+	HR = <<HR0/binary, $:>>, % keep the name/value separator in the raw copy
+	parse_hd_before_value(Rest, State#state{in_state=PS#ps_header{headers_raw=HR}}, H, SoFar);
 parse_hd_name(<< $:, Rest/bits >>, State, H, SoFar) ->
 	parse_hd_before_value(Rest, State, H, SoFar);
 parse_hd_name(<< C, _/bits >>, State=#state{in_state=PS}, H, <<>>) when ?IS_WS(C) ->
@@ -636,12 +642,16 @@ parse_hd_name(<< C, _/bits >>, State=#state{in_state=PS}, H, _) when ?IS_WS(C) -
 	error_terminate(400, State#state{in_state=PS#ps_header{headers=H}},
 		{connection_error, protocol_error,
 			'Whitespace is not allowed between the header name and the colon. (RFC7230 3.2.4)'});
+parse_hd_name(<< C, Rest/bits >>, State=#state{in_state=PS=#ps_header{headers_raw=HR0}}, H, SoFar) when is_binary(HR0) ->
+	HR = <<HR0/binary, C>>, % record the byte as received, before ?LOWER processes it
+	?LOWER(parse_hd_name, Rest, State#state{in_state=PS#ps_header{headers_raw=HR}}, H, SoFar);
 parse_hd_name(<< C, Rest/bits >>, State, H, SoFar) ->
 	?LOWER(parse_hd_name, Rest, State, H, SoFar).
 
-parse_hd_before_value(<< $\s, Rest/bits >>, S, H, N) ->
-	parse_hd_before_value(Rest, S, H, N);
-parse_hd_before_value(<< $\t, Rest/bits >>, S, H, N) ->
+parse_hd_before_value(<< C, Rest/bits >>, S=#state{in_state=PS=#ps_header{headers_raw=HR0}}, H, N) when ?IS_WS(C), is_binary(HR0) ->
+	HR = <<HR0/binary, C>>, % leading whitespace is skipped for the parsed value but kept in the raw copy
+	parse_hd_before_value(Rest, S#state{in_state=PS#ps_header{headers_raw=HR}}, H, N);
+parse_hd_before_value(<< C, Rest/bits >>, S, H, N) when ?IS_WS(C) ->
 	parse_hd_before_value(Rest, S, H, N);
 parse_hd_before_value(Buffer, State=#state{opts=Opts, in_state=PS}, H, N) ->
 	MaxLength = maps:get(max_header_value_length, Opts, 4096),
@@ -656,7 +666,7 @@ parse_hd_before_value(Buffer, State=#state{opts=Opts, in_state=PS}, H, N) ->
 			parse_hd_value(Buffer, State, H, N, <<>>)
 	end.
 
-parse_hd_value(<< $\r, $\n, Rest/bits >>, S, Headers0, Name, SoFar) ->
+parse_hd_value(<< $\r, $\n, Rest/bits >>, S=#state{in_state=PS=#ps_header{headers_raw=HR0}}, Headers0, Name, SoFar) ->
 	Value = clean_value_ws_end(SoFar, byte_size(SoFar) - 1),
 	Headers = case maps:get(Name, Headers0, undefined) of
 		undefined -> Headers0#{Name => Value};
@@ -664,7 +674,16 @@ parse_hd_value(<< $\r, $\n, Rest/bits >>, S, Headers0, Name, SoFar) ->
 		Value0 when Name =:= <<"cookie">> -> Headers0#{Name => << Value0/binary, "; ", Value/binary >>};
 		Value0 -> Headers0#{Name => << Value0/binary, ", ", Value/binary >>}
 	end,
-	parse_header(Rest, S, Headers);
+	if
+		is_binary(HR0) ->
+			HR = <<HR0/binary, "\r\n">>, % terminate this header line in the raw copy
+			parse_header(Rest, S#state{in_state=PS#ps_header{headers_raw=HR}}, Headers);
+		true ->
+			parse_header(Rest, S, Headers)
+	end;
+parse_hd_value(<< C, Rest/bits >>, S=#state{in_state=PS=#ps_header{headers_raw=HR0}}, H, N, SoFar) when is_binary(HR0) ->
+	HR = <<HR0/binary, C>>, % mirror every value byte into the raw copy
+	parse_hd_value(Rest, S#state{in_state=PS#ps_header{headers_raw=HR}}, H, N, << SoFar/binary, C >>);
 parse_hd_value(<< C, Rest/bits >>, S, H, N, SoFar) ->
 	parse_hd_value(Rest, S, H, N, << SoFar/binary, C >>).
 
@@ -749,7 +768,7 @@ default_port(_) -> 80.
 
request(Buffer, State0=#state{ref=Ref, transport=Transport, peer=Peer, sock=Sock, cert=Cert, proxy_header=ProxyHeader, in_streamid=StreamID, in_state= - PS=#ps_header{method=Method, path=Path, qs=Qs, version=Version}}, + PS=#ps_header{method=Method, path=Path, qs=Qs, version=Version, headers_raw=HeadersRaw}}, Headers0, Host, Port) -> Scheme = case Transport:secure() of true -> <<"https">>; @@ -821,11 +840,11 @@ request(Buffer, State0=#state{ref=Ref, transport=Transport, peer=Peer, sock=Sock false -> State0#state{in_streamid=StreamID + 1, in_state=#ps_request_line{}} end, - {request, Req, State#state{buffer=Buffer}}; + {request, Req#{headers_raw => HeadersRaw}, State#state{buffer=Buffer}}; {true, HTTP2Settings} -> %% We save the headers in case the upgrade will fail %% and we need to pass them to cowboy_stream:early_error. - http2_upgrade(State0#state{in_state=PS#ps_header{headers=Headers}}, + http2_upgrade(State0#state{in_state=PS#ps_header{headers=Headers,headers_raw=undefined}}, Buffer, HTTP2Settings, Req) end. diff --git a/src/cowboy_http2.erl b/src/cowboy_http2.erl index ed2623c68..bedf7d073 100644 --- a/src/cowboy_http2.erl +++ b/src/cowboy_http2.erl @@ -33,6 +33,7 @@ env => cowboy_middleware:env(), goaway_initial_timeout => timeout(), goaway_complete_timeout => timeout(), + headers_raw => boolean(), idle_timeout => timeout(), inactivity_timeout => timeout(), initial_connection_window_size => 65535..16#7fffffff, @@ -452,7 +453,7 @@ headers_frame(State, StreamID, IsFin, Headers, PseudoHeaders, BodyLen) -> 'Requests translated from HTTP/1.1 must include a host header. (RFC7540 8.1.2.3, RFC7230 5.4)'}) end. 
-headers_frame_parse_host(State=#state{ref=Ref, peer=Peer, sock=Sock, cert=Cert, proxy_header=ProxyHeader},
+headers_frame_parse_host(State=#state{ref=Ref, peer=Peer, sock=Sock, cert=Cert, proxy_header=ProxyHeader, opts=Opts},
 		StreamID, IsFin, Headers, PseudoHeaders=#{method := Method, scheme := Scheme, path := PathWithQs},
 		BodyLen, Authority) ->
 	try cow_http_hd:parse_host(Authority) of
@@ -486,10 +487,16 @@ headers_frame_parse_host(State=#state{ref=Ref, peer=Peer, sock=Sock, cert=Cert,
 						undefined -> Req0;
 						_ -> Req0#{proxy_header => ProxyHeader}
 					end,
+					%% The headers_raw option may be absent from Opts (it defaults to
+					%% false); Headers is exposed only when it is explicitly true.
+					Req2 = case maps:get(headers_raw, Opts, false) of
+						true -> Req1#{headers_raw => Headers};
+						_ -> Req1
+					end,
 					%% We add the protocol information for extended CONNECTs.
 					Req = case PseudoHeaders of
-						#{protocol := Protocol} -> Req1#{protocol => Protocol};
-						_ -> Req1
+						#{protocol := Protocol} -> Req2#{protocol => Protocol};
+						_ -> Req2
 					end,
 					headers_frame(State, StreamID, Req)
 			catch _:_ ->
diff --git a/src/cowboy_req.erl b/src/cowboy_req.erl
index 90c5a3a09..bfbd83050 100644
--- a/src/cowboy_req.erl
+++ b/src/cowboy_req.erl
@@ -126,6 +126,7 @@
 	path := binary(),
 	qs := binary(),
 	headers := cowboy:http_headers(),
+	headers_raw => cow_http:headers() | binary() | undefined, % optional: the key can be absent from Req
 	peer := {inet:ip_address(), inet:port_number()},
 	sock := {inet:ip_address(), inet:port_number()},
 	cert := binary() | undefined,