Skip to content

Commit

Permalink
Merge pull request #339 from hearchco/as/feat/favicon-proxy
Browse files Browse the repository at this point in the history
feat: Favicons proxy
  • Loading branch information
aleksasiriski authored Jun 26, 2024
2 parents 29bbcac + 4dcada0 commit e030a19
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 18 deletions.
111 changes: 94 additions & 17 deletions src/router/routes/route_proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,18 @@ import (
"net/http"
"net/http/httputil"
"net/url"
"strconv"

"github.com/rs/zerolog/log"

"github.com/hearchco/agent/src/config"
"github.com/hearchco/agent/src/search/useragent"
"github.com/hearchco/agent/src/utils/anonymize"
"github.com/hearchco/agent/src/utils/moreurls"
)

func routeProxy(w http.ResponseWriter, r *http.Request, salt string, timeouts config.ImageProxyTimeouts) error {
// Parse the form.
err := r.ParseForm()
if err != nil {
// Server error.
Expand All @@ -25,41 +28,125 @@ func routeProxy(w http.ResponseWriter, r *http.Request, salt string, timeouts co
return err
}

// Get the parameters.
params := r.Form

urlParam := getParamOrDefault(params, "url")
hashParam := getParamOrDefault(params, "hash")
faviconParam := getParamOrDefault(params, "favicon", strconv.FormatBool(false))

// Check the required parameters.
if urlParam == "" || hashParam == "" {
// User error.
return writeResponse(w, http.StatusBadRequest, "url and hash are required")
}

// Check if only favicon is requested.
favicon, err := strconv.ParseBool(faviconParam)
if err != nil {
// User error.
return writeResponse(w, http.StatusBadRequest, "favicon must be a boolean")
}

// Get url to verify and to proxy.
urlToVerify, urlToProxy, err := getUrlToVerifyAndToProxy(urlParam, favicon)
if err != nil {
// User error.
log.Debug().
Err(err).
Str("url", urlParam).
Str("url_to_verify", urlToVerify).
Str("url_to_proxy", urlToProxy).
Str("hash", hashParam).
Str("favicon", faviconParam).
Msg("Failed to get URL to verify and to proxy")
return writeResponse(w, http.StatusBadRequest, fmt.Sprintf("failed to get URL to verify and to proxy: %v", err))
}

// Check if hash is valid.
if !anonymize.VerifyHash(hashParam, urlParam, salt) {
if !anonymize.VerifyHash(hashParam, urlToVerify, salt) {
// User error.
log.Debug().
Str("url", urlParam).
Str("url_to_verify", urlToVerify).
Str("hash", hashParam).
Str("favicon", faviconParam).
Msg("Invalid hash")
return writeResponse(w, http.StatusUnauthorized, "invalid hash")
}

// Parse the url.
target, err := url.Parse(urlParam)
target, err := url.Parse(urlToProxy)
if err != nil {
// User error.
log.Debug().
Str("url", urlParam).
Str("url_to_proxy", urlToProxy).
Msg("Invalid url")
return writeResponse(w, http.StatusBadRequest, "invalid url")
}

// Create a new request.
nr := createAnonRequest(r, target)
log.Trace().
Caller().
Str("request", fmt.Sprint(nr)).
Msg("Created a new anon request")

// Create reverse proxy with timeout.
rp := createReverseProxy(timeouts)

// Proxy the request.
log.Debug().
Str("url", target.String()).
Msg("Proxying request")
rp.ServeHTTP(w, &nr) // Use the new request.

return nil
}

// getUrlToVerifyAndToProxy returns, in order, the string the request hash is
// checked against and the URL that should actually be proxied.
//
// When favicon is false both values are urlParam itself. When favicon is true
// the value to verify comes from moreurls.GetURIToVerify and the value to
// proxy comes from getFaviconURL. If either derivation fails, both strings
// are empty and the wrapped error is returned.
func getUrlToVerifyAndToProxy(urlParam string, favicon bool) (string, string, error) {
	// Plain proxying: verify and fetch exactly what was requested.
	if !favicon {
		return urlParam, urlParam, nil
	}

	// Favicon proxying: derive the value the hash was computed over.
	verifyURI, err := moreurls.GetURIToVerify(urlParam)
	if err != nil {
		return "", "", fmt.Errorf("failed to extract URI from URL: %w", err)
	}

	// Derive the favicon location that will be fetched.
	proxyURL, err := getFaviconURL(urlParam)
	if err != nil {
		return "", "", fmt.Errorf("failed to extract favicon URL: %w", err)
	}

	return verifyURI, proxyURL, nil
}

// getFaviconURL returns the URI extracted from urll by moreurls.GetURI with
// the fixed favicon path "/favicon.ico" appended. An extraction error is
// returned unchanged together with an empty string.
func getFaviconURL(urll string) (string, error) {
	// TODO: Impl getting the favicon path from the html head.
	const faviconPath = "/favicon.ico"

	base, err := moreurls.GetURI(urll)
	if err != nil {
		return "", err
	}
	return base + faviconPath, nil
}

func createAnonRequest(r *http.Request, target *url.URL) http.Request {
// Get random UserAgent with corresponding Sec-Ch-Ua headers.
ua := useragent.RandomUserAgentWithHeaders()

// Create a new request.
nr := &http.Request{
return http.Request{
Method: http.MethodGet,
URL: target,
Host: target.Host,
Expand All @@ -80,12 +167,9 @@ func routeProxy(w http.ResponseWriter, r *http.Request, salt string, timeouts co
"User-Agent": {ua.UserAgent},
},
}
}

log.Trace().
Caller().
Str("request", fmt.Sprint(nr)).
Msg("Created a new request")

func createReverseProxy(timeouts config.ImageProxyTimeouts) httputil.ReverseProxy {
// Create reverse proxy with timeout.
rp := httputil.ReverseProxy{Director: func(r *http.Request) {}}
rp.Transport = &http.Transport{
Expand All @@ -95,12 +179,5 @@ func routeProxy(w http.ResponseWriter, r *http.Request, salt string, timeouts co
}).DialContext,
TLSHandshakeTimeout: timeouts.TLSHandshake,
}

// Proxy the request.
log.Debug().
Str("url", target.String()).
Msg("Proxying request")
rp.ServeHTTP(w, nr) // Use the new request.

return nil
return rp
}
17 changes: 16 additions & 1 deletion src/search/result/general.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package result

import (
"github.com/hearchco/agent/src/utils/anonymize"
"github.com/hearchco/agent/src/utils/moreurls"
"github.com/rs/zerolog/log"
)

Expand Down Expand Up @@ -96,5 +98,18 @@ func (r *General) AppendEngineRanks(rank Rank) {
}

func (r General) ConvertToOutput(salt string) ResultOutput {
return r
urlToVerify, err := moreurls.GetURIToVerify(r.URL())
if err != nil {
log.Panic().
Err(err).
Str("url", r.URL()).
Msg("Failed to get URI to verify")
// ^PANIC - This should never happen.
}
return GeneralOutput{
generalOutputJSON{
r,
anonymize.HashToSHA256B64Salted(urlToVerify, salt),
},
}
}
11 changes: 11 additions & 0 deletions src/search/result/general_output.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package result

// GeneralOutput is the value built by General.ConvertToOutput. It embeds
// generalOutputJSON, so that type's fields are promoted onto GeneralOutput.
type GeneralOutput struct {
generalOutputJSON
}

// generalOutputJSON embeds General and adds the favicon hash alongside it.
type generalOutputJSON struct {
General

// FaviconHash is tagged to serialize as "favicon_hash" and to be omitted
// when empty.
FaviconHash string `json:"favicon_hash,omitempty"`
}
33 changes: 33 additions & 0 deletions src/utils/moreurls/verify.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package moreurls

import (
"fmt"
"regexp"
)

const (
	// faviconVerifierPrefix is prepended to the extracted URI by
	// GetURIToVerify.
	faviconVerifierPrefix = "favicon-verify://"

	// uriPattern matches the scheme and authority at the start of an
	// http(s) URL. The authority ends at the first '/', '?' or '#', so a
	// URL without a path (e.g. "https://example.com?q=1") does not drag
	// its query or fragment into the result.
	uriPattern = "^(http(s?))(://)([^/?#]+)"
)

// uriRegexp is uriPattern compiled once at package initialization rather
// than on every GetURI call.
var uriRegexp = regexp.MustCompile(uriPattern)

// GetURIToVerify returns the URI of urll (see GetURI) prefixed with
// "favicon-verify://".
//
// It returns an empty string and the error from GetURI when no URI can be
// extracted.
func GetURIToVerify(urll string) (string, error) {
	uri, err := GetURI(urll)
	if err != nil {
		return "", err
	}
	return faviconVerifierPrefix + uri, nil
}

// GetURI extracts the scheme and authority from an http or https URL.
//
//	https://www.example.com/some/path -> https://www.example.com
//	https://www.example.com?q=1       -> https://www.example.com
//
// It returns an error when urll does not start with "http://" or "https://"
// followed by a non-empty authority.
func GetURI(urll string) (string, error) {
	uri := uriRegexp.FindString(urll)
	if uri == "" {
		return "", fmt.Errorf("failed to extract URI from URL")
	}
	return uri, nil
}

0 comments on commit e030a19

Please sign in to comment.