diff --git a/api/bludv.go b/api/bludv.go index 3eae22f..26a78a4 100644 --- a/api/bludv.go +++ b/api/bludv.go @@ -7,13 +7,17 @@ import ( "net/http" "net/url" "regexp" + "slices" "strings" "time" "github.com/PuerkitoBio/goquery" + "github.com/hbollon/go-edlib" + "github.com/felipemarinho97/torrent-indexer/magnet" "github.com/felipemarinho97/torrent-indexer/schema" goscrape "github.com/felipemarinho97/torrent-indexer/scrape" + "github.com/felipemarinho97/torrent-indexer/utils" ) var bludv = IndexerMeta{ @@ -29,7 +33,7 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) { }() ctx := r.Context() - // supported query params: q, season, episode + // supported query params: q, season, episode, filter_results q := r.URL.Query().Get("q") // URL encode query param @@ -87,6 +91,25 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) { } } + for i, it := range indexedTorrents { + jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ") + qLower := strings.ToLower(q) + splitLength := 2 + indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength) + } + + // remove the ones with zero similarity + if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" { + indexedTorrents = utils.Filter(indexedTorrents, func(it IndexedTorrent) bool { + return it.Similarity > 0 + }) + } + + // sort by similarity + slices.SortFunc(indexedTorrents, func(i, j IndexedTorrent) int { + return int((j.Similarity - i.Similarity) * 1000) + }) + w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(Response{ Results: indexedTorrents, diff --git a/api/comando_torrents.go b/api/comando_torrents.go index 60e372e..9d0a313 100644 --- a/api/comando_torrents.go +++ b/api/comando_torrents.go @@ -9,6 +9,7 @@ import ( "net/http" "net/url" "regexp" + "slices" "strings" "time" @@ -16,6 +17,8 @@ import ( 
"github.com/felipemarinho97/torrent-indexer/magnet" "github.com/felipemarinho97/torrent-indexer/schema" goscrape "github.com/felipemarinho97/torrent-indexer/scrape" + "github.com/felipemarinho97/torrent-indexer/utils" + "github.com/hbollon/go-edlib" ) var comando = IndexerMeta{ @@ -104,6 +107,25 @@ func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request) } } + for i, it := range indexedTorrents { + jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ") + qLower := strings.ToLower(q) + splitLength := 2 + indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength) + } + + // remove the ones with zero similarity + if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" { + indexedTorrents = utils.Filter(indexedTorrents, func(it IndexedTorrent) bool { + return it.Similarity > 0 + }) + } + + // sort by similarity + slices.SortFunc(indexedTorrents, func(i, j IndexedTorrent) int { + return int((j.Similarity - i.Similarity) * 1000) + }) + w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(Response{ Results: indexedTorrents, @@ -176,7 +198,7 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]IndexedTorrent // find any link from imdb imdbLink := "" - article.Find("div.content a").Each(func(i int, s *goquery.Selection) { + article.Find("a").Each(func(i int, s *goquery.Selection) { link, _ := s.Attr("href") re := regexp.MustCompile(`https://www.imdb.com/title/(tt\d+)`) matches := re.FindStringSubmatch(link) diff --git a/api/index.go b/api/index.go index 35ccfbf..a9efb23 100644 --- a/api/index.go +++ b/api/index.go @@ -39,6 +39,7 @@ type IndexedTorrent struct { Size string `json:"size"` LeechCount int `json:"leech_count"` SeedCount int `json:"seed_count"` + Similarity float32 `json:"similarity"` } func NewIndexers(redis *cache.Redis, metrics *monitoring.Metrics) *Indexer { @@ -59,14 
+60,16 @@ func HandlerIndex(w http.ResponseWriter, r *http.Request) { "method": "GET", "description": "Indexer for comando torrents", "query_params": map[string]string{ - "q": "search query", + "q": "search query", + "filter_results": "if results with similarity equals to zero should be filtered (true/false)", }, }, "/indexers/bludv": map[string]interface{}{ "method": "GET", "description": "Indexer for bludv", "query_params": map[string]string{ - "q": "search query", + "q": "search query", + "filter_results": "if results with similarity equals to zero should be filtered (true/false)", }, }, }, diff --git a/go.mod b/go.mod index dcdbe58..76d9309 100644 --- a/go.mod +++ b/go.mod @@ -19,5 +19,6 @@ require ( require ( github.com/PuerkitoBio/goquery v1.9.1 + github.com/hbollon/go-edlib v1.6.0 github.com/prometheus/client_golang v1.19.0 ) diff --git a/go.sum b/go.sum index 3d8faca..1ea89d1 100644 --- a/go.sum +++ b/go.sum @@ -10,10 +10,14 @@ github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/hbollon/go-edlib v1.6.0 h1:ga7AwwVIvP8mHm9GsPueC0d71cfRU/52hmPJ7Tprv4E= +github.com/hbollon/go-edlib v1.6.0/go.mod 
// Filter returns a new slice holding the elements of arr for which keep
// reports true, in their original order.
//
// arr itself is never modified. The result is always non-nil, even when arr
// is nil or empty or when no element matches, so that it encodes to JSON as
// "[]" rather than "null".
func Filter[A any](arr []A, f func(A) bool) []A {
	// At most len(arr) elements can be kept, so reserving that capacity up
	// front avoids repeated growth while appending.
	res := make([]A, 0, len(arr))
	for _, v := range arr {
		if f(v) {
			res = append(res, v)
		}
	}
	return res
}