From ccce10b4b971649c9eec7c2f6494e472612d8cfa Mon Sep 17 00:00:00 2001 From: Frank Elsinga Date: Tue, 19 Sep 2023 19:38:15 +0200 Subject: [PATCH] extracted the imdb parsing to another function and added a testcase --- client/go.mod | 2 +- client/go.sum | 4 +- server/backend/cron/movie_test.go | 69 +++++++++++++++++++++++++++++++ server/backend/cron/movies.go | 15 ++++--- 4 files changed, 81 insertions(+), 9 deletions(-) create mode 100644 server/backend/cron/movie_test.go diff --git a/client/go.mod b/client/go.mod index 3d345397..7915adde 100644 --- a/client/go.mod +++ b/client/go.mod @@ -3,7 +3,7 @@ module github.com/TUM-Dev/Campus-Backend/client go 1.21 require ( - github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230919155641-f895a75987e0 + github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230919162132-71bec88330f7 github.com/sirupsen/logrus v1.9.3 google.golang.org/grpc v1.58.1 ) diff --git a/client/go.sum b/client/go.sum index ba3cbef0..a8611e9f 100644 --- a/client/go.sum +++ b/client/go.sum @@ -1,5 +1,5 @@ -github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230919155641-f895a75987e0 h1:rZqUJmywWU9aV9Bk/IYKIYN76+nYcATD2q+pFVBYQN4= -github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230919155641-f895a75987e0/go.mod h1:fjoLL3rbdY6wTRJIksekT2p3OUp5ocFfXjB/avV/TVI= +github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230919162132-71bec88330f7 h1:TDgiN5Z1vi3V0Qo94MIXURiD9+U7TGiRtiUIqN/rulo= +github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230919162132-71bec88330f7/go.mod h1:fjoLL3rbdY6wTRJIksekT2p3OUp5ocFfXjB/avV/TVI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/server/backend/cron/movie_test.go b/server/backend/cron/movie_test.go new file mode 100644 index 00000000..3f95b1b3 --- /dev/null +++ b/server/backend/cron/movie_test.go @@ -0,0 +1,69 
@@ +package cron + +import ( + "strings" + "testing" +) + +func TestIMDBExtration(t *testing.T) { + reader := strings.NewReader(` + + +
+
+ +
+
+
+

Vorstellung

+
Do, 6. April 2023
+ um 20:00 Uhr
+ Hörsaal MW1801, Campus Garching
+

6. April: Babylon (Garching, OV)

+
+ + + + + + + + +
+

Babylon (Digital)

+

USA (2022)

+
+
+Zum Trailer
+ab 16 +Dolby Digital +CinemaScope +
Regie: Damien Chazelle +
Schauspieler: Brad Pitt, Margot Robbie, Jean Smart +
189 Minuten +
+
Do you know where I can find some drugs?
+
+

Its the 1920s, California, a chaotic world of parties and movie sets, crime and exuberance. Newcomers Nellie LaRoy (Margot Robbie) and Manny Torres (Diego Calva) will do everything to find their success. In front of the camera or behind it. When movies + start getting lounder and sets get quiet, they and movie stars of old like Jack Conrad (Brad Pitt) will have to adapt or face extinction.

+

+ +Dont let that story stop you tough, these three hours are filled with dance and drugs, music and montages, porn and poetry. You might not need that much of an attention span and you might not want it. +

+In the end, there is one thing Hollywood does best: Make movies about themselves. Did they forget to mention that these movies were made for nazi germany? Is all this crime and perversion really ok because the movies are just that good? Don't think about it too much, you will still enjoy it.

+
+
Chazelle’s film commemorates the era’s hubris as it indulges in a bit of its own. This is how a world ends. Not with a whimper but a great deal of banging, baby. And vomiting. And snorting. (Irish Times)
+
+
+
+ + +`) + imdbID, err := parseImdbIDFromReader(reader) + if err != nil { + t.Error(err) + } + if imdbID != "tt10640346" { + t.Error("imdbID is not correct") + } +} diff --git a/server/backend/cron/movies.go b/server/backend/cron/movies.go index 2d7453bc..ebba1075 100644 --- a/server/backend/cron/movies.go +++ b/server/backend/cron/movies.go @@ -8,6 +8,7 @@ import ( "io" "net/http" "os" + "regexp" "strings" "time" @@ -175,7 +176,11 @@ func extractImdbIDFromTUFilmWebsite(url string) (string, error) { } }(resp.Body) // parse the response body - doc, err := goquery.NewDocumentFromReader(resp.Body) + return parseImdbIDFromReader(resp.Body) +} + +func parseImdbIDFromReader(body io.Reader) (string, error) { + doc, err := goquery.NewDocumentFromReader(body) if err != nil { log.WithError(err).Error("Error while parsing document") return "", err @@ -190,14 +195,12 @@ func extractImdbIDFromTUFilmWebsite(url string) (string, error) { return "", errors.New("no imdb link found") } if imdbLinks.Length() > 1 { - log.WithField("url", url).Warn("more than one imdb link found. using first one") + log.Warn("more than one imdb link found. using first one") } // extract the imdb id from the link href, _ := imdbLinks.First().Attr("href") - href = strings.TrimSuffix(href, "/") - hrefParts := strings.Split(href, "/") - imdbID := hrefParts[len(hrefParts)-1] - return imdbID, nil + re := regexp.MustCompile(`https?://www.imdb.com/title/(?P<imdb_id>[^/]+)/?`) + return re.FindStringSubmatch(href)[re.SubexpIndex("imdb_id")], nil } // parseUpcomingFeed downloads a file from a given url and returns the path to the file