From 3bc09ef77fe33145393491255d904af2bed58aed Mon Sep 17 00:00:00 2001
From: Frank Elsinga
Date: Tue, 5 Sep 2023 14:18:07 +0200
Subject: [PATCH] implemented a cronjob to save to the db

---
 docker-compose.yaml                          |   1 +
 server/backend/cron/cronjobs.go              |  11 +-
 server/backend/cron/kino.go                  |   7 -
 server/backend/cron/movies.go                | 265 +++++++++++++++++++
 server/backend/cron/news.go                  |  10 +-
 server/backend/migration/2023090410000000.go |  60 +++++
 server/backend/migration/migration.go        |   1 +
 server/model/crontab.go                      |   2 +-
 server/model/kino.go                         |  30 +++
 9 files changed, 370 insertions(+), 17 deletions(-)
 delete mode 100644 server/backend/cron/kino.go
 create mode 100644 server/backend/cron/movies.go
 create mode 100644 server/backend/migration/2023090410000000.go
 create mode 100644 server/model/kino.go

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 72b4bb46..2de13e99 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -13,6 +13,7 @@ services:
       - APNS_KEY_ID=${APNS_KEY_ID}
       - APNS_TEAM_ID=${APNS_TEAM_ID}
       - APNS_P8_FILE_PATH=${APNS_P8_FILE_PATH}
+      - OMDB_API_KEY=${OMDB_API_KEY}
     volumes:
       - ./apns_auth_key.p8:${APNS_P8_FILE_PATH}
     depends_on:
diff --git a/server/backend/cron/cronjobs.go b/server/backend/cron/cronjobs.go
index 8bf15fad..f47b99f2 100644
--- a/server/backend/cron/cronjobs.go
+++ b/server/backend/cron/cronjobs.go
@@ -20,18 +20,18 @@ type CronService struct {
 
 // names for cron jobs as specified in database
 const (
+	StorageDir               = "/Storage/" // target location of files
 	NewsType                 = "news"
 	FileDownloadType         = "fileDownload"
 	DishNameDownload         = "dishNameDownload"
 	AverageRatingComputation = "averageRatingComputation"
 	CanteenHeadcount         = "canteenHeadCount"
-	StorageDir               = "/Storage/" // target location of files
 	IOSNotifications         = "iosNotifications"
 	IOSActivityReset         = "iosActivityReset"
+	MovieType                = "movie"
 
 	/* MensaType = "mensa"
 	ChatType = "chat"
-	KinoType = "kino"
 	RoomfinderType = "roomfinder"
 	TicketSaleType = "ticketsale"
 	AlarmType = "alarm" */
@@ -101,14 +101,17 @@ func (c *CronService) Run() error {
 				if c.useMensa {
 					g.Go(c.averageRatingComputation)
 				}
+			case MovieType:
+				// if this is not copied here, this may not be thread-safe due to go's guarantees
+				// loop variable cronjob captured by func literal (govet)
+				copyCronjob := cronjob
+				g.Go(func() error { return c.movieCron(&copyCronjob) })
 				/*
 					TODO: Implement handlers for other cronjobs
 					case MensaType:
 						g.Go(func() error { return c.mensaCron() })
 					case ChatType:
 						g.Go(func() error { return c.chatCron() })
-					case KinoType:
-						g.Go(func() error { return c.kinoCron() })
 					case RoomfinderType:
 						g.Go(func() error { return c.roomFinderCron() })
 					case TicketSaleType:
diff --git a/server/backend/cron/kino.go b/server/backend/cron/kino.go
deleted file mode 100644
index 0ad8106a..00000000
--- a/server/backend/cron/kino.go
+++ /dev/null
@@ -1,7 +0,0 @@
-package cron
-
-//lint:ignore U1000 stub
-func (c *CronService) kinoCron() error {
-	// TODO: implement
-	return nil
-}
diff --git a/server/backend/cron/movies.go b/server/backend/cron/movies.go
new file mode 100644
index 00000000..cbac4ad9
--- /dev/null
+++ b/server/backend/cron/movies.go
@@ -0,0 +1,265 @@
+package cron
+
+import (
+	"database/sql"
+	"encoding/json"
+	"encoding/xml"
+	"errors"
+	"github.com/PuerkitoBio/goquery"
+	"github.com/TUM-Dev/Campus-Backend/server/model"
+	log "github.com/sirupsen/logrus"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"strings"
+	"time"
+)
+
+// MovieItems is a single item element of the tu-film RSS feed, i.e. one upcoming movie
+type MovieItems struct {
+	Title     string `xml:"title"`
+	Link      string `xml:"link"`
+	PubDate   string `xml:"pubDate"`
+	Location  string `xml:"location"`
+	Enclosure struct {
+		Url    string `xml:"url,attr"`
+		Length string `xml:"length,attr"`
+		Type   string `xml:"type,attr"`
+	} `xml:"enclosure"`
+}
+
+// MovieChannel is a channel element of the tu-film RSS feed and holds all of its items
+type MovieChannel struct {
+	Items []MovieItems `xml:"item"`
+}
+
+const (
+	MovieImageDirectory = "movie/"
+	// imdbTitlePrefix is the part of an imdb link directly in front of the imdb id
+	imdbTitlePrefix = "imdb.com/title/"
+)
+
+// movieHTTPClient is shared by all requests of the movie cronjob.
+// The timeout makes sure that an unresponsive remote cannot block the cronjob forever.
+var movieHTTPClient = &http.Client{Timeout: 30 * time.Second}
+
+// movieCron imports the upcoming movies of the feed which the cronjob refers to into the database.
+// cronjob: crontab entry of the job, its id has to reference the news source of the feed
+// Movies which cannot be processed are logged and skipped, so that one broken movie does not stop the import.
+func (c *CronService) movieCron(cronjob *model.Crontab) error {
+	//Get the news feed we want to get our data from
+	if !cronjob.ID.Valid {
+		log.WithField("cron", cronjob.Cron).Error("skipping movie job, id of source is null")
+		return errors.New("skipping movie job, id of source is null")
+	}
+	var newsSource model.NewsSource
+	// First has to get a pointer, otherwise gorm cannot fill the struct
+	if err := c.db.Model(&model.NewsSource{}).First(&newsSource, "source = ?", cronjob.ID.Int64).Error; err != nil {
+		log.WithField("cron", cronjob.Cron).WithError(err).Error("error getting news source from database")
+		return err
+	}
+	//Parse the data into a struct
+	channels, err := parseUpcomingFeed(newsSource.URL.String)
+	if err != nil {
+		return err
+	}
+	for _, channel := range channels {
+		for _, item := range channel.Items {
+			logFields := log.Fields{"link": item.Link, "title": item.Title, "date": item.PubDate, "location": item.Location, "url": item.Enclosure.Url}
+			var exists bool
+			if err := c.db.Model(&model.Kino{}).Select("count(*) > 0").Where("link = ?", item.Link).Find(&exists).Error; err != nil {
+				log.WithError(err).WithFields(logFields).Error("Could not check if movie already exists")
+				continue
+			}
+			if exists {
+				log.WithFields(logFields).Trace("Movie already exists")
+				continue
+			}
+
+			// data cleanup
+			date, err := time.Parse(time.RFC1123Z, item.PubDate)
+			if err != nil {
+				log.WithFields(logFields).WithError(err).Error("error while parsing date")
+				continue
+			}
+
+			// populate extra data from imdb
+			imdbID, err := extractTUFilmWebsite(item.Link)
+			if err != nil {
+				log.WithFields(logFields).WithError(err).Error("error while finding imdb id")
+				continue
+			}
+			imdbMovie, err := getIMDB(imdbID)
+			if err != nil {
+				log.WithFields(logFields).WithError(err).Error("error while getting imdb movie")
+				continue
+			}
+
+			// add a file to preview (downloaded in another cronjob)
+			file := model.Files{
+				Name:       item.Title,
+				Path:       MovieImageDirectory,
+				URL:        sql.NullString{String: item.Enclosure.Url, Valid: true},
+				Downloaded: sql.NullBool{Bool: false, Valid: true},
+			}
+			if err := c.db.Create(&file).Error; err != nil {
+				log.WithFields(logFields).WithError(err).Error("error while creating file")
+				continue
+			}
+
+			// save the result of the previous steps (🎉)
+			movie := model.Kino{
+				Date:        date,
+				Title:       item.Title,
+				Year:        imdbMovie.Year,
+				Runtime:     imdbMovie.Runtime,
+				Genre:       imdbMovie.Genre,
+				Director:    imdbMovie.Director,
+				Actors:      imdbMovie.Actors,
+				ImdbRating:  imdbMovie.ImdbRating,
+				Description: imdbMovie.Plot, // we get this from imdb as tu-film does truncate their plot
+				FilesID:     file.File,
+				Files:       file,
+				Link:        item.Link,
+			}
+			if err := c.db.Create(&movie).Error; err != nil {
+				log.WithFields(logFields).WithError(err).Error("error while creating movie")
+				continue
+			}
+			log.WithFields(logFields).Info("created movie")
+		}
+	}
+	return nil
+}
+
+// imdbResults is the part of an OMDb response which is used by the movie cronjob
+type imdbResults struct {
+	Year     string
+	Runtime  string
+	Genre    string
+	Director string
+	Actors   string
+	Plot     string
+	// needs to be exported, as encoding/json ignores unexported fields
+	ImdbRating string `json:"imdbRating"`
+	// Response is "False" if OMDb could not answer the request, Error then contains the reason
+	Response string
+	Error    string
+}
+
+// getIMDB requests the details of a movie from the OMDb API
+// id: imdb id of the movie, e.g. tt0111161
+// The api key is read from the environment variable OMDB_API_KEY
+func getIMDB(id string) (*imdbResults, error) {
+	query := url.Values{}
+	query.Set("r", "json")
+	query.Set("v", "1")
+	query.Set("i", id)
+	query.Set("apikey", os.Getenv("OMDB_API_KEY"))
+	// the request url contains the api key => we only log the imdb id
+	logger := log.WithField("imdbID", id)
+	resp, err := movieHTTPClient.Get("https://www.omdbapi.com/?" + query.Encode())
+	if err != nil {
+		// errors of the http client contain the request url => unwrap them to not leak the api key
+		var urlErr *url.Error
+		if errors.As(err, &urlErr) {
+			err = urlErr.Err
+		}
+		logger.WithError(err).Error("Error while getting response for request")
+		return nil, err
+	}
+	defer func(Body io.ReadCloser) {
+		if err := Body.Close(); err != nil {
+			logger.WithError(err).Error("Error while closing body")
+		}
+	}(resp.Body)
+	if resp.StatusCode != http.StatusOK {
+		logger.WithField("status", resp.Status).Error("Unexpected status of response")
+		return nil, errors.New("omdb responded with status " + resp.Status)
+	}
+	// parse the response body
+	var res imdbResults
+	if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
+		logger.WithError(err).Error("Error while unmarshalling imdbResults")
+		return nil, err
+	}
+	if res.Response == "False" {
+		logger.WithField("reason", res.Error).Error("omdb could not answer the request")
+		return nil, errors.New("omdb could not answer the request: " + res.Error)
+	}
+	return &res, nil
+}
+
+// extractTUFilmWebsite scrapes the imdb id of a movie from the tu-film website
+// url: url of the tu-film website, e.g. https://www.tu-film.de/programm/view/1204
+func extractTUFilmWebsite(url string) (string, error) {
+	resp, err := movieHTTPClient.Get(url)
+	if err != nil {
+		return "", err
+	}
+	defer func(Body io.ReadCloser) {
+		if err := Body.Close(); err != nil {
+			log.WithError(err).Error("Error while closing body")
+		}
+	}(resp.Body)
+	if resp.StatusCode != http.StatusOK {
+		return "", errors.New("tu-film website responded with status " + resp.Status)
+	}
+	// parse the response body
+	doc, err := goquery.NewDocumentFromReader(resp.Body)
+	if err != nil {
+		log.WithError(err).Error("Error while parsing document")
+		return "", err
+	}
+
+	// extract the imdb link
+	imdbLinks := doc.Find("a").FilterFunction(func(i int, s *goquery.Selection) bool {
+		href, hrefExists := s.Attr("href")
+		return hrefExists && strings.Contains(href, imdbTitlePrefix)
+	})
+	if imdbLinks.Length() == 0 {
+		return "", errors.New("no imdb link found")
+	}
+	if imdbLinks.Length() > 1 {
+		log.WithField("url", url).Warn("more than one imdb link found. using first one")
+	}
+	// extract the imdb id from the link, it is the path segment directly after the prefix
+	href, _ := imdbLinks.First().Attr("href")
+	imdbID := href[strings.Index(href, imdbTitlePrefix)+len(imdbTitlePrefix):]
+	if end := strings.IndexAny(imdbID, "/?#"); end >= 0 {
+		imdbID = imdbID[:end]
+	}
+	if imdbID == "" {
+		return "", errors.New("imdb link does not contain an id")
+	}
+	return imdbID, nil
+}
+
+// parseUpcomingFeed downloads the feed of upcoming movies from a given url and parses it
+// url: download url of the feed, e.g. http://www.tu-film.de/programm/index/upcoming.rss
+func parseUpcomingFeed(url string) ([]MovieChannel, error) {
+	resp, err := movieHTTPClient.Get(url)
+	if err != nil {
+		log.WithField("url", url).WithError(err).Error("Error while getting response for request")
+		return nil, err
+	}
+	defer func(Body io.ReadCloser) {
+		if err := Body.Close(); err != nil {
+			log.WithError(err).Error("Error while closing body")
+		}
+	}(resp.Body)
+	if resp.StatusCode != http.StatusOK {
+		log.WithField("url", url).WithField("status", resp.Status).Error("Unexpected status of response")
+		return nil, errors.New("feed responded with status " + resp.Status)
+	}
+	//Parse the data into a struct
+	var upcomingMovies struct {
+		Channels []MovieChannel `xml:"channel"`
+	}
+	if err := xml.NewDecoder(resp.Body).Decode(&upcomingMovies); err != nil {
+		log.WithError(err).Error("Error while unmarshalling UpcomingFeed")
+		return nil, err
+	}
+	return upcomingMovies.Channels, nil
+}
diff --git a/server/backend/cron/news.go b/server/backend/cron/news.go
index 4a7f4723..a493a0ae 100644
--- a/server/backend/cron/news.go
+++ b/server/backend/cron/news.go
@@ -18,9 +18,9 @@ import (
 )
 
 const (
-	ImageDirectory = "news/newspread/"
-	NewspreadHook  = "newspread"
-	ImpulsivHook   = "impulsivHook"
+	NewsImageDirectory = "news/newspread/"
+	NewspreadHook      = "newspread"
+	ImpulsivHook       = "impulsivHook"
 	//MAX_IMAGE_RETRYS = 3
 )
 
@@ -137,7 +137,7 @@ func (c *CronService) parseNewsFeed(source model.NewsSource) error {
 	return nil
 }
 
-// saveImage Saves an image to the database so it can be downloaded by another cronjob and returns it's id
+// saveImage Saves an image to the database, so it can be downloaded by another cronjob and returns its id
 func (c *CronService) saveImage(url string) (null.Int, error) {
 	targetFileName := fmt.Sprintf("%x.jpg", md5.Sum([]byte(url)))
 	var fileId null.Int
@@ -154,7 +154,7 @@ func (c *CronService) saveImage(url string) (null.Int, error) {
 	// otherwise store in database:
 	file := model.Files{
 		Name:       targetFileName,
-		Path:       ImageDirectory,
+		Path:       NewsImageDirectory,
 		URL:        sql.NullString{String: url, Valid: true},
 		Downloaded: sql.NullBool{Bool: false, Valid: true},
 	}
diff --git a/server/backend/migration/2023090410000000.go b/server/backend/migration/2023090410000000.go
new file mode 100644
index 00000000..bdc2661b
--- /dev/null
+++ b/server/backend/migration/2023090410000000.go
@@ -0,0 +1,60 @@
+package migration
+
+import (
+	"database/sql"
+	"github.com/TUM-Dev/Campus-Backend/server/model"
+	"github.com/go-gormigrate/gormigrate/v2"
+	"github.com/guregu/null"
+	"gorm.io/gorm"
+)
+
+// migrate2023090410000000
+// migrates the crontab from kino to movie crontab
+func (m TumDBMigrator) migrate2023090410000000() *gormigrate.Migration {
+	return &gormigrate.Migration{
+		ID: "2023090410000000",
+		Migrate: func(tx *gorm.DB) error {
+			// modify the crontab: the new type has to be known by the enum before an entry can use it
+			if err := SafeEnumMigrate(tx, &model.Crontab{}, "type", "movie"); err != nil {
+				return err
+			}
+			// the movie cronjob looks up its feed via the id => 2 has to be the news source of the feed (TODO confirm)
+			if err := tx.Create(&model.Crontab{
+				Interval: 60 * 5, // Every 5 minutes
+				Type:     null.String{NullString: sql.NullString{String: "movie", Valid: true}},
+				ID:       null.Int{NullInt64: sql.NullInt64{Int64: 2, Valid: true}},
+			}).Error; err != nil {
+				return err
+			}
+			// kino entries have to be removed before their type is removed from the enum
+			if err := tx.Delete(&model.Crontab{}, "type = ?", "kino").Error; err != nil {
+				return err
+			}
+			if err := SafeEnumRollback(tx, &model.Crontab{}, "type", "kino"); err != nil {
+				return err
+			}
+			return nil
+		},
+
+		Rollback: func(tx *gorm.DB) error {
+			// revert the crontab: restore the kino type and remove everything that uses the movie type
+			if err := SafeEnumMigrate(tx, &model.Crontab{}, "type", "kino"); err != nil {
+				return err
+			}
+			// NOTE(review): the interval of the kino entry deleted by Migrate is not recorded, daily is an assumption
+			if err := tx.Create(&model.Crontab{
+				Interval: 24 * 60 * 60, // daily
+				Type:     null.String{NullString: sql.NullString{String: "kino", Valid: true}},
+			}).Error; err != nil {
+				return err
+			}
+			if err := tx.Delete(&model.Crontab{}, "type = ?", "movie").Error; err != nil {
+				return err
+			}
+			if err := SafeEnumRollback(tx, &model.Crontab{}, "type", "movie"); err != nil {
+				return err
+			}
+			return nil
+		},
+	}
+}
diff --git a/server/backend/migration/migration.go b/server/backend/migration/migration.go
index 5302bf11..eae29baf 100644
--- a/server/backend/migration/migration.go
+++ b/server/backend/migration/migration.go
@@ -41,6 +41,7 @@ func (m TumDBMigrator) Migrate() error {
 		m.migrate20220713000000(),
 		m.migrate20221119131300(),
 		m.migrate20221210000000(),
+		m.migrate2023090410000000(),
 	})
 	err := mig.Migrate()
 	return err
diff --git a/server/model/crontab.go b/server/model/crontab.go
index 54f175ea..8c4bfa98 100644
--- a/server/model/crontab.go
+++ b/server/model/crontab.go
@@ -18,7 +18,7 @@ type Crontab struct {
 	//[ 2] lastRun int null: false primary: false isArray: false auto: false col: int len: -1 default: [0]
 	LastRun int32 `gorm:"column:lastRun;type:int;default:0;" json:"last_run"`
 	//[ 3] type char(10) null: true primary: false isArray: false auto: false col: char len: 10 default: []
-	Type null.String `gorm:"column:type;type:enum ('news', 'mensa', 'chat', 'kino', 'roomfinder', 'ticketsale', 'alarm', 'fileDownload','dishNameDownload','averageRatingComputation', 'iosNotifications', 'iosActivityReset', 'canteenHeadCount');" json:"type"`
+	Type null.String `gorm:"column:type;type:enum ('news', 'mensa', 'chat', 'movie', 'roomfinder', 'ticketsale', 'alarm', 'fileDownload','dishNameDownload','averageRatingComputation', 'iosNotifications', 'iosActivityReset', 'canteenHeadCount');" json:"type"`
 	//[ 4] id int null: true primary: false isArray: false auto: false col: int len: -1 default: []
 	ID null.Int `gorm:"column:id;type:int;" json:"id"`
 }
diff --git a/server/model/kino.go b/server/model/kino.go
new file mode 100644
index 00000000..6f7a7026
--- /dev/null
+++ b/server/model/kino.go
@@ -0,0 +1,30 @@
+package model
+
+import (
+	"database/sql"
+	"time"
+)
+
+// Kino stores all movies
+type Kino struct {
+	Id          int32          `gorm:"primary_key;AUTO_INCREMENT;column:kino;type:int;not null;"`
+	Date        time.Time      `gorm:"column:date;type:datetime;not null;"`
+	Created     time.Time      `gorm:"column:created;type:timestamp;not null;default:CURRENT_TIMESTAMP"`
+	Title       string         `gorm:"column:title;type:text;not null;"`
+	Year        string         `gorm:"column:year;type:varchar(4);not null;"`
+	Runtime     string         `gorm:"column:runtime;type:varchar(40);not null;"`
+	Genre       string         `gorm:"column:genre;type:varchar(100);not null;"`
+	Director    string         `gorm:"column:director;type:text;not null;"`
+	Actors      string         `gorm:"column:actors;type:text;not null;"`
+	ImdbRating  string         `gorm:"column:rating;type:varchar(4);not null;"`
+	Description string         `gorm:"column:description;type:text;not null;"`
+	Trailer     sql.NullString `gorm:"column:trailer"`
+	FilesID     int32          `gorm:"column:cover"`
+	Files       Files          `gorm:"foreignKey:FilesID;references:file"`
+	Link        string         `gorm:"column:link;type:varchar(190);not null;unique;"`
+}
+
+// TableName sets the insert table name for this struct type
+func (n *Kino) TableName() string {
+	return "kino"
+}