implemented a cronjob to save to the db

TUM-Dev · Sep 7, 2023 · 01d4444 · 01d4444
1 parent 4ab8058
commit 01d4444
Show file tree

Hide file tree

Showing 9 changed files with 341 additions and 18 deletions.
diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -13,6 +13,7 @@ services:
       - APNS_KEY_ID=${APNS_KEY_ID}
       - APNS_TEAM_ID=${APNS_TEAM_ID}
       - APNS_P8_FILE_PATH=${APNS_P8_FILE_PATH}
+      - OMDB_API_KEY=${OMDB_API_KEY}
     volumes:
       - ./apns_auth_key.p8:${APNS_P8_FILE_PATH}
     depends_on:

diff --git a/server/backend/cron/cronjobs.go b/server/backend/cron/cronjobs.go
@@ -20,18 +20,18 @@ type CronService struct {
 
 // names for cron jobs as specified in database
 const (
+	StorageDir               = "/Storage/" // target location of files
 	NewsType                 = "news"
 	FileDownloadType         = "fileDownload"
 	DishNameDownload         = "dishNameDownload"
 	AverageRatingComputation = "averageRatingComputation"
 	CanteenHeadcount         = "canteenHeadCount"
-	StorageDir               = "/Storage/" // target location of files
 	IOSNotifications         = "iosNotifications"
 	IOSActivityReset         = "iosActivityReset"
+	MovieType                = "movie"
 
 	/* MensaType      = "mensa"
 	ChatType       = "chat"
-	KinoType       = "kino"
 	RoomfinderType = "roomfinder"
 	TicketSaleType = "ticketsale"
 	AlarmType      = "alarm" */
@@ -58,7 +58,7 @@ func (c *CronService) Run() error {
 		var res []model.Crontab
 
 		c.db.Model(&model.Crontab{}).
-			Where("`interval` > 0 AND (lastRun+`interval`) < ? AND type IN (?, ?, ?, ?, ?, ?, ?)",
+			Where("`interval` > 0 AND (lastRun+`interval`) < ? AND type IN (?, ?, ?, ?, ?, ?, ?, ?)",
 				time.Now().Unix(),
 				NewsType,
 				FileDownloadType,
@@ -67,6 +67,7 @@ func (c *CronService) Run() error {
 				CanteenHeadcount,
 				IOSNotifications,
 				IOSActivityReset,
+				MovieType,
 			).
 			Scan(&res)
 
@@ -80,6 +81,8 @@ func (c *CronService) Run() error {
 					}
 				}
 			}
+			cronFields := log.Fields{"Cron (id)": cronjob.Cron, "type": cronjob.Type.String, "offset": offset, "LastRun": cronjob.LastRun, "interval": cronjob.Interval, "id (not real id)": cronjob.ID.Int64}
+			log.WithFields(cronFields).Trace("Running cronjob")
 
 			cronjob.LastRun = int32(time.Now().Unix()) + offset
 			c.db.Save(&cronjob)
@@ -101,14 +104,17 @@ func (c *CronService) Run() error {
 				if c.useMensa {
 					g.Go(c.averageRatingComputation)
 				}
+			case MovieType:
+				// if this is not copied here, this may not be thread-safe due to go's guarantees
+				// loop variable cronjob captured by func literal (govet)
+				copyCronjob := cronjob
+				g.Go(func() error { return c.movieCron(&copyCronjob) })
 				/*
 					TODO: Implement handlers for other cronjobs
 					case MensaType:
 						g.Go(func() error { return c.mensaCron() })
 					case ChatType:
 						g.Go(func() error { return c.chatCron() })
-					case KinoType:
-						g.Go(func() error { return c.kinoCron() })
 					case RoomfinderType:
 						g.Go(func() error { return c.roomFinderCron() })
 					case TicketSaleType:

diff --git a/server/backend/cron/kino.go b/server/backend/cron/kino.go
diff --git a/server/backend/cron/movies.go b/server/backend/cron/movies.go
@@ -0,0 +1,235 @@
+package cron
+
+import (
+	"database/sql"
+	"encoding/json"
+	"encoding/xml"
+	"errors"
+	"github.com/PuerkitoBio/goquery"
+	"github.com/TUM-Dev/Campus-Backend/server/model"
+	log "github.com/sirupsen/logrus"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+	"time"
+)
+
+// MovieItems is one <item> element of the feed decoded by parseUpcomingFeed.
+// movieCron reads these fields to build the model.Files and model.Kino records.
+type MovieItems struct {
+	// Title is the <title> element; movieCron stores it as the movie title and as the file name.
+	Title     string `xml:"title"`
+	// Link is the <link> element; movieCron uses it to detect already stored movies
+	// and passes it to extractTUFilmWebsite to find the imdb id.
+	Link      string `xml:"link"`
+	// PubDate is the <pubDate> element; movieCron parses it with time.RFC1123Z.
+	PubDate   string `xml:"pubDate"`
+	// Location is the <location> element; movieCron only adds it to its log fields.
+	Location  string `xml:"location"`
+	// Enclosure holds the url, length and type attributes of the <enclosure> element.
+	// movieCron stores Url as the URL of the preview file.
+	Enclosure struct {
+		Url    string `xml:"url,attr"`
+		Length string `xml:"length,attr"`
+		Type   string `xml:"type,attr"`
+	} `xml:"enclosure"`
+}
+
+// MovieChannel is one <channel> element of the feed decoded by parseUpcomingFeed.
+type MovieChannel struct {
+	// Items collects every <item> child of the channel.
+	Items []MovieItems `xml:"item"`
+}
+
+const (
+	// MovieImageDirectory is the value movieCron writes into the Path field of the
+	// model.Files record it creates for a movie's preview image.
+	MovieImageDirectory = "movie/"
+)
+
+// movieCron imports movies from the feed of the news source referenced by cronjob.ID.
+//
+// For every feed item for which no model.Kino record with the same link exists yet, it
+//   - parses the publication date (time.RFC1123Z),
+//   - scrapes the imdb id from the page behind the item's link (extractTUFilmWebsite),
+//   - fetches the movie details (getIMDB),
+//   - creates a model.Files record for the preview image and
+//   - creates the model.Kino record.
+//
+// A failure while handling a single item is logged and that item is skipped.
+// An error is returned only if cronjob.ID is null, the news source cannot be
+// loaded, or the feed cannot be fetched or decoded.
+func (c *CronService) movieCron(cronjob *model.Crontab) error {
+	//Get the news feed we want to get our data from
+	if !cronjob.ID.Valid {
+		log.WithField("cron", cronjob.Cron).Error("skipping movie job, id of source is null")
+		return errors.New("skipping movie job, id of source is null")
+	}
+	var newsSource model.NewsSource
+	if err := c.db.First(&newsSource, cronjob.ID.Int64).Error; err != nil {
+		// attach the cause, otherwise the log entry does not say why the lookup failed
+		log.WithError(err).WithField("cron", cronjob.Cron).Error("error getting news source from database")
+		return err
+	}
+	//Parse the data into a struct
+	log.WithField("url", newsSource.URL.String).Trace("parsing upcoming feed")
+	channels, err := parseUpcomingFeed(newsSource.URL.String)
+	if err != nil {
+		return err
+	}
+	for _, channel := range channels {
+		for _, item := range channel.Items {
+			logFields := log.Fields{"link": item.Link, "title": item.Title, "date": item.PubDate, "location": item.Location, "url": item.Enclosure.Url}
+			// the link identifies a movie: items whose link is already stored are skipped
+			var exists bool
+			if err := c.db.Model(model.Kino{}).Select("count(*) > 0").Where("link = ?", item.Link).Find(&exists).Error; err != nil {
+				log.WithError(err).WithFields(logFields).Error("Could not check if movie already exists")
+				continue
+			}
+			if exists {
+				log.WithFields(logFields).Trace("Movie already exists")
+				continue
+			}
+
+			// data cleanup
+			date, err := time.Parse(time.RFC1123Z, item.PubDate)
+			if err != nil {
+				log.WithFields(logFields).WithError(err).Error("error while parsing date")
+				continue
+			}
+
+			// populate extra data from imdb
+			imdbID, err := extractTUFilmWebsite(item.Link)
+			if err != nil {
+				log.WithFields(logFields).WithError(err).Error("error while finding imdb id")
+				continue
+			}
+			imdbMovie, err := getIMDB(imdbID)
+			if err != nil {
+				log.WithFields(logFields).WithError(err).Error("error while getting imdb movie")
+				continue
+			}
+
+			// add a file to preview (downloaded in another cronjob)
+			file := model.Files{
+				Name: item.Title,
+				Path: MovieImageDirectory,
+				URL:  sql.NullString{String: item.Enclosure.Url, Valid: true},
+			}
+			if err := c.db.Create(&file).Error; err != nil {
+				log.WithFields(logFields).WithError(err).Error("error while creating file")
+				continue
+			}
+
+			// save the result of the previous steps (🎉)
+			movie := model.Kino{
+				Date:        date,
+				Title:       item.Title,
+				Year:        imdbMovie.Year,
+				Runtime:     imdbMovie.Runtime,
+				Genre:       imdbMovie.Genre,
+				Director:    imdbMovie.Director,
+				Actors:      imdbMovie.Actors,
+				ImdbRating:  imdbMovie.imdbRating,
+				Description: imdbMovie.Plot, // we get this from imdb as tu-film does truncate their plot
+				FilesID:     file.File,
+				Files:       file,
+				Link:        item.Link,
+			}
+			if err := c.db.Create(&movie).Error; err != nil {
+				log.WithFields(logFields).WithError(err).Error("error while creating movie")
+				continue
+			}
+			log.WithFields(logFields).Info("created movie")
+		}
+	}
+	return nil
+}
+
+// imdbResults holds the fields of the JSON response decoded by getIMDB that
+// movieCron copies into a model.Kino record. All values are kept as strings.
+type imdbResults struct {
+	Year     string
+	Runtime  string
+	Genre    string
+	Director string
+	Actors   string
+	Plot     string
+	// imdbRating is unexported, so encoding/json cannot fill it on its own;
+	// UnmarshalJSON copies the "imdbRating" key into it.
+	imdbRating string
+}
+
+// UnmarshalJSON decodes data into r, including the unexported imdbRating field.
+// Without this method the rating would silently stay empty, because
+// encoding/json only sets exported struct fields.
+func (r *imdbResults) UnmarshalJSON(data []byte) error {
+	// plain has the same fields but no methods, so decoding into it uses the
+	// default struct decoding instead of recursing into this method.
+	type plain imdbResults
+	if err := json.Unmarshal(data, (*plain)(r)); err != nil {
+		return err
+	}
+	// a pointer distinguishes "key missing or null" from an empty rating,
+	// so an existing value is only overwritten when the key is present
+	var extra struct {
+		Rating *string `json:"imdbRating"`
+	}
+	if err := json.Unmarshal(data, &extra); err != nil {
+		return err
+	}
+	if extra.Rating != nil {
+		r.imdbRating = *extra.Rating
+	}
+	return nil
+}
+
+// getIMDB requests the movie with the given imdb id from omdbapi.com and
+// decodes the JSON response into imdbResults.
+// id: imdb id of the movie; it is inserted into the query string unescaped
+// The api key is read from the environment variable OMDB_API_KEY.
+// An error is returned if the request fails, the api answers with a status
+// other than 200 (401 is reported as a missing or invalid api key) or the
+// response body cannot be decoded.
+func getIMDB(id string) (*imdbResults, error) {
+	url := "https://www.omdbapi.com/?r=json&v=1&i=" + id + "&apikey=" + os.Getenv("OMDB_API_KEY")
+	resp, err := http.Get(url)
+	if err != nil {
+		log.WithField("url", url).WithError(err).Error("Error while getting response for request")
+		return nil, err
+	}
+	// registered before the status checks, so that every return path below closes the body
+	defer func(Body io.ReadCloser) {
+		err := Body.Close()
+		if err != nil {
+			log.WithField("url", url).WithError(err).Error("Error while closing body")
+		}
+	}(resp.Body)
+	// check if the api key is valid
+	if resp.StatusCode == http.StatusUnauthorized {
+		return nil, errors.New("missing or invalid api key for omdb (environment variable OMDB_API_KEY)")
+	}
+	// other errors
+	if resp.StatusCode != http.StatusOK {
+		body, err := io.ReadAll(resp.Body)
+		if err != nil {
+			log.WithError(err).Warn("Unable to read http body")
+			return nil, err
+		}
+		// distinct keys: setting "status" twice would drop the numeric status code
+		log.WithField("status_code", resp.StatusCode).WithField("status", resp.Status).WithField("body", string(body)).Error("error while getting imdb movie")
+		return nil, errors.New("error while getting imdb movie")
+	}
+	// parse the response body
+	var res imdbResults
+	if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
+		log.WithField("url", url).WithError(err).Error("Error while unmarshalling imdbResults")
+		return nil, err
+	}
+	return &res, nil
+}
+
+// extractTUFilmWebsite scrapes the imdb id from the tu-film website
+// url: url of the tu-film website, e.g. https://www.tu-film.de/programm/view/1204
+// The id is the path segment that follows "imdb.com/title/" in the first link
+// pointing to imdb. An error is returned if the page cannot be fetched or
+// parsed, or if it contains no usable imdb link.
+func extractTUFilmWebsite(url string) (string, error) {
+	resp, err := http.Get(url)
+	if err != nil {
+		// hand the cause to the caller instead of replacing it with a generic message
+		return "", err
+	}
+	defer func(Body io.ReadCloser) {
+		err := Body.Close()
+		if err != nil {
+			log.WithError(err).Error("Error while closing body")
+		}
+	}(resp.Body)
+	// parse the response body
+	doc, err := goquery.NewDocumentFromReader(resp.Body)
+	if err != nil {
+		log.WithError(err).Error("Error while parsing document")
+		return "", err
+	}
+
+	// extract the imdb link
+	const imdbTitlePrefix = "imdb.com/title/"
+	imdbLinks := doc.Find("a").FilterFunction(func(i int, s *goquery.Selection) bool {
+		href, hrefExists := s.Attr("href")
+		return hrefExists && strings.Contains(href, imdbTitlePrefix)
+	})
+	if imdbLinks.Length() == 0 {
+		return "", errors.New("no imdb link found")
+	}
+	if imdbLinks.Length() > 1 {
+		log.WithField("url", url).Warn("more than one imdb link found. using first one")
+	}
+	// extract the imdb id from the link
+	// the filter above guarantees that the prefix is part of the href
+	href, _ := imdbLinks.First().Attr("href")
+	imdbID := href[strings.Index(href, imdbTitlePrefix)+len(imdbTitlePrefix):]
+	// drop everything after the id: trailing slash, further path segments, query or fragment
+	if end := strings.IndexAny(imdbID, "/?#"); end >= 0 {
+		imdbID = imdbID[:end]
+	}
+	if imdbID == "" {
+		return "", errors.New("imdb link does not contain an id")
+	}
+	return imdbID, nil
+}
+
+// parseUpcomingFeed fetches the feed at the given url and decodes its XML
+// url: address of the feed, e.g. http://www.tu-film.de/programm/index/upcoming.rss
+// It returns the <channel> elements of the document. If the request or the
+// decoding fails, the error is logged and returned.
+func parseUpcomingFeed(url string) ([]MovieChannel, error) {
+	response, getErr := http.Get(url)
+	if getErr != nil {
+		log.WithField("url", url).WithError(getErr).Error("Error while getting response for request")
+		return nil, getErr
+	}
+	defer func() {
+		if closeErr := response.Body.Close(); closeErr != nil {
+			log.WithError(closeErr).Error("Error while closing body")
+		}
+	}()
+
+	// only the <channel> children of the document root are read
+	feed := struct {
+		Channels []MovieChannel `xml:"channel"`
+	}{}
+	if decodeErr := xml.NewDecoder(response.Body).Decode(&feed); decodeErr != nil {
+		log.WithError(decodeErr).Error("Error while unmarshalling UpcomingFeed")
+		return nil, decodeErr
+	}
+	return feed.Channels, nil
+}
diff --git a/server/backend/cron/news.go b/server/backend/cron/news.go
@@ -18,9 +18,9 @@ import (
 )
 
 const (
-	ImageDirectory = "news/newspread/"
-	NewspreadHook  = "newspread"
-	ImpulsivHook   = "impulsivHook"
+	NewsImageDirectory = "news/newspread/"
+	NewspreadHook      = "newspread"
+	ImpulsivHook       = "impulsivHook"
 	//MAX_IMAGE_RETRYS = 3
 )
 
@@ -137,7 +137,7 @@ func (c *CronService) parseNewsFeed(source model.NewsSource) error {
 	return nil
 }
 
-// saveImage Saves an image to the database so it can be downloaded by another cronjob and returns it's id
+// saveImage Saves an image to the database, so it can be downloaded by another cronjob and returns its id
 func (c *CronService) saveImage(url string) (null.Int, error) {
 	targetFileName := fmt.Sprintf("%x.jpg", md5.Sum([]byte(url)))
 	var fileId null.Int
@@ -154,7 +154,7 @@ func (c *CronService) saveImage(url string) (null.Int, error) {
 	// otherwise store in database:
 	file := model.Files{
 		Name:       targetFileName,
-		Path:       ImageDirectory,
+		Path:       NewsImageDirectory,
 		URL:        sql.NullString{String: url, Valid: true},
 		Downloaded: sql.NullBool{Bool: false, Valid: true},
 	}