Skip to content

Commit

Permalink
implemented a cronjob to save to the db
Browse files Browse the repository at this point in the history
  • Loading branch information
CommanderStorm committed Sep 7, 2023
1 parent 4ab8058 commit 01d4444
Show file tree
Hide file tree
Showing 9 changed files with 341 additions and 18 deletions.
1 change: 1 addition & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ services:
- APNS_KEY_ID=${APNS_KEY_ID}
- APNS_TEAM_ID=${APNS_TEAM_ID}
- APNS_P8_FILE_PATH=${APNS_P8_FILE_PATH}
- OMDB_API_KEY=${OMDB_API_KEY}
volumes:
- ./apns_auth_key.p8:${APNS_P8_FILE_PATH}
depends_on:
Expand Down
16 changes: 11 additions & 5 deletions server/backend/cron/cronjobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,18 @@ type CronService struct {

// names for cron jobs as specified in database
const (
StorageDir = "/Storage/" // target location of files
NewsType = "news"
FileDownloadType = "fileDownload"
DishNameDownload = "dishNameDownload"
AverageRatingComputation = "averageRatingComputation"
CanteenHeadcount = "canteenHeadCount"
StorageDir = "/Storage/" // target location of files
IOSNotifications = "iosNotifications"
IOSActivityReset = "iosActivityReset"
MovieType = "movie"

/* MensaType = "mensa"
ChatType = "chat"
KinoType = "kino"
RoomfinderType = "roomfinder"
TicketSaleType = "ticketsale"
AlarmType = "alarm" */
Expand All @@ -58,7 +58,7 @@ func (c *CronService) Run() error {
var res []model.Crontab

c.db.Model(&model.Crontab{}).
Where("`interval` > 0 AND (lastRun+`interval`) < ? AND type IN (?, ?, ?, ?, ?, ?, ?)",
Where("`interval` > 0 AND (lastRun+`interval`) < ? AND type IN (?, ?, ?, ?, ?, ?, ?, ?)",
time.Now().Unix(),
NewsType,
FileDownloadType,
Expand All @@ -67,6 +67,7 @@ func (c *CronService) Run() error {
CanteenHeadcount,
IOSNotifications,
IOSActivityReset,
MovieType,
).
Scan(&res)

Expand All @@ -80,6 +81,8 @@ func (c *CronService) Run() error {
}
}
}
cronFields := log.Fields{"Cron (id)": cronjob.Cron, "type": cronjob.Type.String, "offset": offset, "LastRun": cronjob.LastRun, "interval": cronjob.Interval, "id (not real id)": cronjob.ID.Int64}
log.WithFields(cronFields).Trace("Running cronjob")

cronjob.LastRun = int32(time.Now().Unix()) + offset
c.db.Save(&cronjob)
Expand All @@ -101,14 +104,17 @@ func (c *CronService) Run() error {
if c.useMensa {
g.Go(c.averageRatingComputation)
}
case MovieType:
// if this is not copied here, this may not be thread-safe due to go's guarantees
// loop variable cronjob captured by func literal (govet)
copyCronjob := cronjob
g.Go(func() error { return c.movieCron(&copyCronjob) })
/*
TODO: Implement handlers for other cronjobs
case MensaType:
g.Go(func() error { return c.mensaCron() })
case ChatType:
g.Go(func() error { return c.chatCron() })
case KinoType:
g.Go(func() error { return c.kinoCron() })
case RoomfinderType:
g.Go(func() error { return c.roomFinderCron() })
case TicketSaleType:
Expand Down
7 changes: 0 additions & 7 deletions server/backend/cron/kino.go

This file was deleted.

235 changes: 235 additions & 0 deletions server/backend/cron/movies.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
package cron

import (
"database/sql"
"encoding/json"
"encoding/xml"
"errors"
"github.com/PuerkitoBio/goquery"
"github.com/TUM-Dev/Campus-Backend/server/model"
log "github.com/sirupsen/logrus"
"io"
"net/http"
"os"
"strings"
"time"
)

// MovieItems is the decoded form of a single <item> element of the movie feed.
type MovieItems struct {
// Title is used as the movie title and as the name of the preview file entry.
Title string `xml:"title"`
// Link points to the page of the movie; movieCron uses it to detect already stored movies.
Link string `xml:"link"`
// PubDate is the raw date string; movieCron parses it as time.RFC1123Z.
PubDate string `xml:"pubDate"`
// Location is only used for logging in this file.
Location string `xml:"location"`
// Enclosure describes the attachment of the item; its Url is stored as the url of the preview file.
Enclosure struct {
Url string `xml:"url,attr"`
Length string `xml:"length,attr"`
Type string `xml:"type,attr"`
} `xml:"enclosure"`
}

// MovieChannel is the decoded form of a <channel> element of the movie feed.
type MovieChannel struct {
// Items holds every <item> element found inside the channel.
Items []MovieItems `xml:"item"`
}

const (
// MovieImageDirectory is the path set on the file entries which movieCron creates for movie previews.
MovieImageDirectory = "movie/"
)

// movieCron imports the movies of the feed that belongs to the news source referenced by the cronjob.
// For every feed item whose link is not stored yet, it resolves the imdb id via the item's page,
// fetches the movie details, creates a file entry for the preview image and finally stores the movie.
// An item failing at any of these steps is logged and skipped.
// Only a missing source id, a failing source lookup or a failing feed download/parse is returned as error.
func (c *CronService) movieCron(cronjob *model.Crontab) error {
	// the id of the cronjob is used as the id of the news source holding the feed url
	if !cronjob.ID.Valid {
		log.WithField("cron", cronjob.Cron).Error("skipping movie job, id of source is null")
		return errors.New("skipping movie job, id of source is null")
	}
	var source model.NewsSource
	if lookupErr := c.db.First(&source, cronjob.ID.Int64).Error; lookupErr != nil {
		log.WithField("cron", cronjob.Cron).Error("error getting news source from database")
		return lookupErr
	}

	// download and decode the feed
	feedURL := source.URL.String
	log.WithField("url", feedURL).Trace("parsing upcoming feed")
	channels, feedErr := parseUpcomingFeed(feedURL)
	if feedErr != nil {
		return feedErr
	}

	for _, channel := range channels {
		for _, item := range channel.Items {
			// every log line about this item carries the same set of fields
			entry := log.WithFields(log.Fields{
				"link":     item.Link,
				"title":    item.Title,
				"date":     item.PubDate,
				"location": item.Location,
				"url":      item.Enclosure.Url,
			})

			// movies are identified by their link, known ones are not imported again
			var known bool
			lookup := c.db.Model(model.Kino{}).Select("count(*) > 0").Where("link = ?", item.Link).Find(&known)
			if lookup.Error != nil {
				entry.WithError(lookup.Error).Error("Cound lot check if movie already exists")
				continue
			}
			if known {
				entry.Trace("Movie already exists")
				continue
			}

			// data cleanup
			screeningDate, dateErr := time.Parse(time.RFC1123Z, item.PubDate)
			if dateErr != nil {
				entry.WithError(dateErr).Error("error while parsing date")
				continue
			}

			// enrich the item with the details available for its imdb id
			imdbID, idErr := extractTUFilmWebsite(item.Link)
			if idErr != nil {
				entry.WithError(idErr).Error("error while finding imdb id")
				continue
			}
			details, detailsErr := getIMDB(imdbID)
			if detailsErr != nil {
				entry.WithError(detailsErr).Error("error while getting imdb movie")
				continue
			}

			// register the preview image as a file entry (the image itself is not fetched here)
			preview := model.Files{
				Name: item.Title,
				Path: MovieImageDirectory,
				URL:  sql.NullString{String: item.Enclosure.Url, Valid: true},
			}
			if createErr := c.db.Create(&preview).Error; createErr != nil {
				entry.WithError(createErr).Error("error while creating file")
				continue
			}

			// persist the movie assembled from the previous steps
			movie := model.Kino{
				Date:        screeningDate,
				Title:       item.Title,
				Year:        details.Year,
				Runtime:     details.Runtime,
				Genre:       details.Genre,
				Director:    details.Director,
				Actors:      details.Actors,
				ImdbRating:  details.imdbRating,
				Description: details.Plot, // taken from imdb, as the plot on the tu-film page is truncated
				FilesID:     preview.File,
				Files:       preview,
				Link:        item.Link,
			}
			if createErr := c.db.Create(&movie).Error; createErr != nil {
				entry.WithError(createErr).Error("error while creating movie")
				continue
			}
			entry.Info("created movie")
		}
	}
	return nil
}

// imdbResults holds the subset of an OMDb API response which is stored for a movie.
type imdbResults struct {
	Year       string
	Runtime    string
	Genre      string
	Director   string
	Actors     string
	Plot       string
	imdbRating string
}

// UnmarshalJSON decodes an OMDb API response into the receiver.
// encoding/json only fills exported fields, so without this method the unexported
// imdbRating field would stay empty for every response. The payload is therefore
// decoded into an exported mirror of the struct and copied over afterwards.
// The receiver is left untouched if the payload cannot be decoded.
func (r *imdbResults) UnmarshalJSON(data []byte) error {
	var raw struct {
		Year       string
		Runtime    string
		Genre      string
		Director   string
		Actors     string
		Plot       string
		ImdbRating string `json:"imdbRating"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}
	*r = imdbResults{
		Year:       raw.Year,
		Runtime:    raw.Runtime,
		Genre:      raw.Genre,
		Director:   raw.Director,
		Actors:     raw.Actors,
		Plot:       raw.Plot,
		imdbRating: raw.ImdbRating,
	}
	return nil
}

// getIMDB requests the details of a movie from the OMDb API.
// id: imdb id of the movie; it is inserted into the query string as is
// The api key is read from the environment variable OMDB_API_KEY.
// An error is returned if the request fails, the api key is rejected (401),
// any other non-200 status is received or the response body cannot be decoded.
func getIMDB(id string) (*imdbResults, error) {
	url := "https://www.omdbapi.com/?r=json&v=1&i=" + id + "&apikey=" + os.Getenv("OMDB_API_KEY")
	resp, err := http.Get(url)
	if err != nil {
		log.WithField("url", url).WithError(err).Error("Error while getting response for request")
		return nil, err
	}
	// registered before the status checks, so that the body is also closed on the early error returns below
	defer func(Body io.ReadCloser) {
		err := Body.Close()
		if err != nil {
			log.WithField("url", url).WithError(err).Error("Error while closing body")
		}
	}(resp.Body)
	// check if the api key is valid
	if resp.StatusCode == http.StatusUnauthorized {
		return nil, errors.New("missing or invalid api key for omdb (environment variable OMDB_API_KEY)")
	}
	// other errors
	if resp.StatusCode != http.StatusOK {
		body, err := io.ReadAll(resp.Body)
		if err != nil {
			log.WithError(err).Warn("Unable to read http body")
			return nil, err
		}
		// resp.Status already contains the numeric code, e.g. "404 Not Found"
		log.WithField("status", resp.Status).WithField("body", string(body)).Error("error while getting imdb movie")
		return nil, errors.New("error while getting imdb movie")
	}
	// parse the response body
	var res imdbResults
	if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
		log.WithField("url", url).WithError(err).Error("Error while unmarshalling imdbResults")
		return nil, err
	}
	return &res, nil
}

// imdbTitlePath is the part of an imdb movie link which directly precedes the imdb id
const imdbTitlePath = "imdb.com/title/"

// extractTUFilmWebsite scrapes the imdb id from a movie page of the tu-film website
// url: url of the tu-film website, e.g. https://www.tu-film.de/programm/view/1204
// If the page contains several imdb links, the first one is used.
// An error is returned if the page cannot be requested or parsed, or if no imdb link with an id is found.
func extractTUFilmWebsite(url string) (string, error) {
	resp, err := http.Get(url)
	if err != nil {
		// hand the cause to the caller instead of replacing it with a generic message
		return "", err
	}
	defer func(Body io.ReadCloser) {
		err := Body.Close()
		if err != nil {
			log.WithError(err).Error("Error while closing body")
		}
	}(resp.Body)
	// parse the response body
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		log.WithError(err).Error("Error while parsing document")
		return "", err
	}

	// extract the imdb link
	imdbLinks := doc.Find("a").FilterFunction(func(i int, s *goquery.Selection) bool {
		href, hrefExists := s.Attr("href")
		return hrefExists && strings.Contains(href, imdbTitlePath)
	})
	if imdbLinks.Length() == 0 {
		return "", errors.New("no imdb link found")
	}
	if imdbLinks.Length() > 1 {
		log.WithField("url", url).Warn("more than one imdb link found. using first one")
	}
	// extract the imdb id from the link
	href, _ := imdbLinks.First().Attr("href") // the filter above only keeps links with a href
	return imdbIDFromHref(href)
}

// imdbIDFromHref returns the path segment following "imdb.com/title/" in href.
// Anything after the id (further path segments, a query string or a fragment) is ignored,
// e.g. https://www.imdb.com/title/tt0111161/?ref_=nv results in tt0111161.
func imdbIDFromHref(href string) (string, error) {
	start := strings.Index(href, imdbTitlePath)
	if start < 0 {
		return "", errors.New("no imdb link found")
	}
	id := href[start+len(imdbTitlePath):]
	if end := strings.IndexAny(id, "/?#"); end >= 0 {
		id = id[:end]
	}
	if id == "" {
		return "", errors.New("imdb link does not contain an id")
	}
	return id, nil
}

// parseUpcomingFeed requests the feed at the given url and decodes the channels it contains
// url: url of the feed, e.g. http://www.tu-film.de/programm/index/upcoming.rss
// An error is returned if the request fails or the response body cannot be decoded as xml.
func parseUpcomingFeed(url string) ([]MovieChannel, error) {
	response, requestErr := http.Get(url)
	if requestErr != nil {
		log.WithField("url", url).WithError(requestErr).Error("Error while getting response for request")
		return nil, requestErr
	}
	defer func(body io.ReadCloser) {
		if closeErr := body.Close(); closeErr != nil {
			log.WithError(closeErr).Error("Error while closing body")
		}
	}(response.Body)

	// only the <channel> children of the decoded element are of interest
	var feed struct {
		Channels []MovieChannel `xml:"channel"`
	}
	if decodeErr := xml.NewDecoder(response.Body).Decode(&feed); decodeErr != nil {
		log.WithError(decodeErr).Error("Error while unmarshalling UpcomingFeed")
		return nil, decodeErr
	}
	return feed.Channels, nil
}
10 changes: 5 additions & 5 deletions server/backend/cron/news.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ import (
)

const (
ImageDirectory = "news/newspread/"
NewspreadHook = "newspread"
ImpulsivHook = "impulsivHook"
NewsImageDirectory = "news/newspread/"
NewspreadHook = "newspread"
ImpulsivHook = "impulsivHook"
//MAX_IMAGE_RETRYS = 3
)

Expand Down Expand Up @@ -137,7 +137,7 @@ func (c *CronService) parseNewsFeed(source model.NewsSource) error {
return nil
}

// saveImage Saves an image to the database so it can be downloaded by another cronjob and returns it's id
// saveImage Saves an image to the database, so it can be downloaded by another cronjob and returns its id
func (c *CronService) saveImage(url string) (null.Int, error) {
targetFileName := fmt.Sprintf("%x.jpg", md5.Sum([]byte(url)))
var fileId null.Int
Expand All @@ -154,7 +154,7 @@ func (c *CronService) saveImage(url string) (null.Int, error) {
// otherwise store in database:
file := model.Files{
Name: targetFileName,
Path: ImageDirectory,
Path: NewsImageDirectory,
URL: sql.NullString{String: url, Valid: true},
Downloaded: sql.NullBool{Bool: false, Valid: true},
}
Expand Down
Loading

0 comments on commit 01d4444

Please sign in to comment.