Skip to content

Commit

Permalink
bug:file download (#237)
Browse files Browse the repository at this point in the history
* made the logging path of fileDownload.go more straight-forward

* refactored the file downloader into separate testable functions

* added a testcase for `maybeResizeImage`

* made shure that the file-downloading runs in a transaction

* added a testcase for `ensureFileDoesNotExist`

* fixed a typo

* rebase
  • Loading branch information
CommanderStorm authored Sep 21, 2023
1 parent 301a0c7 commit 892c29b
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 57 deletions.
2 changes: 1 addition & 1 deletion client/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/TUM-Dev/Campus-Backend/client
go 1.21

require (
github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230920223017-73df1e4061c8
github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230921201035-301a0c7bab98
github.com/sirupsen/logrus v1.9.3
google.golang.org/grpc v1.58.1
)
Expand Down
4 changes: 2 additions & 2 deletions client/go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230920223017-73df1e4061c8 h1:FYhgX9DA9JvEyP06VX/Ot/8Oav9fS8hwHhGOyLfvX6E=
github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230920223017-73df1e4061c8/go.mod h1:fjoLL3rbdY6wTRJIksekT2p3OUp5ocFfXjB/avV/TVI=
github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230921201035-301a0c7bab98 h1:cifzCP3eUZgN+iqfhfGPTL1JFz4g/qjBNpwhcjlKOWk=
github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230921201035-301a0c7bab98/go.mod h1:fjoLL3rbdY6wTRJIksekT2p3OUp5ocFfXjB/avV/TVI=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down
140 changes: 87 additions & 53 deletions server/backend/cron/fileDownload.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
package cron

import (
"bytes"
"fmt"
"image"
"errors"
"io"
"net/http"
"os"
"path"
"strings"

"github.com/TUM-Dev/Campus-Backend/server/model"
Expand All @@ -16,69 +15,104 @@ import (
"gorm.io/gorm"
)

// fileDownloadCron Downloads all files that are not marked as finished in the database.
// fileDownloadCron downloads all files that are not marked as finished in the database
func (c *CronService) fileDownloadCron() error {
var files []model.Files
err := c.db.Find(&files, "downloaded = 0").Scan(&files).Error
if err != nil && err != gorm.ErrRecordNotFound {
return err
}
for i := range files {
if files[i].URL.Valid {
c.downloadFile(files[i])
return c.db.Transaction(func(tx *gorm.DB) error {
var files []model.Files
err := tx.Find(&files, "downloaded = 0 AND url IS NOT NULL").Error
if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
log.WithError(err).Error("Could not get files from database")
return err
}
}
return nil
for _, file := range files {
// in our case resolves to /Storage/news/newspread/1234abc.jpg
dstPath := path.Join(StorageDir, file.Path, file.Name)
fields := log.Fields{"url": file.URL.String, "dstPath": dstPath}
log.WithFields(fields).Info("downloading file")

if err = tx.Model(&model.Files{File: file.File}).Update("downloads", file.Downloads+1).Error; err != nil {
log.WithError(err).WithFields(fields).Error("Could not set update the download-count")
continue
}

if err := ensureFileDoesNotExist(dstPath); err != nil {
log.WithError(err).WithFields(fields).Warn("Could not ensure file does not exist")
continue
}
if err := downloadFile(file.URL.String, dstPath); err != nil {
log.WithError(err).WithFields(fields).Warn("Could not download file")
continue
}
if err := maybeResizeImage(dstPath); err != nil {
log.WithError(err).WithFields(fields).Warn("Could not resize image")
continue
}
// everything went well => we can mark the file as downloaded
if err = tx.Model(&model.Files{URL: file.URL}).Update("downloaded", true).Error; err != nil {
log.WithError(err).WithFields(fields).Error("Could not set image to downloaded.")
continue
}
}
return nil
})
}

// downloadFile Downloads a file, marks it downloaded and resizes it if it's an image.
// url: download url of the file
// name: target name of the file
func (c *CronService) downloadFile(file model.Files) {
if !file.URL.Valid {
log.WithField("fileId", file.File).Info("skipping file without url")
// ensureFileDoesNotExist makes sure that the file does not exist, but the directory in which it should be does
func ensureFileDoesNotExist(dstPath string) error {
if _, err := os.Stat(dstPath); err == nil {
// file already exists
return os.Remove(dstPath)
}
url := file.URL.String
log.WithField("url", url).Info("downloading file")
resp, err := http.Get(url)
return os.MkdirAll(path.Dir(dstPath), 0755)
}

// maybeResizeImage resizes the image if it's an image to 1280px width keeping the aspect ratio
func maybeResizeImage(dstPath string) error {
mime, err := mimetype.DetectFile(dstPath)
if err != nil {
log.WithError(err).WithField("url", url).Warn("Could not download image")
return
return err
}
if !strings.HasPrefix(mime.String(), "image/") {
return nil
}
// read body here because we can't exhaust the io.reader twice
body, err := io.ReadAll(resp.Body)

img, err := imaging.Open(dstPath)
if err != nil {
log.WithError(err).Warn("Unable to read http body")
return
return err
}
resizedImage := imaging.Resize(img, 1280, 0, imaging.Lanczos)
return imaging.Save(resizedImage, dstPath, imaging.JPEGQuality(75))
}

// resize if file is image
mime := mimetype.Detect(body)
if strings.HasPrefix(mime.String(), "image/") {
downloadedImg, _, err := image.Decode(bytes.NewReader(body))
if err != nil {
log.WithError(err).WithField("url", url).Warn("Couldn't decode source image")
return
// downloadFile Downloads a file from the given url and saves it to the given path
func downloadFile(url string, dstPath string) error {
fields := log.Fields{"url": url, "dstPath": dstPath}
resp, err := http.Get(url)
if err != nil {
return err
}
defer func(Body io.ReadCloser) {
if err := Body.Close(); err != nil {
log.WithError(err).WithFields(fields).Error("Error while closing body")
}
}(resp.Body)
if resp.StatusCode != http.StatusOK {
return err
}

// in our case resolves to /Storage/news/newspread/1234abc.jpg
dstFileName := fmt.Sprintf("%s%s", file.Path, file.Name)
dstImage := imaging.Resize(downloadedImg, 1280, 0, imaging.Lanczos)
err = imaging.Save(dstImage, StorageDir+dstFileName, imaging.JPEGQuality(75))
if err != nil {
log.WithError(err).WithField("url", url).Warn("Could not save image file")
return
}
} else {
// save without resizing image
err = os.WriteFile(fmt.Sprintf("%s%s", file.Path, file.Name), body, 0644)
// save the file to disk
out, err := os.Create(dstPath)
if err != nil {
return err
}
defer func(out *os.File) {
err := out.Close()
if err != nil {
log.WithError(err).Error("Can't save file to disk")
return
log.WithError(err).WithFields(fields).Error("Error while closing file")
}
}(out)
if _, err := io.Copy(out, resp.Body); err != nil {
return err
}
err = c.db.Model(&model.Files{}).Where("url = ?", url).Update("downloaded", true).Error
if err != nil {
log.WithError(err).Error("Could not set image to downloaded.")
}
return nil
}
91 changes: 91 additions & 0 deletions server/backend/cron/fileDownload_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package cron

import (
"image"
"os"
"testing"

"github.com/disintegration/imaging"
"github.com/stretchr/testify/require"
)

func TestMaybeResizeImage(t *testing.T) {
t.Run("Resize Image", func(t *testing.T) {
dstPath := "test_image.jpg"
require.NoError(t, createDummyImage(dstPath, 2000, 1000))
defer os.Remove(dstPath)
require.NoError(t, maybeResizeImage(dstPath))
img, err := imaging.Open(dstPath)
require.NoError(t, err)
require.Equal(t, 1280, img.Bounds().Dx())
require.Equal(t, 640, img.Bounds().Dy())
})
t.Run("Do not Resize smaller Image", func(t *testing.T) {
dstPath := "test_image.jpg"
require.NoError(t, createDummyImage(dstPath, 1000, 2000))
defer os.Remove(dstPath)
require.NoError(t, maybeResizeImage(dstPath))
img, err := imaging.Open(dstPath)
require.NoError(t, err)
require.Equal(t, 1280, img.Bounds().Dx())
require.Equal(t, 2560, img.Bounds().Dy())
})

t.Run("Skip Non-Image", func(t *testing.T) {
nonImageFile := "non_image.txt"
content := []byte("Dummy Text")
require.NoError(t, createDummyFile(nonImageFile, content))
defer os.Remove(nonImageFile)
require.NoError(t, maybeResizeImage(nonImageFile))
contentAfterExecution, err := os.ReadFile(nonImageFile)
require.NoError(t, err)
require.Equal(t, content, contentAfterExecution)
})
}

func TestEnsureFileDoesNotExist(t *testing.T) {
tmpFilePath := "test_dir/test_file.txt"
defer func() { _ = os.RemoveAll("test_dir") }()

t.Run("FileDoesNotExist", func(t *testing.T) {
require.NoError(t, ensureFileDoesNotExist(tmpFilePath))

_, dirErr := os.Stat("test_dir")
require.NoError(t, dirErr)

_, fileErr := os.Stat(tmpFilePath)
require.True(t, os.IsNotExist(fileErr))
})

t.Run("FileExists", func(t *testing.T) {
_, createErr := os.Create(tmpFilePath)
require.NoError(t, createErr)

require.NoError(t, ensureFileDoesNotExist(tmpFilePath))

_, dirErr := os.Stat("test_dir")
require.NoError(t, dirErr)

_, fileErr := os.Stat(tmpFilePath)
require.True(t, os.IsNotExist(fileErr))
})
}

// createDummyImage creates a dummy image file with the specified dimensions
func createDummyImage(filePath string, width, height int) error {
img := image.NewRGBA(image.Rect(0, 0, width, height))
return imaging.Save(img, filePath, imaging.JPEGQuality(75))
}

// createDummyFile creates a dummy non-image file
func createDummyFile(filePath string, content []byte) error {
file, err := os.Create(filePath)
if err != nil {
return err
}
defer file.Close()
if _, err := file.Write(content); err != nil {
return err
}
return nil
}
3 changes: 2 additions & 1 deletion server/backend/rpcserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ package backend
import (
"context"
"errors"
"github.com/TUM-Dev/Campus-Backend/server/env"
"net"

"github.com/TUM-Dev/Campus-Backend/server/env"

pb "github.com/TUM-Dev/Campus-Backend/server/api/tumdev"
"github.com/TUM-Dev/Campus-Backend/server/backend/ios_notifications/ios_apns"
"github.com/TUM-Dev/Campus-Backend/server/backend/ios_notifications/ios_apns/ios_apns_jwt"
Expand Down

0 comments on commit 892c29b

Please sign in to comment.