From 892c29b68f7f4db9d521d07dd7cb4a51ef670a44 Mon Sep 17 00:00:00 2001 From: Frank Elsinga Date: Fri, 22 Sep 2023 01:40:12 +0200 Subject: [PATCH] bug:file download (#237) * made the logging path of fileDownload.go more straight-forward * refactored the file downloader into separate testable functions * added a testcase for `maybeResizeImage` * made shure that the file-downloading runs in a transaction * added a testcase for `ensureFileDoesNotExist` * fixed a typo * rebase --- client/go.mod | 2 +- client/go.sum | 4 +- server/backend/cron/fileDownload.go | 140 ++++++++++++++--------- server/backend/cron/fileDownload_test.go | 91 +++++++++++++++ server/backend/rpcserver.go | 3 +- 5 files changed, 183 insertions(+), 57 deletions(-) create mode 100644 server/backend/cron/fileDownload_test.go diff --git a/client/go.mod b/client/go.mod index 0b101708..b52b76d1 100644 --- a/client/go.mod +++ b/client/go.mod @@ -3,7 +3,7 @@ module github.com/TUM-Dev/Campus-Backend/client go 1.21 require ( - github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230920223017-73df1e4061c8 + github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230921201035-301a0c7bab98 github.com/sirupsen/logrus v1.9.3 google.golang.org/grpc v1.58.1 ) diff --git a/client/go.sum b/client/go.sum index 72830782..5a226406 100644 --- a/client/go.sum +++ b/client/go.sum @@ -1,5 +1,5 @@ -github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230920223017-73df1e4061c8 h1:FYhgX9DA9JvEyP06VX/Ot/8Oav9fS8hwHhGOyLfvX6E= -github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230920223017-73df1e4061c8/go.mod h1:fjoLL3rbdY6wTRJIksekT2p3OUp5ocFfXjB/avV/TVI= +github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230921201035-301a0c7bab98 h1:cifzCP3eUZgN+iqfhfGPTL1JFz4g/qjBNpwhcjlKOWk= +github.com/TUM-Dev/Campus-Backend/server v0.0.0-20230921201035-301a0c7bab98/go.mod h1:fjoLL3rbdY6wTRJIksekT2p3OUp5ocFfXjB/avV/TVI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/server/backend/cron/fileDownload.go b/server/backend/cron/fileDownload.go index f70e1f10..3e16f5a0 100644 --- a/server/backend/cron/fileDownload.go +++ b/server/backend/cron/fileDownload.go @@ -1,12 +1,11 @@ package cron import ( - "bytes" - "fmt" - "image" + "errors" "io" "net/http" "os" + "path" "strings" "github.com/TUM-Dev/Campus-Backend/server/model" @@ -16,69 +15,104 @@ import ( "gorm.io/gorm" ) -// fileDownloadCron Downloads all files that are not marked as finished in the database. +// fileDownloadCron downloads all files that are not marked as finished in the database func (c *CronService) fileDownloadCron() error { - var files []model.Files - err := c.db.Find(&files, "downloaded = 0").Scan(&files).Error - if err != nil && err != gorm.ErrRecordNotFound { - return err - } - for i := range files { - if files[i].URL.Valid { - c.downloadFile(files[i]) + return c.db.Transaction(func(tx *gorm.DB) error { + var files []model.Files + err := tx.Find(&files, "downloaded = 0 AND url IS NOT NULL").Error + if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) { + log.WithError(err).Error("Could not get files from database") + return err } - } - return nil + for _, file := range files { + // in our case resolves to /Storage/news/newspread/1234abc.jpg + dstPath := path.Join(StorageDir, file.Path, file.Name) + fields := log.Fields{"url": file.URL.String, "dstPath": dstPath} + log.WithFields(fields).Info("downloading file") + + if err = tx.Model(&model.Files{File: file.File}).Update("downloads", file.Downloads+1).Error; err != nil { + log.WithError(err).WithFields(fields).Error("Could not set update the download-count") + continue + } + + if err := ensureFileDoesNotExist(dstPath); err != nil { + log.WithError(err).WithFields(fields).Warn("Could not ensure file does not exist") + continue + } + if err := downloadFile(file.URL.String, dstPath); err != nil { + log.WithError(err).WithFields(fields).Warn("Could not download file") + continue + } + if err := maybeResizeImage(dstPath); err != nil { + log.WithError(err).WithFields(fields).Warn("Could not resize image") + continue + } + // everything went well => we can mark the file as downloaded + if err = tx.Model(&model.Files{URL: file.URL}).Update("downloaded", true).Error; err != nil { + log.WithError(err).WithFields(fields).Error("Could not set image to downloaded.") + continue + } + } + return nil + }) } -// downloadFile Downloads a file, marks it downloaded and resizes it if it's an image. -// url: download url of the file -// name: target name of the file -func (c *CronService) downloadFile(file model.Files) { - if !file.URL.Valid { - log.WithField("fileId", file.File).Info("skipping file without url") +// ensureFileDoesNotExist makes sure that the file does not exist, but the directory in which it should be does +func ensureFileDoesNotExist(dstPath string) error { + if _, err := os.Stat(dstPath); err == nil { + // file already exists + return os.Remove(dstPath) } - url := file.URL.String - log.WithField("url", url).Info("downloading file") - resp, err := http.Get(url) + return os.MkdirAll(path.Dir(dstPath), 0755) +} + +// maybeResizeImage resizes the image if it's an image to 1280px width keeping the aspect ratio +func maybeResizeImage(dstPath string) error { + mime, err := mimetype.DetectFile(dstPath) if err != nil { - log.WithError(err).WithField("url", url).Warn("Could not download image") - return + return err + } + if !strings.HasPrefix(mime.String(), "image/") { + return nil } - // read body here because we can't exhaust the io.reader twice - body, err := io.ReadAll(resp.Body) + + img, err := imaging.Open(dstPath) if err != nil { - log.WithError(err).Warn("Unable to read http body") - return + return err } + resizedImage := imaging.Resize(img, 1280, 0, imaging.Lanczos) + return imaging.Save(resizedImage, dstPath, imaging.JPEGQuality(75)) +} - // resize if file is image - mime := mimetype.Detect(body) - if strings.HasPrefix(mime.String(), "image/") { - downloadedImg, _, err := image.Decode(bytes.NewReader(body)) - if err != nil { - log.WithError(err).WithField("url", url).Warn("Couldn't decode source image") - return +// downloadFile Downloads a file from the given url and saves it to the given path +func downloadFile(url string, dstPath string) error { + fields := log.Fields{"url": url, "dstPath": dstPath} + resp, err := http.Get(url) + if err != nil { + return err + } + defer func(Body io.ReadCloser) { + if err := Body.Close(); err != nil { + log.WithError(err).WithFields(fields).Error("Error while closing body") } + }(resp.Body) + if resp.StatusCode != http.StatusOK { + return err + } - // in our case resolves to /Storage/news/newspread/1234abc.jpg - dstFileName := fmt.Sprintf("%s%s", file.Path, file.Name) - dstImage := imaging.Resize(downloadedImg, 1280, 0, imaging.Lanczos) - err = imaging.Save(dstImage, StorageDir+dstFileName, imaging.JPEGQuality(75)) - if err != nil { - log.WithError(err).WithField("url", url).Warn("Could not save image file") - return - } - } else { - // save without resizing image - err = os.WriteFile(fmt.Sprintf("%s%s", file.Path, file.Name), body, 0644) + // save the file to disk + out, err := os.Create(dstPath) + if err != nil { + return err + } + defer func(out *os.File) { + err := out.Close() if err != nil { - log.WithError(err).Error("Can't save file to disk") - return + log.WithError(err).WithFields(fields).Error("Error while closing file") } + }(out) + if _, err := io.Copy(out, resp.Body); err != nil { + return err } - err = c.db.Model(&model.Files{}).Where("url = ?", url).Update("downloaded", true).Error - if err != nil { - log.WithError(err).Error("Could not set image to downloaded.") - } + return nil } diff --git a/server/backend/cron/fileDownload_test.go b/server/backend/cron/fileDownload_test.go new file mode 100644 index 00000000..18e0e0c1 --- /dev/null +++ b/server/backend/cron/fileDownload_test.go @@ -0,0 +1,91 @@ +package cron + +import ( + "image" + "os" + "testing" + + "github.com/disintegration/imaging" + "github.com/stretchr/testify/require" +) + +func TestMaybeResizeImage(t *testing.T) { + t.Run("Resize Image", func(t *testing.T) { + dstPath := "test_image.jpg" + require.NoError(t, createDummyImage(dstPath, 2000, 1000)) + defer os.Remove(dstPath) + require.NoError(t, maybeResizeImage(dstPath)) + img, err := imaging.Open(dstPath) + require.NoError(t, err) + require.Equal(t, 1280, img.Bounds().Dx()) + require.Equal(t, 640, img.Bounds().Dy()) + }) + t.Run("Do not Resize smaller Image", func(t *testing.T) { + dstPath := "test_image.jpg" + require.NoError(t, createDummyImage(dstPath, 1000, 2000)) + defer os.Remove(dstPath) + require.NoError(t, maybeResizeImage(dstPath)) + img, err := imaging.Open(dstPath) + require.NoError(t, err) + require.Equal(t, 1280, img.Bounds().Dx()) + require.Equal(t, 2560, img.Bounds().Dy()) + }) + + t.Run("Skip Non-Image", func(t *testing.T) { + nonImageFile := "non_image.txt" + content := []byte("Dummy Text") + require.NoError(t, createDummyFile(nonImageFile, content)) + defer os.Remove(nonImageFile) + require.NoError(t, maybeResizeImage(nonImageFile)) + contentAfterExecution, err := os.ReadFile(nonImageFile) + require.NoError(t, err) + require.Equal(t, content, contentAfterExecution) + }) +} + +func TestEnsureFileDoesNotExist(t *testing.T) { + tmpFilePath := "test_dir/test_file.txt" + defer func() { _ = os.RemoveAll("test_dir") }() + + t.Run("FileDoesNotExist", func(t *testing.T) { + require.NoError(t, ensureFileDoesNotExist(tmpFilePath)) + + _, dirErr := os.Stat("test_dir") + require.NoError(t, dirErr) + + _, fileErr := os.Stat(tmpFilePath) + require.True(t, os.IsNotExist(fileErr)) + }) + + t.Run("FileExists", func(t *testing.T) { + _, createErr := os.Create(tmpFilePath) + require.NoError(t, createErr) + + require.NoError(t, ensureFileDoesNotExist(tmpFilePath)) + + _, dirErr := os.Stat("test_dir") + require.NoError(t, dirErr) + + _, fileErr := os.Stat(tmpFilePath) + require.True(t, os.IsNotExist(fileErr)) + }) +} + +// createDummyImage creates a dummy image file with the specified dimensions +func createDummyImage(filePath string, width, height int) error { + img := image.NewRGBA(image.Rect(0, 0, width, height)) + return imaging.Save(img, filePath, imaging.JPEGQuality(75)) +} + +// createDummyFile creates a dummy non-image file +func createDummyFile(filePath string, content []byte) error { + file, err := os.Create(filePath) + if err != nil { + return err + } + defer file.Close() + if _, err := file.Write(content); err != nil { + return err + } + return nil +} diff --git a/server/backend/rpcserver.go b/server/backend/rpcserver.go index fa26e1ed..3544fef1 100644 --- a/server/backend/rpcserver.go +++ b/server/backend/rpcserver.go @@ -3,9 +3,10 @@ package backend import ( "context" "errors" - "github.com/TUM-Dev/Campus-Backend/server/env" "net" + "github.com/TUM-Dev/Campus-Backend/server/env" + pb "github.com/TUM-Dev/Campus-Backend/server/api/tumdev" "github.com/TUM-Dev/Campus-Backend/server/backend/ios_notifications/ios_apns" "github.com/TUM-Dev/Campus-Backend/server/backend/ios_notifications/ios_apns/ios_apns_jwt"