From 2d2d7b278a24243eb0027acd11683d0ea0ae59c2 Mon Sep 17 00:00:00 2001 From: Peter Nemere Date: Fri, 20 Dec 2024 07:32:26 +1000 Subject: [PATCH] Added second attempt of archive downloading without trailing / so it's compatible with both FM and uploaded datasets --- api/dataimport/datasetArchive/download.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/api/dataimport/datasetArchive/download.go b/api/dataimport/datasetArchive/download.go index 1a237f51..80c81fdb 100644 --- a/api/dataimport/datasetArchive/download.go +++ b/api/dataimport/datasetArchive/download.go @@ -125,6 +125,11 @@ func (dl *DatasetArchiveDownloader) downloadArchivedZipsForDataset(datasetID str // Download all zip files that have the dataset ID prefixed in their file name // Unzip them in timestamp order into downloadPath archiveSearchPath := path.Join(filepaths.RootArchive, datasetID) + + // NOTE: For importing datasets from FM, we don't want a / at the end, but for importing from uploaded data, we do! + // Uploaded datasets may have the same prefix at the start (eg user uploads dataset AA then later uploads A) so if + // we don't have a trailing / when reading dataset A, we'd get the files from AA and it'll fail. 
For this reason + // we make a second attempt after this one, without the trailing /, if no files were found if !strings.HasSuffix(archiveSearchPath, "/") { archiveSearchPath = archiveSearchPath + "/" } @@ -136,6 +141,18 @@ func (dl *DatasetArchiveDownloader) downloadArchivedZipsForDataset(datasetID str return []string{}, err } + // If nothing was found, try searching again without a trailing / (note: this prefix can also match datasets whose ID merely starts with datasetID) + if len(archivedFiles) <= 0 { + archiveSearchPath = path.Join(filepaths.RootArchive, datasetID) + + dl.log.Infof("Searching again for archived files in: s3://%v/%v", dl.datasetBucket, archiveSearchPath) + + archivedFiles, err = dl.remoteFS.ListObjects(dl.datasetBucket, archiveSearchPath) + if err != nil { + return []string{}, err + } + } + orderedArchivedFiles, err := getOrderedArchiveFiles(archivedFiles) if err != nil {