Skip to content

Commit

Permalink
Archive tool fixed to run in place, less options, it now just read al…
Browse files Browse the repository at this point in the history
…l zips for a scan, if there is more than 1, zips it, uploads the zip (overwriting the latest file), then deletes the others
  • Loading branch information
Peter Nemere committed Nov 10, 2024
1 parent 3408227 commit 7a0c922
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 199 deletions.
28 changes: 14 additions & 14 deletions api/dataimport/datasetArchive/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,35 +59,35 @@ func NewDatasetArchiveDownloader(
// Unzipped files path (archive zips unzipped here),
// Ordered list of zip files loaded from archive
// Error (if any)
func (dl *DatasetArchiveDownloader) DownloadFromDatasetArchive(datasetID string, workingDir string) (string, string, int, string, error) {
func (dl *DatasetArchiveDownloader) DownloadFromDatasetArchive(datasetID string, workingDir string) (string, string, []string, error) {
// Create the directories to process data in
dl.log.Debugf("Preparing to download archived dataset %v...", datasetID)

downloadPath, err := fileaccess.MakeEmptyLocalDirectory(workingDir, "download")
if err != nil {
err = fmt.Errorf("Failed to generate directory for importer downloads: %v", err)
//dl.log.Errorf("%v", err)
return "", "", 0, "", err
return "", "", []string{}, err
}
unzippedPath, err := fileaccess.MakeEmptyLocalDirectory(workingDir, "unzipped")
if err != nil {
err = fmt.Errorf("Failed to generate directory for importer unzips: %v", err)
//dl.log.Errorf("%v", err)
return "", "", 0, "", err
return "", "", []string{}, err
}

// Download all zip files from archive for this dataset ID, and extract them as required
dl.log.Debugf("Downloading archived zip files...")

zipCount, lastZipName, err := dl.downloadArchivedZipsForDataset(datasetID, downloadPath, unzippedPath)
zipFilesOrdered, err := dl.downloadArchivedZipsForDataset(datasetID, downloadPath, unzippedPath)
if err != nil {
err = fmt.Errorf("Failed to download archived zip files for dataset ID: %v. Error: %v", datasetID, err)
//dl.log.Errorf("%v", err)
return downloadPath, unzippedPath, zipCount, lastZipName, err
return downloadPath, unzippedPath, zipFilesOrdered, err
}

dl.log.Debugf("Dataset %v downloaded %v zip files from archive", datasetID, zipCount)
return downloadPath, unzippedPath, zipCount, lastZipName, nil
dl.log.Debugf("Dataset %v downloaded %v zip files from archive", datasetID, len(zipFilesOrdered))
return downloadPath, unzippedPath, zipFilesOrdered, nil
}

func (dl *DatasetArchiveDownloader) DownloadPseudoIntensityRangesFile(configBucket string, downloadPath string, version string) (string, error) {
Expand Down Expand Up @@ -121,45 +121,45 @@ func (dl *DatasetArchiveDownloader) fetchFile(bucketFrom string, pathFrom string
// Returns 2 things:
// Ordered list of archived zip files that were downloaded
// Error if there was one
func (dl *DatasetArchiveDownloader) downloadArchivedZipsForDataset(datasetID string, downloadPath string, unzippedPath string) (int, string, error) {
func (dl *DatasetArchiveDownloader) downloadArchivedZipsForDataset(datasetID string, downloadPath string, unzippedPath string) ([]string, error) {
// Download all zip files that have the dataset ID prefixed in their file name
// Save them into downloadPath, then unzip them in timestamp order into unzippedPath
archiveSearchPath := path.Join(filepaths.RootArchive, datasetID)
dl.log.Infof("Searching for archived files in: s3://%v/%v", dl.datasetBucket, archiveSearchPath)

archivedFiles, err := dl.remoteFS.ListObjects(dl.datasetBucket, archiveSearchPath)
if err != nil {
return 0, "", err
return []string{}, err
}

orderedArchivedFiles, err := getOrderedArchiveFiles(archivedFiles)

if err != nil {
// Stop here if we find a bad file
return 0, "", err
return []string{}, err
}

fileCount := 0

for _, filePath := range orderedArchivedFiles {
fileName := path.Base(filePath)
if !strings.HasSuffix(fileName, ".zip") {
return 0, "", errors.New("Expected zip file, got: " + fileName)
return []string{}, errors.New("Expected zip file, got: " + fileName)
}

savePath := filepath.Join(downloadPath, fileName)
err = dl.fetchFile(dl.datasetBucket, filePath, savePath)

if err != nil {
return 0, "", err
return []string{}, err
}

dl.log.Debugf("Unzipping: \"%v\"", savePath)

// Unzip the file
unzippedFileNames, err := utils.UnzipDirectory(savePath, unzippedPath, false)
if err != nil {
return 0, "", err
return []string{}, err
}

fileCount += len(unzippedFileNames)
Expand All @@ -181,7 +181,7 @@ func (dl *DatasetArchiveDownloader) downloadArchivedZipsForDataset(datasetID str
}

dl.log.Infof("Downloaded %v zip files, unzipped %v files. Last file name: %v", len(orderedArchivedFiles), fileCount, lastFileName)
return len(orderedArchivedFiles), filepath.Base(lastFileName), nil
return orderedArchivedFiles, nil
}

func (dl *DatasetArchiveDownloader) DownloadUserCustomisationsForDataset(datasetID string, downloadPath string) error {
Expand Down
6 changes: 3 additions & 3 deletions api/dataimport/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,13 @@ func ImportDataset(

// Firstly, we download from the archive
archive := datasetArchive.NewDatasetArchiveDownloader(remoteFS, localFS, log, datasetBucket, manualUploadBucket)
localDownloadPath, localUnzippedPath, zipCount, _, err := archive.DownloadFromDatasetArchive(datasetID, workingDir)
localDownloadPath, localUnzippedPath, zipFiles, err := archive.DownloadFromDatasetArchive(datasetID, workingDir)
if err != nil {
return workingDir, savedSummary, "", false, err
}

// If no zip files were loaded, maybe this dataset is a manually uploaded one, try to import from there instead
if zipCount == 0 {
if len(zipFiles) == 0 {
log.Infof("No zip files found in archive, dataset may have been manually uploaded. Trying to download...")
localDownloadPath, localUnzippedPath, err = archive.DownloadFromDatasetUploads(datasetID, workingDir)
if err != nil {
Expand Down Expand Up @@ -154,7 +154,7 @@ func ImportDataset(
}
}

return workingDir, savedSummary, updatenotificationtype, !justArchived && zipCount > 1, err
return workingDir, savedSummary, updatenotificationtype, !justArchived && len(zipFiles) > 1, err
}

// ImportFromLocalFileSystem - As the name says, imports from directory on local file system
Expand Down
Loading

0 comments on commit 7a0c922

Please sign in to comment.