diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java index c53717306fcc..224dc5a656c7 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java @@ -17,6 +17,7 @@ import java.nio.file.FileSystems; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.nio.file.StandardCopyOption; import java.sql.SQLException; import java.util.ArrayList; @@ -33,8 +34,8 @@ import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.compress.archivers.ArchiveException; -import org.apache.commons.compress.archivers.ArchiveInputStream; -import org.apache.commons.compress.archivers.ArchiveStreamFactory; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.dspace.app.rest.Parameter; @@ -82,7 +83,8 @@ @Component(MetadataBitstreamWrapperRest.CATEGORY + "." + MetadataBitstreamWrapperRest.NAME) public class MetadataBitstreamRestRepository extends DSpaceRestRepository { private static Logger log = org.apache.logging.log4j.LogManager.getLogger(MetadataBitstreamRestRepository.class); - + private final String ARCHIVE_TYPE_ZIP = "zip"; + private final String ARCHIVE_TYPE_TAR = "tar"; // This constant is used to limit the length of the preview content stored in the database to prevent // the database from being overloaded with large amounts of data. private static final int MAX_PREVIEW_COUNT_LENGTH = 2000; @@ -329,17 +331,19 @@ private List processInputStreamToFilePreview(Context context, Bitstrea } else { String data = ""; if (bitstream.getFormat(context).getMIMEType().equals("application/zip")) { - data = extractFile(inputStream, "zip"); + data = extractFile(inputStream, ARCHIVE_TYPE_ZIP); try { fileInfos = FileTreeViewGenerator.parse(data); } catch (Exception e) { log.error("Cannot extract file content because: {}", e.getMessage()); } } else if (bitstream.getFormat(context).getMIMEType().equals("application/x-tar")) { - ArchiveInputStream is = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.TAR, - inputStream); - data = extractFile(is, "tar"); - fileInfos = FileTreeViewGenerator.parse(data); + data = extractFile(inputStream, ARCHIVE_TYPE_TAR); + try { + fileInfos = FileTreeViewGenerator.parse(data); + } catch (Exception e) { + log.error("Cannot extract file content because: {}", e.getMessage()); + } } } return fileInfos; @@ -382,76 +386,108 @@ private String composePreviewURL(Context context, Item item, Bitstream bitstream return url; } - /** - * Convert ZIP file into structured String. - * @param inputStream Input stream with ZIP content - * @param fileType ZIP/TAR - * @return structured String + * Creates a temporary file with the appropriate extension based on the specified file type. + * @param fileType the type of file for which to create a temporary file + * @return a Path object representing the temporary file + * @throws IOException if an I/O error occurs while creating the file */ - public String extractFile(InputStream inputStream, String fileType) { - List filePaths = new ArrayList<>(); - Path tempFile = null; - FileSystem zipFileSystem = null; + private Path createTempFile(String fileType) throws IOException { + String extension = ARCHIVE_TYPE_TAR.equals(fileType) ? + String.format(".%s", ARCHIVE_TYPE_TAR) : String.format(".%s", ARCHIVE_TYPE_ZIP); + return Files.createTempFile("temp", extension); + } - try { - switch (fileType) { - case "tar": - tempFile = Files.createTempFile("temp", ".tar"); - break; - default: - tempFile = Files.createTempFile("temp", ".zip"); + /** + * Adds a file path and its size to the list of file paths. + * If the path represents a directory, appends a "/" to the path. + * @param filePaths the list of file paths to add to + * @param path the file or directory path + * @param size the size of the file or directory + */ + private void addFilePath(List filePaths, String path, long size) { + String fileInfo = (Files.isDirectory(Paths.get(path))) ? path + "/|" + size : path + "|" + size; + filePaths.add(fileInfo); + } + /** + * Processes a TAR file, extracting its entries and adding their paths to the provided list. + * @param filePaths the list to populate with the extracted file paths + * @param tempFile the temporary TAR file to process + * @throws IOException if an I/O error occurs while reading the TAR file + */ + private void processTarFile(List filePaths, Path tempFile) throws IOException { + try (InputStream fi = Files.newInputStream(tempFile); + TarArchiveInputStream tis = new TarArchiveInputStream(fi)) { + TarArchiveEntry entry; + while ((entry = tis.getNextTarEntry()) != null) { + addFilePath(filePaths, entry.getName(), entry.getSize()); } + } + } - Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING); + /** + * Processes a ZIP file, extracting its entries and adding their paths to the provided list. + * @param filePaths the list to populate with the extracted file paths + * @param zipFileSystem the FileSystem object representing the ZIP file + * @throws IOException if an I/O error occurs while reading the ZIP file + */ + private void processZipFile(List filePaths, FileSystem zipFileSystem) throws IOException { + Path root = zipFileSystem.getPath("/"); + Files.walk(root).forEach(path -> { + try { + long fileSize = Files.size(path); + addFilePath(filePaths, path.toString().substring(1), fileSize); + } catch (IOException e) { + log.error("An error occurred while getting the size of the zip file.", e); + } + }); + } - zipFileSystem = FileSystems.newFileSystem(tempFile, (ClassLoader) null); - Path root = zipFileSystem.getPath("/"); - Files.walk(root) - .forEach(path -> { - try { - long fileSize = Files.size(path); - if (Files.isDirectory(path)) { - filePaths.add(path.toString().substring(1) + "/|" + fileSize ); - } else { - filePaths.add(path.toString().substring(1) + "|" + fileSize ); - } - } catch (IOException e) { - e.printStackTrace(); - } - }); - } catch (IOException e) { - e.printStackTrace(); - } finally { - if (zipFileSystem != null) { - try { - zipFileSystem.close(); - } catch (IOException e) { - e.printStackTrace(); - } + /** + * Closes the specified FileSystem resource if it is not null. + * @param zipFileSystem the FileSystem to close + */ + private void closeFileSystem(FileSystem zipFileSystem) { + if (Objects.nonNull(zipFileSystem)) { + try { + zipFileSystem.close(); + } catch (IOException e) { + log.error("An error occurred while closing the zip file.", e); } + } + } - if (tempFile != null) { - try { - Files.delete(tempFile); - } catch (IOException e) { - e.printStackTrace(); - } + /** + * Deletes the specified temporary file if it is not null. + * @param tempFile the Path object representing the temporary file to delete + */ + private void deleteTempFile(Path tempFile) { + if (Objects.nonNull(tempFile)) { + try { + Files.delete(tempFile); + } catch (IOException e) { + log.error("An error occurred while deleting temp file.", e); } } + } + /** + * Builds an XML response string based on the provided list of file paths. + * @param filePaths the list of file paths to include in the XML response + * @return an XML string representation of the file paths + */ + private String buildXmlResponse(List filePaths) { // Is a folder regex String folderRegex = "/|\\d+"; Pattern pattern = Pattern.compile(folderRegex); StringBuilder sb = new StringBuilder(); - sb.append(("")); + sb.append(""); Iterator iterator = filePaths.iterator(); int fileCounter = 0; - while ((iterator.hasNext() && fileCounter < maxPreviewCount)) { + while (iterator.hasNext() && fileCounter < maxPreviewCount) { String filePath = iterator.next(); - // Check if the file is a folder Matcher matcher = pattern.matcher(filePath); if (!matcher.matches()) { @@ -464,10 +500,46 @@ public String extractFile(InputStream inputStream, String fileType) { if (fileCounter > maxPreviewCount) { sb.append("...too many files...|0"); } - sb.append(("")); + sb.append(""); return sb.toString(); } + /** + * Extracts files from an InputStream, processes them based on the specified file type (tar or zip), + * and returns an XML representation of the file paths. + * @param inputStream the InputStream containing the file data + * @param fileType the type of file to extract ("tar" or "zip") + * @return an XML string representing the extracted file paths + */ + public String extractFile(InputStream inputStream, String fileType) { + List filePaths = new ArrayList<>(); + Path tempFile = null; + FileSystem zipFileSystem = null; + + try { + // Create a temporary file based on the file type + tempFile = createTempFile(fileType); + + // Copy the input stream to the temporary file + Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING); + + // Process the file based on its type + if (ARCHIVE_TYPE_TAR.equals(fileType)) { + processTarFile(filePaths, tempFile); + } else { + zipFileSystem = FileSystems.newFileSystem(tempFile, (ClassLoader) null); + processZipFile(filePaths, zipFileSystem); + } + } catch (IOException e) { + log.error(String.format("An error occurred while extracting file of type %s.", fileType), e); + } finally { + closeFileSystem(zipFileSystem); + deleteTempFile(tempFile); + } + + return buildXmlResponse(filePaths); + } + /** * Read input stream and return content as String * @param inputStream to read