Skip to content

Commit

Permalink
Fix some bugs with PDF imports (#12297)
Browse files Browse the repository at this point in the history
* Fix some bugs with PDF imports

* Fix checkstyle

* Fix tests

* Update from code review

* Fix importing files
  • Loading branch information
InAnYan authored Dec 23, 2024
1 parent 5e94f20 commit d4624f7
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ public List<ImportFilesResultItemViewModel> call() {
entry.clearField(StandardField.FILE);
// Modifiers do not work on macOS: https://bugs.openjdk.org/browse/JDK-8264172
// Similar code as org.jabref.gui.preview.PreviewPanel.PreviewPanel
DragDrop.handleDropOfFiles(files, transferMode, fileLinker, entry);
DragDrop.handleDropOfFiles(List.of(file), transferMode, fileLinker, entry);
entriesToAdd.addAll(pdfEntriesInFile);
addResultToList(file, true, Localization.lang("File was successfully imported as a new entry"));
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import java.util.List;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import javax.swing.undo.UndoManager;

Expand Down Expand Up @@ -145,43 +144,46 @@ public void startSearch() {
}

public void startImport() {
Path directory = this.getSearchDirectory();
List<Path> fileList = checkedFileListProperty.stream()
.map(item -> item.getValue().getPath())
.filter(path -> path.toFile().isFile())
.map(path -> directory.relativize(path))
.collect(Collectors.toList());
List<Path> fileList = checkedFileListProperty
.stream()
.map(TreeItem::getValue)
.map(FileNodeViewModel::getPath)
.filter(Files::isRegularFile)
.toList();

if (fileList.isEmpty()) {
LOGGER.warn("There are no valid files checked");
LOGGER.warn("There are no valid files checked for import");
return;
}
resultList.clear();

importFilesBackgroundTask = importHandler.importFilesInBackground(fileList, bibDatabase, preferences.getFilePreferences(), TransferMode.LINK)
.onRunning(() -> {
progressValueProperty.bind(importFilesBackgroundTask.workDonePercentageProperty());
progressTextProperty.bind(importFilesBackgroundTask.messageProperty());
taskActiveProperty.setValue(true);
})
.onFinished(() -> {
progressValueProperty.unbind();
progressTextProperty.unbind();
taskActiveProperty.setValue(false);
})
.onSuccess(resultList::addAll);
importFilesBackgroundTask = importHandler
.importFilesInBackground(fileList, bibDatabase, preferences.getFilePreferences(), TransferMode.LINK)
.onRunning(() -> {
progressValueProperty.bind(importFilesBackgroundTask.workDonePercentageProperty());
progressTextProperty.bind(importFilesBackgroundTask.messageProperty());
taskActiveProperty.setValue(true);
})
.onFinished(() -> {
progressValueProperty.unbind();
progressTextProperty.unbind();
taskActiveProperty.setValue(false);
})
.onSuccess(resultList::addAll);
importFilesBackgroundTask.executeWith(taskExecutor);
}

/**
* This starts the export of all files of all selected nodes in the file tree view.
*/
public void startExport() {
List<Path> fileList = checkedFileListProperty.stream()
.map(item -> item.getValue().getPath())
.filter(path -> path.toFile().isFile())
.toList();
List<Path> fileList = checkedFileListProperty
.stream()
.map(item -> item.getValue().getPath())
.filter(Files::isRegularFile)
.toList();
if (fileList.isEmpty()) {
LOGGER.warn("There are no valid files checked");
LOGGER.warn("There are no valid files checked for export");
return;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ public ParserResult importDatabase(Path filePath) {
List<BibEntry> result = new ArrayList<>(1);
try (PDDocument document = new XmpUtilReader().loadWithAutomaticDecryption(filePath)) {
String firstPageContents = getFirstPageContents(document);
String titleByFontSize = extractTitleFromDocument(document);
Optional<String> titleByFontSize = extractTitleFromDocument(document);
Optional<BibEntry> entry = getEntryFromPDFContent(firstPageContents, OS.NEWLINE, titleByFontSize);
entry.ifPresent(result::add);
} catch (EncryptedPdfsNotSupportedException e) {
Expand All @@ -216,7 +216,7 @@ public ParserResult importDatabase(Path filePath) {
return new ParserResult(result);
}

private static String extractTitleFromDocument(PDDocument document) throws IOException {
private static Optional<String> extractTitleFromDocument(PDDocument document) throws IOException {
TitleExtractorByFontSize stripper = new TitleExtractorByFontSize();
return stripper.getTitle(document);
}
Expand All @@ -230,7 +230,7 @@ public TitleExtractorByFontSize() {
this.textPositionsList = new ArrayList<>();
}

public String getTitle(PDDocument document) throws IOException {
public Optional<String> getTitle(PDDocument document) throws IOException {
this.setStartPage(1);
this.setEndPage(2);
this.writeText(document, new StringWriter());
Expand Down Expand Up @@ -266,7 +266,7 @@ private boolean isUnwantedText(TextPosition previousTextPosition, TextPosition t
return isFarAway(previousTextPosition, textPosition);
}

private String findLargestFontText(List<TextPosition> textPositions) {
private Optional<String> findLargestFontText(List<TextPosition> textPositions) {
Map<Float, StringBuilder> fontSizeTextMap = new TreeMap<>(Collections.reverseOrder());
TextPosition previousTextPosition = null;
for (TextPosition textPosition : textPositions) {
Expand All @@ -285,10 +285,10 @@ private String findLargestFontText(List<TextPosition> textPositions) {
for (Map.Entry<Float, StringBuilder> entry : fontSizeTextMap.entrySet()) {
String candidateText = entry.getValue().toString().trim();
if (isLegalTitle(candidateText)) {
return candidateText;
return Optional.of(candidateText);
}
}
return fontSizeTextMap.values().iterator().next().toString().trim();
return fontSizeTextMap.values().stream().findFirst().map(StringBuilder::toString).map(String::trim);
}

private boolean isLegalTitle(String candidateText) {
Expand Down Expand Up @@ -334,7 +334,7 @@ private boolean isThereSpace(TextPosition previous, TextPosition current) {
* is successful. Otherwise, an empty {@link Optional}.
*/
@VisibleForTesting
Optional<BibEntry> getEntryFromPDFContent(String firstpageContents, String lineSeparator, String titleByFontSize) {
Optional<BibEntry> getEntryFromPDFContent(String firstpageContents, String lineSeparator, Optional<String> titleByFontSize) {
String firstpageContentsUnifiedLineBreaks = StringUtil.unifyLineBreaks(firstpageContents, lineSeparator);

lines = firstpageContentsUnifiedLineBreaks.split(lineSeparator);
Expand Down Expand Up @@ -393,8 +393,8 @@ Optional<BibEntry> getEntryFromPDFContent(String firstpageContents, String lineS
title = streamlineTitle(curString);
// i points to the next non-empty line
curString = "";
if (!isNullOrEmpty(titleByFontSize)) {
title = titleByFontSize;
if (titleByFontSize.isPresent() && !isNullOrEmpty(titleByFontSize.get())) {
title = titleByFontSize.get();
}

// after title: authors
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,22 +136,10 @@ public ParserResult importDatabase(Path filePath) throws IOException {
return new ParserResult(List.of(entry));
}

/**
* A modified version of {@link PdfMergeMetadataImporter#importDatabase(Path)}, but it
* relativizes the {@code filePath} if there are working directories before parsing it
* into {@link PdfMergeMetadataImporter#importDatabase(Path)}
* (Otherwise no path modification happens).
*
* @param filePath The unrelativized {@code filePath}.
*/
public ParserResult importDatabase(Path filePath, BibDatabaseContext context, FilePreferences filePreferences) throws IOException {
Objects.requireNonNull(context);
Objects.requireNonNull(filePreferences);

List<Path> directories = context.getFileDirectories(filePreferences);

filePath = FileUtil.relativize(filePath, directories);

return importDatabase(filePath);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ void parsingEditorWithoutPagesorSeriesInformation() {
Corpus linguistics investigates human language by starting out from large
""";

assertEquals(Optional.of(entry), importer.getEntryFromPDFContent(firstPageContents, "\n", ""));
assertEquals(Optional.of(entry), importer.getEntryFromPDFContent(firstPageContents, "\n", Optional.empty()));
}

@Test
Expand All @@ -93,7 +93,7 @@ Smith, Lucy Anna (2014) Mortality in the Ornamental Fish Retail Sector: an Analy
UNSPECIFIED
Master of Research (MRes) thesis, University of Kent,.""";

assertEquals(Optional.of(entry), importer.getEntryFromPDFContent(firstPageContents, "\n", ""));
assertEquals(Optional.of(entry), importer.getEntryFromPDFContent(firstPageContents, "\n", Optional.empty()));
}

@Test
Expand Down Expand Up @@ -126,7 +126,7 @@ British Journal of Nutrition (2008), 99, 1–11 doi: 10.1017/S0007114507795296
British Journal of Nutrition
https://doi.org/10.1017/S0007114507795296 Published online by Cambridge University Press""";

assertEquals(Optional.of(entry), importer.getEntryFromPDFContent(firstPageContent, "\n", ""));
assertEquals(Optional.of(entry), importer.getEntryFromPDFContent(firstPageContent, "\n", Optional.empty()));
}

@ParameterizedTest
Expand Down

0 comments on commit d4624f7

Please sign in to comment.