Skip to content

Commit

Permalink
MVStoreDAO implementation for citations relations (JabRef#11189):
Browse files Browse the repository at this point in the history
* Solve task 1
* Implementation of a DAO chain: memory cache and MVStore
* Persist citations as relations to disk after a fetch
* Avoid fetching data if relations are available from MVStore
* Avoid reading data from MVStore if available in memory
* Consume less from network, minimize disk usage
  • Loading branch information
alexandre-cremieux committed Nov 17, 2024
1 parent 6a8b21b commit 01f6da4
Show file tree
Hide file tree
Showing 9 changed files with 330 additions and 107 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package org.jabref.gui.entryeditor.citationrelationtab;

import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;

Expand Down Expand Up @@ -43,8 +47,7 @@
import org.jabref.logic.bibtex.BibEntryWriter;
import org.jabref.logic.bibtex.FieldPreferences;
import org.jabref.logic.bibtex.FieldWriter;
import org.jabref.logic.citation.repository.LRUBibEntryRelationsCache;
import org.jabref.logic.citation.repository.LRUBibEntryRelationsRepository;
import org.jabref.logic.citation.repository.ChainBibEntryRelationsRepository;
import org.jabref.logic.citation.SearchCitationsRelationsService;
import org.jabref.logic.database.DuplicateCheck;
import org.jabref.logic.exporter.BibWriter;
Expand Down Expand Up @@ -112,12 +115,20 @@ public CitationRelationsTab(DialogService dialogService,

this.entryTypesManager = bibEntryTypesManager;
this.duplicateCheck = new DuplicateCheck(entryTypesManager);
var bibEntryRelationsRepository = new LRUBibEntryRelationsRepository(
new LRUBibEntryRelationsCache()
);
this.searchCitationsRelationsService = new SearchCitationsRelationsService(
new SemanticScholarCitationFetcher(preferences.getImporterPreferences()), bibEntryRelationsRepository
);

// Wires the citations-relations search service to a repository backed by two on-disk
// stores: one for citations, one for references.
// NOTE(review): the base directory below is a hard-coded absolute path inside one
// developer's home directory; it will not exist on other machines. It presumably should
// come from an application data directory or from preferences — TODO confirm and replace.
try {
var jabRefPath = Paths.get("/home/sacha/Documents/projects/JabRef");
// Both store locations are derived from the same base directory by string concatenation.
var citationsPath = Path.of(jabRefPath.toAbsolutePath() + File.separator + "citations");
var relationsPath = Path.of(jabRefPath.toAbsolutePath() + File.separator + "references");
var bibEntryRelationsRepository = new ChainBibEntryRelationsRepository(citationsPath, relationsPath);
this.searchCitationsRelationsService = new SearchCitationsRelationsService(
new SemanticScholarCitationFetcher(preferences.getImporterPreferences()), bibEntryRelationsRepository
);
} catch (Exception e) {
// NOTE(review): the stack trace is printed to stderr and the same exception is then
// rethrown wrapped (cause preserved), so the failure is reported twice; the broad
// catch of Exception also hides which call can actually fail.
e.printStackTrace();
throw new RuntimeException(e);
}

citationsRelationsTabViewModel = new CitationsRelationsTabViewModel(
databaseContext,
preferences,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package org.jabref.logic.citation.repository;

import java.util.List;

import org.jabref.model.entry.BibEntry;

/**
 * A {@link BibEntryRelationDAO} that links one DAO ({@code current}) to a follow-up DAO
 * ({@code next}), forming a chain that is consulted front to back.
 * <p>
 * Reads are answered by the first node that knows the entry; when the answer comes from
 * a later node it is written into the earlier node before being returned. Writes are
 * propagated to every node of the chain, last node first.
 * <p>
 * The chain is terminated by the shared {@link #EMPTY} sentinel, which has neither a
 * {@code current} nor a {@code next} DAO and behaves as a DAO that never holds anything.
 */
public class ChainBibEntryRelationDAO implements BibEntryRelationDAO {

    /**
     * Terminal node of every chain. It is also what {@link #of(BibEntryRelationDAO...)}
     * returns when called without arguments, so every method must tolerate its
     * {@code null} fields.
     */
    private static final BibEntryRelationDAO EMPTY = new ChainBibEntryRelationDAO(null, null);

    private final BibEntryRelationDAO current;
    private final BibEntryRelationDAO next;

    /**
     * @param current the DAO consulted first by this node; {@code null} only for the terminal node
     * @param next    the rest of the chain; {@code null} or {@link #EMPTY} means "end of chain"
     */
    ChainBibEntryRelationDAO(BibEntryRelationDAO current, BibEntryRelationDAO next) {
        this.current = current;
        this.next = next;
    }

    /**
     * Returns the relations of {@code entry} from the first node of the chain that holds them.
     *
     * @param entry the entry whose relations are requested
     * @return the relations as reported by {@code current}, or an empty list when this node
     *         is terminal or when {@code current} does not hold the entry and there is no
     *         further node to ask
     */
    @Override
    public List<BibEntry> getRelations(BibEntry entry) {
        // Terminal node (e.g. the result of of() without arguments): nothing is ever stored.
        if (this.current == null) {
            return List.of();
        }
        if (this.current.containsKey(entry)) {
            return this.current.getRelations(entry);
        }
        if (!hasNext()) {
            return List.of();
        }
        var relations = this.next.getRelations(entry);
        // NOTE(review): the result is written into current even when it is empty, i.e. also
        // when next does not hold the entry. Depending on how current implements
        // containsKey, a pure read miss may afterwards be reported as "contained" — confirm
        // this is intended.
        this.current.cacheOrMergeRelations(entry, relations);
        // Makes sure to obtain a copy and not a direct reference to what was inserted
        return this.current.getRelations(entry);
    }

    /**
     * Stores {@code relations} for {@code entry} in every node of the chain. The remainder
     * of the chain is updated before {@code current}.
     *
     * @param entry     the entry the relations belong to
     * @param relations the relations to store or merge
     */
    @Override
    public void cacheOrMergeRelations(BibEntry entry, List<BibEntry> relations) {
        if (hasNext()) {
            this.next.cacheOrMergeRelations(entry, relations);
        }
        // The terminal node has no backing DAO, so there is nothing to write to.
        if (this.current != null) {
            this.current.cacheOrMergeRelations(entry, relations);
        }
    }

    /**
     * @param entry the entry to look up
     * @return {@code true} if any node of the chain reports that it holds {@code entry};
     *         always {@code false} for the terminal node
     */
    @Override
    public boolean containsKey(BibEntry entry) {
        if (this.current == null) {
            return false;
        }
        return this.current.containsKey(entry)
                || (hasNext() && this.next.containsKey(entry));
    }

    /**
     * Builds a chain from the given DAOs, in the given order: the first argument is
     * consulted first, the last argument is the last node before the terminal sentinel.
     *
     * @param dao the DAOs to chain; must not contain {@code null}
     * @return the head of the chain, or the terminal (always empty) DAO when no DAO is given
     */
    public static BibEntryRelationDAO of(BibEntryRelationDAO... dao) {
        // Fold from the back so that each new node wraps the chain built so far.
        return List.of(dao)
                   .reversed()
                   .stream()
                   .reduce(EMPTY, (acc, current) -> new ChainBibEntryRelationDAO(current, acc));
    }

    /**
     * Tells whether there is a further, non-terminal node after this one.
     */
    private boolean hasNext() {
        return this.next != null && this.next != EMPTY;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package org.jabref.logic.citation.repository;

import java.nio.file.Path;
import java.util.List;
import java.util.Objects;

import org.jabref.model.entry.BibEntry;

/**
 * {@link BibEntryRelationsRepository} that keeps citations and references in two separate
 * DAO chains. Each chain is made of an {@link LRUCacheBibEntryRelationsDAO} constant
 * followed by an {@link MVStoreBibEntryRelationDAO} opened on the given path.
 */
public class ChainBibEntryRelationsRepository implements BibEntryRelationsRepository {

    private final BibEntryRelationDAO citationsDao;
    private final BibEntryRelationDAO referencesDao;

    /**
     * @param citationsStore path handed to the MVStore DAO that holds citations
     * @param relationsStore path handed to the MVStore DAO that holds references
     */
    public ChainBibEntryRelationsRepository(Path citationsStore, Path relationsStore) {
        BibEntryRelationDAO citationsOnDisk = new MVStoreBibEntryRelationDAO(citationsStore, "citations");
        this.citationsDao = ChainBibEntryRelationDAO.of(LRUCacheBibEntryRelationsDAO.CITATIONS, citationsOnDisk);

        BibEntryRelationDAO referencesOnDisk = new MVStoreBibEntryRelationDAO(relationsStore, "relations");
        this.referencesDao = ChainBibEntryRelationDAO.of(LRUCacheBibEntryRelationsDAO.REFERENCES, referencesOnDisk);
    }

    /**
     * Stores the citations of {@code entry}; a {@code null} list is stored as an empty one.
     */
    @Override
    public void insertCitations(BibEntry entry, List<BibEntry> citations) {
        this.citationsDao.cacheOrMergeRelations(entry, emptyIfNull(citations));
    }

    /**
     * @return the citations the citations chain reports for {@code entry}
     */
    @Override
    public List<BibEntry> readCitations(BibEntry entry) {
        return this.citationsDao.getRelations(entry);
    }

    /**
     * @return whether the citations chain reports holding {@code entry}
     */
    @Override
    public boolean containsCitations(BibEntry entry) {
        return this.citationsDao.containsKey(entry);
    }

    /**
     * Stores the references of {@code entry}; a {@code null} list is stored as an empty one.
     */
    @Override
    public void insertReferences(BibEntry entry, List<BibEntry> references) {
        this.referencesDao.cacheOrMergeRelations(entry, emptyIfNull(references));
    }

    /**
     * @return the references the references chain reports for {@code entry}
     */
    @Override
    public List<BibEntry> readReferences(BibEntry entry) {
        return this.referencesDao.getRelations(entry);
    }

    /**
     * @return whether the references chain reports holding {@code entry}
     */
    @Override
    public boolean containsReferences(BibEntry entry) {
        return this.referencesDao.containsKey(entry);
    }

    /**
     * Substitutes an empty list for a {@code null} one so the DAOs never receive {@code null}.
     */
    private static List<BibEntry> emptyIfNull(List<BibEntry> relations) {
        if (relations == null) {
            return List.of();
        }
        return relations;
    }
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,16 @@

import org.eclipse.jgit.util.LRUMap;

public abstract class LRUCacheBibEntryRelationsDAO implements BibEntryRelationDAO {
import static org.jabref.logic.citation.repository.LRUCacheBibEntryRelationsDAO.Configuration.MAX_CACHED_ENTRIES;

private static final Integer MAX_CACHED_ENTRIES = 100;
public enum LRUCacheBibEntryRelationsDAO implements BibEntryRelationDAO {

CITATIONS(new LRUMap<>(MAX_CACHED_ENTRIES, MAX_CACHED_ENTRIES)),
REFERENCES(new LRUMap<>(MAX_CACHED_ENTRIES, MAX_CACHED_ENTRIES));

public static class Configuration {
public static final int MAX_CACHED_ENTRIES = 100;
}

private final Map<DOI, Set<BibEntry>> relationsMap;

Expand Down Expand Up @@ -46,30 +53,4 @@ public boolean containsKey(BibEntry entry) {
public void clearEntries() {
this.relationsMap.clear();
}

public static class LRUCacheBibEntryCitations extends LRUCacheBibEntryRelationsDAO {

private final static LRUCacheBibEntryCitations CITATIONS_CACHE = new LRUCacheBibEntryCitations();

private LRUCacheBibEntryCitations() {
super(new LRUMap<>(MAX_CACHED_ENTRIES, MAX_CACHED_ENTRIES));
}

public static LRUCacheBibEntryCitations getInstance() {
return CITATIONS_CACHE;
}
}

public static class LRUCacheBibEntryReferences extends LRUCacheBibEntryRelationsDAO {

private final static LRUCacheBibEntryReferences REFERENCES_CACHE = new LRUCacheBibEntryReferences();

private LRUCacheBibEntryReferences() {
super(new LRUMap<>(MAX_CACHED_ENTRIES, MAX_CACHED_ENTRIES));
}

public static LRUCacheBibEntryReferences getInstance() {
return REFERENCES_CACHE;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,29 @@
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.entry.types.StandardEntryType;

import org.h2.mvstore.DataUtils;
import org.h2.mvstore.MVMap;
import org.h2.mvstore.MVStore;
import org.h2.mvstore.WriteBuffer;
import org.h2.mvstore.type.BasicDataType;

public class MVStoreBibEntryRelationDAO implements BibEntryRelationDAO {

private final Path path;
private final String mapName;
private final MVStore.Builder storeConfiguration;
private final MVMap.Builder<String, LinkedHashSet<BibEntry>> mapConfiguration =
new MVMap.Builder<String, LinkedHashSet<BibEntry>>().valueType(new BibEntryHashSetSerializer());

MVStoreBibEntryRelationDAO(Path path, String mapName) {
this.path = Objects.requireNonNull(path);
this.mapName = mapName;
this.storeConfiguration = new MVStore.Builder().autoCommitDisabled().fileName(path.toAbsolutePath().toString());
}

@Override
public List<BibEntry> getRelations(BibEntry entry) {
return entry
.getDOI()
.map(doi -> {
try (var store = new MVStore.Builder().fileName(path.toAbsolutePath().toString()).open()) {
try (var store = this.storeConfiguration.open()) {
MVMap<String, LinkedHashSet<BibEntry>> relationsMap = store.openMap(mapName, mapConfiguration);
return relationsMap.getOrDefault(doi.getDOI(), new LinkedHashSet<>()).stream().toList();
}
Expand All @@ -49,7 +48,7 @@ public List<BibEntry> getRelations(BibEntry entry) {
@Override
synchronized public void cacheOrMergeRelations(BibEntry entry, List<BibEntry> relations) {
entry.getDOI().ifPresent(doi -> {
try (var store = new MVStore.Builder().fileName(path.toAbsolutePath().toString()).open()) {
try (var store = this.storeConfiguration.open()) {
MVMap<String, LinkedHashSet<BibEntry>> relationsMap = store.openMap(mapName, mapConfiguration);
var relationsAlreadyStored = relationsMap.getOrDefault(doi.getDOI(), new LinkedHashSet<>());
relationsAlreadyStored.addAll(relations);
Expand All @@ -64,7 +63,7 @@ public boolean containsKey(BibEntry entry) {
return entry
.getDOI()
.map(doi -> {
try (var store = new MVStore.Builder().fileName(path.toAbsolutePath().toString()).open()) {
try (var store = this.storeConfiguration.open()) {
MVMap<String, LinkedHashSet<BibEntry>> relationsMap = store.openMap(mapName, mapConfiguration);
return relationsMap.containsKey(doi.getDOI());
}
Expand All @@ -82,7 +81,7 @@ private static String toString(BibEntry entry) {
entry.getTitle().orElse("null"),
entry.getField(StandardField.YEAR).orElse("null"),
entry.getField(StandardField.AUTHOR).orElse("null"),
entry.getType().getDisplayName(),
entry.getType().getDisplayName() == null ? "null" : entry.getType().getDisplayName(),
entry.getDOI().map(DOI::getDOI).orElse("null"),
entry.getField(StandardField.URL).orElse("null"),
entry.getField(StandardField.ABSTRACT).orElse("null")
Expand Down Expand Up @@ -124,8 +123,9 @@ public void write(WriteBuffer buff, BibEntry bibEntry) {
@Override
public BibEntry read(ByteBuffer buff) {
int serializedEntrySize = buff.getInt();
var serializedEntry = DataUtils.readString(buff, serializedEntrySize);
return fromString(serializedEntry);
var serializedEntry = new byte[serializedEntrySize];
buff.get(serializedEntry);
return fromString(new String(serializedEntry, StandardCharsets.UTF_8));
}

@Override
Expand All @@ -140,6 +140,11 @@ public int compare(BibEntry a, BibEntry b) {
public BibEntry[] createStorage(int size) {
return new BibEntry[size];
}

@Override
public boolean isMemoryEstimationAllowed() {
return false;
}
}

private static class BibEntryHashSetSerializer extends BasicDataType<LinkedHashSet<BibEntry>> {
Expand All @@ -149,6 +154,7 @@ private static class BibEntryHashSetSerializer extends BasicDataType<LinkedHashS
/**
* Memory size is the sum of all aggregated bibEntries memory size plus 4 bytes.
* Those 4 bytes are used to store the length of the collection itself.
*
* @param bibEntries should not be null
* @return total size in memory of the serialized collection of bib entries
*/
Expand All @@ -174,8 +180,14 @@ public LinkedHashSet<BibEntry> read(ByteBuffer buff) {
}

@Override
@SuppressWarnings("unchecked")
public LinkedHashSet<BibEntry>[] createStorage(int size) {
return new LinkedHashSet[size];
}

@Override
public boolean isMemoryEstimationAllowed() {
return false;
}
}
}
Loading

0 comments on commit 01f6da4

Please sign in to comment.