Skip to content

Commit

Permalink
Implement a search lock mechanism for citations relations (JabRef#11189
Browse files Browse the repository at this point in the history
…):

* Solve part of task 2: make impossible to force a search on a BibEntry over a week since last insertion
* The MVStoreDAO search lock is based on a timestamp map (doi -> lastInsertionDate)
* All time computation are based on UTC
* The LRU cache will always return true -> the computer could stay up during a week, leaving cache in memory
  • Loading branch information
alexandre-cremieux committed Nov 24, 2024
1 parent 01f6da4 commit 337780d
Show file tree
Hide file tree
Showing 11 changed files with 184 additions and 85 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ public SearchCitationsRelationsService(
}

public List<BibEntry> searchReferences(BibEntry referencer, boolean forceUpdate) {
if (forceUpdate || !this.relationsRepository.containsReferences(referencer)) {
if ((forceUpdate && this.relationsRepository.isReferencesUpdatable(referencer))
|| !this.relationsRepository.containsReferences(referencer)) {
try {
var references = this.citationFetcher.searchCiting(referencer);
if (!references.isEmpty()) {
Expand All @@ -40,7 +41,8 @@ public List<BibEntry> searchReferences(BibEntry referencer, boolean forceUpdate)
}

public List<BibEntry> searchCitations(BibEntry cited, boolean forceUpdate) {
if (forceUpdate || !this.relationsRepository.containsCitations(cited)) {
if ((forceUpdate && this.relationsRepository.isCitationsUpdatable(cited))
|| !this.relationsRepository.containsCitations(cited)) {
try {
var citations = this.citationFetcher.searchCitedBy(cited);
if (!citations.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,8 @@ public interface BibEntryRelationDAO {
void cacheOrMergeRelations(BibEntry entry, List<BibEntry> relations);

boolean containsKey(BibEntry entry);

default boolean isUpdatable(BibEntry entry) {
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@

import org.jabref.model.entry.BibEntry;

public class ChainBibEntryRelationDAO implements BibEntryRelationDAO {
public class BibEntryRelationDAOChain implements BibEntryRelationDAO {

private static final BibEntryRelationDAO EMPTY = new ChainBibEntryRelationDAO(null, null);
private static final BibEntryRelationDAO EMPTY = new BibEntryRelationDAOChain(null, null);

private final BibEntryRelationDAO current;
private final BibEntryRelationDAO next;

ChainBibEntryRelationDAO(BibEntryRelationDAO current, BibEntryRelationDAO next) {
BibEntryRelationDAOChain(BibEntryRelationDAO current, BibEntryRelationDAO next) {
this.current = current;
this.next = next;
}
Expand Down Expand Up @@ -44,10 +44,16 @@ public boolean containsKey(BibEntry entry) {
|| (this.next != EMPTY && this.next.containsKey(entry));
}

@Override
public boolean isUpdatable(BibEntry entry) {
return this.current.isUpdatable(entry)
&& (this.next == EMPTY || this.next.isUpdatable(entry));
}

public static BibEntryRelationDAO of(BibEntryRelationDAO... dao) {
return List.of(dao)
.reversed()
.stream()
.reduce(EMPTY, (acc, current) -> new ChainBibEntryRelationDAO(current, acc));
.reduce(EMPTY, (acc, current) -> new BibEntryRelationDAOChain(current, acc));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@ public interface BibEntryRelationsRepository {

boolean containsCitations(BibEntry entry);

boolean isCitationsUpdatable(BibEntry entry);

void insertReferences(BibEntry entry, List<BibEntry> citations);

List<BibEntry> readReferences(BibEntry entry);

boolean containsReferences(BibEntry entry);

boolean isReferencesUpdatable(BibEntry entry);
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ public class ChainBibEntryRelationsRepository implements BibEntryRelationsReposi
private final BibEntryRelationDAO referencesDao;

public ChainBibEntryRelationsRepository(Path citationsStore, Path relationsStore) {
this.citationsDao = ChainBibEntryRelationDAO.of(
this.citationsDao = BibEntryRelationDAOChain.of(
LRUCacheBibEntryRelationsDAO.CITATIONS, new MVStoreBibEntryRelationDAO(citationsStore, "citations")
);
this.referencesDao = ChainBibEntryRelationDAO.of(
this.referencesDao = BibEntryRelationDAOChain.of(
LRUCacheBibEntryRelationsDAO.REFERENCES, new MVStoreBibEntryRelationDAO(relationsStore, "relations")
);
}
Expand All @@ -37,6 +37,11 @@ public boolean containsCitations(BibEntry entry) {
return citationsDao.containsKey(entry);
}

@Override
public boolean isCitationsUpdatable(BibEntry entry) {
return citationsDao.isUpdatable(entry);
}

@Override
public void insertReferences(BibEntry entry, List<BibEntry> references) {
referencesDao.cacheOrMergeRelations(
Expand All @@ -53,4 +58,9 @@ public List<BibEntry> readReferences(BibEntry entry) {
public boolean containsReferences(BibEntry entry) {
return referencesDao.containsKey(entry);
}

@Override
public boolean isReferencesUpdatable(BibEntry entry) {
return referencesDao.isUpdatable(entry);
}
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,40 @@
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.time.Clock;
import java.time.ZoneId;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.time.LocalDateTime;

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.entry.types.StandardEntryType;

import com.google.common.annotations.VisibleForTesting;
import org.h2.mvstore.MVMap;
import org.h2.mvstore.MVStore;
import org.h2.mvstore.WriteBuffer;
import org.h2.mvstore.type.BasicDataType;

public class MVStoreBibEntryRelationDAO implements BibEntryRelationDAO {

private final static ZoneId TIME_STAMP_ZONE_ID = ZoneId.of("UTC");

private final String mapName;
private final String insertionTimeStampMapName;
private final MVStore.Builder storeConfiguration;
private final MVMap.Builder<String, LinkedHashSet<BibEntry>> mapConfiguration =
new MVMap.Builder<String, LinkedHashSet<BibEntry>>().valueType(new BibEntryHashSetSerializer());

MVStoreBibEntryRelationDAO(Path path, String mapName) {
this.mapName = mapName;
this.insertionTimeStampMapName = mapName + "-insertion-timestamp";
this.storeConfiguration = new MVStore.Builder().autoCommitDisabled().fileName(path.toAbsolutePath().toString());
}

Expand All @@ -47,19 +55,30 @@ public List<BibEntry> getRelations(BibEntry entry) {

@Override
synchronized public void cacheOrMergeRelations(BibEntry entry, List<BibEntry> relations) {
if (relations.isEmpty()) {
return;
}
entry.getDOI().ifPresent(doi -> {
try (var store = this.storeConfiguration.open()) {
// Save the relations
MVMap<String, LinkedHashSet<BibEntry>> relationsMap = store.openMap(mapName, mapConfiguration);
var relationsAlreadyStored = relationsMap.getOrDefault(doi.getDOI(), new LinkedHashSet<>());
relationsAlreadyStored.addAll(relations);
relationsMap.put(doi.getDOI(), relationsAlreadyStored);

// Save insertion timestamp
var insertionTime = LocalDateTime.now(TIME_STAMP_ZONE_ID);
MVMap<String, LocalDateTime> insertionTimeStampMap = store.openMap(insertionTimeStampMapName);
insertionTimeStampMap.put(doi.getDOI(), insertionTime);

// Commit
store.commit();
}
});
}

@Override
public boolean containsKey(BibEntry entry) {
synchronized public boolean containsKey(BibEntry entry) {
return entry
.getDOI()
.map(doi -> {
Expand All @@ -71,6 +90,29 @@ public boolean containsKey(BibEntry entry) {
.orElse(false);
}

@Override
synchronized public boolean isUpdatable(BibEntry entry) {
var clock = Clock.system(TIME_STAMP_ZONE_ID);
return this.isUpdatable(entry, clock);
}

@VisibleForTesting
boolean isUpdatable(final BibEntry entry, final Clock clock) {
final var executionTime = LocalDateTime.now(clock);
return entry
.getDOI()
.map(doi -> {
try (var store = this.storeConfiguration.open()) {
MVMap<String, LocalDateTime> insertionTimeStampMap = store.openMap(insertionTimeStampMapName);
return insertionTimeStampMap.getOrDefault(doi.getDOI(), executionTime);
}
})
.map(lastExecutionTime ->
lastExecutionTime.equals(executionTime) || lastExecutionTime.isBefore(executionTime.minusWeeks(1))
)
.orElse(true);
}

private static class BibEntrySerializer extends BasicDataType<BibEntry> {

private final static String FIELD_SEPARATOR = "--";
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
package org.jabref.logic.citation.service;
package org.jabref.logic.citation;

import java.util.HashMap;
import java.util.List;

import org.jabref.logic.citation.SearchCitationsRelationsService;
import org.jabref.logic.citation.repository.BibEntryRelationsRepositoryHelpersForTest;
import org.jabref.logic.importer.fetcher.CitationFetcherHelpersForTest;
import org.jabref.model.entry.BibEntry;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

class ChainBibEntryRelationDAOTest {
class BibEntryRelationDAOChainTest {

private static Stream<BibEntry> createBibEntries() {
return IntStream
.range(0, 150)
.mapToObj(ChainBibEntryRelationDAOTest::createBibEntry);
.mapToObj(BibEntryRelationDAOChainTest::createBibEntry);
}

private static BibEntry createBibEntry(int i) {
Expand All @@ -37,6 +37,7 @@ private static BibEntry createBibEntry(int i) {
* Create a fake list of relations for a bibEntry based on the {@link PaperDetails#toBibEntry()} logic
* that corresponds to this use case: we want to make sure that relations coming from SemanticScholar
* and mapped as BibEntry will be serializable by the MVStore.
*
* @param entry should not be null
* @return never empty
*/
Expand Down Expand Up @@ -68,7 +69,7 @@ private static Stream<Arguments> createCacheAndBibEntry() {
});
}

private class DaoMock implements BibEntryRelationDAO {
private static class DaoMock implements BibEntryRelationDAO {

Map<BibEntry, List<BibEntry>> table = new HashMap<>();

Expand All @@ -86,6 +87,11 @@ public void cacheOrMergeRelations(BibEntry entry, List<BibEntry> relations) {
public boolean containsKey(BibEntry entry) {
return this.table.containsKey(entry);
}

@Override
public boolean isUpdatable(BibEntry entry) {
return !this.containsKey(entry);
}
}

@ParameterizedTest
Expand All @@ -95,7 +101,7 @@ void theChainShouldReadFromFirstNode(BibEntryRelationDAO dao, BibEntry entry) {
var relations = createRelations(entry);
dao.cacheOrMergeRelations(entry, relations);
var secondDao = new DaoMock();
var doaChain = ChainBibEntryRelationDAO.of(dao, secondDao);
var doaChain = BibEntryRelationDAOChain.of(dao, secondDao);

// WHEN
var relationsFromChain = doaChain.getRelations(entry);
Expand All @@ -112,7 +118,7 @@ void theChainShouldReadFromSecondNode(BibEntryRelationDAO dao, BibEntry entry) {
var relations = createRelations(entry);
dao.cacheOrMergeRelations(entry, relations);
var firstDao = new DaoMock();
var doaChain = ChainBibEntryRelationDAO.of(firstDao, dao);
var doaChain = BibEntryRelationDAOChain.of(firstDao, dao);

// WHEN
var relationsFromChain = doaChain.getRelations(entry);
Expand All @@ -128,7 +134,7 @@ void theChainShouldReadFromSecondNodeAndRecopyToFirstNode(BibEntryRelationDAO da
// GIVEN
var relations = createRelations(entry);
var firstDao = new DaoMock();
var doaChain = ChainBibEntryRelationDAO.of(firstDao, dao);
var doaChain = BibEntryRelationDAOChain.of(firstDao, dao);

// WHEN
doaChain.cacheOrMergeRelations(entry, relations);
Expand All @@ -142,18 +148,34 @@ void theChainShouldReadFromSecondNodeAndRecopyToFirstNode(BibEntryRelationDAO da

@ParameterizedTest
@MethodSource("createCacheAndBibEntry")
void theChainShouldContainAKeyEvenIfItWasOnlyInsertedInLastNode(BibEntryRelationDAO dao, BibEntry entry) {
void theChainShouldContainAKeyEvenIfItWasOnlyInsertedInLastNode(BibEntryRelationDAO secondDao, BibEntry entry) {
// GIVEN
var relations = createRelations(entry);
var firstDao = new DaoMock();
var doaChain = ChainBibEntryRelationDAO.of(firstDao, dao);
var doaChain = BibEntryRelationDAOChain.of(firstDao, secondDao);

// WHEN
dao.cacheOrMergeRelations(entry, relations);
secondDao.cacheOrMergeRelations(entry, relations);

// THEN
Assertions.assertFalse(firstDao.containsKey(entry));
boolean doesChainContainsTheKey = doaChain.containsKey(entry);
Assertions.assertTrue(doaChain.containsKey(entry));
}

@ParameterizedTest
@MethodSource("createCacheAndBibEntry")
void theChainShouldNotBeUpdatableBeforeInsertionAndNotAfterAnInsertion(BibEntryRelationDAO dao, BibEntry entry) {
// GIVEN
var relations = createRelations(entry);
var lastDao = new DaoMock();
var daoChain = BibEntryRelationDAOChain.of(dao, lastDao);
Assertions.assertTrue(daoChain.isUpdatable(entry));

// WHEN
daoChain.cacheOrMergeRelations(entry, relations);

// THEN
Assertions.assertTrue(doesChainContainsTheKey);
Assertions.assertTrue(daoChain.containsKey(entry));
Assertions.assertFalse(daoChain.isUpdatable(entry));
}
}
Loading

0 comments on commit 337780d

Please sign in to comment.