Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MCR-3046 allow id generation independent from actual storage #2067

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
/*
* This file is part of *** M y C o R e ***
* See http://www.mycore.de/ for details.
*
* MyCoRe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MyCoRe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with MyCoRe. If not, see <http://www.gnu.org/licenses/>.
*/

package org.mycore.datamodel.common;
rsteph-de marked this conversation as resolved.
Show resolved Hide resolved

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.mycore.common.MCRException;
import org.mycore.common.MCRUtils;
import org.mycore.common.config.MCRConfiguration2;
import org.mycore.datamodel.metadata.MCRObjectID;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/**
* This class generates object ids based on a file based cache. The cache stores the last generated id for each
* base id. It is kept in a directory named "id_cache" inside the data directory of MyCoRe, which contains one
* file for each base id. Each file contains the last generated id as a string.
*/
public class MCRFileBaseCacheObjectIDGenerator implements MCRObjectIDGenerator {

// Logger shared by all static and instance methods of this class.
private static final Logger LOGGER = LogManager.getLogger();

// One in-JVM read/write lock per base id, created on demand via computeIfAbsent in
// getNextFreeId and getLastID. Shared by all generator instances because the map is static.
static ConcurrentHashMap<String, ReentrantReadWriteLock> locks = new ConcurrentHashMap<>();

/**
 * Resolves the cache file of the given base id below {@code <data dir>/id_cache} and makes sure that both
 * the directory and the file exist.
 * <p>
 * Creation is race tolerant: if another thread or another process creates the directory or the file between
 * the existence check and the create call, that is treated as success instead of an error.
 *
 * @param baseId the base id whose cache file is requested
 * @return the path of the cache file
 * @throws MCRException if the cache directory or the cache file cannot be created
 */
private static Path getCacheFilePath(String baseId) {
    Path idCachePath = getDataDirPath().resolve("id_cache");
    if (!Files.isDirectory(idCachePath)) {
        try {
            // createDirectories does not fail when the directory already exists, so concurrent creators
            // (even from another JVM) cannot trip over each other
            Files.createDirectories(idCachePath);
        } catch (IOException e) {
            throw new MCRException(
                "Could not create " + idCachePath.toAbsolutePath() + " directory", e);
        }
    }

    Path cacheFile = MCRUtils.safeResolve(idCachePath, baseId);
    if (!Files.exists(cacheFile)) {
        try {
            Files.createFile(cacheFile);
        } catch (java.nio.file.FileAlreadyExistsException ignored) {
            // somebody else created the file after our existence check; the file is there, which is all we need
        } catch (IOException e) {
            throw new MCRException("Could not create " + cacheFile.toAbsolutePath(), e);
        }
    }
    return cacheFile;
}

/**
 * Returns the path configured in the property {@code MCR.datadir}.
 * <p>
 * A path that does not exist yet is returned as is; only an existing path that is not a directory is
 * rejected.
 *
 * @return the configured data directory path
 * @throws MCRException if the configured path exists but is not a directory
 */
static Path getDataDirPath() {
    Path path = Paths.get(MCRConfiguration2.getStringOrThrow("MCR.datadir"));
    if (Files.exists(path) && !Files.isDirectory(path)) {
        // this branch is only reached for an existing non-directory, so the message must not claim
        // that the path is missing
        throw new MCRException("Data directory is not a directory: " + path);
    }
    return path;
}

/**
 * Overwrites the start of the cache file with the textual form of the given id.
 *
 * @param nextID the id to store
 * @param buffer scratch buffer, reset by this method before use
 * @param channel open, writable channel of the cache file
 * @param cacheFile path of the cache file, only used for the error message
 * @throws IOException if positioning or writing the channel fails
 * @throws MCRException if not all bytes of the id were written
 */
private static void writeNewID(MCRObjectID nextID, ByteBuffer buffer, FileChannel channel, Path cacheFile)
    throws IOException {
    buffer.clear();
    channel.position(0);

    final byte[] encodedId = nextID.toString().getBytes(StandardCharsets.UTF_8);
    buffer.put(encodedId);
    buffer.flip();

    if (channel.write(buffer) == encodedId.length) {
        return;
    }
    throw new MCRException("Could not write new ID to " + cacheFile.toAbsolutePath());
}

/**
* Sets the next free id for the given baseId by storing {@code next - 1} as the last generated id in the cache
* file of that base id. Should only be used for migration purposes. This method acquires neither the in-JVM lock
* nor a file lock, so the caller has to make sure that the cache file is not used by another thread or process.
* @param baseId the base id
* @param next the next free id to be returned by getNextFreeId (when that is called with maxInWorkflow = 0)
* @throws MCRException if the cache file cannot be opened or written
*/
public void setNextFreeId(String baseId, int next) {
Path cacheFile = getCacheFilePath(baseId);

// the buffer is sized for exactly one formatted id of this base
// (assumes formatID yields the same length for every number of a base — TODO confirm)
int idLengthInBytes = MCRObjectID.formatID(baseId, 1).getBytes(StandardCharsets.UTF_8).length;
try (
FileChannel channel = FileChannel.open(cacheFile, StandardOpenOption.WRITE,
StandardOpenOption.SYNC, StandardOpenOption.CREATE);){
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't we use here at least a FileLock, too?
A ReentrantWriteLock is probably not necessary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is only used by the migration Command.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i added javadoc that the caller has to do locking. The method should only be rarely used anyway.

ByteBuffer buffer = ByteBuffer.allocate(idLengthInBytes);
// NOTE(review): redundant, writeNewID positions the channel at 0 itself
channel.position(0);
// store next - 1: getNextFreeId adds maxInWorkflow + 1 to the stored id
writeNewID(MCRObjectID.getInstance(MCRObjectID.formatID(baseId, next-1)), buffer, channel, cacheFile);
} catch (FileNotFoundException e) {
// NOTE(review): FileChannel.open presumably reports a missing file as NoSuchFileException, which would
// end up in the IOException branch below — confirm whether this branch is reachable
throw new MCRException("Could not create " + cacheFile.toAbsolutePath(), e);
} catch (IOException e) {
throw new MCRException("Could not open " + cacheFile.toAbsolutePath(), e);
}
}

/**
 * Generates the next id for the given base id and stores it in the cache file of that base id.
 * <p>
 * Threads of this JVM are serialized by the per-base-id write lock; in addition a {@link FileLock} on the
 * cache file is held while the file is read and rewritten.
 *
 * @param baseId the base id to generate an id for
 * @param maxInWorkflow added to the stored last id (or used as the start value if the cache file is empty)
 *            before the result is incremented by one
 * @return the newly generated id, which is also the new content of the cache file
 * @throws MCRException if the cache file cannot be opened, read or written or if its content does not have
 *             the length of one formatted id
 */
@Override
public MCRObjectID getNextFreeId(String baseId, int maxInWorkflow) {
    Path cacheFile = getCacheFilePath(baseId);

    MCRObjectID nextID;

    ReentrantReadWriteLock lock = locks.computeIfAbsent(baseId, k -> new ReentrantReadWriteLock());
    ReentrantReadWriteLock.WriteLock writeLock = lock.writeLock();

    // acquire the lock BEFORE entering try: if lock() itself failed inside the try block, the finally block
    // would call unlock() on a lock this thread does not hold and mask the original error with an
    // IllegalMonitorStateException
    writeLock.lock();
    try (
        FileChannel channel = FileChannel.open(cacheFile, StandardOpenOption.READ, StandardOpenOption.WRITE,
            StandardOpenOption.SYNC);
        FileLock fileLock = channel.lock()) {

        int idLengthInBytes = MCRObjectID.formatID(baseId, 1).getBytes(StandardCharsets.UTF_8).length;
        ByteBuffer buffer = ByteBuffer.allocate(idLengthInBytes);
        channel.position(0);
        int bytesRead = channel.read(buffer);
        if (bytesRead <= 0) {
            LOGGER.info("No ID found in {}", cacheFile.toAbsolutePath());
            // empty file -> nothing generated yet, start right after maxInWorkflow
            nextID = MCRObjectID.getInstance(MCRObjectID.formatID(baseId, maxInWorkflow + 1));
            writeNewID(nextID, buffer, channel, cacheFile);
        } else if (bytesRead == idLengthInBytes) {
            buffer.flip();
            MCRObjectID objectID = readObjectIDFromBuffer(idLengthInBytes, buffer);
            int lastID = objectID.getNumberAsInteger();
            // NOTE(review): maxInWorkflow is added on top of the last id on every call, it is not used
            // as a lower bound — confirm that this is the intended contract
            nextID = MCRObjectID.getInstance(MCRObjectID.formatID(baseId, lastID + maxInWorkflow + 1));
            writeNewID(nextID, buffer, channel, cacheFile);
        } else {
            // partial content: the file does not hold exactly one formatted id
            throw new MCRException("Content has different id length " + cacheFile.toAbsolutePath());
        }
    } catch (FileNotFoundException e) {
        throw new MCRException("Could not create " + cacheFile.toAbsolutePath(), e);
    } catch (IOException e) {
        throw new MCRException("Could not open " + cacheFile.toAbsolutePath(), e);
    } finally {
        writeLock.unlock();
    }

    return nextID;
}

/**
 * Decodes an object id from the next {@code idLengthBytes} bytes of the buffer.
 *
 * @param idLengthBytes number of bytes to consume from the buffer
 * @param buffer buffer positioned at the first byte of the UTF-8 encoded id
 * @return the object id parsed from the decoded string
 */
private static MCRObjectID readObjectIDFromBuffer(int idLengthBytes, ByteBuffer buffer) {
    final byte[] raw = new byte[idLengthBytes];
    buffer.get(raw);
    return MCRObjectID.getInstance(new String(raw, StandardCharsets.UTF_8));
}

/**
 * Reads the last generated id of the given base id from its cache file.
 * <p>
 * Only the per-base-id read lock of this JVM is held while reading; no file lock is acquired.
 *
 * @param baseId the base id to look up
 * @return the id stored in the cache file or {@code null} if the cache file is empty
 * @throws MCRException if the cache file cannot be read or if its content does not have the length of one
 *             formatted id
 */
@Override
public MCRObjectID getLastID(String baseId) {
    Path cacheFilePath = getCacheFilePath(baseId);
    ReentrantReadWriteLock lock = locks.computeIfAbsent(baseId, k -> new ReentrantReadWriteLock());
    ReentrantReadWriteLock.ReadLock readLock = lock.readLock();

    // acquire the lock BEFORE entering try so that the finally block never unlocks a lock that was not
    // obtained
    readLock.lock();
    try (FileChannel channel = FileChannel.open(cacheFilePath)) {
        int idLengthInBytes = MCRObjectID.formatID(baseId, 1).getBytes(StandardCharsets.UTF_8).length;
        ByteBuffer buffer = ByteBuffer.allocate(idLengthInBytes);
        channel.position(0);
        int bytesRead = channel.read(buffer);
        if (bytesRead == -1) {
            // empty file -> no ID found
            return null;
        } else if (bytesRead == idLengthInBytes) {
            buffer.flip();
            return readObjectIDFromBuffer(idLengthInBytes, buffer);
        } else {
            throw new MCRException("Content has different id length " + cacheFilePath.toAbsolutePath());
        }
    } catch (IOException e) {
        throw new MCRException("Could not open " + cacheFilePath.toAbsolutePath(), e);
    } finally {
        readLock.unlock();
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
import org.mycore.common.xsl.MCRErrorListener;
import org.mycore.datamodel.common.MCRAbstractMetadataVersion;
import org.mycore.datamodel.common.MCRActiveLinkException;
import org.mycore.datamodel.common.MCRFileBaseCacheObjectIDGenerator;
import org.mycore.datamodel.common.MCRLinkTableManager;
import org.mycore.datamodel.common.MCRXMLMetadataManager;
import org.mycore.datamodel.metadata.MCRBase;
Expand Down Expand Up @@ -1386,6 +1387,20 @@ public static void repairSharedMetadata(String id) throws MCRAccessException {
MCRMetadataManager.repairSharedMetadata(obj);
}

/**
 * Initializes the file based object id cache: for every object base id known to the metadata manager the
 * highest stored id is looked up and the cache is set so that the following id is handed out next.
 */
@MCRCommand(
    syntax = "create object id cache",
    help = "Creates a cache for all object ids in the data directory.",
    order = 175)
public static void createObjectIDCache() {
    MCRXMLMetadataManager metadataManager = MCRXMLMetadataManager.instance();
    // one generator is enough for all base ids, no need to create a new one per iteration
    MCRFileBaseCacheObjectIDGenerator generator = new MCRFileBaseCacheObjectIDGenerator();
    metadataManager.getObjectBaseIds().forEach(id -> {
        LOGGER.info("Creating cache for base {}", id);
        int highestStoredID = metadataManager.getHighestStoredID(id);
        generator.setNextFreeId(id, highestStoredID + 1);
    });
}

/**
* The method start the repair of the metadata search for a given MCRObjectID as String.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* This file is part of *** M y C o R e ***
* See http://www.mycore.de/ for details.
*
* MyCoRe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MyCoRe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with MyCoRe. If not, see <http://www.gnu.org/licenses/>.
*/

package org.mycore.datamodel.common;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MyCoRe License Header is missing

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added


import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.junit.Test;
import org.mycore.common.MCRTestCase;
import org.mycore.datamodel.metadata.MCRObjectID;

import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collections;
import java.util.stream.IntStream;

import static org.junit.Assert.assertEquals;

/**
 * Verifies that several {@link MCRFileBaseCacheObjectIDGenerator} instances used in parallel hand out unique,
 * gap-free ids for one base id.
 */
public class MCRFileBaseCacheObjectIDGeneratorTest extends MCRTestCase {

    public static final int GENERATOR_COUNT = 10;

    public static final int TEST_IDS = 100;

    private static final Logger LOGGER = LogManager.getLogger();

    /**
     * Requests {@link #TEST_IDS} ids in parallel, spread over {@link #GENERATOR_COUNT} generator instances, and
     * checks that every id was generated exactly once and that the numbers run from 1 to {@link #TEST_IDS}.
     */
    @Test
    public void getNextFreeId() throws IOException {
        Files.createDirectories(MCRFileBaseCacheObjectIDGenerator.getDataDirPath());

        var generatorList = new ArrayList<MCRFileBaseCacheObjectIDGenerator>();
        for (int i = 0; i < GENERATOR_COUNT; i++) {
            generatorList.add(new MCRFileBaseCacheObjectIDGenerator());
        }

        // need thread safe list of generated ids
        var generatedIds = Collections.synchronizedList(new ArrayList<MCRObjectID>());
        IntStream.range(0, TEST_IDS)
            .parallel()
            .forEach(i -> {
                LOGGER.info("Generating ID {}", i);
                var generator = generatorList.get(i % GENERATOR_COUNT);
                MCRObjectID id = generator.getNextFreeId("junit", "test");
                generatedIds.add(id);
            });

        // check if all ids are unique
        assertEquals(TEST_IDS, generatedIds.size());
        assertEquals(TEST_IDS, generatedIds.stream().distinct().count());

        // check that there are no gaps in the ids; the loop has to include the last element, otherwise the
        // highest generated id is never verified
        var sortedIds = new ArrayList<>(generatedIds);
        Collections.sort(sortedIds);
        for (int i = 0; i < sortedIds.size(); i++) {
            assertEquals(i + 1, sortedIds.get(i).getNumberAsInteger());
        }
    }

}
Loading