Skip to content
This repository has been archived by the owner on Nov 14, 2024. It is now read-only.

Commit

Permalink
ABR 16: Accept lockId as string (#5898)
Browse files Browse the repository at this point in the history
prepareRestore now accepts a backup ID, enabling retries of failed restores without having to deconflict.
  • Loading branch information
gsheasby authored Feb 10, 2022
1 parent 4edefdc commit c3f269b
Show file tree
Hide file tree
Showing 15 changed files with 174 additions and 132 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import com.palantir.atlasdb.cassandra.backup.transaction.TransactionsTableInteraction;
import com.palantir.atlasdb.internalschema.InternalSchemaMetadataState;
import com.palantir.atlasdb.keyvalue.api.KeyValueService;
import com.palantir.atlasdb.timelock.api.DisableNamespacesRequest;
import com.palantir.atlasdb.timelock.api.DisableNamespacesResponse;
import com.palantir.atlasdb.timelock.api.Namespace;
import com.palantir.atlasdb.timelock.api.ReenableNamespacesRequest;
Expand Down Expand Up @@ -103,35 +104,37 @@ public static AtlasRestoreService create(

/**
* Disables TimeLock on all nodes for the given namespaces.
* Should be called exactly once prior to a restore operation. Calling this on multiple nodes will cause conflicts.
* This will fail if any namespace is already disabled, unless it was disabled with the provided backupId.
* Namespaces for which we don't have a recorded backup will be ignored.
*
* @param namespaces the namespaces to disable
* @param namespaces the namespaces to disable.
* @param backupId a unique identifier for this request (uniquely identifies the backup to which we're restoring)
*
* @return the result of the request, including a lock ID which must later be passed to completeRestore.
* @return the namespaces successfully disabled.
*/
@NonIdempotent
public DisableNamespacesResponse prepareRestore(Set<Namespace> namespaces) {
public Set<Namespace> prepareRestore(Set<Namespace> namespaces, String backupId) {
Map<Namespace, CompletedBackup> completedBackups = getCompletedBackups(namespaces);
Set<Namespace> namespacesToRestore = completedBackups.keySet();

DisableNamespacesResponse response = timeLockManagementService.disableTimelock(authHeader, namespacesToRestore);
DisableNamespacesRequest request = DisableNamespacesRequest.of(namespacesToRestore, backupId);
DisableNamespacesResponse response = timeLockManagementService.disableTimelock(authHeader, request);
return response.accept(new DisableNamespacesResponse.Visitor<>() {
@Override
public DisableNamespacesResponse visitSuccessful(SuccessfulDisableNamespacesResponse value) {
return response;
public Set<Namespace> visitSuccessful(SuccessfulDisableNamespacesResponse value) {
return namespacesToRestore;
}

@Override
public DisableNamespacesResponse visitUnsuccessful(UnsuccessfulDisableNamespacesResponse value) {
public Set<Namespace> visitUnsuccessful(UnsuccessfulDisableNamespacesResponse value) {
log.error(
"Failed to disable namespaces prior to restore",
SafeArg.of("namespaces", namespaces),
SafeArg.of("response", value));
return response;
return ImmutableSet.of();
}

@Override
public DisableNamespacesResponse visitUnknown(String unknownType) {
public Set<Namespace> visitUnknown(String unknownType) {
throw new SafeIllegalStateException(
"Unknown DisableNamespacesResponse", SafeArg.of("unknownType", unknownType));
}
Expand Down Expand Up @@ -161,28 +164,37 @@ public Set<Namespace> repairInternalTables(
* Completes the restore process for the requested namespaces.
* This includes fast-forwarding the timestamp, and then re-enabling the TimeLock namespaces.
*
* @param request the request object, which must include the lock ID returned by {@link #prepareRestore(Set)}
* @param namespaces the namespaces to re-enable
* @param backupId the backup identifier, which must match the one given to {@link #prepareRestore(Set, String)}
* @return the set of namespaces that were successfully fast-forwarded and re-enabled.
*/
@NonIdempotent
public Set<Namespace> completeRestore(ReenableNamespacesRequest request) {
Set<CompletedBackup> completedBackups = request.getNamespaces().stream()
public Set<Namespace> completeRestore(Set<Namespace> namespaces, String backupId) {
Set<CompletedBackup> completedBackups = namespaces.stream()
.map(backupPersister::getCompletedBackup)
.flatMap(Optional::stream)
.collect(Collectors.toSet());

if (completedBackups.isEmpty()) {
log.info(
"Attempted to complete restore, but no completed backups were found",
SafeArg.of("namespaces", request.getNamespaces()));
SafeArg.of("namespaces", namespaces));
return ImmutableSet.of();
} else if (completedBackups.size() < namespaces.size()) {
Set<Namespace> namespacesWithBackup =
completedBackups.stream().map(CompletedBackup::getNamespace).collect(Collectors.toSet());
Set<Namespace> namespacesWithoutBackup = Sets.difference(namespaces, namespacesWithBackup);
log.warn(
"Completed backups were not found for some namespaces",
SafeArg.of("namespacesWithBackup", namespacesWithBackup),
SafeArg.of("namespacesWithoutBackup", namespacesWithoutBackup));
}

// Fast forward timestamps
CompleteRestoreResponse response =
atlasRestoreClientBlocking.completeRestore(authHeader, CompleteRestoreRequest.of(completedBackups));
Set<Namespace> successfulNamespaces = response.getSuccessfulNamespaces();
Set<Namespace> failedNamespaces = Sets.difference(request.getNamespaces(), successfulNamespaces);
Set<Namespace> failedNamespaces = Sets.difference(namespaces, successfulNamespaces);
if (!failedNamespaces.isEmpty()) {
log.error(
"Failed to fast-forward timestamp for some namespaces. These will not be re-enabled.",
Expand All @@ -192,8 +204,8 @@ public Set<Namespace> completeRestore(ReenableNamespacesRequest request) {

// Re-enable timelock
timeLockManagementService.reenableTimelock(
authHeader, ReenableNamespacesRequest.of(successfulNamespaces, request.getLockId()));
if (successfulNamespaces.containsAll(request.getNamespaces())) {
authHeader, ReenableNamespacesRequest.of(successfulNamespaces, backupId));
if (successfulNamespaces.containsAll(namespaces)) {
log.info(
"Successfully completed restore for all namespaces",
SafeArg.of("namespaces", successfulNamespaces));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.palantir.atlasdb.backup.api.CompletedBackup;
import com.palantir.atlasdb.cassandra.backup.CassandraRepairHelper;
import com.palantir.atlasdb.cassandra.backup.RangesForRepair;
import com.palantir.atlasdb.timelock.api.DisableNamespacesRequest;
import com.palantir.atlasdb.timelock.api.DisableNamespacesResponse;
import com.palantir.atlasdb.timelock.api.Namespace;
import com.palantir.atlasdb.timelock.api.ReenableNamespacesRequest;
Expand All @@ -40,7 +41,6 @@
import com.palantir.tokens.auth.AuthHeader;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.function.BiConsumer;
import java.util.stream.Collectors;
import org.junit.Before;
Expand All @@ -57,7 +57,7 @@ public class AtlasRestoreServiceTest {
private static final Namespace NO_BACKUP = Namespace.of("no-backup");
private static final Namespace FAILING_NAMESPACE = Namespace.of("failing");
private static final long BACKUP_START_TIMESTAMP = 2L;
private static final UUID LOCK_ID = new UUID(12, 9);
private static final String BACKUP_ID = "backup-19890526215242";

@Mock
private AuthHeader authHeader;
Expand Down Expand Up @@ -97,25 +97,25 @@ private void storeCompletedBackup(Namespace namespace) {
@Test
public void prepareReturnsOnlyCompletedBackups() {
DisableNamespacesResponse successfulDisable =
DisableNamespacesResponse.successful(SuccessfulDisableNamespacesResponse.of(LOCK_ID));
when(timeLockManagementService.disableTimelock(authHeader, ImmutableSet.of(WITH_BACKUP)))
.thenReturn(successfulDisable);
DisableNamespacesResponse.successful(SuccessfulDisableNamespacesResponse.of(BACKUP_ID));
DisableNamespacesRequest request = DisableNamespacesRequest.of(ImmutableSet.of(WITH_BACKUP), BACKUP_ID);
when(timeLockManagementService.disableTimelock(authHeader, request)).thenReturn(successfulDisable);

DisableNamespacesResponse actualDisable =
atlasRestoreService.prepareRestore(ImmutableSet.of(WITH_BACKUP, NO_BACKUP));
assertThat(actualDisable).isEqualTo(successfulDisable);
Set<Namespace> disabledNamespaces =
atlasRestoreService.prepareRestore(ImmutableSet.of(WITH_BACKUP, NO_BACKUP), BACKUP_ID);
assertThat(disabledNamespaces).containsExactly(WITH_BACKUP);
}

@Test
public void prepareBackupFailsIfDisableFails() {
DisableNamespacesResponse failedDisable = DisableNamespacesResponse.unsuccessful(
UnsuccessfulDisableNamespacesResponse.of(ImmutableSet.of(WITH_BACKUP), ImmutableSet.of()));
when(timeLockManagementService.disableTimelock(authHeader, ImmutableSet.of(WITH_BACKUP)))
.thenReturn(failedDisable);
DisableNamespacesRequest request = DisableNamespacesRequest.of(ImmutableSet.of(WITH_BACKUP), BACKUP_ID);
when(timeLockManagementService.disableTimelock(authHeader, request)).thenReturn(failedDisable);

DisableNamespacesResponse actualDisable =
atlasRestoreService.prepareRestore(ImmutableSet.of(WITH_BACKUP, NO_BACKUP));
assertThat(actualDisable).isEqualTo(failedDisable);
Set<Namespace> disabledNamespaces =
atlasRestoreService.prepareRestore(ImmutableSet.of(WITH_BACKUP, NO_BACKUP), BACKUP_ID);
assertThat(disabledNamespaces).isEmpty();
}

@Test
Expand Down Expand Up @@ -144,9 +144,9 @@ public void completesRestoreAfterFastForwardingTimestamp() {
when(atlasRestoreClient.completeRestore(authHeader, completeRequest))
.thenReturn(CompleteRestoreResponse.of(ImmutableSet.of(WITH_BACKUP)));

ReenableNamespacesRequest reenableRequest = ReenableNamespacesRequest.of(namespaces, LOCK_ID);
ReenableNamespacesRequest reenableRequest = ReenableNamespacesRequest.of(namespaces, BACKUP_ID);

Set<Namespace> successfulNamespaces = atlasRestoreService.completeRestore(reenableRequest);
Set<Namespace> successfulNamespaces = atlasRestoreService.completeRestore(namespaces, BACKUP_ID);
assertThat(successfulNamespaces).containsExactly(WITH_BACKUP);

InOrder inOrder = Mockito.inOrder(atlasRestoreClient, timeLockManagementService);
Expand All @@ -156,9 +156,7 @@ public void completesRestoreAfterFastForwardingTimestamp() {

@Test
public void completeRestoreDoesNotRunNamespacesWithoutCompletedBackup() {
ReenableNamespacesRequest reenableRequest = ReenableNamespacesRequest.of(ImmutableSet.of(NO_BACKUP), LOCK_ID);

Set<Namespace> namespaces = atlasRestoreService.completeRestore(reenableRequest);
Set<Namespace> namespaces = atlasRestoreService.completeRestore(ImmutableSet.of(NO_BACKUP), BACKUP_ID);

assertThat(namespaces).isEmpty();
verifyNoInteractions(atlasRestoreClient);
Expand All @@ -177,10 +175,12 @@ public void completeRestoreReturnsSuccessfulNamespaces() {
when(atlasRestoreClient.completeRestore(authHeader, request))
.thenReturn(CompleteRestoreResponse.of(ImmutableSet.of(WITH_BACKUP)));

ReenableNamespacesRequest reenableRequest = ReenableNamespacesRequest.of(namespaces, LOCK_ID);
ReenableNamespacesRequest reenableRequest =
ReenableNamespacesRequest.of(ImmutableSet.of(WITH_BACKUP), BACKUP_ID);

Set<Namespace> successfulNamespaces = atlasRestoreService.completeRestore(reenableRequest);
Set<Namespace> successfulNamespaces = atlasRestoreService.completeRestore(namespaces, BACKUP_ID);
assertThat(successfulNamespaces).containsExactly(WITH_BACKUP);
verify(atlasRestoreClient).completeRestore(authHeader, request);
verify(timeLockManagementService).reenableTimelock(authHeader, reenableRequest);
}
}
6 changes: 6 additions & 0 deletions changelog/@unreleased/pr-5898.v2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
type: improvement
improvement:
description: prepareRestore now accepts a backup ID, enabling retries of failed
restores without having to deconflict.
links:
- https://github.com/palantir/atlasdb/pull/5898
8 changes: 4 additions & 4 deletions timelock-api/src/main/conjure/timelock-management-api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ types:
DisableNamespacesRequest:
fields:
namespaces: set<api.Namespace>
lockId: uuid
lockId: string
SuccessfulDisableNamespacesResponse:
fields:
lockId: uuid
lockId: string
UnsuccessfulDisableNamespacesResponse:
fields:
consistentlyDisabledNamespaces:
Expand All @@ -32,7 +32,7 @@ types:
ReenableNamespacesRequest:
fields:
namespaces: set<api.Namespace>
lockId: uuid
lockId: string
SuccessfulReenableNamespacesResponse:
alias: boolean
UnsuccessfulReenableNamespacesResponse:
Expand Down Expand Up @@ -106,7 +106,7 @@ services:
disableTimelock:
http: POST /disable
args:
request: set<api.Namespace>
request: DisableNamespacesRequest
returns: DisableNamespacesResponse
docs: |
Disables the TimeLock server in a persistant way for the specified namespaces. This includes invalidating
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import com.palantir.paxos.PaxosResponse;
import java.util.Map;
import java.util.UUID;
import org.immutables.value.Value;

@JsonDeserialize(as = ImmutableSingleNodeUpdateResponse.class)
Expand All @@ -30,13 +29,13 @@ public interface SingleNodeUpdateResponse extends PaxosResponse {
/**
* other namespaces will not have been disabled/re-enabled (the transaction will not complete).
*/
Map<Namespace, UUID> lockedNamespaces();
Map<Namespace, String> lockedNamespaces();

static SingleNodeUpdateResponse successful() {
return ImmutableSingleNodeUpdateResponse.builder().isSuccessful(true).build();
}

static SingleNodeUpdateResponse failed(Map<Namespace, UUID> lockedNamespaces) {
static SingleNodeUpdateResponse failed(Map<Namespace, String> lockedNamespaces) {
return ImmutableSingleNodeUpdateResponse.builder()
.isSuccessful(false)
.lockedNamespaces(lockedNamespaces)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import com.palantir.paxos.Client;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.function.Function;
Expand Down Expand Up @@ -129,7 +128,8 @@ public void invalidateResourcesForClient(String namespace) {
}
}

public Map<Namespace, UUID> getNamespacesLockedWithDifferentLockId(Set<Namespace> namespaces, UUID expectedLockId) {
public Map<Namespace, String> getNamespacesLockedWithDifferentLockId(
Set<Namespace> namespaces, String expectedLockId) {
return disabledNamespaces.getNamespacesLockedWithDifferentLockId(namespaces, expectedLockId);
}

Expand Down
Loading

0 comments on commit c3f269b

Please sign in to comment.