Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

group independent rename #32438

Merged
merged 2 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;

import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeType;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.flags.Flags;
Expand All @@ -10,6 +10,8 @@
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.ClusterId;

import java.time.Duration;
import java.util.Collections;
Expand All @@ -33,51 +35,57 @@ public HostRenamer(NodeRepository nodeRepository, Duration interval, Metric metr
this.hostnameSchemeFlag = Flags.HOSTNAME_SCHEME.bindTo(nodeRepository.flagSource());
}

/**
 * Key identifying one group (by index) within one cluster.
 * Sets of these are compared in maintain() to decide whether a host may be deprovisioned.
 */
record ClusterGroup(ClusterId clusterId, Integer groupIndex){}

/**
 * Deprovisions active hosts for which changeHostname returns true, so that their hostname can change.
 * A host is only deprovisioned when what runs on it is disjoint from what already runs on
 * retiring hosts (including hosts deprovisioned earlier in this same pass).
 *
 * Returns 0.0 without doing anything when nodeRepository().nodes().isWorking() is false, otherwise 1.0.
 *
 * NOTE(review): this span is a rendered diff with removed and added lines interleaved; the
 * {@code Set<ApplicationId>} lines appear to be the removed version and the
 * {@code Set<ClusterGroup>} lines their replacements - confirm against the merged file.
 */
@Override
protected double maintain() {
if (!nodeRepository().nodes().isWorking()) return 0.0;
NodeList allNodes = nodeRepository().nodes().list();
NodeList activeHosts = allNodes.nodeType(NodeType.host).state(Node.State.active);
Set<ApplicationId> retiringApplications = applicationsOnRetiringHosts(activeHosts, allNodes);
Set<ClusterGroup> retiringClusterGroups = applicationsOnRetiringHosts(activeHosts, allNodes);
for (var host : activeHosts) {
Set<ApplicationId> applicationsOnHost = applicationsOn(host, allNodes);
if (!changeHostname(host, applicationsOnHost)) continue;
Set<ClusterGroup> clusterGroupsOnHost = applicationsGroupsOn(host, allNodes);
if (!changeHostname(host, clusterGroupsOnHost)) continue;

// Only proceed when nothing on this host overlaps with what is already on a retiring host
if (Collections.disjoint(retiringApplications, applicationsOnHost)) {
if (Collections.disjoint(retiringClusterGroups, clusterGroupsOnHost)) {
LOG.info("Deprovisioning " + host + " to change its hostname");
nodeRepository().nodes().deprovision(host.hostname(), Agent.system, nodeRepository().clock().instant());
// Remember what was just sent into retirement so later hosts in this loop are checked against it
retiringApplications.addAll(applicationsOnHost);
retiringClusterGroups.addAll(clusterGroupsOnHost);
}
}
return 1.0;
}

/**
 * Builds a set with one entry per child node of the given host, as found via allNodes.childrenOf(host).
 * In the ClusterGroup variant each entry combines the allocation owner and cluster id with the
 * cluster's group index, using 0 when the cluster has no group.
 *
 * NOTE(review): this span is a rendered diff; the {@code Set<ApplicationId> applicationsOn} lines
 * appear to be the removed version and the {@code Set<ClusterGroup> applicationsGroupsOn} lines
 * the added one - confirm against the merged file.
 */
private Set<ApplicationId> applicationsOn(Node host, NodeList allNodes) {
Set<ApplicationId> applications = new HashSet<>();
private Set<ClusterGroup> applicationsGroupsOn(Node host, NodeList allNodes) {
Set<ClusterGroup> clusterGroups = new HashSet<>();
for (var child : allNodes.childrenOf(host)) {
applications.add(child.allocation().get().owner());
// Presumably allocation() is an Optional, so this fails for a child without an allocation - verify
Allocation allocation = child.allocation().orElseThrow();
clusterGroups.add(new ClusterGroup(
new ClusterId(allocation.owner(), allocation.membership().cluster().id()),
allocation.membership().cluster().group().map(ClusterSpec.Group::index).orElse(0)));
}
return applications;
return clusterGroups;
}

/**
 * Returns the union, over every host in activeHosts.retiring(), of the per-host set computed
 * from that host's children in allNodes.
 *
 * NOTE(review): this span is a rendered diff; the {@code Set<ApplicationId>} lines appear to be
 * the removed version and the {@code Set<ClusterGroup>} lines the added one. The method name and
 * the local name "applications" were kept even though the added version collects ClusterGroup
 * values - confirm against the merged file.
 */
private Set<ApplicationId> applicationsOnRetiringHosts(NodeList activeHosts, NodeList allNodes) {
Set<ApplicationId> applications = new HashSet<>();
private Set<ClusterGroup> applicationsOnRetiringHosts(NodeList activeHosts, NodeList allNodes) {
Set<ClusterGroup> applications = new HashSet<>();
for (var host : activeHosts.retiring()) {
applications.addAll(applicationsOn(host, allNodes));
applications.addAll(applicationsGroupsOn(host, allNodes));
}
return applications;
}

private boolean changeHostname(Node node, Set<ApplicationId> instances) {
private boolean changeHostname(Node node, Set<ClusterGroup> clusterGroups) {
if (node.hostname().endsWith(".vespa-cloud.net")) {
return false;
}
Set<String> wantedSchemes;
if (instances.isEmpty()) {
if (clusterGroups.isEmpty()) {
wantedSchemes = Set.of(hostnameSchemeFlag.value());
} else {
wantedSchemes = instances.stream()
.map(instance -> hostnameSchemeFlag.withApplicationId(Optional.of(instance)).value())
wantedSchemes = clusterGroups.stream()
.map(clusterGroup -> hostnameSchemeFlag.withApplicationId(
Optional.of(clusterGroup.clusterId().application())).value())
.collect(Collectors.toSet());
}
return wantedSchemes.size() == 1 && wantedSchemes.iterator().next().equals("standard");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.function.Supplier;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;

/**
Expand Down Expand Up @@ -71,6 +72,39 @@ public void rename() {
assertEquals(0, list.get().retiring().size(), "No more hosts to rename");
}

/**
 * Verifies renaming of hosts that run a grouped application: with the hostname scheme flag unset
 * nothing retires, and once it is "standard" a single maintain() run retires two nodes of the
 * application, belonging to different groups, on two hosts.
 */
@Test
public void renameGrouped() {
    InMemoryFlagSource flags = new InMemoryFlagSource();
    Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
    ProvisioningTester tester = new ProvisioningTester.Builder().zone(zone).flagSource(flags).build();
    // Live view of all nodes which are not deprovisioned
    Supplier<NodeList> liveNodes = () -> tester.nodeRepository().nodes().list().not().state(Node.State.deprovisioned);
    HostRenamer hostRenamer = new HostRenamer(tester.nodeRepository(), Duration.ofDays(1), new MockMetric());

    ApplicationId app = ProvisioningTester.applicationId("groupedApp");
    provisionHosts(4, tester, "legacy.example.com");
    deployGroupedApp(app, tester);

    // Flag unset: a maintenance run must not retire anything
    hostRenamer.maintain();
    assertEquals(0, liveNodes.get().retiring().size(), "No hosts to rename when feature flag is unset");

    // Flag set to the standard scheme: a maintenance run starts renaming
    flags.withStringFlag(Flags.HOSTNAME_SCHEME.id(), "standard");
    hostRenamer.maintain();

    assertEquals(2, liveNodes.get().owner(app).retiring().size(), "One node per group is retired at a time");
    List<Node> retiring = liveNodes.get().owner(app).retiring().asList();
    var groupOfFirst = retiring.get(0).allocation().get().membership().cluster().group();
    var groupOfSecond = retiring.get(1).allocation().get().membership().cluster().group();
    assertNotEquals("Retiring nodes are from different groups", groupOfFirst, groupOfSecond);
    assertEquals(2, liveNodes.get().hosts().retiring().size(), "Two hosts should be retired");
}

private void replaceHosts(NodeList hosts, ProvisioningTester tester) {
for (var host : hosts) {
if (!host.status().wantToRetire()) throw new IllegalArgumentException(host + " is not requested to retire");
Expand Down Expand Up @@ -99,6 +133,12 @@ private void deploy(ApplicationId application, ProvisioningTester tester) {
tester.deploy(application, contentSpec, capacity);
}

/**
 * Deploys the given application with a single content cluster "content1" on Vespa version "7".
 * The requested capacity is ClusterResources(4, 2, ...) - presumably 4 nodes in 2 groups; verify
 * against the ClusterResources constructor.
 */
private void deployGroupedApp(ApplicationId application, ProvisioningTester tester) {
    ClusterSpec.Id clusterId = ClusterSpec.Id.from("content1");
    ClusterSpec contentSpec = ClusterSpec.request(ClusterSpec.Type.content, clusterId)
                                         .vespaVersion("7")
                                         .build();
    NodeResources perNode = new NodeResources(2, 8, 50, 1);
    ClusterResources requested = new ClusterResources(4, 2, perNode);
    tester.deploy(application, contentSpec, Capacity.from(requested));
}

private void provisionHosts(int count, ProvisioningTester tester, String domain) {
List<Node> nodes = tester.makeProvisionedNodes(count, (index) -> "host-" + index + "." + domain, new Flavor(new NodeResources(32, 128, 1024, 10)),
Optional.empty(), NodeType.host, 10, false);
Expand Down