diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostRenamer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostRenamer.java index 50f38bac90fa..4a5a8919df65 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostRenamer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostRenamer.java @@ -1,7 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; -import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeType; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.flags.Flags; @@ -10,6 +10,8 @@ import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.node.Allocation; +import com.yahoo.vespa.hosted.provision.node.ClusterId; import java.time.Duration; import java.util.Collections; @@ -33,51 +35,57 @@ public HostRenamer(NodeRepository nodeRepository, Duration interval, Metric metr this.hostnameSchemeFlag = Flags.HOSTNAME_SCHEME.bindTo(nodeRepository.flagSource()); } + record ClusterGroup(ClusterId clusterId, Integer groupIndex){} + @Override protected double maintain() { if (!nodeRepository().nodes().isWorking()) return 0.0; NodeList allNodes = nodeRepository().nodes().list(); NodeList activeHosts = allNodes.nodeType(NodeType.host).state(Node.State.active); - Set retiringApplications = applicationsOnRetiringHosts(activeHosts, allNodes); + Set retiringClusterGroups = applicationsOnRetiringHosts(activeHosts, allNodes); for (var host : activeHosts) { - Set applicationsOnHost = applicationsOn(host, allNodes); - if (!changeHostname(host, applicationsOnHost)) continue; + Set clusterGroupsOnHost = applicationsGroupsOn(host, allNodes); + if (!changeHostname(host, clusterGroupsOnHost)) continue; - if (Collections.disjoint(retiringApplications, applicationsOnHost)) { + if (Collections.disjoint(retiringClusterGroups, clusterGroupsOnHost)) { LOG.info("Deprovisioning " + host + " to change its hostname"); nodeRepository().nodes().deprovision(host.hostname(), Agent.system, nodeRepository().clock().instant()); - retiringApplications.addAll(applicationsOnHost); + retiringClusterGroups.addAll(clusterGroupsOnHost); } } return 1.0; } - private Set applicationsOn(Node host, NodeList allNodes) { - Set applications = new HashSet<>(); + private Set applicationsGroupsOn(Node host, NodeList allNodes) { + Set clusterGroups = new HashSet<>(); for (var child : allNodes.childrenOf(host)) { - applications.add(child.allocation().get().owner()); + Allocation allocation = child.allocation().orElseThrow(); + clusterGroups.add(new ClusterGroup( + new ClusterId(allocation.owner(), allocation.membership().cluster().id()), + allocation.membership().cluster().group().map(ClusterSpec.Group::index).orElse(0))); } - return applications; + return clusterGroups; } - private Set applicationsOnRetiringHosts(NodeList activeHosts, NodeList allNodes) { - Set applications = new HashSet<>(); + private Set applicationsOnRetiringHosts(NodeList activeHosts, NodeList allNodes) { + Set applications = new HashSet<>(); for (var host : activeHosts.retiring()) { - applications.addAll(applicationsOn(host, allNodes)); + applications.addAll(applicationsGroupsOn(host, allNodes)); } return applications; } - private boolean changeHostname(Node node, Set instances) { + private boolean changeHostname(Node node, Set clusterGroups) { if (node.hostname().endsWith(".vespa-cloud.net")) { return false; } Set wantedSchemes; - if (instances.isEmpty()) { + if (clusterGroups.isEmpty()) { wantedSchemes = Set.of(hostnameSchemeFlag.value()); } else { - wantedSchemes = instances.stream() - .map(instance -> hostnameSchemeFlag.withApplicationId(Optional.of(instance)).value()) + wantedSchemes = clusterGroups.stream() + .map(clusterGroup -> hostnameSchemeFlag.withApplicationId( + Optional.of(clusterGroup.clusterId().application())).value()) .collect(Collectors.toSet()); } return wantedSchemes.size() == 1 && wantedSchemes.iterator().next().equals("standard"); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostRenamerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostRenamerTest.java index beaba982e2a7..d34e26bf2f8f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostRenamerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostRenamerTest.java @@ -25,6 +25,7 @@ import java.util.function.Supplier; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertEquals; /** @@ -71,6 +72,39 @@ public void rename() { assertEquals(0, list.get().retiring().size(), "No more hosts to rename"); } + @Test + public void renameGrouped() { + InMemoryFlagSource flagSource = new InMemoryFlagSource(); + ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))) + .flagSource(flagSource) + .build(); + Supplier list = () -> tester.nodeRepository().nodes().list().not().state(Node.State.deprovisioned); + HostRenamer renamer = new HostRenamer(tester.nodeRepository(), Duration.ofDays(1), new MockMetric()); + + ApplicationId groupedApp = ProvisioningTester.applicationId("groupedApp"); + int hostCount = 4; + provisionHosts(hostCount, tester, "legacy.example.com"); + + deployGroupedApp(groupedApp, tester); + + // Nothing happens when flag is unset + renamer.maintain(); + assertEquals(0, list.get().retiring().size(), "No hosts to rename when feature flag is unset"); + + // Rename hosts + flagSource.withStringFlag(Flags.HOSTNAME_SCHEME.id(), "standard"); + renamer.maintain(); + + assertEquals(2, list.get().owner(groupedApp).retiring().size(), "One node per group is retired at a time"); + List retiringNodes = list.get().owner(groupedApp).retiring().asList(); + assertNotEquals( + "Retiring nodes are from different groups", + retiringNodes.get(0).allocation().get().membership().cluster().group(), + retiringNodes.get(1).allocation().get().membership().cluster().group() + ); + assertEquals(2, list.get().hosts().retiring().size(), "Two hosts should be retired"); + } + private void replaceHosts(NodeList hosts, ProvisioningTester tester) { for (var host : hosts) { if (!host.status().wantToRetire()) throw new IllegalArgumentException(host + " is not requested to retire"); @@ -99,6 +133,12 @@ private void deploy(ApplicationId application, ProvisioningTester tester) { tester.deploy(application, contentSpec, capacity); } + private void deployGroupedApp(ApplicationId application, ProvisioningTester tester) { + ClusterSpec group0Spec = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("content1")).vespaVersion("7").build(); + Capacity capacity = Capacity.from(new ClusterResources(4, 2, new NodeResources(2, 8, 50, 1))); + tester.deploy(application, group0Spec, capacity); + } + private void provisionHosts(int count, ProvisioningTester tester, String domain) { List nodes = tester.makeProvisionedNodes(count, (index) -> "host-" + index + "." + domain, new Flavor(new NodeResources(32, 128, 1024, 10)), Optional.empty(), NodeType.host, 10, false);