[bigvm] Don't free host when too much reserved RAM
The logic in BigVmManager that decides whether a cluster can
free another BigVM spawn-host is changed from considering only
the total percentage of used memory to also considering the
amount of reserved memory.
grandchild committed Apr 1, 2021
1 parent 93e22ef commit 5b05204
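In essence, the change turns a single-threshold check into a two-threshold gate. The following minimal sketch is illustrative only (the function name and parameters are invented, not Nova code): a cluster may only free a BigVM spawn-host if both its memory usage and its memory reservation stay within the configured limits.

def cluster_can_free_host(memory_mb_used_percent,
                          memory_reservable_mb_used_percent,
                          max_usage_percent,
                          max_reservation_percent):
    """Return True if the cluster may still free up a BigVM spawn-host.

    max_usage_percent corresponds to bigvm_cluster_max_usage_percent,
    max_reservation_percent to bigvm_cluster_max_reservation_percent.
    """
    if memory_mb_used_percent > max_usage_percent:
        return False  # too much memory already in use
    if memory_reservable_mb_used_percent > max_reservation_percent:
        return False  # too much of the reservable memory already reserved
    return True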
Showing 2 changed files with 61 additions and 20 deletions.
67 changes: 47 additions & 20 deletions nova/bigvm/manager.py
@@ -34,6 +34,7 @@
from nova.scheduler.client.report import get_placement_request_id
from nova.scheduler.client.report import NESTED_PROVIDER_API_VERSION
from nova.scheduler.utils import ResourceRequest
from nova import utils
from nova.virt.vmwareapi import special_spawning

LOG = logging.getLogger(__name__)
@@ -43,6 +44,7 @@
MEMORY_MB = rc_fields.ResourceClass.MEMORY_MB
BIGVM_RESOURCE = special_spawning.BIGVM_RESOURCE
BIGVM_DISABLED_TRAIT = 'CUSTOM_BIGVM_DISABLED'
MEMORY_RESERVABLE_MB_RESOURCE = utils.MEMORY_RESERVABLE_MB_RESOURCE
VMWARE_HV_TYPE = 'VMware vCenter Server'
SHARD_PREFIX = 'vc-'
HV_SIZE_BUCKET_THRESHOLD_PERCENT = 10
@@ -84,11 +86,13 @@ def _prepare_empty_host_for_spawning(self, context):
again if a migration happened in the background. We need to clean up
the child rp in this case and redo the scheduling.
We only want to fill up clusters to a certain point, configurable via
bigvm_cluster_max_usage_percent. resource-providers having more RAM
usage than this, will not be used for a hv_size. Additionally, we check
in every iteration, if we have to give up a freed-up host, because the
cluster reached the limit.
We only want to fill up cluster memory to a certain point, configurable
via bigvm_cluster_max_usage_percent and
bigvm_cluster_max_reservation_percent. Resource-providers with more RAM
usage or reservation than those respective limits will not be used for
an hv_size. Additionally, we check in every iteration whether we have to
give up a freed-up host because the cluster reached one of the limits.
"""
client = self.placement_client

@@ -139,16 +143,24 @@ def _flatten(list_of_lists):
for p, d in provider_summaries.items():
used = vmware_providers.get(p, {})\
.get('memory_mb_used_percent', 100)
if used > CONF.bigvm_cluster_max_usage_percent:
reserved = vmware_providers.get(p, {})\
.get('memory_reservable_mb_used_percent', 100)
if (used > CONF.bigvm_cluster_max_usage_percent
or reserved
> CONF.bigvm_cluster_max_reservation_percent):
continue
filtered_provider_summaries[p] = d

if not filtered_provider_summaries:
LOG.warning('Could not find a resource-provider to free up a '
'host for hypervisor size %(hv_size)d, because '
'all clusters are used more than %(max_used)d.',
'all clusters are already using more than '
'%(max_used)d%% of total memory or reserving more '
'than %(max_reserved)d%% of reservable memory.',
{'hv_size': hv_size,
'max_used': CONF.bigvm_cluster_max_usage_percent})
'max_used': CONF.bigvm_cluster_max_usage_percent,
'max_reserved':
CONF.bigvm_cluster_max_reservation_percent})
continue

# filter out providers that are disabled for bigVMs
@@ -287,15 +299,19 @@ def _get_providers(self, context):
elif rp['uuid'] not in vmware_hvs: # ignore baremetal
continue
else:
# retrieve the MEMORY_MB resource
url = '/resource_providers/{}/inventories/{}'.format(
rp['uuid'], MEMORY_MB)
# retrieve inventory for MEMORY_MB & MEMORY_RESERVABLE_MB info
url = '/resource_providers/{}/inventories'.format(rp['uuid'])
resp = client.get(url)
if resp.status_code != 200:
LOG.error('Could not retrieve inventory for RP %(rp)s.',
{'rp': rp['uuid']})
continue
memory_mb_inventory = resp.json()
inventory = resp.json()["inventories"]
memory_mb_inventory = inventory[MEMORY_MB]
memory_reservable_mb_inventory = inventory.get(
MEMORY_RESERVABLE_MB_RESOURCE)
if not memory_reservable_mb_inventory:
continue

# retrieve the usage
url = '/resource_providers/{}/usages'
@@ -311,6 +327,12 @@
- memory_mb_inventory['reserved'])
memory_mb_used_percent = (usages[MEMORY_MB]
/ float(memory_mb_total) * 100)
memory_reservable_mb_total = (
memory_reservable_mb_inventory['total']
- memory_reservable_mb_inventory['reserved'])
memory_reservable_mb_used_percent = (
usages.get(MEMORY_RESERVABLE_MB_RESOURCE, 0)
/ float(memory_reservable_mb_total) * 100)

host = vmware_hvs[rp['uuid']]
# ignore hypervisors we would never use anyways
@@ -327,13 +349,15 @@
'AZ or VC.',
{'host': host})
continue
vmware_providers[rp['uuid']] = {'hv_size': hv_size,
'host': host,
'az': host_azs[host],
'vc': host_vcs[host],
'cell_mapping': cell_mapping,
'memory_mb_used_percent':
memory_mb_used_percent}
vmware_providers[rp['uuid']] = {
'hv_size': hv_size,
'host': host,
'az': host_azs[host],
'vc': host_vcs[host],
'cell_mapping': cell_mapping,
'memory_mb_used_percent': memory_mb_used_percent,
'memory_reservable_mb_used_percent':
memory_reservable_mb_used_percent}

# make sure the placement cache is filled
client.get_provider_tree_and_ensure_root(context, rp['uuid'],
@@ -387,7 +411,10 @@ def _check_and_clean_providers(self, context, client, bigvm_providers,
continue
host_rp = vmware_providers[rp['host_rp_uuid']]
used_percent = host_rp['memory_mb_used_percent']
if used_percent > CONF.bigvm_cluster_max_usage_percent:
reserved_percent = host_rp['memory_reservable_mb_used_percent']
if used_percent > CONF.bigvm_cluster_max_usage_percent \
or reserved_percent \
> CONF.bigvm_cluster_max_reservation_percent:
overused_providers[rp_uuid] = rp
LOG.info('Resource-provider %(host_rp_uuid)s with free host '
'is overused. Marking %(rp_uuid)s for deletion.',
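To make the percentage arithmetic in _get_providers concrete, here is a small self-contained example with invented numbers. The dictionary layout loosely mimics the placement API responses handled above, and the literal 'MEMORY_RESERVABLE_MB' key is only a stand-in for whatever nova.utils.MEMORY_RESERVABLE_MB_RESOURCE resolves to.

# Invented inventory/usage figures for one resource-provider (values in MB).
inventory = {
    'MEMORY_MB': {'total': 1048576, 'reserved': 32768},
    'MEMORY_RESERVABLE_MB': {'total': 786432, 'reserved': 0},
}
usages = {'MEMORY_MB': 524288, 'MEMORY_RESERVABLE_MB': 262144}

# Usable capacity is the inventory total minus the inventory's own reservation.
memory_mb_total = (inventory['MEMORY_MB']['total']
                   - inventory['MEMORY_MB']['reserved'])
memory_mb_used_percent = usages['MEMORY_MB'] / float(memory_mb_total) * 100

memory_reservable_mb_total = (inventory['MEMORY_RESERVABLE_MB']['total']
                              - inventory['MEMORY_RESERVABLE_MB']['reserved'])
memory_reservable_mb_used_percent = (usages.get('MEMORY_RESERVABLE_MB', 0)
                                     / float(memory_reservable_mb_total) * 100)

print(round(memory_mb_used_percent, 1))             # 51.6
print(round(memory_reservable_mb_used_percent, 1))  # 33.3

With a usage limit of, say, 80% and the default reservation limit of 50%, this hypothetical cluster would still qualify for freeing a spawn-host.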
14 changes: 14 additions & 0 deletions nova/conf/base.py
@@ -110,6 +110,20 @@
Clusters/resource-provider with this much usage are not used for freeing up a
host for spawning (a big VM). Clusters found to reach that amount, that already
have a host freed, get their free host removed.
"""),
cfg.IntOpt(
'bigvm_cluster_max_reservation_percent',
default=50,
help="""
Clusters/resource-providers with this percentage of memory reserved (of their
reservable memory, which can be less than total memory) are not used for
freeing up a host for spawning big VMs. Clusters found to have reached that
amount while already having a freed host get their free host removed.
Compare the values of conf.vmware.memory_reservation_cluster_hosts_max_fail and
conf.vmware.memory_reservation_max_ratio_fallback to see how much of total
memory is actually reservable.
"""),
]

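For readers unfamiliar with oslo.config, the following standalone sketch shows how an option like the one added above is registered and how an operator override takes effect. It mirrors rather than reproduces the real registration in nova/conf/base.py, and the override value of 40 is only an example.

from oslo_config import cfg

CONF = cfg.CONF
CONF.register_opts([
    cfg.IntOpt('bigvm_cluster_max_reservation_percent',
               default=50,
               help='Clusters reserving more than this percentage of their '
                    'reservable memory are not used for freeing up a '
                    'BigVM spawn-host.'),
])

# Simulate an operator override, as it would come from nova.conf.
CONF.set_override('bigvm_cluster_max_reservation_percent', 40)
print(CONF.bigvm_cluster_max_reservation_percent)  # 40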
