Skip to content

Commit

Permalink
LP2053031: Adding tuning params (#213)
Browse files Browse the repository at this point in the history
* Adding tuning params
* snapshot_count now accepts a string that can be an integer or auto

---------

Co-authored-by: Homayoon Alimohammadi <[email protected]>
  • Loading branch information
FrancescoDeSimone and HomayoonAlimohammadi authored Jan 29, 2025
1 parent a2a872e commit c086dd5
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 12 deletions.
28 changes: 24 additions & 4 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,33 @@ options:
default: 2380
description: Port to run the ETCD Management service
channel:
type: string
default: auto
description: "The snap channel from which to install etcd (e.g. '3.3/stable'), or 'auto'\nto accept the charm default. Choosing 'auto' will install the latest \nsupported version of etcd at deploy time, but will not automatically upgrade\nto a newer version thereafter.\n"
heartbeat_interval:
type: int
default: 100
description: |
The frequency with which the leader will notify followers that it is
still the leader. By default, etcd uses a 100ms heartbeat interval.
election_timeout:
type: int
default: 1000
description: |
How long a follower node will go without hearing a heartbeat before
attempting to become leader itself.
By default, etcd uses a 1000ms election timeout.
snapshot_count:
type: string
default: auto
description: |
The snap channel from which to install etcd (e.g. '3.3/stable'), or 'auto'
to accept the charm default. Choosing 'auto' will install the latest
supported version of etcd at deploy time, but will not automatically upgrade
to a newer version thereafter.
Creating snapshots with the V2 backend can be expensive,
so snapshots are only created after a given number of changes to etcd.
The value can be an integer number or 'auto'.
If 'auto', the channel version is checked:
channels at version 3.2 or newer use 100000, otherwise 10000 is used.
If etcd’s memory usage and disk usage are too high,
try lowering the snapshot threshold.
snapd_refresh:
default: "max"
type: string
Expand Down
16 changes: 14 additions & 2 deletions lib/etcd_databag.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@
from charmhelpers.core.hookenv import leader_get, leader_set
from charmhelpers.core import unitdata
from charms.reactive import is_state
from etcd_lib import get_ingress_address
from etcd_lib import get_bind_address, build_uri
from etcd_lib import (
get_ingress_address,
get_bind_address,
build_uri,
get_snapshot_count,
)

import string
import random
Expand All @@ -24,6 +28,9 @@ class EtcdDatabag:
'cluster_address': '127.0.0.1',
'db_address': '127.0.0.1',
'unit_name': 'etcd0',
'heartbeat-interval': '100',
'election-timeout': '1000',
'snapshot-count': '100000',
'port': '2380',
'management_port': '2379',
'ca_certificate': '/etc/ssl/etcd/ca.crt',
Expand All @@ -46,6 +53,11 @@ def __init__(self):
self.listen_client_urls.insert(0, build_uri("http", "127.0.0.1", 4001))
self.advertise_urls = [build_uri("https", get_ingress_address("db"), self.port)]
self.management_port = config("management_port")
self.heartbeat_interval = config("heartbeat_interval")
self.election_timeout = config("election_timeout")
self.snapshot_count = get_snapshot_count(
config("snapshot_count"), config("channel")
)
# Live polled properties
self.cluster_address = get_ingress_address("cluster")
self.unit_name = os.getenv("JUJU_UNIT_NAME").replace("/", "")
Expand Down
24 changes: 24 additions & 0 deletions lib/etcd_lib.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ipaddress import ip_address
from packaging.version import Version

from charmhelpers.contrib.templating.jinja import render
from charmhelpers.core.hookenv import (
Expand Down Expand Up @@ -47,6 +48,29 @@ def get_ingress_address(endpoint_name):
return sorted(all_addrs, key=lambda i: ip_address(i).version)[0]


def get_snapshot_count(snapshot_count: str, channel: str) -> int:
    """Return the etcd ``snapshot-count`` value for the given charm config.

    * If ``snapshot_count`` is ``"auto"``, the value is derived from the snap
      channel: tracks >= 3.2 (and the ``"auto"`` channel) get 100000, older
      tracks get 10000.
    * Any other value is converted to an integer and returned as is.

    The channel is only inspected when ``snapshot_count`` is ``"auto"``, so an
    explicit count works even if the channel is unset. A missing channel or a
    track that is not a version number (e.g. ``latest/stable`` or a bare
    ``stable``) is treated like ``"auto"``, i.e. as a current etcd release.

    @param snapshot_count the value to set, could be a number or auto
    @param channel the channel used by the charm
    @return the number of committed transactions that triggers a snapshot
    @raises TypeError if snapshot_count is neither "auto" nor an integer
    """
    SNAPSHOT_COUNT_PRIOR_32 = 10000
    SNAPSHOT_COUNT_BEYOND_32 = 100000

    # Explicit values short-circuit before the channel is touched at all.
    if snapshot_count != "auto":
        try:
            return int(snapshot_count)
        except ValueError as err:
            # TypeError is kept for backward compatibility with callers.
            raise TypeError(
                f"{snapshot_count} value is not an integer number"
            ) from err

    if not channel or channel == "auto":
        return SNAPSHOT_COUNT_BEYOND_32

    # A snap channel looks like "<track>/<risk>"; only the track is a version.
    track = channel.split("/")[0]
    try:
        is_32_or_newer = Version(track) >= Version("3.2")
    except ValueError:
        # packaging's InvalidVersion subclasses ValueError; a non-numeric
        # track such as "latest" is assumed to point at a current release.
        return SNAPSHOT_COUNT_BEYOND_32

    if is_32_or_newer:
        return SNAPSHOT_COUNT_BEYOND_32
    return SNAPSHOT_COUNT_PRIOR_32


def get_bind_address(endpoint_name):
"""Returns the first bind-address found in network info
belonging to the named endpoint, if available.
Expand Down
16 changes: 14 additions & 2 deletions reactive/etcd.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,13 @@ def post_series_upgrade():

@when("snap.installed.etcd")
@when("leadership.is_leader")
@when_any("config.changed.port", "config.changed.management_port")
@when_any(
"config.changed.port",
"config.changed.management_port",
"config.changed.snapshot_count",
"config.changed.election_timeout",
"config.changed.heartbeat_interval",
)
@when_not("etcd.installed")
@when_not("upgrade.series.in-progress")
def leader_config_changed():
Expand Down Expand Up @@ -247,7 +253,13 @@ def leader_config_changed():

@when("snap.installed.etcd")
@when_not("leadership.is_leader")
@when_any("config.changed.port", "config.changed.management_port")
@when_any(
"config.changed.port",
"config.changed.management_port",
"config.changed.snapshot_count",
"config.changed.election_timeout",
"config.changed.heartbeat_interval",
)
@when_not("etcd.installed")
def follower_config_changed():
"""Follower units need to render the configuration file, close and open
Expand Down
6 changes: 3 additions & 3 deletions templates/etcd3.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ data-dir: {{ etcd_data_dir }}
wal-dir: {{ etcd_data_dir }}
{% endif %}
# Number of committed transactions to trigger a snapshot to disk.
snapshot-count: 10000
snapshot-count: {{ snapshot_count }}

# Time (in milliseconds) of a heartbeat interval.
heartbeat-interval: 100
heartbeat-interval: {{ heartbeat_interval }}

# Time (in milliseconds) for an election to timeout.
election-timeout: 1000
election-timeout: {{ election_timeout }}

# Raise alarms when backend size exceeds the given quota. 0 means use the
# default quota.
Expand Down
4 changes: 4 additions & 0 deletions tests/unit/lib/test_etcd_databag.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ def test_render_etcd2(
config.set("management_port", 1234)
config.set("port", 5678)
config.set("bind_with_insecure_http", True)
config.set("channel", "3.2/stable")
config.set("snapshot_count", "auto")
bag = etcd_databag.EtcdDatabag()
template_env = Environment(loader=FileSystemLoader("templates"))
config = template_env.get_template("etcd2.conf").render(bag.__dict__)
Expand All @@ -64,6 +66,8 @@ def test_render_etcd3(
config.set("management_port", 1234)
config.set("port", 5678)
config.set("bind_with_insecure_http", True)
config.set("channel", "3.2/stable")
config.set("snapshot_count", "auto")
bag = etcd_databag.EtcdDatabag()
template_env = Environment(loader=FileSystemLoader("templates"))
config = template_env.get_template("etcd3.conf").render(bag.__dict__)
Expand Down
25 changes: 24 additions & 1 deletion tests/unit/lib/test_etcd_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
import charmhelpers.core.hookenv as hookenv
import pytest

from etcd_lib import build_uri, get_bind_address, render_grafana_dashboard
from etcd_lib import (
build_uri,
get_bind_address,
render_grafana_dashboard,
get_snapshot_count,
)


def test_render_grafana_dashboard():
Expand Down Expand Up @@ -78,6 +83,24 @@ def test_get_bind_address_picks_v4_first(unit_private_ip):
unit_private_ip.assert_not_called()


def test_get_snapshot_count_auto_3_2():
    """'auto' on the 3.2 channel resolves to the larger threshold."""
    resolved = get_snapshot_count("auto", "3.2")
    assert resolved == 100000


def test_get_snapshot_count_auto_3_1():
    """'auto' on the 3.1 channel resolves to the smaller threshold."""
    resolved = get_snapshot_count("auto", "3.1")
    assert resolved == 10000


def test_get_snapshot_count():
    """An explicit numeric string is returned as its integer value."""
    requested = "100"
    resolved = get_snapshot_count(requested, "3.1")
    assert resolved == int(requested)


def test_get_bind_address_picks_v6(unit_private_ip):
ipv6 = "2002::1234:abcd:ffff:c0a8:101"
bind_data = {
Expand Down
2 changes: 2 additions & 0 deletions tests/unit/test_etcdctl.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def config():
reactive.etcd.config.reset_mock()
reactive.etcd.config.side_effect = kv.get
reactive.etcd.config.set = kv.set
reactive.etcd.config.set("channel", "3.2/stable")
reactive.etcd.config.set("snapshot_count", "auto")
return reactive.etcd.config


Expand Down

0 comments on commit c086dd5

Please sign in to comment.