Recover from pod restarts during cluster creation during setup (#499)
* Recover from pod restarts during cluster creation during setup

* Commit missed file after format

* Re-add unnecessarily removed code

* Only run create_cluster in update_status if all other units are waiting and current unit is offline (see the sketch after the change summary below)

* Address PR feedback

* Fix failing scale down and then scale up integration test

* Add comments to explain measures to recover from pod reschedule during cluster creation

* Pull latest charm libs for mysql and tracing

* Pull in latest mysql backups charm lib
shayancanonical authored Sep 16, 2024
1 parent dfdd0bd commit 2009919
Showing 8 changed files with 224 additions and 33 deletions.
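The bullet about gating `create_cluster` in update_status is the heart of the change. A condensed sketch of that recovery decision, under assumptions: the helper name and flat signature below are illustrative, and the real logic lives in `_handle_potential_cluster_crash_scenario` in `src/charm.py` further down.

```python
# Simplified restatement of the recovery gate this commit adds (assumed
# names; see the src/charm.py diff below for the actual implementation).
def should_recreate_cluster(cluster_name, total_nodes, online_nodes, peer_states):
    """Recreate the cluster only when a pod reschedule interrupted creation.

    The fingerprint of that failure: cluster metadata exists on exactly one
    node, no node is online, and every peer unit is still 'waiting'.
    """
    if not cluster_name:
        return False  # cluster creation never started
    only_one_uninitialized = total_nodes == 1 and online_nodes == 0
    return only_one_uninitialized and peer_states == {"waiting"}
```

When this condition holds, the charm drops the stale Group Replication metadata schema and re-runs `create_cluster` instead of waiting forever.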
7 changes: 4 additions & 3 deletions lib/charms/mysql/v0/backups.py
@@ -60,9 +60,9 @@ def is_unit_blocked(self) -> bool:
MySQLDeleteTempRestoreDirectoryError,
MySQLEmptyDataDirectoryError,
MySQLExecuteBackupCommandsError,
MySQLGetMemberStateError,
MySQLInitializeJujuOperationsTableError,
MySQLKillSessionError,
MySQLNoMemberStateError,
MySQLOfflineModeAndHiddenInstanceExistsError,
MySQLPrepareBackupForRestoreError,
MySQLRescanClusterError,
@@ -73,6 +73,7 @@ def is_unit_blocked(self) -> bool:
MySQLSetInstanceOptionError,
MySQLStartMySQLDError,
MySQLStopMySQLDError,
MySQLUnableToGetMemberStateError,
)
from charms.mysql.v0.s3_helpers import (
fetch_and_check_existence_of_s3_path,
@@ -99,7 +100,7 @@ def is_unit_blocked(self) -> bool:

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 11
LIBPATCH = 12


if typing.TYPE_CHECKING:
@@ -339,7 +340,7 @@ def _can_unit_perform_backup(self) -> Tuple[bool, Optional[str]]:

try:
state, role = self.charm._mysql.get_member_state()
except MySQLGetMemberStateError:
except (MySQLNoMemberStateError, MySQLUnableToGetMemberStateError):
return False, "Error obtaining member state"

if role == "primary" and self.charm.app.planned_units() > 1:
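The import changes above track a split in the mysql charm lib: the old catch-all `MySQLGetMemberStateError` becomes two exceptions, so callers can distinguish a query that failed from a query that returned nothing. A minimal sketch of the distinction (simplified; `run_query` is a hypothetical stand-in, and the real classes are defined in `lib/charms/mysql/v0/mysql.py` below):

```python
class MySQLUnableToGetMemberStateError(Exception):
    """The state query itself failed, e.g. mysqld is down."""

class MySQLNoMemberStateError(Exception):
    """The state query succeeded but returned no member state."""

def get_member_state(run_query) -> str:
    try:
        output = run_query(
            "SELECT member_state FROM performance_schema.replication_group_members"
        )
    except ConnectionError as e:
        raise MySQLUnableToGetMemberStateError(str(e))  # daemon unreachable
    if not output:
        raise MySQLNoMemberStateError("No member state retrieved")
    return output
```

Call sites that only care that the state is unavailable catch both, as `_can_unit_perform_backup` does above; `_is_cluster_blocked` in `src/charm.py` reacts to each differently.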
50 changes: 43 additions & 7 deletions lib/charms/mysql/v0/mysql.py
@@ -134,7 +134,7 @@ def wait_until_mysql_connection(self) -> None:
# Increment this major API version when introducing breaking changes
LIBAPI = 0

LIBPATCH = 72
LIBPATCH = 73

UNIT_TEARDOWN_LOCKNAME = "unit-teardown"
UNIT_ADD_LOCKNAME = "unit-add"
@@ -276,8 +276,12 @@ class MySQLGrantPrivilegesToUserError(Error):
"""Exception raised when there is an issue granting privileges to user."""


class MySQLGetMemberStateError(Error):
"""Exception raised when there is an issue getting member state."""
class MySQLNoMemberStateError(Error):
"""Exception raised when there is no member state."""


class MySQLUnableToGetMemberStateError(Error):
"""Exception raised when unable to get member state."""


class MySQLGetClusterEndpointsError(Error):
@@ -620,6 +624,26 @@ def cluster_initialized(self) -> bool:

return False

@property
def only_one_cluster_node_thats_uninitialized(self) -> Optional[bool]:
"""Check if only a single cluster node exists across all units."""
if not self.app_peer_data.get("cluster-name"):
return None

total_cluster_nodes = 0
for unit in self.app_units:
total_cluster_nodes += self._mysql.get_cluster_node_count(
from_instance=self.get_unit_address(unit)
)

total_online_cluster_nodes = 0
for unit in self.app_units:
total_online_cluster_nodes += self._mysql.get_cluster_node_count(
from_instance=self.get_unit_address(unit), node_status=MySQLMemberState["ONLINE"]
)

return total_cluster_nodes == 1 and total_online_cluster_nodes == 0
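The property is deliberately tri-state: `None` when no cluster name was ever set (there is nothing to judge yet), otherwise a boolean over node counts summed across every unit. A pure-function restatement, with assumed inputs (this helper is not part of the charm):

```python
from typing import Optional

def only_one_uninitialized_node(
    cluster_name: Optional[str],
    node_counts: list[int],    # get_cluster_node_count() per unit
    online_counts: list[int],  # same count, filtered to ONLINE members
) -> Optional[bool]:
    if not cluster_name:
        return None  # cluster creation never started; cannot judge
    return sum(node_counts) == 1 and sum(online_counts) == 0
```

`True` therefore means cluster metadata exists on exactly one node but that node never came online: the fingerprint of a pod rescheduled in the middle of `create_cluster`.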

@property
def cluster_fully_initialized(self) -> bool:
"""Returns True if the cluster is fully initialized.
@@ -1728,6 +1752,18 @@ def is_instance_configured_for_innodb(
)
return False

def drop_group_replication_metadata_schema(self) -> None:
"""Drop the group replication metadata schema from current unit."""
commands = (
f"shell.connect('{self.instance_def(self.server_config_user)}')",
"dba.drop_metadata_schema()",
)

try:
self._run_mysqlsh_script("\n".join(commands))
except MySQLClientError:
logger.exception("Failed to drop group replication metadata schema")

def are_locks_acquired(self, from_instance: Optional[str] = None) -> bool:
"""Report if any topology change is being executed."""
commands = (
@@ -2356,13 +2392,13 @@ def get_member_state(self) -> Tuple[str, str]:
logger.error(
"Failed to get member state: mysqld daemon is down",
)
raise MySQLGetMemberStateError(e.message)
raise MySQLUnableToGetMemberStateError(e.message)

# output is like:
# 'MEMBER_STATE\tMEMBER_ROLE\tMEMBER_ID\t@@server_uuid\nONLINE\tPRIMARY\t<uuid>\t<uuid>\n'
lines = output.strip().lower().split("\n")
if len(lines) < 2:
raise MySQLGetMemberStateError("No member state retrieved")
raise MySQLNoMemberStateError("No member state retrieved")

if len(lines) == 2:
# Instance only knows its own state
@@ -2378,7 +2414,7 @@ def get_member_state(self) -> Tuple[str, str]:
# filter server uuid
return results[0], results[1] or "unknown"

raise MySQLGetMemberStateError("No member state retrieved")
raise MySQLNoMemberStateError("No member state retrieved")

def is_cluster_replica(self, from_instance: Optional[str] = None) -> Optional[bool]:
"""Check if this cluster is a replica in a cluster set."""
@@ -2435,7 +2471,7 @@ def hold_if_recovering(self) -> None:
while True:
try:
member_state, _ = self.get_member_state()
except MySQLGetMemberStateError:
except (MySQLNoMemberStateError, MySQLUnableToGetMemberStateError):
break
if member_state == MySQLMemberState.RECOVERING:
logger.debug("Unit is recovering")
8 changes: 4 additions & 4 deletions lib/charms/tempo_k8s/v2/tracing.py
@@ -97,7 +97,7 @@ def __init__(self, *args):
)
from ops.framework import EventSource, Object
from ops.model import ModelError, Relation
from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, Field

# The unique Charmhub library identifier, never change it
LIBID = "12977e9aa0b34367903d8afeb8c3d85d"
@@ -107,7 +107,7 @@ def __init__(self, *args):

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 8
LIBPATCH = 9

PYDEPS = ["pydantic"]

@@ -338,7 +338,7 @@ class Config:
class ProtocolType(BaseModel):
"""Protocol Type."""

model_config = ConfigDict(
model_config = ConfigDict( # type: ignore
# Allow serializing enum values.
use_enum_values=True
)
@@ -925,7 +925,7 @@ def get_endpoint(
def charm_tracing_config(
endpoint_requirer: TracingEndpointRequirer, cert_path: Optional[Union[Path, str]]
) -> Tuple[Optional[str], Optional[str]]:
"""Utility function to determine the charm_tracing config you will likely want.
"""Return the charm_tracing config you likely want.
If no endpoint is provided:
disable charm tracing.
69 changes: 53 additions & 16 deletions src/charm.py
@@ -28,13 +28,14 @@
MySQLConfigureMySQLUsersError,
MySQLCreateClusterError,
MySQLGetClusterPrimaryAddressError,
MySQLGetMemberStateError,
MySQLGetMySQLVersionError,
MySQLInitializeJujuOperationsTableError,
MySQLLockAcquisitionError,
MySQLNoMemberStateError,
MySQLRebootFromCompleteOutageError,
MySQLServiceNotRunningError,
MySQLSetClusterPrimaryError,
MySQLUnableToGetMemberStateError,
)
from charms.mysql.v0.tls import MySQLTLS
from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointProvider
@@ -700,7 +701,12 @@ def _on_mysql_pebble_ready(self, event) -> None:
# First run setup
self._configure_instance(container)

if not self.unit.is_leader() or self.cluster_initialized:
# We consider the cluster initialized only if a primary already exists
# (there can be metadata in the database but no primary if the pod
# crashes while the cluster is being created)
if not self.unit.is_leader() or (
self.cluster_initialized and self._get_primary_from_online_peer()
):
# Non-leader units try to join cluster
self.unit.status = WaitingStatus("Waiting for instance to join the cluster")
self.unit_peer_data.update({"member-role": "secondary", "member-state": "waiting"})
@@ -710,12 +716,14 @@ def _on_mysql_pebble_ready(self, event) -> None:
try:
# Create the cluster when is the leader unit
logger.info(f"Creating cluster {self.app_peer_data['cluster-name']}")
self.unit.status = MaintenanceStatus("Creating cluster")
self.create_cluster()
self.unit.status = ops.ActiveStatus(self.active_status_message)

except (
MySQLCreateClusterError,
MySQLGetMemberStateError,
MySQLUnableToGetMemberStateError,
MySQLNoMemberStateError,
MySQLInitializeJujuOperationsTableError,
MySQLCreateClusterError,
):
@@ -728,19 +736,24 @@ def _handle_potential_cluster_crash_scenario(self) -> bool:
Returns:
bool indicating whether the caller should return
"""
if not self.cluster_initialized or not self.unit_peer_data.get("member-role"):
# health checks only run after the cluster and members are initialized
if not self._mysql.is_mysqld_running():
return True

if not self._mysql.is_mysqld_running():
only_single_uninitialized_node_across_cluster = (
self.only_one_cluster_node_thats_uninitialized
)

if (
not self.cluster_initialized and not only_single_uninitialized_node_across_cluster
) or not self.unit_peer_data.get("member-role"):
return True

# retrieve and persist state for every unit
try:
state, role = self._mysql.get_member_state()
self.unit_peer_data["member-state"] = state
self.unit_peer_data["member-role"] = role
except MySQLGetMemberStateError:
except (MySQLNoMemberStateError, MySQLUnableToGetMemberStateError):
logger.error("Error getting member state. Avoiding potential cluster crash recovery")
self.unit.status = MaintenanceStatus("Unable to get member state")
return True
@@ -757,23 +770,33 @@ def _handle_potential_cluster_crash_scenario(self) -> bool:
if state == "recovering":
return True

if state in ["offline"]:
if state == "offline":
# Group Replication is active but the member does not belong to any group
all_states = {
self.peers.data[unit].get("member-state", "unknown") for unit in self.peers.units
}
# Add state for this unit (self.peers.units does not include this unit)
all_states.add("offline")

if all_states == {"offline"} and self.unit.is_leader():
# Add state 'offline' for this unit (self.peers.units does not
# include this unit)
if (all_states | {"offline"} == {"offline"} and self.unit.is_leader()) or (
only_single_uninitialized_node_across_cluster and all_states == {"waiting"}
):
# All instances are offline; reboot the cluster from complete outage from the leader unit

logger.info("Attempting reboot from complete outage.")
try:
self._mysql.reboot_from_complete_outage()
# Condition needed to avoid rebooting on all units of the application
if self.unit.is_leader() or only_single_uninitialized_node_across_cluster:
self._mysql.reboot_from_complete_outage()
except MySQLRebootFromCompleteOutageError:
logger.error("Failed to reboot cluster from complete outage.")
self.unit.status = BlockedStatus("failed to recover cluster.")

if only_single_uninitialized_node_across_cluster and all_states == {"waiting"}:
self._mysql.drop_group_replication_metadata_schema()
self.create_cluster()
self.unit.status = ActiveStatus(self.active_status_message)
else:
self.unit.status = BlockedStatus("failed to recover cluster.")

return True

@@ -785,10 +808,23 @@ def _is_cluster_blocked(self) -> bool:
Returns: a boolean indicating whether the update-status (caller) should
no-op and return.
"""
unit_member_state = self.unit_peer_data.get("member-state")
if unit_member_state in ["waiting", "restarting"]:
# We need to query the member state from the server, since the member
# state would be 'offline' if the pod was rescheduled during cluster
# creation, whereas member-state in the unit peer databag would still
# be 'waiting'
member_state_exists = True
try:
member_state, _ = self._mysql.get_member_state()
except MySQLUnableToGetMemberStateError:
logger.error("Error getting member state while checking if cluster is blocked")
self.unit.status = MaintenanceStatus("Unable to get member state")
return True
except MySQLNoMemberStateError:
member_state_exists = False

if not member_state_exists or member_state == "restarting":
# avoid changing status while tls is being set up or charm is being initialized
logger.info(f"Unit state is {unit_member_state}")
logger.info("Unit is waiting or restarting")
logger.debug(f"{member_state_exists=}, {member_state=}")
return True

# avoid changing status while async replication is setting up
Expand All @@ -812,6 +848,7 @@ def _on_update_status(self, _: Optional[UpdateStatusEvent]) -> None:

container = self.unit.get_container(CONTAINER_NAME)
if not container.can_connect():
logger.debug("Cannot connect to pebble in the mysql container")
return

if self._handle_potential_cluster_crash_scenario():
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/helpers.py
@@ -581,7 +581,7 @@ async def write_content_to_file_in_unit(
)


async def read_contents_from_file_in_unit(
def read_contents_from_file_in_unit(
ops_test: OpsTest, unit: Unit, path: str, container_name: str = CONTAINER_NAME
) -> str:
"""Read contents from file in the provided unit.
16 changes: 16 additions & 0 deletions tests/integration/high_availability/high_availability_helpers.py
@@ -653,3 +653,19 @@ def delete_pvcs(pvcs: list[PersistentVolumeClaim]) -> None:
namespace=pvc.metadata.namespace,
grace_period=0,
)


def delete_pod(ops_test: OpsTest, unit: Unit) -> None:
"""Delete the provided pod."""
pod_name = unit.name.replace("/", "-")
subprocess.run(
[
"microk8s.kubectl",
"-n",
ops_test.model.info.name,
"delete",
"pod",
pod_name,
],
check=True,
)
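A plausible use of this helper in the new integration coverage, sketched under assumptions (the application name `mysql-k8s` and the test body are illustrative, not part of this diff):

```python
async def test_recovery_after_pod_delete(ops_test: OpsTest) -> None:
    app = ops_test.model.applications["mysql-k8s"]  # assumed app name
    delete_pod(ops_test, app.units[0])              # microk8s kubectl delete pod
    # The charm should detect the interrupted cluster creation and recover
    # without operator intervention.
    await ops_test.model.wait_for_idle(
        apps=["mysql-k8s"], status="active", timeout=30 * 60
    )
```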