From ed458fdcb2f0ee827fb0b502f9490defb330e60a Mon Sep 17 00:00:00 2001 From: Bodong Yang <86948717+Bodong-Yang@users.noreply.github.com> Date: Wed, 19 Jun 2024 10:35:26 +0900 Subject: [PATCH] refactor: rpi_boot: detects slot by partition tables, not by checking slot fslabel (#321) This PR refines the rpi_boot module to detect slot by examining the partition layout, not relying on reading the fslabel, and explicitly requires the expected partition tables(but support extra partitions after partition ID 3). Also this PR implements the feature that rpi_boot will correct the active slot's fslabel with correct slot_id after slot detection. --- src/otaclient/app/boot_control/_common.py | 26 +++- src/otaclient/app/boot_control/_rpi_boot.py | 131 +++++++++++--------- 2 files changed, 99 insertions(+), 58 deletions(-) diff --git a/src/otaclient/app/boot_control/_common.py b/src/otaclient/app/boot_control/_common.py index 2f935e5a4..c31a50efb 100644 --- a/src/otaclient/app/boot_control/_common.py +++ b/src/otaclient/app/boot_control/_common.py @@ -194,6 +194,30 @@ def get_parent_dev(cls, child_device: str, *, raise_exception: bool = True) -> s cmd = ["lsblk", "-idpno", "PKNAME", child_device] return subprocess_check_output(cmd, raise_exception=raise_exception) + @classmethod + def get_device_tree( + cls, parent_dev: str, *, raise_exception: bool = True + ) -> list[str]: + """Get the device tree of a parent device. + + For example, for sda with 3 partitions, we will get: + ["/dev/sda", "/dev/sda1", "/dev/sda2", "/dev/sda3"] + + This function is implemented by calling: + lsblk -lnpo NAME + + Args: + parent_dev (str): The parent device to be checked. + raise_exception (bool, optional): raise exception on subprocess call failed. + Defaults to True. + + Returns: + str: _description_ + """ + cmd = ["lsblk", "-lnpo", "NAME", parent_dev] + raw_res = subprocess_check_output(cmd, raise_exception=raise_exception) + return raw_res.splitlines() + @classmethod def set_ext4_fslabel(cls, dev: str, fslabel: str, *, raise_exception: bool = True): """Set to ext4 formatted . @@ -724,7 +748,7 @@ def prepare_standby_dev( # TODO: in the future if in-place update mode is implemented, do a # fschck over the standby slot file system. if fslabel: - CMDHelperFuncs.set_ext4_fslabel(self.active_slot_dev, fslabel=fslabel) + CMDHelperFuncs.set_ext4_fslabel(self.standby_slot_dev, fslabel=fslabel) def umount_all(self, *, ignore_error: bool = True): logger.debug("unmount standby slot and active slot mount point...") diff --git a/src/otaclient/app/boot_control/_rpi_boot.py b/src/otaclient/app/boot_control/_rpi_boot.py index 330d7022b..d51b1ae7a 100644 --- a/src/otaclient/app/boot_control/_rpi_boot.py +++ b/src/otaclient/app/boot_control/_rpi_boot.py @@ -19,6 +19,7 @@ import logging import os import re +import subprocess from pathlib import Path from string import Template from typing import Generator @@ -33,11 +34,7 @@ from otaclient.app.boot_control.configs import rpi_boot_cfg as cfg from otaclient.app.boot_control.protocol import BootControllerProtocol from otaclient_api.v2 import types as api_types -from otaclient_common.common import ( - replace_atomic, - subprocess_call, - subprocess_check_output, -) +from otaclient_common.common import replace_atomic, subprocess_call logger = logging.getLogger(__name__) @@ -54,12 +51,13 @@ class _RPIBootControllerError(Exception): class _RPIBootControl: """Boot control helper for rpi4 support. - Expected partition layout: - /dev/sda: - - sda1: fat32, fslabel=systemb-boot - - sda2: ext4, fslabel=slot_a - - sda3: ext4, fslabel=slot_b + Supported partition layout: + /dev/sd: + - sd1: fat32, fslabel=systemb-boot + - sd2: ext4, fslabel=slot_a + - sd3: ext4, fslabel=slot_b slot is the fslabel for each AB rootfs. + NOTE that we allow extra partitions with ID after 3. This class provides the following features: 1. AB partition detection, @@ -90,60 +88,79 @@ def __init__(self) -> None: raise ValueError(_err_msg) self._init_slots_info() self._init_boot_files() + self._check_active_slot_id() - def _init_slots_info(self): - """Get current/standby slots info.""" - logger.debug("checking and initializing slots info...") - try: - # detect active slot - _active_slot_dev = CMDHelperFuncs.get_current_rootfs_dev() - assert _active_slot_dev - self._active_slot_dev = _active_slot_dev + def _check_active_slot_id(self): + """Check whether the active slot fslabel is matching the slot id. - _active_slot = CMDHelperFuncs.get_attrs_by_dev( - "LABEL", str(self._active_slot_dev) + If mismatched, try to correct the problem. + """ + fslabel = self.active_slot + actual_fslabel = CMDHelperFuncs.get_attrs_by_dev( + "LABEL", self.active_slot_dev, raise_exception=False + ) + if actual_fslabel == fslabel: + return + + logger.warning( + ( + f"current active slot is {fslabel}, but its fslabel is {actual_fslabel}, " + f"try to correct the fslabel with slot id {fslabel}..." ) - assert _active_slot - self._active_slot = _active_slot - - # detect standby slot - # NOTE: using the similar logic like grub, detect the silibing dev - # of the active slot as standby slot - _parent = CMDHelperFuncs.get_parent_dev(str(self._active_slot_dev)) - assert _parent - - # list children device file from parent device - # exclude parent dev(always in the front) - # expected raw result from lsblk: - # NAME="/dev/sdx" - # NAME="/dev/sdx1" # system-boot - # NAME="/dev/sdx2" # slot_a - # NAME="/dev/sdx3" # slot_b - _check_dev_family_cmd = ["lsblk", "-Ppo", "NAME", _parent] - _raw_child_partitions = subprocess_check_output( - _check_dev_family_cmd, raise_exception=True + ) + try: + CMDHelperFuncs.set_ext4_fslabel(self.active_slot_dev, fslabel) + os.sync() + except subprocess.CalledProcessError as e: + logger.error( + f"failed to correct the fslabel mismatched: {e!r}, {e.stderr.decode()}" ) + logger.error("this might cause problem on future OTA update!") + def _init_slots_info(self): + """Get current/standby slots info.""" + logger.debug("checking and initializing slots info...") + try: + # ------ detect active slot ------ # + active_slot_dev = CMDHelperFuncs.get_current_rootfs_dev() + assert active_slot_dev + self._active_slot_dev = active_slot_dev + + # detect the parent device of boot device + # i.e., for /dev/sda2 here we get /dev/sda + parent_dev = CMDHelperFuncs.get_parent_dev(str(self._active_slot_dev)) + assert parent_dev + + # get device tree, for /dev/sda device, we will get: + # ["/dev/sda", "/dev/sda1", "/dev/sda2", "/dev/sda3"] + _device_tree = CMDHelperFuncs.get_device_tree(parent_dev) + # remove the parent dev itself and system-boot partition + device_tree = _device_tree[2:] + + # Now we should only have two partitions in the device_tree list: + # /dev/sda2, /dev/sda3 + # NOTE that we allow extra partitions presented after sd3. + assert ( + len(device_tree) >= 2 + ), f"unexpected partition layout: {_device_tree=}" + + # get the active slot ID by its position in the disk try: - # NOTE: exclude the first 2 lines(parent and system-boot) - _child_partitions = [ - raw.split("=")[-1].strip('"') - for raw in _raw_child_partitions.splitlines()[2:] - ] - if ( - len(_child_partitions) != 2 - or self._active_slot_dev not in _child_partitions - ): - raise ValueError - _child_partitions.remove(self._active_slot_dev) - except Exception: + idx = device_tree.index(active_slot_dev) + except ValueError: raise ValueError( - f"unexpected partition layout: {_raw_child_partitions}" - ) from None - # it is OK if standby_slot dev doesn't have fslabel or fslabel != standby_slot_id - # we will always set the fslabel - self._standby_slot = self.AB_FLIPS[self._active_slot] - self._standby_slot_dev = _child_partitions[0] + f"active lost is not in the device tree: {active_slot_dev=}, {device_tree=}" + ) + + if idx == 0: # slot_a + self._active_slot = self.SLOT_A + self._standby_slot = self.SLOT_B + self._standby_slot_dev = device_tree[1] + elif idx == 1: # slot_b + self._active_slot = self.SLOT_B + self._standby_slot = self.SLOT_A + self._standby_slot_dev = device_tree[0] + logger.info( f"rpi_boot: active_slot: {self._active_slot}({self._active_slot_dev}), " f"standby_slot: {self._standby_slot}({self._standby_slot_dev})"