diff --git a/otaclient/app/boot_control/_common.py b/otaclient/app/boot_control/_common.py index f601906b5..be3444ae8 100644 --- a/otaclient/app/boot_control/_common.py +++ b/otaclient/app/boot_control/_common.py @@ -424,8 +424,7 @@ def reboot(cls): class OTAStatusFilesControl: - """Logics for controlling otaclient's behavior using ota_status files, - including status, slot_in_use and version. + """Logics for controlling otaclient's OTA status with corresponding files. OTAStatus files: status: current slot's OTA status @@ -444,6 +443,7 @@ def __init__( current_ota_status_dir: Union[str, Path], standby_ota_status_dir: Union[str, Path], finalize_switching_boot: FinalizeSwitchBootFunc, + force_initialize: bool = False, ) -> None: self.active_slot = active_slot self.standby_slot = standby_slot @@ -451,49 +451,30 @@ def __init__( self.standby_ota_status_dir = Path(standby_ota_status_dir) self.finalize_switching_boot = finalize_switching_boot - # NOTE: pre-assign live ota_status with the loaded ota_status, - # and then update live ota_status in below. - # The reason is for some platform, like raspberry pi 4B, - # the finalize_switching_boot might be slow, so we first provide - # live ota_status the same as loaded ota_status(or INITIALIZED), - # then update it after the init_ota_status_files finished. - _loaded_ota_status = self._load_current_status() - self._ota_status = ( - _loaded_ota_status if _loaded_ota_status else wrapper.StatusOta.INITIALIZED - ) - - _loaded_slot_in_use = self._load_current_slot_in_use() - if _loaded_slot_in_use and _loaded_slot_in_use != self.active_slot: - logger.warning( - f"boot into old slot {self.active_slot}, " - f"but slot_in_use indicates it should boot into {_loaded_slot_in_use}, " - "this might indicate a failed finalization at first reboot after update/rollback" - ) - - # initializing ota_status control - self._init_ota_status_files() + self._force_initialize = force_initialize + self.current_ota_status_dir.mkdir(exist_ok=True, parents=True) + self._load_slot_in_use_file() + self._load_status_file() logger.info( - f"ota_status files parsing completed, ota_status is {self._ota_status}" + f"ota_status files parsing completed, ota_status is {self._ota_status.name}" ) - def _init_ota_status_files(self): + def _load_status_file(self): """Check and/or init ota_status files for current slot.""" - self.current_ota_status_dir.mkdir(exist_ok=True, parents=True) - - # load ota_status and slot_in_use file _loaded_ota_status = self._load_current_status() - _loaded_slot_in_use = self._load_current_slot_in_use() + if self._force_initialize: + _loaded_ota_status = None # initialize ota_status files if not presented/incompleted/invalid - if not (_loaded_ota_status and _loaded_slot_in_use): + if not _loaded_ota_status: logger.info( "ota_status files incompleted/not presented, " - "initializing and set/store status to INITIALIZED..." + f"initializing and set/store status to {wrapper.StatusOta.INITIALIZED.name}..." ) - self._store_current_slot_in_use(self.active_slot) self._store_current_status(wrapper.StatusOta.INITIALIZED) self._ota_status = wrapper.StatusOta.INITIALIZED return + logger.info(f"status loaded from file: {_loaded_ota_status.name}") # status except UPDATING and ROLLBACKING(like SUCCESS/FAILURE/ROLLBACK_FAILURE) # are remained as it @@ -505,8 +486,11 @@ def _init_ota_status_files(self): return # updating or rollbacking, + # NOTE: pre-assign live ota_status with the loaded ota_status before entering finalizing_switch_boot, + # as some of the platform might have slow finalizing process(like raspberry). + self._ota_status = _loaded_ota_status # if is_switching_boot, execute the injected finalize_switching_boot function from - # boot controller, transit the ota_status according to the execution result. + # boot controller, transit the ota_status according to the execution result. # NOTE(20230614): for boot controller during multi-stage reboot(like rpi_boot), # calling finalize_switching_boot might result in direct reboot, # in such case, otaclient will terminate and ota_status will not be updated. @@ -528,7 +512,7 @@ def _init_ota_status_files(self): else: logger.error( f"we are in {_loaded_ota_status.name} ota_status, " - f"but {_loaded_slot_in_use=} doesn't match {self.active_slot=}, " + "but ota_status files indicate that we are not in switching boot mode, " "this indicates a failed first reboot" ) self._ota_status = ( @@ -538,6 +522,27 @@ def _init_ota_status_files(self): ) self._store_current_status(self._ota_status) + def _load_slot_in_use_file(self): + _loaded_slot_in_use = self._load_current_slot_in_use() + if self._force_initialize: + _loaded_slot_in_use = None + + if not _loaded_slot_in_use: + # NOTE(20230831): this can also resolve the backward compatibility issue + # in is_switching_boot method when old otaclient doesn't create + # slot_in_use file. + self._store_current_slot_in_use(self.active_slot) + return + logger.info(f"slot_in_use loaded from file: {_loaded_slot_in_use}") + + # check potential failed switching boot + if _loaded_slot_in_use and _loaded_slot_in_use != self.active_slot: + logger.warning( + f"boot into old slot {self.active_slot}, " + f"but slot_in_use indicates it should boot into {_loaded_slot_in_use}, " + "this might indicate a failed finalization at first reboot after update/rollback" + ) + # slot_in_use control def _store_current_slot_in_use(self, _slot: str): @@ -637,15 +642,11 @@ def pre_rollback_standby(self): self._store_standby_status(wrapper.StatusOta.ROLLBACKING) def load_active_slot_version(self) -> str: - _version = read_str_from_file( + return read_str_from_file( self.current_ota_status_dir / cfg.OTA_VERSION_FNAME, missing_ok=True, - default="", + default=cfg.DEFAULT_VERSION_STR, ) - if not _version: - logger.warning("version file not found, return empty version string") - - return _version def on_failure(self): """Store FAILURE to status file on failure.""" @@ -655,8 +656,14 @@ def on_failure(self): self._store_standby_status(wrapper.StatusOta.FAILURE) @property - def ota_status(self) -> wrapper.StatusOta: - """Read only ota_status property.""" + def booted_ota_status(self) -> wrapper.StatusOta: + """Loaded current slot's ota_status during boot control starts. + + NOTE: distinguish between the live ota_status maintained by otaclient. + + This property is only meant to be used once when otaclient starts up, + switch to use live_ota_status by otaclient after otaclient is running. + """ return self._ota_status @@ -705,7 +712,7 @@ def _load_current_ota_status(self) -> Optional[wrapper.StatusOta]: except KeyError: pass # invalid status string - def get_ota_status(self) -> wrapper.StatusOta: + def get_booted_ota_status(self) -> wrapper.StatusOta: return self.ota_status @@ -788,7 +795,12 @@ def __init__( self.standby_slot_mount_point.mkdir(exist_ok=True, parents=True) self.active_slot_mount_point.mkdir(exist_ok=True, parents=True) # standby slot /boot dir - self.standby_boot_dir = self.standby_slot_mount_point / "boot" + # NOTE(20230907): this will always be /boot, + # in the future this attribute will not be used by + # standby slot creater. + self.standby_boot_dir = self.standby_slot_mount_point / Path( + cfg.BOOT_DIR + ).relative_to("/") def mount_standby(self, *, raise_exc: bool = True) -> bool: """Mount standby slot dev to . diff --git a/otaclient/app/boot_control/_grub.py b/otaclient/app/boot_control/_grub.py index 8d9a38098..a65281298 100644 --- a/otaclient/app/boot_control/_grub.py +++ b/otaclient/app/boot_control/_grub.py @@ -13,7 +13,6 @@ # limitations under the License. -import os import re import shutil from dataclasses import dataclass @@ -42,10 +41,8 @@ from . import _errors from ._common import ( CMDHelperFuncs, - OTAStatusMixin, - PrepareMountMixin, - SlotInUseMixin, - VersionControlMixin, + OTAStatusFilesControl, + SlotMountHelper, cat_proc_cmdline, ) from .configs import grub_cfg as cfg @@ -58,7 +55,7 @@ @dataclass -class GrubMenuEntry: +class _GrubMenuEntry: """ NOTE: should only be called by the get_entry method @@ -112,15 +109,17 @@ class GrubHelper: initrd_pa: ClassVar[re.Pattern] = re.compile( r"^\s+initrd.*(?Pinitrd.img-(?P[\.\w-]*))", re.MULTILINE ) + kernel_fname_pa: ClassVar[re.Pattern] = re.compile(r"^vmlinuz-(?P[\.\w-]*)$") VMLINUZ = "vmlinuz" INITRD = "initrd.img" + FNAME_VER_SPLITTER = "-" SUFFIX_OTA = "ota" SUFFIX_OTA_STANDBY = "ota.standby" - KERNEL_OTA = f"{VMLINUZ}-{SUFFIX_OTA}" - KERNEL_OTA_STANDBY = f"{VMLINUZ}-{SUFFIX_OTA_STANDBY}" - INITRD_OTA = f"{INITRD}-{SUFFIX_OTA}" - INITRD_OTA_STANDBY = f"{INITRD}-{SUFFIX_OTA_STANDBY}" + KERNEL_OTA = f"{VMLINUZ}{FNAME_VER_SPLITTER}{SUFFIX_OTA}" + KERNEL_OTA_STANDBY = f"{VMLINUZ}{FNAME_VER_SPLITTER}{SUFFIX_OTA_STANDBY}" + INITRD_OTA = f"{INITRD}{FNAME_VER_SPLITTER}{SUFFIX_OTA}" + INITRD_OTA_STANDBY = f"{INITRD}{FNAME_VER_SPLITTER}{SUFFIX_OTA_STANDBY}" grub_default_options: ClassVar[Dict[str, str]] = { "GRUB_TIMEOUT_STYLE": "menu", @@ -139,7 +138,7 @@ def update_entry_rootfs( rootfs_str: str, start: int = 0, ) -> Optional[str]: - """Read in grub_cfg, update all entries' rootfs with , + """Read in grub_cfg, update matched kernel entries' rootfs with , and then return the updated one. Params: @@ -188,16 +187,18 @@ def update_entry_rootfs( return grub_cfg @classmethod - def get_entry(cls, grub_cfg: str, *, kernel_ver: str) -> Tuple[int, GrubMenuEntry]: + def get_entry(cls, grub_cfg: str, *, kernel_ver: str) -> Tuple[int, _GrubMenuEntry]: """Find the FIRST entry that matches the . + NOTE: assume that the FIRST matching entry is the normal entry, which is correct in most cases(recovery entry will always - be after the normal boot entry.) + be after the normal boot entry, and we by defautl disable + recovery entry). """ for index, entry_ma in enumerate(cls.menuentry_pa.finditer(grub_cfg)): if _linux := cls.linux_pa.search(entry_ma.group()): if kernel_ver == _linux.group("ver"): - return index, GrubMenuEntry(entry_ma) + return index, _GrubMenuEntry(entry_ma) raise ValueError(f"requested entry for {kernel_ver} not found") @@ -263,8 +264,9 @@ def grub_reboot(idx: int): raise -class GrubABPartitionDetecter: - """ +class GrubABPartitionDetector: + """A/B partition detector for ota-partition on grub booted system. + Expected layout: (system boots with legacy BIOS) /dev/sdx @@ -338,100 +340,131 @@ def _detect_standby_slot(self, active_dev: str) -> Tuple[str, str]: slot_name = dev_path.lstrip("/dev/") return slot_name, dev_path - ###### public methods ###### - def get_standby_slot(self) -> str: - return self.standby_slot - def get_standby_slot_dev(self) -> str: - return self.standby_dev +class _GrubControl: + """Implementation of ota-partition switch boot mechanism.""" - def get_active_slot(self) -> str: - return self.active_slot + def __init__(self) -> None: + ab_detector = GrubABPartitionDetector() + self.active_root_dev = ab_detector.active_dev + self.standby_root_dev = ab_detector.standby_dev + self.active_slot = ab_detector.active_slot + self.standby_slot = ab_detector.standby_slot + logger.info(f"{self.active_slot=}, {self.standby_slot=}") - def get_active_slot_dev(self) -> str: - return self.active_dev + self.boot_dir = Path(cfg.BOOT_DIR) + self.grub_file = Path(cfg.GRUB_CFG_PATH) + self.ota_partition_symlink = self.boot_dir / cfg.BOOT_OTA_PARTITION_FILE + self.active_ota_partition_folder = ( + self.boot_dir / cfg.BOOT_OTA_PARTITION_FILE + ).with_suffix(f".{self.active_slot}") + self.active_ota_partition_folder.mkdir(exist_ok=True) -class _SymlinkABPartitionDetecter: - """Implementation of legacy way to detect active/standby slot. + self.standby_ota_partition_folder = ( + self.boot_dir / cfg.BOOT_OTA_PARTITION_FILE + ).with_suffix(f".{self.standby_slot}") + self.standby_ota_partition_folder.mkdir(exist_ok=True) - NOTE: this is re-introduced for backward compatibility reason. + # NOTE: standby slot will be prepared in an OTA, GrubControl init will not check + # standby slot's ota-partition folder. + self._grub_control_initialized = False + self._check_active_slot_ota_partition_file() - Get the active slot by reading the symlink target of /boot/ota-partition. - if ota-partition -> ota-partition.sda3, then active slot is sda3. + @property + def initialized(self) -> bool: + """Indicates whether grub_control migrates itself from non-OTA booted system, + or recovered from a ota_partition files corrupted boot. - If there are ota-partition.sda2 and ota-partition.sda3 exist under /boot, and - ota-partition -> ota-partition.sda3, then sda2 is the standby slot. - """ + Normally this property should be false, if it is true, OTAStatusControl should also + initialize itself. + """ + return self._grub_control_initialized - @classmethod - def _get_active_slot_by_symlink(cls) -> str: - try: - ota_partition_symlink = Path(cfg.BOOT_DIR) / cfg.BOOT_OTA_PARTITION_FILE - active_ota_partition_file = os.readlink(ota_partition_symlink) + def _check_active_slot_ota_partition_file(self): + """Check and ensure active ota-partition files, init if needed. - return Path(active_ota_partition_file).suffix.strip(".") - except FileNotFoundError: - raise _errors.ABPartitionError("ota-partition files are broken") + GrubControl supports migrates system that doesn't boot via ota-partition + mechanism to using ota-partition mechanism. It also supports fixing ota-partition + symlink missing or corrupted. - @classmethod - def _get_standby_slot_by_symlink(cls) -> str: - """ - NOTE: expecting to have only 2 ota-partition files for A/B partition each. + NOTE: + 1. this method only update the ota-partition./grub.cfg! + 2. standby slot is not considered here! + 3. expected booted kernel/initrd located under /boot. """ - boot_dir = Path(cfg.BOOT_DIR) - try: - ota_partition_fs = list(boot_dir.glob(f"{cfg.BOOT_OTA_PARTITION_FILE}.*")) + # ------ check boot files ------ # + vmlinuz_active_slot = self.active_ota_partition_folder / GrubHelper.KERNEL_OTA + initrd_active_slot = self.active_ota_partition_folder / GrubHelper.INITRD_OTA + active_slot_ota_boot_files_missing = ( + not vmlinuz_active_slot.is_file() or not initrd_active_slot.is_file() + ) - active_slot = cls._get_active_slot_by_symlink() - active_slot_ota_partition_file = ( - boot_dir / f"{cfg.BOOT_OTA_PARTITION_FILE}.{active_slot}" + try: + kernel_booted_fpath, initrd_booted_fpath = self._get_current_booted_files() + kernel_booted, initrd_booted = ( + Path(kernel_booted_fpath).name, + Path(initrd_booted_fpath).name, ) - ota_partition_fs.remove(active_slot_ota_partition_file) - assert len(ota_partition_fs) == 1 - except (ValueError, AssertionError): - raise _errors.ABPartitionError("ota-partition files are broken") - - (standby_ota_partition_file,) = ota_partition_fs - return standby_ota_partition_file.suffix.strip(".") + # NOTE: current slot might be booted with ota(normal), or ota.standby(during update) + not_booted_with_ota_mechanism = kernel_booted not in ( + GrubHelper.KERNEL_OTA, + GrubHelper.KERNEL_OTA_STANDBY, + ) or initrd_booted not in ( + GrubHelper.INITRD_OTA, + GrubHelper.INITRD_OTA_STANDBY, + ) + except ValueError as e: + logger.error( + f"failed to get current booted kernel and initrd.image: {e!r}, " + "try to use active slot ota-partition files" + ) + kernel_booted, initrd_booted = vmlinuz_active_slot, initrd_active_slot + not_booted_with_ota_mechanism = True + ota_partition_symlink_missing = not self.ota_partition_symlink.is_symlink() -class _GrubControl: - """Implementation of ota-partition switch boot mechanism.""" + if ( + not_booted_with_ota_mechanism + or active_slot_ota_boot_files_missing + or ota_partition_symlink_missing + ): + logger.warning( + "system is not booted with ota mechanism(" + f"{not_booted_with_ota_mechanism=}, {active_slot_ota_boot_files_missing=}, {ota_partition_symlink_missing=}), " + f"migrating and initializing ota-partition files for {self.active_slot}@{self.active_root_dev}..." + ) - def __init__(self) -> None: - """NOTE: init only, no changes will be made in the __init__.""" - ab_detecter = GrubABPartitionDetecter() - self.active_root_dev = ab_detecter.get_active_slot_dev() - self.standby_root_dev = ab_detecter.get_standby_slot_dev() - self.active_slot = ab_detecter.get_active_slot() - self.standby_slot = ab_detecter.get_standby_slot() - logger.info(f"{self.active_slot=}, {self.standby_slot=}") + # NOTE: just copy but not cleanup the booted kernel/initrd files + if active_slot_ota_boot_files_missing: + shutil.copy( + self.boot_dir / kernel_booted, + self.active_ota_partition_folder, + follow_symlinks=True, + ) + shutil.copy( + self.boot_dir / initrd_booted, + self.active_ota_partition_folder, + follow_symlinks=True, + ) - self.boot_dir = Path(cfg.BOOT_DIR) - self.grub_file = Path(cfg.GRUB_CFG_PATH) - self.grub_default_file = Path(cfg.ACTIVE_ROOTFS_PATH) / Path( - cfg.DEFAULT_GRUB_PATH - ).relative_to("/") + # recreate all ota-partition files for active slot + self._prepare_kernel_initrd_links(self.active_ota_partition_folder) + self._ensure_ota_partition_symlinks(active_slot=self.active_slot) + self._ensure_standby_slot_boot_files_symlinks( + standby_slot=self.standby_slot + ) + self._grub_update_on_booted_slot() + self._grub_control_initialized = True - self.ota_partition_folder = self.boot_dir / cfg.BOOT_OTA_PARTITION_FILE - self.active_ota_partition_folder = ( - self.boot_dir / cfg.BOOT_OTA_PARTITION_FILE - ).with_suffix(f".{self.active_slot}") - self.active_grub_file = self.active_ota_partition_folder / "grub.cfg" + logger.info(f"ota-partition files for {self.active_slot} are ready") - self.standby_ota_partition_folder = ( - self.boot_dir / cfg.BOOT_OTA_PARTITION_FILE - ).with_suffix(f".{self.standby_slot}") - self.standby_grub_file = self.standby_ota_partition_folder / "grub.cfg" - - # create ota-partition folders for each - self.active_ota_partition_folder.mkdir(exist_ok=True) - self.standby_ota_partition_folder.mkdir(exist_ok=True) + def _get_current_booted_files(self) -> Tuple[str, str]: + """Return the name of booted kernel and initrd. - def _get_current_booted_kernel_and_initrd(self) -> Tuple[str, str]: - """Return the name of booted kernel and initrd.""" + Expected booted kernel and initrd are located under /boot. + """ boot_cmdline = cat_proc_cmdline() if kernel_ma := re.search( r"BOOT_IMAGE=.*(?Pvmlinuz-(?P[\w\.\-]*))", @@ -442,74 +475,75 @@ def _get_current_booted_kernel_and_initrd(self) -> Tuple[str, str]: raise ValueError("failed to detect booted linux kernel") # lookup the grub file and find the booted entry - _, entry = GrubHelper.get_entry( - read_str_from_file(self.grub_file), kernel_ver=kernel_ver - ) - logger.info(f"detected booted param: {entry.linux=}, {entry.initrd=}") - return entry.linux, entry.initrd + # NOTE(20230905): use standard way to find initrd img + initrd_img = f"{GrubHelper.INITRD}{GrubHelper.FNAME_VER_SPLITTER}{kernel_ver}" + if not (Path(cfg.BOOT_DIR) / initrd_img).is_file(): + raise ValueError(f"failed to find booted initrd image({initrd_img})") + return kernel_ma.group("kernel"), initrd_img @staticmethod - def _prepare_kernel_initrd_links_for_ota(target_folder: Path): - """ - prepare links for kernel/initrd + def _prepare_kernel_initrd_links(target_folder: Path): + """Prepare OTA symlinks for kernel/initrd under specific ota-partition folder. vmlinuz-ota -> vmlinuz-* initrd-ota -> initrd-* """ kernel, initrd = None, None + # NOTE(20230914): if multiple kernels presented, the first found + # kernel(along with corresponding initrd.img) will be used. for f in target_folder.glob("*"): if ( - f.name.find(GrubHelper.VMLINUZ) == 0 - and not f.is_symlink() - and kernel is None - ): - kernel = f.name - elif ( - f.name.find(GrubHelper.INITRD) == 0 - and not f.is_symlink() - and initrd is None + not f.is_symlink() + and (_kernel_fname := f.name) != GrubHelper.KERNEL_OTA + and (kernel_ma := GrubHelper.kernel_fname_pa.match(f.name)) ): - initrd = f.name - - if kernel and initrd: - break - + kernel_ver = kernel_ma.group("ver") + _initrd_fname = ( + f"{GrubHelper.INITRD}{GrubHelper.FNAME_VER_SPLITTER}{kernel_ver}" + ) + if (target_folder / _initrd_fname).is_file(): + kernel, initrd = _kernel_fname, _initrd_fname + break if not (kernel and initrd): raise ValueError(f"vmlinuz and/or initrd.img not found at {target_folder}") - kernel_ota = target_folder / GrubHelper.KERNEL_OTA - initrd_ota = target_folder / GrubHelper.INITRD_OTA - re_symlink_atomic(kernel_ota, kernel) - re_symlink_atomic(initrd_ota, initrd) + re_symlink_atomic(target_folder / GrubHelper.KERNEL_OTA, kernel) + re_symlink_atomic(target_folder / GrubHelper.INITRD_OTA, initrd) logger.info(f"finished generate ota symlinks under {target_folder}") - def _grub_update_for_active_slot(self, *, abort_on_standby_missed=True): - """Generate current active grub_file from the view of current active slot. + def _grub_update_on_booted_slot(self, *, abort_on_standby_missed=True): + """Update grub_default and generate grub.cfg for current booted slot. NOTE: - 1. this method only ensures the entry existence for ota(current active slot). - 2. this method ensures the default entry to be the current active slot. + 1. this method only ensures the entry existence for current booted slot. + 2. this method ensures the default entry to be the current booted slot. """ + grub_default_file = Path(cfg.ACTIVE_ROOTFS_PATH) / Path( + cfg.DEFAULT_GRUB_PATH + ).relative_to("/") + # NOTE: If the path points to a symlink, exists() returns - # whether the symlink points to an existing file or directory. + # whether the symlink points to an existing file or directory. active_vmlinuz = self.boot_dir / GrubHelper.KERNEL_OTA active_initrd = self.boot_dir / GrubHelper.INITRD_OTA - if not (active_vmlinuz.exists() and active_initrd.exists()): + if not (active_vmlinuz.is_file() and active_initrd.is_file()): msg = ( - "vmlinuz and/or initrd for active slot is not available, " - "refuse to update_grub" + "/boot/vmlinuz-ota and/or /boot/initrd.img-ota are broken, " + "refuse to do grub-update" ) logger.error(msg) raise ValueError(msg) # step1: update grub_default file - _in = self.grub_default_file.read_text() + _in = grub_default_file.read_text() _out = GrubHelper.update_grub_default(_in) - self.grub_default_file.write_text(_out) + write_str_to_file_sync(grub_default_file, _out) # step2: generate grub_cfg by grub-mkconfig # parse the output and find the active slot boot entry idx - grub_cfg = GrubHelper.grub_mkconfig() - if res := GrubHelper.get_entry(grub_cfg, kernel_ver=GrubHelper.SUFFIX_OTA): + grub_cfg_content = GrubHelper.grub_mkconfig() + if res := GrubHelper.get_entry( + grub_cfg_content, kernel_ver=GrubHelper.SUFFIX_OTA + ): active_slot_entry_idx, _ = res else: raise ValueError("boot entry for ACTIVE slot not found, abort") @@ -520,55 +554,52 @@ def _grub_update_for_active_slot(self, *, abort_on_standby_missed=True): f"boot entry for vmlinuz-ota(slot={self.active_slot}): {active_slot_entry_idx}" ) _out = GrubHelper.update_grub_default( - self.grub_default_file.read_text(), + grub_default_file.read_text(), default_entry_idx=active_slot_entry_idx, ) logger.debug(f"generated grub_default: {pformat(_out)}") - write_str_to_file_sync(self.grub_default_file, _out) + write_str_to_file_sync(grub_default_file, _out) # step4: populate new active grub_file # update the ota.standby entry's rootfs uuid to standby slot's uuid - grub_cfg = GrubHelper.grub_mkconfig() + active_slot_grub_file = self.active_ota_partition_folder / cfg.GRUB_CFG_FNAME + + grub_cfg_content = GrubHelper.grub_mkconfig() standby_uuid_str = CMDHelperFuncs.get_uuid_str_by_dev(self.standby_root_dev) if grub_cfg_updated := GrubHelper.update_entry_rootfs( - grub_cfg, + grub_cfg_content, kernel_ver=GrubHelper.SUFFIX_OTA_STANDBY, rootfs_str=f"root={standby_uuid_str}", ): - write_str_to_file_sync(self.active_grub_file, grub_cfg_updated) + write_str_to_file_sync(active_slot_grub_file, grub_cfg_updated) logger.info(f"standby rootfs: {standby_uuid_str}") logger.debug(f"generated grub_cfg: {pformat(grub_cfg_updated)}") else: msg = ( - "boot entry for standby slot not found, " + "/boot/vmlinuz-ota.standby and/or /boot/initrd.img-ota.standby not found, " "only current active slot's entry is populated." ) if abort_on_standby_missed: raise ValueError(msg) logger.warning(msg) - logger.info(f"generated grub_cfg: {pformat(grub_cfg)}") - write_str_to_file_sync(self.active_grub_file, grub_cfg) + logger.info(f"generated grub_cfg: {pformat(grub_cfg_content)}") + write_str_to_file_sync(active_slot_grub_file, grub_cfg_content) - # finally, symlink /boot/grub.cfg to ../ota-partition/grub.cfg - ota_partition_folder = Path(cfg.BOOT_OTA_PARTITION_FILE) # ota-partition + # finally, point grub.cfg to active slot's grub.cfg re_symlink_atomic( # /boot/grub/grub.cfg -> ../ota-partition/grub.cfg self.grub_file, - Path("../") / ota_partition_folder / "grub.cfg", + Path("../") / cfg.BOOT_OTA_PARTITION_FILE / "grub.cfg", ) logger.info(f"update_grub for {self.active_slot} finished.") - def _ensure_ota_partition_symlinks(self): - """ - NOTE: this method prepare symlinks from active slot's point of view. - NOTE 2: grub_cfg symlink will not be generated here, it will be linked - in grub_update method - """ - # prepare ota-partition symlinks + def _ensure_ota_partition_symlinks(self, active_slot: str): + """Ensure /boot/{ota_partition,vmlinuz-ota,initrd.img-ota} symlinks from + specified point's of view.""" ota_partition_folder = Path(cfg.BOOT_OTA_PARTITION_FILE) # ota-partition re_symlink_atomic( # /boot/ota-partition -> ota-partition. self.boot_dir / ota_partition_folder, - ota_partition_folder.with_suffix(f".{self.active_slot}"), + ota_partition_folder.with_suffix(f".{active_slot}"), ) re_symlink_atomic( # /boot/vmlinuz-ota -> ota-partition/vmlinuz-ota self.boot_dir / GrubHelper.KERNEL_OTA, @@ -578,70 +609,71 @@ def _ensure_ota_partition_symlinks(self): self.boot_dir / GrubHelper.INITRD_OTA, ota_partition_folder / GrubHelper.INITRD_OTA, ) - re_symlink_atomic( # /boot/vmlinuz-ota.standby -> ota-partition./vmlinuz + + def _ensure_standby_slot_boot_files_symlinks(self, standby_slot: str): + """Ensure boot files symlinks for specified .""" + ota_partition_folder = Path(cfg.BOOT_OTA_PARTITION_FILE) # ota-partition + re_symlink_atomic( # /boot/vmlinuz-ota.standby -> ota-partition./vmlinuz-ota self.boot_dir / GrubHelper.KERNEL_OTA_STANDBY, - ota_partition_folder.with_suffix(f".{self.standby_slot}") + ota_partition_folder.with_suffix(f".{standby_slot}") / GrubHelper.KERNEL_OTA, ) re_symlink_atomic( # /boot/initrd.img-ota.standby -> ota-partition./initrd.img-ota self.boot_dir / GrubHelper.INITRD_OTA_STANDBY, - ota_partition_folder.with_suffix(f".{self.standby_slot}") + ota_partition_folder.with_suffix(f".{standby_slot}") / GrubHelper.INITRD_OTA, ) - ###### public methods ###### - def reprepare_active_ota_partition_file(self, *, abort_on_standby_missed: bool): - self._prepare_kernel_initrd_links_for_ota(self.active_ota_partition_folder) - # switch ota-partition symlink to current active slot - self._ensure_ota_partition_symlinks() - self._grub_update_for_active_slot( - abort_on_standby_missed=abort_on_standby_missed - ) - - def reprepare_standby_ota_partition_file(self): - """NOTE: this method still updates active grub file under active ota-partition folder.""" - self._prepare_kernel_initrd_links_for_ota(self.standby_ota_partition_folder) - self._ensure_ota_partition_symlinks() - self._grub_update_for_active_slot(abort_on_standby_missed=True) + # API - def init_active_ota_partition_file(self): - """Prepare active ota-partition folder and ensure the existence of - symlinks needed for ota update. + def prepare_standby_dev(self, *, erase_standby: bool): + """ + Args: + erase_standby: indicate boot_controller whether to format the + standby slot's file system or not. This value is indicated and + passed to boot controller by the standby slot creator. + """ + try: + # try to unmount the standby root dev unconditionally + if CMDHelperFuncs.is_target_mounted(self.standby_root_dev): + CMDHelperFuncs.umount(self.standby_root_dev) + + if erase_standby: + CMDHelperFuncs.mkfs_ext4(self.standby_root_dev) + # TODO: check the standby file system status + # if not erase the standby slot + except Exception as e: + _err_msg = f"failed to prepare standby dev: {e!r}" + raise BootControlPreUpdateFailed(_err_msg) from e - GrubController supports migrates system that doesn't boot via ota-partition - mechanism(possibly using different grub configuration, i.e., grub submenu enabled) - to use ota-partition. + def finalize_update_switch_boot(self): + """Finalize switch boot and use boot files from current booted slot.""" + # NOTE: since we have not yet switched boot, the active/standby relationship is + # reversed here corresponding to booted slot. + self._prepare_kernel_initrd_links(self.standby_ota_partition_folder) + self._ensure_ota_partition_symlinks(active_slot=self.standby_slot) + self._ensure_standby_slot_boot_files_symlinks(standby_slot=self.active_slot) - NOTE: - 1. only update the ota-partition./grub.cfg! - 2. standby slot is not considered here! - 3. expected previously booted kernel/initrd to be located at /boot - """ - # check the current booted kernel, - # if it is not vmlinuz-ota, copy that kernel to active ota_partition folder - cur_kernel, cur_initrd = self._get_current_booted_kernel_and_initrd() - if cur_kernel != GrubHelper.KERNEL_OTA or cur_initrd != GrubHelper.INITRD_OTA: - logger.info( - "system doesn't use ota-partition mechanism to boot, " - "initializing ota-partition file..." - ) - # NOTE: just copy but not cleanup the existed kernel/initrd files - shutil.copy( - self.boot_dir / cur_kernel, - self.active_ota_partition_folder, - follow_symlinks=True, - ) - shutil.copy( - self.boot_dir / cur_initrd, - self.active_ota_partition_folder, - follow_symlinks=True, - ) - self.reprepare_active_ota_partition_file(abort_on_standby_missed=False) + self._grub_update_on_booted_slot(abort_on_standby_missed=True) - logger.info("ota-partition file initialized") + # switch ota-partition symlink to current booted slot + self._ensure_ota_partition_symlinks(active_slot=self.active_slot) + self._ensure_standby_slot_boot_files_symlinks(standby_slot=self.standby_slot) + return True def grub_reboot_to_standby(self): - self.reprepare_standby_ota_partition_file() + """Temporarily boot to standby slot after OTA applied to standby slot.""" + # ensure all required symlinks for standby slot are presented and valid + self._prepare_kernel_initrd_links(self.standby_ota_partition_folder) + self._ensure_standby_slot_boot_files_symlinks(standby_slot=self.standby_slot) + + # ensure all required symlinks for active slot are presented and valid + # NOTE: reboot after post-update is still using the current active slot's + # ota-partition symlinks(not yet switch boot). + self._prepare_kernel_initrd_links(self.active_ota_partition_folder) + self._ensure_ota_partition_symlinks(active_slot=self.active_slot) + self._grub_update_on_booted_slot(abort_on_standby_missed=True) + idx, _ = GrubHelper.get_entry( read_str_from_file(self.grub_file), kernel_ver=GrubHelper.SUFFIX_OTA_STANDBY, @@ -649,130 +681,31 @@ def grub_reboot_to_standby(self): GrubHelper.grub_reboot(idx) logger.info(f"system will reboot to {self.standby_slot=}: boot entry {idx}") - finalize_update_switch_boot = reprepare_active_ota_partition_file - -class GrubController( - VersionControlMixin, - OTAStatusMixin, - PrepareMountMixin, - SlotInUseMixin, - BootControllerProtocol, -): +class GrubController(BootControllerProtocol): def __init__(self) -> None: try: self._boot_control = _GrubControl() - - # try to unmount standby dev if possible - CMDHelperFuncs.umount(self._boot_control.standby_root_dev) - self.standby_slot_mount_point = Path(cfg.MOUNT_POINT) - self.standby_slot_mount_point.mkdir(exist_ok=True) - - ## ota-status dir - self.current_ota_status_dir = self._boot_control.active_ota_partition_folder - self.standby_ota_status_dir = ( - self._boot_control.standby_ota_partition_folder + self._mp_control = SlotMountHelper( + standby_slot_dev=self._boot_control.standby_root_dev, + standby_slot_mount_point=cfg.MOUNT_POINT, + active_slot_dev=self._boot_control.active_root_dev, + active_slot_mount_point=cfg.ACTIVE_ROOT_MOUNT_POINT, + ) + self._ota_status_control = OTAStatusFilesControl( + active_slot=self._boot_control.active_slot, + standby_slot=self._boot_control.standby_slot, + current_ota_status_dir=self._boot_control.active_ota_partition_folder, + standby_ota_status_dir=self._boot_control.standby_ota_partition_folder, + finalize_switching_boot=self._boot_control.finalize_update_switch_boot, + # NOTE(20230904): if boot control is initialized(i.e., migrate from non-ota booted system), + # force initialize the ota_status files. + force_initialize=self._boot_control.initialized, ) - - # refroot mount point - self.ref_slot_mount_point = Path(cfg.ACTIVE_ROOT_MOUNT_POINT) - # try to umount refroot mount point - CMDHelperFuncs.umount(self.ref_slot_mount_point) - if not os.path.isdir(self.ref_slot_mount_point): - os.mkdir(self.ref_slot_mount_point) - - # init boot control - # 1. load/process ota_status - # 2. finalize update/rollback or init boot files - self._init_boot_control() except Exception as e: logger.error(f"failed on init boot controller: {e!r}") raise BootControlInitError from e - def _init_boot_control(self): - # load ota_status str and slot_in_use - _ota_status = self._load_current_ota_status() - _slot_in_use = self._load_current_slot_in_use() - - # NOTE: for backward compatibility, only check otastatus file - if not _ota_status: - logger.info("initializing boot control files...") - _ota_status = wrapper.StatusOta.INITIALIZED - self._boot_control.init_active_ota_partition_file() - self._store_current_slot_in_use(self._boot_control.active_slot) - self._store_current_ota_status(wrapper.StatusOta.INITIALIZED) - - # populate slot_in_use file if it doesn't exist - if not _slot_in_use: - self._store_current_slot_in_use(self._boot_control.active_slot) - - if _ota_status in [wrapper.StatusOta.UPDATING, wrapper.StatusOta.ROLLBACKING]: - if self._is_switching_boot(): - self._boot_control.finalize_update_switch_boot( - abort_on_standby_missed=True - ) - # switch ota_status - _ota_status = wrapper.StatusOta.SUCCESS - else: - if _ota_status == wrapper.StatusOta.ROLLBACKING: - _ota_status = wrapper.StatusOta.ROLLBACK_FAILURE - else: - _ota_status = wrapper.StatusOta.FAILURE - # other ota_status will remain the same - - # detect failed reboot, but only print error logging - if ( - _ota_status != wrapper.StatusOta.INITIALIZED - and _slot_in_use - and _slot_in_use != self._boot_control.active_slot - ): - logger.error( - f"boot into old slot {self._boot_control.active_slot}, " - f"but slot_in_use indicates it should boot into {_slot_in_use}, " - "this might indicate a failed finalization at first reboot after update/rollback" - ) - - # apply ota_status to otaclient - self.ota_status = _ota_status - self._store_current_ota_status(_ota_status) - logger.info(f"boot control init finished, ota_status is {_ota_status}") - - def _is_switching_boot(self): - # evidence 1: ota_status should be updating/rollbacking at the first reboot - _check_ota_status = self._load_current_ota_status() in [ - wrapper.StatusOta.UPDATING, - wrapper.StatusOta.ROLLBACKING, - ] - - # NOTE(20220714): maintain backward compatibility, not using slot_in_use - # file here to detect switching boot. Maybe enable it in the future. - - # evidence 2(legacy): ota-partition.standby should be the - # current booted slot, because ota-partition symlink is not yet switched - # at the first reboot. - _target_slot = _SymlinkABPartitionDetecter._get_standby_slot_by_symlink() - _check_slot_in_use = _target_slot == self._boot_control.active_slot - - # evidence 2: slot_in_use file should have the same slot as current slot - # _target_slot = self._load_current_slot_in_use() - # _check_slot_in_use = _target_slot == self._boot_control.active_slot - - res = _check_ota_status and _check_slot_in_use - logger.info( - f"_is_switching_boot: {res} " - f"({_check_ota_status=}, {_check_slot_in_use=})" - ) - return res - - def _finalize_update(self) -> wrapper.StatusOta: - if self._is_switching_boot(): - self._boot_control.finalize_update_switch_boot(abort_on_standby_missed=True) - return wrapper.StatusOta.SUCCESS - else: - return wrapper.StatusOta.FAILURE - - _finalize_rollback = _finalize_update - def _update_fstab(self, *, active_slot_fstab: Path, standby_slot_fstab: Path): """Update standby fstab based on active slot's fstab and just installed new stanby fstab. @@ -793,10 +726,10 @@ def _update_fstab(self, *, active_slot_fstab: Path, standby_slot_fstab: Path): # standby partition fstab (to be merged) fstab_standby = read_str_from_file(standby_slot_fstab, missing_ok=False) fstab_standby_dict: Dict[str, re.Match] = {} + for line in fstab_standby.splitlines(): - if ma := fstab_entry_pa.match(line): - if ma.group("mount_point") == "/": - continue + ma = fstab_entry_pa.match(line) + if ma and ma.group("mount_point") != "/": fstab_standby_dict[ma.group("mount_point")] = ma # merge entries @@ -814,29 +747,29 @@ def _update_fstab(self, *, active_slot_fstab: Path, standby_slot_fstab: Path): del fstab_standby_dict[mp] else: merged.append("\t".join(ma.groups())) - else: - # re-add comments to merged - merged.append(line) + elif line.strip().startswith("#"): + merged.append(line) # re-add comments to merged # merge standby_fstab's left-over lines for _, ma in fstab_standby_dict.items(): merged.append("\t".join(ma.groups())) + merged.append("") # add a new line at the end of file # write to standby fstab write_str_to_file_sync(standby_slot_fstab, "\n".join(merged)) - def cleanup_standby_ota_partition_folder(self): + def _cleanup_standby_ota_partition_folder(self): """Cleanup old files under the standby ota-partition folder.""" - files_keept = ( + files_kept = ( cfg.OTA_STATUS_FNAME, cfg.OTA_VERSION_FNAME, cfg.SLOT_IN_USE_FNAME, - Path(cfg.GRUB_CFG_PATH).name, + cfg.GRUB_CFG_FNAME, ) removes = ( f - for f in self.standby_ota_status_dir.glob("*") - if f.name not in files_keept + for f in self._ota_status_control.standby_ota_status_dir.glob("*") + if f.name not in files_kept ) for f in removes: if f.is_dir(): @@ -844,72 +777,75 @@ def cleanup_standby_ota_partition_folder(self): else: f.unlink(missing_ok=True) - ###### public methods ###### - # also includes methods from OTAStatusMixin, VersionControlMixin - # load_version, get_ota_status - def on_operation_failure(self): - """Failure registering and cleanup at failure.""" - self._store_standby_ota_status(wrapper.StatusOta.FAILURE) - self._store_current_ota_status(wrapper.StatusOta.FAILURE) - logger.warning("on failure try to unmounting standby slot...") - self._umount_all(ignore_error=True) + def _copy_boot_files_from_standby_slot(self): + """Copy boot files under /boot to standby ota-partition folder.""" + standby_ota_partition_dir = self._ota_status_control.standby_ota_status_dir + for f in self._mp_control.standby_boot_dir.iterdir(): + if f.is_file() and not f.is_symlink(): + shutil.copy(f, standby_ota_partition_dir) + + # API def get_standby_slot_path(self) -> Path: - return self.standby_slot_mount_point + return self._mp_control.standby_slot_mount_point def get_standby_boot_dir(self) -> Path: - """ - NOTE: in grub_controller, kernel and initrd images are stored under - the ota_status_dir(ota_partition_dir) - """ - return self.standby_ota_status_dir + return self._mp_control.standby_boot_dir + + def load_version(self) -> str: + return self._ota_status_control.load_active_slot_version() + + def get_booted_ota_status(self) -> wrapper.StatusOta: + return self._ota_status_control.booted_ota_status + + def on_operation_failure(self): + """Failure registering and cleanup at failure.""" + logger.warning("on failure try to unmounting standby slot...") + self._ota_status_control.on_failure() + self._mp_control.umount_all(ignore_error=True) def pre_update(self, version: str, *, standby_as_ref: bool, erase_standby=False): try: - # update ota_status files - self._store_current_ota_status(wrapper.StatusOta.FAILURE) - self._store_standby_ota_status(wrapper.StatusOta.UPDATING) - # update version file - self._store_standby_version(version) - # update slot_in_use file - # set slot_in_use to to both slots - _target_slot = self._boot_control.standby_slot - self._store_current_slot_in_use(_target_slot) - self._store_standby_slot_in_use(_target_slot) - - # enter pre-update - self._prepare_and_mount_standby( - self._boot_control.standby_root_dev, - erase=erase_standby, - ) - self._mount_refroot( - standby_dev=self._boot_control.standby_root_dev, - active_dev=self._boot_control.active_root_dev, - standby_as_ref=standby_as_ref, - ) + logger.info("grub_boot: pre-update setup...") + ### udpate active slot's ota_status ### + self._ota_status_control.pre_update_current() + + ### mount slots ### + self._boot_control.prepare_standby_dev(erase_standby=erase_standby) + self._mp_control.mount_standby() + self._mp_control.mount_active() + + ### update standby slot's ota_status files ### + self._ota_status_control.pre_update_standby(version=version) + # remove old files under standby ota_partition folder - self.cleanup_standby_ota_partition_folder() + self._cleanup_standby_ota_partition_folder() except Exception as e: logger.error(f"failed on pre_update: {e!r}") raise BootControlPreUpdateFailed from e def post_update(self) -> Generator[None, None, None]: try: - # update fstab - active_fstab = Path(cfg.ACTIVE_ROOTFS_PATH) / Path( + logger.info("grub_boot: post-update setup...") + # ------ update fstab ------ # + active_fstab = self._mp_control.active_slot_mount_point / Path( cfg.FSTAB_FILE_PATH ).relative_to("/") - standby_fstab = self.standby_slot_mount_point / Path( + standby_fstab = self._mp_control.standby_slot_mount_point / Path( cfg.FSTAB_FILE_PATH ).relative_to("/") self._update_fstab( standby_slot_fstab=standby_fstab, active_slot_fstab=active_fstab, ) - # umount all mount points after local update finished - self._umount_all(ignore_error=True) + # ------ prepare boot files ------ # + self._copy_boot_files_from_standby_slot() + + # ------ pre-reboot ------ # + self._mp_control.umount_all(ignore_error=True) self._boot_control.grub_reboot_to_standby() + yield # hand over control to otaclient CMDHelperFuncs.reboot() except Exception as e: @@ -918,15 +854,19 @@ def post_update(self) -> Generator[None, None, None]: def pre_rollback(self): try: - self._store_current_ota_status(wrapper.StatusOta.FAILURE) - self._store_standby_ota_status(wrapper.StatusOta.ROLLBACKING) + logger.info("grub_boot: pre-rollback setup...") + self._ota_status_control.pre_rollback_current() + self._mp_control.mount_standby() + self._ota_status_control.pre_rollback_standby() except Exception as e: logger.error(f"failed on pre_rollback: {e!r}") raise BootControlPreRollbackFailed from e def post_rollback(self): try: + logger.info("grub_boot: post-rollback setup...") self._boot_control.grub_reboot_to_standby() + self._mp_control.umount_all(ignore_error=True) CMDHelperFuncs.reboot() except Exception as e: logger.error(f"failed on pre_rollback: {e!r}") diff --git a/otaclient/app/boot_control/_rpi_boot.py b/otaclient/app/boot_control/_rpi_boot.py index 7a555b774..2f4e5abea 100644 --- a/otaclient/app/boot_control/_rpi_boot.py +++ b/otaclient/app/boot_control/_rpi_boot.py @@ -397,7 +397,7 @@ def __init__(self) -> None: ) # 20230613: remove any leftover flag file if ota_status is not UPDATING/ROLLBACKING - if self._ota_status_control.ota_status not in ( + if self._ota_status_control.booted_ota_status not in ( wrapper.StatusOta.UPDATING, wrapper.StatusOta.ROLLBACKING, ): @@ -556,5 +556,5 @@ def on_operation_failure(self): def load_version(self) -> str: return self._ota_status_control.load_active_slot_version() - def get_ota_status(self) -> wrapper.StatusOta: - return self._ota_status_control.ota_status + def get_booted_ota_status(self) -> wrapper.StatusOta: + return self._ota_status_control.booted_ota_status diff --git a/otaclient/app/boot_control/configs.py b/otaclient/app/boot_control/configs.py index 88055ad86..d9fb96478 100644 --- a/otaclient/app/boot_control/configs.py +++ b/otaclient/app/boot_control/configs.py @@ -55,6 +55,7 @@ class GrubControlConfig(BaseConfig): BOOTLOADER: BootloaderType = BootloaderType.GRUB FSTAB_FILE_PATH: str = "/etc/fstab" GRUB_DIR: str = "/boot/grub" + GRUB_CFG_FNAME: str = "grub.cfg" GRUB_CFG_PATH: str = "/boot/grub/grub.cfg" DEFAULT_GRUB_PATH: str = "/etc/default/grub" BOOT_OTA_PARTITION_FILE: str = "ota-partition" diff --git a/otaclient/app/boot_control/protocol.py b/otaclient/app/boot_control/protocol.py index cb221eb93..8f19ae34f 100644 --- a/otaclient/app/boot_control/protocol.py +++ b/otaclient/app/boot_control/protocol.py @@ -24,8 +24,12 @@ class BootControllerProtocol(Protocol): """Boot controller protocol for otaclient.""" @abstractmethod - def get_ota_status(self) -> wrapper.StatusOta: - """Get the stored ota_status of current active slot.""" + def get_booted_ota_status(self) -> wrapper.StatusOta: + """Get the ota_status loaded from status file during otaclient starts up. + + This value is meant to be used only once during otaclient starts up, + to init the live_ota_status maintained by otaclient. + """ @abstractmethod def get_standby_slot_path(self) -> Path: @@ -33,7 +37,16 @@ def get_standby_slot_path(self) -> Path: @abstractmethod def get_standby_boot_dir(self) -> Path: - """Get the Path points to the standby boot folder.""" + """Get the Path points to the standby slot's boot folder. + + NOTE(20230907): this will always return the path to + /boot. + DEPRECATED(20230907): standby slot creator doesn't need to + treat the files under /boot specially, it is + boot controller's responsibility to get the + kernel/initrd.img from standby slot and prepare + them to actual boot dir. + """ @abstractmethod def pre_update(self, version: str, *, standby_as_ref: bool, erase_standby: bool): diff --git a/otaclient/app/configs.py b/otaclient/app/configs.py index 795a4a065..2f8bc771f 100644 --- a/otaclient/app/configs.py +++ b/otaclient/app/configs.py @@ -181,6 +181,9 @@ class BaseConfig(_InternalSettings): EXTERNAL_CACHE_DEV_MOUNTPOINT = "/mnt/external_cache_src" EXTERNAL_CACHE_SRC_PATH = "/mnt/external_cache_src/data" + # default version string to be reported in status API response + DEFAULT_VERSION_STR = "" + # init cfgs server_cfg = OtaClientServerConfig() diff --git a/otaclient/app/ota_client.py b/otaclient/app/ota_client.py index 5ca53ff38..330537a97 100644 --- a/otaclient/app/ota_client.py +++ b/otaclient/app/ota_client.py @@ -493,7 +493,9 @@ def __init__( self.boot_controller = boot_control_cls() self.create_standby_cls = create_standby_cls - self.live_ota_status = LiveOTAStatus(self.boot_controller.get_ota_status()) + self.live_ota_status = LiveOTAStatus( + self.boot_controller.get_booted_ota_status() + ) self.current_version = ( self.boot_controller.load_version() or self.DEFAULT_FIRMWARE_VERSION diff --git a/tests/test_boot_control/test_grub.py b/tests/test_boot_control/test_grub.py index 207d70b37..180a5d6a1 100644 --- a/tests/test_boot_control/test_grub.py +++ b/tests/test_boot_control/test_grub.py @@ -13,6 +13,7 @@ # limitations under the License. +import logging import os import shutil import typing @@ -20,7 +21,7 @@ import pytest_mock from pathlib import Path -import logging +from otaclient.app.proto import wrapper from tests.utils import SlotMeta from tests.conftest import TestConfiguration as cfg @@ -29,38 +30,59 @@ class GrubFSM: - def __init__(self) -> None: - self.current_slot = cfg.SLOT_A_ID_GRUB - self.standby_slot = cfg.SLOT_B_ID_GRUB + def __init__(self, slot_a_mp, slot_b_mp) -> None: + self._current_slot = cfg.SLOT_A_ID_GRUB + self._standby_slot = cfg.SLOT_B_ID_GRUB + self._current_slot_mp = Path(slot_a_mp) + self._standby_slot_mp = Path(slot_b_mp) + self._current_slot_dev_uuid = f"UUID={cfg.SLOT_A_UUID}" + self._standby_slot_dev_uuid = f"UUID={cfg.SLOT_B_UUID}" self.current_slot_bootable = True self.standby_slot_bootable = True self.is_boot_switched = False - def get_current_rootfs_dev(self): - return f"/dev/{self.current_slot}" + def get_active_slot(self) -> str: + return self._current_slot + + def get_standby_slot(self) -> str: + return self._standby_slot - def get_standby_rootfs_dev(self): - return f"/dev/{self.standby_slot}" + def get_active_slot_dev(self) -> str: + return f"/dev/{self._current_slot}" - def get_current_slot(self): - return self.current_slot + def get_standby_slot_dev(self) -> str: + return f"/dev/{self._standby_slot}" - def get_standby_slot(self): - return self.standby_slot + def get_active_slot_mp(self) -> Path: + return self._current_slot_mp + + def get_standby_slot_mp(self) -> Path: + return self._standby_slot_mp + + def get_standby_boot_dir(self) -> Path: + return self._standby_slot_mp / "boot" def get_uuid_str_by_dev(self, dev: str): - if dev == self.get_standby_rootfs_dev(): - return f"UUID={cfg.SLOT_B_UUID}" + if dev == self.get_standby_slot_dev(): + return self._standby_slot_dev_uuid else: - return f"UUID={cfg.SLOT_A_UUID}" + return self._current_slot_dev_uuid def switch_boot(self): - self.current_slot, self.standby_slot = self.standby_slot, self.current_slot + self._current_slot, self._standby_slot = self._standby_slot, self._current_slot + self._current_slot_mp, self._standby_slot_mp = ( + self._standby_slot_mp, + self._current_slot_mp, + ) + self._current_slot_dev_uuid, self._standby_slot_dev_uuid = ( + self._standby_slot_dev_uuid, + self._current_slot_dev_uuid, + ) self.is_boot_switched = True def cat_proc_cmdline(self): - if self.current_slot == cfg.SLOT_A_ID_GRUB: + if self._current_slot == cfg.SLOT_A_ID_GRUB: return cfg.CMDLINE_SLOT_A else: return cfg.CMDLINE_SLOT_B @@ -155,6 +177,9 @@ def grub_ab_slot(self, tmp_path: Path, ab_slots: SlotMeta): self.slot_a = Path(ab_slots.slot_a) self.slot_b = Path(ab_slots.slot_b) self.boot_dir = tmp_path / Path(cfg.BOOT_DIR).relative_to("/") + self.slot_b_boot_dir = self.slot_b / "boot" + self.slot_b_boot_dir.mkdir(parents=True, exist_ok=True) + self.slot_a_ota_partition_dir = ( self.boot_dir / f"{cfg.OTA_PARTITION_DIRNAME}.{cfg.SLOT_A_ID_GRUB}" ) @@ -168,6 +193,7 @@ def grub_ab_slot(self, tmp_path: Path, ab_slots: SlotMeta): self.boot_dir, dirs_exist_ok=True, ) + # NOTE: dummy ota-image doesn't have grub installed, # so we need to prepare /etc/default/grub by ourself default_grub = self.slot_a / Path(cfg.DEFAULT_GRUB_FILE).relative_to("/") @@ -193,27 +219,43 @@ def mock_setup( mocker: pytest_mock.MockerFixture, grub_ab_slot, ): - from otaclient.app.boot_control._grub import GrubABPartitionDetecter - from otaclient.app.boot_control._common import CMDHelperFuncs + from otaclient.app.boot_control._grub import GrubABPartitionDetector + from otaclient.app.boot_control._common import CMDHelperFuncs, SlotMountHelper - ###### start fsm ###### - self._fsm = GrubFSM() + # ------ start fsm ------ # + self._fsm = GrubFSM(slot_a_mp=self.slot_a, slot_b_mp=self.slot_b) - ###### mocking GrubABPartitionDetecter ###### - _GrubABPartitionDetecter_mock = typing.cast( - GrubABPartitionDetecter, mocker.MagicMock(spec=GrubABPartitionDetecter) + # ------ mock SlotMountHelper ------ # + _mocked_slot_mount_helper = mocker.MagicMock(spec=SlotMountHelper) + type(_mocked_slot_mount_helper).standby_slot_dev = mocker.PropertyMock( + wraps=self._fsm.get_standby_slot_dev ) - _GrubABPartitionDetecter_mock.get_standby_slot = mocker.MagicMock( - wraps=self._fsm.get_standby_slot + type(_mocked_slot_mount_helper).active_slot_dev = mocker.PropertyMock( + wraps=self._fsm.get_active_slot_dev + ) + type(_mocked_slot_mount_helper).standby_slot_mount_point = mocker.PropertyMock( + wraps=self._fsm.get_standby_slot_mp + ) + type(_mocked_slot_mount_helper).active_slot_mount_point = mocker.PropertyMock( + wraps=self._fsm.get_active_slot_mp ) - _GrubABPartitionDetecter_mock.get_active_slot = mocker.MagicMock( - wraps=self._fsm.get_current_slot + type(_mocked_slot_mount_helper).standby_boot_dir = mocker.PropertyMock( + wraps=self._fsm.get_standby_boot_dir ) - _GrubABPartitionDetecter_mock.get_active_slot_dev = mocker.MagicMock( - wraps=self._fsm.get_current_rootfs_dev + + # ------ mock GrubABPartitionDetector ------ # + _mocked_ab_partition_detector = mocker.MagicMock(spec=GrubABPartitionDetector) + type(_mocked_ab_partition_detector).active_slot = mocker.PropertyMock( + wraps=self._fsm.get_active_slot + ) + type(_mocked_ab_partition_detector).active_dev = mocker.PropertyMock( + wraps=self._fsm.get_active_slot_dev + ) + type(_mocked_ab_partition_detector).standby_slot = mocker.PropertyMock( + wraps=self._fsm.get_standby_slot ) - _GrubABPartitionDetecter_mock.get_standby_slot_dev = mocker.MagicMock( - wraps=self._fsm.get_standby_rootfs_dev + type(_mocked_ab_partition_detector).standby_dev = mocker.PropertyMock( + wraps=self._fsm.get_standby_slot_dev ) ###### mocking GrubHelper ###### @@ -251,13 +293,16 @@ def mock_setup( _CMDHelper_at_grub_path = f"{cfg.GRUB_MODULE_PATH}.CMDHelperFuncs" mocker.patch(_CMDHelper_at_common_path, _CMDHelper_mock) mocker.patch(_CMDHelper_at_grub_path, _CMDHelper_mock) - # patch _GrubABPartitionDetecter - _GrubABPartitionDetecter_path = ( - f"{cfg.GRUB_MODULE_PATH}.GrubABPartitionDetecter" + # patch _GrubABPartitionDetector + _GrubABPartitionDetector_path = ( + f"{cfg.GRUB_MODULE_PATH}.GrubABPartitionDetector" ) mocker.patch( - _GrubABPartitionDetecter_path, return_value=_GrubABPartitionDetecter_mock + _GrubABPartitionDetector_path, return_value=_mocked_ab_partition_detector ) + # patch SlotMountHelper + _SlotMountHelper_path = f"{cfg.GRUB_MODULE_PATH}.SlotMountHelper" + mocker.patch(_SlotMountHelper_path, return_value=_mocked_slot_mount_helper) # patch reading from /proc/cmdline mocker.patch( f"{cfg.GRUB_MODULE_PATH}.cat_proc_cmdline", @@ -276,7 +321,9 @@ def test_grub_normal_update(self, mocker: pytest_mock.MockerFixture): mocker.patch(_cfg_patch_path, self.cfg_for_slot_a_as_current()) grub_controller = GrubController() - assert (self.slot_a_ota_partition_dir / "status").read_text() == "INITIALIZED" + assert ( + self.slot_a_ota_partition_dir / "status" + ).read_text() == wrapper.StatusOta.INITIALIZED.name # assert ota-partition file points to slot_a ota-partition folder assert ( os.readlink(self.boot_dir / cfg.OTA_PARTITION_DIRNAME) @@ -293,18 +340,18 @@ def test_grub_normal_update(self, mocker: pytest_mock.MockerFixture): erase_standby=False, # NOTE: not used ) # update slot_b, slot_a_ota_status->FAILURE, slot_b_ota_status->UPDATING - assert (self.slot_a_ota_partition_dir / "status").read_text() == "FAILURE" - assert (self.slot_b_ota_partition_dir / "status").read_text() == "UPDATING" + assert ( + self.slot_a_ota_partition_dir / "status" + ).read_text() == wrapper.StatusOta.FAILURE.name + assert ( + self.slot_b_ota_partition_dir / "status" + ).read_text() == wrapper.StatusOta.UPDATING.name # NOTE: we have to copy the new kernel files to the slot_b's boot dir # this is done by the create_standby module _kernel = f"{cfg.KERNEL_PREFIX}-{cfg.KERNEL_VERSION}" _initrd = f"{cfg.INITRD_PREFIX}-{cfg.KERNEL_VERSION}" - shutil.copy( - self.slot_a_ota_partition_dir / _kernel, self.slot_b_ota_partition_dir - ) - shutil.copy( - self.slot_a_ota_partition_dir / _initrd, self.slot_b_ota_partition_dir - ) + shutil.copy(self.slot_a_ota_partition_dir / _kernel, self.slot_b_boot_dir) + shutil.copy(self.slot_a_ota_partition_dir / _initrd, self.slot_b_boot_dir) logger.info("pre-update completed, entering post-update...") # test post-update @@ -313,10 +360,10 @@ def test_grub_normal_update(self, mocker: pytest_mock.MockerFixture): next(_post_updater, None) assert ( self.slot_b / Path(cfg.FSTAB_FILE).relative_to("/") - ).read_text() == self.FSTAB_UPDATED.strip() + ).read_text().strip() == self.FSTAB_UPDATED.strip() assert ( self.boot_dir / "grub/grub.cfg" - ).read_text() == GrubMkConfigFSM.GRUB_CFG_SLOT_A_UPDATED + ).read_text().strip() == GrubMkConfigFSM.GRUB_CFG_SLOT_A_UPDATED.strip() # NOTE: check grub.cfg_slot_a_post_update, the target entry is 0 self._grub_reboot_mock.assert_called_once_with(0) self._CMDHelper_mock.reboot.assert_called_once() @@ -336,7 +383,9 @@ def test_grub_normal_update(self, mocker: pytest_mock.MockerFixture): ### test pre-init ### assert self._fsm.is_boot_switched - assert (self.slot_b_ota_partition_dir / "status").read_text() == "UPDATING" + assert ( + self.slot_b_ota_partition_dir / "status" + ).read_text() == wrapper.StatusOta.UPDATING.name # assert ota-partition file is not yet switched before first reboot init assert ( os.readlink(self.boot_dir / cfg.OTA_PARTITION_DIRNAME) @@ -344,13 +393,15 @@ def test_grub_normal_update(self, mocker: pytest_mock.MockerFixture): ) ### test first reboot init ### - grub_controller = GrubController() + _ = GrubController() # assert ota-partition file switch to slot_b ota-partition folder after first reboot init assert ( os.readlink(self.boot_dir / cfg.OTA_PARTITION_DIRNAME) == f"{cfg.OTA_PARTITION_DIRNAME}.{cfg.SLOT_B_ID_GRUB}" ) - assert (self.slot_b_ota_partition_dir / "status").read_text() == "SUCCESS" + assert ( + self.slot_b_ota_partition_dir / "status" + ).read_text() == wrapper.StatusOta.SUCCESS.name assert ( self.slot_b_ota_partition_dir / "version" ).read_text() == cfg.UPDATE_VERSION diff --git a/tests/test_boot_control/test_ota_status_control.py b/tests/test_boot_control/test_ota_status_control.py new file mode 100644 index 000000000..dc914b6da --- /dev/null +++ b/tests/test_boot_control/test_ota_status_control.py @@ -0,0 +1,270 @@ +# Copyright 2022 TIER IV, INC. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import logging +import threading +import pytest +from functools import partial +from pathlib import Path +from typing import Optional, Union + +from otaclient.app.common import read_str_from_file, write_str_to_file +from otaclient.app.proto import wrapper +from otaclient.app.boot_control.configs import BaseConfig as cfg +from otaclient.app.boot_control._common import OTAStatusFilesControl + +logger = logging.getLogger(__name__) + + +def _dummy_finalize_switch_boot( + flag: threading.Event, boot_control_switch_boot_result: bool +): + flag.set() + return boot_control_switch_boot_result + + +class TestOTAStatusFilesControl: + SLOT_A_ID, SLOT_B_ID = "slot_a", "slot_b" + + @pytest.fixture(autouse=True) + def setup(self, tmp_path: Path): + self.slot_a, self.slot_b = self.SLOT_A_ID, self.SLOT_B_ID + self.slot_a_ota_status_dir = tmp_path / "slot_a_ota_status_dir" + self.slot_a_ota_status_dir.mkdir() + self.slot_b_ota_status_dir = tmp_path / "slot_b_ota_status_dir" + self.slot_b_ota_status_dir.mkdir() + + self.slot_a_status_file = self.slot_a_ota_status_dir / cfg.OTA_STATUS_FNAME + self.slot_b_status_file = self.slot_b_ota_status_dir / cfg.OTA_STATUS_FNAME + self.slot_a_slot_in_use_file = ( + self.slot_a_ota_status_dir / cfg.SLOT_IN_USE_FNAME + ) + self.slot_b_slot_in_use_file = ( + self.slot_b_ota_status_dir / cfg.SLOT_IN_USE_FNAME + ) + + self.finalize_switch_boot_flag = threading.Event() + self.finalize_switch_boot_func = partial( + _dummy_finalize_switch_boot, self.finalize_switch_boot_flag + ) + + @pytest.mark.parametrize( + ( + "test_case,input_slot_a_status,input_slot_a_slot_in_use,force_initialize," + "output_slot_a_status,output_slot_a_slot_in_use" + ), + ( + ( + "test_initialize", + # input + None, + "", + False, + # output + wrapper.StatusOta.INITIALIZED, + SLOT_A_ID, + ), + ( + "test_force_initialize", + # input + wrapper.StatusOta.SUCCESS, + SLOT_A_ID, + True, + # output + wrapper.StatusOta.INITIALIZED, + SLOT_A_ID, + ), + ( + "test_normal_boot", + # input + wrapper.StatusOta.SUCCESS, + SLOT_A_ID, + False, + # output + wrapper.StatusOta.SUCCESS, + SLOT_A_ID, + ), + ), + ) + def test_ota_status_files_loading( + self, + test_case: str, + input_slot_a_status: Optional[wrapper.StatusOta], + input_slot_a_slot_in_use: str, + force_initialize: bool, + output_slot_a_status: wrapper.StatusOta, + output_slot_a_slot_in_use: str, + ): + logger.info(f"{test_case=}") + # ------ setup ------ # + write_str_to_file( + self.slot_a_status_file, + input_slot_a_status.name if input_slot_a_status else "", + ) + write_str_to_file(self.slot_a_slot_in_use_file, input_slot_a_slot_in_use) + + # ------ execution ------ # + status_control = OTAStatusFilesControl( + active_slot=self.slot_a, + standby_slot=self.slot_b, + current_ota_status_dir=self.slot_a_ota_status_dir, + standby_ota_status_dir=self.slot_b_ota_status_dir, + finalize_switching_boot=partial(self.finalize_switch_boot_func, True), + force_initialize=force_initialize, + ) + + # ------ assertion ------ # + assert not self.finalize_switch_boot_flag.is_set() + # check slot a + assert read_str_from_file(self.slot_a_status_file) == output_slot_a_status.name + assert status_control.booted_ota_status == output_slot_a_status + assert ( + read_str_from_file(self.slot_a_slot_in_use_file) + == status_control._load_current_slot_in_use() + == output_slot_a_slot_in_use + ) + + def test_pre_update(self): + """Test update from slot_a to slot_b.""" + # ------ direct init ------ # + status_control = OTAStatusFilesControl( + active_slot=self.slot_a, + standby_slot=self.slot_b, + current_ota_status_dir=self.slot_a_ota_status_dir, + standby_ota_status_dir=self.slot_b_ota_status_dir, + finalize_switching_boot=partial(self.finalize_switch_boot_func, True), + force_initialize=False, + ) + + # ------ execution ------ # + status_control.pre_update_current() + status_control.pre_update_standby(version="dummy_version") + + # ------ assertion ------ # + assert not self.finalize_switch_boot_flag.is_set() + # slot_a: current slot + assert ( + read_str_from_file(self.slot_a_status_file) + == wrapper.StatusOta.FAILURE.name + ) + assert ( + read_str_from_file(self.slot_a_slot_in_use_file) + == status_control._load_current_slot_in_use() + == self.slot_b + ) + # slot_b: standby slot + assert ( + read_str_from_file(self.slot_b_status_file) + == wrapper.StatusOta.UPDATING.name + ) + assert read_str_from_file(self.slot_b_slot_in_use_file) == self.slot_b + + @pytest.mark.parametrize( + ("test_case,finalizing_result"), + ( + ( + "test_finalizing_failed", + False, + ), + ( + "test_finalizing_succeeded", + True, + ), + ), + ) + def test_switching_boot( + self, + test_case: str, + finalizing_result: bool, + ): + """First reboot after OTA from slot_a to slot_b.""" + logger.info(f"{test_case=}") + # ------ setup ------ # + write_str_to_file(self.slot_a_status_file, wrapper.StatusOta.FAILURE.name) + write_str_to_file(self.slot_a_slot_in_use_file, self.slot_b) + write_str_to_file(self.slot_b_status_file, wrapper.StatusOta.UPDATING.name) + write_str_to_file(self.slot_b_slot_in_use_file, self.slot_b) + + # ------ execution ------ # + # otaclient boots on slot_b + status_control = OTAStatusFilesControl( + active_slot=self.slot_b, + standby_slot=self.slot_a, + current_ota_status_dir=self.slot_b_ota_status_dir, + standby_ota_status_dir=self.slot_a_ota_status_dir, + finalize_switching_boot=partial( + self.finalize_switch_boot_func, finalizing_result + ), + force_initialize=False, + ) + + # ------ assertion ------ # + # ensure finalizing is called + assert self.finalize_switch_boot_flag.is_set() + + # check slot a + assert ( + read_str_from_file(self.slot_a_status_file) + == wrapper.StatusOta.FAILURE.name + ) + assert ( + read_str_from_file(self.slot_a_slot_in_use_file) + == status_control._load_current_slot_in_use() + == self.slot_b + ) + assert ( + read_str_from_file(self.slot_b_slot_in_use_file) + == status_control._load_current_slot_in_use() + == self.slot_b + ) + + # finalizing succeeded + if finalizing_result: + assert status_control.booted_ota_status == wrapper.StatusOta.SUCCESS + assert ( + read_str_from_file(self.slot_b_status_file) + == wrapper.StatusOta.SUCCESS.name + ) + + else: + assert status_control.booted_ota_status == wrapper.StatusOta.FAILURE + assert ( + read_str_from_file(self.slot_b_status_file) + == wrapper.StatusOta.FAILURE.name + ) + + def test_accidentally_boots_back_to_standby(self): + """slot_a should be active slot but boots back to slot_b.""" + # ------ setup ------ # + write_str_to_file(self.slot_a_status_file, wrapper.StatusOta.SUCCESS.name) + write_str_to_file(self.slot_a_slot_in_use_file, self.slot_a) + write_str_to_file(self.slot_b_status_file, wrapper.StatusOta.FAILURE.name) + write_str_to_file(self.slot_b_slot_in_use_file, self.slot_a) + + # ------ execution ------ # + # otaclient accidentally boots on slot_b + status_control = OTAStatusFilesControl( + active_slot=self.slot_b, + standby_slot=self.slot_a, + current_ota_status_dir=self.slot_b_ota_status_dir, + standby_ota_status_dir=self.slot_a_ota_status_dir, + finalize_switching_boot=partial(self.finalize_switch_boot_func, True), + force_initialize=False, + ) + + # ------ assertion ------ # + assert not self.finalize_switch_boot_flag.is_set() + # slot_b's status is read + assert status_control.booted_ota_status == wrapper.StatusOta.FAILURE diff --git a/tests/test_boot_control/test_rpi_boot.py b/tests/test_boot_control/test_rpi_boot.py index 540dbf080..6c3dd1e77 100644 --- a/tests/test_boot_control/test_rpi_boot.py +++ b/tests/test_boot_control/test_rpi_boot.py @@ -319,7 +319,9 @@ def test_rpi_boot_normal_update(self, mocker: pytest_mock.MockerFixture): # 1. make sure ota_status is SUCCESS # 2. make sure the flag file is cleared # 3. make sure the config.txt is still for slot_b - assert rpi_boot_controller4_2.get_ota_status() == wrapper.StatusOta.SUCCESS + assert ( + rpi_boot_controller4_2.get_booted_ota_status() == wrapper.StatusOta.SUCCESS + ) assert ( self.slot_b_ota_status_dir / rpi_boot_cfg.OTA_STATUS_FNAME ).read_text() == wrapper.StatusOta.SUCCESS.name diff --git a/tests/test_ota_client.py b/tests/test_ota_client.py index ffe7a8b6d..541fead4a 100644 --- a/tests/test_ota_client.py +++ b/tests/test_ota_client.py @@ -231,7 +231,9 @@ def mock_setup(self, mocker: pytest_mock.MockerFixture): ) # patch boot_controller for otaclient initializing self.boot_controller.load_version.return_value = self.CURRENT_FIRMWARE_VERSION - self.boot_controller.get_ota_status.return_value = wrapper.StatusOta.SUCCESS + self.boot_controller.get_booted_ota_status.return_value = ( + wrapper.StatusOta.SUCCESS + ) self.ota_client = OTAClient( boot_control_cls=mocker.MagicMock(return_value=self.boot_controller),