Skip to content

Commit

Permalink
feat!: reject any OTA update/rollback request on ecu_info.yaml not pr…
Browse files Browse the repository at this point in the history
…operly loaded (#465)

This PR implements the requirement to reject any OTA update/rollback request when the ecu_info.yaml file is broken, and to report a failure_reason stating that ecu_info.yaml is broken.

If ecu_info.yaml is broken and cannot be parsed at startup, otaclient will set the live_ota_status to FAILURE and the failure_type to UNRECOVERABLE, with the failure_reason: `ecu_info.yaml file is broken, please check /boot/ota/ecu_info.yaml. reject any OTA request.`

For every incoming OTA request, otaclient will reject the request with failure_type UNRECOVERABLE for every ECU listed in it (because otaclient running with the default ecu_info has no contact information for the sub ECUs).

In a multiple-ECU environment, when otaclient falls back to the default ecu_info, the available_ecu_ids list contains only one entry (the main ECU), so the situation is the same as in a single-ECU environment.

Other changes:
1. status_monitor: now status_monitor will always push to shm when the incoming report is an OTAStatusChangeReport, regardless of the push interval.

BREAKING CHANGE: otaclient now sets itself to the FAILURE OTA status and rejects all OTA requests when ecu_info.yaml is not properly loaded at startup.
  • Loading branch information
Bodong-Yang authored Dec 26, 2024
1 parent 0a6271a commit 53c20b0
Show file tree
Hide file tree
Showing 5 changed files with 189 additions and 80 deletions.
10 changes: 9 additions & 1 deletion src/otaclient/_status_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,15 @@ def _status_collector_thread(self) -> None:
break

# ------ push status on load_report ------ #
if self.load_report(report) and self._status and _now > _next_shm_push:
# NOTE: always push OTAStatus change report
if (
self.load_report(report)
and self._status
and (
isinstance(report.payload, OTAStatusChangeReport)
or _now > _next_shm_push
)
):
try:
self._shm_status.write_msg(self._status)
_next_shm_push = _now + self.shm_push_interval
Expand Down
33 changes: 33 additions & 0 deletions src/otaclient/grpc/api_v2/servicer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import multiprocessing.queues as mp_queue
from concurrent.futures import ThreadPoolExecutor

import otaclient.configs.cfg as otaclient_cfg
from otaclient._types import (
IPCRequest,
IPCResEnum,
Expand Down Expand Up @@ -111,6 +112,22 @@ async def update(
update_acked_ecus = set()
response = api_types.UpdateResponse()

# NOTE(20241220): due to the fact that OTA Service API doesn't have field
# in UpdateResponseEcu msg, the only way to pass the failure_msg
# to upper is by status API.
if not otaclient_cfg.ECU_INFO_LOADED_SUCCESSFULLY:
logger.error(
"ecu_info.yaml is not loaded properly, reject any update request"
)
for _update_req in request.iter_ecu():
response.add_ecu(
api_types.UpdateResponseEcu(
ecu_id=_update_req.ecu_id,
result=api_types.FailureType.UNRECOVERABLE,
)
)
return response

# first: dispatch update request to all directly connected subECUs
tasks: dict[asyncio.Task, ECUContact] = {}
for ecu_contact in self.sub_ecus:
Expand Down Expand Up @@ -225,6 +242,22 @@ async def rollback(
logger.info(f"receive rollback request: {request}")
response = api_types.RollbackResponse()

# NOTE(20241220): due to the fact that OTA Service API doesn't have field
# in UpdateResponseEcu msg, the only way to pass the failure_msg
# to upper is by status API.
if not otaclient_cfg.ECU_INFO_LOADED_SUCCESSFULLY:
logger.error(
"ecu_info.yaml is not loaded properly, reject any rollback request"
)
for _rollback_req in request.iter_ecu():
response.add_ecu(
api_types.RollbackResponseEcu(
ecu_id=_rollback_req.ecu_id,
result=api_types.FailureType.UNRECOVERABLE,
)
)
return response

# first: dispatch rollback request to all directly connected subECUs
tasks: dict[asyncio.Task, ECUContact] = {}
for ecu_contact in self.sub_ecus:
Expand Down
69 changes: 54 additions & 15 deletions src/otaclient/ota_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import requests.exceptions as requests_exc
from requests import Response

import otaclient.configs.cfg as otaclient_cfg
from ota_metadata.legacy import parser as ota_metadata_parser
from ota_metadata.legacy import types as ota_metadata_types
from ota_metadata.utils.cert_store import (
Expand Down Expand Up @@ -66,7 +67,12 @@
)
from otaclient._utils import SharedOTAClientStatusWriter, get_traceback, wait_and_log
from otaclient.boot_control import BootControllerProtocol, get_boot_controller
from otaclient.configs.cfg import cfg, ecu_info, proxy_info
from otaclient.configs.cfg import (
ECU_INFO_LOADED_SUCCESSFULLY,
cfg,
ecu_info,
proxy_info,
)
from otaclient.create_standby import (
StandbySlotCreatorProtocol,
get_standby_slot_creator,
Expand Down Expand Up @@ -672,17 +678,7 @@ def __init__(
)
return

# load and report booted OTA status
_boot_ctrl_loaded_ota_status = self.boot_controller.get_booted_ota_status()
self._live_ota_status = _boot_ctrl_loaded_ota_status
status_report_queue.put_nowait(
StatusReport(
payload=OTAStatusChangeReport(
new_ota_status=_boot_ctrl_loaded_ota_status,
),
)
)

# ------ load firmware version ------ #
self.current_version = self.boot_controller.load_version()
status_report_queue.put_nowait(
StatusReport(
Expand All @@ -691,7 +687,9 @@ def __init__(
),
)
)
logger.info(f"firmware_version: {self.current_version}")

# ------ load CA store ------ #
self.ca_chains_store = None
try:
self.ca_chains_store = load_ca_cert_chains(cfg.CERT_DPATH)
Expand All @@ -701,9 +699,36 @@ def __init__(

self.ca_chains_store = CAChainStore()

self.started = True
logger.info("otaclient started")
logger.info(f"firmware_version: {self.current_version}")
# load and report booted OTA status
_boot_ctrl_loaded_ota_status = self.boot_controller.get_booted_ota_status()
if not otaclient_cfg.ECU_INFO_LOADED_SUCCESSFULLY:
logger.error(
"ecu_info.yaml file is not loaded properly, will reject any OTA request."
)
logger.error(f"set live_ota_status to {OTAStatus.FAILURE!r}")
self._live_ota_status = OTAStatus.FAILURE
status_report_queue.put_nowait(
StatusReport(
payload=OTAStatusChangeReport(
new_ota_status=OTAStatus.FAILURE,
failure_type=FailureType.UNRECOVERABLE,
failure_reason=f"ecu_info.yaml file is broken or missing, please check {cfg.ECU_INFO_FPATH}. "
"reject any OTA request.",
),
)
)
else:
self._live_ota_status = _boot_ctrl_loaded_ota_status
status_report_queue.put_nowait(
StatusReport(
payload=OTAStatusChangeReport(
new_ota_status=_boot_ctrl_loaded_ota_status,
),
)
)

self.started = True
logger.info("otaclient started")

def _on_failure(
self,
Expand Down Expand Up @@ -843,6 +868,20 @@ def main(
)
)

elif not self.started:
_err_msg = "reject OTA request due to otaclient is not (yet) started."
if not ECU_INFO_LOADED_SUCCESSFULLY:
_err_msg = f"reject OTA request due to {cfg.ECU_INFO_FPATH} missing or broken"

logger.error(_err_msg)
resp_queue.put_nowait(
IPCResponse(
res=IPCResEnum.REJECT_OTHER,
msg=_err_msg,
session_id=request.session_id,
)
)

elif isinstance(request, UpdateRequestV2):

_update_thread = threading.Thread(
Expand Down
104 changes: 59 additions & 45 deletions tests/test_otaclient/test_configs/test_ecu_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,23 @@


@pytest.mark.parametrize(
"ecu_info_yaml, expected_ecu_info",
"ecu_info_yaml, expected_res",
(
# --- case 1: invalid ecu_info --- #
# case 1.1: valid yaml(empty file), invalid ecu_info
(
"# this is an empty file",
DEFAULT_ECU_INFO,
(False, DEFAULT_ECU_INFO),
),
# case 1.2: valid yaml(array), invalid ecu_info
(
("- this is an\n- yaml file that\n- contains a array\n"),
DEFAULT_ECU_INFO,
(False, DEFAULT_ECU_INFO),
),
# case 1.2: invalid yaml
(
" - \n not a \n [ valid yaml",
DEFAULT_ECU_INFO,
(False, DEFAULT_ECU_INFO),
),
# --- case 2: single ECU --- #
# case 2.1: basic single ECU
Expand All @@ -52,10 +52,13 @@
'ip_addr: "192.168.1.1"\n'
"bootloader: jetson-cboot\n"
),
ECUInfo(
ecu_id="autoware",
ip_addr=IPv4Address("192.168.1.1"),
bootloader=BootloaderType.JETSON_CBOOT,
(
True,
ECUInfo(
ecu_id="autoware",
ip_addr=IPv4Address("192.168.1.1"),
bootloader=BootloaderType.JETSON_CBOOT,
),
),
),
# case 2.2: single ECU with bootloader type specified
Expand All @@ -66,10 +69,13 @@
'ip_addr: "192.168.1.1"\n'
'bootloader: "grub"\n'
),
ECUInfo(
ecu_id="autoware",
ip_addr=IPv4Address("192.168.1.1"),
bootloader=BootloaderType.GRUB,
(
True,
ECUInfo(
ecu_id="autoware",
ip_addr=IPv4Address("192.168.1.1"),
bootloader=BootloaderType.GRUB,
),
),
),
# --- case 3: multiple ECUs --- #
Expand All @@ -87,21 +93,24 @@
'- ecu_id: "p2"\n'
' ip_addr: "192.168.0.12"\n'
),
ECUInfo(
ecu_id="autoware",
ip_addr=IPv4Address("192.168.1.1"),
bootloader=BootloaderType.GRUB,
available_ecu_ids=["autoware", "p1", "p2"],
secondaries=[
ECUContact(
ecu_id="p1",
ip_addr=IPv4Address("192.168.0.11"),
),
ECUContact(
ecu_id="p2",
ip_addr=IPv4Address("192.168.0.12"),
),
],
(
True,
ECUInfo(
ecu_id="autoware",
ip_addr=IPv4Address("192.168.1.1"),
bootloader=BootloaderType.GRUB,
available_ecu_ids=["autoware", "p1", "p2"],
secondaries=[
ECUContact(
ecu_id="p1",
ip_addr=IPv4Address("192.168.0.11"),
),
ECUContact(
ecu_id="p2",
ip_addr=IPv4Address("192.168.0.12"),
),
],
),
),
),
# case 3.2: multiple ECUs, with main ECU's bootloader specified
Expand All @@ -118,32 +127,37 @@
'- ecu_id: "p2"\n'
' ip_addr: "192.168.0.12"\n'
),
ECUInfo(
ecu_id="autoware",
ip_addr=IPv4Address("192.168.1.1"),
bootloader=BootloaderType.JETSON_UEFI,
available_ecu_ids=["autoware", "p1", "p2"],
secondaries=[
ECUContact(
ecu_id="p1",
ip_addr=IPv4Address("192.168.0.11"),
),
ECUContact(
ecu_id="p2",
ip_addr=IPv4Address("192.168.0.12"),
),
],
(
True,
ECUInfo(
ecu_id="autoware",
ip_addr=IPv4Address("192.168.1.1"),
bootloader=BootloaderType.JETSON_UEFI,
available_ecu_ids=["autoware", "p1", "p2"],
secondaries=[
ECUContact(
ecu_id="p1",
ip_addr=IPv4Address("192.168.0.11"),
),
ECUContact(
ecu_id="p2",
ip_addr=IPv4Address("192.168.0.12"),
),
],
),
),
),
),
)
def test_ecu_info(tmp_path: Path, ecu_info_yaml: str, expected_ecu_info: ECUInfo):
def test_ecu_info(
tmp_path: Path, ecu_info_yaml: str, expected_res: tuple[bool, ECUInfo]
):
# --- preparation --- #
(ota_dir := tmp_path / "boot" / "ota").mkdir(parents=True, exist_ok=True)
(ecu_info_file := ota_dir / "ecu_info.yaml").write_text(ecu_info_yaml)

# --- execution --- #
_, loaded_ecu_info = parse_ecu_info(ecu_info_file)
res = parse_ecu_info(ecu_info_file)

# --- assertion --- #
assert loaded_ecu_info == expected_ecu_info
assert res == expected_res
Loading

1 comment on commit 53c20b0

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
FileStmtsMissCoverMissing
src/ota_metadata/file_table
   __init__.py40100% 
   _orm.py160100% 
   _table.py911484%139–140, 178, 197–202, 204–206, 218–219
   _types.py31487%47, 54–56
src/ota_metadata/legacy
   __init__.py10100% 
   parser.py3354885%106, 170, 175, 211–212, 222–223, 226, 238, 289–291, 295–298, 324–327, 396, 399, 407–409, 422, 431–432, 435–436, 601–603, 653–654, 657, 685–686, 689–690, 692, 696, 698–699, 753, 756–758
   types.py841384%37, 40–42, 112–116, 122–125
src/ota_metadata/utils
   cert_store.py86890%58–59, 73, 87, 91, 102, 123, 127
src/ota_proxy
   __init__.py16756%48–49, 51, 53, 62, 72–73
   __main__.py660%16, 18–19, 21–22, 24
   _consts.py170100% 
   cache_control_header.py68494%71, 91, 113, 121
   cache_streaming.py1432284%152–154, 182–184, 209, 223, 227–228, 263–264, 266, 278, 347, 353–354, 357, 365–368
   config.py200100% 
   db.py801877%103, 109, 167, 173–174, 177, 183, 185, 209–216, 218–219
   errors.py50100% 
   external_cache.py282028%31, 35, 40–42, 44–45, 48–49, 51–53, 60, 63–65, 69–72
   lru_cache_helper.py46295%85–86
   ota_cache.py2346472%72–73, 140, 143–144, 156–157, 189, 192, 214, 234, 253–257, 261–263, 265, 267–274, 276–278, 281–282, 286–287, 291, 338, 346–348, 421, 448, 451–452, 474–476, 480–482, 488, 490–492, 497, 523–525, 559–561, 588, 594, 609
   server_app.py1403972%71, 74, 80, 99, 103, 162, 171, 213–214, 216–218, 221, 226–227, 230, 233–234, 237, 240, 243, 246, 259–260, 263–264, 266, 269, 295–298, 301, 315–317, 323–325
   utils.py130100% 
src/otaclient
   __init__.py5260%17, 19
   __main__.py110%16
   _logging.py513335%43–44, 46–47, 49–54, 56–57, 59–60, 62–65, 67, 77, 80–82, 84–86, 89–90, 92–96
   _otaproxy_ctx.py42420%20, 22–29, 31–36, 38, 40–41, 44, 46–50, 53–56, 59–60, 62–63, 65–67, 69, 74–78, 80
   _status_monitor.py1841790%48–49, 136–138, 161, 164, 184, 187, 203–204, 212, 215, 278, 300, 325–326
   _types.py960100% 
   _utils.py30293%80–81
   errors.py120199%97
   main.py25250%17, 19–29, 31–33, 35, 37, 41–42, 44–46, 48–50
   ota_core.py35514958%127, 129–130, 134–135, 137–139, 143–144, 149–150, 156, 158, 217–220, 343, 375–376, 378, 387, 390, 395–396, 399, 405, 407–411, 418, 424, 459–462, 465–476, 479–482, 523–526, 542–543, 547–548, 614–621, 626, 629–636, 661–662, 668, 672–673, 679, 696–698, 700, 721–722, 730–731, 768, 790, 817–819, 828–834, 848–854, 856–857, 862–863, 871–874, 876–877, 885, 887, 893, 895, 901, 903, 907, 913, 915, 921, 924–926, 936–937, 948–950, 952–953, 955, 957–958, 963, 965, 970
src/otaclient/boot_control
   __init__.py40100% 
   _firmware_package.py932276%82, 86, 136, 180, 186, 209–210, 213–218, 220–221, 224–229, 231
   _grub.py41812769%214, 262–265, 271–275, 312–313, 320–325, 328–334, 337, 340–341, 346, 348–350, 359–365, 367–368, 370–372, 381–383, 385–387, 466–467, 471–472, 524, 530, 556, 578, 582–583, 598–600, 624–627, 639, 643–645, 647–649, 708–711, 736–739, 762–765, 777–778, 781–782, 817, 823, 843–844, 846, 868–870, 888–891, 916–919, 926–929, 934–942, 947–954
   _jetson_cboot.py2612610%20, 22–25, 27–29, 35–40, 42, 58–60, 62, 64–65, 71, 75, 134, 137, 139–140, 143, 150–151, 159–160, 163, 169–170, 178, 187–191, 193, 199, 202–203, 209, 212–213, 218–219, 221, 227–228, 231–232, 235–237, 239, 245, 250–252, 254–256, 261, 263–266, 268–269, 278–279, 282–283, 288–289, 292–296, 299–300, 305–306, 309, 312–316, 321–324, 327, 330–331, 334, 337–338, 341, 345–350, 354–355, 359, 362–363, 366, 369–372, 374, 377–378, 382, 385, 388–391, 393, 400, 404–405, 408–409, 415–416, 422, 424–425, 429, 431, 433–435, 438, 442, 445, 448–449, 451, 454, 462–463, 470, 480, 483, 491–492, 497–500, 502, 509, 511–513, 519–520, 524–525, 528, 532, 535, 537, 544–548, 550, 562–565, 568, 571, 573, 580, 587–589, 591, 593, 596, 599, 602, 604–605, 608–612, 616–618, 620, 628–632, 634, 637, 641, 644, 655–656, 661, 671, 674–680, 684–690, 694–703, 707–715, 719, 721, 723–725
   _jetson_common.py1724573%132, 140, 288–291, 294, 311, 319, 354, 359–364, 382, 408–409, 411–413, 417–420, 422–423, 425–429, 431, 438–439, 442–443, 453, 456–457, 460, 462, 506–507
   _jetson_uefi.py40427432%124–126, 131–132, 151–153, 158–161, 328, 446, 448–451, 455, 459–460, 462–470, 472, 484–485, 488–489, 492–493, 496–498, 502–503, 508–510, 514, 518–519, 522–523, 526–527, 531, 534–535, 537, 542–543, 547, 550–551, 556, 560–561, 564, 568–570, 572, 576–579, 581–582, 604–605, 609–610, 612, 616, 620–621, 624–625, 632, 635–637, 640, 642–643, 648–649, 652–655, 657–658, 663, 665–666, 674, 677–680, 682–683, 685, 689–690, 694, 702–706, 709–710, 712, 715–719, 722, 725–729, 733–734, 737–742, 745–746, 749–752, 754–755, 762–763, 773–776, 779, 782–785, 788–792, 795–796, 799, 802–805, 808, 810, 815–816, 819, 822–825, 827, 833, 838–839, 858–859, 862, 870–871, 878, 888, 891, 898–899, 904–907, 915–918, 926–927, 939–942, 944, 947, 950, 958, 966–968, 970–972, 974–978, 983–984, 986, 999, 1003, 1006, 1016, 1021, 1029–1030, 1033, 1037, 1039–1041, 1047–1048, 1053, 1061–1066, 1071–1076, 1081–1089, 1094–1101, 1109–1111
   _ota_status_control.py1021189%117, 122, 127, 240, 244–245, 248, 255, 257–258, 273
   _rpi_boot.py28713353%53, 56, 120–121, 125, 133–136, 150–153, 158–159, 161–162, 167–168, 171–172, 181–182, 222, 228–232, 235, 253–255, 259–261, 266–268, 272–274, 284–285, 288, 291, 293–294, 296–297, 299–301, 307, 310–311, 321–324, 332–336, 338, 340–341, 346–347, 354, 357–362, 393, 395–398, 408–411, 415–416, 418–422, 450–453, 472–475, 498–501, 506–514, 519–526, 541–544, 551–554, 562–564
   _slot_mnt_helper.py100100% 
   configs.py510100% 
   protocol.py50100% 
   selecter.py412929%44–46, 49–50, 54–55, 58–60, 63, 65, 69, 77–79, 81–82, 84–85, 89, 91, 93–94, 96, 98–99, 101, 103
src/otaclient/configs
   __init__.py170100% 
   _cfg_configurable.py470100% 
   _cfg_consts.py47197%97
   _common.py80100% 
   _ecu_info.py56492%59, 64–65, 112
   _proxy_info.py50884%84, 86–87, 89, 100, 113–115
   cfg.py230100% 
src/otaclient/create_standby
   __init__.py13192%36
   common.py2264480%59, 62–63, 67–69, 71, 75–76, 78, 126, 174–176, 178–180, 182, 185–188, 192, 203, 279–280, 282–287, 299, 339, 367, 370–372, 388–389, 403, 407, 429–430
   interface.py70100% 
   rebuild_mode.py1151091%98–100, 119, 150–155
src/otaclient/grpc/api_v2
   ecu_status.py145795%117, 142, 144, 275, 347–348, 384
   ecu_tracker.py53530%17, 19–22, 24–30, 32, 34, 38–39, 42, 44, 50–53, 55, 57, 59–62, 69, 73–76, 80–81, 83, 85, 87–95, 99–100, 102, 104–107
   main.py41410%17, 19–24, 26–27, 29, 32, 39, 41–42, 44–45, 47–48, 50–55, 57–59, 61, 64, 70, 72–73, 76–77, 79–82, 84–85, 87
   servicer.py12710517%58–62, 64–65, 67–68, 74–78, 82–83, 88, 91, 95–97, 101–103, 111–113, 118–119, 122–123, 129, 132–136, 145–155, 162, 168, 171–173, 184–186, 189–191, 196, 203–206, 209, 213–214, 219, 222, 226–228, 232–234, 242–243, 248–249, 252–253, 259, 262–266, 275–284, 291, 297, 300–302, 307–308, 311
   types.py44295%78–79
src/otaclient_api/v2
   api_caller.py39684%45–47, 83–85
   types.py2563287%61, 64, 67–70, 86, 89–92, 131, 209–210, 212, 259, 262–263, 506–508, 512–513, 515, 518–519, 522–523, 578, 585–586, 588
src/otaclient_common
   __init__.py341555%42–44, 61, 63, 68–77
   _io.py64198%41
   cmdhelper.py130100% 
   common.py1061090%148, 151–153, 168, 175–177, 271, 275
   downloader.py1991094%107–108, 126, 153, 369, 424, 428, 516–517, 526
   linux.py611575%51–53, 59, 69, 74, 76, 108–109, 133–134, 190, 195–196, 198
   logging.py42490%56, 87–88, 95
   persist_file_handling.py1181884%113, 118, 150–152, 163, 192–193, 228–232, 242–244, 246–247
   proto_streamer.py42880%33, 48, 66–67, 72, 81–82, 100
   proto_wrapper.py3985785%87, 134–141, 165, 172, 184–186, 189–190, 205, 210, 221, 257, 263, 268, 299, 303, 307, 402, 462, 469, 472, 492, 499, 501, 526, 532, 535, 537, 562, 568, 571, 573, 605, 609, 611, 625, 642, 669, 672, 676, 692, 707, 713, 762–763, 765, 803–805
   retry_task_map.py129993%134–135, 153–154, 207–208, 210, 230–231
   shm_status.py952177%79–80, 83–84, 105, 120–122, 134, 139, 156–160, 169–170, 172, 179, 192, 204
   typing.py31487%48, 97–98, 100
TOTAL6867192971% 

Tests Skipped Failures Errors Time
242 0 💤 0 ❌ 0 🔥 12m 47s ⏱️

Please sign in to comment.