Skip to content

Commit

Permalink
Merge pull request #108 from CDOT-CV/Feature/firmware-manager-update
Browse files Browse the repository at this point in the history
Firmware Manager Upgrade Scheduler and Runner
  • Loading branch information
payneBrandon authored Nov 4, 2024
2 parents 90a240d + 8ad52e7 commit 5e3c9d1
Show file tree
Hide file tree
Showing 30 changed files with 913 additions and 396 deletions.
29 changes: 24 additions & 5 deletions docker-compose-addons.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
version: '3'

include:
- docker-compose.yml

Expand Down Expand Up @@ -123,11 +121,11 @@ services:
max-size: '10m'
max-file: '5'

firmware_manager:
firmware_manager_upgrade_scheduler:
build:
context: services
dockerfile: Dockerfile.firmware_manager
image: jpo_firmware_manager:latest
dockerfile: Dockerfile.fmus
image: jpo_firmware_manager_upgrade_scheduler:latest
restart: on-failure:3

ports:
Expand All @@ -138,6 +136,27 @@ services:
PG_DB_USER: ${PG_DB_USER}
PG_DB_PASS: ${PG_DB_PASS}

UPGRADE_RUNNER_ENDPOINT: ${FIRMWARE_MANAGER_UPGRADE_RUNNER_ENDPOINT}

LOGGING_LEVEL: ${FIRMWARE_MANAGER_LOGGING_LEVEL}
volumes:
- ${GOOGLE_APPLICATION_CREDENTIALS}:/google/gcp_credentials.json
- ${HOST_BLOB_STORAGE_DIRECTORY}:/mnt/blob_storage
logging:
options:
max-size: '10m'
max-file: '5'

firmware_manager_upgrade_runner:
build:
context: services
dockerfile: Dockerfile.fmur
image: jpo_firmware_manager_upgrade_runner:latest
restart: on-failure:3

ports:
- '8090:8080'
environment:
BLOB_STORAGE_PROVIDER: ${BLOB_STORAGE_PROVIDER}
BLOB_STORAGE_BUCKET: ${BLOB_STORAGE_BUCKET}

Expand Down
1 change: 0 additions & 1 deletion docker-compose-full-cm.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
version: '3'
services:
cvmanager_api:
build:
Expand Down
2 changes: 0 additions & 2 deletions docker-compose-mongo.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
version: '3'

include:
- docker-compose.yml

Expand Down
1 change: 0 additions & 1 deletion docker-compose-no-cm.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
version: '3.9'
services:
cvmanager_api:
build:
Expand Down
3 changes: 1 addition & 2 deletions docker-compose-obu-ota-server.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
version: '3'
services:
# OBU OTA Server and Nginx proxy services
jpo_ota_backend:
Expand All @@ -11,7 +10,7 @@ services:
- 8085:8085
environment:
SERVER_HOST: ${OBU_OTA_SERVER_HOST}
LOGGING_LEVEL: ${OBU_OTA_SERVER_LOGGING_LEVEL}
LOGGING_LEVEL: ${OBU_OTA_LOGGING_LEVEL}
BLOB_STORAGE_PROVIDER: ${BLOB_STORAGE_PROVIDER}
BLOB_STORAGE_BUCKET: ${OBU_OTA_BLOB_STORAGE_BUCKET}
BLOB_STORAGE_PATH: ${OBU_OTA_BLOB_STORAGE_PATH}
Expand Down
1 change: 0 additions & 1 deletion docker-compose-webapp-deployment.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# This file is used to build the webapp image for deployment.
# The COUNTS_MSG_TYPES and DOT_NAME variables must be set in .env before building to populate
# correctly in the deployed webapp as they are build-time variables.
version: '3'
services:
cvmanager_webapp:
build:
Expand Down
1 change: 1 addition & 0 deletions sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ KC_HOST_IP=${DOCKER_HOST_IP}

# Firmware Manager connectivity in the format 'http://endpoint:port'
FIRMWARE_MANAGER_ENDPOINT=http://${DOCKER_HOST_IP}:8089
FIRMWARE_MANAGER_UPGRADE_RUNNER_ENDPOINT=http://${DOCKER_HOST_IP}:8090

# Allowed CORS domain for accessing the CV Manager API from (set to the web application hostname)
# Make sure to include http:// or https://
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ WORKDIR /home

ADD addons/images/firmware_manager/requirements.txt .
ADD addons/images/firmware_manager/resources/xfer_yunex.jar ./tools/
ADD addons/images/firmware_manager/*.py .
ADD addons/images/firmware_manager/upgrade_runner/*.py .
ADD common/*.py ./common/

RUN pip3 install -r requirements.txt
RUN apt-get update
RUN apt-get install -y default-jdk
RUN apt-get install -y iputils-ping

CMD ["/home/firmware_manager.py"]
CMD ["/home/upgrade_runner.py"]
ENTRYPOINT ["python3"]
13 changes: 13 additions & 0 deletions services/Dockerfile.fmus
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Image for the Firmware Manager Upgrade Scheduler service.
# Paths are relative to the build context, which docker-compose-addons.yml sets to `services`.
FROM python:3.12.2-slim

WORKDIR /home

# Install the Python dependencies before copying the sources so this layer is
# reused from cache when only the .py files change. --no-cache-dir keeps pip's
# download cache out of the image.
COPY addons/images/firmware_manager/requirements.txt ./
RUN pip3 install --no-cache-dir -r requirements.txt

# COPY is preferred over ADD for plain local files (no URL fetch / archive extraction needed).
# A glob can match several files, so the destination is written as a directory with a trailing slash.
COPY addons/images/firmware_manager/upgrade_scheduler/*.py ./
COPY common/*.py ./common/

# The previous `RUN apt-get update` was dropped: no package was installed afterwards,
# so it only added an image layer containing apt package lists.

# The container runs `python3 /home/upgrade_scheduler.py`; CMD supplies the default argument to ENTRYPOINT.
ENTRYPOINT ["python3"]
CMD ["/home/upgrade_scheduler.py"]
10 changes: 7 additions & 3 deletions services/addons/images/firmware_manager/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,20 @@

## About <a name = "about"></a>

This directory contains a microservice that runs within the CV Manager GKE Cluster. The firmware manager monitors the CV Manager PostgreSQL database to determine if there are any RSUs that are targeted for a firmware upgrade. This monitoring is a once-per-hour, scheduled occurrence. Alternatively, this micro-service hosts a REST API for directly initiating firmware upgrades - this is used by the CV Manager API. Firmware upgrades are then run in parallel and tracked until completion.
This directory contains two microservices that run within the CV Manager GKE Cluster. The firmware manager upgrade scheduler monitors the CV Manager PostgreSQL database to determine if there are any RSUs that are targeted for a firmware upgrade. This monitoring is a once-per-hour, scheduled occurrence. Alternatively, the upgrade scheduler hosts a REST API for directly initiating firmware upgrades - this is used by the CV Manager API. For each firmware upgrade, the upgrade scheduler hands the task off to the firmware manager upgrade runner by sending it an HTTP request. This allows for better scaling for more parallel upgrades.

An RSU is determined to be ready for upgrade if its entry in the "rsus" table in PostgreSQL has its "target_firmware_version" set to be different than its "firmware_version". The Firmware Manager will ignore all devices with incompatible firmware upgrades set as their target firmware based on the "firmware_upgrade_rules" table. The CV Manager API will only offer CV Manager webapp users compatible options so this generally is a precaution.

Hosting firmware files in the cloud is recommended. GCP cloud storage is the currently supported cloud provider, but a directory mounted as a docker volume can also be used. Alternative cloud support can be added via the [download_blob.py](download_blob.py) script. Firmware storage must be organized by: `vendor/rsu-model/firmware-version/install_package`.

Firmware upgrades have unique procedures based on RSU vendor/manufacturer. To avoid requiring a unique bash script for every single firmware upgrade, the Firmware Manager has been written to use vendor based upgrade scripts that have been thoroughly tested. An interface-like abstract class, [base_upgrader.py](base_upgrader.py), has been made for helping create upgrade scripts for vendors not yet supported. The Firmware Manager selects the script to use based off the RSU's "model" column in the "rsus" table. These scripts report back to the Firmware Manager on completion with a status of whether the upgrade was a success or failure. Regardless, the Firmware Manager will remove the process from its tracking and update the PostgreSQL database accordingly.
Firmware upgrades have unique procedures based on RSU vendor/manufacturer. To avoid requiring a unique bash script for every single firmware upgrade, the firmware manager upgrade runner has been written to use vendor based upgrade scripts that have been thoroughly tested. An interface-like abstract class, [base_upgrader.py](base_upgrader.py), has been made for helping create upgrade scripts for vendors not yet supported. The firmware manager upgrade runner selects the script to use based off the RSU's "model" column in the "rsus" table. These scripts report back to the firmware manager upgrade scheduler on completion with a status of whether the upgrade was a success or failure. Regardless, the Firmware Manager will remove the process from its tracking and update the PostgreSQL database accordingly.

List of currently supported vendors:

- Commsignia
- Yunex

Available REST endpoints:
Available Firmware Manager Upgrade Scheduler REST endpoints:

- /init_firmware_upgrade [ **POST** ] `{ "rsu_ip": "" }`
- `rsu_ip` is the target RSU being upgraded (The target firmware is separately updated in PostgreSQL, this is just to get the Firmware Manager to immediately go look)
Expand All @@ -36,6 +36,10 @@ Available REST endpoints:
- Used to list all active upgrades in the form:
`{"active_upgrades": {"1.1.1.1": {"manufacturer": "Commsignia", "model": "ITS-RS4-M", "target_firmware_id": 2, "target_firmware_version": "y20.39.0", "install_package": "blob.blob"}}}`

Available Firmware Manager Upgrade Runner REST endpoints:

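- /run_firmware_upgrade [ **POST** ] `{ "ipv4_address": "", "manufacturer": "", "model": "", "ssh_username": "", "ssh_password": "", "target_firmware_id": "", "target_firmware_version": "", "install_package": "" }`
  - Starts the vendor-specific upgrade script for the RSU described in the request body and responds with `201` once the upgrade process has been started. All fields are required; `target_firmware_id` must be an integer.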

## Requirements <a name = "requirements"></a>

To properly run the firmware_manager microservice the following services are also required:
Expand Down
1 change: 1 addition & 0 deletions services/addons/images/firmware_manager/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
APScheduler==3.10.4
google-cloud-storage==2.14.0
flask==3.0.0
marshmallow==3.20.1
paramiko==3.5.0
pg8000==1.30.2
requests==2.31.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
class CommsigniaUpgrader(upgrader.UpgraderAbstractClass):
def __init__(self, upgrade_info):
# set file/blob location for post_upgrade script
self.post_upgrade_file_name = f"/home/{upgrade_info['ipv4_address']}/post_upgrade.sh"
self.post_upgrade_file_name = (
f"/home/{upgrade_info['ipv4_address']}/post_upgrade.sh"
)
self.post_upgrade_blob_name = f"{upgrade_info['manufacturer']}/{upgrade_info['model']}/{upgrade_info['target_firmware_version']}/post_upgrade.sh"
super().__init__(upgrade_info, firmware_extension=".tar.sig")

Expand Down Expand Up @@ -54,7 +56,9 @@ def upgrade(self):
ssh.close()

# If post_upgrade script exists execute it
if (self.download_blob(self.post_upgrade_blob_name, self.post_upgrade_file_name, ".sh")):
if self.download_blob(
self.post_upgrade_blob_name, self.post_upgrade_file_name, ".sh"
):
self.post_upgrade()

# Delete local installation package and its parent directory so it doesn't take up storage space
Expand All @@ -64,15 +68,19 @@ def upgrade(self):
self.notify_firmware_manager(success=True)
except Exception as err:
# If something goes wrong, cleanup anything left and report failure if possible
logging.error(f"Failed to perform firmware upgrade for {self.rsu_ip}: {err}")
logging.error(
f"Failed to perform firmware upgrade for {self.rsu_ip}: {err}"
)
self.cleanup()
self.notify_firmware_manager(success=False)
# send email to support team with the rsu and error
self.send_error_email("Firmware Upgrader", err)

def post_upgrade(self):
if self.wait_until_online() == -1:
raise Exception("RSU " + self.rsu_ip + " offline for too long after firmware upgrade")
raise Exception(
"RSU " + self.rsu_ip + " offline for too long after firmware upgrade"
)
try:
time.sleep(60)
# Make connection with the target device
Expand All @@ -95,25 +103,28 @@ def post_upgrade(self):

# Change permissions and execute post upgrade script
logging.info("Running post upgrade script for " + self.rsu_ip + "...")
ssh.exec_command(
f"chmod +x /tmp/post_upgrade.sh"
)
_stdin, _stdout, _stderr = ssh.exec_command(
f"/tmp/post_upgrade.sh"
)
ssh.exec_command(f"chmod +x /tmp/post_upgrade.sh")
_stdin, _stdout, _stderr = ssh.exec_command(f"/tmp/post_upgrade.sh")
decoded_stdout = _stdout.read().decode()
logging.info(decoded_stdout)
if "ALL OK" not in decoded_stdout:
ssh.close()
logging.error(f"Failed to execute post upgrade script for rsu {self.rsu_ip}: {decoded_stdout}")
logging.error(
f"Failed to execute post upgrade script for rsu {self.rsu_ip}: {decoded_stdout}"
)
return
ssh.close()
logging.info(f"Post upgrade script executed successfully for rsu: {self.rsu_ip}.")
logging.info(
f"Post upgrade script executed successfully for rsu: {self.rsu_ip}."
)
except Exception as err:
logging.error(f"Failed to execute post upgrade script for rsu {self.rsu_ip}: {err}")
logging.error(
f"Failed to execute post upgrade script for rsu {self.rsu_ip}: {err}"
)
# send email to support team with the rsu and error
self.send_error_email("Post-Upgrade Script", err)


# sys.argv[1] - JSON string with the following key-values:
# - ipv4_address
# - manufacturer
Expand All @@ -129,7 +140,7 @@ def post_upgrade(self):
# Trimming outer single quotes from the json.loads
upgrade_info = json.loads(sys.argv[1][1:-1])
commsignia_upgrader = CommsigniaUpgrader(upgrade_info)
if (commsignia_upgrader.check_online()):
if commsignia_upgrader.check_online():
commsignia_upgrader.upgrade()
else:
logging.error(f"RSU {upgrade_info['ipv4_address']} is offline")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,4 @@
LOGGING_LEVEL="INFO"
ACTIVE_UPGRADE_LIMIT=20

# PostgreSQL database variables
PG_DB_HOST=""
PG_DB_NAME=""
PG_DB_USER=""
PG_DB_PASS=""

# Blob storage variables (only 'GCP' and 'DOCKER' are supported at this time)
BLOB_STORAGE_PROVIDER=DOCKER
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from flask import Flask, jsonify, request, abort
from subprocess import Popen, DEVNULL
from waitress import serve
from marshmallow import Schema, fields
import json
import logging
import os

# Flask application object: the route handler in this module registers on it
# and serve_rest_api() hands it to waitress.
app = Flask(__name__)

def normalize_log_level(raw_level, default="INFO"):
    """Turn a LOGGING_LEVEL setting into a level name usable by ``logging``.

    Args:
        raw_level: value read from the environment; may be None, empty,
            padded with whitespace, or written in lower/mixed case.
        default: level name returned when ``raw_level`` is missing or blank.

    Returns:
        The upper-cased, stripped level name, or ``default`` when nothing
        usable was supplied. Names unknown to ``logging`` are returned
        unchanged so that ``logging.basicConfig`` still rejects them loudly.
    """
    cleaned = (raw_level or "").strip().upper()
    return cleaned or default


# The variable can be present but blank (e.g. when a compose file forwards an
# unset ${...} value), and logging.basicConfig raises ValueError for "" or for
# lower-case names such as "info", so normalize before configuring logging.
log_level = normalize_log_level(os.environ.get("LOGGING_LEVEL"))
logging.basicConfig(format="%(levelname)s:%(message)s", level=log_level)

# Supported RSU manufacturers, each mapped to the file name of the upgrade
# script that start_upgrade_task() launches from /home.
manufacturer_upgrade_scripts = dict(
    Commsignia="commsignia_upgrader.py",
    Yunex="yunex_upgrader.py",
)


def start_upgrade_task(rsu_upgrade_data):
    """Spawn the manufacturer-specific upgrade script for a single RSU.

    Args:
        rsu_upgrade_data: dict describing the upgrade. This function reads the
            "manufacturer" and "ipv4_address" keys and forwards the whole dict,
            JSON-encoded, to the upgrade script as its first argument.

    Returns:
        A ``(flask response, status code)`` tuple:
        201 when the upgrade process was spawned,
        400 when no upgrade script is registered for the manufacturer,
        500 when spawning the process failed.
    """
    # .get() keeps the error paths below from raising a second KeyError.
    rsu_ip = rsu_upgrade_data.get("ipv4_address")
    manufacturer = rsu_upgrade_data.get("manufacturer")

    try:
        script_name = manufacturer_upgrade_scripts.get(manufacturer)
        if script_name is None:
            # An unsupported manufacturer is a problem with the request, not a
            # server fault, so report it explicitly instead of as a KeyError/500.
            logging.error(
                f"No upgrade script is registered for manufacturer '{manufacturer}'; "
                f"cannot start firmware upgrade for {rsu_ip}"
            )
            return (
                jsonify(
                    {
                        "message": f"Unsupported manufacturer '{manufacturer}' for '{rsu_ip}'"
                    }
                ),
                400,
            )

        # The JSON payload is deliberately wrapped in single quotes: the
        # upgrader script strips the first and last character of argv[1]
        # before calling json.loads (see commsignia_upgrader.py).
        # The Popen handle is not kept; this function does not wait for the
        # child process to finish.
        Popen(
            [
                "python3",
                f"/home/{script_name}",
                f"'{json.dumps(rsu_upgrade_data)}'",
            ],
            stdout=DEVNULL,
        )

        return (
            jsonify(
                {
                    "message": f"Firmware upgrade started successfully for '{rsu_ip}'"
                }
            ),
            201,
        )
    except Exception as err:
        # No upgrade process was started. Log the failure and report it to the
        # caller with a 500 so the caller can decide how to proceed.
        # NOTE(review): presumably the upgrade scheduler retries on a later
        # pass - confirm against the scheduler implementation.
        logging.error(
            f"Encountered error of type {type(err)} while starting automatic upgrade process for {rsu_ip}: {err}"
        )

        return (
            jsonify(
                {
                    "message": f"Firmware upgrade failed to start for '{rsu_ip}'"
                }
            ),
            500,
        )


class RunFirmwareUpgradeSchema(Schema):
    """Validation schema for the JSON body of POST /run_firmware_upgrade.

    Every field is mandatory; a missing field shows up in the error dict
    returned by ``validate()``.
    """

    # Address of the RSU to upgrade; must parse as an IPv4 address.
    ipv4_address = fields.IPv4(required=True)
    # Used by start_upgrade_task() to pick the upgrade script.
    manufacturer = fields.String(required=True)
    model = fields.String(required=True)
    ssh_username = fields.String(required=True)
    ssh_password = fields.String(required=True)
    target_firmware_id = fields.Integer(required=True)
    target_firmware_version = fields.String(required=True)
    install_package = fields.String(required=True)


# POST /run_firmware_upgrade
# Starts a firmware upgrade for one targeted RSU. The JSON request body must
# provide all of:
#   ipv4_address, manufacturer, model, ssh_username, ssh_password,
#   target_firmware_id, target_firmware_version, install_package
@app.route("/run_firmware_upgrade", methods=["POST"])
def run_firmware_upgrade():
    """Validate the request body and, when it is valid, start the upgrade task.

    Returns the (response, status) tuple produced by start_upgrade_task();
    aborts with HTTP 400 carrying the validation errors otherwise.
    """
    payload = request.get_json()
    validation_errors = RunFirmwareUpgradeSchema().validate(payload)

    if not validation_errors:
        # Body is well-formed: hand it to the task launcher unchanged.
        return start_upgrade_task(payload)

    # Reject the request; abort() raises, so nothing runs after it.
    logging.error(str(validation_errors))
    abort(400, str(validation_errors))


def serve_rest_api():
    """Log the startup message and serve the Flask app with waitress on 0.0.0.0:8080."""
    logging.info("Initiating the Firmware Manager Upgrade Runner REST API...")
    listen_host, listen_port = "0.0.0.0", 8080
    serve(app, host=listen_host, port=listen_port)


# Start the REST API only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    serve_rest_api()
Loading

0 comments on commit 5e3c9d1

Please sign in to comment.