diff --git a/app/models/miq_server/worker_management/kubernetes.rb b/app/models/miq_server/worker_management/kubernetes.rb
index 3d161de3eae..8e5286dc8db 100644
--- a/app/models/miq_server/worker_management/kubernetes.rb
+++ b/app/models/miq_server/worker_management/kubernetes.rb
@@ -12,10 +12,6 @@ def sync_from_system
     # we only have to sync the list of pods and deployments once
     ensure_kube_monitors_started if my_server_is_primary?
 
-    # Before syncing the workers check for any orphaned worker rows that don't have
-    # a current pod and delete them
-    cleanup_orphaned_worker_rows
-
     # Update worker deployments with updated settings such as cpu/memory limits
     sync_deployment_settings
   end
@@ -54,6 +50,14 @@ def cleanup_orphaned_worker_rows
     end
   end
 
+  def cleanup_orphaned_workers
+    orphaned_pods = current_pods.keys - miq_workers.pluck(:system_uid)
+    return if orphaned_pods.empty?
+
+    # TODO destroy orphaned pods
+    orphaned_pods.each { |_pod| }
+  end
+
   def cleanup_failed_workers
     super
 
diff --git a/app/models/miq_server/worker_management/monitor.rb b/app/models/miq_server/worker_management/monitor.rb
index b176f9d0f1a..fc2c730a44a 100644
--- a/app/models/miq_server/worker_management/monitor.rb
+++ b/app/models/miq_server/worker_management/monitor.rb
@@ -18,6 +18,12 @@ def monitor_workers
     # Cache a list of the native objects backing the miq_workers (e.g.: pods, services, or processes)
     sync_from_system
 
+    # Cleanup any worker rows that don't have running workers
+    cleanup_orphaned_worker_rows
+
+    # Cleanup any workers that don't have corresponding miq_workers rows
+    cleanup_orphaned_workers
+
     sync_monitor
 
     # Sync the workers after sync'ing the child worker settings
@@ -49,6 +55,14 @@ def sync_workers
     sync_stopping_workers
   end
 
+  def cleanup_orphaned_worker_rows
+    raise NotImplementedError, "cleanup_orphaned_worker_rows must be implemented in a subclass"
+  end
+
+  def cleanup_orphaned_workers
+    raise NotImplementedError, "cleanup_orphaned_workers must be implemented in a subclass"
+  end
+
   def cleanup_failed_workers
     check_pending_stop
     clean_worker_records
diff --git a/app/models/miq_server/worker_management/process.rb b/app/models/miq_server/worker_management/process.rb
index 6b6be61c493..1d6383383a7 100644
--- a/app/models/miq_server/worker_management/process.rb
+++ b/app/models/miq_server/worker_management/process.rb
@@ -1,7 +1,7 @@
 class MiqServer::WorkerManagement::Process < MiqServer::WorkerManagement
   def sync_from_system
     require "sys/proctable"
-    self.miq_processes = Sys::ProcTable.ps.select { |proc| proc.ppid == my_server.pid }
+    @miq_processes_by_pid = Sys::ProcTable.ps.select { |proc| proc.ppid == my_server.pid }.index_by(&:pid)
   end
 
   def sync_starting_workers
@@ -12,6 +12,22 @@ def sync_stopping_workers
     MiqWorker.find_all_stopping.to_a
   end
 
+  def cleanup_orphaned_worker_rows
+    orphaned_rows = miq_workers.where.not(:pid => miq_pids)
+    return if orphaned_rows.empty?
+
+    _log.warn("Removing orphaned worker rows without corresponding processes: #{orphaned_rows.collect(&:pid).inspect}")
+    orphaned_rows.destroy_all
+  end
+
+  def cleanup_orphaned_workers
+    orphaned_workers = miq_pids - miq_workers.pluck(:pid)
+    return if orphaned_workers.empty?
+
+    _log.warn("Removing orphaned processes without corresponding worker rows: #{orphaned_workers.inspect}")
+    orphaned_workers.each { |pid| ::Process.kill(9, pid) }
+  end
+
   def monitor_workers
     super
 
@@ -74,5 +90,13 @@ def validate_worker(worker)
 
   private
 
-  attr_accessor :miq_processes
+  attr_reader :miq_processes_by_pid
+
+  def miq_processes
+    miq_processes_by_pid.values
+  end
+
+  def miq_pids
+    miq_processes_by_pid.keys
+  end
 end
diff --git a/app/models/miq_server/worker_management/systemd.rb b/app/models/miq_server/worker_management/systemd.rb
index abfd50310b7..634a09cab5e 100644
--- a/app/models/miq_server/worker_management/systemd.rb
+++ b/app/models/miq_server/worker_management/systemd.rb
@@ -29,6 +29,12 @@ def sync_stopping_workers
     end
   end
 
+  def cleanup_orphaned_worker_rows
+  end
+
+  def cleanup_orphaned_workers
+  end
+
   def cleanup_failed_workers
     super
 
diff --git a/spec/models/miq_server/worker_management/kubernetes_spec.rb b/spec/models/miq_server/worker_management/kubernetes_spec.rb
index 02f450e1406..26d8b97e9b0 100644
--- a/spec/models/miq_server/worker_management/kubernetes_spec.rb
+++ b/spec/models/miq_server/worker_management/kubernetes_spec.rb
@@ -120,9 +120,8 @@
 
   context "#sync_from_system" do
     context "#ensure_kube_monitors_started" do
-      it "podified, ensures pod monitor started and orphaned rows are removed" do
+      it "podified, ensures pod monitor started" do
         expect(server.worker_manager).to receive(:ensure_kube_monitors_started)
-        expect(server.worker_manager).to receive(:cleanup_orphaned_worker_rows)
         server.worker_manager.sync_from_system
       end
     end