From 8faa4a3277103a908da44e738a8b65a51c0fd907 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tuomas=20M=C3=A4kinen?= Date: Fri, 22 Mar 2024 13:13:09 +0200 Subject: [PATCH] Fix deadlock on task scheduling when VM start fails When VM startup fails, the task is rescheduled. After recent changes to scheduler state management, the separate reschedule() method caused a deadlock, because it tries to lock the scheduler state while the caller is already holding that lock. Inline the task rescheduling at the call site to avoid the deadlock. --- crates/node/src/scheduler/mod.rs | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/crates/node/src/scheduler/mod.rs b/crates/node/src/scheduler/mod.rs index 74cbddd0..848c505a 100644 --- a/crates/node/src/scheduler/mod.rs +++ b/crates/node/src/scheduler/mod.rs @@ -310,7 +310,9 @@ impl Scheduler { state.task_queue.remove(&task.tx); } - self.reschedule(&task).await?; + // The task is already pending in program's work queue. Push program ID + // to pending programs queue to wait available resources. + state.pending_programs.push_back((task.tx, task.program_id)); tracing::warn!("task {} rescheduled: {}", task.id.to_string(), err); continue; } @@ -355,17 +357,6 @@ impl Scheduler { } } - async fn reschedule(&self, task: &Task) -> Result<()> { - // The task is already pending in program's work queue. Push program ID - // to pending programs queue to wait available resources. - self.state - .lock() - .await - .pending_programs - .push_back((task.tx, task.program_id)); - Ok(()) - } - async fn reap_zombies(&self) { let mut state = self.state.lock().await;