Skip to content
This repository has been archived by the owner on Dec 17, 2024. It is now read-only.

Commit

Permalink
Add timeout to QEMU QMP connect
Browse files Browse the repository at this point in the history
QEMU QMP connection can be blocking sometimes, which leads to whole
scheduling loop getting halted. This change adds timeout to QMP
connect so that there's maximum limit how long this can take time
before the VM is given up and killed.

While at it, cleanup some leftovers from workspace volume refactoring
and fix `vm_registry` maintenance on VM termination.
  • Loading branch information
tuommaki committed Mar 21, 2024
1 parent a5ac802 commit ccc27b1
Showing 1 changed file with 48 additions and 18 deletions.
66 changes: 48 additions & 18 deletions crates/node/src/vmm/qemu.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::types::file::IMAGES_DIR;
use async_trait::async_trait;
use eyre::Result;
use eyre::{eyre, Result};
use gevulot_node::types::file::TaskVmFile;
use qapi::{
futures::{QapiStream, QmpStreamTokio},
Expand All @@ -25,7 +25,7 @@ use tokio::{
io::{ReadHalf, WriteHalf},
net::{TcpStream, ToSocketAddrs},
sync::Mutex,
time::sleep,
time::{sleep, timeout},
};
use tokio_vsock::{Incoming, VsockConnectInfo, VsockListener};
use tonic::Extensions;
Expand All @@ -39,6 +39,8 @@ use crate::{
vmm::ResourceRequest,
};

const QMP_CONNECT_TIMEOUT: Duration = Duration::from_secs(1);

impl VMId for u32 {
fn as_any(&self) -> &dyn Any {
self
Expand Down Expand Up @@ -187,7 +189,6 @@ impl Provider for Qemu {
tx_hash,
program_id,
workspace_volume_label,
//qmp: Arc::new(Mutex::new(qmp)),
};

// Must VM must be registered before start, because when the VM starts,
Expand Down Expand Up @@ -224,7 +225,6 @@ impl Provider for Qemu {
// Register 2 hard drives via SCSI
.args(["-device", "virtio-scsi-pci,bus=pci.2,addr=0x0,id=scsi0"])
.args(["-device", "scsi-hd,bus=scsi0.0,drive=hd0"])
//.args(["-device", "scsi-hd,bus=scsi0.0,drive=hd1"])
.args(["-vga", "none"])
// CPUS
.args(["-smp", &cpus.to_string()])
Expand All @@ -242,15 +242,9 @@ impl Provider for Qemu {
&img_file.into_os_string().into_string().unwrap(),
),
])
// WORKSPACE FILE
/*
.args([
"-drive",
&format!("file={},format=raw,if=none,id=hd1", &workspace_file),
])*/
.args(["-display", "none"])
.args(["-serial", "stdio"])
//VirtFs
// WORKSPACE VirtFS
.args([
"-virtfs",
&format!(
Expand Down Expand Up @@ -309,19 +303,54 @@ impl Provider for Qemu {
std::process::exit(1);
}

match Qmp::new(format!("localhost:{qmp_port}")).await {
Ok(c) => client = Some(c),
Err(err) => {
retry_count += 1;
sleep(Duration::from_millis(10)).await;
match timeout(
QMP_CONNECT_TIMEOUT,
Qmp::new(format!("localhost:{qmp_port}")),
)
.await
{
Ok(connect) => match connect {
Ok(clnt) => client = Some(clnt),
Err(err) => {
// Connection was refused. QEMU not started yet.
retry_count += 1;
sleep(Duration::from_millis(10)).await;
}
},
Err(_) => {
if retry_count < 100 {
tracing::warn!(
"tx: {} - QEMU QMP connect timeout; retrying once",
tx_hash
);
retry_count = 99;
} else {
tracing::error!(
"tx: {} - QEMU QMP connect timeout. Terminating VM.",
tx_hash
);
let cid = qemu_vm_handle.cid;
qemu_vm_handle
.child
.as_mut()
.ok_or(std::io::Error::other(
"No child process defined for this handle",
))
.and_then(|p| {
p.kill()?;
p.wait()
})?;

self.vm_registry.remove(&cid);
self.cid_allocations.remove(&cid);
return Err(eyre!("Failed to connect to QEMU QMP"));
}
}
};
}
client.unwrap()
};

qmp_client.system_reset().await?;

Ok(VMHandle {
start_time,
vm_id: Arc::new(cid),
Expand Down Expand Up @@ -350,6 +379,7 @@ impl Provider for Qemu {

let cid = qemu_vm_handle.cid;
self.release_cid(cid);
self.vm_registry.remove(&cid);

Ok(())
} else {
Expand Down

0 comments on commit ccc27b1

Please sign in to comment.