vmm: cpu: Add pending removed vcpu check to avoid resize vcpu hang

Add a check for vCPUs whose removal is still pending, tracked by the
VcpuState.pending_removal flag that is set alongside VcpuState.removing.
This prevents Cloud Hypervisor from hanging during repeated vCPU resizes.

Fix #5419

Signed-off-by: Yi Wang <foxywang@tencent.com>
(cherry picked from commit d46dd4b31f82f487706db9a1be9b1c3f6a10be3e)
This commit is contained in:
Yi Wang 2023-08-10 11:34:09 +08:00 committed by Bo Chen
parent c14f9ee2f4
commit a03f17a275

View File

@ -116,6 +116,9 @@ pub enum Error {
#[error("Error configuring vCPU: {0}")] #[error("Error configuring vCPU: {0}")]
VcpuConfiguration(#[source] arch::Error), VcpuConfiguration(#[source] arch::Error),
#[error("Still pending removed vcpu")]
VcpuPendingRemovedVcpu,
#[cfg(target_arch = "aarch64")] #[cfg(target_arch = "aarch64")]
#[error("Error fetching preferred target: {0}")] #[error("Error fetching preferred target: {0}")]
VcpuArmPreferredTarget(#[source] hypervisor::HypervisorVmError), VcpuArmPreferredTarget(#[source] hypervisor::HypervisorVmError),
@ -558,6 +561,7 @@ impl BusDevice for CpuManager {
struct VcpuState { struct VcpuState {
inserting: bool, inserting: bool,
removing: bool, removing: bool,
pending_removal: Arc<AtomicBool>,
handle: Option<thread::JoinHandle<()>>, handle: Option<thread::JoinHandle<()>>,
kill: Arc<AtomicBool>, kill: Arc<AtomicBool>,
vcpu_run_interrupted: Arc<AtomicBool>, vcpu_run_interrupted: Arc<AtomicBool>,
@ -1127,9 +1131,21 @@ impl CpuManager {
// Mark vCPUs for removal, actual removal happens on ejection // Mark vCPUs for removal, actual removal happens on ejection
for cpu_id in desired_vcpus..self.present_vcpus() { for cpu_id in desired_vcpus..self.present_vcpus() {
self.vcpu_states[usize::from(cpu_id)].removing = true; self.vcpu_states[usize::from(cpu_id)].removing = true;
self.vcpu_states[usize::from(cpu_id)]
.pending_removal
.store(true, Ordering::SeqCst);
} }
} }
/// Reports whether any vCPU still has a removal outstanding.
///
/// Returns `true` as soon as one entry in `vcpu_states` is both active and
/// has its `pending_removal` flag set; returns `false` if no entry matches.
pub fn check_pending_removed_vcpu(&mut self) -> bool {
    // `any` stops at the first match, mirroring an early `return true`.
    self.vcpu_states
        .iter()
        .any(|vcpu| vcpu.active() && vcpu.pending_removal.load(Ordering::SeqCst))
}
fn remove_vcpu(&mut self, cpu_id: u8) -> Result<()> { fn remove_vcpu(&mut self, cpu_id: u8) -> Result<()> {
info!("Removing vCPU: cpu_id = {}", cpu_id); info!("Removing vCPU: cpu_id = {}", cpu_id);
let mut state = &mut self.vcpu_states[usize::from(cpu_id)]; let mut state = &mut self.vcpu_states[usize::from(cpu_id)];
@ -1140,6 +1156,7 @@ impl CpuManager {
// Once the thread has exited, clear the "kill" so that it can be reused // Once the thread has exited, clear the "kill" so that it can be reused
state.kill.store(false, Ordering::SeqCst); state.kill.store(false, Ordering::SeqCst);
state.pending_removal.store(false, Ordering::SeqCst);
Ok(()) Ok(())
} }
@ -1173,6 +1190,10 @@ impl CpuManager {
return Ok(false); return Ok(false);
} }
if self.check_pending_removed_vcpu() {
return Err(Error::VcpuPendingRemovedVcpu);
}
match desired_vcpus.cmp(&self.present_vcpus()) { match desired_vcpus.cmp(&self.present_vcpus()) {
cmp::Ordering::Greater => { cmp::Ordering::Greater => {
self.create_vcpus(desired_vcpus, None)?; self.create_vcpus(desired_vcpus, None)?;