From 1ac4c42a84ff117b07ec7b236912d6521b4d6d7b Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Thu, 26 Nov 2020 16:44:00 +0100
Subject: [PATCH] hypervisor: emulator: Add initial fetch support

When the x86 instruction decoder tells us about some missing bytes from
the instruction stream, we call into the platform fetch method and
emulate one last instruction.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 hypervisor/src/arch/emulator/mod.rs           |  11 ++
 .../src/arch/x86/emulator/instructions/mov.rs |   3 -
 hypervisor/src/arch/x86/emulator/mod.rs       | 140 +++++++++++++++++-
 3 files changed, 148 insertions(+), 6 deletions(-)
diff --git a/hypervisor/src/arch/emulator/mod.rs b/hypervisor/src/arch/emulator/mod.rs
index 42b5fb95c..3bcd81c70 100644
--- a/hypervisor/src/arch/emulator/mod.rs
+++ b/hypervisor/src/arch/emulator/mod.rs
@@ -71,6 +71,9 @@ pub enum EmulationError<T: Debug> {
     #[error("Instruction Exception: {0}")]
     InstructionException(Exception<T>),
 
+    #[error("Instruction fetching error: {0}")]
+    InstructionFetchingError(#[source] anyhow::Error),
+
     #[error("Platform emulation error: {0}")]
     PlatformEmulationError(PlatformError),
 }
@@ -126,6 +129,14 @@ pub trait PlatformEmulator: Send + Sync {
     /// * `gva` - Guest virtual address to translate.
     ///
     fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError>;
+
+    /// Fetch instruction bytes from memory.
+    ///
+    /// # Arguments
+    ///
+    /// * `ip` - Instruction pointer virtual address to start fetching instructions from.
+    /// * `instruction_bytes` - Buffer that the fetched instruction bytes are written into.
+    fn fetch(&self, ip: u64, instruction_bytes: &mut [u8]) -> Result<(), PlatformError>;
 }
 
 pub type EmulationResult<S, E> = std::result::Result<S, EmulationError<E>>;
diff --git a/hypervisor/src/arch/x86/emulator/instructions/mov.rs b/hypervisor/src/arch/x86/emulator/instructions/mov.rs
index 03a772cd1..5983c6b36 100644
--- a/hypervisor/src/arch/x86/emulator/instructions/mov.rs
+++ b/hypervisor/src/arch/x86/emulator/instructions/mov.rs
@@ -249,9 +249,6 @@ impl<T: CpuStateManager> InstructionHandler<T> for Mov_rm64_r64 {
 #[cfg(test)]
 mod tests {
     #![allow(unused_mut)]
-
-    extern crate env_logger;
-
     use super::*;
     use crate::arch::x86::emulator::mock_vmm::*;
 
diff --git a/hypervisor/src/arch/x86/emulator/mod.rs b/hypervisor/src/arch/x86/emulator/mod.rs
index 2ab330a0c..2b7f7e935 100644
--- a/hypervisor/src/arch/x86/emulator/mod.rs
+++ b/hypervisor/src/arch/x86/emulator/mod.rs
@@ -515,12 +515,49 @@ impl<'a, T: CpuStateManager> Emulator<'a, T> {
         let mut decoder = Decoder::new(64, insn_stream, DecoderOptions::NONE);
         let mut insn = Instruction::default();
         let mut num_insn_emulated: usize = 0;
+        let mut fetched_insn_stream: [u8; 16] = [0; 16];
+        let mut last_decoded_ip: u64 = state.ip();
+        let mut stop_emulation: bool = false;
 
         decoder.set_ip(state.ip());
 
-        while decoder.can_decode() {
+        while decoder.can_decode() && !stop_emulation {
             decoder.decode_out(&mut insn);
 
+            if decoder.last_error() == DecoderError::NoMoreBytes {
+                // The decoder is missing some bytes to decode the current
+                // instruction, for example because the instruction stream
+                // crosses a page boundary.
+                // We fetch 16 more bytes from the instruction segment,
+                // decode and emulate the failing instruction and terminate
+                // the emulation loop.
+                debug!(
+                    "Fetching {} bytes from {:#x}",
+                    fetched_insn_stream.len(),
+                    last_decoded_ip
+                );
+
+                // fetched_insn_stream is 16 bytes long, enough to contain
+                // any complete x86 instruction.
+                self.platform
+                    .fetch(last_decoded_ip, &mut fetched_insn_stream)
+                    .map_err(EmulationError::PlatformEmulationError)?;
+
+                debug!("Fetched {:x?}", fetched_insn_stream);
+
+                // Restart decoding on the fetched stream, rebased onto the last decoded IP.
+                decoder = Decoder::new(64, &fetched_insn_stream, DecoderOptions::NONE);
+                decoder.set_ip(last_decoded_ip);
+                decoder.decode_out(&mut insn);
+                if decoder.last_error() != DecoderError::None {
+                    return Err(EmulationError::InstructionFetchingError(anyhow!(
+                        "{:#x?}", insn
+                    )));
+                }
+
+                stop_emulation = true;
+            }
+
             // Emulate the decoded instruction
             self.insn_map
                 .instructions
@@ -530,12 +567,13 @@ impl<'a, T: CpuStateManager> Emulator<'a, T> {
                 })?
                 .emulate(&insn, &mut state, self.platform)?;
 
+            last_decoded_ip = decoder.ip();
             num_insn_emulated += 1;
 
             if let Some(num_insn) = num_insn {
                 if num_insn_emulated >= num_insn {
                     // Exit the decoding loop, do not decode the next instruction.
-                    break;
+                    stop_emulation = true;
                 }
             }
         }
@@ -601,7 +639,7 @@ mod mock_vmm {
             }
 
             let mut vmm = MockVMM {
-                memory: vec![0; 4096],
+                memory: vec![0; 8192],
                 state: Arc::new(Mutex::new(initial_state)),
             };
 
@@ -673,5 +711,101 @@ mod mock_vmm {
         fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError> {
             Ok(gva)
         }
+
+        // Linearize `ip` through CS, then let `read_memory` fill `instruction_bytes`.
+        fn fetch(&self, ip: u64, instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
+            let linear_ip = {
+                let cpu_state = self.state.lock().unwrap();
+                cpu_state.linearize(Register::CS, ip, false)?
+            };
+            self.read_memory(linear_ip, instruction_bytes)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    #![allow(unused_mut)]
+    use super::*;
+    use crate::arch::x86::emulator::mock_vmm::*;
+
+    macro_rules! hashmap {
+        ($( $key: expr => $val: expr ),* $(,)?) => {{
+            let mut map = ::std::collections::HashMap::new();
+            $( map.insert($key, $val); )* // Later duplicate keys overwrite earlier ones.
+            map // `hashmap![]` (no pairs) evaluates to an empty map.
+        }}
+    }
+
+    #[test]
+    // Emulate a truncated instruction stream, which should cause a fetch.
+    //
+    // The only instruction handed to the emulator, `mov rax, 0x1000`, is
+    // truncated, so its missing bytes must be fetched before emulating it.
+    fn test_fetch_first_instruction() -> MockResult {
+        let ip: u64 = 0x1000;
+        let cpu_id = 0;
+        let memory = [
+            // Code at IP
+            0x48, 0xc7, 0xc0, 0x00, 0x10, 0x00, 0x00, // mov rax, 0x1000
+            0x48, 0x8b, 0x58, 0x10, // mov rbx, qword ptr [rax+10h]
+            // Padding
+            0x00, 0x00, 0x00, 0x00, 0x00, // Padding is all zeroes
+            // Data at IP + 0x10 (0x1234567812345678 in LE)
+            0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12,
+        ];
+        let insn = [
+            // First instruction is truncated
+            0x48, 0xc7, 0xc0, 0x00, // mov rax, 0x1000 -- Missing bytes: 0x10, 0x00, 0x00
+        ];
+
+        let mut vmm = MockVMM::new(ip, hashmap![], Some((ip, &memory)));
+        assert!(vmm.emulate_insn(cpu_id, &insn, Some(2)).is_ok());
+
+        let rax: u64 = vmm
+            .cpu_state(cpu_id)
+            .unwrap()
+            .read_reg(Register::RAX)
+            .unwrap();
+        assert_eq!(rax, ip);
+
+        Ok(())
+    }
+
+    #[test]
+    // Emulate a truncated instruction stream, which should cause a fetch.
+    //
+    // mov rax, 0x1000
+    // mov rbx, qword ptr [rax+10h]
+    // Test with the 2nd instruction truncated.
+    fn test_fetch_second_instruction() -> MockResult {
+        let target_rbx: u64 = 0x1234567812345678;
+        let ip: u64 = 0x1000;
+        let cpu_id = 0;
+        let memory = [
+            // Code at IP
+            0x48, 0xc7, 0xc0, 0x00, 0x10, 0x00, 0x00, // mov rax, 0x1000
+            0x48, 0x8b, 0x58, 0x10, // mov rbx, qword ptr [rax+10h]
+            // Padding
+            0x00, 0x00, 0x00, 0x00, 0x00, // Padding is all zeroes
+            // Data at IP + 0x10 (0x1234567812345678 in LE)
+            0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12,
+        ];
+        let insn = [
+            0x48, 0xc7, 0xc0, 0x00, 0x10, 0x00, 0x00, // mov rax, 0x1000
+            0x48, 0x8b, // Truncated mov rbx, qword ptr [rax+10h] -- missing [0x58, 0x10]
+        ];
+
+        let mut vmm = MockVMM::new(ip, hashmap![], Some((ip, &memory)));
+        assert!(vmm.emulate_insn(cpu_id, &insn, Some(2)).is_ok());
+
+        let rbx: u64 = vmm
+            .cpu_state(cpu_id)
+            .unwrap()
+            .read_reg(Register::RBX)
+            .unwrap();
+        assert_eq!(rbx, target_rbx);
+
+        Ok(())
     }
 }