// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE-BSD-3-Clause file. // // Copyright © 2020 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause #[macro_use] extern crate log; pub mod async_io; pub mod fixed_vhd; #[cfg(feature = "io_uring")] /// Enabled with the `"io_uring"` feature pub mod fixed_vhd_async; pub mod fixed_vhd_sync; pub mod qcow; pub mod qcow_sync; #[cfg(feature = "io_uring")] /// Async primitives based on `io-uring` /// /// Enabled with the `"io_uring"` feature pub mod raw_async; pub mod raw_sync; pub mod vhd; pub mod vhdx; pub mod vhdx_sync; use crate::async_io::{AsyncIo, AsyncIoError, AsyncIoResult}; use crate::fixed_vhd::FixedVhd; use crate::qcow::{QcowFile, RawFile}; use crate::vhdx::{Vhdx, VhdxError}; #[cfg(feature = "io_uring")] use io_uring::{opcode, IoUring, Probe}; use libc::{ioctl, S_IFBLK, S_IFMT}; use smallvec::SmallVec; use std::alloc::{alloc_zeroed, dealloc, Layout}; use std::cmp; use std::collections::VecDeque; use std::convert::TryInto; use std::fmt::Debug; use std::fs::File; use std::io::{self, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write}; use std::os::linux::fs::MetadataExt; use std::os::unix::io::AsRawFd; use std::path::Path; use std::result; use std::sync::Arc; use std::sync::MutexGuard; use std::time::Instant; use thiserror::Error; use versionize::{VersionMap, Versionize, VersionizeResult}; use versionize_derive::Versionize; use virtio_bindings::virtio_blk::*; use virtio_queue::DescriptorChain; use vm_memory::{ bitmap::AtomicBitmap, bitmap::Bitmap, ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryError, GuestMemoryLoadGuard, }; use vm_virtio::{AccessPlatform, Translatable}; use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::{ioctl_io_nr, ioctl_ioc_nr}; type GuestMemoryMmap = vm_memory::GuestMemoryMmap; const SECTOR_SHIFT: u8 = 9; pub const SECTOR_SIZE: u64 = 0x01 << SECTOR_SHIFT; #[derive(Error, Debug)] pub enum Error { #[error("Guest gave us bad memory addresses")] GuestMemory(GuestMemoryError), #[error("Guest gave us offsets that would have overflowed a usize")] CheckedOffset(GuestAddress, usize), #[error("Guest gave us a write only descriptor that protocol says to read from")] UnexpectedWriteOnlyDescriptor, #[error("Guest gave us a read only descriptor that protocol says to write to")] UnexpectedReadOnlyDescriptor, #[error("Guest gave us too few descriptors in a descriptor chain")] DescriptorChainTooShort, #[error("Guest gave us a descriptor that was too short to use")] DescriptorLengthTooSmall, #[error("Failed to detect image type: {0}")] DetectImageType(std::io::Error), #[error("Failure in fixed vhd: {0}")] FixedVhdError(std::io::Error), #[error("Getting a block's metadata fails for any reason")] GetFileMetadata, #[error("The requested operation would cause a seek beyond disk end")] InvalidOffset, #[error("Failure in qcow: {0}")] QcowError(qcow::Error), #[error("Failure in raw file: {0}")] RawFileError(std::io::Error), #[error("The requested operation does not support multiple descriptors")] TooManyDescriptors, #[error("Failure in vhdx: {0}")] VhdxError(VhdxError), } fn build_device_id(disk_path: &Path) -> result::Result { let blk_metadata = match disk_path.metadata() { Err(_) => return Err(Error::GetFileMetadata), Ok(m) => m, }; // This is how kvmtool does it. let device_id = format!( "{}{}{}", blk_metadata.st_dev(), blk_metadata.st_rdev(), blk_metadata.st_ino() ); Ok(device_id) } pub fn build_serial(disk_path: &Path) -> Vec { let mut default_serial = vec![0; VIRTIO_BLK_ID_BYTES as usize]; match build_device_id(disk_path) { Err(_) => { warn!("Could not generate device id. We'll use a default."); } Ok(m) => { // The kernel only knows to read a maximum of VIRTIO_BLK_ID_BYTES. // This will also zero out any leftover bytes. let disk_id = m.as_bytes(); let bytes_to_copy = cmp::min(disk_id.len(), VIRTIO_BLK_ID_BYTES as usize); default_serial[..bytes_to_copy].clone_from_slice(&disk_id[..bytes_to_copy]) } } default_serial } #[derive(Error, Debug)] pub enum ExecuteError { #[error("Bad request: {0}")] BadRequest(Error), #[error("Failed to flush: {0}")] Flush(io::Error), #[error("Failed to read: {0}")] Read(GuestMemoryError), #[error("Failed to seek: {0}")] Seek(io::Error), #[error("Failed to write: {0}")] Write(GuestMemoryError), #[error("Unsupported request: {0}")] Unsupported(u32), #[error("Failed to submit io uring: {0}")] SubmitIoUring(io::Error), #[error("Failed to get guest address: {0}")] GetHostAddress(GuestMemoryError), #[error("Failed to async read: {0}")] AsyncRead(AsyncIoError), #[error("Failed to async write: {0}")] AsyncWrite(AsyncIoError), #[error("failed to async flush: {0}")] AsyncFlush(AsyncIoError), #[error("Failed allocating a temporary buffer: {0}")] TemporaryBufferAllocation(io::Error), } impl ExecuteError { pub fn status(&self) -> u32 { match *self { ExecuteError::BadRequest(_) => VIRTIO_BLK_S_IOERR, ExecuteError::Flush(_) => VIRTIO_BLK_S_IOERR, ExecuteError::Read(_) => VIRTIO_BLK_S_IOERR, ExecuteError::Seek(_) => VIRTIO_BLK_S_IOERR, ExecuteError::Write(_) => VIRTIO_BLK_S_IOERR, ExecuteError::Unsupported(_) => VIRTIO_BLK_S_UNSUPP, ExecuteError::SubmitIoUring(_) => VIRTIO_BLK_S_IOERR, ExecuteError::GetHostAddress(_) => VIRTIO_BLK_S_IOERR, ExecuteError::AsyncRead(_) => VIRTIO_BLK_S_IOERR, ExecuteError::AsyncWrite(_) => VIRTIO_BLK_S_IOERR, ExecuteError::AsyncFlush(_) => VIRTIO_BLK_S_IOERR, ExecuteError::TemporaryBufferAllocation(_) => VIRTIO_BLK_S_IOERR, } } } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum RequestType { In, Out, Flush, GetDeviceId, Unsupported(u32), } pub fn request_type( mem: &GuestMemoryMmap, desc_addr: GuestAddress, ) -> result::Result { let type_ = mem.read_obj(desc_addr).map_err(Error::GuestMemory)?; match type_ { VIRTIO_BLK_T_IN => Ok(RequestType::In), VIRTIO_BLK_T_OUT => Ok(RequestType::Out), VIRTIO_BLK_T_FLUSH => Ok(RequestType::Flush), VIRTIO_BLK_T_GET_ID => Ok(RequestType::GetDeviceId), t => Ok(RequestType::Unsupported(t)), } } fn sector(mem: &GuestMemoryMmap, desc_addr: GuestAddress) -> result::Result { const SECTOR_OFFSET: usize = 8; let addr = match mem.checked_offset(desc_addr, SECTOR_OFFSET) { Some(v) => v, None => return Err(Error::CheckedOffset(desc_addr, SECTOR_OFFSET)), }; mem.read_obj(addr).map_err(Error::GuestMemory) } #[derive(Debug)] pub struct AlignedOperation { origin_ptr: u64, aligned_ptr: u64, size: usize, layout: Layout, } #[derive(Debug)] pub struct Request { pub request_type: RequestType, pub sector: u64, pub data_descriptors: SmallVec<[(GuestAddress, u32); 1]>, pub status_addr: GuestAddress, pub writeback: bool, pub aligned_operations: SmallVec<[AlignedOperation; 1]>, pub start: Instant, } impl Request { pub fn parse( desc_chain: &mut DescriptorChain>, access_platform: Option<&Arc>, ) -> result::Result { let hdr_desc = desc_chain .next() .ok_or(Error::DescriptorChainTooShort) .map_err(|e| { error!("Missing head descriptor"); e })?; // The head contains the request type which MUST be readable. if hdr_desc.is_write_only() { return Err(Error::UnexpectedWriteOnlyDescriptor); } let hdr_desc_addr = hdr_desc .addr() .translate_gva(access_platform, hdr_desc.len() as usize); let mut req = Request { request_type: request_type(desc_chain.memory(), hdr_desc_addr)?, sector: sector(desc_chain.memory(), hdr_desc_addr)?, data_descriptors: SmallVec::with_capacity(1), status_addr: GuestAddress(0), writeback: true, aligned_operations: SmallVec::with_capacity(1), start: Instant::now(), }; let status_desc; let mut desc = desc_chain .next() .ok_or(Error::DescriptorChainTooShort) .map_err(|e| { error!("Only head descriptor present: request = {:?}", req); e })?; if !desc.has_next() { status_desc = desc; // Only flush requests are allowed to skip the data descriptor. if req.request_type != RequestType::Flush { error!("Need a data descriptor: request = {:?}", req); return Err(Error::DescriptorChainTooShort); } } else { req.data_descriptors.reserve_exact(1); while desc.has_next() { if desc.is_write_only() && req.request_type == RequestType::Out { return Err(Error::UnexpectedWriteOnlyDescriptor); } if !desc.is_write_only() && req.request_type == RequestType::In { return Err(Error::UnexpectedReadOnlyDescriptor); } if !desc.is_write_only() && req.request_type == RequestType::GetDeviceId { return Err(Error::UnexpectedReadOnlyDescriptor); } req.data_descriptors.push(( desc.addr() .translate_gva(access_platform, desc.len() as usize), desc.len(), )); desc = desc_chain .next() .ok_or(Error::DescriptorChainTooShort) .map_err(|e| { error!("DescriptorChain corrupted: request = {:?}", req); e })?; } status_desc = desc; } // The status MUST always be writable. if !status_desc.is_write_only() { return Err(Error::UnexpectedReadOnlyDescriptor); } if status_desc.len() < 1 { return Err(Error::DescriptorLengthTooSmall); } req.status_addr = status_desc .addr() .translate_gva(access_platform, status_desc.len() as usize); Ok(req) } pub fn execute( &self, disk: &mut T, disk_nsectors: u64, mem: &GuestMemoryMmap, serial: &[u8], ) -> result::Result { disk.seek(SeekFrom::Start(self.sector << SECTOR_SHIFT)) .map_err(ExecuteError::Seek)?; let mut len = 0; for (data_addr, data_len) in &self.data_descriptors { let mut top: u64 = u64::from(*data_len) / SECTOR_SIZE; if u64::from(*data_len) % SECTOR_SIZE != 0 { top += 1; } top = top .checked_add(self.sector) .ok_or(ExecuteError::BadRequest(Error::InvalidOffset))?; if top > disk_nsectors { return Err(ExecuteError::BadRequest(Error::InvalidOffset)); } match self.request_type { RequestType::In => { mem.read_exact_from(*data_addr, disk, *data_len as usize) .map_err(ExecuteError::Read)?; len += data_len; } RequestType::Out => { mem.write_all_to(*data_addr, disk, *data_len as usize) .map_err(ExecuteError::Write)?; if !self.writeback { disk.flush().map_err(ExecuteError::Flush)?; } } RequestType::Flush => disk.flush().map_err(ExecuteError::Flush)?, RequestType::GetDeviceId => { if (*data_len as usize) < serial.len() { return Err(ExecuteError::BadRequest(Error::InvalidOffset)); } mem.write_slice(serial, *data_addr) .map_err(ExecuteError::Write)?; } RequestType::Unsupported(t) => return Err(ExecuteError::Unsupported(t)), }; } Ok(len) } pub fn execute_async( &mut self, mem: &GuestMemoryMmap, disk_nsectors: u64, disk_image: &mut dyn AsyncIo, serial: &[u8], user_data: u64, ) -> result::Result { let sector = self.sector; let request_type = self.request_type; let offset = (sector << SECTOR_SHIFT) as libc::off_t; let mut iovecs: SmallVec<[libc::iovec; 1]> = SmallVec::with_capacity(self.data_descriptors.len()); for (data_addr, data_len) in &self.data_descriptors { if *data_len == 0 { continue; } let mut top: u64 = u64::from(*data_len) / SECTOR_SIZE; if u64::from(*data_len) % SECTOR_SIZE != 0 { top += 1; } top = top .checked_add(sector) .ok_or(ExecuteError::BadRequest(Error::InvalidOffset))?; if top > disk_nsectors { return Err(ExecuteError::BadRequest(Error::InvalidOffset)); } let origin_ptr = mem .get_slice(*data_addr, *data_len as usize) .map_err(ExecuteError::GetHostAddress)? .ptr_guard(); // Verify the buffer alignment. // In case it's not properly aligned, an intermediate buffer is // created with the correct alignment, and a copy from/to the // origin buffer is performed, depending on the type of operation. let iov_base = if (origin_ptr.as_ptr() as u64) % SECTOR_SIZE != 0 { let layout = Layout::from_size_align(*data_len as usize, SECTOR_SIZE as usize).unwrap(); // SAFETY: layout has non-zero size let aligned_ptr = unsafe { alloc_zeroed(layout) }; if aligned_ptr.is_null() { return Err(ExecuteError::TemporaryBufferAllocation( io::Error::last_os_error(), )); } // We need to perform the copy beforehand in case we're writing // data out. if request_type == RequestType::Out { // SAFETY: destination buffer has been allocated with // the proper size. unsafe { std::ptr::copy(origin_ptr.as_ptr(), aligned_ptr, *data_len as usize) }; } // Store both origin and aligned pointers for complete_async() // to process them. self.aligned_operations.push(AlignedOperation { origin_ptr: origin_ptr.as_ptr() as u64, aligned_ptr: aligned_ptr as u64, size: *data_len as usize, layout, }); aligned_ptr as *mut libc::c_void } else { origin_ptr.as_ptr() as *mut libc::c_void }; let iovec = libc::iovec { iov_base, iov_len: *data_len as libc::size_t, }; iovecs.push(iovec); } // Queue operations expected to be submitted. match request_type { RequestType::In => { for (data_addr, data_len) in &self.data_descriptors { mem.get_slice(*data_addr, *data_len as usize) .map_err(ExecuteError::GetHostAddress)? .bitmap() .mark_dirty(0, *data_len as usize); } disk_image .read_vectored(offset, &iovecs, user_data) .map_err(ExecuteError::AsyncRead)?; } RequestType::Out => { disk_image .write_vectored(offset, &iovecs, user_data) .map_err(ExecuteError::AsyncWrite)?; } RequestType::Flush => { disk_image .fsync(Some(user_data)) .map_err(ExecuteError::AsyncFlush)?; } RequestType::GetDeviceId => { let (data_addr, data_len) = if self.data_descriptors.len() == 1 { (self.data_descriptors[0].0, self.data_descriptors[0].1) } else { return Err(ExecuteError::BadRequest(Error::TooManyDescriptors)); }; if (data_len as usize) < serial.len() { return Err(ExecuteError::BadRequest(Error::InvalidOffset)); } mem.write_slice(serial, data_addr) .map_err(ExecuteError::Write)?; return Ok(false); } RequestType::Unsupported(t) => return Err(ExecuteError::Unsupported(t)), } Ok(true) } pub fn complete_async(&mut self) -> result::Result<(), Error> { for aligned_operation in self.aligned_operations.drain(..) { // We need to perform the copy after the data has been read inside // the aligned buffer in case we're reading data in. if self.request_type == RequestType::In { // SAFETY: origin buffer has been allocated with the // proper size. unsafe { std::ptr::copy( aligned_operation.aligned_ptr as *const u8, aligned_operation.origin_ptr as *mut u8, aligned_operation.size, ) }; } // Free the temporary aligned buffer. // SAFETY: aligned_ptr was allocated by alloc_zeroed with the same // layout unsafe { dealloc( aligned_operation.aligned_ptr as *mut u8, aligned_operation.layout, ) }; } Ok(()) } pub fn set_writeback(&mut self, writeback: bool) { self.writeback = writeback } } #[derive(Copy, Clone, Debug, Default, Versionize)] #[repr(C, packed)] pub struct VirtioBlockConfig { pub capacity: u64, pub size_max: u32, pub seg_max: u32, pub geometry: VirtioBlockGeometry, pub blk_size: u32, pub physical_block_exp: u8, pub alignment_offset: u8, pub min_io_size: u16, pub opt_io_size: u32, pub writeback: u8, pub unused: u8, pub num_queues: u16, pub max_discard_sectors: u32, pub max_discard_seg: u32, pub discard_sector_alignment: u32, pub max_write_zeroes_sectors: u32, pub max_write_zeroes_seg: u32, pub write_zeroes_may_unmap: u8, pub unused1: [u8; 3], } #[derive(Copy, Clone, Debug, Default, Versionize)] #[repr(C, packed)] pub struct VirtioBlockGeometry { pub cylinders: u16, pub heads: u8, pub sectors: u8, } // SAFETY: data structure only contain a series of integers unsafe impl ByteValued for VirtioBlockConfig {} // SAFETY: data structure only contain a series of integers unsafe impl ByteValued for VirtioBlockGeometry {} /// Check if io_uring for block device can be used on the current system, as /// it correctly supports the expected io_uring features. pub fn block_io_uring_is_supported() -> bool { #[cfg(not(feature = "io_uring"))] { info!("io_uring is disabled by crate features"); false } #[cfg(feature = "io_uring")] { let error_msg = "io_uring not supported:"; // Check we can create an io_uring instance, which effectively verifies // that io_uring_setup() syscall is supported. let io_uring = match IoUring::new(1) { Ok(io_uring) => io_uring, Err(e) => { info!("{} failed to create io_uring instance: {}", error_msg, e); return false; } }; let submitter = io_uring.submitter(); let mut probe = Probe::new(); // Check we can register a probe to validate supported operations. match submitter.register_probe(&mut probe) { Ok(_) => {} Err(e) => { info!("{} failed to register a probe: {}", error_msg, e); return false; } } // Check IORING_OP_FSYNC is supported if !probe.is_supported(opcode::Fsync::CODE) { info!("{} IORING_OP_FSYNC operation not supported", error_msg); return false; } // Check IORING_OP_READV is supported if !probe.is_supported(opcode::Readv::CODE) { info!("{} IORING_OP_READV operation not supported", error_msg); return false; } // Check IORING_OP_WRITEV is supported if !probe.is_supported(opcode::Writev::CODE) { info!("{} IORING_OP_WRITEV operation not supported", error_msg); return false; } true } } pub trait AsyncAdaptor where F: Read + Write + Seek, { fn read_vectored_sync( &mut self, offset: libc::off_t, iovecs: &[libc::iovec], user_data: u64, eventfd: &EventFd, completion_list: &mut VecDeque<(u64, i32)>, ) -> AsyncIoResult<()> { // Convert libc::iovec into IoSliceMut let mut slices: SmallVec<[IoSliceMut; 1]> = SmallVec::with_capacity(iovecs.len()); for iovec in iovecs.iter() { // SAFETY: on Linux IoSliceMut wraps around libc::iovec slices.push(IoSliceMut::new(unsafe { std::mem::transmute(*iovec) })); } let result = { let mut file = self.file(); // Move the cursor to the right offset file.seek(SeekFrom::Start(offset as u64)) .map_err(AsyncIoError::ReadVectored)?; // Read vectored file.read_vectored(slices.as_mut_slice()) .map_err(AsyncIoError::ReadVectored)? }; completion_list.push_back((user_data, result as i32)); eventfd.write(1).unwrap(); Ok(()) } fn write_vectored_sync( &mut self, offset: libc::off_t, iovecs: &[libc::iovec], user_data: u64, eventfd: &EventFd, completion_list: &mut VecDeque<(u64, i32)>, ) -> AsyncIoResult<()> { // Convert libc::iovec into IoSlice let mut slices: SmallVec<[IoSlice; 1]> = SmallVec::with_capacity(iovecs.len()); for iovec in iovecs.iter() { // SAFETY: on Linux IoSlice wraps around libc::iovec slices.push(IoSlice::new(unsafe { std::mem::transmute(*iovec) })); } let result = { let mut file = self.file(); // Move the cursor to the right offset file.seek(SeekFrom::Start(offset as u64)) .map_err(AsyncIoError::WriteVectored)?; // Write vectored file.write_vectored(slices.as_slice()) .map_err(AsyncIoError::WriteVectored)? }; completion_list.push_back((user_data, result as i32)); eventfd.write(1).unwrap(); Ok(()) } fn fsync_sync( &mut self, user_data: Option, eventfd: &EventFd, completion_list: &mut VecDeque<(u64, i32)>, ) -> AsyncIoResult<()> { let result: i32 = { let mut file = self.file(); // Flush file.flush().map_err(AsyncIoError::Fsync)?; 0 }; if let Some(user_data) = user_data { completion_list.push_back((user_data, result)); eventfd.write(1).unwrap(); } Ok(()) } fn file(&mut self) -> MutexGuard; } pub enum ImageType { FixedVhd, Qcow2, Raw, Vhdx, } const QCOW_MAGIC: u32 = 0x5146_49fb; const VHDX_SIGN: u64 = 0x656C_6966_7864_6876; /// Read a block into memory aligned by the source block size (needed for O_DIRECT) pub fn read_aligned_block_size(f: &mut File) -> std::io::Result> { let blocksize = DiskTopology::probe(f)?.logical_block_size as usize; // SAFETY: We are allocating memory that is naturally aligned (size = alignment) and we meet // requirements for safety from Vec::from_raw_parts() as we are using the global allocator // and transferring ownership of the memory. let mut data = unsafe { Vec::from_raw_parts( alloc_zeroed(Layout::from_size_align_unchecked(blocksize, blocksize)), blocksize, blocksize, ) }; f.read_exact(&mut data)?; Ok(data) } /// Determine image type through file parsing. pub fn detect_image_type(f: &mut File) -> std::io::Result { let block = read_aligned_block_size(f)?; // Check 4 first bytes to get the header value and determine the image type let image_type = if u32::from_be_bytes(block[0..4].try_into().unwrap()) == QCOW_MAGIC { ImageType::Qcow2 } else if vhd::is_fixed_vhd(f)? { ImageType::FixedVhd } else if u64::from_le_bytes(block[0..8].try_into().unwrap()) == VHDX_SIGN { ImageType::Vhdx } else { ImageType::Raw }; Ok(image_type) } pub trait BlockBackend: Read + Write + Seek + Send + Debug { fn size(&self) -> Result; } /// Inspect the image file type and create an appropriate disk file to match it. pub fn create_disk_file(mut file: File, direct_io: bool) -> Result, Error> { let image_type = detect_image_type(&mut file).map_err(Error::DetectImageType)?; Ok(match image_type { ImageType::Qcow2 => { Box::new(QcowFile::from(RawFile::new(file, direct_io)).map_err(Error::QcowError)?) as Box } ImageType::FixedVhd => { Box::new(FixedVhd::new(file).map_err(Error::FixedVhdError)?) as Box } ImageType::Vhdx => { Box::new(Vhdx::new(file).map_err(Error::VhdxError)?) as Box } ImageType::Raw => Box::new(RawFile::new(file, direct_io)) as Box, }) } #[derive(Debug)] pub struct DiskTopology { pub logical_block_size: u64, pub physical_block_size: u64, pub minimum_io_size: u64, pub optimal_io_size: u64, } impl Default for DiskTopology { fn default() -> Self { Self { logical_block_size: 512, physical_block_size: 512, minimum_io_size: 512, optimal_io_size: 0, } } } ioctl_io_nr!(BLKSSZGET, 0x12, 104); ioctl_io_nr!(BLKPBSZGET, 0x12, 123); ioctl_io_nr!(BLKIOMIN, 0x12, 120); ioctl_io_nr!(BLKIOOPT, 0x12, 121); enum BlockSize { LogicalBlock, PhysicalBlock, MinimumIo, OptimalIo, } impl DiskTopology { fn is_block_device(f: &File) -> std::io::Result { let mut stat = std::mem::MaybeUninit::::uninit(); // SAFETY: FFI call with a valid fd and buffer let ret = unsafe { libc::fstat(f.as_raw_fd(), stat.as_mut_ptr()) }; if ret != 0 { return Err(std::io::Error::last_os_error()); } // SAFETY: stat is valid at this point let is_block = unsafe { (*stat.as_ptr()).st_mode & S_IFMT == S_IFBLK }; Ok(is_block) } // libc::ioctl() takes different types on different architectures fn query_block_size(f: &File, block_size_type: BlockSize) -> std::io::Result { let mut block_size = 0; // SAFETY: FFI call with correct arguments let ret = unsafe { ioctl( f.as_raw_fd(), match block_size_type { BlockSize::LogicalBlock => BLKSSZGET(), BlockSize::PhysicalBlock => BLKPBSZGET(), BlockSize::MinimumIo => BLKIOMIN(), BlockSize::OptimalIo => BLKIOOPT(), } as _, &mut block_size, ) }; if ret != 0 { return Err(std::io::Error::last_os_error()); }; Ok(block_size) } pub fn probe(f: &File) -> std::io::Result { if !Self::is_block_device(f)? { return Ok(DiskTopology::default()); } Ok(DiskTopology { logical_block_size: Self::query_block_size(f, BlockSize::LogicalBlock)?, physical_block_size: Self::query_block_size(f, BlockSize::PhysicalBlock)?, minimum_io_size: Self::query_block_size(f, BlockSize::MinimumIo)?, optimal_io_size: Self::query_block_size(f, BlockSize::OptimalIo)?, }) } }