diff --git a/Cargo.lock b/Cargo.lock index 77be0f7a5..67ff80520 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,6 +141,33 @@ dependencies = [ "vmm-sys-util 0.1.0 (git+https://github.com/sameo/vmm-sys-util)", ] +[[package]] +name = "proc-macro2" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "qcow" +version = "0.1.0" +dependencies = [ + "byteorder 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.48 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "remain 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "vmm-sys-util 0.1.0 (git+https://github.com/sameo/vmm-sys-util)", +] + +[[package]] +name = "quote" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.29 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "redox_syscall" version = "0.1.51" @@ -154,11 +181,31 @@ dependencies = [ "redox_syscall 0.1.51 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "remain" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.29 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.33 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "strsim" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "syn" +version = "0.15.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.29 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "termion" version = "1.5.1" @@ -182,6 +229,11 @@ name = "unicode-width" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "unicode-xid" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "vec_map" version = "0.8.1" @@ -215,6 +267,7 @@ dependencies = [ "libc 0.2.48 (registry+https://github.com/rust-lang/crates.io-index)", "linux-loader 0.1.0 (git+https://github.com/sameo/linux-loader)", "pci 0.1.0", + "qcow 0.1.0", "vm-memory 0.1.0 (git+https://github.com/rust-vmm/vm-memory)", "vmm-sys-util 0.1.0 (git+https://github.com/sameo/vmm-sys-util)", ] @@ -260,12 +313,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum libc 0.2.48 (registry+https://github.com/rust-lang/crates.io-index)" = "e962c7641008ac010fa60a7dfdc1712449f29c44ef2d4702394aea943ee75047" "checksum linux-loader 0.1.0 (git+https://github.com/sameo/linux-loader)" = "" "checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" +"checksum proc-macro2 0.4.29 (registry+https://github.com/rust-lang/crates.io-index)" = "64c827cea7a7ab30ce4593e5e04d7a11617ad6ece2fa230605a78b00ff965316" +"checksum quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "faf4799c5d274f3868a4aae320a0a182cbd2baee377b378f080e16a23e9d80db" 
"checksum redox_syscall 0.1.51 (registry+https://github.com/rust-lang/crates.io-index)" = "423e376fffca3dfa06c9e9790a9ccd282fafb3cc6e6397d01dbf64f9bacc6b85" "checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" +"checksum remain 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3bec2543b50be4539fdc27fde082e218cf4c3895358ca77f5c52fe930589e209" "checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694" +"checksum syn 0.15.33 (registry+https://github.com/rust-lang/crates.io-index)" = "ec52cd796e5f01d0067225a5392e70084acc4c0013fa71d55166d38a8b307836" "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" "checksum textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c0b59b6b4b44d867f1370ef1bd91bfb262bf07bf0ae65c202ea2fbc16153b693" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" +"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" "checksum vm-memory 0.1.0 (git+https://github.com/rust-vmm/vm-memory)" = "" "checksum vmm-sys-util 0.1.0 (git+https://github.com/sameo/vmm-sys-util)" = "" diff --git a/qcow/Cargo.toml b/qcow/Cargo.toml new file mode 100755 index 000000000..83d808b13 --- /dev/null +++ b/qcow/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "qcow" +version = "0.1.0" +authors = ["The Chromium OS Authors"] +edition = "2018" +license = "BSD-3-Clause" + +[lib] +path = "src/qcow.rs" + +[dependencies] +byteorder = "*" +libc = "*" +log = "*" +remain = "*" +vmm-sys-util = { git = "https://github.com/sameo/vmm-sys-util" } diff --git a/qcow/src/qcow.rs b/qcow/src/qcow.rs new file mode 100755 index 000000000..aa2388cb2 --- /dev/null +++ b/qcow/src/qcow.rs @@ -0,0 +1,2351 @@ +// Copyright 2018 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#[macro_use] +extern crate log; + +mod qcow_raw_file; +mod refcount; +mod vec_cache; + +use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; +use libc::{EINVAL, ENOSPC, ENOTSUP}; +use remain::sorted; +use vmm_sys_util::{FileSetLen, FileSync, PunchHole, SeekHole, WriteZeroes}; + +use std::cmp::min; +use std::fmt::{self, Display}; +use std::fs::File; +use std::io::{self, Read, Seek, SeekFrom, Write}; +use std::mem::size_of; +use std::os::unix::io::{AsRawFd, RawFd}; + +use crate::qcow_raw_file::QcowRawFile; +use crate::refcount::RefCount; +use crate::vec_cache::{CacheMap, Cacheable, VecCache}; + +#[sorted] +#[derive(Debug)] +pub enum Error { + BackingFilesNotSupported, + CompressedBlocksNotSupported, + EvictingCache(io::Error), + GettingFileSize(io::Error), + GettingRefcount(refcount::Error), + InvalidClusterIndex, + InvalidClusterSize, + InvalidIndex, + InvalidL1TableOffset, + InvalidMagic, + InvalidOffset(u64), + InvalidRefcountTableOffset, + InvalidRefcountTableSize, + NoFreeClusters, + NoRefcountClusters, + OpeningFile(io::Error), + ReadingData(io::Error), + ReadingHeader(io::Error), + ReadingPointers(io::Error), + ReadingRefCountBlock(refcount::Error), + ReadingRefCounts(io::Error), + RebuildingRefCounts(io::Error), + SeekingFile(io::Error), + SettingFileSize(io::Error), + SettingRefcountRefcount(io::Error), + SizeTooSmallForNumberOfClusters, + UnsupportedRefcountOrder, + UnsupportedVersion(u32), + WritingData(io::Error), + WritingHeader(io::Error), +} + +pub type Result = std::result::Result; + +impl Display for Error { + #[remain::check] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use self::Error::*; + + #[sorted] + match self { + BackingFilesNotSupported => write!(f, "backing files not supported"), + CompressedBlocksNotSupported => write!(f, "compressed blocks not supported"), + EvictingCache(e) => write!(f, "failed to evict cache: {}", e), + GettingFileSize(e) => write!(f, "failed to get file size: {}", e), + GettingRefcount(e) => write!(f, "failed to get refcount: {}", e), + InvalidClusterIndex => write!(f, "invalid cluster index"), + InvalidClusterSize => write!(f, "invalid cluster size"), + InvalidIndex => write!(f, "invalid index"), + InvalidL1TableOffset => write!(f, "invalid L1 table offset"), + InvalidMagic => write!(f, "invalid magic"), + InvalidOffset(_) => write!(f, "invalid offset"), + InvalidRefcountTableOffset => write!(f, "invalid refcount table offset"), + InvalidRefcountTableSize => write!(f, "invalid refcount table size"), + NoFreeClusters => write!(f, "no free clusters"), + NoRefcountClusters => write!(f, "no refcount clusters"), + OpeningFile(e) => write!(f, "failed to open file: {}", e), + ReadingData(e) => write!(f, "failed to read data: {}", e), + ReadingHeader(e) => write!(f, "failed to read header: {}", e), + ReadingPointers(e) => write!(f, "failed to read pointers: {}", e), + ReadingRefCountBlock(e) => write!(f, "failed to read ref count block: {}", e), + ReadingRefCounts(e) => write!(f, "failed to read ref counts: {}", e), + RebuildingRefCounts(e) => write!(f, "failed to rebuild ref counts: {}", e), + SeekingFile(e) => write!(f, "failed to seek file: {}", e), + SettingFileSize(e) => write!(f, "failed to set file size: {}", e), + SettingRefcountRefcount(e) => write!(f, "failed to set refcount refcount: {}", e), + SizeTooSmallForNumberOfClusters => write!(f, "size too small for number of clusters"), + UnsupportedRefcountOrder => write!(f, "unsupported refcount order"), + UnsupportedVersion(v) => write!(f, "unsupported version: {}", v), + 
WritingData(e) => write!(f, "failed to write data: {}", e), + WritingHeader(e) => write!(f, "failed to write header: {}", e), + } + } +} + +pub enum ImageType { + Raw, + Qcow2, +} + +// QCOW magic constant that starts the header. +const QCOW_MAGIC: u32 = 0x5146_49fb; +// Default to a cluster size of 2^DEFAULT_CLUSTER_BITS +const DEFAULT_CLUSTER_BITS: u32 = 16; +const MAX_CLUSTER_BITS: u32 = 30; +// Only support 2 byte refcounts, 2^refcount_order bits. +const DEFAULT_REFCOUNT_ORDER: u32 = 4; + +const V3_BARE_HEADER_SIZE: u32 = 104; + +// bits 0-8 and 56-63 are reserved. +const L1_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00; +const L2_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00; +// Flags +const COMPRESSED_FLAG: u64 = 1 << 62; +const CLUSTER_USED_FLAG: u64 = 1 << 63; +const COMPATIBLE_FEATURES_LAZY_REFCOUNTS: u64 = 1; + +/// Contains the information from the header of a qcow file. +#[derive(Copy, Clone, Debug)] +pub struct QcowHeader { + pub magic: u32, + pub version: u32, + + pub backing_file_offset: u64, + pub backing_file_size: u32, + + pub cluster_bits: u32, + pub size: u64, + pub crypt_method: u32, + + pub l1_size: u32, + pub l1_table_offset: u64, + + pub refcount_table_offset: u64, + pub refcount_table_clusters: u32, + + pub nb_snapshots: u32, + pub snapshots_offset: u64, + + // v3 entries + pub incompatible_features: u64, + pub compatible_features: u64, + pub autoclear_features: u64, + pub refcount_order: u32, + pub header_size: u32, +} + +impl QcowHeader { + /// Creates a QcowHeader from a reference to a file. + pub fn new(f: &mut File) -> Result { + f.seek(SeekFrom::Start(0)).map_err(Error::ReadingHeader)?; + let magic = f.read_u32::().map_err(Error::ReadingHeader)?; + if magic != QCOW_MAGIC { + return Err(Error::InvalidMagic); + } + + // Reads the next u32 from the file. + fn read_u32_from_file(f: &mut File) -> Result { + f.read_u32::().map_err(Error::ReadingHeader) + } + + // Reads the next u64 from the file. + fn read_u64_from_file(f: &mut File) -> Result { + f.read_u64::().map_err(Error::ReadingHeader) + } + + Ok(QcowHeader { + magic, + version: read_u32_from_file(f)?, + backing_file_offset: read_u64_from_file(f)?, + backing_file_size: read_u32_from_file(f)?, + cluster_bits: read_u32_from_file(f)?, + size: read_u64_from_file(f)?, + crypt_method: read_u32_from_file(f)?, + l1_size: read_u32_from_file(f)?, + l1_table_offset: read_u64_from_file(f)?, + refcount_table_offset: read_u64_from_file(f)?, + refcount_table_clusters: read_u32_from_file(f)?, + nb_snapshots: read_u32_from_file(f)?, + snapshots_offset: read_u64_from_file(f)?, + incompatible_features: read_u64_from_file(f)?, + compatible_features: read_u64_from_file(f)?, + autoclear_features: read_u64_from_file(f)?, + refcount_order: read_u32_from_file(f)?, + header_size: read_u32_from_file(f)?, + }) + } + + /// Create a header for the given `size`. + pub fn create_for_size(size: u64) -> QcowHeader { + let cluster_bits: u32 = DEFAULT_CLUSTER_BITS; + let cluster_size: u32 = 0x01 << cluster_bits; + // L2 blocks are always one cluster long. They contain cluster_size/sizeof(u64) addresses. 
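// Worked numbers for the defaults used here: cluster_bits = 16 gives a cluster_size of
// 65536 bytes, so each L2 table holds 65536 / 8 = 8192 eight-byte entries and maps
// 8192 * 64 KiB = 512 MiB of guest data. A 16 GiB image, for example, needs
// 262,144 data clusters and 32 L2 tables, and its L1 table fits in a single cluster.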
+ let l2_size: u32 = cluster_size / size_of::() as u32; + let num_clusters: u32 = div_round_up_u64(size, u64::from(cluster_size)) as u32; + let num_l2_clusters: u32 = div_round_up_u32(num_clusters, l2_size); + let l1_clusters: u32 = div_round_up_u32(num_l2_clusters, cluster_size); + let header_clusters = div_round_up_u32(size_of::() as u32, cluster_size); + QcowHeader { + magic: QCOW_MAGIC, + version: 3, + backing_file_offset: 0, + backing_file_size: 0, + cluster_bits: DEFAULT_CLUSTER_BITS, + size, + crypt_method: 0, + l1_size: num_l2_clusters, + l1_table_offset: u64::from(cluster_size), + // The refcount table is after l1 + header. + refcount_table_offset: u64::from(cluster_size * (l1_clusters + 1)), + refcount_table_clusters: { + // Pre-allocate enough clusters for the entire refcount table as it must be + // continuous in the file. Allocate enough space to refcount all clusters, including + // the refcount clusters. + let max_refcount_clusters = max_refcount_clusters( + DEFAULT_REFCOUNT_ORDER, + cluster_size, + num_clusters + l1_clusters + num_l2_clusters + header_clusters, + ) as u32; + // The refcount table needs to store the offset of each refcount cluster. + div_round_up_u32( + max_refcount_clusters * size_of::() as u32, + cluster_size, + ) + }, + nb_snapshots: 0, + snapshots_offset: 0, + incompatible_features: 0, + compatible_features: 0, + autoclear_features: 0, + refcount_order: DEFAULT_REFCOUNT_ORDER, + header_size: V3_BARE_HEADER_SIZE, + } + } + + /// Write the header to `file`. + pub fn write_to(&self, file: &mut F) -> Result<()> { + // Writes the next u32 to the file. + fn write_u32_to_file(f: &mut F, value: u32) -> Result<()> { + f.write_u32::(value) + .map_err(Error::WritingHeader) + } + + // Writes the next u64 to the file. + fn write_u64_to_file(f: &mut F, value: u64) -> Result<()> { + f.write_u64::(value) + .map_err(Error::WritingHeader) + } + + write_u32_to_file(file, self.magic)?; + write_u32_to_file(file, self.version)?; + write_u64_to_file(file, self.backing_file_offset)?; + write_u32_to_file(file, self.backing_file_size)?; + write_u32_to_file(file, self.cluster_bits)?; + write_u64_to_file(file, self.size)?; + write_u32_to_file(file, self.crypt_method)?; + write_u32_to_file(file, self.l1_size)?; + write_u64_to_file(file, self.l1_table_offset)?; + write_u64_to_file(file, self.refcount_table_offset)?; + write_u32_to_file(file, self.refcount_table_clusters)?; + write_u32_to_file(file, self.nb_snapshots)?; + write_u64_to_file(file, self.snapshots_offset)?; + write_u64_to_file(file, self.incompatible_features)?; + write_u64_to_file(file, self.compatible_features)?; + write_u64_to_file(file, self.autoclear_features)?; + write_u32_to_file(file, self.refcount_order)?; + write_u32_to_file(file, self.header_size)?; + + // Set the file length by seeking and writing a zero to the last byte. This avoids needing + // a `File` instead of anything that implements seek as the `file` argument. + // Zeros out the l1 and refcount table clusters. 
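// Seeking past the current end of the file and writing a single byte extends the file
// without allocating the intervening blocks on filesystems that support sparse files;
// the skipped-over L1 and refcount regions read back as zeros, which is exactly the
// "empty table" state the rest of the code expects.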
+ let cluster_size = 0x01u64 << self.cluster_bits; + let refcount_blocks_size = u64::from(self.refcount_table_clusters) * cluster_size; + file.seek(SeekFrom::Start( + self.refcount_table_offset + refcount_blocks_size - 2, + )) + .map_err(Error::WritingHeader)?; + file.write(&[0u8]).map_err(Error::WritingHeader)?; + + Ok(()) + } +} + +fn max_refcount_clusters(refcount_order: u32, cluster_size: u32, num_clusters: u32) -> usize { + let refcount_bytes = (0x01u32 << refcount_order) / 8; + let for_data = div_round_up_u32(num_clusters * refcount_bytes, cluster_size); + let for_refcounts = div_round_up_u32(for_data * refcount_bytes, cluster_size); + for_data as usize + for_refcounts as usize +} + +/// Represents a qcow2 file. This is a sparse file format maintained by the qemu project. +/// Full documentation of the format can be found in the qemu repository. +/// +/// # Example +/// +/// ``` +/// # use std::io::{Read, Seek, SeekFrom}; +/// # use qcow::{self, QcowFile}; +/// # fn test(file: std::fs::File) -> std::io::Result<()> { +/// let mut q = QcowFile::from(file).expect("Can't open qcow file"); +/// let mut buf = [0u8; 12]; +/// q.seek(SeekFrom::Start(10 as u64))?; +/// q.read(&mut buf[..])?; +/// # Ok(()) +/// # } +/// ``` +#[derive(Debug)] +pub struct QcowFile { + raw_file: QcowRawFile, + header: QcowHeader, + l1_table: VecCache, + l2_entries: u64, + l2_cache: CacheMap>, + refcounts: RefCount, + current_offset: u64, + unref_clusters: Vec, // List of freshly unreferenced clusters. + // List of unreferenced clusters available to be used. unref clusters become available once the + // removal of references to them have been synced to disk. + avail_clusters: Vec, + //TODO(dgreid) Add support for backing files. - backing_file: Option>>, +} + +impl QcowFile { + /// Creates a QcowFile from `file`. File must be a valid qcow2 image. + pub fn from(mut file: File) -> Result { + let header = QcowHeader::new(&mut file)?; + + // Only v3 files are supported. + if header.version != 3 { + return Err(Error::UnsupportedVersion(header.version)); + } + + let cluster_bits: u32 = header.cluster_bits; + if cluster_bits > MAX_CLUSTER_BITS { + return Err(Error::InvalidClusterSize); + } + let cluster_size = 0x01u64 << cluster_bits; + if cluster_size < size_of::() as u64 { + // Can't fit an offset in a cluster, nothing is going to work. + return Err(Error::InvalidClusterSize); + } + + // No current support for backing files. + if header.backing_file_offset != 0 { + return Err(Error::BackingFilesNotSupported); + } + + // Only support two byte refcounts. + let refcount_bits: u64 = 0x01u64 + .checked_shl(header.refcount_order) + .ok_or(Error::UnsupportedRefcountOrder)?; + if refcount_bits != 16 { + return Err(Error::UnsupportedRefcountOrder); + } + let refcount_bytes = (refcount_bits + 7) / 8; + + // Need at least one refcount cluster + if header.refcount_table_clusters == 0 { + return Err(Error::NoRefcountClusters); + } + offset_is_cluster_boundary(header.backing_file_offset, header.cluster_bits)?; + offset_is_cluster_boundary(header.l1_table_offset, header.cluster_bits)?; + offset_is_cluster_boundary(header.refcount_table_offset, header.cluster_bits)?; + offset_is_cluster_boundary(header.snapshots_offset, header.cluster_bits)?; + + // The first cluster should always have a non-zero refcount, so if it is 0, + // this is an old file with broken refcounts, which requires a rebuild. 
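// Concretely, the check below reads the first u64 of the refcount table, which is the
// on-disk address of refcount block 0, and then the first u16 of that block, which is
// the refcount of cluster 0 (the header cluster). In a consistent image both are
// non-zero; either one being zero, or the lazy-refcounts feature bit being set, forces
// a full refcount rebuild before the file is used.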
+ let mut refcount_rebuild_required = true; + file.seek(SeekFrom::Start(header.refcount_table_offset)) + .map_err(Error::SeekingFile)?; + let first_refblock_addr = file.read_u64::().map_err(Error::ReadingHeader)?; + if first_refblock_addr != 0 { + file.seek(SeekFrom::Start(first_refblock_addr)) + .map_err(Error::SeekingFile)?; + let first_cluster_refcount = + file.read_u16::().map_err(Error::ReadingHeader)?; + if first_cluster_refcount != 0 { + refcount_rebuild_required = false; + } + } + + if (header.compatible_features & COMPATIBLE_FEATURES_LAZY_REFCOUNTS) != 0 { + refcount_rebuild_required = true; + } + + let mut raw_file = + QcowRawFile::from(file, cluster_size).ok_or(Error::InvalidClusterSize)?; + if refcount_rebuild_required { + QcowFile::rebuild_refcounts(&mut raw_file, header)?; + } + + let l2_size = cluster_size / size_of::() as u64; + let num_clusters = div_round_up_u64(header.size, cluster_size); + let num_l2_clusters = div_round_up_u64(num_clusters, l2_size); + let l1_clusters = div_round_up_u64(num_l2_clusters, cluster_size); + let header_clusters = div_round_up_u64(size_of::() as u64, cluster_size); + let l1_table = VecCache::from_vec( + raw_file + .read_pointer_table( + header.l1_table_offset, + num_l2_clusters, + Some(L1_TABLE_OFFSET_MASK), + ) + .map_err(Error::ReadingHeader)?, + ); + + let num_clusters = div_round_up_u64(header.size, cluster_size); + let refcount_clusters = max_refcount_clusters( + header.refcount_order, + cluster_size as u32, + (num_clusters + l1_clusters + num_l2_clusters + header_clusters) as u32, + ) as u64; + let refcount_block_entries = cluster_size / refcount_bytes; + let refcounts = RefCount::new( + &mut raw_file, + header.refcount_table_offset, + refcount_clusters, + refcount_block_entries, + cluster_size, + ) + .map_err(Error::ReadingRefCounts)?; + + let l2_entries = cluster_size / size_of::() as u64; + + let mut qcow = QcowFile { + raw_file, + header, + l1_table, + l2_entries, + l2_cache: CacheMap::new(100), + refcounts, + current_offset: 0, + unref_clusters: Vec::new(), + avail_clusters: Vec::new(), + }; + + // Check that the L1 and refcount tables fit in a 64bit address space. + qcow.header + .l1_table_offset + .checked_add(qcow.l1_address_offset(qcow.virtual_size())) + .ok_or(Error::InvalidL1TableOffset)?; + qcow.header + .refcount_table_offset + .checked_add(u64::from(qcow.header.refcount_table_clusters) * cluster_size) + .ok_or(Error::InvalidRefcountTableOffset)?; + + qcow.find_avail_clusters()?; + + Ok(qcow) + } + + /// Creates a new QcowFile at the given path. + pub fn new(mut file: File, virtual_size: u64) -> Result { + let header = QcowHeader::create_for_size(virtual_size); + file.seek(SeekFrom::Start(0)).map_err(Error::SeekingFile)?; + header.write_to(&mut file)?; + + let mut qcow = Self::from(file)?; + + // Set the refcount for each refcount table cluster. + let cluster_size = 0x01u64 << qcow.header.cluster_bits; + let refcount_table_base = qcow.header.refcount_table_offset as u64; + let end_cluster_addr = + refcount_table_base + u64::from(qcow.header.refcount_table_clusters) * cluster_size; + + let mut cluster_addr = 0; + while cluster_addr < end_cluster_addr { + let mut unref_clusters = qcow + .set_cluster_refcount(cluster_addr, 1) + .map_err(Error::SettingRefcountRefcount)?; + qcow.unref_clusters.append(&mut unref_clusters); + cluster_addr += cluster_size; + } + + Ok(qcow) + } + + /// Returns the `QcowHeader` for this file. 
+ pub fn header(&self) -> &QcowHeader { + &self.header + } + + /// Returns the L1 lookup table for this file. This is only useful for debugging. + pub fn l1_table(&self) -> &[u64] { + &self.l1_table.get_values() + } + + /// Returns an L2_table of cluster addresses, only used for debugging. + pub fn l2_table(&mut self, l1_index: usize) -> Result> { + let l2_addr_disk = *self.l1_table.get(l1_index).ok_or(Error::InvalidIndex)?; + + if l2_addr_disk == 0 { + // Reading from an unallocated cluster will return zeros. + return Ok(None); + } + + if !self.l2_cache.contains_key(l1_index) { + // Not in the cache. + let table = VecCache::from_vec( + Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk) + .map_err(Error::ReadingPointers)?, + ); + let l1_table = &self.l1_table; + let raw_file = &mut self.raw_file; + self.l2_cache + .insert(l1_index, table, |index, evicted| { + raw_file.write_pointer_table( + l1_table[index], + evicted.get_values(), + CLUSTER_USED_FLAG, + ) + }) + .map_err(Error::EvictingCache)?; + } + + // The index must exist as it was just inserted if it didn't already. + Ok(Some(self.l2_cache.get(l1_index).unwrap().get_values())) + } + + /// Returns the refcount table for this file. This is only useful for debugging. + pub fn ref_table(&self) -> &[u64] { + &self.refcounts.ref_table() + } + + /// Returns the `index`th refcount block from the file. + pub fn refcount_block(&mut self, index: usize) -> Result> { + self.refcounts + .refcount_block(&mut self.raw_file, index) + .map_err(Error::ReadingRefCountBlock) + } + + /// Returns the first cluster in the file with a 0 refcount. Used for testing. + pub fn first_zero_refcount(&mut self) -> Result> { + let file_size = self + .raw_file + .file_mut() + .metadata() + .map_err(Error::GettingFileSize)? + .len(); + let cluster_size = 0x01u64 << self.header.cluster_bits; + + let mut cluster_addr = 0; + while cluster_addr < file_size { + let cluster_refcount = self + .refcounts + .get_cluster_refcount(&mut self.raw_file, cluster_addr) + .map_err(Error::GettingRefcount)?; + if cluster_refcount == 0 { + return Ok(Some(cluster_addr)); + } + cluster_addr += cluster_size; + } + Ok(None) + } + + fn find_avail_clusters(&mut self) -> Result<()> { + let cluster_size = self.raw_file.cluster_size(); + + let file_size = self + .raw_file + .file_mut() + .metadata() + .map_err(Error::GettingFileSize)? + .len(); + + for i in (0..file_size).step_by(cluster_size as usize) { + let refcount = self + .refcounts + .get_cluster_refcount(&mut self.raw_file, i) + .map_err(Error::GettingRefcount)?; + if refcount == 0 { + self.avail_clusters.push(i); + } + } + + Ok(()) + } + + /// Rebuild the reference count tables. + fn rebuild_refcounts(raw_file: &mut QcowRawFile, header: QcowHeader) -> Result<()> { + fn add_ref(refcounts: &mut [u16], cluster_size: u64, cluster_address: u64) -> Result<()> { + let idx = (cluster_address / cluster_size) as usize; + if idx >= refcounts.len() { + return Err(Error::InvalidClusterIndex); + } + refcounts[idx] += 1; + Ok(()) + } + + // Add a reference to the first cluster (header plus extensions). + fn set_header_refcount(refcounts: &mut [u16], cluster_size: u64) -> Result<()> { + add_ref(refcounts, cluster_size, 0) + } + + // Add references to the L1 table clusters. 
+ fn set_l1_refcounts( + refcounts: &mut [u16], + header: QcowHeader, + cluster_size: u64, + ) -> Result<()> { + let l1_clusters = div_round_up_u64(u64::from(header.l1_size), cluster_size); + let l1_table_offset = header.l1_table_offset; + for i in 0..l1_clusters { + add_ref(refcounts, cluster_size, l1_table_offset + i * cluster_size)?; + } + Ok(()) + } + + // Traverse the L1 and L2 tables to find all reachable data clusters. + fn set_data_refcounts( + refcounts: &mut [u16], + header: QcowHeader, + cluster_size: u64, + raw_file: &mut QcowRawFile, + ) -> Result<()> { + let l1_table = raw_file + .read_pointer_table( + header.l1_table_offset, + u64::from(header.l1_size), + Some(L1_TABLE_OFFSET_MASK), + ) + .map_err(Error::ReadingPointers)?; + for l1_index in 0..header.l1_size as usize { + let l2_addr_disk = *l1_table.get(l1_index).ok_or(Error::InvalidIndex)?; + if l2_addr_disk != 0 { + // Add a reference to the L2 table cluster itself. + add_ref(refcounts, cluster_size, l2_addr_disk)?; + + // Read the L2 table and find all referenced data clusters. + let l2_table = raw_file + .read_pointer_table( + l2_addr_disk, + cluster_size / size_of::() as u64, + Some(L2_TABLE_OFFSET_MASK), + ) + .map_err(Error::ReadingPointers)?; + for data_cluster_addr in l2_table { + if data_cluster_addr != 0 { + add_ref(refcounts, cluster_size, data_cluster_addr)?; + } + } + } + } + + Ok(()) + } + + // Add references to the top-level refcount table clusters. + fn set_refcount_table_refcounts( + refcounts: &mut [u16], + header: QcowHeader, + cluster_size: u64, + ) -> Result<()> { + let refcount_table_offset = header.refcount_table_offset; + for i in 0..u64::from(header.refcount_table_clusters) { + add_ref( + refcounts, + cluster_size, + refcount_table_offset + i * cluster_size, + )?; + } + Ok(()) + } + + // Allocate clusters for refblocks. + // This needs to be done last so that we have the correct refcounts for all other + // clusters. + fn alloc_refblocks( + refcounts: &mut [u16], + cluster_size: u64, + refblock_clusters: u64, + pointers_per_cluster: u64, + ) -> Result> { + let refcount_table_entries = div_round_up_u64(refblock_clusters, pointers_per_cluster); + let mut ref_table = vec![0; refcount_table_entries as usize]; + let mut first_free_cluster: u64 = 0; + for refblock_addr in &mut ref_table { + while refcounts[first_free_cluster as usize] != 0 { + first_free_cluster += 1; + if first_free_cluster >= refcounts.len() as u64 { + return Err(Error::InvalidRefcountTableSize); + } + } + + *refblock_addr = first_free_cluster * cluster_size; + add_ref(refcounts, cluster_size, *refblock_addr)?; + + first_free_cluster += 1; + } + + Ok(ref_table) + } + + // Write the updated reference count blocks and reftable. + fn write_refblocks( + refcounts: &[u16], + mut header: QcowHeader, + ref_table: &[u64], + raw_file: &mut QcowRawFile, + refcount_block_entries: u64, + ) -> Result<()> { + // Rewrite the header with lazy refcounts enabled while we are rebuilding the tables. + header.compatible_features |= COMPATIBLE_FEATURES_LAZY_REFCOUNTS; + raw_file + .file_mut() + .seek(SeekFrom::Start(0)) + .map_err(Error::SeekingFile)?; + header.write_to(raw_file.file_mut())?; + + for (i, refblock_addr) in ref_table.iter().enumerate() { + // Write a block of refcounts to the location indicated by refblock_addr. 
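// The in-memory `refcounts` array is split into chunks of `refcount_block_entries`
// counts, one chunk per refblock cluster listed in `ref_table`; a trailing partial
// chunk is padded with zero refcounts so the last refblock still spans a whole
// cluster. With 64 KiB clusters and 2-byte refcounts, one refblock holds 32768 counts
// and so covers 2 GiB of file, and one reftable cluster (8192 pointers) addresses
// about 16 TiB.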
+ let refblock_start = i * (refcount_block_entries as usize); + let refblock_end = min( + refcounts.len(), + refblock_start + refcount_block_entries as usize, + ); + let refblock = &refcounts[refblock_start..refblock_end]; + raw_file + .write_refcount_block(*refblock_addr, refblock) + .map_err(Error::WritingHeader)?; + + // If this is the last (partial) cluster, pad it out to a full refblock cluster. + if refblock.len() < refcount_block_entries as usize { + let refblock_padding = + vec![0u16; refcount_block_entries as usize - refblock.len()]; + raw_file + .write_refcount_block( + *refblock_addr + refblock.len() as u64 * 2, + &refblock_padding, + ) + .map_err(Error::WritingHeader)?; + } + } + + // Rewrite the top-level refcount table. + raw_file + .write_pointer_table(header.refcount_table_offset, &ref_table, 0) + .map_err(Error::WritingHeader)?; + + // Rewrite the header again, now with lazy refcounts disabled. + header.compatible_features &= !COMPATIBLE_FEATURES_LAZY_REFCOUNTS; + raw_file + .file_mut() + .seek(SeekFrom::Start(0)) + .map_err(Error::SeekingFile)?; + header.write_to(raw_file.file_mut())?; + + Ok(()) + } + + let cluster_size = raw_file.cluster_size(); + + let file_size = raw_file + .file_mut() + .metadata() + .map_err(Error::GettingFileSize)? + .len(); + + let refcount_bits = 1u64 << header.refcount_order; + let refcount_bytes = div_round_up_u64(refcount_bits, 8); + let refcount_block_entries = cluster_size / refcount_bytes; + let pointers_per_cluster = cluster_size / size_of::() as u64; + let data_clusters = div_round_up_u64(header.size, cluster_size); + let l2_clusters = div_round_up_u64(data_clusters, pointers_per_cluster); + let l1_clusters = div_round_up_u64(l2_clusters, cluster_size); + let header_clusters = div_round_up_u64(size_of::() as u64, cluster_size); + let max_clusters = data_clusters + l2_clusters + l1_clusters + header_clusters; + let mut max_valid_cluster_index = max_clusters; + let refblock_clusters = div_round_up_u64(max_valid_cluster_index, refcount_block_entries); + let reftable_clusters = div_round_up_u64(refblock_clusters, pointers_per_cluster); + // Account for refblocks and the ref table size needed to address them. + let refblocks_for_refs = div_round_up_u64( + refblock_clusters + reftable_clusters, + refcount_block_entries, + ); + let reftable_clusters_for_refs = + div_round_up_u64(refblocks_for_refs, refcount_block_entries); + max_valid_cluster_index += refblock_clusters + reftable_clusters; + max_valid_cluster_index += refblocks_for_refs + reftable_clusters_for_refs; + + if max_valid_cluster_index > usize::max_value() as u64 { + return Err(Error::InvalidRefcountTableSize); + } + + let max_valid_cluster_offset = max_valid_cluster_index * cluster_size; + if max_valid_cluster_offset < file_size - cluster_size { + return Err(Error::InvalidRefcountTableSize); + } + + let mut refcounts = vec![0; max_valid_cluster_index as usize]; + + // Find all references clusters and rebuild refcounts. + set_header_refcount(&mut refcounts, cluster_size)?; + set_l1_refcounts(&mut refcounts, header, cluster_size)?; + set_data_refcounts(&mut refcounts, header, cluster_size, raw_file)?; + set_refcount_table_refcounts(&mut refcounts, header, cluster_size)?; + + // Allocate clusters to store the new reference count blocks. + let ref_table = alloc_refblocks( + &mut refcounts, + cluster_size, + refblock_clusters, + pointers_per_cluster, + )?; + + // Write updated reference counts and point the reftable at them. 
+ write_refblocks( + &refcounts, + header, + &ref_table, + raw_file, + refcount_block_entries, + ) + } + + // Limits the range so that it doesn't exceed the virtual size of the file. + fn limit_range_file(&self, address: u64, count: usize) -> usize { + if address.checked_add(count as u64).is_none() || address > self.virtual_size() { + return 0; + } + min(count as u64, self.virtual_size() - address) as usize + } + + // Limits the range so that it doesn't overflow the end of a cluster. + fn limit_range_cluster(&self, address: u64, count: usize) -> usize { + let offset: u64 = self.raw_file.cluster_offset(address); + let limit = self.raw_file.cluster_size() - offset; + min(count as u64, limit) as usize + } + + // Gets the maximum virtual size of this image. + fn virtual_size(&self) -> u64 { + self.header.size + } + + // Gets the offset of `address` in the L1 table. + fn l1_address_offset(&self, address: u64) -> u64 { + let l1_index = self.l1_table_index(address); + l1_index * size_of::() as u64 + } + + // Gets the offset of `address` in the L1 table. + fn l1_table_index(&self, address: u64) -> u64 { + (address / self.raw_file.cluster_size()) / self.l2_entries + } + + // Gets the offset of `address` in the L2 table. + fn l2_table_index(&self, address: u64) -> u64 { + (address / self.raw_file.cluster_size()) % self.l2_entries + } + + // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters have + // yet to be allocated, return None. + fn file_offset_read(&mut self, address: u64) -> std::io::Result> { + if address >= self.virtual_size() as u64 { + return Err(std::io::Error::from_raw_os_error(EINVAL)); + } + + let l1_index = self.l1_table_index(address) as usize; + let l2_addr_disk = *self + .l1_table + .get(l1_index) + .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?; + + if l2_addr_disk == 0 { + // Reading from an unallocated cluster will return zeros. + return Ok(None); + } + + let l2_index = self.l2_table_index(address) as usize; + + if !self.l2_cache.contains_key(l1_index) { + // Not in the cache. + let table = + VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?); + + let l1_table = &self.l1_table; + let raw_file = &mut self.raw_file; + self.l2_cache.insert(l1_index, table, |index, evicted| { + raw_file.write_pointer_table( + l1_table[index], + evicted.get_values(), + CLUSTER_USED_FLAG, + ) + })?; + }; + + let cluster_addr = self.l2_cache.get(l1_index).unwrap()[l2_index]; + if cluster_addr == 0 { + return Ok(None); + } + Ok(Some(cluster_addr + self.raw_file.cluster_offset(address))) + } + + // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters need + // to be allocated, they will be. + fn file_offset_write(&mut self, address: u64) -> std::io::Result { + if address >= self.virtual_size() as u64 { + return Err(std::io::Error::from_raw_os_error(EINVAL)); + } + + let l1_index = self.l1_table_index(address) as usize; + let l2_addr_disk = *self + .l1_table + .get(l1_index) + .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?; + let l2_index = self.l2_table_index(address) as usize; + + let mut set_refcounts = Vec::new(); + + if !self.l2_cache.contains_key(l1_index) { + // Not in the cache. + let l2_table = if l2_addr_disk == 0 { + // Allocate a new cluster to store the L2 table and update the L1 table to point + // to the new table. + let new_addr: u64 = self.get_new_cluster()?; + // The cluster refcount starts at one meaning it is used but doesn't need COW. 
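// In this implementation a refcount of 1 simply marks a cluster as allocated and
// exclusively owned, while 0 marks it free; freed clusters are queued on
// `unref_clusters` and only moved to `avail_clusters` (and handed out again by
// `get_new_cluster`) once the metadata that dropped the last reference has been
// synced to disk.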
+ set_refcounts.push((new_addr, 1)); + self.l1_table[l1_index] = new_addr; + VecCache::new(self.l2_entries as usize) + } else { + VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?) + }; + let l1_table = &self.l1_table; + let raw_file = &mut self.raw_file; + self.l2_cache.insert(l1_index, l2_table, |index, evicted| { + raw_file.write_pointer_table( + l1_table[index], + evicted.get_values(), + CLUSTER_USED_FLAG, + ) + })?; + } + + let cluster_addr = match self.l2_cache.get(l1_index).unwrap()[l2_index] { + 0 => { + // Need to allocate a data cluster + let cluster_addr = self.append_data_cluster()?; + self.update_cluster_addr(l1_index, l2_index, cluster_addr, &mut set_refcounts)?; + cluster_addr + } + a => a, + }; + + for (addr, count) in set_refcounts { + let mut newly_unref = self.set_cluster_refcount(addr, count)?; + self.unref_clusters.append(&mut newly_unref); + } + + Ok(cluster_addr + self.raw_file.cluster_offset(address)) + } + + // Updates the l1 and l2 tables to point to the new `cluster_addr`. + fn update_cluster_addr( + &mut self, + l1_index: usize, + l2_index: usize, + cluster_addr: u64, + set_refcounts: &mut Vec<(u64, u16)>, + ) -> io::Result<()> { + if !self.l2_cache.get(l1_index).unwrap().dirty() { + // Free the previously used cluster if one exists. Modified tables are always + // witten to new clusters so the L1 table can be committed to disk after they + // are and L1 never points at an invalid table. + // The index must be valid from when it was insterted. + let addr = self.l1_table[l1_index]; + if addr != 0 { + self.unref_clusters.push(addr); + set_refcounts.push((addr, 0)); + } + + // Allocate a new cluster to store the L2 table and update the L1 table to point + // to the new table. The cluster will be written when the cache is flushed, no + // need to copy the data now. + let new_addr: u64 = self.get_new_cluster()?; + // The cluster refcount starts at one indicating it is used but doesn't need + // COW. + set_refcounts.push((new_addr, 1)); + self.l1_table[l1_index] = new_addr; + } + // 'unwrap' is OK because it was just added. + self.l2_cache.get_mut(l1_index).unwrap()[l2_index] = cluster_addr; + Ok(()) + } + + // Allocate a new cluster and return its offset within the raw file. + fn get_new_cluster(&mut self) -> std::io::Result { + // First use a pre allocated cluster if one is available. + if let Some(free_cluster) = self.avail_clusters.pop() { + let cluster_size = self.raw_file.cluster_size() as usize; + self.raw_file + .file_mut() + .seek(SeekFrom::Start(free_cluster))?; + self.raw_file.file_mut().write_zeroes(cluster_size)?; + return Ok(free_cluster); + } + + let max_valid_cluster_offset = self.refcounts.max_valid_cluster_offset(); + if let Some(new_cluster) = self.raw_file.add_cluster_end(max_valid_cluster_offset)? { + return Ok(new_cluster); + } else { + error!("No free clusters in get_new_cluster()"); + return Err(std::io::Error::from_raw_os_error(ENOSPC)); + } + } + + // Allocate and initialize a new data cluster. Returns the offset of the + // cluster in to the file on success. + fn append_data_cluster(&mut self) -> std::io::Result { + let new_addr: u64 = self.get_new_cluster()?; + // The cluster refcount starts at one indicating it is used but doesn't need COW. + let mut newly_unref = self.set_cluster_refcount(new_addr, 1)?; + self.unref_clusters.append(&mut newly_unref); + Ok(new_addr) + } + + // Returns true if the cluster containing `address` is already allocated. 
+ fn cluster_allocated(&mut self, address: u64) -> std::io::Result { + if address >= self.virtual_size() as u64 { + return Err(std::io::Error::from_raw_os_error(EINVAL)); + } + + let l1_index = self.l1_table_index(address) as usize; + let l2_addr_disk = *self + .l1_table + .get(l1_index) + .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?; + let l2_index = self.l2_table_index(address) as usize; + + if l2_addr_disk == 0 { + // The whole L2 table for this address is not allocated yet, + // so the cluster must also be unallocated. + return Ok(false); + } + + if !self.l2_cache.contains_key(l1_index) { + // Not in the cache. + let table = + VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?); + let l1_table = &self.l1_table; + let raw_file = &mut self.raw_file; + self.l2_cache.insert(l1_index, table, |index, evicted| { + raw_file.write_pointer_table( + l1_table[index], + evicted.get_values(), + CLUSTER_USED_FLAG, + ) + })?; + } + + let cluster_addr = self.l2_cache.get(l1_index).unwrap()[l2_index]; + // If cluster_addr != 0, the cluster is allocated. + Ok(cluster_addr != 0) + } + + // Find the first guest address greater than or equal to `address` whose allocation state + // matches `allocated`. + fn find_allocated_cluster( + &mut self, + address: u64, + allocated: bool, + ) -> std::io::Result> { + let size = self.virtual_size(); + if address >= size { + return Ok(None); + } + + // If offset is already within a hole, return it. + if self.cluster_allocated(address)? == allocated { + return Ok(Some(address)); + } + + // Skip to the next cluster boundary. + let cluster_size = self.raw_file.cluster_size(); + let mut cluster_addr = (address / cluster_size + 1) * cluster_size; + + // Search for clusters with the desired allocation state. + while cluster_addr < size { + if self.cluster_allocated(cluster_addr)? == allocated { + return Ok(Some(cluster_addr)); + } + cluster_addr += cluster_size; + } + + Ok(None) + } + + // Deallocate the storage for the cluster starting at `address`. + // Any future reads of this cluster will return all zeroes. + fn deallocate_cluster(&mut self, address: u64) -> std::io::Result<()> { + if address >= self.virtual_size() as u64 { + return Err(std::io::Error::from_raw_os_error(EINVAL)); + } + + let l1_index = self.l1_table_index(address) as usize; + let l2_addr_disk = *self + .l1_table + .get(l1_index) + .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?; + let l2_index = self.l2_table_index(address) as usize; + + if l2_addr_disk == 0 { + // The whole L2 table for this address is not allocated yet, + // so the cluster must also be unallocated. + return Ok(()); + } + + if !self.l2_cache.contains_key(l1_index) { + // Not in the cache. + let table = + VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?); + let l1_table = &self.l1_table; + let raw_file = &mut self.raw_file; + self.l2_cache.insert(l1_index, table, |index, evicted| { + raw_file.write_pointer_table( + l1_table[index], + evicted.get_values(), + CLUSTER_USED_FLAG, + ) + })?; + } + + let cluster_addr = self.l2_cache.get(l1_index).unwrap()[l2_index]; + if cluster_addr == 0 { + // This cluster is already unallocated; nothing to do. + return Ok(()); + } + + // Decrement the refcount. 
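// Deallocation proceeds in three steps: drop the refcount by one (erroring out if it
// was already zero), clear the L2 entry so future reads of this guest cluster return
// zeros, and, once the count reaches zero, punch a hole in the raw file to release the
// host storage. The punch_hole error is deliberately ignored below because a
// filesystem without hole-punching support only costs space, not correctness.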
+ let refcount = self + .refcounts + .get_cluster_refcount(&mut self.raw_file, cluster_addr) + .map_err(|_| std::io::Error::from_raw_os_error(EINVAL))?; + if refcount == 0 { + return Err(std::io::Error::from_raw_os_error(EINVAL)); + } + + let new_refcount = refcount - 1; + let mut newly_unref = self.set_cluster_refcount(cluster_addr, new_refcount)?; + self.unref_clusters.append(&mut newly_unref); + + // Rewrite the L2 entry to remove the cluster mapping. + // unwrap is safe as we just checked/inserted this entry. + self.l2_cache.get_mut(l1_index).unwrap()[l2_index] = 0; + + if new_refcount == 0 { + let cluster_size = self.raw_file.cluster_size(); + // This cluster is no longer in use; deallocate the storage. + // The underlying FS may not support FALLOC_FL_PUNCH_HOLE, + // so don't treat an error as fatal. Future reads will return zeros anyways. + let _ = self + .raw_file + .file_mut() + .punch_hole(cluster_addr, cluster_size); + self.unref_clusters.push(cluster_addr); + } + Ok(()) + } + + // Deallocate the storage for `length` bytes starting at `address`. + // Any future reads of this range will return all zeroes. + fn deallocate_bytes(&mut self, address: u64, length: usize) -> std::io::Result<()> { + let write_count: usize = self.limit_range_file(address, length); + + let mut nwritten: usize = 0; + while nwritten < write_count { + let curr_addr = address + nwritten as u64; + let count = self.limit_range_cluster(curr_addr, write_count - nwritten); + + if count == self.raw_file.cluster_size() as usize { + // Full cluster - deallocate the storage. + self.deallocate_cluster(curr_addr)?; + } else { + // Partial cluster - zero out the relevant bytes if it was allocated. + // Any space in unallocated clusters can be left alone, since + // unallocated clusters already read back as zeroes. + if let Some(offset) = self.file_offset_read(curr_addr)? { + // Partial cluster - zero it out. + self.raw_file.file_mut().seek(SeekFrom::Start(offset))?; + self.raw_file.file_mut().write_zeroes(count)?; + } + } + + nwritten += count; + } + Ok(()) + } + + // Reads an L2 cluster from the disk, returning an error if the file can't be read or if any + // cluster is compressed. + fn read_l2_cluster(raw_file: &mut QcowRawFile, cluster_addr: u64) -> std::io::Result> { + let file_values = raw_file.read_pointer_cluster(cluster_addr, None)?; + if file_values.iter().any(|entry| entry & COMPRESSED_FLAG != 0) { + return Err(std::io::Error::from_raw_os_error(ENOTSUP)); + } + Ok(file_values + .iter() + .map(|entry| *entry & L2_TABLE_OFFSET_MASK) + .collect()) + } + + // Set the refcount for a cluster with the given address. + // Returns a list of any refblocks that can be reused, this happens when a refblock is moved, + // the old location can be reused. 
+ fn set_cluster_refcount(&mut self, address: u64, refcount: u16) -> std::io::Result> { + let mut added_clusters = Vec::new(); + let mut unref_clusters = Vec::new(); + let mut refcount_set = false; + let mut new_cluster = None; + + while !refcount_set { + match self.refcounts.set_cluster_refcount( + &mut self.raw_file, + address, + refcount, + new_cluster.take(), + ) { + Ok(None) => { + refcount_set = true; + } + Ok(Some(freed_cluster)) => { + unref_clusters.push(freed_cluster); + refcount_set = true; + } + Err(refcount::Error::EvictingRefCounts(e)) => { + return Err(e); + } + Err(refcount::Error::InvalidIndex) => { + return Err(std::io::Error::from_raw_os_error(EINVAL)); + } + Err(refcount::Error::NeedCluster(addr)) => { + // Read the address and call set_cluster_refcount again. + new_cluster = Some(( + addr, + VecCache::from_vec(self.raw_file.read_refcount_block(addr)?), + )); + } + Err(refcount::Error::NeedNewCluster) => { + // Allocate the cluster and call set_cluster_refcount again. + let addr = self.get_new_cluster()?; + added_clusters.push(addr); + new_cluster = Some(( + addr, + VecCache::new(self.refcounts.refcounts_per_block() as usize), + )); + } + Err(refcount::Error::ReadingRefCounts(e)) => { + return Err(e); + } + } + } + + for addr in added_clusters { + self.set_cluster_refcount(addr, 1)?; + } + Ok(unref_clusters) + } + + fn sync_caches(&mut self) -> std::io::Result<()> { + // Write out all dirty L2 tables. + for (l1_index, l2_table) in self.l2_cache.iter_mut().filter(|(_k, v)| v.dirty()) { + // The index must be valid from when we insterted it. + let addr = self.l1_table[*l1_index]; + if addr != 0 { + self.raw_file.write_pointer_table( + addr, + l2_table.get_values(), + CLUSTER_USED_FLAG, + )?; + } else { + return Err(std::io::Error::from_raw_os_error(EINVAL)); + } + l2_table.mark_clean(); + } + // Write the modified refcount blocks. + self.refcounts.flush_blocks(&mut self.raw_file)?; + // Make sure metadata(file len) and all data clusters are written. + self.raw_file.file_mut().sync_all()?; + + // Push L1 table and refcount table last as all the clusters they point to are now + // guaranteed to be valid. 
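// This ordering is what keeps the file consistent across a crash: dirty L2 tables and
// refcount blocks are written and fsync'd first (above), and only then are the
// top-level L1 table and refcount table updated to point at them. If the process dies
// in between, the old top-level tables are still intact and still point at valid (if
// stale) metadata.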
+ let mut sync_required = if self.l1_table.dirty() { + self.raw_file.write_pointer_table( + self.header.l1_table_offset, + &self.l1_table.get_values(), + 0, + )?; + self.l1_table.mark_clean(); + true + } else { + false + }; + sync_required |= self.refcounts.flush_table(&mut self.raw_file)?; + if sync_required { + self.raw_file.file_mut().sync_data()?; + } + Ok(()) + } +} + +impl Drop for QcowFile { + fn drop(&mut self) { + let _ = self.sync_caches(); + } +} + +impl AsRawFd for QcowFile { + fn as_raw_fd(&self) -> RawFd { + self.raw_file.file().as_raw_fd() + } +} + +impl Read for QcowFile { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let address: u64 = self.current_offset as u64; + let read_count: usize = self.limit_range_file(address, buf.len()); + + let mut nread: usize = 0; + while nread < read_count { + let curr_addr = address + nread as u64; + let file_offset = self.file_offset_read(curr_addr)?; + let count = self.limit_range_cluster(curr_addr, read_count - nread); + + if let Some(offset) = file_offset { + self.raw_file.file_mut().seek(SeekFrom::Start(offset))?; + self.raw_file + .file_mut() + .read_exact(&mut buf[nread..(nread + count)])?; + } else { + // Previously unwritten region, return zeros + for b in &mut buf[nread..(nread + count)] { + *b = 0; + } + } + + nread += count; + } + self.current_offset += read_count as u64; + Ok(read_count) + } +} + +impl Seek for QcowFile { + fn seek(&mut self, pos: SeekFrom) -> std::io::Result { + let new_offset: Option = match pos { + SeekFrom::Start(off) => Some(off), + SeekFrom::End(off) => { + if off < 0 { + 0i64.checked_sub(off) + .and_then(|increment| self.virtual_size().checked_sub(increment as u64)) + } else { + self.virtual_size().checked_add(off as u64) + } + } + SeekFrom::Current(off) => { + if off < 0 { + 0i64.checked_sub(off) + .and_then(|increment| self.current_offset.checked_sub(increment as u64)) + } else { + self.current_offset.checked_add(off as u64) + } + } + }; + + if let Some(o) = new_offset { + if o <= self.virtual_size() { + self.current_offset = o; + return Ok(o); + } + } + Err(std::io::Error::from_raw_os_error(EINVAL)) + } +} + +impl Write for QcowFile { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + let address: u64 = self.current_offset as u64; + let write_count: usize = self.limit_range_file(address, buf.len()); + + let mut nwritten: usize = 0; + while nwritten < write_count { + let curr_addr = address + nwritten as u64; + let offset = self.file_offset_write(curr_addr)?; + let count = self.limit_range_cluster(curr_addr, write_count - nwritten); + + if let Err(e) = self.raw_file.file_mut().seek(SeekFrom::Start(offset)) { + return Err(e); + } + if let Err(e) = self + .raw_file + .file_mut() + .write(&buf[nwritten..(nwritten + count)]) + { + return Err(e); + } + + nwritten += count; + } + self.current_offset += write_count as u64; + Ok(write_count) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.sync_caches()?; + self.avail_clusters.append(&mut self.unref_clusters); + Ok(()) + } +} + +impl FileSync for QcowFile { + fn fsync(&mut self) -> std::io::Result<()> { + self.flush() + } +} + +impl FileSetLen for QcowFile { + fn set_len(&self, _len: u64) -> std::io::Result<()> { + Err(std::io::Error::new( + std::io::ErrorKind::Other, + "set_len() not supported for QcowFile", + )) + } +} + +impl PunchHole for QcowFile { + fn punch_hole(&mut self, offset: u64, length: u64) -> std::io::Result<()> { + let mut remaining = length; + let mut offset = offset; + while remaining > 0 { + let chunk_length = 
min(remaining, std::usize::MAX as u64) as usize; + self.deallocate_bytes(offset, chunk_length)?; + remaining -= chunk_length as u64; + offset += chunk_length as u64; + } + Ok(()) + } +} + +impl SeekHole for QcowFile { + fn seek_hole(&mut self, offset: u64) -> io::Result> { + match self.find_allocated_cluster(offset, false) { + Err(e) => Err(e), + Ok(None) => { + if offset < self.virtual_size() { + Ok(Some(self.seek(SeekFrom::End(0))?)) + } else { + Ok(None) + } + } + Ok(Some(o)) => { + self.seek(SeekFrom::Start(o))?; + Ok(Some(o)) + } + } + } + + fn seek_data(&mut self, offset: u64) -> io::Result> { + match self.find_allocated_cluster(offset, true) { + Err(e) => Err(e), + Ok(None) => Ok(None), + Ok(Some(o)) => { + self.seek(SeekFrom::Start(o))?; + Ok(Some(o)) + } + } + } +} + +// Returns an Error if the given offset doesn't align to a cluster boundary. +fn offset_is_cluster_boundary(offset: u64, cluster_bits: u32) -> Result<()> { + if offset & ((0x01 << cluster_bits) - 1) != 0 { + return Err(Error::InvalidOffset(offset)); + } + Ok(()) +} + +// Ceiling of the division of `dividend`/`divisor`. +fn div_round_up_u64(dividend: u64, divisor: u64) -> u64 { + (dividend + divisor - 1) / divisor +} + +// Ceiling of the division of `dividend`/`divisor`. +fn div_round_up_u32(dividend: u32, divisor: u32) -> u32 { + (dividend + divisor - 1) / divisor +} + +fn convert_copy(reader: &mut R, writer: &mut W, offset: u64, size: u64) -> Result<()> +where + R: Read + Seek, + W: Write + Seek, +{ + const CHUNK_SIZE: usize = 65536; + let mut buf = [0; CHUNK_SIZE]; + let mut read_count = 0; + reader + .seek(SeekFrom::Start(offset)) + .map_err(Error::SeekingFile)?; + writer + .seek(SeekFrom::Start(offset)) + .map_err(Error::SeekingFile)?; + loop { + let this_count = min(CHUNK_SIZE as u64, size - read_count) as usize; + let nread = reader + .read(&mut buf[..this_count]) + .map_err(Error::ReadingData)?; + writer.write(&buf[..nread]).map_err(Error::WritingData)?; + read_count += nread as u64; + if nread == 0 || read_count == size { + break; + } + } + + Ok(()) +} + +fn convert_reader_writer(reader: &mut R, writer: &mut W, size: u64) -> Result<()> +where + R: Read + Seek + SeekHole, + W: Write + Seek, +{ + let mut offset = 0; + while offset < size { + // Find the next range of data. + let next_data = match reader.seek_data(offset).map_err(Error::SeekingFile)? { + Some(o) => o, + None => { + // No more data in the file. + break; + } + }; + let next_hole = match reader.seek_hole(next_data).map_err(Error::SeekingFile)? { + Some(o) => o, + None => { + // This should not happen - there should always be at least one hole + // after any data. + return Err(Error::SeekingFile(io::Error::from_raw_os_error(EINVAL))); + } + }; + let count = next_hole - next_data; + convert_copy(reader, writer, next_data, count)?; + offset = next_hole; + } + + Ok(()) +} + +fn convert_reader(reader: &mut R, dst_file: File, dst_type: ImageType) -> Result<()> +where + R: Read + Seek + SeekHole, +{ + let src_size = reader.seek(SeekFrom::End(0)).map_err(Error::SeekingFile)?; + reader + .seek(SeekFrom::Start(0)) + .map_err(Error::SeekingFile)?; + + // Ensure the destination file is empty before writing to it. 
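// Truncating to zero matters because convert_reader_writer() above only copies the
// allocated extents reported by seek_data()/seek_hole(); stale bytes left over in a
// previously written destination would otherwise survive in the holes. For raw
// output, set_len(src_size) then recreates a sparse file of the right virtual size
// before the data runs are copied into it.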
+ dst_file.set_len(0).map_err(Error::SettingFileSize)?; + + match dst_type { + ImageType::Qcow2 => { + let mut dst_writer = QcowFile::new(dst_file, src_size)?; + convert_reader_writer(reader, &mut dst_writer, src_size) + } + ImageType::Raw => { + let mut dst_writer = dst_file; + // Set the length of the destination file to convert it into a sparse file + // of the desired size. + dst_writer + .set_len(src_size) + .map_err(Error::SettingFileSize)?; + convert_reader_writer(reader, &mut dst_writer, src_size) + } + } +} + +/// Copy the contents of a disk image in `src_file` into `dst_file`. +/// The type of `src_file` is automatically detected, and the output file type is +/// determined by `dst_type`. +pub fn convert(src_file: File, dst_file: File, dst_type: ImageType) -> Result<()> { + let src_type = detect_image_type(&src_file)?; + match src_type { + ImageType::Qcow2 => { + let mut src_reader = QcowFile::from(src_file)?; + convert_reader(&mut src_reader, dst_file, dst_type) + } + ImageType::Raw => { + // src_file is a raw file. + let mut src_reader = src_file; + convert_reader(&mut src_reader, dst_file, dst_type) + } + } +} + +/// Detect the type of an image file by checking for a valid qcow2 header. +pub fn detect_image_type(file: &File) -> Result { + let mut f = file; + let orig_seek = f.seek(SeekFrom::Current(0)).map_err(Error::SeekingFile)?; + f.seek(SeekFrom::Start(0)).map_err(Error::SeekingFile)?; + let magic = f.read_u32::().map_err(Error::ReadingHeader)?; + let image_type = if magic == QCOW_MAGIC { + ImageType::Qcow2 + } else { + ImageType::Raw + }; + f.seek(SeekFrom::Start(orig_seek)) + .map_err(Error::SeekingFile)?; + Ok(image_type) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::File; + use std::io::{Read, Seek, SeekFrom, Write}; + use sys_util::SharedMemory; + + fn valid_header() -> Vec { + vec![ + 0x51u8, 0x46, 0x49, 0xfb, // magic + 0x00, 0x00, 0x00, 0x03, // version + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset + 0x00, 0x00, 0x00, 0x00, // backing file size + 0x00, 0x00, 0x00, 0x10, // cluster_bits + 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, // size + 0x00, 0x00, 0x00, 0x00, // crypt method + 0x00, 0x00, 0x01, 0x00, // L1 size + 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // L1 table offset + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset + 0x00, 0x00, 0x00, 0x03, // refcount table clusters + 0x00, 0x00, 0x00, 0x00, // nb snapshots + 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // snapshots offset + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible_features + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible_features + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear_features + 0x00, 0x00, 0x00, 0x04, // refcount_order + 0x00, 0x00, 0x00, 0x68, // header_length + ] + } + + fn with_basic_file(header: &[u8], mut testfn: F) + where + F: FnMut(File), + { + let shm = SharedMemory::new(None).unwrap(); + let mut disk_file: File = shm.into(); + disk_file.write_all(&header).unwrap(); + disk_file.set_len(0x5_0000).unwrap(); + disk_file.seek(SeekFrom::Start(0)).unwrap(); + + testfn(disk_file); // File closed when the function exits. + } + + fn with_default_file(file_size: u64, mut testfn: F) + where + F: FnMut(QcowFile), + { + let shm = SharedMemory::new(None).unwrap(); + let qcow_file = QcowFile::new(shm.into(), file_size).unwrap(); + + testfn(qcow_file); // File closed when the function exits. 
+ } + + #[test] + fn default_header() { + let header = QcowHeader::create_for_size(0x10_0000); + let shm = SharedMemory::new(None).unwrap(); + let mut disk_file: File = shm.into(); + header + .write_to(&mut disk_file) + .expect("Failed to write header to shm."); + disk_file.seek(SeekFrom::Start(0)).unwrap(); + QcowFile::from(disk_file).expect("Failed to create Qcow from default Header"); + } + + #[test] + fn header_read() { + with_basic_file(&valid_header(), |mut disk_file: File| { + QcowHeader::new(&mut disk_file).expect("Failed to create Header."); + }); + } + + #[test] + fn invalid_magic() { + let invalid_header = vec![0x51u8, 0x46, 0x4a, 0xfb]; + with_basic_file(&invalid_header, |mut disk_file: File| { + QcowHeader::new(&mut disk_file).expect_err("Invalid header worked."); + }); + } + + #[test] + fn invalid_refcount_order() { + let mut header = valid_header(); + header[99] = 2; + with_basic_file(&header, |disk_file: File| { + QcowFile::from(disk_file).expect_err("Invalid refcount order worked."); + }); + } + + #[test] + fn write_read_start() { + with_basic_file(&valid_header(), |disk_file: File| { + let mut q = QcowFile::from(disk_file).unwrap(); + q.write(b"test first bytes") + .expect("Failed to write test string."); + let mut buf = [0u8; 4]; + q.seek(SeekFrom::Start(0)).expect("Failed to seek."); + q.read(&mut buf).expect("Failed to read."); + assert_eq!(&buf, b"test"); + }); + } + + #[test] + fn offset_write_read() { + with_basic_file(&valid_header(), |disk_file: File| { + let mut q = QcowFile::from(disk_file).unwrap(); + let b = [0x55u8; 0x1000]; + q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek."); + q.write(&b).expect("Failed to write test string."); + let mut buf = [0u8; 4]; + q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek."); + q.read(&mut buf).expect("Failed to read."); + assert_eq!(buf[0], 0x55); + }); + } + + #[test] + fn write_zeroes_read() { + with_basic_file(&valid_header(), |disk_file: File| { + let mut q = QcowFile::from(disk_file).unwrap(); + // Write some test data. + let b = [0x55u8; 0x1000]; + q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek."); + q.write(&b).expect("Failed to write test string."); + // Overwrite the test data with zeroes. + q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek."); + let nwritten = q.write_zeroes(0x200).expect("Failed to write zeroes."); + assert_eq!(nwritten, 0x200); + // Verify that the correct part of the data was zeroed out. + let mut buf = [0u8; 0x1000]; + q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek."); + q.read(&mut buf).expect("Failed to read."); + assert_eq!(buf[0], 0); + assert_eq!(buf[0x1FF], 0); + assert_eq!(buf[0x200], 0x55); + assert_eq!(buf[0xFFF], 0x55); + }); + } + + #[test] + fn write_zeroes_full_cluster() { + // Choose a size that is larger than a cluster. + // valid_header uses cluster_bits = 12, which corresponds to a cluster size of 4096. + const CHUNK_SIZE: usize = 4096 * 2 + 512; + with_basic_file(&valid_header(), |disk_file: File| { + let mut q = QcowFile::from(disk_file).unwrap(); + // Write some test data. + let b = [0x55u8; CHUNK_SIZE]; + q.seek(SeekFrom::Start(0)).expect("Failed to seek."); + q.write(&b).expect("Failed to write test string."); + // Overwrite the full cluster with zeroes. + q.seek(SeekFrom::Start(0)).expect("Failed to seek."); + let nwritten = q.write_zeroes(CHUNK_SIZE).expect("Failed to write zeroes."); + assert_eq!(nwritten, CHUNK_SIZE); + // Verify that the data was zeroed out. 
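+            // A cluster that is entirely zeroed may simply be deallocated rather than
+            // rewritten; either way it must read back as zeros.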
+ let mut buf = [0u8; CHUNK_SIZE]; + q.seek(SeekFrom::Start(0)).expect("Failed to seek."); + q.read(&mut buf).expect("Failed to read."); + assert_eq!(buf[0], 0); + assert_eq!(buf[CHUNK_SIZE - 1], 0); + }); + } + + #[test] + fn test_header() { + with_basic_file(&valid_header(), |disk_file: File| { + let q = QcowFile::from(disk_file).unwrap(); + assert_eq!(q.virtual_size(), 0x20_0000_0000); + }); + } + + #[test] + fn read_small_buffer() { + with_basic_file(&valid_header(), |disk_file: File| { + let mut q = QcowFile::from(disk_file).unwrap(); + let mut b = [5u8; 16]; + q.seek(SeekFrom::Start(1000)).expect("Failed to seek."); + q.read(&mut b).expect("Failed to read."); + assert_eq!(0, b[0]); + assert_eq!(0, b[15]); + }); + } + + #[test] + fn replay_ext4() { + with_basic_file(&valid_header(), |disk_file: File| { + let mut q = QcowFile::from(disk_file).unwrap(); + const BUF_SIZE: usize = 0x1000; + let mut b = [0u8; BUF_SIZE]; + + struct Transfer { + pub write: bool, + pub addr: u64, + }; + + // Write transactions from mkfs.ext4. + let xfers: Vec = vec![ + Transfer { + write: false, + addr: 0xfff0000, + }, + Transfer { + write: false, + addr: 0xfffe000, + }, + Transfer { + write: false, + addr: 0x0, + }, + Transfer { + write: false, + addr: 0x1000, + }, + Transfer { + write: false, + addr: 0xffff000, + }, + Transfer { + write: false, + addr: 0xffdf000, + }, + Transfer { + write: false, + addr: 0xfff8000, + }, + Transfer { + write: false, + addr: 0xffe0000, + }, + Transfer { + write: false, + addr: 0xffce000, + }, + Transfer { + write: false, + addr: 0xffb6000, + }, + Transfer { + write: false, + addr: 0xffab000, + }, + Transfer { + write: false, + addr: 0xffa4000, + }, + Transfer { + write: false, + addr: 0xff8e000, + }, + Transfer { + write: false, + addr: 0xff86000, + }, + Transfer { + write: false, + addr: 0xff84000, + }, + Transfer { + write: false, + addr: 0xff89000, + }, + Transfer { + write: false, + addr: 0xfe7e000, + }, + Transfer { + write: false, + addr: 0x100000, + }, + Transfer { + write: false, + addr: 0x3000, + }, + Transfer { + write: false, + addr: 0x7000, + }, + Transfer { + write: false, + addr: 0xf000, + }, + Transfer { + write: false, + addr: 0x2000, + }, + Transfer { + write: false, + addr: 0x4000, + }, + Transfer { + write: false, + addr: 0x5000, + }, + Transfer { + write: false, + addr: 0x6000, + }, + Transfer { + write: false, + addr: 0x8000, + }, + Transfer { + write: false, + addr: 0x9000, + }, + Transfer { + write: false, + addr: 0xa000, + }, + Transfer { + write: false, + addr: 0xb000, + }, + Transfer { + write: false, + addr: 0xc000, + }, + Transfer { + write: false, + addr: 0xd000, + }, + Transfer { + write: false, + addr: 0xe000, + }, + Transfer { + write: false, + addr: 0x10000, + }, + Transfer { + write: false, + addr: 0x11000, + }, + Transfer { + write: false, + addr: 0x12000, + }, + Transfer { + write: false, + addr: 0x13000, + }, + Transfer { + write: false, + addr: 0x14000, + }, + Transfer { + write: false, + addr: 0x15000, + }, + Transfer { + write: false, + addr: 0x16000, + }, + Transfer { + write: false, + addr: 0x17000, + }, + Transfer { + write: false, + addr: 0x18000, + }, + Transfer { + write: false, + addr: 0x19000, + }, + Transfer { + write: false, + addr: 0x1a000, + }, + Transfer { + write: false, + addr: 0x1b000, + }, + Transfer { + write: false, + addr: 0x1c000, + }, + Transfer { + write: false, + addr: 0x1d000, + }, + Transfer { + write: false, + addr: 0x1e000, + }, + Transfer { + write: false, + addr: 0x1f000, + }, + Transfer { + write: false, + 
addr: 0x21000, + }, + Transfer { + write: false, + addr: 0x22000, + }, + Transfer { + write: false, + addr: 0x24000, + }, + Transfer { + write: false, + addr: 0x40000, + }, + Transfer { + write: false, + addr: 0x0, + }, + Transfer { + write: false, + addr: 0x3000, + }, + Transfer { + write: false, + addr: 0x7000, + }, + Transfer { + write: false, + addr: 0x0, + }, + Transfer { + write: false, + addr: 0x1000, + }, + Transfer { + write: false, + addr: 0x2000, + }, + Transfer { + write: false, + addr: 0x3000, + }, + Transfer { + write: false, + addr: 0x0, + }, + Transfer { + write: false, + addr: 0x449000, + }, + Transfer { + write: false, + addr: 0x48000, + }, + Transfer { + write: false, + addr: 0x48000, + }, + Transfer { + write: false, + addr: 0x448000, + }, + Transfer { + write: false, + addr: 0x44a000, + }, + Transfer { + write: false, + addr: 0x48000, + }, + Transfer { + write: false, + addr: 0x48000, + }, + Transfer { + write: true, + addr: 0x0, + }, + Transfer { + write: true, + addr: 0x448000, + }, + Transfer { + write: true, + addr: 0x449000, + }, + Transfer { + write: true, + addr: 0x44a000, + }, + Transfer { + write: true, + addr: 0xfff0000, + }, + Transfer { + write: true, + addr: 0xfff1000, + }, + Transfer { + write: true, + addr: 0xfff2000, + }, + Transfer { + write: true, + addr: 0xfff3000, + }, + Transfer { + write: true, + addr: 0xfff4000, + }, + Transfer { + write: true, + addr: 0xfff5000, + }, + Transfer { + write: true, + addr: 0xfff6000, + }, + Transfer { + write: true, + addr: 0xfff7000, + }, + Transfer { + write: true, + addr: 0xfff8000, + }, + Transfer { + write: true, + addr: 0xfff9000, + }, + Transfer { + write: true, + addr: 0xfffa000, + }, + Transfer { + write: true, + addr: 0xfffb000, + }, + Transfer { + write: true, + addr: 0xfffc000, + }, + Transfer { + write: true, + addr: 0xfffd000, + }, + Transfer { + write: true, + addr: 0xfffe000, + }, + Transfer { + write: true, + addr: 0xffff000, + }, + ]; + + for xfer in &xfers { + q.seek(SeekFrom::Start(xfer.addr)).expect("Failed to seek."); + if xfer.write { + q.write(&b).expect("Failed to write."); + } else { + let read_count: usize = q.read(&mut b).expect("Failed to read."); + assert_eq!(read_count, BUF_SIZE); + } + } + }); + } + + #[test] + fn combo_write_read() { + with_default_file(1024 * 1024 * 1024 * 256, |mut qcow_file| { + const NUM_BLOCKS: usize = 555; + const BLOCK_SIZE: usize = 0x1_0000; + const OFFSET: usize = 0x1_0000_0020; + let data = [0x55u8; BLOCK_SIZE]; + let mut readback = [0u8; BLOCK_SIZE]; + for i in 0..NUM_BLOCKS { + let seek_offset = OFFSET + i * BLOCK_SIZE; + qcow_file + .seek(SeekFrom::Start(seek_offset as u64)) + .expect("Failed to seek."); + let nwritten = qcow_file.write(&data).expect("Failed to write test data."); + assert_eq!(nwritten, BLOCK_SIZE); + // Read back the data to check it was written correctly. + qcow_file + .seek(SeekFrom::Start(seek_offset as u64)) + .expect("Failed to seek."); + let nread = qcow_file.read(&mut readback).expect("Failed to read."); + assert_eq!(nread, BLOCK_SIZE); + for (orig, read) in data.iter().zip(readback.iter()) { + assert_eq!(orig, read); + } + } + // Check that address 0 is still zeros. + qcow_file.seek(SeekFrom::Start(0)).expect("Failed to seek."); + let nread = qcow_file.read(&mut readback).expect("Failed to read."); + assert_eq!(nread, BLOCK_SIZE); + for read in readback.iter() { + assert_eq!(*read, 0); + } + // Check the data again after the writes have happened. 
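+            // Later writes allocate additional clusters and metadata; re-reading everything
+            // verifies that those allocations did not overwrite earlier data.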
+ for i in 0..NUM_BLOCKS { + let seek_offset = OFFSET + i * BLOCK_SIZE; + qcow_file + .seek(SeekFrom::Start(seek_offset as u64)) + .expect("Failed to seek."); + let nread = qcow_file.read(&mut readback).expect("Failed to read."); + assert_eq!(nread, BLOCK_SIZE); + for (orig, read) in data.iter().zip(readback.iter()) { + assert_eq!(orig, read); + } + } + + assert_eq!(qcow_file.first_zero_refcount().unwrap(), None); + }); + } + + fn seek_cur(file: &mut QcowFile) -> u64 { + file.seek(SeekFrom::Current(0)).unwrap() + } + + #[test] + fn seek_data() { + with_default_file(0x30000, |mut file| { + // seek_data at or after the end of the file should return None + assert_eq!(file.seek_data(0x10000).unwrap(), None); + assert_eq!(seek_cur(&mut file), 0); + assert_eq!(file.seek_data(0x10001).unwrap(), None); + assert_eq!(seek_cur(&mut file), 0); + + // Write some data to [0x10000, 0x20000) + let b = [0x55u8; 0x10000]; + file.seek(SeekFrom::Start(0x10000)).unwrap(); + file.write_all(&b).unwrap(); + assert_eq!(file.seek_data(0).unwrap(), Some(0x10000)); + assert_eq!(seek_cur(&mut file), 0x10000); + + // seek_data within data should return the same offset + assert_eq!(file.seek_data(0x10000).unwrap(), Some(0x10000)); + assert_eq!(seek_cur(&mut file), 0x10000); + assert_eq!(file.seek_data(0x10001).unwrap(), Some(0x10001)); + assert_eq!(seek_cur(&mut file), 0x10001); + assert_eq!(file.seek_data(0x1FFFF).unwrap(), Some(0x1FFFF)); + assert_eq!(seek_cur(&mut file), 0x1FFFF); + + assert_eq!(file.seek_data(0).unwrap(), Some(0x10000)); + assert_eq!(seek_cur(&mut file), 0x10000); + assert_eq!(file.seek_data(0x1FFFF).unwrap(), Some(0x1FFFF)); + assert_eq!(seek_cur(&mut file), 0x1FFFF); + assert_eq!(file.seek_data(0x20000).unwrap(), None); + assert_eq!(seek_cur(&mut file), 0x1FFFF); + }); + } + + #[test] + fn seek_hole() { + with_default_file(0x30000, |mut file| { + // File consisting entirely of a hole + assert_eq!(file.seek_hole(0).unwrap(), Some(0)); + assert_eq!(seek_cur(&mut file), 0); + assert_eq!(file.seek_hole(0xFFFF).unwrap(), Some(0xFFFF)); + assert_eq!(seek_cur(&mut file), 0xFFFF); + + // seek_hole at or after the end of the file should return None + file.seek(SeekFrom::Start(0)).unwrap(); + assert_eq!(file.seek_hole(0x30000).unwrap(), None); + assert_eq!(seek_cur(&mut file), 0); + assert_eq!(file.seek_hole(0x30001).unwrap(), None); + assert_eq!(seek_cur(&mut file), 0); + + // Write some data to [0x10000, 0x20000) + let b = [0x55u8; 0x10000]; + file.seek(SeekFrom::Start(0x10000)).unwrap(); + file.write_all(&b).unwrap(); + + // seek_hole within a hole should return the same offset + assert_eq!(file.seek_hole(0).unwrap(), Some(0)); + assert_eq!(seek_cur(&mut file), 0); + assert_eq!(file.seek_hole(0xFFFF).unwrap(), Some(0xFFFF)); + assert_eq!(seek_cur(&mut file), 0xFFFF); + + // seek_hole within data should return the next hole + file.seek(SeekFrom::Start(0)).unwrap(); + assert_eq!(file.seek_hole(0x10000).unwrap(), Some(0x20000)); + assert_eq!(seek_cur(&mut file), 0x20000); + file.seek(SeekFrom::Start(0)).unwrap(); + assert_eq!(file.seek_hole(0x10001).unwrap(), Some(0x20000)); + assert_eq!(seek_cur(&mut file), 0x20000); + file.seek(SeekFrom::Start(0)).unwrap(); + assert_eq!(file.seek_hole(0x1FFFF).unwrap(), Some(0x20000)); + assert_eq!(seek_cur(&mut file), 0x20000); + file.seek(SeekFrom::Start(0)).unwrap(); + assert_eq!(file.seek_hole(0xFFFF).unwrap(), Some(0xFFFF)); + assert_eq!(seek_cur(&mut file), 0xFFFF); + file.seek(SeekFrom::Start(0)).unwrap(); + assert_eq!(file.seek_hole(0x10000).unwrap(), 
                Some(0x20000));
+            assert_eq!(seek_cur(&mut file), 0x20000);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x1FFFF).unwrap(), Some(0x20000));
+            assert_eq!(seek_cur(&mut file), 0x20000);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x20000).unwrap(), Some(0x20000));
+            assert_eq!(seek_cur(&mut file), 0x20000);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x20001).unwrap(), Some(0x20001));
+            assert_eq!(seek_cur(&mut file), 0x20001);
+
+            // seek_hole at EOF should return None
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x30000).unwrap(), None);
+            assert_eq!(seek_cur(&mut file), 0);
+
+            // Write some data to [0x20000, 0x30000)
+            file.seek(SeekFrom::Start(0x20000)).unwrap();
+            file.write_all(&b).unwrap();
+
+            // seek_hole within [0x20000, 0x30000) should now find the hole at EOF
+            assert_eq!(file.seek_hole(0x20000).unwrap(), Some(0x30000));
+            assert_eq!(seek_cur(&mut file), 0x30000);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x20001).unwrap(), Some(0x30000));
+            assert_eq!(seek_cur(&mut file), 0x30000);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x30000).unwrap(), None);
+            assert_eq!(seek_cur(&mut file), 0);
+        });
+    }
+
+    #[test]
+    fn rebuild_refcounts() {
+        with_basic_file(&valid_header(), |mut disk_file: File| {
+            let header = QcowHeader::new(&mut disk_file).expect("Failed to create Header.");
+            let cluster_size = 65536;
+            let mut raw_file =
+                QcowRawFile::from(disk_file, cluster_size).expect("Failed to create QcowRawFile.");
+            QcowFile::rebuild_refcounts(&mut raw_file, header)
+                .expect("Failed to rebuild refcounts.");
+        });
+    }
+}
diff --git a/qcow/src/qcow_raw_file.rs b/qcow/src/qcow_raw_file.rs
new file mode 100644
index 000000000..456b9869e
--- /dev/null
+++ b/qcow/src/qcow_raw_file.rs
@@ -0,0 +1,136 @@
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::fs::File;
+use std::io::{self, BufWriter, Seek, SeekFrom};
+use std::mem::size_of;
+
+use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
+
+/// A qcow file. Allows reading/writing clusters and appending clusters.
+#[derive(Debug)]
+pub struct QcowRawFile {
+    file: File,
+    cluster_size: u64,
+    cluster_mask: u64,
+}
+
+impl QcowRawFile {
+    /// Creates a `QcowRawFile` from the given `File`; `None` is returned if `cluster_size` is not
+    /// a power of two.
+    pub fn from(file: File, cluster_size: u64) -> Option<Self> {
+        if cluster_size.count_ones() != 1 {
+            return None;
+        }
+        Some(QcowRawFile {
+            file,
+            cluster_size,
+            cluster_mask: cluster_size - 1,
+        })
+    }
+
+    /// Reads `count` 64 bit offsets and returns them as a vector.
+    /// `mask` optionally ands out some of the bits of each entry.
+    pub fn read_pointer_table(
+        &mut self,
+        offset: u64,
+        count: u64,
+        mask: Option<u64>,
+    ) -> io::Result<Vec<u64>> {
+        let mut table = vec![0; count as usize];
+        self.file.seek(SeekFrom::Start(offset))?;
+        self.file.read_u64_into::<BigEndian>(&mut table)?;
+        if let Some(m) = mask {
+            for ptr in &mut table {
+                *ptr &= m;
+            }
+        }
+        Ok(table)
+    }
+
+    /// Reads a cluster's worth of 64 bit offsets and returns them as a vector.
+    /// `mask` optionally ands out some of the bits of each entry.
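+    /// (qcow2 table entries carry flag bits alongside the cluster offset, so callers will
+    /// typically pass a mask here to strip those flags.)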
+    pub fn read_pointer_cluster(&mut self, offset: u64, mask: Option<u64>) -> io::Result<Vec<u64>> {
+        let count = self.cluster_size / size_of::<u64>() as u64;
+        self.read_pointer_table(offset, count, mask)
+    }
+
+    /// Writes `table` of u64 pointers to `offset` in the file.
+    /// `non_zero_flags` will be ORed with all non-zero values in `table` before writing.
+    pub fn write_pointer_table(
+        &mut self,
+        offset: u64,
+        table: &[u64],
+        non_zero_flags: u64,
+    ) -> io::Result<()> {
+        self.file.seek(SeekFrom::Start(offset))?;
+        let mut buffer = BufWriter::with_capacity(table.len() * size_of::<u64>(), &self.file);
+        for addr in table {
+            let val = if *addr == 0 {
+                0
+            } else {
+                *addr | non_zero_flags
+            };
+            buffer.write_u64::<BigEndian>(val)?;
+        }
+        Ok(())
+    }
+
+    /// Reads a refcount block from the file and returns a Vec containing the block.
+    /// Always returns a cluster's worth of data.
+    pub fn read_refcount_block(&mut self, offset: u64) -> io::Result<Vec<u16>> {
+        let count = self.cluster_size / size_of::<u16>() as u64;
+        let mut table = vec![0; count as usize];
+        self.file.seek(SeekFrom::Start(offset))?;
+        self.file.read_u16_into::<BigEndian>(&mut table)?;
+        Ok(table)
+    }
+
+    /// Writes a refcount block to the file.
+    pub fn write_refcount_block(&mut self, offset: u64, table: &[u16]) -> io::Result<()> {
+        self.file.seek(SeekFrom::Start(offset))?;
+        let mut buffer = BufWriter::with_capacity(table.len() * size_of::<u16>(), &self.file);
+        for count in table {
+            buffer.write_u16::<BigEndian>(*count)?;
+        }
+        Ok(())
+    }
+
+    /// Allocates a new cluster at the end of the current file and returns its address.
+    pub fn add_cluster_end(&mut self, max_valid_cluster_offset: u64) -> io::Result<Option<u64>> {
+        // Determine where the new end of the file should be and set_len, which
+        // translates to truncate(2).
+        let file_end: u64 = self.file.seek(SeekFrom::End(0))?;
+        let new_cluster_address: u64 = (file_end + self.cluster_size - 1) & !self.cluster_mask;
+
+        if new_cluster_address > max_valid_cluster_offset {
+            return Ok(None);
+        }
+
+        self.file.set_len(new_cluster_address + self.cluster_size)?;
+
+        Ok(Some(new_cluster_address))
+    }
+
+    /// Returns a reference to the underlying file.
+    pub fn file(&self) -> &File {
+        &self.file
+    }
+
+    /// Returns a mutable reference to the underlying file.
+    pub fn file_mut(&mut self) -> &mut File {
+        &mut self.file
+    }
+
+    /// Returns the size of the file's clusters.
+    pub fn cluster_size(&self) -> u64 {
+        self.cluster_size
+    }
+
+    /// Returns the offset of `address` within a cluster.
+    pub fn cluster_offset(&self, address: u64) -> u64 {
+        address & self.cluster_mask
+    }
+}
diff --git a/qcow/src/refcount.rs b/qcow/src/refcount.rs
new file mode 100644
index 000000000..3b87e91fa
--- /dev/null
+++ b/qcow/src/refcount.rs
@@ -0,0 +1,253 @@
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std;
+use std::fmt::{self, Display};
+use std::io;
+
+use libc::EINVAL;
+
+use crate::qcow_raw_file::QcowRawFile;
+use crate::vec_cache::{CacheMap, Cacheable, VecCache};
+
+#[derive(Debug)]
+pub enum Error {
+    /// `EvictingRefCounts` - Error writing a refblock from the cache to disk.
+    EvictingRefCounts(io::Error),
+    /// `InvalidIndex` - Address requested isn't within the range of the disk.
+    InvalidIndex,
+    /// `NeedCluster` - Handle this error by reading the cluster and calling the function again.
+    NeedCluster(u64),
+    /// `NeedNewCluster` - Handle this error by allocating a cluster and calling the function again.
+    NeedNewCluster,
+    /// `ReadingRefCounts` - Error reading the file into the refcount cache.
+    ReadingRefCounts(io::Error),
+}
+
+pub type Result<T> = std::result::Result<T, Error>;
+
+impl Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use self::Error::*;
+
+        match self {
+            EvictingRefCounts(e) => write!(
+                f,
+                "failed to write a refblock from the cache to disk: {}",
+                e
+            ),
+            InvalidIndex => write!(f, "address requested is not within the range of the disk"),
+            NeedCluster(addr) => write!(f, "cluster with addr={} needs to be read", addr),
+            NeedNewCluster => write!(f, "new cluster needs to be allocated for refcounts"),
+            ReadingRefCounts(e) => {
+                write!(f, "failed to read the file into the refcount cache: {}", e)
+            }
+        }
+    }
+}
+
+/// Represents the refcount entries for an open qcow file.
+#[derive(Debug)]
+pub struct RefCount {
+    ref_table: VecCache<u64>,
+    refcount_table_offset: u64,
+    refblock_cache: CacheMap<VecCache<u16>>,
+    refcount_block_entries: u64, // number of refcounts in a cluster.
+    cluster_size: u64,
+    max_valid_cluster_offset: u64,
+}
+
+impl RefCount {
+    /// Creates a `RefCount` from `file`, reading the refcount table from `refcount_table_offset`.
+    /// `refcount_table_entries` specifies the number of refcount blocks used by this image.
+    /// `refcount_block_entries` indicates the number of refcounts in each refcount block.
+    /// Each refcount table entry points to a refcount block.
+    pub fn new(
+        raw_file: &mut QcowRawFile,
+        refcount_table_offset: u64,
+        refcount_table_entries: u64,
+        refcount_block_entries: u64,
+        cluster_size: u64,
+    ) -> io::Result<RefCount> {
+        let ref_table = VecCache::from_vec(raw_file.read_pointer_table(
+            refcount_table_offset,
+            refcount_table_entries,
+            None,
+        )?);
+        let max_valid_cluster_index = (ref_table.len() as u64) * refcount_block_entries - 1;
+        let max_valid_cluster_offset = max_valid_cluster_index * cluster_size;
+        Ok(RefCount {
+            ref_table,
+            refcount_table_offset,
+            refblock_cache: CacheMap::new(50),
+            refcount_block_entries,
+            cluster_size,
+            max_valid_cluster_offset,
+        })
+    }
+
+    /// Returns the number of refcounts per block.
+    pub fn refcounts_per_block(&self) -> u64 {
+        self.refcount_block_entries
+    }
+
+    /// Returns the maximum valid cluster offset in the raw file for this refcount table.
+    pub fn max_valid_cluster_offset(&self) -> u64 {
+        self.max_valid_cluster_offset
+    }
+
+    /// Returns `NeedNewCluster` if a new cluster needs to be allocated for refcounts. If an
+    /// existing cluster needs to be read, `NeedCluster(addr)` is returned. The caller should
+    /// allocate a cluster or read the required one and call this function again with the cluster.
+    /// On success, an optional address of a dropped cluster is returned. The dropped cluster can
+    /// be reused for other purposes.
+    pub fn set_cluster_refcount(
+        &mut self,
+        raw_file: &mut QcowRawFile,
+        cluster_address: u64,
+        refcount: u16,
+        mut new_cluster: Option<(u64, VecCache<u16>)>,
+    ) -> Result<Option<u64>> {
+        let (table_index, block_index) = self.get_refcount_index(cluster_address);
+
+        let block_addr_disk = *self.ref_table.get(table_index).ok_or(Error::InvalidIndex)?;
+
+        // Fill the cache if this block isn't yet there.
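+        // A missing cache entry can only be populated from a cluster supplied by the caller;
+        // otherwise report what is needed so the caller can retry.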
+        if !self.refblock_cache.contains_key(table_index) {
+            // Need a new cluster
+            if let Some((addr, table)) = new_cluster.take() {
+                self.ref_table[table_index] = addr;
+                let ref_table = &self.ref_table;
+                self.refblock_cache
+                    .insert(table_index, table, |index, evicted| {
+                        raw_file.write_refcount_block(ref_table[index], evicted.get_values())
+                    })
+                    .map_err(Error::EvictingRefCounts)?;
+            } else {
+                if block_addr_disk == 0 {
+                    return Err(Error::NeedNewCluster);
+                }
+                return Err(Error::NeedCluster(block_addr_disk));
+            }
+        }
+
+        // Unwrap is safe here as the entry was filled directly above.
+        let dropped_cluster = if !self.refblock_cache.get(table_index).unwrap().dirty() {
+            // Free the previously used block and use a new one. Writing modified counts to new
+            // blocks keeps the on-disk state consistent even if it's out of date.
+            if let Some((addr, _)) = new_cluster.take() {
+                self.ref_table[table_index] = addr;
+                Some(block_addr_disk)
+            } else {
+                return Err(Error::NeedNewCluster);
+            }
+        } else {
+            None
+        };
+
+        self.refblock_cache.get_mut(table_index).unwrap()[block_index] = refcount;
+        Ok(dropped_cluster)
+    }
+
+    /// Flush the dirty refcount blocks. This must be done before flushing the table that points to
+    /// the blocks.
+    pub fn flush_blocks(&mut self, raw_file: &mut QcowRawFile) -> io::Result<()> {
+        // Write out all dirty refcount blocks.
+        for (table_index, block) in self.refblock_cache.iter_mut().filter(|(_k, v)| v.dirty()) {
+            let addr = self.ref_table[*table_index];
+            if addr != 0 {
+                raw_file.write_refcount_block(addr, block.get_values())?;
+            } else {
+                return Err(std::io::Error::from_raw_os_error(EINVAL));
+            }
+            block.mark_clean();
+        }
+        Ok(())
+    }
+
+    /// Flush the refcount table that keeps the addresses of the refcount blocks.
+    /// Returns true if the table changed since the previous `flush_table()` call.
+    pub fn flush_table(&mut self, raw_file: &mut QcowRawFile) -> io::Result<bool> {
+        if self.ref_table.dirty() {
+            raw_file.write_pointer_table(
+                self.refcount_table_offset,
+                &self.ref_table.get_values(),
+                0,
+            )?;
+            self.ref_table.mark_clean();
+            Ok(true)
+        } else {
+            Ok(false)
+        }
+    }
+
+    /// Gets the refcount for a cluster with the given address.
+    pub fn get_cluster_refcount(
+        &mut self,
+        raw_file: &mut QcowRawFile,
+        address: u64,
+    ) -> Result<u16> {
+        let (table_index, block_index) = self.get_refcount_index(address);
+        let block_addr_disk = *self.ref_table.get(table_index).ok_or(Error::InvalidIndex)?;
+        if block_addr_disk == 0 {
+            return Ok(0);
+        }
+        if !self.refblock_cache.contains_key(table_index) {
+            let table = VecCache::from_vec(
+                raw_file
+                    .read_refcount_block(block_addr_disk)
+                    .map_err(Error::ReadingRefCounts)?,
+            );
+            let ref_table = &self.ref_table;
+            self.refblock_cache
+                .insert(table_index, table, |index, evicted| {
+                    raw_file.write_refcount_block(ref_table[index], evicted.get_values())
+                })
+                .map_err(Error::EvictingRefCounts)?;
+        }
+        Ok(self.refblock_cache.get(table_index).unwrap()[block_index])
+    }
+
+    /// Returns the refcount table for this file. This is only useful for debugging.
+    pub fn ref_table(&self) -> &[u64] {
+        &self.ref_table.get_values()
+    }
+
+    /// Returns the refcounts stored in the given block.
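+    /// Returns `Ok(None)` if the block has not been allocated on disk yet.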
+    pub fn refcount_block(
+        &mut self,
+        raw_file: &mut QcowRawFile,
+        table_index: usize,
+    ) -> Result<Option<&[u16]>> {
+        let block_addr_disk = *self.ref_table.get(table_index).ok_or(Error::InvalidIndex)?;
+        if block_addr_disk == 0 {
+            return Ok(None);
+        }
+        if !self.refblock_cache.contains_key(table_index) {
+            let table = VecCache::from_vec(
+                raw_file
+                    .read_refcount_block(block_addr_disk)
+                    .map_err(Error::ReadingRefCounts)?,
+            );
+            // TODO(dgreid) - closure needs to return an error.
+            let ref_table = &self.ref_table;
+            self.refblock_cache
+                .insert(table_index, table, |index, evicted| {
+                    raw_file.write_refcount_block(ref_table[index], evicted.get_values())
+                })
+                .map_err(Error::EvictingRefCounts)?;
+        }
+        // The index must exist as it was just inserted if it didn't already.
+        Ok(Some(
+            self.refblock_cache.get(table_index).unwrap().get_values(),
+        ))
+    }
+
+    // Gets the address of the refcount block and the index into the block for the given address.
+    fn get_refcount_index(&self, address: u64) -> (usize, usize) {
+        let block_index = (address / self.cluster_size) % self.refcount_block_entries;
+        let refcount_table_index = (address / self.cluster_size) / self.refcount_block_entries;
+        (refcount_table_index as usize, block_index as usize)
+    }
+}
diff --git a/qcow/src/vec_cache.rs b/qcow/src/vec_cache.rs
new file mode 100644
index 000000000..7d8f9cece
--- /dev/null
+++ b/qcow/src/vec_cache.rs
@@ -0,0 +1,185 @@
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::collections::hash_map::IterMut;
+use std::collections::HashMap;
+use std::io;
+use std::ops::{Index, IndexMut};
+use std::slice::SliceIndex;
+
+/// Trait that allows for checking if an implementor is dirty. Useful for types that are cached so
+/// it can be checked if they need to be committed to disk.
+pub trait Cacheable {
+    /// Used to check if the item needs to be written out or if it can be discarded.
+    fn dirty(&self) -> bool;
+}
+
+#[derive(Debug)]
+/// Represents a vector that implements the `Cacheable` trait so it can be held in a cache.
+pub struct VecCache<T: 'static + Copy + Default> {
+    vec: Box<[T]>,
+    dirty: bool,
+}
+
+impl<T: 'static + Copy + Default> VecCache<T> {
+    /// Creates a `VecCache` that can hold `count` elements.
+    pub fn new(count: usize) -> VecCache<T> {
+        VecCache {
+            vec: vec![Default::default(); count].into_boxed_slice(),
+            dirty: true,
+        }
+    }
+
+    /// Creates a `VecCache` from the passed in `vec`.
+    pub fn from_vec(vec: Vec<T>) -> VecCache<T> {
+        VecCache {
+            vec: vec.into_boxed_slice(),
+            dirty: false,
+        }
+    }
+
+    pub fn get<I>(&self, index: I) -> Option<&<I as SliceIndex<[T]>>::Output>
+    where
+        I: SliceIndex<[T]>,
+    {
+        self.vec.get(index)
+    }
+
+    /// Gets a reference to the underlying vector.
+    pub fn get_values(&self) -> &[T] {
+        &self.vec
+    }
+
+    /// Mark this cache element as clean.
+    pub fn mark_clean(&mut self) {
+        self.dirty = false;
+    }
+
+    /// Returns the number of elements in the vector.
+    pub fn len(&self) -> usize {
+        self.vec.len()
+    }
+}
+
+impl<T: 'static + Copy + Default> Cacheable for VecCache<T> {
+    fn dirty(&self) -> bool {
+        self.dirty
+    }
+}
+
+impl<T: 'static + Copy + Default> Index<usize> for VecCache<T> {
+    type Output = T;
+
+    fn index(&self, index: usize) -> &T {
+        self.vec.index(index)
+    }
+}
+
+impl<T: 'static + Copy + Default> IndexMut<usize> for VecCache<T> {
+    fn index_mut(&mut self, index: usize) -> &mut T {
+        self.dirty = true;
+        self.vec.index_mut(index)
+    }
+}
+
+#[derive(Debug)]
+pub struct CacheMap<T: Cacheable> {
+    capacity: usize,
+    map: HashMap<usize, T>,
+}
+
+impl<T: Cacheable> CacheMap<T> {
+    pub fn new(capacity: usize) -> Self {
+        CacheMap {
+            capacity,
+            map: HashMap::with_capacity(capacity),
+        }
+    }
+
+    pub fn contains_key(&self, key: usize) -> bool {
+        self.map.contains_key(&key)
+    }
+
+    pub fn get(&self, index: usize) -> Option<&T> {
+        self.map.get(&index)
+    }
+
+    pub fn get_mut(&mut self, index: usize) -> Option<&mut T> {
+        self.map.get_mut(&index)
+    }
+
+    pub fn iter_mut(&mut self) -> IterMut<usize, T> {
+        self.map.iter_mut()
+    }
+
+    // Check if the refblock cache is full and we need to evict.
+    pub fn insert<F>(&mut self, index: usize, block: T, write_callback: F) -> io::Result<()>
+    where
+        F: FnOnce(usize, T) -> io::Result<()>,
+    {
+        if self.map.len() == self.capacity {
+            // TODO(dgreid) - smarter eviction strategy.
+            let to_evict = *self.map.iter().nth(0).unwrap().0;
+            if let Some(evicted) = self.map.remove(&to_evict) {
+                if evicted.dirty() {
+                    write_callback(to_evict, evicted)?;
+                }
+            }
+        }
+        self.map.insert(index, block);
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    struct NumCache(pub u64);
+    impl Cacheable for NumCache {
+        fn dirty(&self) -> bool {
+            true
+        }
+    }
+
+    #[test]
+    fn evicts_when_full() {
+        let mut cache = CacheMap::<NumCache>::new(3);
+        let mut evicted = None;
+        cache
+            .insert(0, NumCache(5), |index, _| {
+                evicted = Some(index);
+                Ok(())
+            })
+            .unwrap();
+        assert_eq!(evicted, None);
+        cache
+            .insert(1, NumCache(6), |index, _| {
+                evicted = Some(index);
+                Ok(())
+            })
+            .unwrap();
+        assert_eq!(evicted, None);
+        cache
+            .insert(2, NumCache(7), |index, _| {
+                evicted = Some(index);
+                Ok(())
+            })
+            .unwrap();
+        assert_eq!(evicted, None);
+        cache
+            .insert(3, NumCache(8), |index, _| {
+                evicted = Some(index);
+                Ok(())
+            })
+            .unwrap();
+        assert!(evicted.is_some());
+
+        // Check that three of the four items inserted are still there and that the most recently
+        // inserted is one of them.
+        let num_items = (0..=3).filter(|k| cache.contains_key(*k)).count();
+        assert_eq!(num_items, 3);
+        assert!(cache.contains_key(3));
+    }
+}
diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml
old mode 100644
new mode 100755
index 9c488c63c..3782dfd02
--- a/vmm/Cargo.toml
+++ b/vmm/Cargo.toml
@@ -12,6 +12,7 @@ kvm-bindings = "0.1"
 kvm-ioctls = { git = "https://github.com/rust-vmm/kvm-ioctls" }
 libc = ">=0.2.39"
 pci = {path = "../pci"}
+qcow = { path = "../qcow" }
 linux-loader = { git = "https://github.com/sameo/linux-loader" }
 vmm-sys-util = { git = "https://github.com/sameo/vmm-sys-util" }
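
For reference, a minimal sketch of how code in the vmm crate might consume the new qcow API (detect_image_type, ImageType, QcowFile) once this dependency is in place. This is illustrative only and not part of the patch; the "disk.img" path is a placeholder and error handling is simplified.

use std::fs::OpenOptions;
use std::io::{Read, Seek, SeekFrom};

use qcow::{detect_image_type, ImageType, QcowFile};

fn main() {
    // Placeholder path; a real VMM would take this from its disk configuration.
    let file = OpenOptions::new()
        .read(true)
        .write(true)
        .open("disk.img")
        .expect("failed to open disk image");

    let mut sector = [0u8; 512];
    match detect_image_type(&file).expect("failed to detect image type") {
        ImageType::Qcow2 => {
            // QcowFile implements Read/Write/Seek, so it can back a block device directly.
            let mut disk = QcowFile::from(file).expect("invalid qcow2 image");
            disk.seek(SeekFrom::Start(0)).expect("seek failed");
            disk.read_exact(&mut sector).expect("read failed");
        }
        ImageType::Raw => {
            // Raw images are used as-is.
            let mut disk = file;
            disk.seek(SeekFrom::Start(0)).expect("seek failed");
            disk.read_exact(&mut sector).expect("read failed");
        }
    }
}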