diff --git a/Cargo.lock b/Cargo.lock index 6e5b6858..2c95bb07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -687,6 +687,27 @@ dependencies = [ "libc", ] +[[package]] +name = "num_enum" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "once_cell" version = "1.18.0" @@ -800,6 +821,16 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.58" @@ -1116,6 +1147,23 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_datetime" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a76a9312f5ba4c2dec6b9161fdf25d87ad8a09256ccea5a556fef03c706a10f" + +[[package]] +name = "toml_edit" +version = "0.19.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + [[package]] name = "typenum" version = "1.16.0" @@ -1176,7 +1224,7 @@ dependencies = [ "thiserror", "vhost", "vhost-user-backend", - "virtio-bindings", + "virtio-bindings 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "virtio-queue", 
"vm-memory", "vmm-sys-util", @@ -1193,7 +1241,7 @@ dependencies = [ "thiserror", "vhost", "vhost-user-backend", - "virtio-bindings", + "virtio-bindings 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "virtio-queue", "vm-memory", "vmm-sys-util", @@ -1213,7 +1261,26 @@ dependencies = [ "thiserror", "vhost", "vhost-user-backend", - "virtio-bindings", + "virtio-bindings 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "virtio-queue", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "vhost-device-scsi" +version = "0.1.0" +dependencies = [ + "clap", + "env_logger", + "epoll", + "log", + "num_enum", + "tempfile", + "thiserror", + "vhost", + "vhost-user-backend", + "virtio-bindings 0.2.0 (git+https://github.com/rust-vmm/vm-virtio?rev=467c8ec99375a5f4e08b85b18257cd7e0bac1dc0)", "virtio-queue", "vm-memory", "vmm-sys-util", @@ -1228,7 +1295,7 @@ dependencies = [ "libc", "log", "vhost", - "virtio-bindings", + "virtio-bindings 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "virtio-queue", "vm-memory", "vmm-sys-util", @@ -1251,7 +1318,7 @@ dependencies = [ "thiserror", "vhost", "vhost-user-backend", - "virtio-bindings", + "virtio-bindings 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "virtio-queue", "virtio-vsock", "vm-memory", @@ -1264,6 +1331,11 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b9084faf91b9aa9676ae2cac8f1432df2839d9566e6f19f29dbc13a8b831dff" +[[package]] +name = "virtio-bindings" +version = "0.2.0" +source = "git+https://github.com/rust-vmm/vm-virtio?rev=467c8ec99375a5f4e08b85b18257cd7e0bac1dc0#467c8ec99375a5f4e08b85b18257cd7e0bac1dc0" + [[package]] name = "virtio-queue" version = "0.8.0" @@ -1271,7 +1343,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91aebb1df33db33cbf04d4c2445e4f78d0b0c8e65acfd16a4ee95ef63ca252f8" dependencies = [ "log", - "virtio-bindings", + "virtio-bindings 0.2.0 
(registry+https://github.com/rust-lang/crates.io-index)", "vm-memory", "vmm-sys-util", ] @@ -1282,7 +1354,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb198c4dd87bf0b4f6b5d8cb41284fca13763a5a1a7e5b8a7ccce45e46d4cf73" dependencies = [ - "virtio-bindings", + "virtio-bindings 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "virtio-queue", "vm-memory", ] @@ -1488,6 +1560,15 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" +[[package]] +name = "winnow" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699" +dependencies = [ + "memchr", +] + [[package]] name = "yaml-rust" version = "0.4.5" diff --git a/Cargo.toml b/Cargo.toml index 4d1a538c..633fa4d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,5 +4,6 @@ members = [ "crates/gpio", "crates/i2c", "crates/rng", + "crates/scsi", "crates/vsock", ] diff --git a/coverage_config_x86_64.json b/coverage_config_x86_64.json index e3d77522..9cf6dcc8 100644 --- a/coverage_config_x86_64.json +++ b/coverage_config_x86_64.json @@ -1,5 +1,5 @@ { - "coverage_score": 67.6, + "coverage_score": 69.6, "exclude_path": "", "crate_features": "" } diff --git a/crates/scsi/ARCHITECTURE.md b/crates/scsi/ARCHITECTURE.md new file mode 100644 index 00000000..2a2f7fae --- /dev/null +++ b/crates/scsi/ARCHITECTURE.md @@ -0,0 +1,39 @@ +# vhost-user-scsi architecture + +Rough outline of the different pieces and how they fit together: + +## `scsi/mod.rs` + +This defines the `Target` trait, which represents a SCSI target. 
The code in +this file is independent from: + +- A particular SCSI implementation: Currently, we have one implementation of + `Target`, which emulates the SCSI commands itself; but future implementations + could provide pass-through to an iSCSI target or SCSI devices attached to the + host. +- A particular SCSI transport: Nothing in `src/scsi/*` knows anything about + virtio; this is helpful for maintainability, and also allows our SCSI + emulation code to be reusable as, for example, an iSCSI target. To this end, + the `Target` trait is generic over a `Read` and `Write` that it uses for SCSI + data transfer. This makes testing easy: we can just provide a `Vec<u8>` to + write into. + +## `scsi/emulation/*.rs` + +This is the SCSI emulation code, which forms the bulk of the crate. It provides +`EmulatedTarget`, an implementation of `Target`. `EmulatedTarget`, in turn, +looks at the LUN and delegates commands to an implementation of `LogicalUnit`. +In most cases, this will be `BlockDevice`; there's also `MissingLun`, which is +used for responding to commands to invalid LUNs. + +Currently, there is no separation between commands defined in the SPC standard +(commands shared by all device types) and the SBC standard (block-device +specific commands). If we ever implemented another device type (CD/DVD seems +most likely), we'd want to separate those out. + +As noted above, the emulation code knows nothing about virtio. + +## `src/{main,virtio}.rs` + +This code handles vhost-user, virtio, and virtio-scsi; it's the only part of +the crate that knows about these protocols. diff --git a/crates/scsi/CHANGELOG.md b/crates/scsi/CHANGELOG.md new file mode 100644 index 00000000..d471959d --- /dev/null +++ b/crates/scsi/CHANGELOG.md @@ -0,0 +1,3 @@ +# Upcoming Release + +- Initial daemon implementation. 
diff --git a/crates/scsi/Cargo.toml b/crates/scsi/Cargo.toml new file mode 100644 index 00000000..682c9cad --- /dev/null +++ b/crates/scsi/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "vhost-device-scsi" +version = "0.1.0" +authors = ["Gaelan Steele ", "Erik Schilling "] +description = "vhost scsi backend device" +repository = "https://github.com/rust-vmm/vhost-device" +readme = "README.md" +keywords = ["scsi", "vhost", "virt", "backend"] +license = "Apache-2.0 OR BSD-3-Clause" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "4.2", features = ["derive"] } +env_logger = "0.10" +epoll = "4.3" +log = "0.4" +num_enum = "0.5" +thiserror = "1.0" +vhost = { version = "0.7", features = ["vhost-user-slave"] } +vhost-user-backend = "0.9" +# until the scsi bindings hit a release, we have to use the commit that adds them as rev. +virtio-bindings = { git = "https://github.com/rust-vmm/vm-virtio", rev = "467c8ec99375a5f4e08b85b18257cd7e0bac1dc0" } +virtio-queue = "0.8" +vm-memory = "0.11" +vmm-sys-util = "0.11" + +[dev-dependencies] +tempfile = "3.2.0" + diff --git a/crates/scsi/README.md b/crates/scsi/README.md new file mode 100644 index 00000000..46779df1 --- /dev/null +++ b/crates/scsi/README.md @@ -0,0 +1,48 @@ +# vhost-user-scsi + +This is a Rust implementation of a vhost-user-scsi daemon. + +## Usage + +Run the vhost-user-scsi daemon: + +``` +vhost-user-scsi -r --socket-path /tmp/vhost-user-scsi.sock /path/to/image.raw /path/to/second-image.raw ... +``` + +Run QEMU: + +``` +qemu-system-x86_64 ... 
\ + -device vhost-user-scsi-pci,num_queues=1,param_change=off,chardev=vus \ + -chardev socket,id=vus,path=/tmp/vhost-user-scsi.sock \ + # must match total guest memory + -object memory-backend-memfd,id=mem,size=384M,share=on \ + -numa node,memdev=mem +``` + +## Limitations + +We are currently only supporting a single request queue and do not support +dynamic reconfiguration of LUN parameters (VIRTIO_SCSI_F_CHANGE). + +## Features + +This crate is a work-in-progress. Currently, it's possible to mount and read +up to 256 read-only raw disk images. Some features we might like to add +at some point, roughly ordered from sooner to later: + +- Write support. This should just be a matter of implementing the WRITE + command, but there's a bit of complexity around writeback caching we + need to make sure we get right. +- Support more LUNs. virtio-scsi supports up to 16384 LUNs per target. + After 256, the LUN encoding format is different; it's nothing too + complicated, but I haven't gotten around to implementing it. +- Concurrency. Currently, we process SCSI commands one at a time. Eventually, + it'd be a good idea to use threads or some fancy async/io_uring stuff to + concurrently handle multiple commands. virtio-scsi also allows for multiple + request queues, allowing the guest to submit requests from multiple cores + in parallel; we should support that. +- iSCSI passthrough. This shouldn't be too bad, but it might be a good idea + to decide on a concurrency model (threads or async) before we spend too much + time here. 
diff --git a/crates/scsi/src/main.rs b/crates/scsi/src/main.rs new file mode 100644 index 00000000..9e7813f1 --- /dev/null +++ b/crates/scsi/src/main.rs @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +mod scsi; +mod vhu_scsi; +mod virtio; + +use std::{ + fs::File, + path::PathBuf, + sync::{Arc, RwLock}, +}; + +use clap::Parser; +use log::{error, info, warn}; +use thiserror::Error as ThisError; +use vhost::vhost_user::{self, Listener}; +use vhost_user_backend::VhostUserDaemon; +use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap}; + +use crate::scsi::emulation::{ + block_device::{BlockDevice, FileBackend, MediumRotationRate}, + target::EmulatedTarget, +}; +use crate::vhu_scsi::VhostUserScsiBackend; + +#[derive(Debug, ThisError)] +enum Error { + #[error("More than 256 LUNs aren't currently supported")] + TooManyLUNs, + #[error("Failed creating listener: {0}")] + FailedCreatingListener(vhost_user::Error), +} + +type Result = std::result::Result; + +#[derive(Parser)] +struct ScsiArgs { + /// Make the images read-only. + /// + /// Currently, we don't actually support writes, but sometimes we want to + /// pretend the disk is writable to work around issues with some tools that + /// use the Linux SCSI generic API. + #[arg(long = "read-only", short = 'r')] + read_only: bool, + /// Tell the guest this disk is non-rotational. + /// + /// Affects some heuristics in Linux around, for example, scheduling. + #[arg(long = "solid-state")] + solid_state: bool, + /// Location of vhost-user socket. + #[clap(short, long)] + socket_path: PathBuf, + /// Images against which the SCSI actions are emulated. + images: Vec, +} + +fn create_backend(args: &ScsiArgs) -> Result { + let mut backend = VhostUserScsiBackend::new(); + let mut target = EmulatedTarget::new(); + + if args.images.len() > 256 { + // This is fairly simple to add; it's just a matter of supporting the right LUN + // encoding formats. 
+ error!("Currently only up to 256 targets are supported"); + return Err(Error::TooManyLUNs); + } + + if !args.read_only { + warn!("Currently, only read-only images are supported. Unless you know what you're doing, you want to pass -r"); + } + + for image in &args.images { + let mut dev = BlockDevice::new(FileBackend::new( + File::options() + .read(true) + .write(true) + .open(image) + .expect("Opening image"), + )); + dev.set_write_protected(args.read_only); + dev.set_solid_state(if args.solid_state { + MediumRotationRate::NonRotating + } else { + MediumRotationRate::Unreported + }); + target.add_lun(Box::new(dev)); + } + + backend.add_target(Box::new(target)); + Ok(backend) +} + +fn start_backend(backend: VhostUserScsiBackend, args: ScsiArgs) -> Result<()> { + let backend = Arc::new(RwLock::new(backend)); + let mut daemon = VhostUserDaemon::new( + "vhost-user-scsi".into(), + Arc::clone(&backend), + GuestMemoryAtomic::new(GuestMemoryMmap::new()), + ) + .expect("Creating daemon"); + + daemon + .start(Listener::new(args.socket_path, true).map_err(Error::FailedCreatingListener)?) + .expect("Starting daemon"); + + match daemon.wait() { + Ok(()) => { + info!("Stopping cleanly."); + } + Err(vhost_user_backend::Error::HandleRequest(vhost_user::Error::PartialMessage)) => { + info!("vhost-user connection closed with partial message. If the VM is shutting down, this is expected behavior; otherwise, it might be a bug."); + } + Err(e) => { + warn!("Error running daemon: {:?}", e); + } + } + + // No matter the result, we need to shut down the worker thread. 
+ // unwrap will only panic if we already panicked somewhere else + backend + .read() + .unwrap() + .exit_event + .write(1) + .expect("Shutting down worker thread"); + Ok(()) +} + +fn main() -> Result<()> { + env_logger::init(); + let args = ScsiArgs::parse(); + let backend = create_backend(&args)?; + start_backend(backend, args) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_backend() { + let sock = tempfile::NamedTempFile::new().unwrap(); + let args = ScsiArgs { + images: vec!["/dev/null".into()], + read_only: true, + socket_path: sock.path().into(), + solid_state: false, + }; + create_backend(&args).unwrap(); + } + + #[test] + fn test_fail_listener() { + let socket_name = "~/path/not/present/scsi"; + let args = ScsiArgs { + images: vec!["/dev/null".into()], + read_only: true, + socket_path: socket_name.into(), + solid_state: false, + }; + let backend = create_backend(&args).unwrap(); + let err = start_backend(backend, args).unwrap_err(); + if let Error::FailedCreatingListener(_) = err { + } else { + panic!("expected failure when creating listener"); + } + } +} diff --git a/crates/scsi/src/scsi/emulation/block_device.rs b/crates/scsi/src/scsi/emulation/block_device.rs new file mode 100644 index 00000000..7ac4884a --- /dev/null +++ b/crates/scsi/src/scsi/emulation/block_device.rs @@ -0,0 +1,778 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +use std::{ + convert::{TryFrom, TryInto}, + fs::File, + io::{self, Read, Write}, + num::{NonZeroU32, NonZeroU64, TryFromIntError}, + ops::{Add, Div, Mul, Sub}, + os::unix::prelude::*, +}; + +use log::{debug, error, warn}; + +use super::{ + command::{ + parse_opcode, CommandType, LunSpecificCommand, ModePageSelection, ModeSensePageControl, + ParseOpcodeResult, ReportSupportedOpCodesMode, SenseFormat, VpdPage, OPCODES, + }, + mode_page::ModePage, + response_data::{respond_standard_inquiry_data, SilentlyTruncate}, + target::{LogicalUnit, LunRequest}, +}; +use crate::scsi::{sense, 
CmdError, CmdOutput, TaskAttr}; + +pub(crate) enum MediumRotationRate { + Unreported, + NonRotating, +} + +#[derive(Clone, Copy, PartialEq, PartialOrd)] +pub(crate) struct ByteOffset(u64); +impl From for ByteOffset { + fn from(value: u64) -> Self { + ByteOffset(value) + } +} +impl From for u64 { + fn from(value: ByteOffset) -> Self { + value.0 + } +} +impl Div for ByteOffset { + type Output = BlockOffset; + + fn div(self, rhs: BlockSize) -> Self::Output { + BlockOffset(self.0 / NonZeroU64::from(rhs.0)) + } +} + +#[derive(Clone, Copy, PartialEq, PartialOrd)] +pub(crate) struct BlockSize(NonZeroU32); +impl From for u32 { + fn from(value: BlockSize) -> Self { + u32::from(value.0) + } +} +impl TryFrom for BlockSize { + type Error = TryFromIntError; + + fn try_from(value: u32) -> Result { + Ok(BlockSize(NonZeroU32::try_from(value)?)) + } +} + +#[derive(Clone, Copy, PartialEq, PartialOrd)] +pub(crate) struct BlockOffset(u64); +impl From for u64 { + fn from(value: BlockOffset) -> Self { + value.0 + } +} +impl From for BlockOffset { + fn from(value: u64) -> Self { + BlockOffset(value) + } +} +impl Add for BlockOffset { + type Output = BlockOffset; + + fn add(self, rhs: BlockOffset) -> Self::Output { + BlockOffset(self.0 + rhs.0) + } +} +impl Sub for BlockOffset { + type Output = Self; + + fn sub(self, rhs: BlockOffset) -> Self::Output { + BlockOffset(self.0 - rhs.0) + } +} +impl Mul for BlockOffset { + type Output = ByteOffset; + + fn mul(self, rhs: BlockSize) -> Self::Output { + ByteOffset(self.0 * u64::from(NonZeroU64::from(rhs.0))) + } +} + +pub(crate) trait BlockDeviceBackend: Send + Sync { + fn read_exact_at(&mut self, buf: &mut [u8], offset: ByteOffset) -> io::Result<()>; + fn write_exact_at(&mut self, buf: &[u8], offset: ByteOffset) -> io::Result<()>; + fn size_in_blocks(&mut self) -> io::Result; + fn block_size(&self) -> BlockSize; + fn sync(&mut self) -> io::Result<()>; +} + +pub(crate) struct FileBackend { + file: File, + block_size: BlockSize, +} + +impl 
FileBackend { + pub fn new(file: File) -> Self { + Self { + file, + block_size: BlockSize::try_from(512).expect("512 is valid BlockSize"), + } + } +} + +impl BlockDeviceBackend for FileBackend { + fn read_exact_at(&mut self, buf: &mut [u8], offset: ByteOffset) -> io::Result<()> { + self.file.read_exact_at(buf, u64::from(offset)) + } + + fn write_exact_at(&mut self, buf: &[u8], offset: ByteOffset) -> io::Result<()> { + self.file.write_all_at(buf, u64::from(offset)) + } + + fn size_in_blocks(&mut self) -> io::Result { + let len = ByteOffset::from(self.file.metadata()?.len()); + assert!(u64::from(len) % NonZeroU64::from(self.block_size.0) == 0); + Ok(len / self.block_size) + } + + fn block_size(&self) -> BlockSize { + self.block_size + } + + fn sync(&mut self) -> io::Result<()> { + self.file.sync_data() + } +} + +pub(crate) struct BlockDevice { + backend: T, + write_protected: bool, + rotation_rate: MediumRotationRate, +} + +impl BlockDevice { + pub(crate) const fn new(backend: T) -> Self { + Self { + backend, + write_protected: false, + rotation_rate: MediumRotationRate::Unreported, + } + } + + fn read_blocks(&mut self, lba: BlockOffset, blocks: BlockOffset) -> io::Result> { + // TODO: Ideally, this would be a read_vectored directly into guest + // address space. Instead, we have an allocation and several copies. + + let mut ret = vec![ + 0; + usize::try_from(u64::from(blocks * self.backend.block_size())) + .expect("block length in bytes should fit usize") + ]; + + self.backend + .read_exact_at(&mut ret[..], lba * self.backend.block_size())?; + + Ok(ret) + } + + fn write_blocks( + &mut self, + lba: BlockOffset, + blocks: BlockOffset, + reader: &mut dyn Read, + ) -> io::Result<()> { + // TODO: Avoid the copies here. 
+ let mut buf = vec![ + 0; + usize::try_from(u64::from(blocks * self.backend.block_size())) + .expect("block length in bytes should fit usize") + ]; + reader.read_exact(&mut buf)?; + self.backend + .write_exact_at(&buf, lba * self.backend.block_size())?; + + Ok(()) + } + + fn write_same_block( + &mut self, + lba_start: BlockOffset, + block_count: BlockOffset, + buf: &[u8], + ) -> io::Result<()> { + let block_size = self.backend.block_size(); + for lba in u64::from(lba_start)..u64::from(lba_start + block_count) { + let lba = BlockOffset(lba); + self.backend.write_exact_at(buf, lba * block_size)?; + } + Ok(()) + } + + pub fn set_write_protected(&mut self, wp: bool) { + self.write_protected = wp; + } + + pub fn set_solid_state(&mut self, rotation_rate: MediumRotationRate) { + self.rotation_rate = rotation_rate; + } +} + +impl LogicalUnit for BlockDevice { + fn execute_command( + &mut self, + data_in: &mut SilentlyTruncate<&mut dyn Write>, + data_out: &mut dyn Read, + req: LunRequest, + command: LunSpecificCommand, + ) -> Result { + if req.crn != 0 { + // CRN is a weird bit of the protocol we wouldn't ever expect to be used over + // virtio-scsi; but it's allowed to set it non-zero + warn!("Received non-zero CRN: {}", req.crn); + } + + if req.task_attr != TaskAttr::Simple { + // virtio-scsi spec allows us to treat all task attrs as SIMPLE. + warn!("Ignoring non-simple task attr of {:?}", req.task_attr); + } + + if req.prio != 0 { + // My reading of SAM-6 is that priority is purely advisory, so it's fine to + // ignore it. + warn!("Ignoring non-zero priority of {}.", req.prio); + } + + if req.naca { + // We don't support NACA, and say as much in our INQUIRY data, so if + // we get it that's an error. 
+ warn!("Driver set NACA bit, which is unsupported."); + return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB)); + } + + debug!("Incoming command: {:?}", command); + + match command { + LunSpecificCommand::TestUnitReady => Ok(CmdOutput::ok()), + LunSpecificCommand::ReadCapacity10 => { + match self.backend.size_in_blocks() { + Ok(size) => { + // READ CAPACITY (10) returns a 32-bit LBA, which may not be enough. If it + // isn't, we're supposed to return 0xffff_ffff and hope the driver gets the + // memo and uses the newer READ CAPACITY (16). + + // n.b. this is the last block, ie (length-1), not length + let final_block: u32 = u64::from(size - BlockOffset(1)) + .try_into() + .unwrap_or(0xffff_ffff); + let block_size: u32 = u32::from(self.backend.block_size()); + + data_in + .write_all(&u32::to_be_bytes(final_block)) + .map_err(CmdError::DataIn)?; + data_in + .write_all(&u32::to_be_bytes(block_size)) + .map_err(CmdError::DataIn)?; + + Ok(CmdOutput::ok()) + } + Err(e) => { + error!("Error getting image size: {}", e); + // TODO: Is this a reasonable sense code to send? + Ok(CmdOutput::check_condition(sense::UNRECOVERED_READ_ERROR)) + } + } + } + LunSpecificCommand::ReadCapacity16 => { + match self.backend.size_in_blocks() { + Ok(size) => { + // n.b. 
this is the last block, ie (length-1), not length + let final_block = u64::from(size - BlockOffset(1)); + let block_size = u32::from(self.backend.block_size()); + + data_in + .write_all(&u64::to_be_bytes(final_block)) + .map_err(CmdError::DataIn)?; + data_in + .write_all(&u32::to_be_bytes(block_size)) + .map_err(CmdError::DataIn)?; + + // no protection stuff; 1-to-1 logical/physical blocks + data_in.write_all(&[0, 0]).map_err(CmdError::DataIn)?; + + // top 2 bits: thin provisioning stuff; other 14 bits are lowest + // aligned LBA, which is zero + data_in + .write_all(&[0b1100_0000, 0]) + .map_err(CmdError::DataIn)?; + + // reserved + data_in.write_all(&[0; 16]).map_err(CmdError::DataIn)?; + + Ok(CmdOutput::ok()) + } + Err(e) => { + error!("Error getting image size: {}", e); + // TODO: Is this a reasonable sense code to send? + Ok(CmdOutput::check_condition(sense::UNRECOVERED_READ_ERROR)) + } + } + } + LunSpecificCommand::ModeSense6 { mode_page, pc, dbd } => { + // we use this for the pages array if we only need a single element; lifetime + // rules mean it has to be declared here + let single_page_array: [ModePage; 1]; + + let pages = match mode_page { + ModePageSelection::Single(x) => { + single_page_array = [x]; + &single_page_array + } + ModePageSelection::AllPageZeros => ModePage::ALL_ZERO, + }; + + let pages_len: u32 = pages.iter().map(|x| u32::from(x.page_length() + 2)).sum(); + // SPC-6r05, 7.5.6: "Logical units that support more than 256 bytes of block + // descriptors and mode pages should implement ten-byte mode commands. The MODE + // DATA LENGTH field in the six-byte CDB header limits the transferred data to + // 256 bytes." + // Unclear what exactly we're supposed to do if we have more than 256 bytes of + // mode pages and get sent a MODE SENSE (6). In any case, we don't at the + // moment; if we ever get that much, this unwrap() will start + // crashing us and we can figure out what to do. 
+ let pages_len = u8::try_from(pages_len).unwrap(); + + // mode parameter header + data_in + .write_all(&[ + pages_len + 3, // size in bytes after this one + 0, // medium type - 0 for SBC + if self.write_protected { + 0b1001_0000 // WP, support DPOFUA + } else { + 0b0001_0000 // support DPOFUA + }, + 0, // block desc length + ]) + .map_err(CmdError::DataIn)?; + + if !dbd { + // TODO: Block descriptors are optional, so we currently + // don't provide them. Does any driver + // actually use them? + } + + for page in pages { + match pc { + ModeSensePageControl::Current | ModeSensePageControl::Default => { + page.write(data_in).map_err(CmdError::DataIn)?; + } + ModeSensePageControl::Changeable => { + // SPC-6 6.14.3: "If the logical unit does not + // implement changeable parameters mode pages and + // the device server receives a MODE SENSE command + // with 01b in the PC field, then the device server + // shall terminate the command with CHECK CONDITION + // status, with the sense key set to ILLEGAL + // REQUEST, and the additional sense code set to + // INVALID FIELD IN CDB." + return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB)); + } + ModeSensePageControl::Saved => { + return Ok(CmdOutput::check_condition( + sense::SAVING_PARAMETERS_NOT_SUPPORTED, + )) + } + } + } + + Ok(CmdOutput::ok()) + } + LunSpecificCommand::Read10 { + dpo, + fua, + lba, + transfer_length, + } => { + if dpo { + // DPO is just a hint that the guest probably won't access + // this any time soon, so we can ignore it + debug!("Silently ignoring DPO flag"); + } + + if fua { + // Somewhat weirdly, SCSI supports FUA on reads. Here's the + // key bit: "A force unit access (FUA) bit set to one + // specifies that the device server shall read the logical + // blocks from… the medium. 
If the FUA bit is set to one + // and a volatile cache contains a more recent version of a + // logical block than… the medium, then, before reading the + // logical block, the device server shall write the logical + // block to… the medium." + + // I guess the idea is that you can read something back, and + // be absolutely sure what you just read will persist. + + // So for our purposes, we need to make sure whatever we + // return has been saved to disk. fsync()ing the whole image + // is a bit blunt, but does the trick. + + if let Err(e) = self.backend.sync() { + error!("Error syncing file: {}", e); + return Ok(CmdOutput::check_condition(sense::TARGET_FAILURE)); + } + } + + // Ignore group number: AFAICT, it's for separating reads from different + // workloads in performance metrics, and we don't report anything like that + + let size = match self.backend.size_in_blocks() { + Ok(size) => size, + Err(e) => { + error!("Error getting image size for read: {}", e); + return Ok(CmdOutput::check_condition(sense::UNRECOVERED_READ_ERROR)); + } + }; + + let lba = BlockOffset(lba.into()); + let transfer_length = BlockOffset(transfer_length.into()); + + if lba + transfer_length > size { + return Ok(CmdOutput::check_condition( + sense::LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE, + )); + } + + let read_result = self.read_blocks(lba, transfer_length); + + match read_result { + Ok(bytes) => { + data_in.write_all(&bytes[..]).map_err(CmdError::DataIn)?; + Ok(CmdOutput::ok()) + } + Err(e) => { + error!("Error reading image: {}", e); + Ok(CmdOutput::check_condition(sense::UNRECOVERED_READ_ERROR)) + } + } + } + LunSpecificCommand::Write10 { + dpo, + fua, + lba, + transfer_length, + } => { + if dpo { + // DPO is just a hint that the guest probably won't access + // this any time soon, so we can ignore it + debug!("Silently ignoring DPO flag"); + } + + let size = match self.backend.size_in_blocks() { + Ok(size) => size, + Err(e) => { + error!("Error getting image size for read: {}", e); + 
return Ok(CmdOutput::check_condition(sense::TARGET_FAILURE)); + } + }; + + let lba = BlockOffset(lba.into()); + let transfer_length = BlockOffset(transfer_length.into()); + + if lba + transfer_length > size { + return Ok(CmdOutput::check_condition( + sense::LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE, + )); + } + + let write_result = self.write_blocks(lba, transfer_length, data_out); + + if fua { + if let Err(e) = self.backend.sync() { + error!("Error syncing file: {}", e); + return Ok(CmdOutput::check_condition(sense::TARGET_FAILURE)); + } + } + + match write_result { + Ok(()) => Ok(CmdOutput::ok()), + Err(e) => { + error!("Error writing to block device: {}", e); + Ok(CmdOutput::check_condition(sense::TARGET_FAILURE)) + } + } + } + LunSpecificCommand::WriteSame16 { + lba, + number_of_logical_blocks, + anchor, + } => { + // We do not support block provisioning + if anchor { + return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB)); + } + + // This command can be used to unmap/discard a region of blocks... + // TODO: Do something smarter and punch holes into the backend, + // for now we will just write A LOT of zeros in a very inefficient way. 
+ + let size = match self.backend.size_in_blocks() { + Ok(size) => size, + Err(e) => { + error!("Error getting image size for read: {}", e); + return Ok(CmdOutput::check_condition(sense::UNRECOVERED_READ_ERROR)); + } + }; + + let lba = BlockOffset(lba); + let number_of_logical_blocks = BlockOffset(number_of_logical_blocks.into()); + + if lba + number_of_logical_blocks > size { + return Ok(CmdOutput::check_condition( + sense::LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE, + )); + } + + let mut buf = vec![ + 0; + usize::try_from(u32::from(self.backend.block_size())) + .expect("block_size should fit usize") + ]; + let read_result = data_out.read_exact(&mut buf); + if let Err(e) = read_result { + error!("Error reading from data_out: {}", e); + return Ok(CmdOutput::check_condition(sense::TARGET_FAILURE)); + } + + let write_result = self.write_same_block(lba, number_of_logical_blocks, &buf); + + match write_result { + Ok(()) => Ok(CmdOutput::ok()), + Err(e) => { + error!("Error writing to block device: {}", e); + Ok(CmdOutput::check_condition(sense::TARGET_FAILURE)) + } + } + } + LunSpecificCommand::Inquiry(page_code) => { + // top 3 bits 0: peripheral device code = exists and ready + // bottom 5 bits 0: device type = block device + data_in.write_all(&[0]).map_err(CmdError::DataIn)?; + + if let Some(code) = page_code { + let mut out = vec![]; + match code { + VpdPage::SupportedVpdPages => { + out.push(VpdPage::SupportedVpdPages.into()); + out.push(VpdPage::BlockDeviceCharacteristics.into()); + out.push(VpdPage::LogicalBlockProvisioning.into()); + } + VpdPage::BlockDeviceCharacteristics => { + let rotation_rate: u16 = match self.rotation_rate { + MediumRotationRate::Unreported => 0, + MediumRotationRate::NonRotating => 1, + }; + out.extend_from_slice(&rotation_rate.to_be_bytes()); + // nothing worth setting in the rest + out.extend_from_slice(&[0; 58]); + } + VpdPage::LogicalBlockProvisioning => { + out.push(0); // don't support threshold sets + out.push(0b1110_0100); // support 
unmapping w/ UNMAP + // and WRITE SAME (10 & 16), + // don't support anchored + // LBAs or group descriptors + out.push(0b0000_0010); // thin provisioned + out.push(0); // no threshold % support + } + _ => return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB)), + } + + data_in + .write_all(&[code.into()]) + .map_err(CmdError::DataIn)?; + data_in + .write_all( + &u16::try_from(out.len()) + .expect("VPD page < 2^16 bits") + .to_be_bytes(), + ) + .map_err(CmdError::DataIn)?; + data_in.write_all(&out).map_err(CmdError::DataIn)?; + } else { + respond_standard_inquiry_data(data_in).map_err(CmdError::DataIn)?; + } + + Ok(CmdOutput::ok()) + } + LunSpecificCommand::ReportSupportedOperationCodes { rctd, mode } => { + // helpers for output data format + fn one_command_supported( + data_in: &mut impl Write, + ty: CommandType, + ) -> io::Result<()> { + data_in.write_all(&[0])?; // unused flags + data_in.write_all(&[0b0000_0011])?; // supported, don't set a bunch of flags + let tpl = ty.cdb_template(); + data_in.write_all( + &u16::try_from(tpl.len()) + .expect("length of TPL to be same as CDB") + .to_be_bytes(), + )?; + data_in.write_all(tpl)?; + Ok(()) + } + + fn one_command_not_supported(data_in: &mut impl Write) -> io::Result<()> { + data_in.write_all(&[0])?; // unused flags + data_in.write_all(&[0b0000_0001])?; // not supported + data_in.write_all(&[0; 2])?; // cdb len + Ok(()) + } + + fn timeout_descriptor(data_in: &mut impl Write) -> io::Result<()> { + // timeout descriptor + data_in.write_all(&0xa_u16.to_be_bytes())?; // len + data_in.write_all(&[0, 0])?; // reserved, cmd specific + data_in.write_all(&0_u32.to_be_bytes())?; + data_in.write_all(&0_u32.to_be_bytes())?; + Ok(()) + } + + match mode { + ReportSupportedOpCodesMode::All => { + let cmd_len = if rctd { 20 } else { 8 }; + let len = u32::try_from(OPCODES.len() * cmd_len) + .expect("less than (2^32 / 20) ~= 2^27 opcodes"); + data_in + .write_all(&len.to_be_bytes()) + .map_err(CmdError::DataIn)?; + + for 
&(ty, (opcode, sa)) in OPCODES { + data_in.write_all(&[opcode]).map_err(CmdError::DataIn)?; + data_in.write_all(&[0]).map_err(CmdError::DataIn)?; // reserved + data_in + .write_all(&sa.unwrap_or(0).to_be_bytes()) + .map_err(CmdError::DataIn)?; + data_in.write_all(&[0]).map_err(CmdError::DataIn)?; // reserved + + let ctdp: u8 = if rctd { 0b10 } else { 0b00 }; + let servactv = u8::from(sa.is_some()); + data_in + .write_all(&[ctdp | servactv]) + .map_err(CmdError::DataIn)?; + + data_in + .write_all( + &u16::try_from(ty.cdb_template().len()) + .expect("length of TPL to be same as CDB") + .to_be_bytes(), + ) + .map_err(CmdError::DataIn)?; + + if rctd { + timeout_descriptor(data_in).map_err(CmdError::DataIn)?; + } + } + } + ReportSupportedOpCodesMode::OneCommand(opcode) => match parse_opcode(opcode) { + ParseOpcodeResult::Command(ty) => { + one_command_supported(data_in, ty).map_err(CmdError::DataIn)?; + + if rctd { + timeout_descriptor(data_in).map_err(CmdError::DataIn)?; + } + } + ParseOpcodeResult::ServiceAction(_) => { + return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB)); + } + ParseOpcodeResult::Invalid => { + warn!("Reporting that we don't support command {:#2x}. It might be worth adding.", opcode); + one_command_not_supported(data_in).map_err(CmdError::DataIn)?; + } + }, + ReportSupportedOpCodesMode::OneServiceAction(opcode, sa) => { + match parse_opcode(opcode) { + ParseOpcodeResult::Command(_) => { + return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB)) + } + ParseOpcodeResult::ServiceAction(unparsed_sa) => { + if let Some(ty) = unparsed_sa.parse(sa) { + one_command_supported(data_in, ty).map_err(CmdError::DataIn)?; + + if rctd { + timeout_descriptor(data_in).map_err(CmdError::DataIn)?; + } + } else { + warn!("Reporting that we don't support command {:#2x}/{:#2x}. 
It might be worth adding.", opcode, sa); + one_command_not_supported(data_in).map_err(CmdError::DataIn)?; + } + } + ParseOpcodeResult::Invalid => { + // the spec isn't super clear what we're supposed to do here, but I + // think an invalid opcode is one for which our implementation + // "does not implement service actions", so we say invalid field in + // CDB + warn!("Reporting that we don't support command {:#2x}/{:#2x}. It might be worth adding.", opcode, sa); + return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB)); + } + } + } + ReportSupportedOpCodesMode::OneCommandOrServiceAction(opcode, sa) => { + match parse_opcode(opcode) { + ParseOpcodeResult::Command(ty) => { + if sa == 0 { + one_command_supported(data_in, ty).map_err(CmdError::DataIn)?; + + if rctd { + timeout_descriptor(data_in).map_err(CmdError::DataIn)?; + } + } else { + one_command_not_supported(data_in).map_err(CmdError::DataIn)?; + } + } + ParseOpcodeResult::ServiceAction(unparsed_sa) => { + if let Some(ty) = unparsed_sa.parse(sa) { + one_command_supported(data_in, ty).map_err(CmdError::DataIn)?; + + if rctd { + timeout_descriptor(data_in).map_err(CmdError::DataIn)?; + } + } else { + warn!("Reporting that we don't support command {:#2x}/{:#2x}. It might be worth adding.", opcode, sa); + one_command_not_supported(data_in).map_err(CmdError::DataIn)?; + } + } + ParseOpcodeResult::Invalid => { + warn!("Reporting that we don't support command {:#2x}[/{:#2x}]. It might be worth adding.", opcode, sa); + one_command_not_supported(data_in).map_err(CmdError::DataIn)?; + } + } + } + } + Ok(CmdOutput::ok()) + } + LunSpecificCommand::RequestSense(format) => { + match format { + SenseFormat::Fixed => { + data_in + .write_all(&sense::NO_ADDITIONAL_SENSE_INFORMATION.to_fixed_sense()) + .map_err(CmdError::DataIn)?; + Ok(CmdOutput::ok()) + } + SenseFormat::Descriptor => { + // Don't support desciptor format. 
+ Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB)) + } + } + } + LunSpecificCommand::SynchronizeCache10 => { + // While SCSI allows just syncing a range, we just sync the entire file + match self.backend.sync() { + Ok(()) => Ok(CmdOutput::ok()), + Err(e) => { + error!("Error syncing block device: {}", e); + Ok(CmdOutput::check_condition(sense::TARGET_FAILURE)) + } + } + } + } + } +} diff --git a/crates/scsi/src/scsi/emulation/command.rs b/crates/scsi/src/scsi/emulation/command.rs new file mode 100644 index 00000000..43cb0b40 --- /dev/null +++ b/crates/scsi/src/scsi/emulation/command.rs @@ -0,0 +1,681 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +//! Data structures and parsing code for SCSI commands. A rough overview: +//! We need to deal with opcodes in two places: in parsing commands themselves, +//! and in implementing REPORT SUPPORTED OPERATION CODES. Therefore, we parse +//! commands in two steps. First, we parse the opcode (and sometimes service +//! action) into a `CommandType` (a C-style enum containing just the commands, +//! not their parameters), then using that, we parse the rest of the CDB and +//! obtain a `Cdb`, which consists of a `Command`, an enum representing a +//! command and its parameters, along with some fields shared across many or all +//! commands. + +use std::convert::{TryFrom, TryInto}; + +use log::warn; +use num_enum::TryFromPrimitive; + +use crate::scsi::emulation::mode_page::ModePage; + +/// One of the modes supported by SCSI's REPORT LUNS command. +#[derive(PartialEq, Eq, TryFromPrimitive, Debug, Copy, Clone)] +#[repr(u8)] +pub(crate) enum ReportLunsSelectReport { + NoWellKnown = 0x0, + WellKnownOnly = 0x1, + All = 0x2, + Administrative = 0x10, + TopLevel = 0x11, + SameConglomerate = 0x12, +} + +/// A type of "vital product data" page returned by SCSI's INQUIRY command. 
+#[derive(PartialEq, Eq, Debug, Copy, Clone)] +pub(crate) enum VpdPage { + Ascii(u8), + Ata, // * + BlockDeviceCharacteristics, // * + BlockDeviceCharacteristicsExt, + BlockLimits, // * + BlockLimitsExt, + CfaProfile, + DeviceConstituents, + DeviceIdentification, // * + ExtendedInquiry, + FormatPresets, + LogicalBlockProvisioning, // * + ManagementNetworkAddresses, + ModePagePolicy, + PowerCondition, + PowerConsumption, + PortocolSpecificLogicalUnit, + ProtocolSpecificPort, + Referrals, + ScsiFeatureSets, + ScsiPorts, + SoftwareInterfaceIdentification, + SupportedVpdPages, // * + ThirdPartyCopy, + UnitSerialNumber, // * + ZonedBlockDeviceCharacteristics, // * +} +// starred ones are ones Linux will use if available + +#[derive(PartialEq, Eq, TryFromPrimitive, Debug, Copy, Clone)] +#[repr(u8)] +pub(crate) enum ModeSensePageControl { + Current = 0b00, + Changeable = 0b01, + Default = 0b10, + Saved = 0b11, +} + +impl TryFrom for VpdPage { + type Error = (); + + fn try_from(val: u8) -> Result { + match val { + 0x00 => Ok(Self::SupportedVpdPages), + 0x1..=0x7f => Ok(Self::Ascii(val)), + 0x80 => Ok(Self::UnitSerialNumber), + 0x83 => Ok(Self::DeviceIdentification), + 0x84 => Ok(Self::SoftwareInterfaceIdentification), + 0x85 => Ok(Self::ManagementNetworkAddresses), + 0x86 => Ok(Self::ExtendedInquiry), + 0x87 => Ok(Self::ModePagePolicy), + 0x88 => Ok(Self::ScsiPorts), + 0x89 => Ok(Self::Ata), + 0x8a => Ok(Self::PowerCondition), + 0x8b => Ok(Self::DeviceConstituents), + 0x8c => Ok(Self::CfaProfile), + 0x8d => Ok(Self::PowerConsumption), + 0x8f => Ok(Self::ThirdPartyCopy), + 0x90 => Ok(Self::PortocolSpecificLogicalUnit), + 0x91 => Ok(Self::ProtocolSpecificPort), + 0x92 => Ok(Self::ScsiFeatureSets), + 0xb0 => Ok(Self::BlockLimits), + 0xb1 => Ok(Self::BlockDeviceCharacteristics), + 0xb2 => Ok(Self::LogicalBlockProvisioning), + 0xb3 => Ok(Self::Referrals), + 0xb5 => Ok(Self::BlockDeviceCharacteristicsExt), + 0xb6 => Ok(Self::ZonedBlockDeviceCharacteristics), + 0xb7 => 
Ok(Self::BlockLimitsExt), + 0xb8 => Ok(Self::FormatPresets), + _ => Err(()), + } + } +} + +impl From for u8 { + fn from(pc: VpdPage) -> Self { + match pc { + VpdPage::Ascii(val) => val, + VpdPage::Ata => 0x89, + VpdPage::BlockDeviceCharacteristics => 0xb1, + VpdPage::BlockDeviceCharacteristicsExt => 0xb5, + VpdPage::BlockLimits => 0xb0, + VpdPage::BlockLimitsExt => 0xb7, + VpdPage::CfaProfile => 0x8c, + VpdPage::DeviceConstituents => 0x8b, + VpdPage::DeviceIdentification => 0x83, + VpdPage::ExtendedInquiry => 0x86, + VpdPage::FormatPresets => 0xb8, + VpdPage::LogicalBlockProvisioning => 0xb2, + VpdPage::ManagementNetworkAddresses => 0x85, + VpdPage::ModePagePolicy => 0x87, + VpdPage::PowerCondition => 0x8a, + VpdPage::PowerConsumption => 0x8d, + VpdPage::PortocolSpecificLogicalUnit => 0x90, + VpdPage::ProtocolSpecificPort => 0x91, + VpdPage::Referrals => 0xb3, + VpdPage::ScsiFeatureSets => 0x92, + VpdPage::ScsiPorts => 0x88, + VpdPage::SoftwareInterfaceIdentification => 0x84, + VpdPage::SupportedVpdPages => 0x00, + VpdPage::ThirdPartyCopy => 0x8f, + VpdPage::UnitSerialNumber => 0x80, + VpdPage::ZonedBlockDeviceCharacteristics => 0xb6, + } + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub(crate) enum SenseFormat { + Fixed, + Descriptor, +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub(crate) enum ModePageSelection { + AllPageZeros, + Single(ModePage), +} + +#[derive(Debug)] +pub(crate) enum LunIndependentCommand { + ReportLuns(ReportLunsSelectReport), +} + +#[derive(Debug)] +pub(crate) enum LunSpecificCommand { + Inquiry(Option), + ModeSense6 { + pc: ModeSensePageControl, + mode_page: ModePageSelection, + /// Disable block descriptors + dbd: bool, + }, + Read10 { + /// Disable page out (i.e. hint that this page won't be accessed again + /// soon, so we shouldn't bother caching it) + dpo: bool, + /// Force unit access (i.e. bypass cache) + fua: bool, + lba: u32, + transfer_length: u16, + }, + Write10 { + /// Disable page out (i.e. 
hint that this page won't be accessed again + /// soon, so we shouldn't bother caching it) + dpo: bool, + /// Force unit access (i.e. bypass cache) + fua: bool, + lba: u32, + transfer_length: u16, + }, + WriteSame16 { + lba: u64, + number_of_logical_blocks: u32, + anchor: bool, + }, + ReadCapacity10, + ReadCapacity16, + ReportSupportedOperationCodes { + /// SCSI RCTD bit: whether we should include timeout descriptors. + rctd: bool, + mode: ReportSupportedOpCodesMode, + }, + RequestSense(SenseFormat), + TestUnitReady, + SynchronizeCache10, +} + +#[derive(Debug)] +pub(crate) enum Command { + LunIndependentCommand(LunIndependentCommand), + LunSpecificCommand(LunSpecificCommand), +} + +#[derive(Clone, Copy, Debug)] +pub(crate) enum CommandType { + Inquiry, + ModeSense6, + Read10, + ReadCapacity10, + ReadCapacity16, + ReportLuns, + ReportSupportedOperationCodes, + RequestSense, + TestUnitReady, + Write10, + WriteSame16, + SynchronizeCache10, +} + +pub(crate) const OPCODES: &[(CommandType, (u8, Option))] = &[ + (CommandType::TestUnitReady, (0x0, None)), + (CommandType::RequestSense, (0x3, None)), + (CommandType::Inquiry, (0x12, None)), + (CommandType::ModeSense6, (0x1a, None)), + (CommandType::ReadCapacity10, (0x25, None)), + (CommandType::Read10, (0x28, None)), + (CommandType::Write10, (0x2a, None)), + (CommandType::SynchronizeCache10, (0x35, None)), + (CommandType::WriteSame16, (0x93, None)), + (CommandType::ReadCapacity16, (0x9e, Some(0x10))), + (CommandType::ReportLuns, (0xa0, None)), + ( + CommandType::ReportSupportedOperationCodes, + (0xa3, Some(0xc)), + ), +]; + +#[derive(Debug, Clone, Copy)] +pub(crate) struct UnparsedServiceAction(u8); +impl UnparsedServiceAction { + pub fn parse(self, service_action: u16) -> Option { + OPCODES + .iter() + .find(|(_, opcode)| *opcode == (self.0, Some(service_action))) + .map(|&(ty, _)| ty) + } +} + +/// See `parse_opcode` +#[derive(Debug, Clone, Copy)] +pub(crate) enum ParseOpcodeResult { + /// The opcode represents a single 
command. + Command(CommandType), + /// The opcode requires a service action. + ServiceAction(UnparsedServiceAction), + /// The opcode is invalid. + Invalid, +} + +/// Determine the command that corresponds to a SCSI opcode. +/// +/// This is a little weird. Most SCSI commands are just identified by the +/// opcode - the first byte of the CDB - but some opcodes require a second +/// byte, called the service action. Generally, each distinct service action +/// value is treated as a first-class command. But there's some weirdness +/// around parsing, especially with invalid commands: sometimes, we're +/// expected to behave differently for a valid opcode with an invalid +/// service action vs an invalid opcode. +/// +/// To allow for this, we have a two-step parsing API. First, a caller +/// calls `parse_opcode` with the first byte of the CDB. This could return +/// three things: +/// - `Command`: the opcode corresponded to a single-byte command; we're done. +/// - `Invalid`: the opcode isn't recognized at all; we're done. +/// - `ServiceAction`: the opcode is the first byte of a service action; the +/// caller needs to call .parse() on the `UnparsedServiceAction` we returned +/// with the service action byte. +pub(crate) fn parse_opcode(opcode: u8) -> ParseOpcodeResult { + let found = OPCODES.iter().find(|(_, (x, _))| *x == opcode); + match found { + Some(&(ty, (_, None))) => ParseOpcodeResult::Command(ty), + Some((_, (_, Some(_)))) => { + // we found some service action that uses this opcode; so this is a + // service action opcode, and we need the service action + ParseOpcodeResult::ServiceAction(UnparsedServiceAction(opcode)) + } + None => ParseOpcodeResult::Invalid, + } +} + +impl CommandType { + fn from_cdb(cdb: &[u8]) -> Result { + // TODO: Variable-length CDBs put the service action in a different + // place. This'll need to change if we ever support those. IIRC, Linux + // doesn't ever use them, so it may never be relevant. 
+ match parse_opcode(cdb[0]) { + ParseOpcodeResult::Command(ty) => Ok(ty), + ParseOpcodeResult::ServiceAction(sa) => sa + .parse(u16::from(cdb[1] & 0b0001_1111)) + .ok_or(ParseError::InvalidField), + ParseOpcodeResult::Invalid => Err(ParseError::InvalidCommand), + } + } + + /// Return the SCSI "CDB usage data" (see SPC-6 6.34.3) for this command + /// type. + /// + /// Basically, this consists of a structure the size of the CDB for the + /// command, starting with the opcode and service action (if any), then + /// proceeding to a bitmap of fields we recognize. + pub const fn cdb_template(self) -> &'static [u8] { + match self { + Self::TestUnitReady => &[ + 0x0, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0100, + ], + Self::RequestSense => &[ + 0x3, + 0b0000_0001, + 0b0000_0000, + 0b0000_0000, + 0b1111_1111, + 0b0000_0100, + ], + Self::ReportLuns => &[ + 0xa0, + 0b0000_0000, + 0b1111_1111, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b0000_0000, + 0b0000_0100, + ], + Self::ReadCapacity10 => &[ + 0x25, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0100, + ], + Self::ReadCapacity16 => &[ + 0x9e, + 0x10, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b0000_0000, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b0000_0000, + 0b0000_0100, + ], + Self::ModeSense6 => &[ + 0x1a, + 0b0000_1000, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b0000_0100, + ], + Self::Read10 => &[ + 0x28, + 0b1111_1100, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b0011_1111, + 0b1111_1111, + 0b1111_1111, + 0b0000_0100, + ], + Self::Write10 => &[ + 0x2A, + 0b1111_1100, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b0011_1111, + 0b1111_1111, + 0b1111_1111, + 0b0000_0100, + ], + Self::WriteSame16 => &[ + 0x93, + 
0b1111_1001, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b0011_1111, + 0b0000_0100, + ], + Self::Inquiry => &[ + 0x12, + 0b0000_0001, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b0000_0100, + ], + Self::ReportSupportedOperationCodes => &[ + 0xa3, + 0xc, + 0b1000_0111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b0000_0000, + 0b0000_0100, + ], + Self::SynchronizeCache10 => &[ + 0x35, // SYNCHRONIZE CACHE (10); first byte must equal this command's OPCODES entry + 0b0000_0010, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b1111_1111, + 0b0011_1111, + 0b1111_1111, + 0b1111_1111, + 0b0000_0100, + ], + } + } +} + +#[derive(Debug)] +pub(crate) struct Cdb { + pub command: Command, + pub allocation_length: Option, + pub naca: bool, +} + +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub(crate) enum ParseError { + /// The opcode (specifically the first byte of the CDB) is unknown, i.e. we + /// should respond with INVALID COMMAND OPERATION CODE + InvalidCommand, + /// Another field of the CDB (including the service action, if any) is + /// invalid, i.e. we should respond with INVALID FIELD IN CDB. + InvalidField, + /// The CDB has fewer bytes than necessary for its opcode. + TooSmall, +} + +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub(crate) enum ReportSupportedOpCodesMode { + All, + OneCommand(u8), + OneServiceAction(u8, u16), + OneCommandOrServiceAction(u8, u16), +} + +impl Cdb { + // TODO: do we want to ensure reserved fields are 0? SCSI allows, but + // doesn't require, us to do so.
+ pub(crate) fn parse(cdb: &[u8]) -> Result { + let ct = CommandType::from_cdb(cdb)?; + if cdb.len() < ct.cdb_template().len() { + return Err(ParseError::TooSmall); + } + // Shrink the cdb down to its size, so accidentally accessing fields past the + // length panics + let cdb = &cdb[..ct.cdb_template().len()]; + + // unwraps below are safe: they're just calling TryFrom to convert from slices + // to fixed-size arrays; in each case, we're using constant indexes and we + // verified above that they're in bounds, so none of them can panic at runtime + + match ct { + CommandType::Inquiry => { + // INQUIRY + let evpd = match cdb[1] { + 0 => false, + 1 => true, + // obsolete or reserved bits set + _ => return Err(ParseError::InvalidField), + }; + let page_code_raw = cdb[2]; + let page_code = match (evpd, page_code_raw) { + (false, 0) => None, + (true, pc) => Some(pc.try_into().map_err(|_| ParseError::InvalidField)?), + (false, _) => return Err(ParseError::InvalidField), + }; + Ok(Self { + command: Command::LunSpecificCommand(LunSpecificCommand::Inquiry(page_code)), + allocation_length: Some(u32::from(u16::from_be_bytes( + cdb[3..5].try_into().unwrap(), + ))), + naca: (cdb[5] & 0b0000_0100) != 0, + }) + } + CommandType::ModeSense6 => { + let dbd = match cdb[1] { + 0b0000_1000 => true, + 0b0000_0000 => false, + _ => return Err(ParseError::InvalidField), + }; + let pc = (cdb[2] & 0b1100_0000) >> 6; + let page_code = cdb[2] & 0b0011_1111; + let subpage_code = cdb[3]; + let mode: ModePageSelection = match (page_code, subpage_code) { + (0x8, 0x0) => ModePageSelection::Single(ModePage::Caching), + (0x3f, 0x0) => ModePageSelection::AllPageZeros, + _ => { + warn!( + "Rejecting request for unknown mode page {:#2x}/{:#2x}.", + page_code, subpage_code + ); + return Err(ParseError::InvalidField); + } + }; + Ok(Self { + command: Command::LunSpecificCommand(LunSpecificCommand::ModeSense6 { + pc: pc.try_into().map_err(|_| ParseError::InvalidField)?, + mode_page: mode, + dbd, + }), +
allocation_length: Some(u32::from(cdb[4])), + naca: (cdb[5] & 0b0000_0100) != 0, + }) + } + CommandType::Read10 => { + if cdb[1] & 0b1110_0100 != 0 { + // Features (protection and rebuild assist) we don't + // support; the standard says to respond with INVALID + // FIELD IN CDB for these if unsupported + return Err(ParseError::InvalidField); + } + Ok(Self { + command: Command::LunSpecificCommand(LunSpecificCommand::Read10 { + dpo: cdb[1] & 0b0001_0000 != 0, + fua: cdb[1] & 0b0000_1000 != 0, + lba: u32::from_be_bytes(cdb[2..6].try_into().unwrap()), + transfer_length: u16::from_be_bytes(cdb[7..9].try_into().unwrap()), + }), + allocation_length: None, + naca: (cdb[9] & 0b0000_0100) != 0, + }) + } + CommandType::Write10 => { + if cdb[1] & 0b1110_0000 != 0 { + // Feature (protection) that we don't + // support; the standard says to respond with INVALID + // FIELD IN CDB for these if unsupported + return Err(ParseError::InvalidField); + } + Ok(Self { + command: Command::LunSpecificCommand(LunSpecificCommand::Write10 { + dpo: cdb[1] & 0b0001_0000 != 0, + fua: cdb[1] & 0b0000_1000 != 0, + lba: u32::from_be_bytes(cdb[2..6].try_into().unwrap()), + transfer_length: u16::from_be_bytes(cdb[7..9].try_into().unwrap()), + }), + allocation_length: None, + naca: (cdb[9] & 0b0000_0100) != 0, + }) + } + CommandType::WriteSame16 => { + if cdb[1] & 0b1110_0001 != 0 { + warn!("Unsupported field in WriteSame16"); + // We neither support protections nor logical block provisioning + return Err(ParseError::InvalidField); + } + Ok(Self { + command: Command::LunSpecificCommand(LunSpecificCommand::WriteSame16 { + lba: u64::from_be_bytes(cdb[2..10].try_into().expect("lba should fit u64")), + number_of_logical_blocks: u32::from_be_bytes( + cdb[10..14].try_into().expect("block count should fit u32"), + ), + anchor: (cdb[1] & 0b0001_0000) != 0, + }), + allocation_length: None, + naca: (cdb[15] & 0b0000_0100) != 0, + }) + } + CommandType::SynchronizeCache10 => Ok(Self { + command: 
Command::LunSpecificCommand(LunSpecificCommand::SynchronizeCache10), + allocation_length: None, + naca: (cdb[9] & 0b0000_0100) != 0, + }), + CommandType::ReadCapacity10 => Ok(Self { + command: Command::LunSpecificCommand(LunSpecificCommand::ReadCapacity10), + allocation_length: None, + naca: (cdb[9] & 0b0000_0100) != 0, + }), + CommandType::ReadCapacity16 => Ok(Self { + command: Command::LunSpecificCommand(LunSpecificCommand::ReadCapacity16), + allocation_length: Some(u32::from_be_bytes(cdb[10..14].try_into().unwrap())), + naca: (cdb[15] & 0b0000_0100) != 0, + }), + CommandType::ReportLuns => Ok(Self { + command: Command::LunIndependentCommand(LunIndependentCommand::ReportLuns( + cdb[2].try_into().map_err(|_| ParseError::InvalidField)?, + )), + allocation_length: Some(u32::from_be_bytes(cdb[6..10].try_into().unwrap())), + // REPORT LUNS is a 12-byte CDB: CONTROL (and thus NACA) is byte 11; + // byte 9 is the low byte of ALLOCATION LENGTH + naca: (cdb[11] & 0b0000_0100) != 0, + }), + CommandType::ReportSupportedOperationCodes => { + let rctd = cdb[2] & 0b1000_0000 != 0; + let mode = match cdb[2] & 0b0000_0111 { + 0b000 => ReportSupportedOpCodesMode::All, + 0b001 => ReportSupportedOpCodesMode::OneCommand(cdb[3]), + 0b010 => ReportSupportedOpCodesMode::OneServiceAction( + cdb[3], + u16::from_be_bytes(cdb[4..6].try_into().unwrap()), + ), + 0b011 => ReportSupportedOpCodesMode::OneCommandOrServiceAction( + cdb[3], + u16::from_be_bytes(cdb[4..6].try_into().unwrap()), + ), + _ => return Err(ParseError::InvalidField), + }; + + Ok(Self { + command: Command::LunSpecificCommand( + LunSpecificCommand::ReportSupportedOperationCodes { rctd, mode }, + ), + allocation_length: Some(u32::from_be_bytes(cdb[6..10].try_into().unwrap())), + naca: (cdb[11] & 0b0000_0100) != 0, + }) + } + CommandType::RequestSense => { + let format = if cdb[1] & 0b0000_0001 == 1 { + SenseFormat::Descriptor + } else { + SenseFormat::Fixed + }; + Ok(Self { + command: Command::LunSpecificCommand(LunSpecificCommand::RequestSense(format)), + allocation_length: Some(u32::from(cdb[4])), + naca: (cdb[5] & 0b0000_0100) != 0, + }) + }
+ CommandType::TestUnitReady => Ok(Self { + command: Command::LunSpecificCommand(LunSpecificCommand::TestUnitReady), + allocation_length: None, + naca: (cdb[5] & 0b0000_0100) != 0, + }), + } + } +} diff --git a/crates/scsi/src/scsi/emulation/missing_lun.rs b/crates/scsi/src/scsi/emulation/missing_lun.rs new file mode 100644 index 00000000..cb94baa8 --- /dev/null +++ b/crates/scsi/src/scsi/emulation/missing_lun.rs @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +use std::io::{Read, Write}; + +use super::{ + command::{LunSpecificCommand, SenseFormat}, + response_data::{respond_standard_inquiry_data, SilentlyTruncate}, + target::{LogicalUnit, LunRequest}, +}; +use crate::scsi::{sense, CmdError, CmdError::DataIn, CmdOutput}; + +pub(crate) struct MissingLun; + +impl LogicalUnit for MissingLun { + fn execute_command( + &mut self, + data_in: &mut SilentlyTruncate<&mut dyn Write>, + _data_out: &mut dyn Read, + _req: LunRequest, + cmd: LunSpecificCommand, + ) -> Result { + match cmd { + LunSpecificCommand::Inquiry(page_code) => { + // peripheral qualifier 0b011: logical unit not accessible + // device type 0x1f: unknown/no device type + data_in.write_all(&[0b0110_0000 | 0x1f]).map_err(DataIn)?; + match page_code { + Some(_) => { + // SPC-6 7.7.2: "If the PERIPHERAL QUALIFIER field is + // not set to 000b, the contents of the PAGE LENGTH + // field and the VPD parameters are outside the + // scope of this standard." + // + // Returning a 0 length and no data seems sensible enough. + data_in.write_all(&[0]).map_err(DataIn)?; + } + None => { + respond_standard_inquiry_data(data_in).map_err(DataIn)?; + } + } + Ok(CmdOutput::ok()) + } + LunSpecificCommand::RequestSense(format) => { + match format { + SenseFormat::Fixed => { + data_in + .write_all(&sense::LOGICAL_UNIT_NOT_SUPPORTED.to_fixed_sense()) + .map_err(DataIn)?; + Ok(CmdOutput::ok()) + } + SenseFormat::Descriptor => { + // Don't support descriptor format.
+ Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB)) + } + } + } + _ => Ok(CmdOutput::check_condition( + sense::LOGICAL_UNIT_NOT_SUPPORTED, + )), + } + } +} diff --git a/crates/scsi/src/scsi/emulation/mod.rs b/crates/scsi/src/scsi/emulation/mod.rs new file mode 100644 index 00000000..d697842e --- /dev/null +++ b/crates/scsi/src/scsi/emulation/mod.rs @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +pub(crate) mod block_device; +mod command; +pub(crate) mod missing_lun; +pub(crate) mod mode_page; +mod response_data; +pub(crate) mod target; + +#[cfg(test)] +mod tests; diff --git a/crates/scsi/src/scsi/emulation/mode_page.rs b/crates/scsi/src/scsi/emulation/mode_page.rs new file mode 100644 index 00000000..e0c30e76 --- /dev/null +++ b/crates/scsi/src/scsi/emulation/mode_page.rs @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +use std::io::{self, Write}; + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub(crate) enum ModePage { + Caching, +} + +impl ModePage { + pub(crate) const ALL_ZERO: &'static [Self] = &[Self::Caching]; + + pub(crate) const fn page_code(self) -> (u8, u8) { + match self { + Self::Caching => (0x8, 0), + } + } + + pub(crate) const fn page_length(self) -> u8 { + match self { + Self::Caching => 0x12, + } + } + + pub(crate) fn write(self, data_in: &mut impl Write) -> io::Result<()> { + assert_eq!(self.page_code().1, 0, "Subpages aren't supported yet."); + + data_in.write_all(&[ + self.page_code().0, // top 2 bits: no subpage, saving not supported + self.page_length(), // page length + ])?; + + match self { + Self::Caching => { + data_in.write_all(&[ + // Writeback Cache Enable, lots of bits zero + // n.b. 
kernel logs will show WCE off; it always says + // that for read-only devices, which we are rn + 0b0000_0100, + ])?; + // various cache fine-tuning stuff we can't really control + data_in.write_all(&[0; 0x11])?; + } + } + + Ok(()) + } +} diff --git a/crates/scsi/src/scsi/emulation/response_data.rs b/crates/scsi/src/scsi/emulation/response_data.rs new file mode 100644 index 00000000..352d0698 --- /dev/null +++ b/crates/scsi/src/scsi/emulation/response_data.rs @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +//! Some helpers for writing response data, shared between `BlockDevice` and +//! `MissingLun` + +use std::{cmp::min, convert::TryFrom, io, io::Write}; + +/// A wrapper around a `Write` that silently truncates its input after a given +/// number of bytes. This matches the semantics of SCSI's ALLOCATION LENGTH +/// field; anything beyond the allocation length is silently omitted. +pub struct SilentlyTruncate(W, usize); + +impl SilentlyTruncate { + pub const fn new(writer: W, len: usize) -> Self { + Self(writer, len) + } +} + +impl Write for SilentlyTruncate { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + if self.1 == 0 { + // our goal is to silently fail, so once we've stopped actually + // writing, just pretend all writes work + return Ok(buf.len()); + } + let len = min(buf.len(), self.1); + let buf = &buf[..len]; + let written = self.0.write(buf)?; + self.1 -= written; + Ok(written) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.0.flush() + } +} + +fn encode_lun(lun: u16) -> [u8; 8] { + let lun = u8::try_from(lun).expect("more than 255 LUNs are currently unsupported"); + [0, lun, 0, 0, 0, 0, 0, 0] +} + +/// Write the response data for a REPORT LUNS command. 
+pub fn respond_report_luns(data_in: &mut impl Write, luns: T) -> io::Result<()> +where + T: IntoIterator, + T::IntoIter: ExactSizeIterator, +{ + let iter = luns.into_iter(); + data_in.write_all( + &(u32::try_from(iter.len() * 8)) + .expect("less than 256 LUNS") + .to_be_bytes(), + )?; + data_in.write_all(&[0; 4])?; // reserved + for lun in iter { + data_in.write_all(&encode_lun(lun))?; + } + Ok(()) +} + +/// Write the response data for a standard (i.e. not VPD) inquiry, excluding the +/// first byte (the peripheral qualifier and device type). +pub fn respond_standard_inquiry_data(data_in: &mut impl Write) -> io::Result<()> { + // TODO: Feature bits here we might want to support: + // - NormACA + // - command queueing + data_in.write_all(&[ + // various bits: not removable, not part of a + // conglomerate, no info on hotpluggability + 0, + 0x7, // version: SPC-6 + // bits: don't support NormACA, support modern LUN format + // INQUIRY data version 2 + 0b0001_0000 | 0x2, + 91, // additional INQUIRY data length + // bunch of feature bits we don't support: + 0, + 0, + 0, + ])?; + + // TODO: register this or another name with T10 + data_in.write_all(b"rust-vmm")?; + data_in.write_all(b"vhost-user-scsi ")?; + data_in.write_all(b"v0 ")?; + + // The Linux kernel doesn't request any more than this, so any data we return + // after this point is mostly academic.
+ + data_in.write_all(&[0; 22])?; + + let product_descs: &[u16; 8] = &[ + 0x00c0, // SAM-6 (no version claimed) + 0x05c0, // SPC-5 (no version claimed) + 0x0600, // SBC-4 (no version claimed) + 0x0, 0x0, 0x0, 0x0, 0x0, + ]; + + for desc in product_descs { + data_in.write_all(&desc.to_be_bytes())?; + } + + data_in.write_all(&[0; 22])?; + + Ok(()) +} diff --git a/crates/scsi/src/scsi/emulation/target.rs b/crates/scsi/src/scsi/emulation/target.rs new file mode 100644 index 00000000..82e660cc --- /dev/null +++ b/crates/scsi/src/scsi/emulation/target.rs @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +use std::convert::TryFrom; +use std::io::{Read, Write}; + +use log::error; + +use super::{ + command::{ + Cdb, Command, LunIndependentCommand, LunSpecificCommand, ParseError, ReportLunsSelectReport, + }, + missing_lun::MissingLun, + response_data::{respond_report_luns, SilentlyTruncate}, +}; +use crate::scsi::{sense, CmdError, CmdOutput, Request, Target, TaskAttr}; + +pub(crate) struct LunRequest { + pub _id: u64, + pub task_attr: TaskAttr, + pub crn: u8, + pub prio: u8, + pub _allocation_length: Option, + pub naca: bool, +} + +/// A single logical unit of an emulated SCSI device. +pub(crate) trait LogicalUnit: Send + Sync { + /// Process a SCSI command sent to this logical unit. + /// + /// # Return value + /// This function returns a Result, but it should return Err only in limited + /// circumstances: when something goes wrong at the transport level, such + /// as writes to `req.data_in` failing or `req.cdb` being too short. + /// Any other errors, such as invalid SCSI commands or I/O errors + /// accessing an underlying file, should result in an Ok return value + /// with a `CmdOutput` representing a SCSI-level error (i.e. CHECK + /// CONDITION status, and appropriate sense data). 
+ fn execute_command( + &mut self, + data_in: &mut SilentlyTruncate<&mut dyn Write>, + data_out: &mut dyn Read, + parameters: LunRequest, + command: LunSpecificCommand, + ) -> Result; +} + +/// A SCSI target implemented by emulating a device within vhost-user-scsi. +pub(crate) struct EmulatedTarget { + luns: Vec>, +} + +impl EmulatedTarget { + pub(crate) fn new() -> Self { + Self { luns: Vec::new() } + } + + pub(crate) fn add_lun(&mut self, logical_unit: Box) { + self.luns.push(logical_unit); + } + + pub(crate) fn luns(&self) -> impl Iterator + ExactSizeIterator + '_ { + // unwrap is safe: we limit LUNs at 256 + self.luns + .iter() + .enumerate() + .map(|(idx, _logical_unit)| u16::try_from(idx).unwrap()) + } +} + +impl Default for EmulatedTarget { + fn default() -> Self { + Self::new() + } +} + +impl Target for EmulatedTarget { + fn execute_command( + &mut self, + lun: u16, + data_out: &mut dyn Read, + data_in: &mut dyn Write, + req: Request, + ) -> Result { + match Cdb::parse(req.cdb) { + Ok(cdb) => { + let mut data_in = SilentlyTruncate::new( + data_in, + cdb.allocation_length.map_or(usize::MAX, |x| x as usize), + ); + + match cdb.command { + Command::LunIndependentCommand(cmd) => match cmd { + LunIndependentCommand::ReportLuns(select_report) => { + match select_report { + ReportLunsSelectReport::NoWellKnown + | ReportLunsSelectReport::All => { + respond_report_luns(&mut data_in, self.luns()) + .map_err(CmdError::DataIn)?; + } + ReportLunsSelectReport::WellKnownOnly + | ReportLunsSelectReport::Administrative + | ReportLunsSelectReport::TopLevel + | ReportLunsSelectReport::SameConglomerate => { + respond_report_luns(&mut data_in, vec![].into_iter()) + .map_err(CmdError::DataIn)?; + } + } + Ok(CmdOutput::ok()) + } + }, + Command::LunSpecificCommand(cmd) => { + let req = LunRequest { + _id: req.id, + task_attr: req.task_attr, + crn: req.crn, + prio: req.prio, + _allocation_length: cdb.allocation_length, + naca: cdb.naca, + }; + match self.luns.get_mut(lun as usize) 
{ + Some(lun) => lun.execute_command(&mut data_in, data_out, req, cmd), + None => MissingLun.execute_command(&mut data_in, data_out, req, cmd), + } + } + } + } + Err(ParseError::InvalidCommand) => { + error!("Rejecting CDB for unknown command: {:?}", req.cdb); + Ok(CmdOutput::check_condition( + sense::INVALID_COMMAND_OPERATION_CODE, + )) + } + // TODO: SCSI has a provision for INVALID FIELD IN CDB to include the + // index of the invalid field, but it's not clear if that's mandatory. + // In any case, QEMU omits it. + Err(ParseError::InvalidField) => { + error!("Rejecting CDB with invalid field: {:?}", req.cdb); + Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB)) + } + Err(ParseError::TooSmall) => Err(CmdError::CdbTooShort), + } + } +} diff --git a/crates/scsi/src/scsi/emulation/tests/bad_lun.rs b/crates/scsi/src/scsi/emulation/tests/bad_lun.rs new file mode 100644 index 00000000..38b7e4a2 --- /dev/null +++ b/crates/scsi/src/scsi/emulation/tests/bad_lun.rs @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +use super::{do_command_fail_lun, do_command_in_lun, null_image}; +use crate::scsi::{ + emulation::{block_device::BlockDevice, target::EmulatedTarget}, + sense, +}; + +#[test] +fn test_report_luns() { + let mut target = EmulatedTarget::new(); + for _ in 0..5 { + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + } + + let select_reports = &[0x0, 0x2]; // all but well known, all + + for &sr in select_reports { + do_command_in_lun( + &mut target, + 6, + &[ + 0xa0, // REPORT LUNS + 0, // reserved + sr, // select report + 0, 0, 0, // reserved + 0, 0, 1, 0, // alloc length: 256 + 0, 0, + ], + &[], + &[ + 0, 0, 0, 40, // length: 5*8 = 40 + 0, 0, 0, 0, // reserved + 0, 0, 0, 0, 0, 0, 0, 0, // LUN 0 + 0, 1, 0, 0, 0, 0, 0, 0, // LUN 1 + 0, 2, 0, 0, 0, 0, 0, 0, // LUN 2 + 0, 3, 0, 0, 0, 0, 0, 0, // LUN 3 + 0, 4, 0, 0, 0, 0, 0, 0, // LUN 4 + ], + ); + } +} + +#[test] +fn test_report_luns_empty() { + let mut target 
= EmulatedTarget::new(); + for _ in 0..5 { + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + } + + // well-known only and several modes explicitly defined to return an empty list + // for all but certain types of receiving LUNs + let select_reports = &[0x1, 0x10, 0x11, 0x12]; + + for &sr in select_reports { + do_command_in_lun( + &mut target, + 6, + &[ + 0xa0, // REPORT LUNS + 0, // reserved + sr, // select report + 0, 0, 0, // reserved + 0, 0, 1, 0, // alloc length: 256 + 0, 0, + ], + &[], + &[ + 0, 0, 0, 0, // length: 0 + 0, 0, 0, 0, // reserved + ], + ); + } +} + +#[test] +fn test_request_sense() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in_lun( + &mut target, + 1, + &[ + 0x3, // REQUEST SENSE + 0, // fixed format sense data + 0, 0, // reserved + 255, // alloc length + 0, // control + ], + &[], + &sense::LOGICAL_UNIT_NOT_SUPPORTED.to_fixed_sense(), + ); +} + +#[test] +fn test_request_sense_descriptor_format() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_fail_lun( + &mut target, + 1, + &[ + 0x3, // REQUEST SENSE + 1, // descriptor format sense data + 0, 0, // reserved + 255, // alloc length + 0, // control + ], + sense::INVALID_FIELD_IN_CDB, + ); +} + +#[test] +fn test_inquiry() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in_lun( + &mut target, + 1, + &[ + 0x12, // INQUIRY + 0, // EVPD bit: 0 + 0, // page code + 1, 0, // alloc length: 256 + 0, // control + ], + &[], + // some empty comments to get rustfmt to do something vaguely sensible + &[ + 0x7f, // device not accessible, unknown type + 0, // features + 0x7, // version + 0x12, // response data format v2, HiSup = 1 + 91, // addl length + 0, 0, 0, // unsupported features + // vendor + b'r', b'u', b's', b't',
b'-', b'v', b'm', b'm', // + // product + b'v', b'h', b'o', b's', b't', b'-', b'u', b's', b'e', b'r', b'-', b's', b'c', b's', + b'i', b' ', // + // revision + b'v', b'0', b' ', b' ', // + // reserved/obsolete/vendor specific + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // version descriptors + 0x0, 0xc0, // SAM-6 + 0x05, 0xc0, // SPC-5 (no code assigned for 6 yet) + 0x06, 0x0, // SBC-4 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // + // reserved + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + ); +} + +#[test] +fn test_other_command() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_fail_lun( + &mut target, + 1, + &[ + 0, // TEST UNIT READY + 0, 0, 0, 0, // reserved + 0, // control + ], + sense::LOGICAL_UNIT_NOT_SUPPORTED, + ); +} + +#[test] +fn test_invalid_command() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_fail_lun( + &mut target, + 1, + &[ + 0xff, // vendor specific + 0, 0, 0, 0, // reserved + 0, // control + ], + sense::INVALID_COMMAND_OPERATION_CODE, + ); +} diff --git a/crates/scsi/src/scsi/emulation/tests/generic.rs b/crates/scsi/src/scsi/emulation/tests/generic.rs new file mode 100644 index 00000000..19f4bd47 --- /dev/null +++ b/crates/scsi/src/scsi/emulation/tests/generic.rs @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +//! Tests for stuff shared between commands.
+ +use std::io::ErrorKind; + +use super::{do_command_fail, test_image}; +use crate::scsi::{ + emulation::{block_device::BlockDevice, target::EmulatedTarget}, + sense, CmdError, Request, Target, TaskAttr, +}; + +#[test] +fn test_invalid_opcode() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(test_image()); + target.add_lun(Box::new(dev)); + + do_command_fail( + &mut target, + &[ + 0xff, // vendor specific, unused by us + 0, 0, 0, 0, 0, + ], + sense::INVALID_COMMAND_OPERATION_CODE, + ); +} + +#[test] +fn test_invalid_service_action() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(test_image()); + target.add_lun(Box::new(dev)); + + do_command_fail( + &mut target, + &[ + 0xa3, // MAINTENANCE IN + 0x1f, // vendor specific, unused by us + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + sense::INVALID_FIELD_IN_CDB, + ); +} + +#[test] +fn test_short_data_out_buffer() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(test_image()); + target.add_lun(Box::new(dev)); + + let mut data_in: &mut [u8] = &mut []; + let mut data_out: &[u8] = &[0_u8; 511]; + + let res = target.execute_command( + 0, + &mut data_out, + &mut data_in, + Request { + id: 0, + cdb: &[ + 0x28, // READ (10) + 0, // flags + 0, 0, 0, 15, // LBA: 15 + 0, // reserved, group # + 0, 1, // transfer length: 1 + 0, // control + ], + task_attr: TaskAttr::Simple, + crn: 0, + prio: 0, + }, + ); + + if let CmdError::DataIn(e) = res.unwrap_err() { + assert_eq!(e.kind(), ErrorKind::WriteZero); + } else { + panic!(); + } +} + +#[test] +fn test_short_cdb() { + let mut target: EmulatedTarget = EmulatedTarget::new(); + let dev = BlockDevice::new(test_image()); + target.add_lun(Box::new(dev)); + + let mut data_in: &mut [u8] = &mut []; + let mut data_out: &[u8] = &[]; + + let res = target.execute_command( + 0, + &mut data_out, + &mut data_in, + Request { + id: 0, + cdb: &[ + 0x28, // READ (10) + ], + task_attr: TaskAttr::Simple, + crn: 0, + prio: 0, + }, + ); + +
assert!(matches!(res.unwrap_err(), CmdError::CdbTooShort)); +} diff --git a/crates/scsi/src/scsi/emulation/tests/mod.rs b/crates/scsi/src/scsi/emulation/tests/mod.rs new file mode 100644 index 00000000..8e8ffc18 --- /dev/null +++ b/crates/scsi/src/scsi/emulation/tests/mod.rs @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +#![cfg(test)] + +mod bad_lun; +mod generic; +mod report_supported_operation_codes; + +use std::{ + fs::File, + io::Write, + sync::{Arc, Mutex}, +}; + +use tempfile::tempfile; + +use super::{ + block_device::{ + BlockDevice, BlockDeviceBackend, BlockOffset, BlockSize, ByteOffset, FileBackend, + }, + target::EmulatedTarget, +}; +use crate::scsi::{ + sense::{self, SenseTriple}, + CmdOutput, Request, Target, TaskAttr, +}; + +#[derive(Clone)] +struct TestBackend { + data: Arc>, +} + +impl TestBackend { + fn new() -> Self { + TestBackend { + data: Arc::new(Mutex::new([0; 512 * 16])), + } + } +} + +impl BlockDeviceBackend for TestBackend { + fn read_exact_at(&mut self, buf: &mut [u8], offset: ByteOffset) -> std::io::Result<()> { + let data = self.data.lock().unwrap(); + + let offset = usize::try_from(u64::from(offset)).expect("offset should fit usize"); + buf.copy_from_slice(&data[offset..(offset + buf.len())]); + Ok(()) + } + + fn write_exact_at(&mut self, buf: &[u8], offset: ByteOffset) -> std::io::Result<()> { + let mut data = self.data.lock().unwrap(); + + let offset = usize::try_from(u64::from(offset)).expect("offset should fit usize"); + data[offset..(offset + buf.len())].copy_from_slice(buf); + Ok(()) + } + + fn size_in_blocks(&mut self) -> std::io::Result { + Ok(ByteOffset::from( + u64::try_from(self.data.lock().unwrap().len()).expect("size_in_blocks should fit u64"), + ) / self.block_size()) + } + + fn block_size(&self) -> BlockSize { + BlockSize::try_from(512).expect("512 should be a valid BlockSize") + } + + fn sync(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +fn null_image() -> FileBackend { + 
FileBackend::new(File::open("/dev/null").unwrap()) +} + +fn test_image() -> FileBackend { + let mut f = tempfile().unwrap(); + // generate 16 512-byte sectors, each of which consist of a single + // repeated hex character, i.e. + // sector 00: 0000000....0000 + // sector 15: fffffff....ffff + for chr in b'0'..=b'9' { + f.write_all(&[chr; 512]).unwrap(); + } + for chr in b'a'..=b'f' { + f.write_all(&[chr; 512]).unwrap(); + } + FileBackend::new(f) +} + +fn do_command_in_lun( + target: &mut EmulatedTarget, + lun: u16, + cdb: &[u8], + data_out: &[u8], + expected_data_in: &[u8], +) { + let mut data_in = Vec::new(); + + let res = target.execute_command( + lun, + &mut &data_out[..], + &mut data_in, + Request { + id: 0, + cdb, + task_attr: TaskAttr::Simple, + crn: 0, + prio: 0, + }, + ); + + assert_eq!(res.unwrap(), CmdOutput::ok()); + assert_eq!(&data_in, expected_data_in); +} + +fn do_command_fail_lun( + target: &mut EmulatedTarget, + lun: u16, + cdb: &[u8], + expected_error: SenseTriple, +) { + let mut data_in = Vec::new(); + let mut data_out: &[u8] = &[]; + + let res = target.execute_command( + lun, + &mut data_out, + &mut data_in, + Request { + id: 0, + cdb, + task_attr: TaskAttr::Simple, + crn: 0, + prio: 0, + }, + ); + + assert_eq!(res.unwrap(), CmdOutput::check_condition(expected_error)); + assert_eq!(&data_in, &[]); +} + +fn do_command_in( + target: &mut EmulatedTarget, + cdb: &[u8], + data_out: &[u8], + expected_data_in: &[u8], +) { + do_command_in_lun(target, 0, cdb, data_out, expected_data_in); +} + +fn do_command_fail(target: &mut EmulatedTarget, cdb: &[u8], expected_error: SenseTriple) { + do_command_fail_lun(target, 0, cdb, expected_error); +} + +fn block_size_512() -> BlockSize { + BlockSize::try_from(512).expect("512 should be a valid block_size") +} + +#[test] +fn test_test_unit_ready() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in(&mut target, &[0, 0, 0, 0, 0, 
0], &[], &[]); +} + +#[test] +fn test_report_luns() { + let mut target = EmulatedTarget::new(); + for _ in 0..5 { + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + } + + do_command_in( + &mut target, + &[ + 0xa0, // REPORT LUNS + 0, // reserved + 0, // select report + 0, 0, 0, // reserved + 0, 0, 1, 0, // alloc length: 256 + 0, 0, + ], + &[], + &[ + 0, 0, 0, 40, // length: 5*8 = 40 + 0, 0, 0, 0, // reserved + 0, 0, 0, 0, 0, 0, 0, 0, // LUN 0 + 0, 1, 0, 0, 0, 0, 0, 0, // LUN 1 + 0, 2, 0, 0, 0, 0, 0, 0, // LUN 2 + 0, 3, 0, 0, 0, 0, 0, 0, // LUN 3 + 0, 4, 0, 0, 0, 0, 0, 0, // LUN 4 + ], + ); +} + +#[test] +fn test_read_10() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(test_image()); + target.add_lun(Box::new(dev)); + + // TODO: this test relies on the default logical block size of 512. We should + // make that explicit. + + do_command_in( + &mut target, + &[ + 0x28, // READ (10) + 0, // flags + 0, 0, 0, 5, // LBA: 5 + 0, // reserved, group # + 0, 1, // transfer length: 1 + 0, // control + ], + &[], + &[b'5'; 512], + ); +} + +#[test] +fn test_read_10_last_block() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(test_image()); + target.add_lun(Box::new(dev)); + + // TODO: this test relies on the default logical block size of 512. We should + // make that explicit. + + do_command_in( + &mut target, + &[ + 0x28, // READ (10) + 0, // flags + 0, 0, 0, 15, // LBA: 15 + 0, // reserved, group # + 0, 1, // transfer length: 1 + 0, // control + ], + &[], + &[b'f'; 512], + ); +} + +#[test] +fn test_read_10_out_of_range() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(test_image()); + target.add_lun(Box::new(dev)); + + // TODO: this test relies on the default logical block size of 512. We should + // make that explicit.
+ + do_command_fail( + &mut target, + &[ + 0x28, // READ (10) + 0, // flags + 0, 0, 0, 16, // LBA: 16 + 0, // reserved, group # + 0, 1, // transfer length: 1 + 0, // control + ], + sense::LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE, + ); +} + +#[test] +fn test_read_10_cross_out() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(test_image()); + target.add_lun(Box::new(dev)); + + // TODO: this test relies on the default logical block size of 512. We should + // make that explicit. + + do_command_fail( + &mut target, + &[ + 0x28, // READ (10) + 0, // flags + 0, 0, 0, 15, // LBA: 15 + 0, // reserved, group # + 0, 2, // transfer length: 2 + 0, // control + ], + sense::LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE, + ); +} + +#[test] +fn test_write_10() { + let mut target = EmulatedTarget::new(); + let mut backend = TestBackend::new(); + let dev = BlockDevice::new(backend.clone()); + target.add_lun(Box::new(dev)); + + // TODO: this test relies on the default logical block size of 512. We should + // make that explicit. + + { + let data_out = [b'w'; 512]; + + do_command_in( + &mut target, + &[ + 0x2a, // WRITE (10) + 0, // flags + 0, 0, 0, 5, // LBA: 5 + 0, // reserved, group # + 0, 1, // transfer length: 1 + 0, // control + ], + &data_out, + &[], + ); + + let mut buf = [0_u8; 512]; + backend + .read_exact_at(&mut buf, BlockOffset::from(5) * block_size_512()) + .expect("Reading should work"); + assert_eq!(data_out, buf); + } +} + +#[test] +fn test_write_same_16() { + let mut target = EmulatedTarget::new(); + let mut backend = TestBackend::new(); + let dev = BlockDevice::new(backend.clone()); + target.add_lun(Box::new(dev)); + + // TODO: this test relies on the default logical block size of 512. We should + // make that explicit.
+ + backend + .write_exact_at(&[0xff; 512 * 6], BlockOffset::from(5) * block_size_512()) + .expect("Write should succeed"); + + let data_out = [0_u8; 512]; + + do_command_in( + &mut target, + &[ + 0x93, // WRITE SAME (16) + 0, // flags + 0, 0, 0, 0, 0, 0, 0, 5, // LBA: 5 + 0, 0, 0, 5, // number of blocks: 5 + 0, // reserved, group # + 0, // control + ], + &data_out, + &[], + ); + + let mut buf = [0_u8; 512 * 5]; + backend + .read_exact_at(&mut buf, BlockOffset::from(5) * block_size_512()) + .expect("Reading should work"); + assert_eq!([0_u8; 512 * 5], buf, "5 sectors should have been zero'd"); + + let mut buf = [0_u8; 512]; + backend + .read_exact_at(&mut buf, BlockOffset::from(10) * block_size_512()) + .expect("Reading should work"); + assert_eq!( + [0xff_u8; 512], buf, + "sector after write should be left untouched" + ); +} + +#[test] +fn test_read_capacity_10() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(test_image()); + target.add_lun(Box::new(dev)); + + // TODO: this test relies on the default logical block size of 512. We should + // make that explicit. + + // TODO: we should test behavior with ≥ 2 TiB images. But not sure how we + // can do that reliably without risking using 2 TiB of disk + + do_command_in( + &mut target, + &[ + 0x25, // READ CAPACITY (10) + 0, 0, 0, 0, 0, 0, 0, 0, // flags + 0, // control + ], + &[], + &[ + 0, 0, 0, 15, // returned LBA (last valid LBA), + 0, 0, 2, 0, // block size (512) + ], + ); +} + +#[test] +fn test_read_capacity_16() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(test_image()); + target.add_lun(Box::new(dev)); + + // TODO: this test relies on the default logical block size of 512. We should + // make that explicit.
+ + do_command_in( + &mut target, + &[ + 0x9e, 0x10, // READ CAPACITY (16) + 0, 0, 0, 0, 0, 0, 0, 0, // obsolete + 0, 0, 0, 32, // allocation length: 32 + 0, // obsolete/reserved + 0, // control + ], + &[], + &[ + 0, 0, 0, 0, 0, 0, 0, 15, // returned LBA (last valid LBA), + 0, 0, 2, 0, // block size (512) + 0, // reserved, zoned stuff, protection stuff + 0, // one PB per LB + 0xc0, // thin provisioning, unmapped blocks read 0 + 0, // LBA 0 is aligned (top bits above) + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // reserved + ], + ); +} + +#[test] +fn test_inquiry() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0x12, // INQUIRY + 0, // EVPD bit: 0 + 0, // page code + 1, 0, // alloc length: 256 + 0, // control + ], + &[], + // some empty comments to get rustfmt to do something vaguely sensible + &[ + 0, // accessible; direct access block device + 0, // features + 0x7, // version + 0x12, // response data format v2, HiSup = 1 + 91, // addl length + 0, 0, 0, // unsupported features + // vendor + b'r', b'u', b's', b't', b'-', b'v', b'm', b'm', // + // product + b'v', b'h', b'o', b's', b't', b'-', b'u', b's', b'e', b'r', b'-', b's', b'c', b's', + b'i', b' ', // + // revision + b'v', b'0', b' ', b' ', // + // reserved/obsolete/vendor specific + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // version descriptors + 0x0, 0xc0, // SAM-6 + 0x05, 0xc0, // SPC-5 (no code assigned for 6 yet) + 0x06, 0, // SBC-4 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // + // reserved + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + ); +} + +#[test] +fn test_request_sense() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0x3, // REQUEST SENSE + 0, // desc bit: 0 + 0, 0, // reserved + 255, // alloc length + 0, // control + ], + &[],
+ // We'll always return this - modern SCSI has autosense, so any errors are sent with the + // response to the command that caused them (and therefore immediately cleared), and + // REQUEST SENSE returns an actual error only under some exceptional circumstances + // we don't implement. + &sense::NO_ADDITIONAL_SENSE_INFORMATION.to_fixed_sense(), + ); +} + +#[test] +fn test_request_sense_descriptor_format() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_fail( + &mut target, + &[ + 0x3, // REQUEST SENSE + 1, // desc bit: 1 + 0, 0, // reserved + 255, // alloc length + 0, // control + ], + // We don't support descriptor format sense data. + sense::INVALID_FIELD_IN_CDB, + ); +} diff --git a/crates/scsi/src/scsi/emulation/tests/report_supported_operation_codes.rs b/crates/scsi/src/scsi/emulation/tests/report_supported_operation_codes.rs new file mode 100644 index 00000000..016d6a60 --- /dev/null +++ b/crates/scsi/src/scsi/emulation/tests/report_supported_operation_codes.rs @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +use super::{do_command_fail, do_command_in, null_image}; +use crate::scsi::{ + emulation::{block_device::BlockDevice, target::EmulatedTarget}, + sense, +}; + +#[test] +fn test_one_command() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0b1, // reporting options: one command + 0, 1, 2, // opcode: TEST UNIT READY, SA ignored + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + &[], + &[ + 0, 0b11, // flags, supported + 0, 6, // cdb len + 0, 0, 0, 0, 0, 0b0100, // usage data + ], + ); +} + +#[test] +fn test_one_command_with_timeout_descriptor() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); +
target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0x81, // request timeout descs, reporting options: one command + 0, 1, 2, // opcode: TEST UNIT READY, SA ignored + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + &[], + &[ + 0, 0b11, // flags, supported + 0, 6, // cdb len + 0, 0, 0, 0, 0, 0b0100, // usage data + 0, 0xa, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // no timeouts + ], + ); +} + +#[test] +fn test_one_command_unsupported() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0b1, // reporting options: one command + 0xff, 1, 2, // opcode: vendor specific, SA ignored + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + &[], + &[ + 0, 0b01, // flags, not supported + 0, 0, // cdb len + ], + ); +} + +#[test] +fn test_one_command_valid_service_action() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_fail( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0b1, // reporting options: one command + 0x9e, 0, 0x10, // SERVICE ACTION IN (16), READ CAPACITY (16) + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + sense::INVALID_FIELD_IN_CDB, + ); +} + +#[test] +fn test_one_command_invalid_service_action() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_fail( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0b1, // reporting options: one command + 0x9e, 0, 0xff, // SERVICE ACTION IN (16), invalid + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + sense::INVALID_FIELD_IN_CDB, + ); +} + +#[test] +fn test_one_service_action() { 
+ let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0b10, // reporting options: one service action + 0x9e, 0, 0x10, // SERVICE ACTION IN (16), READ CAPACITY (16) + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + &[], + &[ + 0, 0b11, // flags, supported + 0, 16, // cdb len + 0x9e, 0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0, + 0b0100, // usage data + ], + ); +} + +#[test] +fn test_one_service_action_with_timeout_descriptor() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0x82, // request timeout descs, reporting options: one service action + 0x9e, 0, 0x10, // SERVICE ACTION IN (16), READ CAPACITY (16) + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + &[], + &[ + 0, 0b11, // flags, supported + 0, 16, // cdb len + 0x9e, 0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0, + 0b0100, // usage data + 0, 0xa, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // no timeouts + ], + ); +} + +#[test] +fn test_one_service_action_unknown_opcode() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + // not entirely sure this behavior is correct; see comment in implementation + do_command_fail( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0b10, // reporting options: one service action + 0xff, 1, 2, // opcode: vendor specific, unimplemented + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + sense::INVALID_FIELD_IN_CDB, + ); +} + +#[test] +fn test_one_service_action_unknown_service_action() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + 
target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0b10, // reporting options: one service action + 0x9e, 0, 0xff, // SERVICE ACTION IN (16), invalid SA + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + &[], + &[ + 0, 0b01, // flags, not supported + 0, 0, // cdb len + ], + ); +} + +#[test] +fn test_one_service_action_not_service_action() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_fail( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0b10, // reporting options: one service action + 0, 1, 2, // TEST UNIT READY + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + sense::INVALID_FIELD_IN_CDB, + ); +} + +// rest of these tests are for "mode 3", which the spec calls 011b and our +// implementation calls OneCommandOrServiceAction, but that's a mouthful so just +// use "mode 3" for test names + +#[test] +fn test_mode_3_opcode_without_service_action() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0b11, // reporting options: mode 3 + 0, 0, 0, // opcode: TEST UNIT READY, SA: 0 + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + &[], + &[ + 0, 0b11, // flags, supported + 0, 6, // cdb len + 0, 0, 0, 0, 0, 0b0100, // usage data + ], + ); +} + +#[test] +fn test_mode_3_with_timeout_descriptor() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0x83, // request timeout descs, reporting options: mode 3 + 0, 0, 0, // opcode: TEST UNIT READY, SA: 0 + 0, 0, 1, 0, // allocation length: 256 + 0, // 
reserved + 0, // control + ], + &[], + &[ + 0, 0b11, // flags, supported + 0, 6, // cdb len + 0, 0, 0, 0, 0, 0b0100, // usage data + 0, 0xa, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // no timeouts + ], + ); +} + +#[test] +fn test_mode_3_opcode_with_unnecessary_service_action() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0b11, // reporting options: mode 3 + 0, 0, 1, // opcode: TEST UNIT READY, SA: 1 + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + &[], + &[ + 0, 0b01, // flags, not supported + 0, 0, // cdb len + ], + ); +} + +#[test] +fn test_mode_3_invalid_opcode() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0b11, // reporting options: mode 3 + 0xff, 0, 0, // opcode: vendor specific + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + &[], + &[ + 0, 0b01, // flags, not supported + 0, 0, // cdb len + ], + ); +} + +#[test] +fn test_mode_3_service_action() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0b11, // reporting options: mode 3 + 0x9e, 0, 0x10, // opcode: SERVICE ACTION IN (16), READ CAPACITY (16) + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + &[], + &[ + 0, 0b11, // flags, supported + 0, 16, // cdb len + 0x9e, 0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0, + 0b0100, // usage data + ], + ); +} + +#[test] +fn test_mode_3_service_action_with_timeout_descriptor() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); 
+ + do_command_in( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0x83, // request timeout descs, reporting options: mode 3 + 0x9e, 0, 0x10, // opcode: SERVICE ACTION IN (16), READ CAPACITY (16) + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + &[], + &[ + 0, 0b11, // flags, supported + 0, 16, // cdb len + 0x9e, 0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0, + 0b0100, // usage data + 0, 0xa, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // no timeouts + ], + ); +} + +#[test] +fn test_mode_3_invalid_service_action() { + let mut target = EmulatedTarget::new(); + let dev = BlockDevice::new(null_image()); + target.add_lun(Box::new(dev)); + + do_command_in( + &mut target, + &[ + 0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES + 0b11, // reporting options: mode 3 + 0x9e, 0, 0xff, // opcode: SERVICE ACTION IN (16), invalid SA + 0, 0, 1, 0, // allocation length: 256 + 0, // reserved + 0, // control + ], + &[], + &[ + 0, 0b01, // flags, not supported + 0, 0, // cdb len + ], + ); +} diff --git a/crates/scsi/src/scsi/mod.rs b/crates/scsi/src/scsi/mod.rs new file mode 100644 index 00000000..9c1f1589 --- /dev/null +++ b/crates/scsi/src/scsi/mod.rs @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +pub mod emulation; +pub mod sense; + +use std::io::{self, Read, Write}; + +use self::sense::SenseTriple; + +#[derive(PartialEq, Eq, Debug, Clone, Copy)] +pub enum TaskAttr { + Simple, + Ordered, + HeadOfQueue, + Aca, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct CmdOutput { + pub status: u8, + pub status_qualifier: u16, + pub sense: Vec, +} + +impl CmdOutput { + pub const fn ok() -> Self { + Self { + status: 0, + status_qualifier: 0, + sense: Vec::new(), + } + } + + pub fn check_condition(sense: SenseTriple) -> Self { + Self { + status: 2, + status_qualifier: 0, + sense: sense.to_fixed_sense(), + } + } +} + +pub struct Request<'a> { + pub id: u64, + pub cdb: &'a [u8], + pub task_attr: TaskAttr, + pub crn:
u8, + pub prio: u8, +} + +/// A transport-level error encountered while processing a SCSI command. +/// +/// This is only for transport-level errors; anything else should be handled by +/// returning a CHECK CONDITION status at the SCSI level. +#[derive(Debug)] +pub enum CmdError { + /// The provided CDB is too short for its operation code. + CdbTooShort, + /// An error occurred while writing to the provided data in writer. + DataIn(io::Error), +} + +/// A transport-independent implementation of a SCSI target. +/// +/// Currently, we only support emulated targets (see the `emulation` module), +/// but other implementations of this trait could implement pass-through to +/// iSCSI targets or SCSI devices on the host. +pub trait Target: Send + Sync { + fn execute_command( + &mut self, + lun: u16, + data_out: &mut dyn Read, + data_in: &mut dyn Write, + req: Request, + ) -> Result; +} diff --git a/crates/scsi/src/scsi/sense.rs b/crates/scsi/src/scsi/sense.rs new file mode 100644 index 00000000..ad7da96e --- /dev/null +++ b/crates/scsi/src/scsi/sense.rs @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub struct SenseTriple(u8, u8, u8); + +impl SenseTriple { + pub fn to_fixed_sense(self) -> Vec { + vec![ + 0x70, // response code (fixed, current); valid bit (0) + 0x0, // reserved + self.0, // sk; various upper bits 0 + 0x0, 0x0, 0x0, 0x0, // information + 0xa, // add'l sense length + 0x0, 0x0, 0x0, 0x0, // cmd-specific information + self.1, // asc + self.2, // ascq + 0x0, // field-replacable unit code + 0x0, 0x0, 0x0, // sense-key-sepcific information + ] + } +} + +const NO_SENSE: u8 = 0; +const MEDIUM_ERROR: u8 = 0x3; +const HARDWARE_ERROR: u8 = 0x4; +const ILLEGAL_REQUEST: u8 = 0x5; + +pub const NO_ADDITIONAL_SENSE_INFORMATION: SenseTriple = SenseTriple(NO_SENSE, 0, 0); + +pub const INVALID_COMMAND_OPERATION_CODE: SenseTriple = SenseTriple(ILLEGAL_REQUEST, 0x20, 0x0); +pub const
LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE: SenseTriple = SenseTriple(ILLEGAL_REQUEST, 0x21, 0x0); +pub const INVALID_FIELD_IN_CDB: SenseTriple = SenseTriple(ILLEGAL_REQUEST, 0x24, 0x0); +pub const LOGICAL_UNIT_NOT_SUPPORTED: SenseTriple = SenseTriple(ILLEGAL_REQUEST, 0x25, 0x0); // ASC 25h; 21h is LBA out of range +pub const SAVING_PARAMETERS_NOT_SUPPORTED: SenseTriple = SenseTriple(ILLEGAL_REQUEST, 0x39, 0x0); + +pub const UNRECOVERED_READ_ERROR: SenseTriple = SenseTriple(MEDIUM_ERROR, 0x11, 0x0); +pub const TARGET_FAILURE: SenseTriple = SenseTriple(HARDWARE_ERROR, 0x44, 0x0); diff --git a/crates/scsi/src/vhu_scsi.rs b/crates/scsi/src/vhu_scsi.rs new file mode 100644 index 00000000..85676546 --- /dev/null +++ b/crates/scsi/src/vhu_scsi.rs @@ -0,0 +1,621 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +use core::slice; +use std::convert::{TryFrom, TryInto}; +use std::io::{self, ErrorKind}; +use std::mem; + +use log::{debug, error, info, warn}; +use vhost::vhost_user::{VhostUserProtocolFeatures, VhostUserVirtioFeatures}; +use vhost_user_backend::{VhostUserBackendMut, VringRwLock, VringT}; +use virtio_bindings::virtio_scsi::{virtio_scsi_config, virtio_scsi_event}; +use virtio_bindings::{ + virtio_config::VIRTIO_F_VERSION_1, + virtio_ring::{VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC}, + virtio_scsi::VIRTIO_SCSI_F_HOTPLUG, +}; +use virtio_queue::QueueOwnedT; +use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryLoadGuard, GuestMemoryMmap}; +use vmm_sys_util::{ + epoll::EventSet, + eventfd::{EventFd, EFD_NONBLOCK}, +}; + +use crate::scsi::Target; +use crate::virtio::CDB_SIZE; +use crate::{ + scsi::{self, CmdError, TaskAttr}, + virtio::{self, Request, RequestParseError, Response, ResponseCode, VirtioScsiLun, SENSE_SIZE}, +}; + +const REQUEST_QUEUE: u16 = 2; + +type DescriptorChainWriter = virtio::DescriptorChainWriter>; +type DescriptorChainReader = virtio::DescriptorChainReader>; + +pub(crate) struct VhostUserScsiBackend { + event_idx: bool, + mem: Option>, + targets: Vec>, +
pub(crate) exit_event: EventFd, +} + +impl VhostUserScsiBackend { + pub(crate) fn new() -> Self { + Self { + event_idx: false, + mem: None, + targets: Vec::new(), + exit_event: EventFd::new(EFD_NONBLOCK).expect("Creating exit eventfd"), + } + } + + fn parse_target(&mut self, lun: VirtioScsiLun) -> Option<(&mut Box, u16)> { + match lun { + VirtioScsiLun::TargetLun(target, lun) => self + .targets + .get_mut(usize::from(target)) + .map(|tgt| (tgt, lun)), + VirtioScsiLun::ReportLuns => { + // TODO: do we need to handle the REPORT LUNS well-known LUN? + // In practice, everyone seems to just use LUN 0 + warn!("Guest is trying to use the REPORT LUNS well-known LUN, which we don't support."); + None + } + } + } + + fn process_requests( + &mut self, + reader: &mut DescriptorChainReader, + writer: &mut DescriptorChainWriter, + ) { + let mut body_writer = writer.clone(); + const RESPONSE_HEADER_SIZE: u32 = 12; + body_writer.skip( + RESPONSE_HEADER_SIZE + u32::try_from(SENSE_SIZE).expect("SENSE_SIZE should fit 32bit"), + ); + + let response = match Request::parse(reader) { + Ok(r) => { + if let Some((target, lun)) = self.parse_target(r.lun) { + let output = target.execute_command( + lun, + reader, + &mut body_writer, + scsi::Request { + id: r.id, + cdb: &r.cdb, + task_attr: match r.task_attr { + 0 => TaskAttr::Simple, + 1 => TaskAttr::Ordered, + 2 => TaskAttr::HeadOfQueue, + 3 => TaskAttr::Aca, + _ => { + // virtio-scsi spec allows us to map any task attr to simple, presumably + // including future ones + warn!("Unknown task attr: {}", r.task_attr); + TaskAttr::Simple + } + }, + crn: r.crn, + prio: r.prio, + }, + ); + + match output { + Ok(output) => { + assert!(output.sense.len() < SENSE_SIZE); + + Response { + response: ResponseCode::Ok, + status: output.status, + status_qualifier: output.status_qualifier, + sense: output.sense, + // TODO: handle residual for data in + residual: body_writer.residual(), + } + } + Err(CmdError::CdbTooShort) => { + // the CDB buffer is, by 
default, sized larger than any CDB we support; we don't + // handle writes to config space (because QEMU doesn't let us), so there's no + // way the guest can set it too small + unreachable!(); + } + Err(CmdError::DataIn(e)) => { + if e.kind() == ErrorKind::WriteZero { + Response::error(ResponseCode::Overrun, 0) + } else { + error!("Error writing response to guest memory: {}", e); + + // There's some chance the header and data in are on different descriptors, + // and only the data in descriptor is bad, so let's at least try to write an + // error to the header + Response::error(ResponseCode::Failure, body_writer.residual()) + } + } + } + } else { + debug!("Rejecting command to LUN with bad target {:?}", r.lun); + Response::error(ResponseCode::BadTarget, body_writer.residual()) + } + } + Err(RequestParseError::CouldNotReadGuestMemory(e)) => { + // See comment later about errors while writing to guest mem; maybe we at least + // got functional write descriptors, so we can report an error + error!("Error reading request from guest memory: {:?}", e); + Response::error(ResponseCode::Failure, body_writer.residual()) + } + Err(RequestParseError::FailedParsingLun(lun)) => { + error!("Unable to parse LUN: {:?}", lun); + Response::error(ResponseCode::Failure, body_writer.residual()) + } + }; + + if let Err(e) = response.write(writer) { + // Alright, so something went wrong writing our response header to guest memory. + // The only reason this should ever happen, I think, is if the guest gave us a + // virtio descriptor with an invalid address. + + // There's not a great way to recover from this - we just discovered that + // our only way of communicating with the guest doesn't work - so we either + // silently fail or crash. There isn't too much sense in crashing, IMO, as + // the guest could still recover by, say, installing a fixed kernel and + // rebooting. So let's just log an error and do nothing.
+ error!("Error writing response to guest memory: {:?}", e); + } + } + + fn process_request_queue(&mut self, vring: &VringRwLock) -> Result<(), io::Error> { + let chains: Vec<_> = vring + .get_mut() + .get_queue_mut() + .iter(self.mem.as_ref().unwrap().memory()) + .map_err(|e| io::Error::new(ErrorKind::Other, e))? + .collect(); + for dc in chains { + let mut writer = DescriptorChainWriter::new(dc.clone()); + let mut reader = DescriptorChainReader::new(dc.clone()); + + self.process_requests(&mut reader, &mut writer); + + vring + .add_used(dc.head_index(), writer.max_written()) + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + } + + vring + .signal_used_queue() + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + Ok(()) + } + + pub(crate) fn add_target(&mut self, target: Box) { + self.targets.push(target); + } +} + +impl VhostUserBackendMut for VhostUserScsiBackend { + fn num_queues(&self) -> usize { + // control + event + request queues + let num_request_queues = 1; + 2 + num_request_queues + } + + fn max_queue_size(&self) -> usize { + 128 // qemu assumes this by default + } + + fn features(&self) -> u64 { + 1 << VIRTIO_F_VERSION_1 + | 1 << VIRTIO_SCSI_F_HOTPLUG + | 1 << VIRTIO_RING_F_INDIRECT_DESC + | 1 << VIRTIO_RING_F_EVENT_IDX + | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() + } + + fn protocol_features(&self) -> VhostUserProtocolFeatures { + VhostUserProtocolFeatures::MQ | VhostUserProtocolFeatures::CONFIG + } + + fn set_event_idx(&mut self, enabled: bool) { + self.event_idx = enabled; + } + + fn update_memory( + &mut self, + atomic_mem: GuestMemoryAtomic, + ) -> std::result::Result<(), std::io::Error> { + info!("Memory updated - guest probably booting"); + self.mem = Some(atomic_mem); + Ok(()) + } + + fn handle_event( + &mut self, + device_event: u16, + evset: EventSet, + vrings: &[VringRwLock], + thread_id: usize, + ) -> io::Result { + assert!(evset == EventSet::IN); + assert!(vrings.len() == 3); + assert!((device_event as usize) < 
vrings.len()); + assert!(thread_id == 0); + + let vring = &vrings[device_event as usize]; + match device_event { + REQUEST_QUEUE => { + if self.event_idx { + // vm-virtio's Queue implementation only checks avail_index + // once, so to properly support EVENT_IDX we need to keep + // calling process_request_queue() until it stops finding + // new requests on the queue. + loop { + vring.disable_notification().unwrap(); + self.process_request_queue(vring)?; + if !vring.enable_notification().unwrap() { + break; + } + } + } else { + // Without EVENT_IDX, a single call is enough. + self.process_request_queue(vring)?; + } + } + _ => { + error!("Ignoring descriptor on queue {}", device_event); + } + } + + Ok(false) + } + + fn get_config(&self, offset: u32, size: u32) -> Vec { + let config = virtio_scsi_config { + num_queues: 1, + seg_max: 128 - 2, + max_sectors: 0xFFFF, + cmd_per_lun: 128, + event_info_size: mem::size_of::() + .try_into() + .expect("event info size should fit 32bit"), + sense_size: SENSE_SIZE.try_into().expect("SENSE_SIZE should fit 32bit"), + cdb_size: CDB_SIZE.try_into().expect("CDB_SIZE should fit 32bit"), + max_channel: 0, + max_target: 255, + max_lun: u32::from(!u16::from(VirtioScsiLun::ADDRESS_METHOD_PATTERN) << 8 | 0xff), + }; + + // SAFETY: + // Pointer is aligned (points to start of struct), valid and we only + // access up to the size of the struct. 
+ let config_slice = unsafe { + slice::from_raw_parts( + &config as *const virtio_scsi_config as *const u8, + mem::size_of::(), + ) + }; + + config_slice + .iter() + .skip(offset as usize) + .take(size as usize) + .cloned() + .collect() + } + + fn set_config(&mut self, _offset: u32, _buf: &[u8]) -> std::result::Result<(), std::io::Error> { + // QEMU handles config space itself + panic!("Access to configuration space is not supported."); + } + + fn exit_event(&self, _thread_index: usize) -> Option { + Some(self.exit_event.try_clone().expect("Cloning exit eventfd")) + } +} + +#[cfg(test)] +mod tests { + use std::{ + convert::TryInto, + io::{self, Read, Write}, + sync::{Arc, Mutex}, + }; + + use vhost_user_backend::{VhostUserBackendMut, VringRwLock, VringT}; + use virtio_bindings::{ + virtio_ring::VRING_DESC_F_WRITE, + virtio_scsi::{ + virtio_scsi_cmd_req, virtio_scsi_config, VIRTIO_SCSI_S_BAD_TARGET, + VIRTIO_SCSI_S_FAILURE, VIRTIO_SCSI_S_OK, + }, + }; + use virtio_queue::{mock::MockSplitQueue, Descriptor}; + use vm_memory::{ + Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, + GuestMemoryMmap, + }; + + use super::VhostUserScsiBackend; + use crate::{ + scsi::{CmdOutput, Target, TaskAttr}, + virtio::{ + tests::{VirtioScsiCmdReq, VirtioScsiCmdResp}, + VirtioScsiLun, CDB_SIZE, + }, + }; + + #[allow(dead_code)] + struct RecordedCommand { + lun: u16, + id: u64, + cdb: [u8; CDB_SIZE], + task_attr: TaskAttr, + crn: u8, + prio: u8, + } + + struct FakeTargetCommandCollector { + received_commands: Vec, + } + + impl FakeTargetCommandCollector { + fn new() -> Arc> { + Arc::new(Mutex::new(Self { + received_commands: vec![], + })) + } + } + + type FakeResponse = Result; + + struct FakeTarget { + collector: Arc>, + callback: Cb, + } + + impl FakeTarget { + fn new(collector: Arc>, callback: Cb) -> Self + where + Cb: FnMut(u16, crate::scsi::Request) -> FakeResponse + Sync + Send, + { + Self { + collector, + callback, + } + } + } + + impl Target for 
FakeTarget + where + Cb: FnMut(u16, crate::scsi::Request) -> FakeResponse + Sync + Send, + { + fn execute_command( + &mut self, + lun: u16, + _data_out: &mut dyn Read, + _data_in: &mut dyn Write, + req: crate::scsi::Request, + ) -> Result { + let mut collector = self.collector.lock().unwrap(); + collector.received_commands.push(RecordedCommand { + lun, + id: req.id, + cdb: req.cdb.try_into().unwrap(), + task_attr: req.task_attr, + crn: req.crn, + prio: req.prio, + }); + (self.callback)(lun, req) + } + } + + fn setup( + req: impl ByteValued, + ) -> ( + VhostUserScsiBackend, + VringRwLock, + GuestMemoryAtomic, + ) { + let mem = GuestMemoryAtomic::new( + GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x1000_0000)]).unwrap(), + ); + // The `build_desc_chain` function will populate the `NEXT` related flags and field. + let v = vec![ + Descriptor::new(0x10_0000, 0x100, 0, 0), // request + Descriptor::new(0x20_0000, 0x100, VRING_DESC_F_WRITE as u16, 0), // response + ]; + + mem.memory() + .write_obj(req, GuestAddress(0x10_0000)) + .expect("writing to succeed"); + + let mem_handle = mem.memory(); + + let queue = MockSplitQueue::new(&*mem_handle, 16); + // queue.set_avail_idx(1); + + queue.build_desc_chain(&v).unwrap(); + + // Put the descriptor index 0 in the first available ring position. + mem.memory() + .write_obj(0u16, queue.avail_addr().unchecked_add(4)) + .unwrap(); + + // Set `avail_idx` to 1. 
+ mem.memory() + .write_obj(1u16, queue.avail_addr().unchecked_add(2)) + .unwrap(); + + let vring = VringRwLock::new(mem.clone(), 16).unwrap(); + + // vring.set_queue_info(0x10_0000, 0x10_0000, 0x300).unwrap(); + vring.set_queue_size(16); + vring + .set_queue_info( + queue.desc_table_addr().0, + queue.avail_addr().0, + queue.used_addr().0, + ) + .unwrap(); + vring.set_queue_ready(true); + + let mut backend = VhostUserScsiBackend::new(); + backend.update_memory(mem.clone()).unwrap(); + + (backend, vring, mem) + } + + fn get_response(mem: &GuestMemoryAtomic) -> VirtioScsiCmdResp { + mem.memory() + .read_obj::(GuestAddress(0x20_0000)) + .expect("Unable to read response from memory") + } + + fn create_lun_specifier(target: u8, lun: u16) -> [u8; 8] { + let lun = lun.to_be_bytes(); // big-endian: VirtioScsiLun::parse decodes bytes 2 and 3 with from_be_bytes + + [ + 0x1, + target, + lun[0] | VirtioScsiLun::FLAT_SPACE_ADDRESSING_METHOD, + lun[1], + 0x0, + 0x0, + 0x0, + 0x0, + ] + } + + #[test] + fn backend_test() { + let collector = FakeTargetCommandCollector::new(); + let fake_target = Box::new(FakeTarget::new(collector.clone(), |_, _| { + Ok(CmdOutput::ok()) + })); + + let req = VirtioScsiCmdReq(virtio_scsi_cmd_req { + lun: create_lun_specifier(0, 0), + tag: 0, + task_attr: 0, + prio: 0, + crn: 0, + cdb: [0; CDB_SIZE], + }); + + let (mut backend, vring, mem) = setup(req); + backend.add_target(fake_target); + backend.process_request_queue(&vring).unwrap(); + + let res = get_response(&mem); + assert_eq!(res.0.response, VIRTIO_SCSI_S_OK as u8); + + let collector = collector.lock().unwrap(); + assert_eq!( + collector.received_commands.len(), + 1, + "expect one command to be passed to Target" + ); + } + + #[test] + fn backend_error_reporting_test() { + let collector = FakeTargetCommandCollector::new(); + let fake_target = Box::new(FakeTarget::new(collector.clone(), |_, _| { + Err(crate::scsi::CmdError::DataIn(io::Error::new( + io::ErrorKind::Other, + "internal error", + ))) + })); + + let req = VirtioScsiCmdReq(virtio_scsi_cmd_req { + lun:
create_lun_specifier(0, 0), + tag: 0, + task_attr: 0, + prio: 0, + crn: 0, + cdb: [0; CDB_SIZE], + }); + + let (mut backend, vring, mem) = setup(req); + backend.add_target(fake_target); + backend.process_request_queue(&vring).unwrap(); + + let res = get_response(&mem); + assert_eq!(res.0.response, VIRTIO_SCSI_S_FAILURE as u8); + + let collector = collector.lock().unwrap(); + assert_eq!( + collector.received_commands.len(), + 1, + "expect one command to be passed to Target" + ); + } + + #[test] + fn test_command_to_unknown_lun() { + let collector = FakeTargetCommandCollector::new(); + + let req = VirtioScsiCmdReq(virtio_scsi_cmd_req { + lun: create_lun_specifier(0, 0), + tag: 0, + task_attr: 0, + prio: 0, + crn: 0, + cdb: [0; CDB_SIZE], + }); + + let (mut backend, vring, mem) = setup(req); + backend.process_request_queue(&vring).unwrap(); + + let res = get_response(&mem); + assert_eq!(res.0.response, VIRTIO_SCSI_S_BAD_TARGET as u8); + + let collector = collector.lock().unwrap(); + assert_eq!( + collector.received_commands.len(), + 0, + "expect no command to make it to the target" + ); + } + + #[test] + fn test_broken_read_descriptor() { + let collector = FakeTargetCommandCollector::new(); + + let broken_req = [0u8; 1]; // single byte request + + let (mut backend, vring, mem) = setup(broken_req); + backend.process_request_queue(&vring).unwrap(); + + let res = get_response(&mem); + assert_eq!(res.0.response, VIRTIO_SCSI_S_FAILURE as u8); + + let collector = collector.lock().unwrap(); + assert_eq!( + collector.received_commands.len(), + 0, + "expect no command to make it to the target" + ); + } + + #[test] + fn test_reading_config() { + let backend = VhostUserScsiBackend::new(); + + // 0 len slice + assert_eq!(vec![0_u8; 0], backend.get_config(0, 0)); + // overly long slice + assert_eq!( + std::mem::size_of::(), + backend.get_config(0, 2000).len() + ); + // subslice + assert_eq!(1, backend.get_config(4, 1).len()); + // overly long subslice + assert_eq!(28, 
backend.get_config(8, 10000).len()); + // offset after end + assert_eq!(0, backend.get_config(100000, 10).len()); + } +} diff --git a/crates/scsi/src/virtio.rs b/crates/scsi/src/virtio.rs new file mode 100644 index 00000000..423c0aba --- /dev/null +++ b/crates/scsi/src/virtio.rs @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +//! Helpers for virtio and virtio-scsi. +use std::{ + cell::Cell, + cmp::{max, min}, + convert::TryInto, + io, + io::{ErrorKind, Read, Write}, + mem, + ops::Deref, + rc::Rc, +}; + +use log::error; +use virtio_bindings::virtio_scsi::virtio_scsi_cmd_req; +use virtio_queue::{Descriptor, DescriptorChain, DescriptorChainRwIter}; +use vm_memory::{Bytes, GuestAddress, GuestMemory}; + +/// virtio-scsi has its own format for LUNs, documented in 5.6.6.1 of virtio +/// v1.1. This represents a LUN parsed from that format. +#[derive(PartialEq, Eq, Clone, Copy, Debug)] +pub(crate) enum VirtioScsiLun { + ReportLuns, + TargetLun(u8, u16), +} + +pub(crate) const REPORT_LUNS: [u8; 8] = [0xc1, 0x01, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0]; + +impl VirtioScsiLun { + pub(crate) const FLAT_SPACE_ADDRESSING_METHOD: u8 = 0b0100_0000; + pub(crate) const ADDRESS_METHOD_PATTERN: u8 = 0b1100_0000; + + pub(crate) fn parse(bytes: [u8; 8]) -> Option { + if bytes == REPORT_LUNS { + Some(Self::ReportLuns) + } else if bytes[0] == 0x1 { + let target = bytes[1]; + // bytes[2..3] is a normal SCSI single-level lun + if (bytes[2] & Self::ADDRESS_METHOD_PATTERN) != Self::FLAT_SPACE_ADDRESSING_METHOD { + error!( + "Got LUN in unsupported format: {:#2x} {:#2x}. 
\ + Only flat space addressing is supported!", + bytes[2], bytes[3] + ); + return None; + } + + let lun = u16::from_be_bytes([bytes[2] & !Self::ADDRESS_METHOD_PATTERN, bytes[3]]); + Some(Self::TargetLun(target, lun)) + } else { + None + } + } +} + +#[repr(u8)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum ResponseCode { + Ok = 0, + Overrun = 1, + BadTarget = 3, + Failure = 9, +} + +// These are the defaults given in the virtio spec; QEMU doesn't let the driver +// write to config space, so these will always be the correct values. +pub(crate) const SENSE_SIZE: usize = 96; +pub(crate) const CDB_SIZE: usize = 32; + +pub(crate) struct Request { + pub id: u64, + pub lun: VirtioScsiLun, + pub prio: u8, + pub crn: u8, + pub cdb: [u8; CDB_SIZE], + pub task_attr: u8, +} + +#[derive(Debug)] +pub(crate) enum RequestParseError { + CouldNotReadGuestMemory(io::Error), + FailedParsingLun([u8; 8]), +} + +impl Request { + pub fn parse(reader: &mut impl Read) -> Result { + let mut request = [0; mem::size_of::()]; + + reader + .read_exact(&mut request) + .map_err(RequestParseError::CouldNotReadGuestMemory)?; + + let lun = VirtioScsiLun::parse(request[0..8].try_into().expect("slice is of length 8")) + .ok_or(RequestParseError::FailedParsingLun( + request[0..8].try_into().expect("slice to be of length 8"), + ))?; + + Ok(Self { + id: u64::from_le_bytes(request[8..16].try_into().expect("slice is of length 8")), + lun, + task_attr: request[16], + prio: request[17], + crn: request[18], + cdb: request[19..].try_into().expect("should fit into cdb"), + }) + } +} + +#[derive(Debug, PartialEq, Eq)] +pub(crate) struct Response { + pub response: ResponseCode, + pub status: u8, + pub status_qualifier: u16, + pub sense: Vec, + pub residual: u32, +} + +impl Response { + pub fn write(&self, writer: &mut impl Write) -> Result<(), io::Error> { + writer.write_all(&(self.sense.len() as u32).to_le_bytes())?; // sense_len + writer.write_all(&self.residual.to_le_bytes())?; // residual + 
writer.write_all(&self.status_qualifier.to_le_bytes())?; // status qual + writer.write_all(&[self.status])?; // status + writer.write_all(&[self.response as u8])?; // response + + writer.write_all(&self.sense[..])?; + + Ok(()) + } + + /// Shortcut to create a response for an error condition, where most fields + /// don't matter. + pub fn error(code: ResponseCode, residual: u32) -> Self { + assert!(code != ResponseCode::Ok); + Self { + response: code, + status: 0, + status_qualifier: 0, + sense: Vec::new(), + residual, + } + } +} + +// TODO: Drop this if https://github.com/rust-vmm/vm-virtio/pull/33 found an agreement +/// A `Write` implementation that writes to the memory indicated by a virtio +/// descriptor chain. +#[derive(Clone)] +pub struct DescriptorChainWriter +where + M::Target: GuestMemory, +{ + chain: DescriptorChain, + iter: DescriptorChainRwIter, + current: Option, + offset: u32, + written: u32, + max_written: Rc>, +} + +impl DescriptorChainWriter +where + M::Target: GuestMemory, +{ + pub fn new(chain: DescriptorChain) -> Self { + let mut iter = chain.clone().writable(); + let current = iter.next(); + Self { + chain, + iter, + current, + offset: 0, + written: 0, + max_written: Rc::new(Cell::new(0)), + } + } + + pub fn skip(&mut self, bytes: u32) { + self.offset += bytes; + self.add_written(bytes); + while self + .current + .map_or(false, |current| self.offset >= current.len()) + { + let current = self.current.expect("loop condition ensures existance"); + self.offset -= current.len(); + self.current = self.iter.next(); + } + } + + pub fn residual(&mut self) -> u32 { + let mut ret = 0; + while let Some(current) = self.current { + ret += current.len() - self.offset; + self.offset = 0; + self.current = self.iter.next(); + } + ret + } + + fn add_written(&mut self, written: u32) { + self.written += written; + self.max_written + .set(max(self.max_written.get(), self.written)); + } + + pub fn max_written(&self) -> u32 { + self.max_written.get() + } +} + +impl 
Write for DescriptorChainWriter +where + M::Target: GuestMemory, +{ + fn write(&mut self, buf: &[u8]) -> std::io::Result { + if let Some(current) = self.current { + let left_in_descriptor = current.len() - self.offset; + let to_write: u32 = min(left_in_descriptor as usize, buf.len()) as u32; + + let written = self + .chain + .memory() + .write( + &buf[..(to_write as usize)], + GuestAddress(current.addr().0.checked_add(u64::from(self.offset)).ok_or( + io::Error::new(ErrorKind::Other, vm_memory::Error::InvalidGuestRegion), + )?), + ) + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + + self.offset += written as u32; + + if self.offset == current.len() { + self.current = self.iter.next(); + self.offset = 0; + } + + self.add_written(written as u32); + + Ok(written) + } else { + Ok(0) + } + } + + fn flush(&mut self) -> std::io::Result<()> { + // no-op: we're writing directly to guest memory + Ok(()) + } +} + +/// A `Read` implementation that reads from the memory indicated by a virtio +/// descriptor chain. 
+pub struct DescriptorChainReader +where + M::Target: GuestMemory, +{ + chain: DescriptorChain, + iter: DescriptorChainRwIter, + current: Option, + offset: u32, +} + +impl DescriptorChainReader +where + M::Target: GuestMemory, +{ + pub fn new(chain: DescriptorChain) -> Self { + let mut iter = chain.clone().readable(); + let current = iter.next(); + + Self { + chain, + iter, + current, + offset: 0, + } + } +} + +impl Read for DescriptorChainReader +where + M::Target: GuestMemory, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + if let Some(current) = self.current { + let left_in_descriptor = current.len() - self.offset; + let to_read = min(left_in_descriptor, buf.len() as u32); + + let read = self + .chain + .memory() + .read( + &mut buf[..(to_read as usize)], + GuestAddress(current.addr().0 + u64::from(self.offset)), + ) + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + + self.offset += read as u32; + + if self.offset == current.len() { + self.current = self.iter.next(); + self.offset = 0; + } + + Ok(read) + } else { + Ok(0) + } + } +} + +#[cfg(test)] +pub(crate) mod tests { + use virtio_bindings::virtio_scsi::{virtio_scsi_cmd_req, virtio_scsi_cmd_resp}; + use virtio_queue::{mock::MockSplitQueue, Descriptor}; + use vm_memory::{ByteValued, GuestAddress, GuestMemoryMmap}; + + use super::*; + + #[derive(Debug, Default, Clone, Copy)] + #[repr(transparent)] + pub(crate) struct VirtioScsiCmdReq(pub virtio_scsi_cmd_req); + /// SAFETY: struct is a transparent wrapper around the request + /// which can be read from a byte array + unsafe impl ByteValued for VirtioScsiCmdReq {} + + #[derive(Debug, Default, Clone, Copy)] + #[repr(transparent)] + pub(crate) struct VirtioScsiCmdResp(pub virtio_scsi_cmd_resp); + /// SAFETY: struct is a transparent wrapper around the response + /// which can be read from a byte array + unsafe impl ByteValued for VirtioScsiCmdResp {} + + pub(crate) fn report_luns_command() -> VirtioScsiCmdReq { + VirtioScsiCmdReq(virtio_scsi_cmd_req 
{ + lun: REPORT_LUNS, + tag: 0, + task_attr: 0, + prio: 0, + crn: 0, + cdb: [0; CDB_SIZE], + }) + } + + #[test] + fn test_parse_request() { + let mem: GuestMemoryMmap = + GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000_0000)]).unwrap(); + + // The `build_desc_chain` function will populate the `NEXT` related flags and field. + let v = vec![ + // A device-readable request header descriptor. + Descriptor::new(0x10_0000, 0x100, 0, 0), + ]; + + let req = report_luns_command(); + mem.write_obj(req, GuestAddress(0x10_0000)) + .expect("writing to succeed"); + + let queue = MockSplitQueue::new(&mem, 16); + let chain = queue.build_desc_chain(&v).unwrap(); + + let mut chain = DescriptorChainReader::new(chain.clone()); + let req = Request::parse(&mut chain).expect("request failed to parse"); + assert_eq!(req.lun, VirtioScsiLun::ReportLuns); + } +} diff --git a/crates/scsi/test/.containerignore b/crates/scsi/test/.containerignore new file mode 100644 index 00000000..d26621d7 --- /dev/null +++ b/crates/scsi/test/.containerignore @@ -0,0 +1,2 @@ +results/ +test-data/ \ No newline at end of file diff --git a/crates/scsi/test/.gitignore b/crates/scsi/test/.gitignore new file mode 100644 index 00000000..d26621d7 --- /dev/null +++ b/crates/scsi/test/.gitignore @@ -0,0 +1,2 @@ +results/ +test-data/ \ No newline at end of file diff --git a/crates/scsi/test/Containerfile b/crates/scsi/test/Containerfile new file mode 100644 index 00000000..6abc2765 --- /dev/null +++ b/crates/scsi/test/Containerfile @@ -0,0 +1,11 @@ +FROM fedora:39 +RUN dnf install --quiet --assumeyes \ + /usr/bin/qemu-system-x86_64 \ + /usr/bin/qemu-img \ + /usr/bin/virt-sysprep \ + /usr/bin/ssh-keygen \ + /usr/bin/ssh \ + /usr/sbin/libvirtd \ + wget \ + && dnf clean all +VOLUME /tests/ diff --git a/crates/scsi/test/README.md b/crates/scsi/test/README.md new file mode 100644 index 00000000..93220541 --- /dev/null +++ b/crates/scsi/test/README.md @@ -0,0 +1,33 @@ +# Testing tools + +This folder contains some
tooling for tests + +## Prerequisites + +For running these tests, you need a KVM-enabled x86_64 machine and `podman`. + +vhost-device-scsi must have been built already (`cargo build`). + +## Performed tests + +Right now, the test harness will only run +[blktests](https://github.com/osandov/blktests) against the target device +(these tests are probably testing the guest kernel more than the actual +device). + +## Test execution + +Triggering the build of the necessary container images and invoking the tests +is done by calling `./invoke-test.sh`. + +That will build the `Containerfile`, launch a container and invoke +`./start-test.sh` inside of the container. That script downloads a Fedora cloud +image, launches the daemon and QEMU, waits until the guest is up and triggers the +test execution. + +Results will be downloaded into a timestamped folder under `results/`. + +# Other test tools + +Some quick and dirty fuzzing code is available at +https://github.com/Ablu/vhost-device/tree/scsi-fuzzing. diff --git a/crates/scsi/test/invoke-test.sh b/crates/scsi/test/invoke-test.sh new file mode 100755 index 00000000..34e48ea2 --- /dev/null +++ b/crates/scsi/test/invoke-test.sh @@ -0,0 +1,20 @@ +#!/bin/bash -xe + +cd $(dirname "$0") + +DAEMON_BINARY="$PWD/../../../target/debug/vhost-device-scsi" + +if [[ ! -e "$DAEMON_BINARY" ]] +then + echo "Unable to find \"$DAEMON_BINARY\". Did you run cargo build?" + exit 1 +fi + +TAG_NAME=vhost-device-scsi-test-env +podman build -t "$TAG_NAME" .
+podman run \ + -v /dev/kvm:/dev/kvm \ + --security-opt label=disable \ + -v "$DAEMON_BINARY":/usr/local/bin/vhost-device-scsi:ro \ + -v $PWD:/test "$TAG_NAME" \ + /test/start-test.sh diff --git a/crates/scsi/test/start-test.sh b/crates/scsi/test/start-test.sh new file mode 100755 index 00000000..6ebf8c6b --- /dev/null +++ b/crates/scsi/test/start-test.sh @@ -0,0 +1,60 @@ +#!/bin/bash -xe + +cd $(dirname "$0") + +libvirtd --daemon +virtlogd --daemon +export LIBGUESTFS_BACKEND=direct + +mkdir -p test-data/ +pushd test-data + IMAGE=Fedora-Cloud-Base-38-1.6.x86_64.qcow2 + test -e "$IMAGE" || wget --quiet "https://download.fedoraproject.org/pub/fedora/linux/releases/38/Cloud/x86_64/images/$IMAGE" -O "$IMAGE" + qemu-img create -f qcow2 -F qcow2 -b "$PWD/$IMAGE" fedora-overlay.qcow2 + + test -e test-key-id_rsa || ssh-keygen -N "" -f test-key-id_rsa + + virt-sysprep -a fedora-overlay.qcow2 \ + --ssh-inject root:file:test-key-id_rsa.pub + + fallocate -l 5GiB big-image.img +popd + +SSH_OPTS="-i test-data/test-key-id_rsa -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o User=root -o Port=2222" + +vhost-device-scsi --socket-path /tmp/vhost-user-scsi.sock test-data/big-image.img & + +sleep 1 + +qemu-system-x86_64 \ + -enable-kvm -cpu host \ + -device virtio-net-pci,netdev=net0,mac=52:54:00:12:35:02\ + -netdev user,id=net0,hostfwd=tcp::2222-:22,hostfwd=tcp::2323-:23 \ + -object rng-random,filename=/dev/urandom,id=rng0 -device virtio-rng-pci,rng=rng0 \ + -hda test-data/fedora-overlay.qcow2 \ + -object memory-backend-memfd,id=mem,size=8192M,share=on \ + -numa node,memdev=mem \ + -device vhost-user-scsi-pci,num_queues=1,param_change=off,chardev=vus \ + -chardev socket,id=vus,path=/tmp/vhost-user-scsi.sock \ + -smp 4 -m 8192 \ + -serial mon:stdio \ + -display none & + + +while ! 
ssh $SSH_OPTS localhost echo waiting for guest to come online +do + sleep 1 +done + + +scp $SSH_OPTS test-script.sh localhost:~/ +ssh $SSH_OPTS localhost /root/test-script.sh || echo "tests failed" + +export RESULT_DIR="$PWD/results/$(date --rfc-3339=s)" +mkdir -p "$RESULT_DIR" + +scp $SSH_OPTS -r localhost:/root/blktests/results/ "$RESULT_DIR/" +ssh $SSH_OPTS localhost poweroff + +wait # wait for qemu to terminate + diff --git a/crates/scsi/test/test-script.sh b/crates/scsi/test/test-script.sh new file mode 100755 index 00000000..927aec57 --- /dev/null +++ b/crates/scsi/test/test-script.sh @@ -0,0 +1,10 @@ +#!/bin/bash -xe + +dnf install -y git make g++ fio liburing-devel blktrace + +git clone https://github.com/osandov/blktests.git +pushd blktests + echo "TEST_DEVS=(/dev/sdb)" > config + make -j $(nproc) + ./check scsi block +popd \ No newline at end of file diff --git a/rust-vmm-ci b/rust-vmm-ci index 285971e8..8627b376 160000 --- a/rust-vmm-ci +++ b/rust-vmm-ci @@ -1 +1 @@ -Subproject commit 285971e8c716512d6e35ac47a009a49fc3c75660 +Subproject commit 8627b3766b2bedde4657c7e9ddfc6f95a20e6942