diff --git a/layout.ld b/layout.ld index 5403d7d1..5aff83e1 100644 --- a/layout.ld +++ b/layout.ld @@ -1,8 +1,9 @@ -ENTRY(ram64_start) +ENTRY(linux64_start) PHDRS { - program PT_LOAD FILEHDR PHDRS ; + ram PT_LOAD FILEHDR PHDRS ; + note PT_NOTE ; } /* Loaders like to put stuff in low memory (< 1M), so we don't use it. */ @@ -13,16 +14,28 @@ stack_size = 64K; SECTIONS { - /* Mapping in the program headers makes it easier to mmap the whole file. */ + /* Mapping the program headers and note into RAM makes the file smaller. */ . = ram_min; . += SIZEOF_HEADERS; + .note : { *(.note) } :note :ram - .rodata : { *(.rodata .rodata.*) } :program - .text : { *(.text .text.*) } :program - .data : { *(.data .data.*) } :program - .bss : { *(.bss .bss.*) } :program + /* These sections are mapped into RAM from the file. Omitting :ram from + later sections avoids emitting empty sections in the final binary. */ + data_start = .; + .rodata : { *(.rodata .rodata.*) } :ram + .text : { *(.text .text.*) } + .text32 : { *(.text32) } + .data : { *(.data .data.*) } + data_size = . - data_start; - firmware_ram_size = . - ram_min; + /* The BSS section isn't mapped from any file data. It is simply zeroed + in RAM. So our file size should be computed from here. */ + file_size = . - ram_min; + .bss : { + bss_start = .; + *(.bss .bss.*) + bss_size = . - bss_start; + } ASSERT((. 
<= ram_max - stack_size), "firmware size too big for RAM region") diff --git a/src/asm/gdt64.s b/src/asm/gdt64.s new file mode 100644 index 00000000..9737c43f --- /dev/null +++ b/src/asm/gdt64.s @@ -0,0 +1,32 @@ +.section .rodata, "a" + +gdt64_ptr: + .short gdt64_end - gdt64_start - 1 # GDT length is actually (length - 1) + .quad gdt64_start + +gdt64_start: # First descriptor is always null + .quad 0 +code64_desc: # 64-bit Code-Segments always have: Base = 0, Limit = 4G + # CS.Limit[15:00] = 0 - Ignored + .short 0x0000 + # CS.Base[15:00] = 0 - Ignored + .short 0x0000 + # CS.Base[23:16] = 0 (bits 0-7) - Ignored + .byte 0x00 + # CS.Accessed = 1 (bit 8) - Don't write to segment on first use + # CS.ReadEnable = 1 (bit 9) - Read/Execute Code-Segment + # CS.Conforming = 0 (bit 10) - Nonconforming, no lower-priv access + # CS.Executable = 1 (bit 11) - Code-Segment + # CS.S = 1 (bit 12) - Not a System-Segment + # CS.DPL = 0 (bits 13-14) - We only use this segment in Ring 0 + # CS.P = 1 (bit 15) - Segment is present + .byte 0b10011011 + # CS.Limit[19:16] = 0 (bits 16-19) - Ignored + # CS.AVL = 0 (bit 20) - Our software doesn't use this bit + # CS.L = 1 (bit 21) - This is a 64-bit segment + # CS.D = 0 (bit 22) - Must be 0 when CS.L = 1 (64-bit segment) + # CS.G = 0 (bit 23) - Ignored + .byte 0b00100000 + # CS.Base[31:24] = 0 (bits 24-31) - Ignored + .byte 0x00 +gdt64_end: diff --git a/src/asm/mod.rs b/src/asm/mod.rs index e1b44166..95dd9360 100644 --- a/src/asm/mod.rs +++ b/src/asm/mod.rs @@ -1 +1,4 @@ +global_asm!(include_str!("note.s")); +global_asm!(include_str!("ram32.s")); global_asm!(include_str!("ram64.s")); +global_asm!(include_str!("gdt64.s")); diff --git a/src/asm/note.s b/src/asm/note.s new file mode 100644 index 00000000..674cf70e --- /dev/null +++ b/src/asm/note.s @@ -0,0 +1,20 @@ +.section .note, "a" + +# From xen/include/public/elfnote.h, "Physical entry point into the kernel." 
+XEN_ELFNOTE_PHYS32_ENTRY = 18 + +# We don't bother defining an ELFNOTE macro, as we only have one note. +# This is equivalent to the kernel's: +# ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long pvh_start) +.align 4 + .long name_end - name_start # namesz + .long desc_end - desc_start # descsz + .long XEN_ELFNOTE_PHYS32_ENTRY # type +name_start: + .asciz "Xen" +name_end: +.align 4 +desc_start: + .long ram32_start +desc_end: +.align 4 diff --git a/src/asm/ram32.s b/src/asm/ram32.s new file mode 100644 index 00000000..e989fd7e --- /dev/null +++ b/src/asm/ram32.s @@ -0,0 +1,46 @@ +.section .text32, "ax" +.code32 + +ram32_start: + # Stash the PVH start_info struct in %rdi. + movl %ebx, %edi + # Zero out %rsi, its value is unspecified in the PVH Boot Protocol. + xorl %esi, %esi + +setup_page_tables: + # First L2 entry identity maps [0, 2 MiB) + movl $0b10000011, (L2_TABLES) # huge (bit 7), writable (bit 1), present (bit 0) + # First L3 entry points to L2 table + movl $L2_TABLES, %eax + orb $0b00000011, %al # writable (bit 1), present (bit 0) + movl %eax, (L3_TABLE) + # First L4 entry points to L3 table + movl $L3_TABLE, %eax + orb $0b00000011, %al # writable (bit 1), present (bit 0) + movl %eax, (L4_TABLE) + +enable_paging: + # Load page table root into CR3 + movl $L4_TABLE, %eax + movl %eax, %cr3 + + # Set CR4.PAE (Physical Address Extension) + movl %cr4, %eax + orb $0b00100000, %al # Set bit 5 + movl %eax, %cr4 + # Set EFER.LME (Long Mode Enable) + movl $0xC0000080, %ecx + rdmsr + orb $0b00000001, %ah # Set bit 8 + wrmsr + # Set CR0.PG (Paging) + movl %cr0, %eax + orl $(1 << 31), %eax + movl %eax, %cr0 + +jump_to_64bit: + # We are now in 32-bit compatibility mode. To enter 64-bit mode, we need to + # load a 64-bit code segment into our GDT. + lgdtl gdt64_ptr + # Set CS to a 64-bit segment and jump to 64-bit code. 
+ ljmpl $(code64_desc - gdt64_start), $ram64_start diff --git a/src/asm/ram64.s b/src/asm/ram64.s index 09bbb942..6999c048 100644 --- a/src/asm/ram64.s +++ b/src/asm/ram64.s @@ -1,14 +1,15 @@ .section .text, "ax" -.global ram64_start +.global linux64_start .code64 -ram64_start: - # Indicate (via serial) that we are in long/64-bit mode - movw $0x3f8, %dx - movb $'L', %al - outb %al, %dx +linux64_start: + # Zero out %rdi, its value is unspecified in the Linux Boot Protocol. + xorq %rdi, %rdi +ram64_start: # Setup the stack (at the end of our RAM region) movq $ram_max, %rsp + # PVH start_info is in %rdi, the first parameter of the System V ABI. + # BootParams are in %rsi, the second parameter of the System V ABI. jmp rust64_start diff --git a/src/boot.rs b/src/boot.rs new file mode 100644 index 00000000..ff172893 --- /dev/null +++ b/src/boot.rs @@ -0,0 +1,211 @@ +use core::mem; + +use crate::{ + common, + fat::{Error, Read}, + mem::MemoryRegion, +}; + +// Common data needed for all boot paths +pub trait Info { + // Starting address of the Root System Description Pointer + fn rsdp_addr(&self) -> u64; + // The kernel command line (not including null terminator) + fn cmdline(&self) -> &[u8]; + // Methods to access the E820 Memory map + fn num_entries(&self) -> u8; + fn entry(&self, idx: u8) -> E820Entry; +} + +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct E820Entry { + pub addr: u64, + pub size: u64, + pub entry_type: u32, +} + +impl E820Entry { + pub const RAM_TYPE: u32 = 1; +} + +// The so-called "zeropage" +#[derive(Clone, Copy)] +#[repr(C, packed)] +pub struct Params { + screen_info: ScreenInfo, // 0x000 + apm_bios_info: ApmBiosInfo, // 0x040 + _pad2: [u8; 4], // 0x054 + tboot_addr: u64, // 0x058 + ist_info: IstInfo, // 0x060 + pub acpi_rsdp_addr: u64, // 0x070 + _pad3: [u8; 8], // 0x078 + hd0_info: HdInfo, // 0x080 - obsolete + hd1_info: HdInfo, // 0x090 - obsolete + sys_desc_table: SysDescTable, // 0x0a0 - obsolete + olpc_ofw_header: OlpcOfwHeader, // 
0x0b0 + ext_ramdisk_image: u32, // 0x0c0 + ext_ramdisk_size: u32, // 0x0c4 + ext_cmd_line_ptr: u32, // 0x0c8 + _pad4: [u8; 0x74], // 0x0cc + edd_info: EdidInfo, // 0x140 + efi_info: EfiInfo, // 0x1c0 + alt_mem_k: u32, // 0x1e0 + scratch: u32, // 0x1e4 + e820_entries: u8, // 0x1e8 + eddbuf_entries: u8, // 0x1e9 + edd_mbr_sig_buf_entries: u8, // 0x1ea + kbd_status: u8, // 0x1eb + secure_boot: u8, // 0x1ec + _pad5: [u8; 2], // 0x1ed + sentinel: u8, // 0x1ef + _pad6: [u8; 1], // 0x1f0 + pub hdr: Header, // 0x1f1 + _pad7: [u8; 0x290 - HEADER_END], + edd_mbr_sig_buffer: [u32; 16], // 0x290 + e820_table: [E820Entry; 128], // 0x2d0 + _pad8: [u8; 0x30], // 0xcd0 + eddbuf: [EddInfo; 6], // 0xd00 + _pad9: [u8; 0x114], // 0xeec +} + +impl Default for Params { + fn default() -> Self { + // SAFETY: Struct consists entirely of primitive integral types. + unsafe { mem::zeroed() } + } +} + +impl Params { + pub fn set_entries(&mut self, info: &dyn Info) { + self.e820_entries = info.num_entries(); + for i in 0..self.e820_entries { + self.e820_table[i as usize] = info.entry(i); + } + } +} + +impl Info for Params { + fn rsdp_addr(&self) -> u64 { + self.acpi_rsdp_addr + } + fn cmdline(&self) -> &[u8] { + unsafe { common::from_cstring(self.hdr.cmd_line_ptr as u64) } + } + fn num_entries(&self) -> u8 { + self.e820_entries + } + fn entry(&self, idx: u8) -> E820Entry { + assert!(idx < self.num_entries()); + self.e820_table[idx as usize] + } +} + +const HEADER_START: usize = 0x1f1; +const HEADER_END: usize = HEADER_START + mem::size_of::
(); + +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct Header { + pub setup_sects: u8, + pub root_flags: u16, + pub syssize: u32, + pub ram_size: u16, + pub vid_mode: u16, + pub root_dev: u16, + pub boot_flag: u16, + pub jump: u16, + pub header: [u8; 4], + pub version: u16, + pub realmode_swtch: u32, + pub start_sys_seg: u16, + pub kernel_version: u16, + pub type_of_loader: u8, + pub loadflags: u8, + pub setup_move_size: u16, + pub code32_start: u32, + pub ramdisk_image: u32, + pub ramdisk_size: u32, + pub bootsect_kludge: u32, + pub heap_end_ptr: u16, + pub ext_loader_ver: u8, + pub ext_loader_type: u8, + pub cmd_line_ptr: u32, + pub initrd_addr_max: u32, + pub kernel_alignment: u32, + pub relocatable_kernel: u8, + pub min_alignment: u8, + pub xloadflags: u16, + pub cmdline_size: u32, + pub hardware_subarch: u32, + pub hardware_subarch_data: u64, + pub payload_offset: u32, + pub payload_length: u32, + pub setup_data: u64, + pub pref_address: u64, + pub init_size: u32, + pub handover_offset: u32, +} + +impl Header { + // Read a kernel header from the first two sectors of a file + pub fn from_file(f: &mut dyn Read) -> Result { + let mut data: [u8; 1024] = [0; 1024]; + let mut region = MemoryRegion::from_bytes(&mut data); + + f.seek(0)?; + f.load_file(&mut region)?; + + #[repr(C)] + struct HeaderData { + before: [u8; HEADER_START], + hdr: Header, + after: [u8; 1024 - HEADER_END], + } + // SAFETY: Struct consists entirely of primitive integral types. + Ok(unsafe { mem::transmute::<_, HeaderData>(data) }.hdr) + } +} + +// Right now the structs below are unused, so we only need them to be the correct +// size. Update test_size_and_offset if a struct's real definition is added. 
+#[derive(Clone, Copy)] +#[repr(C, packed)] +struct ScreenInfo([u8; 0x40]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct ApmBiosInfo([u8; 0x14]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct IstInfo([u8; 0x10]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct HdInfo([u8; 0x10]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct SysDescTable([u8; 0x10]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct OlpcOfwHeader([u8; 0x10]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct EdidInfo([u8; 0x80]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct EfiInfo([u8; 0x20]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct EddInfo([u8; 0x52]); + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_size_and_offset() { + assert_eq!(mem::size_of::
(), 119); + assert_eq!(mem::size_of::(), 20); + assert_eq!(mem::size_of::(), 4096); + + assert_eq!(offset_of!(Params, hdr), HEADER_START); + } +} diff --git a/src/bzimage.rs b/src/bzimage.rs index 330f13e9..176723a3 100644 --- a/src/bzimage.rs +++ b/src/bzimage.rs @@ -11,201 +11,167 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +use atomic_refcell::AtomicRefCell; -use crate::fat::{self, Read}; +use crate::{ + boot::{E820Entry, Header, Info, Params}, + fat::{self, Read}, + mem::MemoryRegion, +}; +#[derive(Debug)] pub enum Error { - FileError, - KernelOld, + FileError(fat::Error), + NoInitrdMemory, MagicMissing, NotRelocatable, } impl From for Error { - fn from(_: fat::Error) -> Error { - Error::FileError + fn from(e: fat::Error) -> Error { + Error::FileError(e) } } -// From firecracker -/// Kernel command line start address. -const CMDLINE_START: usize = 0x4b000; -/// Kernel command line start address maximum size. -const CMDLINE_MAX_SIZE: usize = 0x10000; -/// The 'zero page', a.k.a linux kernel bootparams. 
-pub const ZERO_PAGE_START: usize = 0x7000; +const KERNEL_LOCATION: u64 = 0x20_0000; -const KERNEL_LOCATION: u32 = 0x20_0000; +#[repr(transparent)] +pub struct Kernel(Params); -const E820_RAM: u32 = 1; - -#[repr(C, packed)] -struct E820Entry { - addr: u64, - size: u64, - entry_type: u32, -} - -pub fn load_initrd(f: &mut dyn Read) -> Result<(), Error> { - let mut zero_page = crate::mem::MemoryRegion::new(ZERO_PAGE_START as u64, 4096); - - let mut max_load_address = u64::from(zero_page.read_u32(0x22c)); - if max_load_address == 0 { - max_load_address = 0x37ff_ffff; +impl Kernel { + pub fn new(info: &dyn Info) -> Self { + let mut kernel = Self(Params::default()); + kernel.0.acpi_rsdp_addr = info.rsdp_addr(); + kernel.0.set_entries(info); + kernel } - let e820_count = zero_page.read_u8(0x1e8); - let e820_table = zero_page.as_mut_slice::(0x2d0, u64::from(e820_count)); + pub fn load_kernel(&mut self, f: &mut dyn Read) -> Result<(), Error> { + self.0.hdr = Header::from_file(f)?; - // Search E820 table for highest usable ram location that is below the limit. 
- let mut top_of_usable_ram = 0; - for entry in e820_table { - if entry.entry_type == E820_RAM { - let m = entry.addr + entry.size - 1; - if m > top_of_usable_ram && m < max_load_address { - top_of_usable_ram = m; - } + if self.0.hdr.boot_flag != 0xAA55 || self.0.hdr.header != *b"HdrS" { + return Err(Error::MagicMissing); + } + // Check relocatable + if self.0.hdr.version < 0x205 || self.0.hdr.relocatable_kernel == 0 { + return Err(Error::NotRelocatable); } - } - if top_of_usable_ram > max_load_address { - top_of_usable_ram = max_load_address; + // Skip over the setup sectors + let setup_sects = match self.0.hdr.setup_sects { + 0 => 4, + n => n as u32, + }; + let setup_bytes = (setup_sects + 1) * 512; + let remaining_bytes = f.get_size() - setup_bytes; + + let mut region = MemoryRegion::new(KERNEL_LOCATION, remaining_bytes as u64); + f.seek(setup_bytes)?; + f.load_file(&mut region)?; + + // Fill out "write/modify" fields + self.0.hdr.type_of_loader = 0xff; // Unknown Loader + self.0.hdr.code32_start = KERNEL_LOCATION as u32; // Where we load the kernel + self.0.hdr.cmd_line_ptr = CMDLINE_START as u32; // Where we load the cmdline + Ok(()) } - // Align address to 2MiB boundary as we use 2 MiB pages - let initrd_address = (top_of_usable_ram - u64::from(f.get_size())) & !((2 << 20) - 1); - let mut initrd_region = crate::mem::MemoryRegion::new(initrd_address, u64::from(f.get_size())); - - let mut offset = 0; - while offset < f.get_size() { - let bytes_remaining = f.get_size() - offset; - - // Use intermediata buffer for last, partial sector - if bytes_remaining < 512 { - let mut data: [u8; 512] = [0; 512]; - match f.read(&mut data) { - Err(crate::fat::Error::EndOfFile) => break, - Err(_) => return Err(Error::FileError), - Ok(_) => {} - } - let dst = initrd_region.as_mut_slice(u64::from(offset), u64::from(bytes_remaining)); - dst.copy_from_slice(&data[0..bytes_remaining as usize]); - break; - } + // Compute the load address for the initial ramdisk + fn 
initrd_addr(&self, size: u64) -> Option { + let initrd_addr_max = match self.0.hdr.initrd_addr_max { + 0 => 0x37FF_FFFF, + a => a as u64, + }; + let max_start = (initrd_addr_max + 1) - size; - let dst = initrd_region.as_mut_slice(u64::from(offset), 512); - match f.read(dst) { - Err(crate::fat::Error::EndOfFile) => break, - Err(_) => return Err(Error::FileError), - Ok(_) => {} + let mut option_addr = None; + for i in 0..self.0.num_entries() { + let entry = self.0.entry(i); + if entry.entry_type != E820Entry::RAM_TYPE { + continue; + } + let addr = entry.addr + entry.size - size; + // Align address to 2MiB boundary as we use 2 MiB pages + let addr = addr & !((2 << 20) - 1); + // The ramdisk must fit in the region completely + if addr > max_start || addr < entry.addr { + continue; + } + // Use the largest address we can find + if let Some(load_addr) = option_addr { + if load_addr >= addr { + continue; + } + } + option_addr = Some(addr) } - - offset += 512; + option_addr } - // initrd pointer/size - zero_page.write_u32(0x218, initrd_address as u32); - zero_page.write_u32(0x21c, f.get_size()); - Ok(()) -} - -pub fn append_commandline(addition: &str) -> Result<(), Error> { - let mut cmdline_region = - crate::mem::MemoryRegion::new(CMDLINE_START as u64, CMDLINE_MAX_SIZE as u64); - let zero_page = crate::mem::MemoryRegion::new(ZERO_PAGE_START as u64, 4096); - - let cmdline = cmdline_region.as_mut_slice::(0, CMDLINE_MAX_SIZE as u64); - - // Use the actual string length but limit to the orgiginal incoming size - let orig_len = zero_page.read_u32(0x238) as usize; - - let orig_cmdline = unsafe { - core::str::from_utf8_unchecked(&cmdline[0..orig_len]).trim_matches(char::from(0)) - }; - let orig_len = orig_cmdline.len(); - - cmdline[orig_len] = b' '; - cmdline[orig_len + 1..orig_len + 1 + addition.len()].copy_from_slice(addition.as_bytes()); - cmdline[orig_len + 1 + addition.len()] = 0; - - // Commandline pointer/size - zero_page.write_u32(0x228, CMDLINE_START as u32); - 
zero_page.write_u32(0x238, (orig_len + addition.len() + 1) as u32); - - Ok(()) -} - -pub fn load_kernel(f: &mut dyn Read) -> Result { - f.seek(0)?; - - let mut buf: [u8; 1024] = [0; 1024]; - - f.read(&mut buf[0..512])?; - f.read(&mut buf[512..])?; - - let setup = crate::mem::MemoryRegion::from_slice(&buf[..]); + pub fn load_initrd(&mut self, f: &mut dyn Read) -> Result<(), Error> { + let size = f.get_size() as u64; + let addr = match self.initrd_addr(size) { + Some(addr) => addr, + None => return Err(Error::NoInitrdMemory), + }; - if setup.read_u16(0x1fe) != 0xAA55 { - return Err(Error::MagicMissing); - } + let mut region = MemoryRegion::new(addr, size); + f.seek(0)?; + f.load_file(&mut region)?; - if setup.read_u32(0x202) != 0x5372_6448 { - return Err(Error::MagicMissing); + // initrd pointer/size + self.0.hdr.ramdisk_image = addr as u32; + self.0.hdr.ramdisk_size = size as u32; + Ok(()) } - // Need for relocation - if setup.read_u16(0x206) < 0x205 { - return Err(Error::KernelOld); + pub fn append_cmdline(&mut self, addition: &[u8]) { + if !addition.is_empty() { + CMDLINE.borrow_mut().append(addition); + assert!(CMDLINE.borrow().len() < self.0.hdr.cmdline_size); + } } - // Check relocatable - if setup.read_u8(0x234) == 0 { - return Err(Error::NotRelocatable); + pub fn boot(&mut self) { + // 0x200 is the startup_64 offset + let jump_address = self.0.hdr.code32_start as u64 + 0x200; + // Rely on x86 C calling convention where second argument is put into %rsi register + let ptr = jump_address as *const (); + let code: extern "C" fn(usize, usize) = unsafe { core::mem::transmute(ptr) }; + (code)(0 /* dummy value */, &mut self.0 as *mut _ as usize); } +} - let header_start = 0x1f1 as usize; - let header_end = 0x202 + buf[0x0201] as usize; - - // Reuse the zero page that we were originally given - // TODO: Zero and fill it ourself but will need to save E820 details - let mut zero_page = crate::mem::MemoryRegion::new(ZERO_PAGE_START as u64, 4096); - - let dst = 
zero_page.as_mut_slice(header_start as u64, (header_end - header_start) as u64); - dst.copy_from_slice(&buf[header_start..header_end]); - - // Unknown loader - zero_page.write_u8(0x210, 0xff); +// This is the highest region at which we can load the kernel command line. +const CMDLINE_START: u64 = 0x4b000; +const CMDLINE_MAX_LEN: u64 = 0x10000; - // Where we will load the kernel into - zero_page.write_u32(0x214, KERNEL_LOCATION); +static CMDLINE: AtomicRefCell = AtomicRefCell::new(CmdLine::new()); - let mut setup_sects = buf[header_start] as usize; +struct CmdLine { + region: MemoryRegion, + length: usize, // Does not include null terminator +} - if setup_sects == 0 { - setup_sects = 4; +impl CmdLine { + const fn new() -> Self { + Self { + region: MemoryRegion::new(CMDLINE_START, CMDLINE_MAX_LEN), + length: 0, + } } - setup_sects += 1; // Include the boot sector - - let setup_bytes = setup_sects * 512; // Use to start reading the main image - - let mut load_offset = u64::from(KERNEL_LOCATION); - - f.seek(setup_bytes as u32)?; - - loop { - let mut dst = crate::mem::MemoryRegion::new(load_offset, 512); - let dst = dst.as_mut_slice(0, 512); + const fn len(&self) -> u32 { + self.length as u32 + } - match f.read(dst) { - Err(crate::fat::Error::EndOfFile) => { - // 0x200 is the startup_64 offset - return Ok(u64::from(KERNEL_LOCATION) + 0x200); - } - Err(_) => return Err(Error::FileError), - Ok(_) => {} - }; + fn append(&mut self, args: &[u8]) { + let bytes = self.region.as_bytes(); + bytes[self.length] = b' '; + self.length += 1; - load_offset += 512; + bytes[self.length..self.length + args.len()].copy_from_slice(args); + self.length += args.len(); + bytes[self.length] = 0; } } diff --git a/src/common.rs b/src/common.rs index ea6ce683..4a7c4300 100644 --- a/src/common.rs +++ b/src/common.rs @@ -33,6 +33,24 @@ macro_rules! container_of_mut { }}; } +// SAFETY: Requires that addr point to a static, null-terminated C-string. 
+// The returned slice does not include the null-terminator. +pub unsafe fn from_cstring(addr: u64) -> &'static [u8] { + if addr == 0 { + return &[]; + } + let start = addr as *const u8; + let mut size: usize = 0; + while start.add(size).read() != 0 { + size += 1; + } + core::slice::from_raw_parts(start, size) +} + +pub fn ascii_strip(s: &[u8]) -> &str { + core::str::from_utf8(s).unwrap().trim_matches(char::from(0)) +} + pub fn ucs2_as_ascii_length(input: *const u16) -> usize { let mut len = 0; loop { diff --git a/src/efi/mod.rs b/src/efi/mod.rs index b1dedd82..61dfc0de 100644 --- a/src/efi/mod.rs +++ b/src/efi/mod.rs @@ -27,6 +27,8 @@ use r_efi::{ }, }; +use crate::boot; + mod alloc; mod block; mod console; @@ -574,29 +576,13 @@ extern "win64" fn image_unload(_: Handle) -> Status { efi::Status::UNSUPPORTED } -/// The 'zero page', a.k.a linux kernel bootparams. -pub const ZERO_PAGE_START: usize = 0x7000; - -const E820_RAM: u32 = 1; - -#[repr(C, packed)] -struct E820Entry { - addr: u64, - size: u64, - entry_type: u32, -} - const PAGE_SIZE: u64 = 4096; // Populate allocator from E820, fixed ranges for the firmware and the loaded binary. 
-fn populate_allocator(image_address: u64, image_size: u64) { - let mut zero_page = crate::mem::MemoryRegion::new(ZERO_PAGE_START as u64, 4096); - - let e820_count = zero_page.read_u8(0x1e8); - let e820_table = zero_page.as_mut_slice::(0x2d0, u64::from(e820_count)); - - for entry in e820_table { - if entry.entry_type == E820_RAM { +fn populate_allocator(info: &dyn boot::Info, image_address: u64, image_size: u64) { + for i in 0..info.num_entries() { + let entry = info.entry(i); + if entry.entry_type == boot::E820Entry::RAM_TYPE { ALLOCATOR.borrow_mut().add_initial_allocation( MemoryType::ConventionalMemory, entry.size / PAGE_SIZE, @@ -633,6 +619,7 @@ pub fn efi_exec( address: u64, loaded_address: u64, loaded_size: u64, + info: &dyn boot::Info, fs: &crate::fat::Filesystem, block: *const crate::block::VirtioBlockDevice, ) { @@ -715,7 +702,7 @@ pub fn efi_exec( }; let vendor_data = 0u32; - let acpi_rsdp_ptr = unsafe { *((ZERO_PAGE_START + 0x70) as u64 as *const u64) }; + let acpi_rsdp_ptr = info.rsdp_addr(); let mut ct = if acpi_rsdp_ptr != 0 { efi::ConfigurationTable { @@ -765,7 +752,7 @@ pub fn efi_exec( configuration_table: &mut ct, }; - populate_allocator(loaded_address, loaded_size); + populate_allocator(info, loaded_address, loaded_size); let efi_part_id = unsafe { block::populate_block_wrappers(&mut BLOCK_WRAPPERS, block) }; diff --git a/src/fat.rs b/src/fat.rs index abc6c707..1bdf1d38 100644 --- a/src/fat.rs +++ b/src/fat.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::block::SectorRead; +use crate::{block::SectorRead, mem::MemoryRegion}; #[repr(packed)] struct Header { @@ -244,6 +244,24 @@ pub trait Read { fn read(&mut self, data: &mut [u8]) -> Result; fn seek(&mut self, offset: u32) -> Result<(), Error>; fn get_size(&self) -> u32; + + // Loads the remainder of the file into the specified memory region + fn load_file(&mut self, mem: &mut MemoryRegion) -> Result<(), Error> { + let mut chunks = mem.as_bytes().chunks_exact_mut(512); + for chunk in chunks.by_ref() { + self.read(chunk)?; + } + let last = chunks.into_remainder(); + if last.is_empty() { + return Ok(()); + } + // Use tmp buffer for last, partial sector + let mut dst = [0; 512]; + let bytes = self.read(&mut dst)? as usize; + assert_eq!(bytes, last.len()); + last.copy_from_slice(&dst[..bytes]); + Ok(()) + } } impl<'a> Read for File<'a> { diff --git a/src/loader.rs b/src/loader.rs index 92b45bcb..bdd8aee9 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -13,7 +13,9 @@ // limitations under the License. 
use crate::{ - bzimage, + boot, + bzimage::{self, Kernel}, + common::ascii_strip, fat::{self, Read}, }; @@ -23,20 +25,21 @@ pub struct LoaderConfig { pub cmdline: [u8; 4096], } +#[derive(Debug)] pub enum Error { - FileError, - BzImageError, + FileError(fat::Error), + BzImageError(bzimage::Error), } impl From for Error { - fn from(_: fat::Error) -> Error { - Error::FileError + fn from(e: fat::Error) -> Error { + Error::FileError(e) } } impl From for Error { - fn from(_: bzimage::Error) -> Error { - Error::BzImageError + fn from(e: bzimage::Error) -> Error { + Error::BzImageError(e) } } @@ -103,10 +106,6 @@ fn parse_entry(f: &mut fat::File) -> Result { Ok(loader_config) } -fn ascii_strip(s: &[u8]) -> &str { - unsafe { core::str::from_utf8_unchecked(&s) }.trim_matches(char::from(0)) -} - const ENTRY_DIRECTORY: &str = "/loader/entries/"; fn default_entry_path(fs: &fat::Filesystem) -> Result<[u8; 260], fat::Error> { @@ -123,7 +122,7 @@ fn default_entry_path(fs: &fat::Filesystem) -> Result<[u8; 260], fat::Error> { Ok(entry_path) } -pub fn load_default_entry(fs: &fat::Filesystem) -> Result { +pub fn load_default_entry(fs: &fat::Filesystem, info: &dyn boot::Info) -> Result { let default_entry_path = default_entry_path(&fs)?; let default_entry_path = ascii_strip(&default_entry_path); @@ -134,19 +133,20 @@ pub fn load_default_entry(fs: &fat::Filesystem) -> Result { let initrd_path = ascii_strip(&entry.initrd_path); let cmdline = ascii_strip(&entry.cmdline); + let mut kernel = Kernel::new(info); + let mut bzimage_file = fs.open(bzimage_path)?; - let jump_address = bzimage::load_kernel(&mut bzimage_file)?; + kernel.load_kernel(&mut bzimage_file)?; if !initrd_path.is_empty() { let mut initrd_file = fs.open(initrd_path)?; - bzimage::load_initrd(&mut initrd_file)?; + kernel.load_initrd(&mut initrd_file)?; } - if !cmdline.is_empty() { - bzimage::append_commandline(cmdline)? 
- } + kernel.append_cmdline(info.cmdline()); + kernel.append_cmdline(cmdline.as_bytes()); - Ok(jump_address) + Ok(kernel) } #[cfg(test)] diff --git a/src/main.rs b/src/main.rs index a9bcb9b1..0da96ba7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -34,6 +34,7 @@ mod common; #[cfg(not(test))] mod asm; mod block; +mod boot; mod bzimage; mod efi; mod fat; @@ -43,6 +44,7 @@ mod paging; mod part; mod pci; mod pe; +mod pvh; mod virtio; #[cfg(all(not(test), feature = "log-panic"))] @@ -75,88 +77,84 @@ fn enable_sse() { const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4; const VIRTIO_PCI_BLOCK_DEVICE_ID: u16 = 0x1042; -fn boot_from_device(device: &mut block::VirtioBlockDevice) -> bool { - match device.init() { - Err(_) => { - log!("Error configuring block device"); - return false; - } - Ok(_) => log!( - "Virtio block device configured. Capacity: {} sectors", - device.get_capacity() - ), +fn boot_from_device(device: &mut block::VirtioBlockDevice, info: &dyn boot::Info) -> bool { + if let Err(err) = device.init() { + log!("Error configuring block device: {:?}", err); + return false; } + log!( + "Virtio block device configured. 
Capacity: {} sectors", + device.get_capacity() + ); - let mut f; - - match part::find_efi_partition(device) { - Ok((start, end)) => { - log!("Found EFI partition"); - f = fat::Filesystem::new(device, start, end); - if f.init().is_err() { - log!("Failed to create filesystem"); - return false; - } - } - Err(_) => { - log!("Failed to find EFI partition"); + let (start, end) = match part::find_efi_partition(device) { + Ok(p) => p, + Err(err) => { + log!("Failed to find EFI partition: {:?}", err); return false; } - } + }; + log!("Found EFI partition"); + let mut f = fat::Filesystem::new(device, start, end); + if let Err(err) = f.init() { + log!("Failed to create filesystem: {:?}", err); + return false; + } log!("Filesystem ready"); - let jump_address; + match loader::load_default_entry(&f, info) { + Ok(mut kernel) => { + device.reset(); + log!("Jumping to kernel"); + kernel.boot(); + return true; + } + Err(err) => log!("Error loading default entry: {:?}", err), + } - match loader::load_default_entry(&f) { - Ok(addr) => { - jump_address = addr; + log!("Using EFI boot."); + let mut file = match f.open("/EFI/BOOT/BOOTX64 EFI") { + Ok(file) => file, + Err(err) => { + log!("Failed to load default EFI binary: {:?}", err); + return false; } - Err(_) => { - log!("Error loading default entry. 
Using EFI boot."); - match f.open("/EFI/BOOT/BOOTX64 EFI") { - Ok(mut file) => { - log!("Found bootloader (BOOTX64.EFI)"); - let mut l = pe::Loader::new(&mut file); - match l.load(0x20_0000) { - Ok((a, size)) => { - log!("Executable loaded"); - efi::efi_exec(a, 0x20_0000, size, &f, device); - return true; - } - Err(e) => { - match e { - pe::Error::FileError => log!("File error"), - pe::Error::InvalidExecutable => log!("Invalid executable"), - } - return false; - } - } - } - Err(_) => { - log!("Failed to find bootloader"); - return false; - } - } + }; + log!("Found bootloader (BOOTX64.EFI)"); + + let mut l = pe::Loader::new(&mut file); + let load_addr = 0x20_0000; + let (entry_addr, size) = match l.load(load_addr) { + Ok(load_info) => load_info, + Err(err) => { + log!("Error loading executable: {:?}", err); + return false; } - } + }; device.reset(); - - log!("Jumping to kernel"); - - // Rely on x86 C calling convention where second argument is put into %rsi register - let ptr = jump_address as *const (); - let code: extern "C" fn(u64, u64) = unsafe { core::mem::transmute(ptr) }; - (code)(0 /* dummy value */, bzimage::ZERO_PAGE_START as u64); + log!("Executable loaded"); + efi::efi_exec(entry_addr, 0x20_0000, size, info, &f, device); true } -#[cfg_attr(not(test), no_mangle)] -pub extern "C" fn rust64_start() -> ! { - log!("\nStarting.."); +#[no_mangle] +pub extern "C" fn rust64_start(rdi: Option<&pvh::StartInfo>, rsi: Option<&boot::Params>) -> ! { + if let Some(start_info) = rdi { + log!("\nBooting via PVH Boot Protocol"); + run(start_info) + } + if let Some(boot_params) = rsi { + log!("\nBooting via Linux Boot Protocol"); + run(boot_params) + } + panic!("Unable to determine boot protocol") +} + +fn run(info: &dyn boot::Info) -> ! { enable_sse(); - paging::MANAGER.borrow_mut().setup(); + paging::setup(); pci::print_bus(); @@ -167,7 +165,7 @@ pub extern "C" fn rust64_start() -> ! 
{ let mut pci_transport = pci::VirtioPciTransport::new(pci_device); block::VirtioBlockDevice::new(&mut pci_transport); let mut device = block::VirtioBlockDevice::new(&mut pci_transport); - boot_from_device(&mut device) + boot_from_device(&mut device, info) }, ); diff --git a/src/mem.rs b/src/mem.rs index a52c2cf6..f8cfa1e8 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -22,18 +22,23 @@ pub struct MemoryRegion { } impl MemoryRegion { - pub fn new(base: u64, length: u64) -> MemoryRegion { + pub const fn new(base: u64, length: u64) -> MemoryRegion { MemoryRegion { base, length } } /// Take a slice and turn it into a region of memory - pub fn from_slice(data: &[T]) -> MemoryRegion { + pub fn from_bytes(data: &mut [u8]) -> MemoryRegion { MemoryRegion { base: data.as_ptr() as u64, - length: (data.len() * core::mem::size_of::()) as u64, + length: data.len() as u64, } } + // Expose the entire region as a byte slice + pub fn as_bytes(&mut self) -> &mut [u8] { + self.as_mut_slice(0, self.length) + } + /// Expose a section of the memory region as a slice pub fn as_mut_slice(&mut self, offset: u64, length: u64) -> &mut [T] { assert!((offset + (length * core::mem::size_of::() as u64)) <= self.length); diff --git a/src/paging.rs b/src/paging.rs index 3b3124fa..448ede42 100644 --- a/src/paging.rs +++ b/src/paging.rs @@ -1,56 +1,51 @@ -use atomic_refcell::AtomicRefCell; use x86_64::{ registers::control::Cr3, structures::paging::{PageSize, PageTable, PageTableFlags, PhysFrame, Size2MiB}, PhysAddr, }; -// This is the number of GiB we will identity map. +// Amount of memory we identity map in setup(), max 512 GiB. 
const ADDRESS_SPACE_GIB: usize = 4; -pub static MANAGER: AtomicRefCell = AtomicRefCell::new(Manager::new()); -pub struct Manager { - l4: PageTable, - l3: PageTable, - l2s: [PageTable; ADDRESS_SPACE_GIB], -} - -impl Manager { - const fn new() -> Self { - Manager { - l4: PageTable::new(), - l3: PageTable::new(), - l2s: [PageTable::new(); ADDRESS_SPACE_GIB], +// Put the Page Tables in static muts to make linking easier +#[no_mangle] +static mut L4_TABLE: PageTable = PageTable::new(); +#[no_mangle] +static mut L3_TABLE: PageTable = PageTable::new(); +#[no_mangle] +static mut L2_TABLES: [PageTable; ADDRESS_SPACE_GIB] = [PageTable::new(); ADDRESS_SPACE_GIB]; + +pub fn setup() { + // SAFETY: This function is idempotent and only writes to static memory and + // CR3. Thus, it is safe to run multiple times or on multiple threads. + let (l4, l3, l2s) = unsafe { (&mut L4_TABLE, &mut L3_TABLE, &mut L2_TABLES) }; + log!("Setting up {} GiB identity mapping", ADDRESS_SPACE_GIB); + let pt_flags = PageTableFlags::PRESENT | PageTableFlags::WRITABLE; + + // Setup Identity map using L2 huge pages + let mut next_addr = PhysAddr::new(0); + for l2 in l2s.iter_mut() { + for l2e in l2.iter_mut() { + l2e.set_addr(next_addr, pt_flags | PageTableFlags::HUGE_PAGE); + next_addr += Size2MiB::SIZE; + } } } - pub fn setup(&mut self) { - log!("Setting up {} GiB identity mapping", ADDRESS_SPACE_GIB); - - let pt_flags = PageTableFlags::PRESENT | PageTableFlags::WRITABLE; - // Setup Identity map using L2 huge pages - let mut next_addr = PhysAddr::new(0); - for l2 in self.l2s.iter_mut() { - for l2e in l2.iter_mut() { - l2e.set_addr(next_addr, pt_flags | PageTableFlags::HUGE_PAGE); - next_addr += Size2MiB::SIZE; - } - } - - // Point L3 at L2s - for (i, l2) in self.l2s.iter().enumerate() { - self.l3[i].set_addr(phys_addr(l2), pt_flags); - } + // Point L3 at L2s + for (i, l2) in l2s.iter().enumerate() { + l3[i].set_addr(phys_addr(l2), pt_flags); + } - // Point L4 at L3 - 
self.l4[0].set_addr(phys_addr(&self.l3), pt_flags); + // Point L4 at L3 + l4[0].set_addr(phys_addr(l3), pt_flags); - // Point Cr3 at PML4 - let cr3_flags = Cr3::read().1; - let pml4t_frame = PhysFrame::from_start_address(phys_addr(&self.l4)).unwrap(); - unsafe { Cr3::write(pml4t_frame, cr3_flags) }; - log!("Page tables setup"); + // Point Cr3 at L4 + let (cr3_frame, cr3_flags) = Cr3::read(); + let l4_frame = PhysFrame::from_start_address(phys_addr(l4)).unwrap(); + if cr3_frame != l4_frame { + unsafe { Cr3::write(l4_frame, cr3_flags) }; } + log!("Page tables setup"); } // Map a virtual address to a PhysAddr (assumes identity mapping) diff --git a/src/pe.rs b/src/pe.rs index 3dc1d526..2748fe78 100644 --- a/src/pe.rs +++ b/src/pe.rs @@ -60,7 +60,7 @@ impl<'a> Loader<'a> { Err(_) => return Err(Error::FileError), } - let dos_region = MemoryRegion::from_slice(&data); + let dos_region = MemoryRegion::from_bytes(&mut data); // 'MZ' magic if dos_region.read_u16(0) != 0x5a4d { @@ -74,7 +74,7 @@ impl<'a> Loader<'a> { return Err(Error::InvalidExecutable); } - let pe_region = MemoryRegion::from_slice(&data[pe_header_offset as usize..]); + let pe_region = MemoryRegion::from_bytes(&mut data[pe_header_offset as usize..]); // The Microsoft specification uses offsets relative to the COFF area // which is 4 after the signature (so all offsets are +4 relative to the spec) @@ -91,7 +91,8 @@ impl<'a> Loader<'a> { self.num_sections = pe_region.read_u16(6); let optional_header_size = pe_region.read_u16(20); - let optional_region = MemoryRegion::from_slice(&data[(24 + pe_header_offset) as usize..]); + let optional_region = + MemoryRegion::from_bytes(&mut data[(24 + pe_header_offset) as usize..]); // Only support x86-64 EFI if optional_region.read_u16(0) != 0x20b { @@ -177,7 +178,7 @@ impl<'a> Loader<'a> { let l: &mut [u8] = loaded_region .as_mut_slice(u64::from(section.virt_address), u64::from(section_size)); - let reloc_region = MemoryRegion::from_slice(l); + let reloc_region = 
MemoryRegion::from_bytes(l); let mut section_bytes_remaining = section_size; let mut offset = 0; diff --git a/src/pvh.rs b/src/pvh.rs new file mode 100644 index 00000000..c5a49f4f --- /dev/null +++ b/src/pvh.rs @@ -0,0 +1,55 @@ +use crate::{ + boot::{E820Entry, Info}, + common, +}; + +// Structures from xen/include/public/arch-x86/hvm/start_info.h +#[derive(Debug)] +#[repr(C)] +pub struct StartInfo { + magic: [u8; 4], + version: u32, + flags: u32, + nr_modules: u32, + modlist_paddr: u64, + cmdline_paddr: u64, + rsdp_paddr: u64, + memmap_paddr: u64, + memmap_entries: u32, + _pad: u32, +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +struct MemMapEntry { + addr: u64, + size: u64, + entry_type: u32, + _pad: u32, +} + +impl Info for StartInfo { + fn rsdp_addr(&self) -> u64 { + self.rsdp_paddr + } + fn cmdline(&self) -> &[u8] { + unsafe { common::from_cstring(self.cmdline_paddr) } + } + fn num_entries(&self) -> u8 { + // memmap_paddr and memmap_entries only exist in version 1 or later + if self.version < 1 || self.memmap_paddr == 0 { + return 0; + } + self.memmap_entries as u8 + } + fn entry(&self, idx: u8) -> E820Entry { + assert!(idx < self.num_entries()); + let ptr = self.memmap_paddr as *const MemMapEntry; + let entry = unsafe { *ptr.offset(idx as isize) }; + E820Entry { + addr: entry.addr, + size: entry.size, + entry_type: entry.entry_type, + } + } +} diff --git a/src/virtio.rs b/src/virtio.rs index 778e2305..df2a4faf 100644 --- a/src/virtio.rs +++ b/src/virtio.rs @@ -13,6 +13,7 @@ // limitations under the License. /// Virtio related errors +#[derive(Debug)] pub enum Error { VirtioUnsupportedDevice, VirtioLegacyOnly,