Skip to content

Commit f1fde19

Browse files
author
zhiayang
committed
kernel: some QoL changes
1. Expose the scheduler::pause/unpause/isPaused functions, which replace manually changing the numHeldLocks variable to prevent preemption. 2. Allow specifying the AddressSpaceType into which a memticket is collected, so that tickets can also be collected into kernel space.
1 parent cdd64e4 commit f1fde19

File tree

12 files changed

+149
-94
lines changed

12 files changed

+149
-94
lines changed

Diff for: kernel/include/cpu/scheduler.h

+3
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,9 @@ namespace nx
357357
void notifyOne(condition* cv);
358358
void notifyAll(condition* cv);
359359

360+
void pause();
361+
void unpause();
362+
bool isPaused();
360363

361364
// returns t.
362365
Thread* addThread(Thread* t);

Diff for: kernel/include/export/syscall_funcs.h

+40-38
Original file line numberDiff line numberDiff line change
@@ -9,44 +9,46 @@
99

1010
namespace nx
1111
{
12-
constexpr uint64_t SYSCALL_EXIT = 0;
13-
14-
constexpr uint64_t SYSCALL_IPC_SEND = 1;
15-
constexpr uint64_t SYSCALL_IPC_PEEK = 2;
16-
constexpr uint64_t SYSCALL_IPC_POLL = 3;
17-
constexpr uint64_t SYSCALL_IPC_DISCARD = 4;
18-
constexpr uint64_t SYSCALL_IPC_RECEIVE = 5;
19-
constexpr uint64_t SYSCALL_IPC_RECEIVE_BLOCK = 6;
20-
constexpr uint64_t SYSCALL_IPC_SET_SIG_HANDLER = 7;
21-
constexpr uint64_t SYSCALL_IPC_SIGNAL = 8;
22-
constexpr uint64_t SYSCALL_IPC_SIGNAL_BLOCK = 9;
23-
constexpr uint64_t SYSCALL_IPC_FIND_SELECTOR = 10;
24-
25-
constexpr uint64_t SYSCALL_RPC_CALL = 11;
26-
constexpr uint64_t SYSCALL_RPC_CALL_PROCEDURE = 12;
27-
constexpr uint64_t SYSCALL_RPC_RETURN = 13;
28-
constexpr uint64_t SYSCALL_RPC_WAIT_CALL = 14;
29-
constexpr uint64_t SYSCALL_RPC_WAIT_ANY_CALL = 15;
30-
constexpr uint64_t SYSCALL_RPC_OPEN = 16;
31-
constexpr uint64_t SYSCALL_RPC_CLOSE = 17;
32-
constexpr uint64_t SYSCALL_RPC_FORWARD = 18;
33-
34-
constexpr uint64_t SYSCALL_MMAP_ANON = 20;
35-
constexpr uint64_t SYSCALL_MMAP_FILE = 21;
36-
37-
constexpr uint64_t SYSCALL_MEMTICKET_CREATE = 30;
38-
constexpr uint64_t SYSCALL_MEMTICKET_COLLECT = 31;
39-
constexpr uint64_t SYSCALL_MEMTICKET_RELEASE = 32;
40-
constexpr uint64_t SYSCALL_MEMTICKET_FIND = 33;
41-
42-
constexpr uint64_t SYSCALL_VFS_READ = 80;
43-
constexpr uint64_t SYSCALL_VFS_WRITE = 81;
44-
45-
constexpr uint64_t SYSCALL_GET_NANOSECOND_TS = 97;
46-
constexpr uint64_t SYSCALL_USER_SIGNAL_LEAVE = 98;
47-
constexpr uint64_t SYSCALL_LOG = 99;
48-
49-
constexpr uint64_t _SYSCALL_MAX = 100;
12+
constexpr uint64_t SYSCALL_EXIT = 0;
13+
14+
constexpr uint64_t SYSCALL_IPC_SEND = 1;
15+
constexpr uint64_t SYSCALL_IPC_PEEK = 2;
16+
constexpr uint64_t SYSCALL_IPC_POLL = 3;
17+
constexpr uint64_t SYSCALL_IPC_DISCARD = 4;
18+
constexpr uint64_t SYSCALL_IPC_RECEIVE = 5;
19+
constexpr uint64_t SYSCALL_IPC_RECEIVE_BLOCK = 6;
20+
constexpr uint64_t SYSCALL_IPC_SET_SIG_HANDLER = 7;
21+
constexpr uint64_t SYSCALL_IPC_SIGNAL = 8;
22+
constexpr uint64_t SYSCALL_IPC_SIGNAL_BLOCK = 9;
23+
constexpr uint64_t SYSCALL_IPC_FIND_SELECTOR = 10;
24+
25+
constexpr uint64_t SYSCALL_RPC_CALL = 11;
26+
constexpr uint64_t SYSCALL_RPC_CALL_PROCEDURE = 12;
27+
constexpr uint64_t SYSCALL_RPC_RETURN = 13;
28+
constexpr uint64_t SYSCALL_RPC_WAIT_CALL = 14;
29+
constexpr uint64_t SYSCALL_RPC_WAIT_ANY_CALL = 15;
30+
constexpr uint64_t SYSCALL_RPC_OPEN = 16;
31+
constexpr uint64_t SYSCALL_RPC_CLOSE = 17;
32+
constexpr uint64_t SYSCALL_RPC_FORWARD = 18;
33+
34+
constexpr uint64_t SYSCALL_MMAP_ANON = 20;
35+
constexpr uint64_t SYSCALL_MMAP_FILE = 21;
36+
37+
constexpr uint64_t SYSCALL_MEMTICKET_CREATE = 30;
38+
constexpr uint64_t SYSCALL_MEMTICKET_COLLECT = 31;
39+
constexpr uint64_t SYSCALL_MEMTICKET_RELEASE = 32;
40+
constexpr uint64_t SYSCALL_MEMTICKET_FIND = 33;
41+
42+
constexpr uint64_t SYSCALL_PROCESS_SPAWN_FROM_MEMORY = 40;
43+
44+
constexpr uint64_t SYSCALL_VFS_READ = 80;
45+
constexpr uint64_t SYSCALL_VFS_WRITE = 81;
46+
47+
constexpr uint64_t SYSCALL_GET_NANOSECOND_TS = 97;
48+
constexpr uint64_t SYSCALL_USER_SIGNAL_LEAVE = 98;
49+
constexpr uint64_t SYSCALL_LOG = 99;
50+
51+
constexpr uint64_t _SYSCALL_MAX = 100;
5052
}
5153

5254
#endif

Diff for: kernel/include/ipc.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,12 @@ namespace nx::ipc
6161

6262
memticket_id createMemticket(size_t len, uint64_t flags);
6363
void releaseMemticket(const mem_ticket_t& ticket);
64-
mem_ticket_t collectMemticket(memticket_id ticketId);
6564
mem_ticket_t findExistingMemticket(memticket_id ticketId);
6665

66+
// map to userspace by default, but allow overriding so we can put tickets in kernel space.
67+
mem_ticket_t collectMemticket(memticket_id ticketId,
68+
vmm::AddressSpaceType aspace = vmm::AddressSpaceType::User);
69+
6770
void cleanupProcessTickets(scheduler::Process* proc);
6871

6972
// TODO: for now, again we can only send messages/signals to threads, not processes.

Diff for: kernel/include/mm.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,9 @@ namespace nx
198198
nx::array<PhysAddr> allocatedPhysPages;
199199

200200
// this is a spinlock! see the paragraphs in _heap_impl.h on why.
201-
nx::spinlock lk;
201+
// this is also a *recursive* spinlock! it might happen that updating the addressspace
202+
// requires performing other addrspace-related operations, which should be allowable.
203+
nx::r_spinlock lk;
202204

203205
struct LockedAddrSpace
204206
{

Diff for: kernel/include/nx.h

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ namespace nx
3939
namespace initrd
4040
{
4141
void init(BootInfo* bi);
42+
ipc::mem_ticket_t getInitrdMemticket();
4243
}
4344
}
4445

Diff for: kernel/source/arch/x86_64/exceptions/handlers.s

+1
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ nx_x64_exception_handler_31:
278278

279279

280280

281+
.align 16
281282
.global __last_saved_error_code
282283
__last_saved_error_code:
283284
.quad 0

Diff for: kernel/source/ipc/memticket.cpp

+7-4
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ namespace nx::ipc
9696
return -1;
9797
}
9898

99-
mem_ticket_t collectMemticket(memticket_id ticketId)
99+
mem_ticket_t collectMemticket(memticket_id ticketId, vmm::AddressSpaceType aspace)
100100
{
101101
auto ret_ticket = ipc::mem_ticket_t();
102102
memset(&ret_ticket, 0, sizeof(ipc::mem_ticket_t));
@@ -118,16 +118,19 @@ namespace nx::ipc
118118
auto proc = scheduler::getCurrentProcess();
119119

120120
// TODO: we might want to abstract this out a bit more?
121-
auto virt = vmm::allocateAddrSpace(tik->numPages, vmm::AddressSpaceType::User);
121+
auto virt = vmm::allocateAddrSpace(tik->numPages, aspace);
122122
auto svmr = vmm::SharedVMRegion(virt, tik->numPages, &tik->physicalPages);
123123

124124
proc->addrspace.lock()->addSharedRegion(svmr.clone());
125125

126126
proc->collectedTickets.lock()->append(ticketId);
127127

128128
// mapLazy will not even map the pages present, so both a read and a write will trap
129-
vmm::mapLazy(virt, tik->numPages,
130-
vmm::PAGE_USER | ((tik->flags & ipc::MEMTICKET_FLAG_WRITE) ? vmm::PAGE_WRITE : 0), proc);
129+
auto pageflags = (tik->flags & ipc::MEMTICKET_FLAG_WRITE) ? vmm::PAGE_WRITE : 0;
130+
if(aspace == vmm::AddressSpaceType::User)
131+
pageflags |= vmm::PAGE_USER;
132+
133+
vmm::mapLazy(virt, tik->numPages, pageflags, proc);
131134

132135
tik->ref();
133136
tik->collectors[proc].insert(virt, krt::move(svmr));

Diff for: kernel/source/main.cpp

+28-8
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include "cpu/exceptions.h"
1515
#include "devices/pc/apic.h"
1616

17+
#include "rpc.h"
18+
1719
namespace nx
1820
{
1921
// TODO: jesus christ get rid of this
@@ -174,10 +176,6 @@ namespace nx
174176
// parse the kernel parameters from the bootloader.
175177
params::init(bootinfo);
176178

177-
// setup the vfs and the initrd
178-
vfs::init();
179-
initrd::init(bootinfo);
180-
181179
// init the real console
182180
console::init(bootinfo->fbHorz, bootinfo->fbVert, bootinfo->fbScanWidth);
183181

@@ -192,17 +190,23 @@ namespace nx
192190
if(!params::haveOption("no_symbols"))
193191
util::initKernelSymbols(bootinfo);
194192

195-
// we should be done with the bootinfo now.
196-
pmm::freeAllEarlyMemory(bootinfo);
197-
198193
// initialise the interrupt controller (APIC or PIC).
199194
// init_arch allows us to do basic scheduling.
200195
interrupts::init_arch();
201196

197+
// bootstrap the scheduler first
198+
scheduler::bootstrap();
199+
200+
// setup the vfs and the initrd
201+
vfs::init();
202+
initrd::init(bootinfo);
203+
204+
// we should be done with the bootinfo now.
205+
pmm::freeAllEarlyMemory(bootinfo);
206+
202207
// hopefully we are flying more than half a ship at this point
203208
// initialise the scheduler with some threads -- this function will end!!
204209
{
205-
scheduler::bootstrap();
206210
scheduler::init();
207211
scheduler::installTickHandlers();
208212

@@ -261,8 +265,24 @@ namespace nx
261265
*/
262266

263267

268+
/*
269+
todo (09/01/2021):
264270
271+
1. all prior todos
265272
273+
2. a lot of the syscalls don't return error codes; we need to figure out a way to either
274+
(a) return an error code nicely to the user via the return value
275+
(b) use (clobber) another register to return an error code "out of band"
276+
*/
277+
278+
/*
279+
todo (19/02/2021):
280+
281+
1. all prior todos
282+
283+
2. it would be nice to have a mechanism for "prefetching" pagefaults (we need that for mmap's
284+
MAP_POPULATE in the future anyway).
285+
*/
266286

267287

268288

Diff for: kernel/source/misc/initrd.cpp

+21-1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,27 @@ struct tarent_t
5050
namespace nx {
5151
namespace initrd
5252
{
53+
static ipc::mem_ticket_t initrdTicket;
54+
static void* setup_memticket(size_t size)
55+
{
56+
auto id = ipc::createMemticket(size, ipc::MEMTICKET_FLAG_READ | ipc::MEMTICKET_FLAG_WRITE);
57+
if(id == (ipc::memticket_id) -1)
58+
abort("failed to create memticket for initrd");
59+
60+
//* obviously put it in the kernel address space. since memtickets are lazy by design, this...
61+
//* will be a page-fault-fest on startup... which is probably not that big of a deal.
62+
initrdTicket = ipc::collectMemticket(id, vmm::AddressSpaceType::Kernel);
63+
if(!initrdTicket.ptr || !initrdTicket.len)
64+
abort("failed to collect memticket for initrd");
65+
66+
return initrdTicket.ptr;
67+
}
68+
69+
ipc::mem_ticket_t getInitrdMemticket()
70+
{
71+
return initrdTicket;
72+
}
73+
5374
void init(BootInfo* bi)
5475
{
5576
if(bi->initrdSize == 0 || bi->initrdBuffer == 0)
@@ -69,7 +90,6 @@ namespace initrd
6990
// check if it's gzip first.
7091
if(((gzip_header_t*) initrd)->magic[0] == 0x1F && ((gzip_header_t*) initrd)->magic[1] == 0x8B)
7192
{
72-
log("initrd", "format: gzip (compressed: {} bytes)", inpSz);
7393
println("decompressing initrd...\n");
7494

7595
// note: we do this to avoid unaligned access, which ubsan complains about.

Diff for: kernel/source/mm/page_fault.cpp

+1-4
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ namespace vmm
1010
{
1111
constexpr bool LOG_ALL_FAULTS = false;
1212

13-
1413
bool handlePageFault(uint64_t cr2, uint64_t errorCode, uint64_t rip)
1514
{
1615
// The error code gives us details of what happened.
@@ -21,14 +20,12 @@ namespace vmm
2120
if(reserved)
2221
return false;
2322

24-
2523
auto aligned_cr2 = VirtAddr(PAGE_ALIGN(cr2));
2624

2725
// this will get the flags for the current process's address space, so we
2826
// don't need to pass it explicitly.
2927
auto flags = vmm::getPageFlags(aligned_cr2);
3028

31-
3229
/*
3330
make sure this is something that's a legitimate situation.
3431
@@ -40,7 +37,7 @@ namespace vmm
4037
if(flags & PAGE_COPY_ON_WRITE)
4138
{
4239
auto pid = scheduler::getCurrentProcess()->processId;
43-
auto tid = scheduler::getCurrentThread()->threadId;
40+
auto tid = scheduler::getCurrentThread() ? scheduler::getCurrentThread()->threadId : 0;
4441

4542
// ok, we should be sane here. we enable interrupts here, for a number of reasons:
4643
// 1. this handler might need to do complex work, so we don't want to hold up the

Diff for: kernel/source/syscall/functions.cpp

+34-35
Original file line numberDiff line numberDiff line change
@@ -25,53 +25,52 @@ namespace syscall
2525
for(size_t i = 0; i < SyscallTableEntryCount; i++)
2626
SyscallTable[i] = (void*) do_nothing;
2727

28-
SyscallTable[SYSCALL_EXIT] = (void*) exit;
28+
SyscallTable[SYSCALL_EXIT] = (void*) exit;
2929

3030
// syscall/sc_ipc.cpp
31-
SyscallTable[SYSCALL_IPC_SEND] = (void*) ipc_send;
32-
SyscallTable[SYSCALL_IPC_PEEK] = (void*) ipc_peek;
33-
SyscallTable[SYSCALL_IPC_POLL] = (void*) ipc_poll;
34-
SyscallTable[SYSCALL_IPC_DISCARD] = (void*) ipc_discard;
35-
SyscallTable[SYSCALL_IPC_RECEIVE] = (void*) ipc_receive;
36-
SyscallTable[SYSCALL_IPC_RECEIVE_BLOCK] = (void*) do_nothing;
37-
SyscallTable[SYSCALL_IPC_SET_SIG_HANDLER] = (void*) ipc_set_signal_handler;
38-
SyscallTable[SYSCALL_IPC_SIGNAL] = (void*) ipc_signal;
39-
SyscallTable[SYSCALL_IPC_SIGNAL_BLOCK] = (void*) ipc_signal_block;
40-
SyscallTable[SYSCALL_IPC_FIND_SELECTOR] = (void*) ipc_find_selector;
31+
SyscallTable[SYSCALL_IPC_SEND] = (void*) ipc_send;
32+
SyscallTable[SYSCALL_IPC_PEEK] = (void*) ipc_peek;
33+
SyscallTable[SYSCALL_IPC_POLL] = (void*) ipc_poll;
34+
SyscallTable[SYSCALL_IPC_DISCARD] = (void*) ipc_discard;
35+
SyscallTable[SYSCALL_IPC_RECEIVE] = (void*) ipc_receive;
36+
SyscallTable[SYSCALL_IPC_RECEIVE_BLOCK] = (void*) do_nothing;
37+
SyscallTable[SYSCALL_IPC_SET_SIG_HANDLER] = (void*) ipc_set_signal_handler;
38+
SyscallTable[SYSCALL_IPC_SIGNAL] = (void*) ipc_signal;
39+
SyscallTable[SYSCALL_IPC_SIGNAL_BLOCK] = (void*) ipc_signal_block;
40+
SyscallTable[SYSCALL_IPC_FIND_SELECTOR] = (void*) ipc_find_selector;
4141

4242
// syscall/sc_rpc.cpp
43-
SyscallTable[SYSCALL_RPC_CALL] = (void*) rpc_call;
44-
SyscallTable[SYSCALL_RPC_CALL_PROCEDURE] = (void*) rpc_call_procedure;
45-
SyscallTable[SYSCALL_RPC_RETURN] = (void*) rpc_return;
46-
SyscallTable[SYSCALL_RPC_WAIT_CALL] = (void*) rpc_wait_call;
47-
SyscallTable[SYSCALL_RPC_WAIT_ANY_CALL] = (void*) rpc_wait_any_call;
48-
SyscallTable[SYSCALL_RPC_OPEN] = (void*) rpc_open;
49-
SyscallTable[SYSCALL_RPC_CLOSE] = (void*) rpc_close;
50-
SyscallTable[SYSCALL_RPC_FORWARD] = (void*) do_nothing;
43+
SyscallTable[SYSCALL_RPC_CALL] = (void*) rpc_call;
44+
SyscallTable[SYSCALL_RPC_CALL_PROCEDURE] = (void*) rpc_call_procedure;
45+
SyscallTable[SYSCALL_RPC_RETURN] = (void*) rpc_return;
46+
SyscallTable[SYSCALL_RPC_WAIT_CALL] = (void*) rpc_wait_call;
47+
SyscallTable[SYSCALL_RPC_WAIT_ANY_CALL] = (void*) rpc_wait_any_call;
48+
SyscallTable[SYSCALL_RPC_OPEN] = (void*) rpc_open;
49+
SyscallTable[SYSCALL_RPC_CLOSE] = (void*) rpc_close;
50+
SyscallTable[SYSCALL_RPC_FORWARD] = (void*) do_nothing;
5151

5252
// syscall/sc_mmap.cpp
53-
SyscallTable[SYSCALL_MMAP_ANON] = (void*) mmap_anon;
54-
SyscallTable[SYSCALL_MMAP_FILE] = (void*) mmap_file;
53+
SyscallTable[SYSCALL_MMAP_ANON] = (void*) mmap_anon;
54+
SyscallTable[SYSCALL_MMAP_FILE] = (void*) mmap_file;
5555

5656
// syscall/sc_memticket.cpp
57-
SyscallTable[SYSCALL_MEMTICKET_CREATE] = (void*) create_memory_ticket;
58-
SyscallTable[SYSCALL_MEMTICKET_COLLECT] = (void*) collect_memory_ticket;
59-
SyscallTable[SYSCALL_MEMTICKET_RELEASE] = (void*) release_memory_ticket;
60-
SyscallTable[SYSCALL_MEMTICKET_FIND] = (void*) find_existing_memory_ticket;
57+
SyscallTable[SYSCALL_MEMTICKET_CREATE] = (void*) create_memory_ticket;
58+
SyscallTable[SYSCALL_MEMTICKET_COLLECT] = (void*) collect_memory_ticket;
59+
SyscallTable[SYSCALL_MEMTICKET_RELEASE] = (void*) release_memory_ticket;
60+
SyscallTable[SYSCALL_MEMTICKET_FIND] = (void*) find_existing_memory_ticket;
6161

62-
// syscall/sc_vfs.cpp
63-
SyscallTable[SYSCALL_VFS_READ] = (void*) vfs_read;
64-
SyscallTable[SYSCALL_VFS_WRITE] = (void*) vfs_write;
62+
// syscall/sc_process.cpp
63+
SyscallTable[SYSCALL_PROCESS_SPAWN_FROM_MEMORY] = (void*) spawn_process_from_memory;
6564

66-
SyscallTable[77] = (void*) tmp_debug;
65+
// syscall/sc_vfs.cpp
66+
SyscallTable[SYSCALL_VFS_READ] = (void*) vfs_read;
67+
SyscallTable[SYSCALL_VFS_WRITE] = (void*) vfs_write;
6768

68-
SyscallTable[SYSCALL_GET_NANOSECOND_TS] = (void*) nanosecond_timestamp;
69-
SyscallTable[SYSCALL_USER_SIGNAL_LEAVE] = (void*) user_signal_leave;
70-
SyscallTable[SYSCALL_LOG] = (void*) kernel_log;
69+
SyscallTable[77] = (void*) tmp_debug;
7170

72-
// SyscallTable[11] = (void*) debug_char;
73-
// SyscallTable[12] = (void*) debug_ptr;
74-
// SyscallTable[13] = (void*) debug_str;
71+
SyscallTable[SYSCALL_GET_NANOSECOND_TS] = (void*) nanosecond_timestamp;
72+
SyscallTable[SYSCALL_USER_SIGNAL_LEAVE] = (void*) user_signal_leave;
73+
SyscallTable[SYSCALL_LOG] = (void*) kernel_log;
7574
}
7675

7776
void init()

0 commit comments

Comments
 (0)