Skip to content

Commit 22c34f3

Browse files
committed
auto merge of #12172 : alexcrichton/rust/green-improvements, r=brson
These commits pick off some low-hanging fruit which were slowing down spawning green threads. The major speedup comes from fixing a bug in stack caching where we never used any cached stacks! The program I used to benchmark is at the end. It was compiled with `rustc --opt-level=3 bench.rs --test` and run as `RUST_THREADS=1 ./bench --bench`. I chose to use `RUST_THREADS=1` due to #11730 as the profiles I was getting interfered too much when all the schedulers were in play (and shouldn't be after #11730 is fixed). All of the units below are in ns/iter as reported by `--bench` (lower is better). | | green | native | raw | | ------------- | ----- | ------ | ------ | | osx before | 12699 | 24030 | 19734 | | linux before | 10223 | 125983 | 122647 | | osx after | 3847 | 25771 | 20835 | | linux after | 2631 | 135398 | 122765 | Note that this is *not* a benchmark of spawning green tasks vs native tasks. I put in the native numbers just to get a ballpark of where green tasks are. This benchmark is *clearly* benefiting from stack caching. Also, OSX is clearly not 5x faster than linux, I think my VM is just much slower. All in all, this ended up being a nice 4x speedup for spawning a green task when you're using a cached stack. ```rust extern mod extra; extern mod native; use std::rt::thread::Thread; #[bench] fn green(bh: &mut extra::test::BenchHarness) { let (p, c) = SharedChan::new(); bh.iter(|| { let c = c.clone(); spawn(proc() { c.send(()); }); p.recv(); }); } #[bench] fn native(bh: &mut extra::test::BenchHarness) { let (p, c) = SharedChan::new(); bh.iter(|| { let c = c.clone(); native::task::spawn(proc() { c.send(()); }); p.recv(); }); } #[bench] fn raw(bh: &mut extra::test::BenchHarness) { bh.iter(|| { Thread::start(proc() {}).join() }); } ```
2 parents 68129d2 + 301ff0c commit 22c34f3

File tree

13 files changed

+231
-141
lines changed

13 files changed

+231
-141
lines changed

mk/crates.mk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ TOOLS := compiletest rustdoc rustc
5757

5858
DEPS_std := native:rustrt native:compiler-rt
5959
DEPS_extra := std term sync serialize getopts collections
60-
DEPS_green := std
60+
DEPS_green := std native:context_switch
6161
DEPS_rustuv := std native:uv native:uv_support
6262
DEPS_native := std
6363
DEPS_syntax := std extra term serialize collections

mk/rt.mk

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
# that's per-target so you're allowed to conditionally add files based on the
3636
# target.
3737
################################################################################
38-
NATIVE_LIBS := rustrt sundown uv_support morestack miniz
38+
NATIVE_LIBS := rustrt sundown uv_support morestack miniz context_switch
3939

4040
# $(1) is the target triple
4141
define NATIVE_LIBRARIES
@@ -54,9 +54,10 @@ NATIVE_DEPS_rustrt_$(1) := rust_builtin.c \
5454
rust_android_dummy.c \
5555
rust_test_helpers.c \
5656
rust_try.ll \
57-
arch/$$(HOST_$(1))/_context.S \
5857
arch/$$(HOST_$(1))/record_sp.S
5958
NATIVE_DEPS_morestack_$(1) := arch/$$(HOST_$(1))/morestack.S
59+
NATIVE_DEPS_context_switch_$(1) := \
60+
arch/$$(HOST_$(1))/_context.S
6061

6162
################################################################################
6263
# You shouldn't find it that necessary to edit anything below this line.

src/libgreen/context.rs

Lines changed: 60 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
use std::libc::c_void;
1211
use std::uint;
1312
use std::cast::{transmute, transmute_mut_unsafe,
1413
transmute_region, transmute_mut_region};
1514
use stack::Stack;
1615
use std::unstable::stack;
16+
use std::unstable::raw;
1717

1818
// FIXME #7761: Registers is boxed so that it is 16-byte aligned, for storing
1919
// SSE regs. It would be marginally better not to do this. In C++ we
@@ -22,47 +22,33 @@ use std::unstable::stack;
2222
// the registers are sometimes empty, but the discriminant would
2323
// then misalign the regs again.
2424
pub struct Context {
25-
/// The context entry point, saved here for later destruction
26-
priv start: Option<~proc()>,
2725
/// Hold the registers while the task or scheduler is suspended
2826
priv regs: ~Registers,
2927
/// Lower bound and upper bound for the stack
3028
priv stack_bounds: Option<(uint, uint)>,
3129
}
3230

31+
pub type InitFn = extern "C" fn(uint, *(), *()) -> !;
32+
3333
impl Context {
3434
pub fn empty() -> Context {
3535
Context {
36-
start: None,
3736
regs: new_regs(),
3837
stack_bounds: None,
3938
}
4039
}
4140

4241
/// Create a new context that will resume execution by running proc()
43-
pub fn new(start: proc(), stack: &mut Stack) -> Context {
44-
// The C-ABI function that is the task entry point
45-
//
46-
// Note that this function is a little sketchy. We're taking a
47-
// procedure, transmuting it to a stack-closure, and then calling to
48-
// closure. This leverages the fact that the representation of these two
49-
// types is the same.
50-
//
51-
// The reason that we're doing this is that this procedure is expected
52-
// to never return. The codegen which frees the environment of the
53-
// procedure occurs *after* the procedure has completed, and this means
54-
// that we'll never actually free the procedure.
55-
//
56-
// To solve this, we use this transmute (to not trigger the procedure
57-
// deallocation here), and then store a copy of the procedure in the
58-
// `Context` structure returned. When the `Context` is deallocated, then
59-
// the entire procedure box will be deallocated as well.
60-
extern fn task_start_wrapper(f: &proc()) {
61-
unsafe {
62-
let f: &|| = transmute(f);
63-
(*f)()
64-
}
65-
}
42+
///
43+
/// The `init` function will be run with `arg` and the `start` procedure
44+
/// split up into code and env pointers. It is required that the `init`
45+
/// function never return.
46+
///
47+
/// FIXME: this is basically an awful interface. The main reason for
48+
/// this is to reduce the number of allocations made when a green
49+
/// task is spawned as much as possible
50+
pub fn new(init: InitFn, arg: uint, start: proc(),
51+
stack: &mut Stack) -> Context {
6652

6753
let sp: *uint = stack.end();
6854
let sp: *mut uint = unsafe { transmute_mut_unsafe(sp) };
@@ -74,14 +60,10 @@ impl Context {
7460
transmute_region(&*regs));
7561
};
7662

77-
// FIXME #7767: Putting main into a ~ so it's a thin pointer and can
78-
// be passed to the spawn function. Another unfortunate
79-
// allocation
80-
let start = ~start;
81-
8263
initialize_call_frame(&mut *regs,
83-
task_start_wrapper as *c_void,
84-
unsafe { transmute(&*start) },
64+
init,
65+
arg,
66+
unsafe { transmute(start) },
8567
sp);
8668

8769
// Scheduler tasks don't have a stack in the "we allocated it" sense,
@@ -96,7 +78,6 @@ impl Context {
9678
Some((stack_base as uint, sp as uint))
9779
};
9880
return Context {
99-
start: Some(start),
10081
regs: regs,
10182
stack_bounds: bounds,
10283
}
@@ -138,7 +119,7 @@ impl Context {
138119
}
139120
}
140121

141-
#[link(name = "rustrt", kind = "static")]
122+
#[link(name = "context_switch", kind = "static")]
142123
extern {
143124
fn rust_swap_registers(out_regs: *mut Registers, in_regs: *Registers);
144125
}
@@ -185,13 +166,17 @@ fn new_regs() -> ~Registers {
185166
}
186167

187168
#[cfg(target_arch = "x86")]
188-
fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
189-
sp: *mut uint) {
169+
fn initialize_call_frame(regs: &mut Registers, fptr: InitFn, arg: uint,
170+
procedure: raw::Procedure, sp: *mut uint) {
190171

172+
// x86 has interesting stack alignment requirements, so do some alignment
173+
// plus some offsetting to figure out what the actual stack should be.
191174
let sp = align_down(sp);
192175
let sp = mut_offset(sp, -4);
193176

194-
unsafe { *sp = arg as uint };
177+
unsafe { *mut_offset(sp, 2) = procedure.env as uint };
178+
unsafe { *mut_offset(sp, 1) = procedure.code as uint };
179+
unsafe { *mut_offset(sp, 0) = arg as uint };
195180
let sp = mut_offset(sp, -1);
196181
unsafe { *sp = 0 }; // The final return address
197182

@@ -215,14 +200,18 @@ fn new_regs() -> ~Registers { ~([0, .. 34]) }
215200
fn new_regs() -> ~Registers { ~([0, .. 22]) }
216201

217202
#[cfg(target_arch = "x86_64")]
218-
fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
219-
sp: *mut uint) {
203+
fn initialize_call_frame(regs: &mut Registers, fptr: InitFn, arg: uint,
204+
procedure: raw::Procedure, sp: *mut uint) {
205+
extern { fn rust_bootstrap_green_task(); }
220206

221207
// Redefinitions from rt/arch/x86_64/regs.h
222-
static RUSTRT_ARG0: uint = 3;
223208
static RUSTRT_RSP: uint = 1;
224209
static RUSTRT_IP: uint = 8;
225210
static RUSTRT_RBP: uint = 2;
211+
static RUSTRT_R12: uint = 4;
212+
static RUSTRT_R13: uint = 5;
213+
static RUSTRT_R14: uint = 6;
214+
static RUSTRT_R15: uint = 7;
226215

227216
let sp = align_down(sp);
228217
let sp = mut_offset(sp, -1);
@@ -231,13 +220,23 @@ fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
231220
unsafe { *sp = 0; }
232221

233222
rtdebug!("creating call frame");
234-
rtdebug!("fptr {}", fptr);
235-
rtdebug!("arg {}", arg);
223+
rtdebug!("fptr {:#x}", fptr as uint);
224+
rtdebug!("arg {:#x}", arg);
236225
rtdebug!("sp {}", sp);
237226

238-
regs[RUSTRT_ARG0] = arg as uint;
227+
// These registers are frobbed by rust_bootstrap_green_task into the right
228+
// location so we can invoke the "real init function", `fptr`.
229+
regs[RUSTRT_R12] = arg as uint;
230+
regs[RUSTRT_R13] = procedure.code as uint;
231+
regs[RUSTRT_R14] = procedure.env as uint;
232+
regs[RUSTRT_R15] = fptr as uint;
233+
234+
// These registers are picked up by the regular context switch paths. These
235+
// will put us in "mostly the right context" except for frobbing all the
236+
// arguments to the right place. We have the small trampoline code inside of
237+
// rust_bootstrap_green_task to do that.
239238
regs[RUSTRT_RSP] = sp as uint;
240-
regs[RUSTRT_IP] = fptr as uint;
239+
regs[RUSTRT_IP] = rust_bootstrap_green_task as uint;
241240

242241
// Last base pointer on the stack should be 0
243242
regs[RUSTRT_RBP] = 0;
@@ -250,18 +249,26 @@ type Registers = [uint, ..32];
250249
fn new_regs() -> ~Registers { ~([0, .. 32]) }
251250

252251
#[cfg(target_arch = "arm")]
253-
fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
254-
sp: *mut uint) {
252+
fn initialize_call_frame(regs: &mut Registers, fptr: InitFn, arg: uint,
253+
procedure: raw::Procedure, sp: *mut uint) {
254+
extern { fn rust_bootstrap_green_task(); }
255+
255256
let sp = align_down(sp);
256257
// sp of arm eabi is 8-byte aligned
257258
let sp = mut_offset(sp, -2);
258259

259260
// The final return address. 0 indicates the bottom of the stack
260261
unsafe { *sp = 0; }
261262

262-
regs[0] = arg as uint; // r0
263-
regs[13] = sp as uint; // #53 sp, r13
264-
regs[14] = fptr as uint; // #60 pc, r15 --> lr
263+
// ARM uses the same technique as x86_64 to have a landing pad for the start
264+
// of all new green tasks. Neither r1/r2 are saved on a context switch, so
265+
// the shim will copy r3/r4 into r1/r2 and then execute the function in r5
266+
regs[0] = arg as uint; // r0
267+
regs[3] = procedure.code as uint; // r3
268+
regs[4] = procedure.env as uint; // r4
269+
regs[5] = fptr as uint; // r5
270+
regs[13] = sp as uint; // #52 sp, r13
271+
regs[14] = rust_bootstrap_green_task as uint; // #56 pc, r14 --> lr
265272
}
266273

267274
#[cfg(target_arch = "mips")]
@@ -271,8 +278,8 @@ type Registers = [uint, ..32];
271278
fn new_regs() -> ~Registers { ~([0, .. 32]) }
272279

273280
#[cfg(target_arch = "mips")]
274-
fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
275-
sp: *mut uint) {
281+
fn initialize_call_frame(regs: &mut Registers, fptr: InitFn, arg: uint,
282+
procedure: raw::Procedure, sp: *mut uint) {
276283
let sp = align_down(sp);
277284
// sp of mips o32 is 8-byte aligned
278285
let sp = mut_offset(sp, -2);

src/libgreen/coroutine.rs

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
// Coroutines represent nothing more than a context and a stack
1212
// segment.
1313

14-
use std::rt::env;
15-
1614
use context::Context;
1715
use stack::{StackPool, Stack};
1816

@@ -31,22 +29,6 @@ pub struct Coroutine {
3129
}
3230

3331
impl Coroutine {
34-
pub fn new(stack_pool: &mut StackPool,
35-
stack_size: Option<uint>,
36-
start: proc())
37-
-> Coroutine {
38-
let stack_size = match stack_size {
39-
Some(size) => size,
40-
None => env::min_stack()
41-
};
42-
let mut stack = stack_pool.take_stack(stack_size);
43-
let initial_context = Context::new(start, &mut stack);
44-
Coroutine {
45-
current_stack_segment: stack,
46-
saved_context: initial_context
47-
}
48-
}
49-
5032
pub fn empty() -> Coroutine {
5133
Coroutine {
5234
current_stack_segment: unsafe { Stack::dummy_stack() },

src/libgreen/sched.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -756,7 +756,7 @@ impl Scheduler {
756756

757757
/// Called by a running task to end execution, after which it will
758758
/// be recycled by the scheduler for reuse in a new task.
759-
pub fn terminate_current_task(mut ~self, cur: ~GreenTask) {
759+
pub fn terminate_current_task(mut ~self, cur: ~GreenTask) -> ! {
760760
// Similar to deschedule running task and then, but cannot go through
761761
// the task-blocking path. The task is already dying.
762762
let stask = self.sched_task.take_unwrap();

src/libgreen/stack.rs

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,9 @@ impl StackPool {
138138

139139
pub fn take_stack(&mut self, min_size: uint) -> Stack {
140140
// Ideally this would be a binary search
141-
match self.stacks.iter().position(|s| s.min_size < min_size) {
141+
match self.stacks.iter().position(|s| min_size <= s.min_size) {
142142
Some(idx) => self.stacks.swap_remove(idx),
143-
None => Stack::new(min_size)
143+
None => Stack::new(min_size)
144144
}
145145
}
146146

@@ -156,3 +156,33 @@ extern {
156156
end: *libc::uintptr_t) -> libc::c_uint;
157157
fn rust_valgrind_stack_deregister(id: libc::c_uint);
158158
}
159+
160+
#[cfg(test)]
161+
mod tests {
162+
use super::StackPool;
163+
164+
#[test]
165+
fn stack_pool_caches() {
166+
let mut p = StackPool::new();
167+
let s = p.take_stack(10);
168+
p.give_stack(s);
169+
let s = p.take_stack(4);
170+
assert_eq!(s.min_size, 10);
171+
p.give_stack(s);
172+
let s = p.take_stack(14);
173+
assert_eq!(s.min_size, 14);
174+
p.give_stack(s);
175+
}
176+
177+
#[test]
178+
fn stack_pool_caches_exact() {
179+
let mut p = StackPool::new();
180+
let mut s = p.take_stack(10);
181+
s.valgrind_id = 100;
182+
p.give_stack(s);
183+
184+
let s = p.take_stack(10);
185+
assert_eq!(s.min_size, 10);
186+
assert_eq!(s.valgrind_id, 100);
187+
}
188+
}

0 commit comments

Comments
 (0)