
Commit f06935b

Have the main thread take the role of thread zero when running the threadpool.
1 parent 4a293e1 commit f06935b

File tree: 1 file changed (+53, −30 lines)


src/threadpool.c

Lines changed: 53 additions & 30 deletions
@@ -254,7 +254,6 @@ static int pooled_thread_func(void *void_arg) {
 API_FUNC hw_pool_init_status hw_pool_init(uint32_t num_threads) {
   if unlikely (!num_threads) return POOL_INIT_NO_THREADS_SPECIFIED;
   uint32_t old = 0u;
-  assert(num_threads < UINT32_MAX);
   if unlikely (!atomic_compare_exchange_strong_explicit(&hw_pool.num_threads,
                                                         &old,
                                                         num_threads,
@@ -284,16 +283,24 @@ API_FUNC hw_pool_init_status hw_pool_init(uint32_t num_threads) {
     pooled_thread_control *thread_control =
       (pooled_thread_control *)(buffer + alignment * (size_t)i);
     init_thread_control(thread_control, i, &hw_pool);
-    int status;
+    if (i) {
+      int status;
 #ifdef QPOOL_USE_PTHREADS
-    status = pthread_create(
-      &thread_control->thread, &attr, pooled_thread_func, thread_control);
-    if unlikely (status) goto cleanup_threads;
+      status = pthread_create(
+        &thread_control->thread, &attr, pooled_thread_func, thread_control);
+      if unlikely (status) goto cleanup_threads;
 #else
-    status =
-      thrd_create(&thread_control->thread, pooled_thread_func, thread_control);
-    if unlikely (status != thrd_success) goto cleanup_threads;
+      status = thrd_create(
+        &thread_control->thread, pooled_thread_func, thread_control);
+      if unlikely (status != thrd_success) goto cleanup_threads;
 #endif
+    } else {
+#ifndef NDEBUG
+      // Zero out the allocated thread object to make extra sure we never access
+      // it. It still has to be there to satisfy alignment constraints.
+      thread_control->thread = 0ul;
+#endif
+    }
     ++i;
   }
 #ifdef QPOOL_USE_PTHREADS
@@ -303,7 +310,13 @@ API_FUNC hw_pool_init_status hw_pool_init(uint32_t num_threads) {
   return POOL_INIT_SUCCESS;
 cleanup_threads:
   if (i) {
+    // Last thread failed to launch, so no need to clean it up.
+    // If an error was raised it would have been at an iteration
+    // higher than 0 for the thread create loop since no thread is
+    // created at 0.
     uint32_t j = --i;
+    // Current thread does the work of worker zero so
+    // no need to signal or join for that one.
     while (i) {
       // TODO: fix deinit to match new layout and interrupt mechanism.
       pooled_thread_control *thread_control =
@@ -347,17 +360,17 @@ API_FUNC __attribute__((no_sanitize("memory"))) void hw_pool_destroy() {
     atomic_load_explicit(&hw_pool.num_threads, memory_order_relaxed);
   char *buffer = atomic_load_explicit(&hw_pool.threads, memory_order_relaxed);
   size_t alignment = QTHREAD_MAX((size_t)64u, get_cache_line_size());
-  uint32_t i = num_threads;
+  uint32_t i = num_threads - 1u;
+  // Current thread is thread 0 so no need to notify/join that one.
   while (i) {
-    --i;
     // TODO: fix deinit to match new layout and interrupt mechanism.
     pooled_thread_control *thread_control =
       (pooled_thread_control *)(buffer + alignment * (size_t)i);
     notify_worker_of_termination(thread_control);
+    --i;
   }
-  i = num_threads;
+  i = num_threads - 1u;
   while (i) {
-    --i;
     pooled_thread_control *thread_control =
       (pooled_thread_control *)(buffer + alignment * (size_t)i);
     // TODO: crash informatively if join fails somehow.
@@ -366,6 +379,7 @@ API_FUNC __attribute__((no_sanitize("memory"))) void hw_pool_destroy() {
 #else
     thrd_join(thread_control->thread, NULL);
 #endif
+    --i;
   }
 
   atomic_store_explicit(&hw_pool.threads, NULL, memory_order_relaxed);
@@ -379,31 +393,40 @@ API_FUNC uint32_t get_num_delegated_threads() {
   return 1;
 }
 
-// TODO: have the main thread fill the role of thread 0.
-// Instead of having the main thread wait/resume, swap in its thread-locals
-// then have it run the per-thread function.
-// This will avoid the suspend/resume OS overheads for at least that thread.
+// Note: current thread fills the role of thread zero in the pool.
 
 API_FUNC void
 pool_run_on_all(pool_header *pool, qt_threadpool_func_type func, void *arg) {
   uint32_t num_threads =
     atomic_load_explicit(&pool->num_threads, memory_order_relaxed);
   assert(num_threads);
-  assert(num_threads < UINT32_MAX);
-  char *buffer =
-    (char *)atomic_load_explicit(&pool->threads, memory_order_relaxed);
-  atomic_store_explicit(
-    &pool->num_active_threads, num_threads, memory_order_relaxed);
-  init_main_sync(pool);
-  size_t alignment = QTHREAD_MAX((size_t)64u, get_cache_line_size());
-  for (uint32_t i = 0u;
-       i < atomic_load_explicit(&pool->num_threads, memory_order_relaxed);
-       i++) {
-    pooled_thread_control *thread_control =
-      (pooled_thread_control *)(buffer + alignment * (size_t)i);
-    launch_work_on_thread(thread_control, func, arg);
+  if (num_threads > 1u) {
+    char *buffer =
+      (char *)atomic_load_explicit(&pool->threads, memory_order_relaxed);
+    atomic_store_explicit(
+      &pool->num_active_threads, num_threads - 1u, memory_order_relaxed);
+    init_main_sync(pool);
+    size_t alignment = QTHREAD_MAX((size_t)64u, get_cache_line_size());
+    for (uint32_t i = 1u;
+         i < atomic_load_explicit(&pool->num_threads, memory_order_relaxed);
+         i++) {
+      pooled_thread_control *thread_control =
+        (pooled_thread_control *)(buffer + alignment * (size_t)i);
+      launch_work_on_thread(thread_control, func, arg);
+    }
+  }
+  uint32_t outer_index = context_index;
+  context_index = 0u;
+  pool_header *outer_delegated_pool = delegated_pool;
+  delegated_pool = NULL;
+  func(arg);
+  delegated_pool = outer_delegated_pool;
+  context_index = outer_index;
+  if (num_threads > 1u) {
+    // some loops may have threads that take dramatically longer
+    // so we still suspend, but it's potentially for much less time.
+    suspend_main_while_working(pool);
   }
-  suspend_main_while_working(pool);
 }
 
 API_FUNC void run_on_current_pool(qt_threadpool_func_type func, void *arg) {
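
To make the shape of this change clearer outside the diff: instead of creating num_threads workers and suspending the calling thread while a dedicated worker zero runs, the pool now creates only workers 1..N-1, the calling thread runs the work function itself in the role of worker zero (temporarily setting context_index to 0 and clearing delegated_pool), and it only suspends afterwards if there are other workers to wait for. Below is a minimal standalone sketch of that pattern using C11 threads; the names (run_on_all, worker_slot, worker_main, say_hello), the fixed-size arrays, and the join-based teardown are illustrative assumptions, not the library's actual API (the real pool keeps persistent workers and uses its own notify/suspend machinery).

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <threads.h>

/* Hypothetical work-function type, analogous in spirit to
 * qt_threadpool_func_type: every worker runs the same function once. */
typedef void (*work_func)(void *arg);

typedef struct {
  work_func func;
  void *arg;
  uint32_t index; /* worker index, 0 .. num_threads - 1 */
} worker_slot;

static int worker_main(void *void_arg) {
  worker_slot *slot = (worker_slot *)void_arg;
  slot->func(slot->arg);
  return 0;
}

/* Spawn workers 1..num_threads-1 and let the calling thread act as worker 0,
 * instead of parking it while a separately created thread does that work. */
static void run_on_all(uint32_t num_threads, work_func func, void *arg) {
  enum { MAX_WORKERS = 16 };
  thrd_t threads[MAX_WORKERS];
  worker_slot slots[MAX_WORKERS];
  assert(num_threads && num_threads <= MAX_WORKERS);

  for (uint32_t i = 1u; i < num_threads; i++) {
    slots[i] = (worker_slot){.func = func, .arg = arg, .index = i};
    if (thrd_create(&threads[i], worker_main, &slots[i]) != thrd_success) {
      fprintf(stderr, "failed to create worker %u\n", (unsigned)i);
      exit(EXIT_FAILURE);
    }
  }

  /* The calling thread is worker 0: no thread object, no suspend/resume. */
  slots[0] = (worker_slot){.func = func, .arg = arg, .index = 0u};
  worker_main(&slots[0]);

  /* Wait only for the threads that were actually created (indices 1..N-1). */
  for (uint32_t i = 1u; i < num_threads; i++) { thrd_join(threads[i], NULL); }
}

static void say_hello(void *arg) {
  (void)arg;
  puts("hello from a worker");
}

int main(void) {
  run_on_all(4u, say_hello, NULL);
  return 0;
}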
