From: Yabin Cui Date: Thu, 4 Dec 2014 05:36:24 +0000 (-0800) Subject: Use mmap to create the pthread_internal_t X-Git-Url: https://git.ti.com/gitweb?p=android-sdk%2Fplatform-bionic.git;a=commitdiff_plain;h=8cf1b305670123aed7638d984ca39bfd22388440;hp=c631bb215e29981222f19c092ded49c7c1f15845;ds=sidebyside Use mmap to create the pthread_internal_t Add name to mmaped regions. Add pthread benchmark code. Allocate pthread_internal_t on regular stack. Bug: 16847284 Change-Id: Id60835163bb0d68092241f1a118015b5a8f85069 --- diff --git a/benchmarks/pthread_benchmark.cpp b/benchmarks/pthread_benchmark.cpp index 92e59982..42023e04 100644 --- a/benchmarks/pthread_benchmark.cpp +++ b/benchmarks/pthread_benchmark.cpp @@ -47,6 +47,21 @@ static void BM_pthread_getspecific(int iters) { } BENCHMARK(BM_pthread_getspecific); +static void BM_pthread_setspecific(int iters) { + StopBenchmarkTiming(); + pthread_key_t key; + pthread_key_create(&key, NULL); + StartBenchmarkTiming(); + + for (int i = 0; i < iters; ++i) { + pthread_setspecific(key, NULL); + } + + StopBenchmarkTiming(); + pthread_key_delete(key); +} +BENCHMARK(BM_pthread_setspecific); + static void DummyPthreadOnceInitFunction() { } @@ -137,3 +152,80 @@ static void BM_pthread_rw_lock_write(int iters) { pthread_rwlock_destroy(&lock); } BENCHMARK(BM_pthread_rw_lock_write); + +static void* IdleThread(void*) { + return NULL; +} + +static void BM_pthread_create(int iters) { + StopBenchmarkTiming(); + pthread_t thread; + + for (int i = 0; i < iters; ++i) { + StartBenchmarkTiming(); + pthread_create(&thread, NULL, IdleThread, NULL); + StopBenchmarkTiming(); + pthread_join(thread, NULL); + } +} +BENCHMARK(BM_pthread_create); + +static void* RunThread(void*) { + StopBenchmarkTiming(); + return NULL; +} + +static void BM_pthread_create_and_run(int iters) { + StopBenchmarkTiming(); + pthread_t thread; + + for (int i = 0; i < iters; ++i) { + StartBenchmarkTiming(); + pthread_create(&thread, NULL, RunThread, NULL); + pthread_join(thread, 
NULL); + } +} +BENCHMARK(BM_pthread_create_and_run); + +static void* ExitThread(void*) { + StartBenchmarkTiming(); + pthread_exit(NULL); +} + +static void BM_pthread_exit_and_join(int iters) { + StopBenchmarkTiming(); + pthread_t thread; + + for (int i = 0; i < iters; ++i) { + pthread_create(&thread, NULL, ExitThread, NULL); + pthread_join(thread, NULL); + StopBenchmarkTiming(); + } +} +BENCHMARK(BM_pthread_exit_and_join); + +static void BM_pthread_key_create(int iters) { + StopBenchmarkTiming(); + pthread_key_t key; + + for (int i = 0; i < iters; ++i) { + StartBenchmarkTiming(); + pthread_key_create(&key, NULL); + StopBenchmarkTiming(); + pthread_key_delete(key); + } +} +BENCHMARK(BM_pthread_key_create); + +static void BM_pthread_key_delete(int iters) { + StopBenchmarkTiming(); + pthread_key_t key; + + for (int i = 0; i < iters; ++i) { + pthread_key_create(&key, NULL); + StartBenchmarkTiming(); + pthread_key_delete(key); + StopBenchmarkTiming(); + } +} +BENCHMARK(BM_pthread_key_delete); diff --git a/libc/bionic/libc_init_common.cpp b/libc/bionic/libc_init_common.cpp index 2a6a03b5..15b3fd5e 100644 --- a/libc/bionic/libc_init_common.cpp +++ b/libc/bionic/libc_init_common.cpp @@ -74,9 +74,7 @@ uintptr_t __stack_chk_guard = 0; void __libc_init_tls(KernelArgumentBlock& args) { __libc_auxv = args.auxv; - static void* tls[BIONIC_TLS_SLOTS]; static pthread_internal_t main_thread; - main_thread.tls = tls; // Tell the kernel to clear our tid field when we exit, so we're like any other pthread. // As a side-effect, this tells us our pid (which is the same as the main thread's tid). 
@@ -96,7 +94,7 @@ void __libc_init_tls(KernelArgumentBlock& args) { __init_thread(&main_thread, false); __init_tls(&main_thread); __set_tls(main_thread.tls); - tls[TLS_SLOT_BIONIC_PREINIT] = &args; + main_thread.tls[TLS_SLOT_BIONIC_PREINIT] = &args; __init_alternate_signal_stack(&main_thread); } diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp index c99e69c9..c47b7504 100644 --- a/libc/bionic/pthread_create.cpp +++ b/libc/bionic/pthread_create.cpp @@ -35,6 +35,7 @@ #include "pthread_internal.h" #include "private/bionic_macros.h" +#include "private/bionic_prctl.h" #include "private/bionic_ssp.h" #include "private/bionic_tls.h" #include "private/libc_logging.h" @@ -72,6 +73,10 @@ void __init_alternate_signal_stack(pthread_internal_t* thread) { ss.ss_flags = 0; sigaltstack(&ss, NULL); thread->alternate_signal_stack = ss.ss_sp; + + // We can only use const static allocated string for mapped region name, as Android kernel + // uses the string pointer directly when dumping /proc/pid/maps. + prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ss.ss_sp, ss.ss_size, "thread signal stack"); } } @@ -101,31 +106,64 @@ int __init_thread(pthread_internal_t* thread, bool add_to_thread_list) { return error; } -static void* __create_thread_stack(pthread_internal_t* thread) { +static void* __create_thread_stack(const pthread_attr_t& attr) { // Create a new private anonymous map. int prot = PROT_READ | PROT_WRITE; int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; - void* stack = mmap(NULL, thread->attr.stack_size, prot, flags, -1, 0); + void* stack = mmap(NULL, attr.stack_size, prot, flags, -1, 0); if (stack == MAP_FAILED) { __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: couldn't allocate %zd-byte stack: %s", - thread->attr.stack_size, strerror(errno)); + attr.stack_size, strerror(errno)); return NULL; } // Set the guard region at the end of the stack to PROT_NONE. 
- if (mprotect(stack, thread->attr.guard_size, PROT_NONE) == -1) { + if (mprotect(stack, attr.guard_size, PROT_NONE) == -1) { __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: couldn't mprotect PROT_NONE %zd-byte stack guard region: %s", - thread->attr.guard_size, strerror(errno)); - munmap(stack, thread->attr.stack_size); + attr.guard_size, strerror(errno)); + munmap(stack, attr.stack_size); return NULL; } return stack; } +static int __allocate_thread(pthread_attr_t* attr, pthread_internal_t** threadp, void** child_stack) { + if (attr->stack_base == NULL) { + // The caller didn't provide a stack, so allocate one. + // Make sure the stack size and guard size are multiples of PAGE_SIZE. + attr->stack_size = BIONIC_ALIGN(attr->stack_size, PAGE_SIZE); + attr->guard_size = BIONIC_ALIGN(attr->guard_size, PAGE_SIZE); + attr->stack_base = __create_thread_stack(*attr); + if (attr->stack_base == NULL) { + return EAGAIN; + } + } else { + // The caller did provide a stack, so remember we're not supposed to free it. + attr->flags |= PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK; + } + + // Thread stack is used for two sections: + // pthread_internal_t. + // regular stack, from top to down. + uint8_t* stack_top = reinterpret_cast<uint8_t*>(attr->stack_base) + attr->stack_size; + stack_top -= sizeof(pthread_internal_t); + pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top); + + // No need to check stack_top alignment. The size of pthread_internal_t is 16-bytes aligned, + // and user allocated stack is guaranteed by pthread_attr_setstack. + + thread->attr = *attr; + __init_tls(thread); + + *threadp = thread; + *child_stack = stack_top; + return 0; +} + static int __pthread_start(void* arg) { pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg); @@ -158,43 +196,21 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr, // Inform the rest of the C library that at least one thread was created.
__isthreaded = 1; - pthread_internal_t* thread = __create_thread_struct(); - if (thread == NULL) { - return EAGAIN; - } - + pthread_attr_t thread_attr; if (attr == NULL) { - pthread_attr_init(&thread->attr); + pthread_attr_init(&thread_attr); } else { - thread->attr = *attr; + thread_attr = *attr; attr = NULL; // Prevent misuse below. } - // Make sure the stack size and guard size are multiples of PAGE_SIZE. - thread->attr.stack_size = BIONIC_ALIGN(thread->attr.stack_size, PAGE_SIZE); - thread->attr.guard_size = BIONIC_ALIGN(thread->attr.guard_size, PAGE_SIZE); - - if (thread->attr.stack_base == NULL) { - // The caller didn't provide a stack, so allocate one. - thread->attr.stack_base = __create_thread_stack(thread); - if (thread->attr.stack_base == NULL) { - __free_thread_struct(thread); - return EAGAIN; - } - } else { - // The caller did provide a stack, so remember we're not supposed to free it. - thread->attr.flags |= PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK; + pthread_internal_t* thread = NULL; + void* child_stack = NULL; + int result = __allocate_thread(&thread_attr, &thread, &child_stack); + if (result != 0) { + return result; } - // Make room for the TLS area. - // The child stack is the same address, just growing in the opposite direction. - // At offsets >= 0, we have the TLS slots. - // At offsets < 0, we have the child stack. 
- thread->tls = reinterpret_cast<void**>(reinterpret_cast<uint8_t*>(thread->attr.stack_base) + - thread->attr.stack_size - BIONIC_ALIGN(BIONIC_TLS_SLOTS * sizeof(void*), 16)); - void* child_stack = thread->tls; - __init_tls(thread); - // Create a mutex for the thread in TLS to wait on once it starts so we can keep // it from doing anything until after we notify the debugger about it // @@ -211,7 +227,7 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr, int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID; - void* tls = thread->tls; + void* tls = reinterpret_cast<void*>(thread->tls); #if defined(__i386__) // On x86 (but not x86-64), CLONE_SETTLS takes a pointer to a struct user_desc rather than // a pointer to the TLS itself. @@ -229,7 +245,6 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr, if (!thread->user_allocated_stack()) { munmap(thread->attr.stack_base, thread->attr.stack_size); } - __free_thread_struct(thread); __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %s", strerror(errno)); return clone_errno; } diff --git a/libc/bionic/pthread_detach.cpp b/libc/bionic/pthread_detach.cpp index a8608e3a..715acf13 100644 --- a/libc/bionic/pthread_detach.cpp +++ b/libc/bionic/pthread_detach.cpp @@ -46,7 +46,7 @@ int pthread_detach(pthread_t t) { if (thread->tid == 0) { // Already exited; clean up. - _pthread_internal_remove_locked(thread.get()); + _pthread_internal_remove_locked(thread.get(), true); return 0; } diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp index a6bb3631..e04cf8e7 100644 --- a/libc/bionic/pthread_exit.cpp +++ b/libc/bionic/pthread_exit.cpp @@ -90,7 +90,7 @@ void pthread_exit(void* return_value) { // Keep track of what we need to know about the stack before we lose the pthread_internal_t.
void* stack_base = thread->attr.stack_base; size_t stack_size = thread->attr.stack_size; - bool user_allocated_stack = thread->user_allocated_stack(); + bool free_stack = false; pthread_mutex_lock(&g_thread_list_lock); if ((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0) { @@ -98,24 +98,18 @@ void pthread_exit(void* return_value) { // First make sure that the kernel does not try to clear the tid field // because we'll have freed the memory before the thread actually exits. __set_tid_address(NULL); - _pthread_internal_remove_locked(thread); - } else { - // Make sure that the pthread_internal_t doesn't have stale pointers to a stack that - // will be unmapped after the exit call below. - if (!user_allocated_stack) { - thread->attr.stack_base = NULL; - thread->attr.stack_size = 0; - thread->tls = NULL; + + // pthread_internal_t is freed below with stack, not here. + _pthread_internal_remove_locked(thread, false); + if (!thread->user_allocated_stack()) { + free_stack = true; } - // pthread_join is responsible for destroying the pthread_internal_t for non-detached threads. - // The kernel will futex_wake on the pthread_internal_t::tid field to wake pthread_join. } pthread_mutex_unlock(&g_thread_list_lock); - if (user_allocated_stack) { - // Cleaning up this thread's stack is the creator's responsibility, not ours. - __exit(0); - } else { + // Detached threads exit with stack teardown, and everything deallocated here. + // Threads that can be joined exit but leave their stacks for the pthread_join caller to clean up. + if (free_stack) { // We need to munmap the stack we're running on before calling exit. // That's not something we can do in C. 
@@ -126,5 +120,7 @@ void pthread_exit(void* return_value) { sigprocmask(SIG_SETMASK, &mask, NULL); _exit_with_stack_teardown(stack_base, stack_size); + } else { + __exit(0); } } diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h index c5136c93..95097b7e 100644 --- a/libc/bionic/pthread_internal.h +++ b/libc/bionic/pthread_internal.h @@ -30,6 +30,8 @@ #include <pthread.h> +#include "private/bionic_tls.h" + /* Has the thread been detached by a pthread_join or pthread_detach call? */ #define PTHREAD_ATTR_FLAG_DETACHED 0x00000001 @@ -72,8 +74,6 @@ struct pthread_internal_t { return (attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) != 0; } - void** tls; - pthread_attr_t attr; __pthread_cleanup_t* cleanup_stack; @@ -86,16 +86,16 @@ struct pthread_internal_t { pthread_mutex_t startup_handshake_mutex; + void* tls[BIONIC_TLS_SLOTS]; + /* * The dynamic linker implements dlerror(3), which makes it hard for us to implement this * per-thread buffer by simply using malloc(3) and free(3). */ #define __BIONIC_DLERROR_BUFFER_SIZE 512 char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE]; -}; +} __attribute__((aligned(16))); // Align it as thread stack top below it should be aligned. -__LIBC_HIDDEN__ pthread_internal_t* __create_thread_struct(); -__LIBC_HIDDEN__ void __free_thread_struct(pthread_internal_t*); __LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread, bool add_to_thread_list); __LIBC_HIDDEN__ void __init_tls(pthread_internal_t* thread); __LIBC_HIDDEN__ void __init_alternate_signal_stack(pthread_internal_t*); @@ -105,7 +105,7 @@ __LIBC_HIDDEN__ void _pthread_internal_add(pthread_internal_t* thread); extern "C" __LIBC64_HIDDEN__ pthread_internal_t* __get_thread(void); __LIBC_HIDDEN__ void pthread_key_clean_all(void); -__LIBC_HIDDEN__ void _pthread_internal_remove_locked(pthread_internal_t* thread); +__LIBC_HIDDEN__ void _pthread_internal_remove_locked(pthread_internal_t* thread, bool free_thread); /* * Traditionally we gave threads a 1MiB stack.
When we started diff --git a/libc/bionic/pthread_internals.cpp b/libc/bionic/pthread_internals.cpp index 33cddd74..7c30e6e4 100644 --- a/libc/bionic/pthread_internals.cpp +++ b/libc/bionic/pthread_internals.cpp @@ -41,26 +41,7 @@ pthread_internal_t* g_thread_list = NULL; pthread_mutex_t g_thread_list_lock = PTHREAD_MUTEX_INITIALIZER; -pthread_internal_t* __create_thread_struct() { - void* result = mmap(NULL, sizeof(pthread_internal_t), PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); - if (result == MAP_FAILED) { - __libc_format_log(ANDROID_LOG_WARN, "libc", - "__create_thread_struct() failed: %s", strerror(errno)); - return NULL; - } - return reinterpret_cast<pthread_internal_t*>(result); -} - -void __free_thread_struct(pthread_internal_t* thread) { - int result = munmap(thread, sizeof(pthread_internal_t)); - if (result != 0) { - __libc_format_log(ANDROID_LOG_WARN, "libc", - "__free_thread_struct() failed: %s", strerror(errno)); - } -} - -void _pthread_internal_remove_locked(pthread_internal_t* thread) { +void _pthread_internal_remove_locked(pthread_internal_t* thread, bool free_thread) { if (thread->next != NULL) { thread->next->prev = thread->prev; } @@ -70,10 +51,11 @@ void _pthread_internal_remove_locked(pthread_internal_t* thread) { g_thread_list = thread->next; } - // The main thread is not heap-allocated. See __libc_init_tls for the declaration, - // and __libc_init_common for the point where it's added to the thread list. - if ((thread->attr.flags & PTHREAD_ATTR_FLAG_MAIN_THREAD) == 0) { - __free_thread_struct(thread); + // For threads using user allocated stack (including the main thread), the pthread_internal_t + // can't be freed since it is on the stack. + if (free_thread && !(thread->attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK)) { + // Use one munmap to free the whole thread stack, including pthread_internal_t.
+ munmap(thread->attr.stack_base, thread->attr.stack_size); } } diff --git a/libc/bionic/pthread_join.cpp b/libc/bionic/pthread_join.cpp index 0cbed62b..e3350efd 100644 --- a/libc/bionic/pthread_join.cpp +++ b/libc/bionic/pthread_join.cpp @@ -74,6 +74,6 @@ int pthread_join(pthread_t t, void** return_value) { *return_value = thread->return_value; } - _pthread_internal_remove_locked(thread.get()); + _pthread_internal_remove_locked(thread.get(), true); return 0; }