summary refs log tree commit diff stats
diff options
context:
space:
mode:
author    Josh Gao  2018-01-24 16:23:42 -0600
committer Josh Gao  2018-02-09 17:35:40 -0600
commitc531ed6648b99e88061cd41e7948f1f4e2dcef03 (patch)
treefece00aa1d36274ea8cf939490bea0d91f3e045c /debuggerd
parentbac72c884f3ae08aa26c62cc94d165c198accd64 (diff)
downloadplatform-system-core-c531ed6648b99e88061cd41e7948f1f4e2dcef03.tar.gz
platform-system-core-c531ed6648b99e88061cd41e7948f1f4e2dcef03.tar.xz
platform-system-core-c531ed6648b99e88061cd41e7948f1f4e2dcef03.zip
debuggerd_fallback: fix race.
A race condition occurs when one thread takes more than a second to get
scheduled to handle the signal we send to ask it to dump its stack. When this
happens, the main thread will continue on, close the fd, and then ask the next
thread to dump, but the slow thread will then wake up and try to write to the
new thread's fd, or trigger an assertion in __linker_enable_fallback_allocator.

Do a few things to make this less bad:
- encode both target tid and fd in the shared atomic, so that we know who
  each fd is for
- switch __linker_enable_fallback_allocator to return success instead of
  aborting, and bail out if it's already in use
- write to the output fd right when we get to it, instead of doing it
  whenever the dumping code decides to, to reduce the likelihood that the
  timeout expires

Test: debuggerd_test
Change-Id: Ife0f6dae388b601e7f991605f14d7a0274013f6b
Diffstat (limited to 'debuggerd')
-rw-r--r--debuggerd/handler/debuggerd_fallback.cpp105
1 files changed, 89 insertions, 16 deletions
diff --git a/debuggerd/handler/debuggerd_fallback.cpp b/debuggerd/handler/debuggerd_fallback.cpp
index 5fddddcf2..364fca5e8 100644
--- a/debuggerd/handler/debuggerd_fallback.cpp
+++ b/debuggerd/handler/debuggerd_fallback.cpp
@@ -55,7 +55,7 @@
55using android::base::unique_fd; 55using android::base::unique_fd;
56using unwindstack::Regs; 56using unwindstack::Regs;
57 57
58extern "C" void __linker_enable_fallback_allocator(); 58extern "C" bool __linker_enable_fallback_allocator();
59extern "C" void __linker_disable_fallback_allocator(); 59extern "C" void __linker_disable_fallback_allocator();
60 60
61// This is incredibly sketchy to do inside of a signal handler, especially when libbacktrace 61// This is incredibly sketchy to do inside of a signal handler, especially when libbacktrace
@@ -65,7 +65,11 @@ extern "C" void __linker_disable_fallback_allocator();
65// This isn't the default method of dumping because it can fail in cases such as address space 65// This isn't the default method of dumping because it can fail in cases such as address space
66// exhaustion. 66// exhaustion.
67static void debuggerd_fallback_trace(int output_fd, ucontext_t* ucontext) { 67static void debuggerd_fallback_trace(int output_fd, ucontext_t* ucontext) {
68 __linker_enable_fallback_allocator(); 68 if (!__linker_enable_fallback_allocator()) {
69 async_safe_format_log(ANDROID_LOG_ERROR, "libc", "fallback allocator already in use");
70 return;
71 }
72
69 { 73 {
70 std::unique_ptr<Regs> regs; 74 std::unique_ptr<Regs> regs;
71 75
@@ -84,7 +88,11 @@ static void debuggerd_fallback_trace(int output_fd, ucontext_t* ucontext) {
84 88
85static void debuggerd_fallback_tombstone(int output_fd, ucontext_t* ucontext, siginfo_t* siginfo, 89static void debuggerd_fallback_tombstone(int output_fd, ucontext_t* ucontext, siginfo_t* siginfo,
86 void* abort_message) { 90 void* abort_message) {
87 __linker_enable_fallback_allocator(); 91 if (!__linker_enable_fallback_allocator()) {
92 async_safe_format_log(ANDROID_LOG_ERROR, "libc", "fallback allocator already in use");
93 return;
94 }
95
88 engrave_tombstone_ucontext(output_fd, reinterpret_cast<uintptr_t>(abort_message), siginfo, 96 engrave_tombstone_ucontext(output_fd, reinterpret_cast<uintptr_t>(abort_message), siginfo,
89 ucontext); 97 ucontext);
90 __linker_disable_fallback_allocator(); 98 __linker_disable_fallback_allocator();
@@ -116,7 +124,7 @@ static void iterate_siblings(bool (*callback)(pid_t, int), int output_fd) {
116 closedir(dir); 124 closedir(dir);
117} 125}
118 126
119static bool forward_output(int src_fd, int dst_fd) { 127static bool forward_output(int src_fd, int dst_fd, pid_t expected_tid) {
120 // Make sure the thread actually got the signal. 128 // Make sure the thread actually got the signal.
121 struct pollfd pfd = { 129 struct pollfd pfd = {
122 .fd = src_fd, .events = POLLIN, 130 .fd = src_fd, .events = POLLIN,
@@ -127,6 +135,18 @@ static bool forward_output(int src_fd, int dst_fd) {
127 return false; 135 return false;
128 } 136 }
129 137
138 pid_t tid;
139 if (TEMP_FAILURE_RETRY(read(src_fd, &tid, sizeof(tid))) != sizeof(tid)) {
140 async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to read tid");
141 return false;
142 }
143
144 if (tid != expected_tid) {
145 async_safe_format_log(ANDROID_LOG_ERROR, "libc", "received tid %d, expected %d", tid,
146 expected_tid);
147 return false;
148 }
149
130 while (true) { 150 while (true) {
131 char buf[512]; 151 char buf[512];
132 ssize_t rc = TEMP_FAILURE_RETRY(read(src_fd, buf, sizeof(buf))); 152 ssize_t rc = TEMP_FAILURE_RETRY(read(src_fd, buf, sizeof(buf)));
@@ -144,16 +164,54 @@ static bool forward_output(int src_fd, int dst_fd) {
144 } 164 }
145} 165}
146 166
167struct __attribute__((__packed__)) packed_thread_output {
168 int32_t tid;
169 int32_t fd;
170};
171
172static uint64_t pack_thread_fd(pid_t tid, int fd) {
173 packed_thread_output packed = {.tid = tid, .fd = fd};
174 uint64_t result;
175 static_assert(sizeof(packed) == sizeof(result));
176 memcpy(&result, &packed, sizeof(packed));
177 return result;
178}
179
180static std::pair<pid_t, int> unpack_thread_fd(uint64_t value) {
181 packed_thread_output result;
182 memcpy(&result, &value, sizeof(value));
183 return std::make_pair(result.tid, result.fd);
184}
185
147static void trace_handler(siginfo_t* info, ucontext_t* ucontext) { 186static void trace_handler(siginfo_t* info, ucontext_t* ucontext) {
148 static std::atomic<int> trace_output_fd(-1); 187 static std::atomic<uint64_t> trace_output(pack_thread_fd(-1, -1));
149 188
150 if (info->si_value.sival_int == ~0) { 189 if (info->si_value.sival_int == ~0) {
151 // Asked to dump by the original signal recipient. 190 // Asked to dump by the original signal recipient.
152 debuggerd_fallback_trace(trace_output_fd, ucontext); 191 uint64_t val = trace_output.load();
192 auto [tid, fd] = unpack_thread_fd(val);
193 if (tid != gettid()) {
194 // We received some other thread's info request?
195 async_safe_format_log(ANDROID_LOG_ERROR, "libc",
196 "thread %d received output fd for thread %d?", gettid(), tid);
197 return;
198 }
199
200 if (!trace_output.compare_exchange_strong(val, pack_thread_fd(-1, -1))) {
201 // Presumably, the timeout in forward_output expired, and the main thread moved on.
202 // If this happened, the main thread closed our fd for us, so just return.
203 async_safe_format_log(ANDROID_LOG_ERROR, "libc", "cmpxchg for thread %d failed", gettid());
204 return;
205 }
153 206
154 int tmp = trace_output_fd.load(); 207 // Write our tid to the output fd to let the main thread know that we're working.
155 trace_output_fd.store(-1); 208 if (TEMP_FAILURE_RETRY(write(fd, &tid, sizeof(tid))) == sizeof(tid)) {
156 close(tmp); 209 debuggerd_fallback_trace(fd, ucontext);
210 } else {
211 async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to write to output fd");
212 }
213
214 close(fd);
157 return; 215 return;
158 } 216 }
159 217
@@ -189,7 +247,14 @@ static void trace_handler(siginfo_t* info, ucontext_t* ucontext) {
189 return false; 247 return false;
190 } 248 }
191 249
192 trace_output_fd.store(pipe_write.get()); 250 uint64_t expected = pack_thread_fd(-1, -1);
251 if (!trace_output.compare_exchange_strong(expected,
252 pack_thread_fd(tid, pipe_write.release()))) {
253 auto [tid, fd] = unpack_thread_fd(expected);
254 async_safe_format_log(ANDROID_LOG_ERROR, "libc",
255 "thread %d is already outputting to fd %d?", tid, fd);
256 return false;
257 }
193 258
194 siginfo_t siginfo = {}; 259 siginfo_t siginfo = {};
195 siginfo.si_code = SI_QUEUE; 260 siginfo.si_code = SI_QUEUE;
@@ -203,12 +268,20 @@ static void trace_handler(siginfo_t* info, ucontext_t* ucontext) {
203 return false; 268 return false;
204 } 269 }
205 270
206 bool success = forward_output(pipe_read.get(), output_fd); 271 bool success = forward_output(pipe_read.get(), output_fd, tid);
207 if (success) { 272 if (!success) {
208 // The signaled thread has closed trace_output_fd already. 273 async_safe_format_log(ANDROID_LOG_ERROR, "libc",
209 (void)pipe_write.release(); 274 "timeout expired while waiting for thread %d to dump", tid);
210 } else { 275 }
211 trace_output_fd.store(-1); 276
277 // Regardless of whether the poll succeeds, check to see if the thread took fd ownership.
278 uint64_t post_wait = trace_output.exchange(pack_thread_fd(-1, -1));
279 if (post_wait != pack_thread_fd(-1, -1)) {
280 auto [tid, fd] = unpack_thread_fd(post_wait);
281 if (fd != -1) {
282 async_safe_format_log(ANDROID_LOG_ERROR, "libc", "closing fd %d for thread %d", fd, tid);
283 close(fd);
284 }
212 } 285 }
213 286
214 return true; 287 return true;