author		Steven Rostedt (VMware)	2017-05-01 08:35:09 -0500
committer	Steven Rostedt (VMware)	2017-05-01 09:26:40 -0500
commit		73a757e63114dfd765f1c5d1ff7e994f123d0234 (patch)
tree		757adb3d5a56f4a22c8e98e106068fc6666d1969 /kernel
parent		ca2958f14c4706d5dced95f4f7dfe2bdd1b268de (diff)
ring-buffer: Return reader page back into existing ring buffer
When reading the ring buffer for consuming, it is optimized for splice, where a page is taken out of the ring buffer (zero copy) and sent to the reading consumer. When the reader is finished with the page, it calls ring_buffer_free_read_page(), which simply frees the page. The next time the reader needs a page from the ring buffer, it must call ring_buffer_alloc_read_page(), which allocates and initializes a reader page that can be swapped into the ring buffer in exchange for a newly filled page.

The problem is that there's no reason to actually free the page when it is passed back to the ring buffer. The ring buffer can hold on to it and reuse it for the next iteration. This completely removes the interaction with the page_alloc mechanism.

Using the trace-cmd utility to record all events (causing trace-cmd to read lots of pages from the ring buffer and to call ring_buffer_alloc/free_read_page() several times), and also assigning a stack trace trigger to the mm_page_alloc event, we can see how many times ring_buffer_alloc_read_page() needed to allocate a page for the ring buffer.

Before this change:

  # trace-cmd record -e all -e mm_page_alloc -R stacktrace sleep 1
  # trace-cmd report | grep ring_buffer_alloc_read_page | wc -l
  9968

After this change:

  # trace-cmd record -e all -e mm_page_alloc -R stacktrace sleep 1
  # trace-cmd report | grep ring_buffer_alloc_read_page | wc -l
  4

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
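The change amounts to a one-slot page cache per CPU buffer: the free path parks the returned page in cpu_buffer->free_page instead of freeing it, and the alloc path claims that page back before falling through to the page allocator. As a rough illustration only (not kernel code), here is the same pattern in userspace C, with a pthread mutex standing in for the kernel's arch_spin_lock/IRQ-disable pair and malloc/free standing in for the page allocator; all names in this sketch are hypothetical:

#include <pthread.h>
#include <stdlib.h>

/* One-slot page cache, analogous to cpu_buffer->free_page. */
struct page_cache {
	pthread_mutex_t lock;
	void *free_page;	/* NULL when the slot is empty */
};

/* Take the cached page if one is parked, otherwise allocate fresh. */
static void *cache_alloc_page(struct page_cache *c, size_t size)
{
	void *page = NULL;

	pthread_mutex_lock(&c->lock);
	if (c->free_page) {
		page = c->free_page;
		c->free_page = NULL;
	}
	pthread_mutex_unlock(&c->lock);

	return page ? page : malloc(size);
}

/* Return a page: park it in the empty slot, or free it if the slot is taken. */
static void cache_free_page(struct page_cache *c, void *page)
{
	pthread_mutex_lock(&c->lock);
	if (!c->free_page) {
		c->free_page = page;
		page = NULL;
	}
	pthread_mutex_unlock(&c->lock);

	free(page);	/* no-op when the page was parked (page == NULL) */
}

/* Usage:
 *	struct page_cache cache = { PTHREAD_MUTEX_INITIALIZER, NULL };
 *	void *p = cache_alloc_page(&cache, 4096);
 *	cache_free_page(&cache, p);
 *	p = cache_alloc_page(&cache, 4096);	// reuses the parked page
 */

Note that a page freed while the slot is already occupied still goes back to the allocator, exactly as in the patch: the cache never holds more than one spare page per CPU buffer.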
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/trace/ring_buffer.c		40
-rw-r--r--	kernel/trace/ring_buffer_benchmark.c	2
-rw-r--r--	kernel/trace/trace.c			17
3 files changed, 50 insertions(+), 9 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 96fc3c043ad6..01b4ee5326cf 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -438,6 +438,7 @@ struct ring_buffer_per_cpu {
 	raw_spinlock_t			reader_lock;	/* serialize readers */
 	arch_spinlock_t			lock;
 	struct lock_class_key		lock_key;
+	struct buffer_data_page		*free_page;
 	unsigned long			nr_pages;
 	unsigned int			current_context;
 	struct list_head		*pages;
@@ -4377,9 +4378,25 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
  */
 void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
 {
-	struct buffer_data_page *bpage;
+	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+	struct buffer_data_page *bpage = NULL;
+	unsigned long flags;
 	struct page *page;
 
+	local_irq_save(flags);
+	arch_spin_lock(&cpu_buffer->lock);
+
+	if (cpu_buffer->free_page) {
+		bpage = cpu_buffer->free_page;
+		cpu_buffer->free_page = NULL;
+	}
+
+	arch_spin_unlock(&cpu_buffer->lock);
+	local_irq_restore(flags);
+
+	if (bpage)
+		goto out;
+
 	page = alloc_pages_node(cpu_to_node(cpu),
 				GFP_KERNEL | __GFP_NORETRY, 0);
 	if (!page)
@@ -4387,6 +4404,7 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
 
 	bpage = page_address(page);
 
+ out:
 	rb_init_page(bpage);
 
 	return bpage;
@@ -4396,13 +4414,29 @@ EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
 /**
  * ring_buffer_free_read_page - free an allocated read page
  * @buffer: the buffer the page was allocate for
+ * @cpu: the cpu buffer the page came from
  * @data: the page to free
  *
  * Free a page allocated from ring_buffer_alloc_read_page.
  */
-void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
+void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
 {
-	free_page((unsigned long)data);
+	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+	struct buffer_data_page *bpage = data;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	arch_spin_lock(&cpu_buffer->lock);
+
+	if (!cpu_buffer->free_page) {
+		cpu_buffer->free_page = bpage;
+		bpage = NULL;
+	}
+
+	arch_spin_unlock(&cpu_buffer->lock);
+	local_irq_restore(flags);
+
+	free_page((unsigned long)bpage);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
 
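Taken together, the consumer-side call sequence against the updated API looks roughly like this. This is a sketch only: read_one_page() is a hypothetical caller, 'buffer' is assumed to come from the surrounding driver context, and the partial-read handling that real callers such as tracing_buffers_read() perform is elided:

#include <linux/ring_buffer.h>

static int read_one_page(struct ring_buffer *buffer, int cpu)
{
	void *spare;
	int r;

	/* May hand back the per-CPU cached page instead of hitting page_alloc. */
	spare = ring_buffer_alloc_read_page(buffer, cpu);
	if (!spare)
		return -ENOMEM;

	/* Swap a filled page out of the ring buffer into 'spare'. */
	r = ring_buffer_read_page(buffer, &spare, PAGE_SIZE, cpu, 0);

	/* Parks the page in cpu_buffer->free_page rather than freeing it,
	 * so the next read on this CPU skips the allocation entirely. */
	ring_buffer_free_read_page(buffer, cpu, spare);

	return r < 0 ? r : 0;
}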
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index c190a4d5013c..9fbcaf567886 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -171,7 +171,7 @@ static enum event_status read_page(int cpu)
 			}
 		}
 	}
-	ring_buffer_free_read_page(buffer, bpage);
+	ring_buffer_free_read_page(buffer, cpu, bpage);
 
 	if (ret < 0)
 		return EVENT_DROPPED;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 60c904fa5480..5b645b0fbbb8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6054,6 +6054,7 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
 struct ftrace_buffer_info {
 	struct trace_iterator	iter;
 	void			*spare;
+	unsigned int		spare_cpu;
 	unsigned int		read;
 };
 
@@ -6383,9 +6384,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 		return -EBUSY;
 #endif
 
-	if (!info->spare)
+	if (!info->spare) {
 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
 							  iter->cpu_file);
+		info->spare_cpu = iter->cpu_file;
+	}
 	if (!info->spare)
 		return -ENOMEM;
 
@@ -6445,7 +6448,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
 	__trace_array_put(iter->tr);
 
 	if (info->spare)
-		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
+		ring_buffer_free_read_page(iter->trace_buffer->buffer,
+					   info->spare_cpu, info->spare);
 	kfree(info);
 
 	mutex_unlock(&trace_types_lock);
@@ -6456,6 +6460,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
 struct buffer_ref {
 	struct ring_buffer	*buffer;
 	void			*page;
+	int			cpu;
 	int			ref;
 };
 
@@ -6467,7 +6472,7 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
 	if (--ref->ref)
 		return;
 
-	ring_buffer_free_read_page(ref->buffer, ref->page);
+	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
 	kfree(ref);
 	buf->private = 0;
 }
@@ -6501,7 +6506,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
 	if (--ref->ref)
 		return;
 
-	ring_buffer_free_read_page(ref->buffer, ref->page);
+	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
 	kfree(ref);
 	spd->partial[i].private = 0;
 }
@@ -6566,11 +6571,13 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 			kfree(ref);
 			break;
 		}
+		ref->cpu = iter->cpu_file;
 
 		r = ring_buffer_read_page(ref->buffer, &ref->page,
 					  len, iter->cpu_file, 1);
 		if (r < 0) {
-			ring_buffer_free_read_page(ref->buffer, ref->page);
+			ring_buffer_free_read_page(ref->buffer, ref->cpu,
+						   ref->page);
 			kfree(ref);
 			break;
 		}
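One detail worth calling out in the trace.c changes above: info->spare_cpu and ref->cpu record which per-CPU buffer a page was taken from, because ring_buffer_free_read_page() now returns the page to that specific buffer's free_page slot. The free can happen long after the read and from a different context (for example, when a splice consumer finally releases a pipe buffer in buffer_pipe_buf_release()), so the origin CPU must travel with the page rather than be rederived at free time.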