@@ -1291,6 +1291,9 @@ extern int perf_output_begin(struct perf_output_handle *handle,
extern int perf_output_begin_forward(struct perf_output_handle *handle,
struct perf_event *event,
unsigned int size);
+extern int perf_output_begin_forward_in_page(struct perf_output_handle *handle,
+ struct perf_event *event,
+ unsigned int size);
extern int perf_output_begin_backward(struct perf_output_handle *handle,
struct perf_event *event,
unsigned int size);
@@ -116,9 +116,11 @@ ring_buffer_has_space(unsigned long head, unsigned long tail,
static __always_inline int
__perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size,
- bool backward)
+ bool backward, bool stay_in_page)
{
struct ring_buffer *rb;
+ unsigned int adj_size;
+ unsigned int gap_size;
unsigned long tail, offset, head;
int have_lost, page_shift;
struct {
@@ -144,6 +146,13 @@ __perf_output_begin(struct perf_output_handle *handle,
goto out;
}
+ page_shift = PAGE_SHIFT + page_order(rb);
+
+ if (unlikely(stay_in_page)) {
+ if (size > (1UL << page_shift))
+ goto out;
+ }
+
handle->rb = rb;
handle->event = event;
@@ -156,13 +165,24 @@ __perf_output_begin(struct perf_output_handle *handle,
perf_output_get_handle(handle);
+ gap_size = 0;
+ adj_size = size;
do {
tail = READ_ONCE(rb->user_page->data_tail);
offset = head = local_read(&rb->head);
+
+ /* Redo per cmpxchg retry; gap_size stays 0 unless we must skip. */
+ if (unlikely(stay_in_page)) {
+ gap_size = (1UL << page_shift) -
+ (offset & ((1UL << page_shift) - 1));
+ gap_size = gap_size < size ? gap_size : 0;
+ adj_size = size + gap_size;
+ }
if (!rb->overwrite) {
if (unlikely(!ring_buffer_has_space(head, tail,
perf_data_size(rb),
- size, backward)))
+ adj_size,
+ backward)))
goto fail;
}
@@ -179,9 +199,9 @@ __perf_output_begin(struct perf_output_handle *handle,
*/
if (!backward)
- head += size;
+ head += adj_size;
else
- head -= size;
+ head -= adj_size;
} while (local_cmpxchg(&rb->head, offset, head) != offset);
if (backward) {
@@ -189,6 +209,22 @@ __perf_output_begin(struct perf_output_handle *handle,
head = (u64)(-head);
}
+ /*
+ * If we had to skip over the remainder of the current page because it
+ * is not large enough to hold the sample and the sample is not allowed
+ * to cross a page boundary, we need to clear the remainder of the page
+ * (fill it with 0s so it is clear we skipped it), and adjust the start
+ * of the sample (offset).
+ */
+ if (stay_in_page && gap_size > 0) {
+ int page = (offset >> page_shift) & (rb->nr_pages - 1);
+
+ offset &= (1UL << page_shift) - 1;
+ memset(rb->data_pages[page] + offset, 0, gap_size);
+
+ offset = head - size;
+ }
+
/*
* We rely on the implied barrier() by local_cmpxchg() to ensure
* none of the data stores below can be lifted up by the compiler.
@@ -197,8 +233,6 @@ __perf_output_begin(struct perf_output_handle *handle,
if (unlikely(head - local_read(&rb->wakeup) > rb->watermark))
local_add(rb->watermark, &rb->wakeup);
- page_shift = PAGE_SHIFT + page_order(rb);
-
handle->page = (offset >> page_shift) & (rb->nr_pages - 1);
offset &= (1UL << page_shift) - 1;
handle->addr = rb->data_pages[handle->page] + offset;
@@ -233,13 +267,26 @@ __perf_output_begin(struct perf_output_handle *handle,
int perf_output_begin_forward(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size)
{
- return __perf_output_begin(handle, event, size, false);
+ return __perf_output_begin(handle, event, size, false, false);
+}
+
+/*
+ * Prepare the ring buffer for 'size' bytes of output for the given event.
+ * The record is never allowed to cross a data page boundary, so size may not
+ * exceed one data page (PAGE_SIZE << page_order(rb)). When a record does not
+ * fit in the rest of the current page, that remainder is filled with zeros.
+ */
+int perf_output_begin_forward_in_page(struct perf_output_handle *handle,
+ struct perf_event *event,
+ unsigned int size)
+{
+ return __perf_output_begin(handle, event, size, false, true);
}
int perf_output_begin_backward(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size)
{
- return __perf_output_begin(handle, event, size, true);
+ return __perf_output_begin(handle, event, size, true, false);
}
int perf_output_begin(struct perf_output_handle *handle,
@@ -247,7 +294,7 @@ int perf_output_begin(struct perf_output_handle *handle,
{
return __perf_output_begin(handle, event, size,
- unlikely(is_write_backward(event)));
+ unlikely(is_write_backward(event)), false);
}
unsigned int perf_output_copy(struct perf_output_handle *handle,