diff mbox

[v2,4/8] sparc64: optimize loads in clock_sched()

Message ID 1497286107-974183-5-git-send-email-pasha.tatashin@oracle.com
State Changes Requested
Delegated to: David Miller
Headers show

Commit Message

Pavel Tatashin June 12, 2017, 4:48 p.m. UTC
In clock sched we now have three loads:
	- Function pointer
	- quotient for multiplication
	- offset

However, it is possible to improve performance substantially, by
guaranteeing that all three loads are from the same cacheline.

By moving these three values first in sparc64_tick_ops, and by having
tick_operations 64-byte aligned we guarantee this.

Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Shannon Nelson <shannon.nelson@oracle.com>
Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
---
 arch/sparc/include/asm/timer_64.h |    5 +++++
 arch/sparc/kernel/time_64.c       |   17 +++++++----------
 2 files changed, 12 insertions(+), 10 deletions(-)

Comments

David Miller June 12, 2017, 7:06 p.m. UTC | #1
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Mon, 12 Jun 2017 12:48:23 -0400

> @@ -164,7 +164,7 @@ static unsigned long tick_add_tick(unsigned long adj)
>  	return new_tick;
>  }
>  
> -static struct sparc64_tick_ops tick_operations __read_mostly = {
> +static struct sparc64_tick_ops tick_operations __aligned(64) __read_mostly = {

Please use __cacheline_aligned instead of "__aligned(64)"
--
To unsubscribe from this list: send the line "unsubscribe sparclinux" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pavel Tatashin June 12, 2017, 7:55 p.m. UTC | #2
Hi Dave,

Thank you for your comments:

When I use __cacheline_aligned instead of __aligned(64) I am getting the 
following error:

error: section of 'tick_operations' conflicts with previous declaration

The __section__ difference causes the issue:

__cacheline_aligned= __attribute__((__aligned__((1 << 6)), 
__section__(".data..cacheline_aligned")))

vs

__aligned(64) = __attribute__((aligned(64)))

Pasha

On 2017-06-12 15:06, David Miller wrote:
> From: Pavel Tatashin <pasha.tatashin@oracle.com>
> Date: Mon, 12 Jun 2017 12:48:23 -0400
> 
>> @@ -164,7 +164,7 @@ static unsigned long tick_add_tick(unsigned long adj)
>>   	return new_tick;
>>   }
>>   
>> -static struct sparc64_tick_ops tick_operations __read_mostly = {
>> +static struct sparc64_tick_ops tick_operations __aligned(64) __read_mostly = {
> 
> Please use __cacheline_aligned instead of "__aligned(64)"
> --
> To unsubscribe from this list: send the line "unsubscribe sparclinux" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe sparclinux" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller June 12, 2017, 8:01 p.m. UTC | #3
From: Pasha Tatashin <pasha.tatashin@oracle.com>
Date: Mon, 12 Jun 2017 15:55:44 -0400

> Thank you for your comments:
> 
> When I use __cacheline_aligned instead of __aligned(64) I am getting
> the following error:
> 
> error: section of 'tick_operations' conflicts with previous
> declaration
> 
> The __section__ difference causes the issue:
> 
> __cacheline_aligned= __attribute__((__aligned__((1 << 6)),
> __section__(".data..cacheline_aligned")))
> 
> vs
> 
> __aligned(64) = __attribute__((aligned(64)))

Perhaps get rid of the __read_mostly.
--
To unsubscribe from this list: send the line "unsubscribe sparclinux" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pavel Tatashin June 12, 2017, 8:02 p.m. UTC | #4
> Perhaps get rid of the __read_mostly.

OK, or as alternative we could do something like:

__read_mostly __aligned(SMP_CACHE_BYTES)

Not sure what is better though.

Pasha
--
To unsubscribe from this list: send the line "unsubscribe sparclinux" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller June 12, 2017, 8:05 p.m. UTC | #5
From: Pasha Tatashin <pasha.tatashin@oracle.com>
Date: Mon, 12 Jun 2017 16:02:54 -0400

>> Perhaps get rid of the __read_mostly.
> 
> OK, or as alternative we could do something like:
> 
> __read_mostly __aligned(SMP_CACHE_BYTES)
> 
> Not sure what is better though.

Better not to put alignment induced gaps into the __read_only section
like this I think.
--
To unsubscribe from this list: send the line "unsubscribe sparclinux" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/sparc/include/asm/timer_64.h b/arch/sparc/include/asm/timer_64.h
index fce4150..bde2cc4 100644
--- a/arch/sparc/include/asm/timer_64.h
+++ b/arch/sparc/include/asm/timer_64.h
@@ -9,7 +9,12 @@ 
 #include <linux/types.h>
 #include <linux/init.h>
 
+/* The most frequently accessed fields should be first,
+ * to fit into the same cacheline.
+ */
 struct sparc64_tick_ops {
+	unsigned long ticks_per_nsec_quotient;
+	unsigned long offset;
 	unsigned long long (*get_tick)(void);
 	int (*add_compare)(unsigned long);
 	unsigned long softint_mask;
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 5f53b74..d654f5c 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -164,7 +164,7 @@  static unsigned long tick_add_tick(unsigned long adj)
 	return new_tick;
 }
 
-static struct sparc64_tick_ops tick_operations __read_mostly = {
+static struct sparc64_tick_ops tick_operations __aligned(64) __read_mostly = {
 	.name		=	"tick",
 	.init_tick	=	tick_init_tick,
 	.disable_irq	=	tick_disable_irq,
@@ -391,9 +391,6 @@  static int hbtick_add_compare(unsigned long adj)
 	.softint_mask	=	1UL << 0,
 };
 
-static unsigned long timer_ticks_per_nsec_quotient __read_mostly;
-static unsigned long timer_offset __read_mostly;
-
 unsigned long cmos_regs;
 EXPORT_SYMBOL(cmos_regs);
 
@@ -784,11 +781,11 @@  void __init time_init(void)
 
 	tb_ticks_per_usec = freq / USEC_PER_SEC;
 
-	timer_ticks_per_nsec_quotient =
+	tick_operations.ticks_per_nsec_quotient =
 		clocksource_hz2mult(freq, SPARC64_NSEC_PER_CYC_SHIFT);
 
-	timer_offset = (tick_operations.get_tick()
-			* timer_ticks_per_nsec_quotient)
+	tick_operations.offset = (tick_operations.get_tick()
+			* tick_operations.ticks_per_nsec_quotient)
 			>> SPARC64_NSEC_PER_CYC_SHIFT;
 
 	clocksource_tick.name = tick_operations.name;
@@ -816,11 +813,11 @@  void __init time_init(void)
 
 unsigned long long sched_clock(void)
 {
+	unsigned long quotient = tick_operations.ticks_per_nsec_quotient;
+	unsigned long offset = tick_operations.offset;
 	unsigned long ticks = tick_operations.get_tick();
 
-	return ((ticks * timer_ticks_per_nsec_quotient)
-		>> SPARC64_NSEC_PER_CYC_SHIFT)
-		- timer_offset;
+	return ((ticks * quotient) >> SPARC64_NSEC_PER_CYC_SHIFT) - offset;
 }
 
 int read_current_timer(unsigned long *timer_val)