
[v8,3/6] powerpc: lib/locks.c: Add cpu yield/wake helper function

Message ID 1480951166-44830-4-git-send-email-xinhui.pan@linux.vnet.ibm.com (mailing list archive)
State Superseded

Commit Message

xinhui Dec. 5, 2016, 3:19 p.m. UTC
Add two corresponding helper functions to support pv-qspinlock.

For normal use, __spin_yield_cpu will confer the current vcpu's slices to
the target vcpu (say, a lock holder). If the target vcpu is not specified
(cpu == -1) or it is in the running state, whether we still confer our
slices to the lpar depends on the second parameter.

The hcall itself introduces latency and a little overhead, and in some
cases, e.g. in an interrupt handler, we do NOT want to suffer that
latency. The second parameter *confer* indicates such a case.

__spin_wake_cpu is simpler; it wakes up the target vcpu regardless of its
current state.

Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/spinlock.h |  4 +++
 arch/powerpc/lib/locks.c            | 59 +++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)
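
For context, here is a minimal sketch (not part of this patch) of how a
pv-qspinlock slow path could pair these helpers. struct pv_node, pv_wait()
and pv_kick() are hypothetical names used purely for illustration:

/*
 * Illustrative only -- not part of this patch.  A hypothetical
 * waiter/waker pairing built on the two new helpers.
 */
struct pv_node {
	int	cpu;		/* cpu id of the waiting vcpu */
	u8	yielded;	/* set before the waiter confers its slices */
};

/* Waiter side: give our slices to the lock holder while we wait. */
static void pv_wait(struct pv_node *node, int holder_cpu)
{
	WRITE_ONCE(node->yielded, 1);
	smp_mb();	/* order the store above against the waker's check */
	/*
	 * The second argument says whether we may still confer to the
	 * lpar when the holder is running or unknown (cpu == -1); avoid
	 * that extra hcall latency from interrupt context.
	 */
	__spin_yield_cpu(holder_cpu, !in_interrupt());
	WRITE_ONCE(node->yielded, 0);
}

/* Unlocker side: wake the waiting vcpu if it may have yielded. */
static void pv_kick(struct pv_node *node)
{
	if (READ_ONCE(node->yielded))
		__spin_wake_cpu(node->cpu);
}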

Comments

Boqun Feng Dec. 6, 2016, 1:23 a.m. UTC | #1
On Mon, Dec 05, 2016 at 10:19:23AM -0500, Pan Xinhui wrote:
> Add two corresponding helper functions to support pv-qspinlock.
> 
> For normal use, __spin_yield_cpu will confer current vcpu slices to the
> target vcpu(say, a lock holder). If target vcpu is not specified or it
> is in running state, such conferging to lpar happens or not depends.
> 
> Because hcall itself will introduce latency and a little overhead. And we
> do NOT want to suffer any latency on some cases, e.g. in interrupt handler.
> The second parameter *confer* can indicate such case.
> 
> __spin_wake_cpu is simpiler, it will wake up one vcpu regardless of its
> current vcpu state.
> 
> Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/spinlock.h |  4 +++
>  arch/powerpc/lib/locks.c            | 59 +++++++++++++++++++++++++++++++++++++
>  2 files changed, 63 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
> index 954099e..6426bd5 100644
> --- a/arch/powerpc/include/asm/spinlock.h
> +++ b/arch/powerpc/include/asm/spinlock.h
> @@ -64,9 +64,13 @@ static inline bool vcpu_is_preempted(int cpu)
>  /* We only yield to the hypervisor if we are in shared processor mode */
>  #define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
>  extern void __spin_yield(arch_spinlock_t *lock);
> +extern void __spin_yield_cpu(int cpu, int confer);
> +extern void __spin_wake_cpu(int cpu);
>  extern void __rw_yield(arch_rwlock_t *lock);
>  #else /* SPLPAR */
>  #define __spin_yield(x)        barrier()
> +#define __spin_yield_cpu(x, y) barrier()
> +#define __spin_wake_cpu(x) barrier()
>  #define __rw_yield(x)  barrier()
>  #define SHARED_PROCESSOR       0
>  #endif
> diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
> index 6574626..bd872c9 100644
> --- a/arch/powerpc/lib/locks.c
> +++ b/arch/powerpc/lib/locks.c
> @@ -23,6 +23,65 @@
>  #include <asm/hvcall.h>
>  #include <asm/smp.h>
>  
> +/*
> + * confer our slices to a specified cpu and return. If it is in running state
> + * or cpu is -1, then we will check confer. If confer is NULL, we will return
> + * otherwise we confer our slices to lpar.
> + */
> +void __spin_yield_cpu(int cpu, int confer)
> +{
> +	unsigned int holder_cpu = cpu, yield_count;

As I said at:

https://marc.info/?l=linux-kernel&m=147455748619343&w=2

@holder_cpu is not necessary and doesn't help anything.

> +
> +	if (cpu == -1)
> +		goto yield_to_lpar;
> +
> +	BUG_ON(holder_cpu >= nr_cpu_ids);
> +	yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
> +
> +	/* if cpu is running, confer slices to lpar conditionally*/
> +	if ((yield_count & 1) == 0)
> +		goto yield_to_lpar;
> +
> +	plpar_hcall_norets(H_CONFER,
> +		get_hard_smp_processor_id(holder_cpu), yield_count);
> +	return;
> +
> +yield_to_lpar:
> +	if (confer)
> +		plpar_hcall_norets(H_CONFER, -1, 0);
> +}
> +EXPORT_SYMBOL_GPL(__spin_yield_cpu);
> +
> +void __spin_wake_cpu(int cpu)
> +{
> +	unsigned int holder_cpu = cpu;

And it's even wrong to call the parameter of _wake_cpu() a holder_cpu,
because it's not the current lock holder.

Regards,
Boqun

> +
> +	BUG_ON(holder_cpu >= nr_cpu_ids);
> +	/*
> +	 * NOTE: we should always do this hcall regardless of
> +	 * the yield_count of the holder_cpu.
> +	 * as thers might be a case like below;
> +	 *	CPU 1			CPU 2
> +	 *				yielded = true
> +	 * if (yielded)
> +	 *	__spin_wake_cpu()
> +	 *				__spin_yield_cpu()
> +	 *
> +	 * So we might lose a wake if we check the yield_count and
> +	 * return directly if the holder_cpu is running.
> +	 * IOW. do NOT code like below.
> +	 *	yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
> +	 *	if ((yield_count & 1) == 0)
> +	 *		return;
> +	 *
> +	 * a PROD hcall marks the target_cpu proded, which cause the next cede
> +	 * or confer called on the target_cpu invalid.
> +	 */
> +	plpar_hcall_norets(H_PROD,
> +		get_hard_smp_processor_id(holder_cpu));
> +}
> +EXPORT_SYMBOL_GPL(__spin_wake_cpu);
> +
>  #ifndef CONFIG_QUEUED_SPINLOCKS
>  void __spin_yield(arch_spinlock_t *lock)
>  {
> -- 
> 2.4.11
>
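
For reference, a minimal sketch of what __spin_yield_cpu() could look like
with the redundant holder_cpu local dropped, along the lines of the review
comment above (illustrative only, not a posted revision):

void __spin_yield_cpu(int cpu, int confer)
{
	unsigned int yield_count;

	if (cpu == -1)
		goto yield_to_lpar;

	BUG_ON((unsigned int)cpu >= nr_cpu_ids);
	yield_count = be32_to_cpu(lppaca_of(cpu).yield_count);

	/* if the target vcpu is running, confer to the lpar conditionally */
	if ((yield_count & 1) == 0)
		goto yield_to_lpar;

	plpar_hcall_norets(H_CONFER,
		get_hard_smp_processor_id(cpu), yield_count);
	return;

yield_to_lpar:
	if (confer)
		plpar_hcall_norets(H_CONFER, -1, 0);
}
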
xinhui Dec. 6, 2016, 1:30 a.m. UTC | #2
在 2016/12/6 09:23, Boqun Feng 写道:
> On Mon, Dec 05, 2016 at 10:19:23AM -0500, Pan Xinhui wrote:
>> Add two corresponding helper functions to support pv-qspinlock.
>>
>> For normal use, __spin_yield_cpu will confer current vcpu slices to the
>> target vcpu(say, a lock holder). If target vcpu is not specified or it
>> is in running state, such conferging to lpar happens or not depends.
>>
>> Because hcall itself will introduce latency and a little overhead. And we
>> do NOT want to suffer any latency on some cases, e.g. in interrupt handler.
>> The second parameter *confer* can indicate such case.
>>
>> __spin_wake_cpu is simpiler, it will wake up one vcpu regardless of its
>> current vcpu state.
>>
>> Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/include/asm/spinlock.h |  4 +++
>>  arch/powerpc/lib/locks.c            | 59 +++++++++++++++++++++++++++++++++++++
>>  2 files changed, 63 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
>> index 954099e..6426bd5 100644
>> --- a/arch/powerpc/include/asm/spinlock.h
>> +++ b/arch/powerpc/include/asm/spinlock.h
>> @@ -64,9 +64,13 @@ static inline bool vcpu_is_preempted(int cpu)
>>  /* We only yield to the hypervisor if we are in shared processor mode */
>>  #define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
>>  extern void __spin_yield(arch_spinlock_t *lock);
>> +extern void __spin_yield_cpu(int cpu, int confer);
>> +extern void __spin_wake_cpu(int cpu);
>>  extern void __rw_yield(arch_rwlock_t *lock);
>>  #else /* SPLPAR */
>>  #define __spin_yield(x)        barrier()
>> +#define __spin_yield_cpu(x, y) barrier()
>> +#define __spin_wake_cpu(x) barrier()
>>  #define __rw_yield(x)  barrier()
>>  #define SHARED_PROCESSOR       0
>>  #endif
>> diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
>> index 6574626..bd872c9 100644
>> --- a/arch/powerpc/lib/locks.c
>> +++ b/arch/powerpc/lib/locks.c
>> @@ -23,6 +23,65 @@
>>  #include <asm/hvcall.h>
>>  #include <asm/smp.h>
>>
>> +/*
>> + * confer our slices to a specified cpu and return. If it is in running state
>> + * or cpu is -1, then we will check confer. If confer is NULL, we will return
>> + * otherwise we confer our slices to lpar.
>> + */
>> +void __spin_yield_cpu(int cpu, int confer)
>> +{
>> +	unsigned int holder_cpu = cpu, yield_count;
>
> As I said at:
>
> https://marc.info/?l=linux-kernel&m=147455748619343&w=2
>
> @holder_cpu is not necessary and doesn't help anything.
>
>> +
>> +	if (cpu == -1)
>> +		goto yield_to_lpar;
>> +
>> +	BUG_ON(holder_cpu >= nr_cpu_ids);
>> +	yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
>> +
>> +	/* if cpu is running, confer slices to lpar conditionally*/
>> +	if ((yield_count & 1) == 0)
>> +		goto yield_to_lpar;
>> +
>> +	plpar_hcall_norets(H_CONFER,
>> +		get_hard_smp_processor_id(holder_cpu), yield_count);
>> +	return;
>> +
>> +yield_to_lpar:
>> +	if (confer)
>> +		plpar_hcall_norets(H_CONFER, -1, 0);
>> +}
>> +EXPORT_SYMBOL_GPL(__spin_yield_cpu);
>> +
>> +void __spin_wake_cpu(int cpu)
>> +{
>> +	unsigned int holder_cpu = cpu;
>
> And it's even wrong to call the parameter of _wake_cpu() a holder_cpu,
> because it's not the current lock holder.
>
oh, its name is really misleading.

thanks

> Regards,
> Boqun
>
>> +
>> +	BUG_ON(holder_cpu >= nr_cpu_ids);
>> +	/*
>> +	 * NOTE: we should always do this hcall regardless of
>> +	 * the yield_count of the holder_cpu.
>> +	 * as thers might be a case like below;
>> +	 *	CPU 1			CPU 2
>> +	 *				yielded = true
>> +	 * if (yielded)
>> +	 *	__spin_wake_cpu()
>> +	 *				__spin_yield_cpu()
>> +	 *
>> +	 * So we might lose a wake if we check the yield_count and
>> +	 * return directly if the holder_cpu is running.
>> +	 * IOW. do NOT code like below.
>> +	 *	yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
>> +	 *	if ((yield_count & 1) == 0)
>> +	 *		return;
>> +	 *
>> +	 * a PROD hcall marks the target_cpu proded, which cause the next cede
>> +	 * or confer called on the target_cpu invalid.
>> +	 */
>> +	plpar_hcall_norets(H_PROD,
>> +		get_hard_smp_processor_id(holder_cpu));
>> +}
>> +EXPORT_SYMBOL_GPL(__spin_wake_cpu);
>> +
>>  #ifndef CONFIG_QUEUED_SPINLOCKS
>>  void __spin_yield(arch_spinlock_t *lock)
>>  {
>> --
>> 2.4.11
>>
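
For readers unfamiliar with the yield_count convention the patch relies on:
as in the surrounding spinlock code, an even yield_count means the target
vcpu is currently running, an odd one means it has been preempted or has
ceded. A minimal sketch of that check (not part of the patch;
vcpu_is_running() is a hypothetical helper):

/* Sketch only: the lppaca yield_count parity convention used in the
 * patch.  An even value means the target vcpu is currently running,
 * an odd value means it is not.
 */
static inline bool vcpu_is_running(int cpu)
{
	return (be32_to_cpu(lppaca_of(cpu).yield_count) & 1) == 0;
}

/* e.g. in __spin_yield_cpu():
 *	if (vcpu_is_running(cpu))
 *		goto yield_to_lpar;
 */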

Patch

diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 954099e..6426bd5 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -64,9 +64,13 @@  static inline bool vcpu_is_preempted(int cpu)
 /* We only yield to the hypervisor if we are in shared processor mode */
 #define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
 extern void __spin_yield(arch_spinlock_t *lock);
+extern void __spin_yield_cpu(int cpu, int confer);
+extern void __spin_wake_cpu(int cpu);
 extern void __rw_yield(arch_rwlock_t *lock);
 #else /* SPLPAR */
 #define __spin_yield(x)        barrier()
+#define __spin_yield_cpu(x, y) barrier()
+#define __spin_wake_cpu(x) barrier()
 #define __rw_yield(x)  barrier()
 #define SHARED_PROCESSOR       0
 #endif
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 6574626..bd872c9 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -23,6 +23,65 @@ 
 #include <asm/hvcall.h>
 #include <asm/smp.h>
 
+/*
+ * Confer our slices to the specified cpu and return. If that cpu is in the
+ * running state, or cpu is -1, then we check the confer parameter instead:
+ * if confer is 0 we simply return, otherwise we confer our slices to the lpar.
+ */
+void __spin_yield_cpu(int cpu, int confer)
+{
+	unsigned int holder_cpu = cpu, yield_count;
+
+	if (cpu == -1)
+		goto yield_to_lpar;
+
+	BUG_ON(holder_cpu >= nr_cpu_ids);
+	yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+
+	/* if cpu is running, confer slices to lpar conditionally */
+	if ((yield_count & 1) == 0)
+		goto yield_to_lpar;
+
+	plpar_hcall_norets(H_CONFER,
+		get_hard_smp_processor_id(holder_cpu), yield_count);
+	return;
+
+yield_to_lpar:
+	if (confer)
+		plpar_hcall_norets(H_CONFER, -1, 0);
+}
+EXPORT_SYMBOL_GPL(__spin_yield_cpu);
+
+void __spin_wake_cpu(int cpu)
+{
+	unsigned int holder_cpu = cpu;
+
+	BUG_ON(holder_cpu >= nr_cpu_ids);
+	/*
+	 * NOTE: we should always do this hcall regardless of the
+	 * yield_count of the holder_cpu, as there might be a case
+	 * like the one below:
+	 *	CPU 1			CPU 2
+	 *				yielded = true
+	 * if (yielded)
+	 *	__spin_wake_cpu()
+	 *				__spin_yield_cpu()
+	 *
+	 * So we might lose a wake if we check the yield_count and
+	 * return directly when the holder_cpu is running.
+	 * IOW, do NOT code it like below:
+	 *	yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+	 *	if ((yield_count & 1) == 0)
+	 *		return;
+	 *
+	 * A PROD hcall marks the target_cpu as prodded, which causes the
+	 * next cede or confer hcall on the target_cpu to return without
+	 * yielding.
+	 */
+	plpar_hcall_norets(H_PROD,
+		get_hard_smp_processor_id(holder_cpu));
+}
+EXPORT_SYMBOL_GPL(__spin_wake_cpu);
+
 #ifndef CONFIG_QUEUED_SPINLOCKS
 void __spin_yield(arch_spinlock_t *lock)
 {