diff mbox

[v2,2/5] powerpc/lib/sstep: Add popcnt instruction emulation

Message ID 20170724010109.21263-2-matthew.brown.dev@gmail.com (mailing list archive)
State Superseded
Headers show

Commit Message

Matt Brown July 24, 2017, 1:01 a.m. UTC
This adds emulations for the popcntb, popcntw, and popcntd instructions.
Tested for correctness against the popcnt{b,w,d} instructions on ppc64le.

Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
---
v2:
	- fixed opcodes
	- fixed typecasting
	- fixed bitshifting error for both 32 and 64bit arch
---
 arch/powerpc/lib/sstep.c | 43 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

Comments

Segher Boessenkool July 24, 2017, 7:36 a.m. UTC | #1
Hi Matt,

On Mon, Jul 24, 2017 at 11:01:06AM +1000, Matt Brown wrote:
> +	for (i = 0; i < (64 / size); i++) {

If you do

  for (i = 0; i < 64; i += size)

things are slightly nicer.

> +		if ((i * size) < 32)
> +			low |= n << (i * size);
> +		else
> +			high |= n << ((i * size) - 32);
> +	}
> +	regs->gpr[ra] = (high << 32) | low;

Why have separate high and low vars?

And there are much better ways to calculate popcount, of course.


Segher
Balbir Singh July 24, 2017, 10:28 a.m. UTC | #2
On Mon, Jul 24, 2017 at 11:01 AM, Matt Brown
<matthew.brown.dev@gmail.com> wrote:
> This adds emulations for the popcntb, popcntw, and popcntd instructions.
> Tested for correctness against the popcnt{b,w,d} instructions on ppc64le.
>
> Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
> ---
> v2:
>         - fixed opcodes
>         - fixed typecasting
>         - fixed bitshifting error for both 32 and 64bit arch
> ---
>  arch/powerpc/lib/sstep.c | 43 ++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 42 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
> index 87d277f..e6a16a3 100644
> --- a/arch/powerpc/lib/sstep.c
> +++ b/arch/powerpc/lib/sstep.c
> @@ -612,6 +612,35 @@ static nokprobe_inline void do_cmpb(struct pt_regs *regs, unsigned long v1,
>         regs->gpr[rd] = out_val;
>  }
>
> +/*
> + * The size parameter is used to adjust the equivalent popcnt instruction.
> + * popcntb = 8, popcntw = 32, popcntd = 64
> + */
> +static nokprobe_inline void do_popcnt(struct pt_regs *regs, unsigned long v1,
> +                               int size, int ra)
> +{
> +       unsigned long long high, low, mask;
> +       unsigned int n;
> +       int i, j;
> +
> +       high = 0;
> +       low = 0;
> +
> +       for (i = 0; i < (64 / size); i++) {
> +               n = 0;
> +               for (j = 0; j < size; j++) {
> +                       mask = 1UL << (j + (i * size));
> +                       if (v1 & mask)
> +                               n++;
> +               }
> +               if ((i * size) < 32)
> +                       low |= n << (i * size);
> +               else
> +                       high |= n << ((i * size) - 32);
> +       }
> +       regs->gpr[ra] = (high << 32) | low;
> +}

There's a very efficient way to do it, via the Gillies-Miller
method of sideways addition

Please see

http://opensourceforu.com/2012/06/power-programming-bitwise-tips-tricks/
and lib/hweight.c, you can reuse the code from lib/hweight.c

Balbir Singh
Matt Brown July 25, 2017, 12:53 a.m. UTC | #3
On Mon, Jul 24, 2017 at 8:28 PM, Balbir Singh <bsingharora@gmail.com> wrote:
> On Mon, Jul 24, 2017 at 11:01 AM, Matt Brown
> <matthew.brown.dev@gmail.com> wrote:
>> This adds emulations for the popcntb, popcntw, and popcntd instructions.
>> Tested for correctness against the popcnt{b,w,d} instructions on ppc64le.
>>
>> Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
>> ---
>> v2:
>>         - fixed opcodes
>>         - fixed typecasting
>>         - fixed bitshifting error for both 32 and 64bit arch
>> ---
>>  arch/powerpc/lib/sstep.c | 43 ++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 42 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
>> index 87d277f..e6a16a3 100644
>> --- a/arch/powerpc/lib/sstep.c
>> +++ b/arch/powerpc/lib/sstep.c
>> @@ -612,6 +612,35 @@ static nokprobe_inline void do_cmpb(struct pt_regs *regs, unsigned long v1,
>>         regs->gpr[rd] = out_val;
>>  }
>>
>> +/*
>> + * The size parameter is used to adjust the equivalent popcnt instruction.
>> + * popcntb = 8, popcntw = 32, popcntd = 64
>> + */
>> +static nokprobe_inline void do_popcnt(struct pt_regs *regs, unsigned long v1,
>> +                               int size, int ra)
>> +{
>> +       unsigned long long high, low, mask;
>> +       unsigned int n;
>> +       int i, j;
>> +
>> +       high = 0;
>> +       low = 0;
>> +
>> +       for (i = 0; i < (64 / size); i++) {
>> +               n = 0;
>> +               for (j = 0; j < size; j++) {
>> +                       mask = 1UL << (j + (i * size));
>> +                       if (v1 & mask)
>> +                               n++;
>> +               }
>> +               if ((i * size) < 32)
>> +                       low |= n << (i * size);
>> +               else
>> +                       high |= n << ((i * size) - 32);
>> +       }
>> +       regs->gpr[ra] = (high << 32) | low;
>> +}
>
> There's a very efficient way to do it, via the Gillies-Miller
> method of sideways addition
>
> Please see
>
> http://opensourceforu.com/2012/06/power-programming-bitwise-tips-tricks/
> and lib/hweight.c, you can reuse the code from lib/hweight.c

Oh that's a really cool technique.
We could use that for the parity instructions too.

>
> Balbir Singh
diff mbox

Patch

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 87d277f..e6a16a3 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -612,6 +612,35 @@  static nokprobe_inline void do_cmpb(struct pt_regs *regs, unsigned long v1,
 	regs->gpr[rd] = out_val;
 }
 
+/*
+ * Count the set bits in each size-bit field of v1 and store every count at
+ * its field's own bit offset in regs->gpr[ra]. popcntb=8, popcntw=32, popcntd=64
+ */
+static nokprobe_inline void do_popcnt(struct pt_regs *regs, unsigned long v1,
+				int size, int ra)
+{
+	unsigned long long high, low, mask;
+	unsigned int n;	/* set-bit count of the field being scanned */
+	int i, j;	/* i: field index, j: bit index within the field */
+
+	high = 0;
+	low = 0;
+
+	for (i = 0; i < (64 / size); i++) {	/* one pass per field */
+		n = 0;
+		for (j = 0; j < size; j++) {
+			mask = 1UL << (j + (i * size));	/* NOTE(review): shift count reaches 63; presumably overflows where unsigned long is 32 bits - confirm */
+			if (v1 & mask)
+				n++;
+		}
+		if ((i * size) < 32)	/* field starts in the lower 32 bits */
+			low |= n << (i * size);
+		else	/* upper half: offset is rebased to 0 here, restored below */
+			high |= n << ((i * size) - 32);
+	}
+	regs->gpr[ra] = (high << 32) | low;	/* rejoin the two 32-bit halves */
+}
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
 	int ret = 0;
@@ -1194,6 +1223,10 @@  int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 			regs->gpr[ra] = regs->gpr[rd] & ~regs->gpr[rb];
 			goto logical_done;
 
+		case 122:	/* popcntb */
+			do_popcnt(regs, regs->gpr[rd], 8, ra);
+			goto logical_done;
+
 		case 124:	/* nor */
 			regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
 			goto logical_done;
@@ -1206,6 +1239,10 @@  int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 			regs->gpr[ra] = regs->gpr[rd] ^ regs->gpr[rb];
 			goto logical_done;
 
+		case 378:	/* popcntw */
+			do_popcnt(regs, regs->gpr[rd], 32, ra);
+			goto logical_done;
+
 		case 412:	/* orc */
 			regs->gpr[ra] = regs->gpr[rd] | ~regs->gpr[rb];
 			goto logical_done;
@@ -1217,7 +1254,11 @@  int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 		case 476:	/* nand */
 			regs->gpr[ra] = ~(regs->gpr[rd] & regs->gpr[rb]);
 			goto logical_done;
-
+#ifdef __powerpc64__
+		case 506:	/* popcntd */
+			do_popcnt(regs, regs->gpr[rd], 64, ra);
+			goto logical_done;
+#endif
 		case 922:	/* extsh */
 			regs->gpr[ra] = (signed short) regs->gpr[rd];
 			goto logical_done;