Patchwork powerpc: OE=1 Form Instructions Not Decoded Correctly

login
register
mail settings
Submitter Tom Musta
Date Sept. 6, 2013, 3:13 p.m.
Message ID <CAJ-hshaxuJBMCFquT7J50rjuDYWer_0i1=2QmnOKGAxx4n=FUw@mail.gmail.com>
Download mbox | patch
Permalink /patch/273432/
State Changes Requested
Headers show

Comments

Tom Musta - Sept. 6, 2013, 3:13 p.m.
To: linuxppc-dev@lists.ozlabs.org
Subject: [PATCH] powerpc: OE=1 Form Instructions Not Decoded Correctly
From: Tom Musta <tommusta@gmail.com>

PowerISA uses instruction bit 21 to indicate that the overflow (OV) bit
of the XER is to be set, as well as its corresponding sticky bit (SO).
This patch addresses two defects in the implementation of the PowerISA
single step code for this category of instructions:  (a) the OE=1 case
is not correctly accounted for in the case statement for the extended
opcode handling.  (b) the implementation is not setting XER[OV] and
XER[SO].

Signed-off-by: Tom Musta <tommusta@gmail.com>

---
 arch/powerpc/lib/sstep.c |  208
+++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 185 insertions(+), 23 deletions(-)


  * Functions in ldstfp.S
@@ -467,6 +474,13 @@ static int __kprobes do_vsx_store(int rn, int
(*func)(int, unsigned long),
  ".previous" \
  : "=r" (err) \
  : "r" (addr), "i" (-EFAULT), "0" (err))
+static void __kprobes set_xer_ov(struct pt_regs *regs, int ov)
+{
+ if (ov)
+ regs->xer |= (XER_SO | XER_OV);
+ else
+ regs->xer &= ~XER_OV;
+}

 static void __kprobes set_cr0(struct pt_regs *regs, int rd)
 {
@@ -487,7 +501,7 @@ static void __kprobes set_cr0(struct pt_regs *regs, int
rd)

 static void __kprobes add_with_carry(struct pt_regs *regs, int rd,
      unsigned long val1, unsigned long val2,
-     unsigned long carry_in)
+     unsigned long carry_in, int oe)
 {
  unsigned long val = val1 + val2;

@@ -504,6 +518,13 @@ static void __kprobes add_with_carry(struct pt_regs
*regs, int rd,
  regs->xer |= XER_CA;
  else
  regs->xer &= ~XER_CA;
+ if (oe) {
+ unsigned long m = ((regs->msr & MSR_64BIT) == 0) ?
+ SGN_32_MSK : SGN_64_MSK;
+ int ov = ((val1 & m) == (val2 & m)) &&
+ ((val1 & m) != (val & m));
+ set_xer_ov(regs, ov);
+ }
 }

 static void __kprobes do_cmp_signed(struct pt_regs *regs, long v1, long v2,
@@ -552,7 +573,7 @@ static void __kprobes do_cmp_unsigned(struct pt_regs
*regs, unsigned long v1,
 #define DATA32(x) (x)
 #endif
 #define ROTATE(x, n) ((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) -
(n)))) : (x))
-
+#define OE(instr)       (((instr) >> (31-21)) & 1)
 /*
  * Emulate instructions that cause a transfer of control,
  * loads and stores, and a few other instructions.
@@ -693,7 +714,7 @@ int __kprobes emulate_step(struct pt_regs *regs,
unsigned int instr)

  case 8: /* subfic */
  imm = (short) instr;
- add_with_carry(regs, rd, ~regs->gpr[ra], imm, 1);
+ add_with_carry(regs, rd, ~regs->gpr[ra], imm, 1, 0);
  goto instr_done;

  case 10: /* cmpli */
@@ -718,12 +739,12 @@ int __kprobes emulate_step(struct pt_regs *regs,
unsigned int instr)

  case 12: /* addic */
  imm = (short) instr;
- add_with_carry(regs, rd, regs->gpr[ra], imm, 0);
+ add_with_carry(regs, rd, regs->gpr[ra], imm, 0, 0);
  goto instr_done;

  case 13: /* addic. */
  imm = (short) instr;
- add_with_carry(regs, rd, regs->gpr[ra], imm, 0);
+ add_with_carry(regs, rd, regs->gpr[ra], imm, 0, 0);
  set_cr0(regs, rd);
  goto instr_done;

@@ -944,8 +965,9 @@ int __kprobes emulate_step(struct pt_regs *regs,
unsigned int instr)
  * Arithmetic instructions
  */
  case 8: /* subfc */
+ case 520: /* subfco */
  add_with_carry(regs, rd, ~regs->gpr[ra],
-       regs->gpr[rb], 1);
+       regs->gpr[rb], 1, OE(instr));
  goto arith_done;
 #ifdef __powerpc64__
  case 9: /* mulhdu */
@@ -954,8 +976,9 @@ int __kprobes emulate_step(struct pt_regs *regs,
unsigned int instr)
  goto arith_done;
 #endif
  case 10: /* addc */
+ case 522:       /* addco */
  add_with_carry(regs, rd, regs->gpr[ra],
-       regs->gpr[rb], 0);
+       regs->gpr[rb], 0, OE(instr));
  goto arith_done;

  case 11: /* mulhwu */
@@ -964,7 +987,19 @@ int __kprobes emulate_step(struct pt_regs *regs,
unsigned int instr)
  goto arith_done;

  case 40: /* subf */
- regs->gpr[rd] = regs->gpr[rb] - regs->gpr[ra];
+ case 552: /* subfo */
+ val = regs->gpr[rb] - regs->gpr[ra];
+ if (OE(instr)) { /* subfo */
+ unsigned long m =
+ ((regs->msr & MSR_64BIT) == 0) ?
+ SGN_32_MSK : SGN_64_MSK;
+ int ov =
+ ((~regs->gpr[ra] & m) ==
+ (regs->gpr[rb] & m)) &&
+ ((regs->gpr[rb] & m) != (val & m));
+ set_xer_ov(regs, ov);
+ }
+ regs->gpr[rd] = val;
  goto arith_done;
 #ifdef __powerpc64__
  case 73: /* mulhd */
@@ -978,69 +1013,196 @@ int __kprobes emulate_step(struct pt_regs *regs,
unsigned int instr)
  goto arith_done;

  case 104: /* neg */
+ case 616: /* nego */
  regs->gpr[rd] = -regs->gpr[ra];
+ if (OE(instr)) { /* nego */
+ int ov = (regs->msr & MSR_64BIT) ?
+ (regs->gpr[rd] == 0x8000000000000000l) :
+ (regs->gpr[rd] == 0x80000000l);
+ set_xer_ov(regs, ov);
+ }
  goto arith_done;

  case 136: /* subfe */
+ case 648: /* subfeo */
  add_with_carry(regs, rd, ~regs->gpr[ra], regs->gpr[rb],
-       regs->xer & XER_CA);
+       regs->xer & XER_CA, OE(instr));
  goto arith_done;

  case 138: /* adde */
+ case 650: /* addeo */
  add_with_carry(regs, rd, regs->gpr[ra], regs->gpr[rb],
-       regs->xer & XER_CA);
+       regs->xer & XER_CA, OE(instr));
  goto arith_done;

  case 200: /* subfze */
+ case 712: /* subfzeo */
  add_with_carry(regs, rd, ~regs->gpr[ra], 0L,
-       regs->xer & XER_CA);
+       regs->xer & XER_CA, OE(instr));
  goto arith_done;

  case 202: /* addze */
+ case 714: /* addzeo */
  add_with_carry(regs, rd, regs->gpr[ra], 0L,
-       regs->xer & XER_CA);
+       regs->xer & XER_CA, OE(instr));
  goto arith_done;

  case 232: /* subfme */
+ case 744: /* subfmeo */
  add_with_carry(regs, rd, ~regs->gpr[ra], -1L,
-       regs->xer & XER_CA);
+       regs->xer & XER_CA, OE(instr));
  goto arith_done;
 #ifdef __powerpc64__
  case 233: /* mulld */
- regs->gpr[rd] = regs->gpr[ra] * regs->gpr[rb];
+ case 745:       /* mulldo */
+ {
+ unsigned long xer_copy;
+ asm("mulldo %0,%2,%3;"
+ "mfxer %1" :
+ "=r" (regs->gpr[rd]), "=r" (xer_copy) :
+ "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+ if (OE(instr))
+ set_xer_ov(regs, (xer_copy & XER_OV) != 0);
  goto arith_done;
+ }
 #endif
  case 234: /* addme */
+ case 746: /* addme0 */
  add_with_carry(regs, rd, regs->gpr[ra], -1L,
-       regs->xer & XER_CA);
+       regs->xer & XER_CA, OE(instr));
  goto arith_done;

  case 235: /* mullw */
- regs->gpr[rd] = (unsigned int) regs->gpr[ra] *
- (unsigned int) regs->gpr[rb];
+ case 747: /* mullwo */
+ regs->gpr[rd] = SGN_EXT_32(regs->gpr[ra]) *
+ SGN_EXT_32(regs->gpr[rb]);
+ if (OE(instr)) { /* mullwo */
+ int ov = 0;
+ if ((regs->gpr[rd] & U32_MSK) == 0)
+ ov = (regs->gpr[rd] & SGN_32_MSK) != 0;
+ else if ((regs->gpr[rd] & U32_MSK) == U32_MSK)
+ ov = (regs->gpr[rd] & SGN_32_MSK) == 0;
+ else
+ ov = 1;
+ set_xer_ov(regs, ov);
+ }
  goto arith_done;

  case 266: /* add */
- regs->gpr[rd] = regs->gpr[ra] + regs->gpr[rb];
+ case 778: /* addo */
+ val = regs->gpr[ra] + regs->gpr[rb];
+ if (OE(instr)) { /* addo */
+ unsigned long m = ((regs->msr & MSR_64BIT) == 0)
+ ? SGN_32_MSK : SGN_64_MSK;
+ int ov = ((regs->gpr[ra] & m) ==
+ (regs->gpr[rb] & m)) &&
+ ((regs->gpr[ra] & m) != (val & m));
+ set_xer_ov(regs, ov);
+ }
+ regs->gpr[rd] = val;
+ goto arith_done;
+#ifdef __powerpc64__
+ case 393: /* divdeu */
+ case 905:       /* divdeuo */
+ {
+ unsigned long xer_copy;
+ asm("divdeuo %0,%2,%3;"
+ "mfxer %1" :
+ "=r" (regs->gpr[rd]), "=r" (xer_copy) :
+ "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+ if (OE(instr))
+ set_xer_ov(regs, (xer_copy & XER_OV) != 0);
+ goto arith_done;
+ }
+#endif
+ case 395: /* divweu */
+ case 907: /* divweuo */
+ val = (L32(regs->gpr[ra]) << 32) / L32(regs->gpr[rb]);
+ if (OE(instr)) {
+ int ov = (L32(regs->gpr[rb]) == 0) ||
+ ((val & U32_MSK) != 0);
+ set_xer_ov(regs, ov);
+ }
+ regs->gpr[rd] = val;
+ goto arith_done;
+#ifdef __powerpc64__
+ case 425: /* divde */
+ case 937:       /* divdeo */
+ {
+ unsigned long xer_copy;
+ asm("divdeo %0,%2,%3;"
+ " mfxer %1" :
+ "=r" (regs->gpr[rd]), "=r" (xer_copy) :
+ "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+ if (OE(instr))
+ set_xer_ov(regs, (xer_copy & XER_OV) != 0);
+ goto arith_done;
+ }
+#endif
+ case 427: /* divwe */
+ case 939: /* divweo */
+ val = ((long int)SGN_EXT_32(regs->gpr[ra]) << 32) /
+ (long int)SGN_EXT_32(regs->gpr[rb]);
+ if (OE(instr)) {
+ int ov = (L32(regs->gpr[rb]) == 0) ||
+ ((L32(regs->gpr[ra]) == SGN_32_MSK) &&
+ (L32(regs->gpr[rb]) == L32_MSK));
+ if ((val & U32_MSK) == 0)
+ ov |= (val & SGN_32_MSK) != 0;
+ else if ((val & U32_MSK) == U32_MSK)
+ ov |= (val & SGN_32_MSK) == 0;
+ else
+ ov = 1;
+ set_xer_ov(regs, ov);
+ }
+ regs->gpr[rd] = val;
  goto arith_done;
 #ifdef __powerpc64__
  case 457: /* divdu */
- regs->gpr[rd] = regs->gpr[ra] / regs->gpr[rb];
+ case 969: /* divduo */
+ val = regs->gpr[ra] / regs->gpr[rb];
+ if (OE(instr)) { /* divduo */
+ int ov = (regs->gpr[rb] == 0);
+ set_xer_ov(regs, ov);
+ }
+ regs->gpr[rd] = val;
  goto arith_done;
 #endif
  case 459: /* divwu */
- regs->gpr[rd] = (unsigned int) regs->gpr[ra] /
+ case 971: /* divwuo */
+ val = (unsigned int) regs->gpr[ra] /
  (unsigned int) regs->gpr[rb];
+ if (OE(instr)) { /* divwuo */
+ int ov = (L32(regs->gpr[rb]) == 0);
+ set_xer_ov(regs, ov);
+ }
+ regs->gpr[rd] = val;
  goto arith_done;
 #ifdef __powerpc64__
  case 489: /* divd */
- regs->gpr[rd] = (long int) regs->gpr[ra] /
+ case 1001: /* divdo */
+ val = (long int) regs->gpr[ra] /
  (long int) regs->gpr[rb];
+ if (OE(instr)) { /* divdo */
+ int ov = (regs->gpr[rb] == 0) ||
+ ((regs->gpr[ra] == SGN_64_MSK) &&
+ (regs->gpr[rb] == -1ul));
+ set_xer_ov(regs, ov);
+ }
+ regs->gpr[rd] = val;
  goto arith_done;
 #endif
  case 491: /* divw */
- regs->gpr[rd] = (int) regs->gpr[ra] /
- (int) regs->gpr[rb];
+ case 1003: /* divwo */
+ val = (long int)SGN_EXT_32(regs->gpr[ra]) /
+ (long int)SGN_EXT_32(regs->gpr[rb]);
+ if (OE(instr)) {
+ int ov = (L32(regs->gpr[rb]) == 0) ||
+ ((L32(regs->gpr[ra]) == SGN_32_MSK) &&
+ (L32(regs->gpr[rb]) == L32_MSK));
+ set_xer_ov(regs, ov);
+ }
+ regs->gpr[rd] = val;
  goto arith_done;
Paul Mackerras - Sept. 8, 2013, 10:35 p.m.
On Fri, Sep 06, 2013 at 10:13:00AM -0500, Tom Musta wrote:
> To: linuxppc-dev@lists.ozlabs.org
> Subject: [PATCH] powerpc: OE=1 Form Instructions Not Decoded Correctly
> From: Tom Musta <tommusta@gmail.com>
> 
> PowerISA uses instruction bit 21 to indicate that the overflow (OV) bit
> of the XER is to be set, as well as its corresponding sticky bit (SO).
> This patch addresses two defects in the implementation of the PowerISA
> single step code for this category of instructions:  (a) the OE=1 case
> is not correctly accounted for in the case statement for the extended
> opcode handling.  (b) the implementation is not setting XER[OV] and
> XER[SO].

Are you seeing any actual problems arising from the OE=1 instructions
not being emulated?  This code was designed primarily for emulating
instructions in the kernel, which is written in C, and the C compiler
doesn't emit OE=1 instructions -- or at least it didn't in the past.
So, does the impetus for this change come because the C compiler is
now emitting these instructions, or because this code is being used on
non-kernel instructions, or just for completeness?  Your patch
description needs to include answers to these kinds of questions.

Also, you need to indent your code correctly according to
Documentation/CodingStyle.

Paul.
Benjamin Herrenschmidt - Sept. 9, 2013, 10:09 a.m.
On Mon, 2013-09-09 at 08:35 +1000, Paul Mackerras wrote:
> On Fri, Sep 06, 2013 at 10:13:00AM -0500, Tom Musta wrote:
> > To: linuxppc-dev@lists.ozlabs.org
> > Subject: [PATCH] powerpc: OE=1 Form Instructions Not Decoded Correctly
> > From: Tom Musta <tommusta@gmail.com>
> > 
> > PowerISA uses instruction bit 21 to indicate that the overflow (OV) bit
> > of the XER is to be set, as well as its corresponding sticky bit (SO).
> > This patch addresses two defects in the implementation of the PowerISA
> > single step code for this category of instructions:  (a) the OE=1 case
> > is not correctly accounted for in the case statement for the extended
> > opcode handling.  (b) the implementation is not setting XER[OV] and
> > XER[SO].
> 
> Are you seeing any actual problems arising from the OE=1 instructions
> not being emulated?  This code was designed primarily for emulating
> instructions in the kernel, which is written in C, and the C compiler
> doesn't emit OE=1 instructions -- or at least it didn't in the past.
> So, does the impetus for this change come because the C compiler is
> now emitting these instructions, or because this code is being used on
> non-kernel instructions, or just for completeness?  Your patch
> description needs to include answers to these kinds of questions.

Isn't that code occasionally used with uprobes too nowadays  ?

Cheers,
Ben.

> Also, you need to indent your code correctly according to
> Documentation/CodingStyle.
> 
> Paul.
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
Benjamin Herrenschmidt - Sept. 9, 2013, 10:11 a.m.
On Mon, 2013-09-09 at 08:35 +1000, Paul Mackerras wrote:
> 
> Also, you need to indent your code correctly according to
> Documentation/CodingStyle.

Looks like the patch has been mangled by the mailer ... it's in HTML to
begin with :-)

Tom, you need to find a mailer setup that works for sending patches,
basically that sends them in plain text without any mangling of
tabs and spaces and without word wrapping.

I personally use "evolution" under Linux which has a "preformatted"
style mode for that but I'm sure there are plenty of other options.

Cheers,
Ben.
Tom Musta - Sept. 9, 2013, 12:44 p.m.
> Looks like the patch has been mangled by the mailer ... it's in HTML to
> begin with :-)

Frustrating.  I did use a plain text option on the mail client and did a
test send to a few accounts before posting to the mailing list.  The
patch was verified by checkpatch.pl before it got mangled.

I will find a better setup and resubmit.

> Isn't that code occasionally used with uprobes too nowadays  ?

Yes. I believe so.

Tom Musta (tmusta@us.ibm.com)
Senior Software Engineer
Blue Gene Kernel Development
IBM Rochester
(507) 253-4119   (T/L 553-4119)
Tom Musta - Sept. 9, 2013, 8:20 p.m.
> > Isn't that code occasionally used with uprobes too nowadays  ?
>
> Yes. I believe so.

I'm going to back-pedal a little.  I reread code and can connect
single step code to kprobes but not necessarily to uprobes.  So
I am not sure that this code is used with uprobes.
Stephen Rothwell - Sept. 10, 2013, 12:25 a.m.
Hi Tom,

On Mon, 9 Sep 2013 07:44:54 -0500 Tom Musta <tmusta@us.ibm.com> wrote:
>
> > Looks like the patch has been mangled by the mailer ... it's in HTML to
> > begin with :-)
> 
> Frustrating.  I did use a plain text option on the mail client and did a
> test send to a few accounts before posting to the mailing list.  The
> patch was verified by checkpatch.pl before it got mangled.
> 
> I will find a better setup and resubmit.

Also please find a mailer that does *not* respond to the envelope
recipient (linuxppc-dev-bounces+tmusta=us.ibm.com@lists.ozlabs.org in
this case) as that just tends to irritate list owners.
Ananth N Mavinakayanahalli - Sept. 11, 2013, 4:37 a.m.
On Mon, Sep 09, 2013 at 03:20:58PM -0500, Tom Musta wrote:
> > > Isn't that code occasionally used with uprobes too nowadays  ?
> >
> > Yes. I believe so.
> 
> I'm going to back-pedal a little.  I reread code and can connect
> single step code to kprobes but not necessarily to uprobes.  So
> I am not sure that this code is used with uprobes.

Yes, emulate_step() is used for uprobes too.

Ananth

Patch

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index d220b88..a4f343d 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -31,6 +31,13 @@  extern char system_call_common[];
 #define XER_OV 0x40000000U
 #define XER_CA 0x20000000U

+#define U32_MSK         0xFFFFFFFF00000000ul
+#define L32_MSK         0x00000000FFFFFFFFul
+#define SGN_32_MSK      0x0000000080000000ul
+#define SGN_64_MSK      0x8000000000000000ul
+#define SGN_EXT_32(x)   (((x) & SGN_32_MSK) ? ((x) | U32_MSK) : ((x) &
L32_MSK))
+#define L32(x)          ((x) & L32_MSK)
+
 #ifdef CONFIG_PPC_FPU
 /*