diff mbox series

[v4,2/4] powerpc/sstep: Support VSX vector paired storage access instructions

Message ID 20201008072726.233086-3-ravi.bangoria@linux.ibm.com (mailing list archive)
State Superseded
Headers show
Series powerpc/sstep: VSX 32-byte vector paired load/store instructions | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (d1def5df359f3f1882cc29d8baa5cd2a4861a6c6)
snowpatch_ozlabs/checkpatch warning total: 0 errors, 0 warnings, 1 checks, 244 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Ravi Bangoria Oct. 8, 2020, 7:27 a.m. UTC
From: Balamuruhan S <bala24@linux.ibm.com>

VSX Vector Paired instructions load/store an octword (32 bytes)
from/to storage into/from two sequential VSRs. Add emulation support
for these new instructions:
  * Load VSX Vector Paired (lxvp)
  * Load VSX Vector Paired Indexed (lxvpx)
  * Prefixed Load VSX Vector Paired (plxvp)
  * Store VSX Vector Paired (stxvp)
  * Store VSX Vector Paired Indexed (stxvpx)
  * Prefixed Store VSX Vector Paired (pstxvp)

Suggested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Balamuruhan S <bala24@linux.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
 arch/powerpc/lib/sstep.c | 146 +++++++++++++++++++++++++++++++++------
 1 file changed, 125 insertions(+), 21 deletions(-)

Comments

kernel test robot Oct. 8, 2020, 11:24 a.m. UTC | #1
Hi Ravi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on v5.9-rc8 next-20201007]
[cannot apply to mpe/next scottwood/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Ravi-Bangoria/powerpc-sstep-VSX-32-byte-vector-paired-load-store-instructions/20201008-153614
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-g5_defconfig (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/55def6779849f9aec057f405abf1cd98a8674b4f
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Ravi-Bangoria/powerpc-sstep-VSX-32-byte-vector-paired-load-store-instructions/20201008-153614
        git checkout 55def6779849f9aec057f405abf1cd98a8674b4f
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   arch/powerpc/lib/sstep.c: In function 'analyse_instr':
>> arch/powerpc/lib/sstep.c:2901:15: error: implicit declaration of function 'VSX_REGISTER_XTP'; did you mean 'H_REGISTER_SMR'? [-Werror=implicit-function-declaration]
    2901 |     op->reg = VSX_REGISTER_XTP(rd);
         |               ^~~~~~~~~~~~~~~~
         |               H_REGISTER_SMR
   cc1: all warnings being treated as errors

vim +2901 arch/powerpc/lib/sstep.c

  2815	
  2816	#ifdef __powerpc64__
  2817		case 62:	/* std[u] */
  2818			op->ea = dsform_ea(word, regs);
  2819			switch (word & 3) {
  2820			case 0:		/* std */
  2821				op->type = MKOP(STORE, 0, 8);
  2822				break;
  2823			case 1:		/* stdu */
  2824				op->type = MKOP(STORE, UPDATE, 8);
  2825				break;
  2826			case 2:		/* stq */
  2827				if (!(rd & 1))
  2828					op->type = MKOP(STORE, 0, 16);
  2829				break;
  2830			}
  2831			break;
  2832		case 1: /* Prefixed instructions */
  2833			if (!cpu_has_feature(CPU_FTR_ARCH_31))
  2834				return -1;
  2835	
  2836			prefix_r = GET_PREFIX_R(word);
  2837			ra = GET_PREFIX_RA(suffix);
  2838			op->update_reg = ra;
  2839			rd = (suffix >> 21) & 0x1f;
  2840			op->reg = rd;
  2841			op->val = regs->gpr[rd];
  2842	
  2843			suffixopcode = get_op(suffix);
  2844			prefixtype = (word >> 24) & 0x3;
  2845			switch (prefixtype) {
  2846			case 0: /* Type 00  Eight-Byte Load/Store */
  2847				if (prefix_r && ra)
  2848					break;
  2849				op->ea = mlsd_8lsd_ea(word, suffix, regs);
  2850				switch (suffixopcode) {
  2851				case 41:	/* plwa */
  2852					op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4);
  2853					break;
  2854				case 42:        /* plxsd */
  2855					op->reg = rd + 32;
  2856					op->type = MKOP(LOAD_VSX, PREFIXED, 8);
  2857					op->element_size = 8;
  2858					op->vsx_flags = VSX_CHECK_VEC;
  2859					break;
  2860				case 43:	/* plxssp */
  2861					op->reg = rd + 32;
  2862					op->type = MKOP(LOAD_VSX, PREFIXED, 4);
  2863					op->element_size = 8;
  2864					op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
  2865					break;
  2866				case 46:	/* pstxsd */
  2867					op->reg = rd + 32;
  2868					op->type = MKOP(STORE_VSX, PREFIXED, 8);
  2869					op->element_size = 8;
  2870					op->vsx_flags = VSX_CHECK_VEC;
  2871					break;
  2872				case 47:	/* pstxssp */
  2873					op->reg = rd + 32;
  2874					op->type = MKOP(STORE_VSX, PREFIXED, 4);
  2875					op->element_size = 8;
  2876					op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
  2877					break;
  2878				case 51:	/* plxv1 */
  2879					op->reg += 32;
  2880					fallthrough;
  2881				case 50:	/* plxv0 */
  2882					op->type = MKOP(LOAD_VSX, PREFIXED, 16);
  2883					op->element_size = 16;
  2884					op->vsx_flags = VSX_CHECK_VEC;
  2885					break;
  2886				case 55:	/* pstxv1 */
  2887					op->reg = rd + 32;
  2888					fallthrough;
  2889				case 54:	/* pstxv0 */
  2890					op->type = MKOP(STORE_VSX, PREFIXED, 16);
  2891					op->element_size = 16;
  2892					op->vsx_flags = VSX_CHECK_VEC;
  2893					break;
  2894				case 56:        /* plq */
  2895					op->type = MKOP(LOAD, PREFIXED, 16);
  2896					break;
  2897				case 57:	/* pld */
  2898					op->type = MKOP(LOAD, PREFIXED, 8);
  2899					break;
  2900				case 58:        /* plxvp */
> 2901					op->reg = VSX_REGISTER_XTP(rd);
  2902					op->type = MKOP(LOAD_VSX, PREFIXED, 32);
  2903					op->element_size = 32;
  2904					break;
  2905				case 60:        /* stq */
  2906					op->type = MKOP(STORE, PREFIXED, 16);
  2907					break;
  2908				case 61:	/* pstd */
  2909					op->type = MKOP(STORE, PREFIXED, 8);
  2910					break;
  2911				case 62:        /* pstxvp */
  2912					op->reg = VSX_REGISTER_XTP(rd);
  2913					op->type = MKOP(STORE_VSX, PREFIXED, 32);
  2914					op->element_size = 32;
  2915					break;
  2916				}
  2917				break;
  2918			case 1: /* Type 01 Eight-Byte Register-to-Register */
  2919				break;
  2920			case 2: /* Type 10 Modified Load/Store */
  2921				if (prefix_r && ra)
  2922					break;
  2923				op->ea = mlsd_8lsd_ea(word, suffix, regs);
  2924				switch (suffixopcode) {
  2925				case 32:	/* plwz */
  2926					op->type = MKOP(LOAD, PREFIXED, 4);
  2927					break;
  2928				case 34:	/* plbz */
  2929					op->type = MKOP(LOAD, PREFIXED, 1);
  2930					break;
  2931				case 36:	/* pstw */
  2932					op->type = MKOP(STORE, PREFIXED, 4);
  2933					break;
  2934				case 38:	/* pstb */
  2935					op->type = MKOP(STORE, PREFIXED, 1);
  2936					break;
  2937				case 40:	/* plhz */
  2938					op->type = MKOP(LOAD, PREFIXED, 2);
  2939					break;
  2940				case 42:	/* plha */
  2941					op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 2);
  2942					break;
  2943				case 44:	/* psth */
  2944					op->type = MKOP(STORE, PREFIXED, 2);
  2945					break;
  2946				case 48:        /* plfs */
  2947					op->type = MKOP(LOAD_FP, PREFIXED | FPCONV, 4);
  2948					break;
  2949				case 50:        /* plfd */
  2950					op->type = MKOP(LOAD_FP, PREFIXED, 8);
  2951					break;
  2952				case 52:        /* pstfs */
  2953					op->type = MKOP(STORE_FP, PREFIXED | FPCONV, 4);
  2954					break;
  2955				case 54:        /* pstfd */
  2956					op->type = MKOP(STORE_FP, PREFIXED, 8);
  2957					break;
  2958				}
  2959				break;
  2960			case 3: /* Type 11 Modified Register-to-Register */
  2961				break;
  2962			}
  2963	#endif /* __powerpc64__ */
  2964	
  2965		}
  2966	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index e6242744c71b..e39ee1651636 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -32,6 +32,10 @@  extern char system_call_vectored_emulate[];
 #define XER_OV32	0x00080000U
 #define XER_CA32	0x00040000U
 
+#ifdef CONFIG_VSX
+#define VSX_REGISTER_XTP(rd)   ((((rd) & 1) << 5) | ((rd) & 0xfe))
+#endif
+
 #ifdef CONFIG_PPC_FPU
 /*
  * Functions in ldstfp.S
@@ -279,6 +283,19 @@  static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
 		up[1] = tmp;
 		break;
 	}
+	case 32: {
+		unsigned long *up = (unsigned long *)ptr;
+		unsigned long tmp;
+
+		tmp = byterev_8(up[0]);
+		up[0] = byterev_8(up[3]);
+		up[3] = tmp;
+		tmp = byterev_8(up[2]);
+		up[2] = byterev_8(up[1]);
+		up[1] = tmp;
+		break;
+	}
+
 #endif
 	default:
 		WARN_ON_ONCE(1);
@@ -709,6 +726,8 @@  void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
 	reg->d[0] = reg->d[1] = 0;
 
 	switch (op->element_size) {
+	case 32:
+		/* [p]lxvp[x] */
 	case 16:
 		/* whole vector; lxv[x] or lxvl[l] */
 		if (size == 0)
@@ -717,7 +736,7 @@  void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
 		if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
 			rev = !rev;
 		if (rev)
-			do_byte_reverse(reg, 16);
+			do_byte_reverse(reg, size);
 		break;
 	case 8:
 		/* scalar loads, lxvd2x, lxvdsx */
@@ -793,6 +812,20 @@  void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
 	size = GETSIZE(op->type);
 
 	switch (op->element_size) {
+	case 32:
+		/* [p]stxvp[x] */
+		if (size == 0)
+			break;
+		if (rev) {
+			/* reverse 32 bytes */
+			buf.d[0] = byterev_8(reg->d[3]);
+			buf.d[1] = byterev_8(reg->d[2]);
+			buf.d[2] = byterev_8(reg->d[1]);
+			buf.d[3] = byterev_8(reg->d[0]);
+			reg = &buf;
+		}
+		memcpy(mem, reg, size);
+		break;
 	case 16:
 		/* stxv, stxvx, stxvl, stxvll */
 		if (size == 0)
@@ -861,28 +894,43 @@  static nokprobe_inline int do_vsx_load(struct instruction_op *op,
 				       bool cross_endian)
 {
 	int reg = op->reg;
-	u8 mem[16];
-	union vsx_reg buf;
+	int i, j, nr_vsx_regs;
+	u8 mem[32];
+	union vsx_reg buf[2];
 	int size = GETSIZE(op->type);
 
 	if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
 		return -EFAULT;
 
-	emulate_vsx_load(op, &buf, mem, cross_endian);
+	nr_vsx_regs = size / sizeof(__vector128);
+	emulate_vsx_load(op, buf, mem, cross_endian);
 	preempt_disable();
 	if (reg < 32) {
 		/* FP regs + extensions */
 		if (regs->msr & MSR_FP) {
-			load_vsrn(reg, &buf);
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				load_vsrn(reg + i, &buf[j].v);
+			}
 		} else {
-			current->thread.fp_state.fpr[reg][0] = buf.d[0];
-			current->thread.fp_state.fpr[reg][1] = buf.d[1];
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				current->thread.fp_state.fpr[reg + i][0] = buf[j].d[0];
+				current->thread.fp_state.fpr[reg + i][1] = buf[j].d[1];
+			}
 		}
 	} else {
-		if (regs->msr & MSR_VEC)
-			load_vsrn(reg, &buf);
-		else
-			current->thread.vr_state.vr[reg - 32] = buf.v;
+		if (regs->msr & MSR_VEC) {
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				load_vsrn(reg + i, &buf[j].v);
+			}
+		} else {
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				current->thread.vr_state.vr[reg - 32 + i] = buf[j].v;
+			}
+		}
 	}
 	preempt_enable();
 	return 0;
@@ -893,30 +941,45 @@  static nokprobe_inline int do_vsx_store(struct instruction_op *op,
 					bool cross_endian)
 {
 	int reg = op->reg;
-	u8 mem[16];
-	union vsx_reg buf;
+	int i, j, nr_vsx_regs;
+	u8 mem[32];
+	union vsx_reg buf[2];
 	int size = GETSIZE(op->type);
 
 	if (!address_ok(regs, ea, size))
 		return -EFAULT;
 
+	nr_vsx_regs = size / sizeof(__vector128);
 	preempt_disable();
 	if (reg < 32) {
 		/* FP regs + extensions */
 		if (regs->msr & MSR_FP) {
-			store_vsrn(reg, &buf);
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				store_vsrn(reg + i, &buf[j].v);
+			}
 		} else {
-			buf.d[0] = current->thread.fp_state.fpr[reg][0];
-			buf.d[1] = current->thread.fp_state.fpr[reg][1];
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				buf[j].d[0] = current->thread.fp_state.fpr[reg + i][0];
+				buf[j].d[1] = current->thread.fp_state.fpr[reg + i][1];
+			}
 		}
 	} else {
-		if (regs->msr & MSR_VEC)
-			store_vsrn(reg, &buf);
-		else
-			buf.v = current->thread.vr_state.vr[reg - 32];
+		if (regs->msr & MSR_VEC) {
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				store_vsrn(reg + i, &buf[j].v);
+			}
+		} else {
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				buf[j].v = current->thread.vr_state.vr[reg - 32 + i];
+			}
+		}
 	}
 	preempt_enable();
-	emulate_vsx_store(op, &buf, mem, cross_endian);
+	emulate_vsx_store(op, buf, mem, cross_endian);
 	return  copy_mem_out(mem, ea, size, regs);
 }
 #endif /* CONFIG_VSX */
@@ -2403,6 +2466,14 @@  int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 			op->vsx_flags = VSX_SPLAT;
 			break;
 
+		case 333:       /* lxvpx */
+			if (!cpu_has_feature(CPU_FTR_ARCH_31))
+				return -1;
+			op->reg = VSX_REGISTER_XTP(rd);
+			op->type = MKOP(LOAD_VSX, 0, 32);
+			op->element_size = 32;
+			break;
+
 		case 364:	/* lxvwsx */
 			op->reg = rd | ((word & 1) << 5);
 			op->type = MKOP(LOAD_VSX, 0, 4);
@@ -2431,6 +2502,13 @@  int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 				VSX_CHECK_VEC;
 			break;
 		}
+		case 461:       /* stxvpx */
+			if (!cpu_has_feature(CPU_FTR_ARCH_31))
+				return -1;
+			op->reg = VSX_REGISTER_XTP(rd);
+			op->type = MKOP(STORE_VSX, 0, 32);
+			op->element_size = 32;
+			break;
 		case 524:	/* lxsspx */
 			op->reg = rd | ((word & 1) << 5);
 			op->type = MKOP(LOAD_VSX, 0, 4);
@@ -2672,6 +2750,22 @@  int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 #endif
 
 #ifdef CONFIG_VSX
+	case 6:
+		if (!cpu_has_feature(CPU_FTR_ARCH_31))
+			return -1;
+		op->ea = dqform_ea(word, regs);
+		op->reg = VSX_REGISTER_XTP(rd);
+		op->element_size = 32;
+		switch (word & 0xf) {
+		case 0:         /* lxvp */
+			op->type = MKOP(LOAD_VSX, 0, 32);
+			break;
+		case 1:         /* stxvp */
+			op->type = MKOP(STORE_VSX, 0, 32);
+			break;
+		}
+		break;
+
 	case 61:	/* stfdp, lxv, stxsd, stxssp, stxv */
 		switch (word & 7) {
 		case 0:		/* stfdp with LSB of DS field = 0 */
@@ -2803,12 +2897,22 @@  int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 			case 57:	/* pld */
 				op->type = MKOP(LOAD, PREFIXED, 8);
 				break;
+			case 58:        /* plxvp */
+				op->reg = VSX_REGISTER_XTP(rd);
+				op->type = MKOP(LOAD_VSX, PREFIXED, 32);
+				op->element_size = 32;
+				break;
 			case 60:        /* stq */
 				op->type = MKOP(STORE, PREFIXED, 16);
 				break;
 			case 61:	/* pstd */
 				op->type = MKOP(STORE, PREFIXED, 8);
 				break;
+			case 62:        /* pstxvp */
+				op->reg = VSX_REGISTER_XTP(rd);
+				op->type = MKOP(STORE_VSX, PREFIXED, 32);
+				op->element_size = 32;
+				break;
 			}
 			break;
 		case 1: /* Type 01 Eight-Byte Register-to-Register */