Message ID | 20200506034050.24806-24-jniethe5@gmail.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 650b55b707fdfa764e9f2b81314d3eb4216fb962 |
Headers | show |
Series | Initial Prefixed Instruction support | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | Successfully applied on branch powerpc/merge (1bc92fe3175eb26ff37e580c0383d7a9abe06835) |
snowpatch_ozlabs/checkpatch | warning | total: 0 errors, 0 warnings, 4 checks, 397 lines checked |
snowpatch_ozlabs/needsstable | success | Patch has no Fixes tags |
Hi mpe, Relating to your message on [PATCH v8 16/30] powerpc: Define and use __get_user_instr{,inatomic}() - could you please take this. diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -106,6 +106,24 @@ static inline int __access_ok(unsigned long addr, unsigned long size, __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) #ifdef __powerpc64__ +#define get_user_instr(x, ptr) \ +({ \ + long __gui_ret = 0; \ + unsigned long __gui_ptr = (unsigned long)ptr; \ + struct ppc_inst __gui_inst; \ + unsigned int prefix, suffix; \ + __gui_ret = get_user(prefix, (unsigned int __user *)__gui_ptr); \ + if (!__gui_ret && (prefix >> 26) == OP_PREFIX) { \ + __gui_ret = get_user(suffix, \ + (unsigned int __user *)__gui_ptr + 1); \ + __gui_inst = ppc_inst_prefix(prefix, suffix); \ + } else { \ + __gui_inst = ppc_inst(prefix); \ + } \ + (x) = __gui_inst; \ + __gui_ret; \ +}) + #define __get_user_instr(x, ptr) \ ({ \ long __gui_ret = 0; \ @@ -142,6 +160,8 @@ static inline int __access_ok(unsigned long addr, unsigned long size, __gui_ret; \ }) #else +#define get_user_instr(x, ptr) \ + get_user((x).val, (u32 *)(ptr)) #define __get_user_instr(x, ptr) \ __get_user_nocheck((x).val, (u32 *)(ptr), sizeof(u32), true) #define __get_user_instr_inatomic(x, ptr) \
Le 06/05/2020 à 05:40, Jordan Niethe a écrit : > For powerpc64, redefine the ppc_inst type so both word and prefixed > instructions can be represented. On powerpc32 the type will remain the > same. Update places which had assumed instructions to be 4 bytes long. > > Reviewed-by: Alistair Popple <alistair@popple.id.au> > Signed-off-by: Jordan Niethe <jniethe5@gmail.com> > --- > v4: New to series > v5: - Distinguish normal instructions from prefixed instructions with a > 0xff marker for the suffix. > - __patch_instruction() using std for prefixed instructions > v6: - Return false instead of 0 in ppc_inst_prefixed() > - Fix up types for ppc32 so it compiles > - remove ppc_inst_write() > - __patching_instruction(): move flush out of condition > v8: - style > - Define and use OP_PREFIX instead of '1' (back from v3) > - __patch_instruction() fix for big endian > --- > arch/powerpc/include/asm/inst.h | 69 ++++++++++++++++++++++++--- > arch/powerpc/include/asm/kprobes.h | 2 +- > arch/powerpc/include/asm/ppc-opcode.h | 3 ++ > arch/powerpc/include/asm/uaccess.h | 40 +++++++++++++++- > arch/powerpc/include/asm/uprobes.h | 2 +- > arch/powerpc/kernel/crash_dump.c | 2 +- > arch/powerpc/kernel/optprobes.c | 42 ++++++++-------- > arch/powerpc/kernel/optprobes_head.S | 3 ++ > arch/powerpc/lib/code-patching.c | 19 ++++++-- > arch/powerpc/lib/feature-fixups.c | 5 +- > arch/powerpc/lib/inst.c | 41 ++++++++++++++++ > arch/powerpc/lib/sstep.c | 4 +- > arch/powerpc/xmon/xmon.c | 4 +- > arch/powerpc/xmon/xmon_bpts.S | 2 + > 14 files changed, 200 insertions(+), 38 deletions(-) > > diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h > index 2f3c9d5bcf7c..7868b80b610e 100644 > --- a/arch/powerpc/include/asm/inst.h > +++ b/arch/powerpc/include/asm/inst.h > @@ -2,29 +2,79 @@ > #ifndef _ASM_INST_H > #define _ASM_INST_H > > +#include <asm/ppc-opcode.h> > /* > * Instruction data type for POWER > */ > > struct ppc_inst { > u32 val; > +#ifdef __powerpc64__ CONFIG_PPC64 should be used instead. This is also reported by checkpatch. > + u32 suffix; > +#endif /* __powerpc64__ */ > } __packed; > > -#define ppc_inst(x) ((struct ppc_inst){ .val = x }) > - > static inline u32 ppc_inst_val(struct ppc_inst x) > { > return x.val; > } > > -static inline int ppc_inst_len(struct ppc_inst x) > +static inline int ppc_inst_primary_opcode(struct ppc_inst x) > { > - return sizeof(struct ppc_inst); > + return ppc_inst_val(x) >> 26; What about using get_op() from asm/disassemble.h instead of hardcodiing ? > } > > -static inline int ppc_inst_primary_opcode(struct ppc_inst x) > +#ifdef __powerpc64__ Use CONFIG_PPC64 > +#define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff }) > + > +#define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) }) > + > +static inline u32 ppc_inst_suffix(struct ppc_inst x) > { > - return ppc_inst_val(x) >> 26; > + return x.suffix; > +} > + > +static inline bool ppc_inst_prefixed(struct ppc_inst x) > +{ > + return (ppc_inst_primary_opcode(x) == 1) && ppc_inst_suffix(x) != 0xff; > +} > + > +static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) > +{ > + return ppc_inst_prefix(swab32(ppc_inst_val(x)), > + swab32(ppc_inst_suffix(x))); > +} > + > +static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) > +{ > + u32 val, suffix; > + > + val = *(u32 *)ptr; > + if ((val >> 26) == 1) { Don't hardcode, use ppc_inst_primary_opcode() and compare it to OP_PREFIX Or use get_op() from asm/disassemble.h > + suffix = *((u32 *)ptr + 1); > + return ppc_inst_prefix(val, suffix); > + } else { > + return ppc_inst(val); > + } > +} > + > +static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) > +{ > + return *(u64 *)&x == *(u64 *)&y; > +} > + > +#else > + > +#define ppc_inst(x) ((struct ppc_inst){ .val = x }) > + > +static inline bool ppc_inst_prefixed(struct ppc_inst x) > +{ > + return false; > +} > + > +static inline u32 ppc_inst_suffix(struct ppc_inst x) > +{ > + return 0; > } > > static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) > @@ -42,6 +92,13 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) > return ppc_inst_val(x) == ppc_inst_val(y); > } > > +#endif /* __powerpc64__ */ > + > +static inline int ppc_inst_len(struct ppc_inst x) > +{ > + return (ppc_inst_prefixed(x)) ? 8 : 4; > +} > + > int probe_user_read_inst(struct ppc_inst *inst, > struct ppc_inst *nip); > int probe_kernel_read_inst(struct ppc_inst *inst, > diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h > index 66b3f2983b22..4fc0e15e23a5 100644 > --- a/arch/powerpc/include/asm/kprobes.h > +++ b/arch/powerpc/include/asm/kprobes.h > @@ -43,7 +43,7 @@ extern kprobe_opcode_t optprobe_template_ret[]; > extern kprobe_opcode_t optprobe_template_end[]; > > /* Fixed instruction size for powerpc */ > -#define MAX_INSN_SIZE 1 > +#define MAX_INSN_SIZE 2 > #define MAX_OPTIMIZED_LENGTH sizeof(kprobe_opcode_t) /* 4 bytes */ > #define MAX_OPTINSN_SIZE (optprobe_template_end - optprobe_template_entry) > #define RELATIVEJUMP_SIZE sizeof(kprobe_opcode_t) /* 4 bytes */ > diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h > index c1df75edde44..2a39c716c343 100644 > --- a/arch/powerpc/include/asm/ppc-opcode.h > +++ b/arch/powerpc/include/asm/ppc-opcode.h > @@ -158,6 +158,9 @@ > /* VMX Vector Store Instructions */ > #define OP_31_XOP_STVX 231 > > +/* Prefixed Instructions */ > +#define OP_PREFIX 1 > + > #define OP_31 31 > #define OP_LWZ 32 > #define OP_STFS 52 > diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h > index c0a35e4586a5..217897927926 100644 > --- a/arch/powerpc/include/asm/uaccess.h > +++ b/arch/powerpc/include/asm/uaccess.h > @@ -105,11 +105,49 @@ static inline int __access_ok(unsigned long addr, unsigned long size, > #define __put_user_inatomic(x, ptr) \ > __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) > > +#ifdef __powerpc64__ Replace by CONFIG_PPC64 > +#define __get_user_instr(x, ptr) \ > +({ \ > + long __gui_ret = 0; \ > + unsigned long __gui_ptr = (unsigned long)ptr; \ > + struct ppc_inst __gui_inst; \ > + unsigned int prefix, suffix; \ > + __gui_ret = __get_user(prefix, (unsigned int __user *)__gui_ptr); \ __get_user() can be costly especially with KUAP. I think you should perform a 64 bits read and fallback on a 32 bits read only if the 64 bits read failed. > + if (!__gui_ret && (prefix >> 26) == OP_PREFIX) { \ What about using get_op() from asm/disassemble.h instead of hardcodiing ? > + __gui_ret = __get_user(suffix, \ > + (unsigned int __user *)__gui_ptr + 1); \ > + __gui_inst = ppc_inst_prefix(prefix, suffix); \ > + } else { \ > + __gui_inst = ppc_inst(prefix); \ > + } \ > + (x) = __gui_inst; \ > + __gui_ret; \ > +}) > + > +#define __get_user_instr_inatomic(x, ptr) \ > +({ \ > + long __gui_ret = 0; \ > + unsigned long __gui_ptr = (unsigned long)ptr; \ > + struct ppc_inst __gui_inst; \ > + unsigned int prefix, suffix; \ > + __gui_ret = __get_user_inatomic(prefix, (unsigned int __user *)__gui_ptr); \ Same > + if (!__gui_ret && (prefix >> 26) == OP_PREFIX) { \ > + __gui_ret = __get_user_inatomic(suffix, \ > + (unsigned int __user *)__gui_ptr + 1); \ > + __gui_inst = ppc_inst_prefix(prefix, suffix); \ > + } else { \ > + __gui_inst = ppc_inst(prefix); \ > + } \ > + (x) = __gui_inst; \ > + __gui_ret; \ > +}) > +#else > #define __get_user_instr(x, ptr) \ > __get_user_nocheck((x).val, (u32 *)(ptr), sizeof(u32), true) > - > #define __get_user_instr_inatomic(x, ptr) \ > __get_user_nosleep((x).val, (u32 *)(ptr), sizeof(u32)) > +#endif > + > extern long __put_user_bad(void); > > /* > diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h > index 7e3b329ba2d3..5bf65f5d44a9 100644 > --- a/arch/powerpc/include/asm/uprobes.h > +++ b/arch/powerpc/include/asm/uprobes.h > @@ -15,7 +15,7 @@ > > typedef ppc_opcode_t uprobe_opcode_t; > > -#define MAX_UINSN_BYTES 4 > +#define MAX_UINSN_BYTES 8 > #define UPROBE_XOL_SLOT_BYTES (MAX_UINSN_BYTES) > > /* The following alias is needed for reference from arch-agnostic code */ > diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c > index 72bafb47e757..735e89337398 100644 > --- a/arch/powerpc/kernel/crash_dump.c > +++ b/arch/powerpc/kernel/crash_dump.c > @@ -46,7 +46,7 @@ static void __init create_trampoline(unsigned long addr) > * two instructions it doesn't require any registers. > */ > patch_instruction(p, ppc_inst(PPC_INST_NOP)); > - patch_branch(++p, addr + PHYSICAL_START, 0); > + patch_branch((void *)p + 4, addr + PHYSICAL_START, 0); > } > > void __init setup_kdump_trampoline(void) > diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c > index 52c1ab3f85aa..a8e66603d12b 100644 > --- a/arch/powerpc/kernel/optprobes.c > +++ b/arch/powerpc/kernel/optprobes.c > @@ -162,43 +162,43 @@ void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) > > /* > * Generate instructions to load provided immediate 64-bit value > - * to register 'r3' and patch these instructions at 'addr'. > + * to register 'reg' and patch these instructions at 'addr'. > */ > -void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr) > +void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) I think this change should go in a separate patch. > { > - /* lis r3,(op)@highest */ > + /* lis reg,(op)@highest */ > patch_instruction((struct ppc_inst *)addr, > - ppc_inst(PPC_INST_ADDIS | ___PPC_RT(3) | > + ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) | > ((val >> 48) & 0xffff))); > addr++; > > - /* ori r3,r3,(op)@higher */ > + /* ori reg,reg,(op)@higher */ > patch_instruction((struct ppc_inst *)addr, > - ppc_inst(PPC_INST_ORI | ___PPC_RA(3) | > - ___PPC_RS(3) | ((val >> 32) & 0xffff))); > + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | > + ___PPC_RS(reg) | ((val >> 32) & 0xffff))); > addr++; > > - /* rldicr r3,r3,32,31 */ > + /* rldicr reg,reg,32,31 */ > patch_instruction((struct ppc_inst *)addr, > - ppc_inst(PPC_INST_RLDICR | ___PPC_RA(3) | > - ___PPC_RS(3) | __PPC_SH64(32) | __PPC_ME64(31))); > + ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) | > + ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31))); > addr++; > > - /* oris r3,r3,(op)@h */ > + /* oris reg,reg,(op)@h */ > patch_instruction((struct ppc_inst *)addr, > - ppc_inst(PPC_INST_ORIS | ___PPC_RA(3) | > - ___PPC_RS(3) | ((val >> 16) & 0xffff))); > + ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) | > + ___PPC_RS(reg) | ((val >> 16) & 0xffff))); > addr++; > > - /* ori r3,r3,(op)@l */ > + /* ori reg,reg,(op)@l */ > patch_instruction((struct ppc_inst *)addr, > - ppc_inst(PPC_INST_ORI | ___PPC_RA(3) | > - ___PPC_RS(3) | (val & 0xffff))); > + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | > + ___PPC_RS(reg) | (val & 0xffff))); > } > > int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) > { > - struct ppc_inst branch_op_callback, branch_emulate_step; > + struct ppc_inst branch_op_callback, branch_emulate_step, temp; > kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff; > long b_offset; > unsigned long nip, size; > @@ -249,7 +249,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) > * Fixup the template with instructions to: > * 1. load the address of the actual probepoint > */ > - patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX); > + patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); > > /* > * 2. branch to optimized_callback() and emulate_step() > @@ -282,7 +282,11 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) > /* > * 3. load instruction to be emulated into relevant register, and > */ > - patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX); > + temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); > + patch_imm64_load_insns(ppc_inst_val(temp) | > + ((u64)ppc_inst_suffix(temp) << 32), > + 4, So now we are also using r4 ? Any explanation somewhere on the way it works ? This change seems unrelated to this patch, nothing in the description about it. Can we suddenly use a new register without problem ? > + buff + TMPL_INSN_IDX); What's the point with splitting this line in 4 lines ? Can't it fit in 2 lines ? > > /* > * 4. branch back from trampoline > diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S > index cf383520843f..ff8ba4d3824d 100644 > --- a/arch/powerpc/kernel/optprobes_head.S > +++ b/arch/powerpc/kernel/optprobes_head.S > @@ -94,6 +94,9 @@ optprobe_template_insn: > /* 2, Pass instruction to be emulated in r4 */ > nop > nop > + nop > + nop > + nop > > .global optprobe_template_call_emulate > optprobe_template_call_emulate: > diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c > index d946f7d6bb32..58b67b62d5d3 100644 > --- a/arch/powerpc/lib/code-patching.c > +++ b/arch/powerpc/lib/code-patching.c > @@ -24,13 +24,24 @@ static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr > { > int err = 0; > > - __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw"); > - if (err) > - return err; > + if (!ppc_inst_prefixed(instr)) { > + __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw"); > + if (err) > + return err; This test should remain outside of the if/else, it doesn't need to be duplicated. > + } else { > +#ifdef CONFIG_CPU_LITTLE_ENDIAN > + __put_user_asm((u64)ppc_inst_suffix(instr) << 32 | > + ppc_inst_val(instr), patch_addr, err, "std"); > +#else > + __put_user_asm((u64)ppc_inst_val(instr) << 32 | > + ppc_inst_suffix(instr), patch_addr, err, "std"); > +#endif /* CONFIG_CPU_LITTLE_ENDIAN */ > + if (err) > + return err; > + } > > asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), > "r" (exec_addr)); > - Why remove the blank line ? > return 0; > } > > diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c > index 2bd2b752de4f..a8238eff3a31 100644 > --- a/arch/powerpc/lib/feature-fixups.c > +++ b/arch/powerpc/lib/feature-fixups.c > @@ -84,12 +84,13 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) > src = alt_start; > dest = start; > > - for (; src < alt_end; src++, dest++) { > + for (; src < alt_end; src = (void *)src + ppc_inst_len(ppc_inst_read(src)), > + (dest = (void *)dest + ppc_inst_len(ppc_inst_read(dest)))) { Can we do this outside the for() for readability ? > if (patch_alt_instruction(src, dest, alt_start, alt_end)) > return 1; > } > > - for (; dest < end; dest++) > + for (; dest < end; dest = (void *)dest + ppc_inst_len(ppc_inst(PPC_INST_NOP))) > raw_patch_instruction(dest, ppc_inst(PPC_INST_NOP)); > > return 0; > diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c > index 08dedd927268..eb6f9ee28ac6 100644 > --- a/arch/powerpc/lib/inst.c > +++ b/arch/powerpc/lib/inst.c > @@ -3,9 +3,49 @@ > * Copyright 2020, IBM Corporation. > */ > > +#include <asm/ppc-opcode.h> > #include <linux/uaccess.h> > #include <asm/inst.h> > > +#ifdef __powerpc64__ > +int probe_user_read_inst(struct ppc_inst *inst, > + struct ppc_inst *nip) > +{ > + unsigned int val, suffix; > + int err; > + > + err = probe_user_read(&val, nip, sizeof(val)); A user read is costly with KUAP. Can we do a 64 bits read and perform a 32 bits read only when 64 bits read fails ? > + if (err) > + return err; > + if ((val >> 26) == OP_PREFIX) { What about using get_op() from asm/disassemble.h instead of hardcodiing ? > + err = probe_user_read(&suffix, (void *)nip + 4, 4 or sizeof(unsigned int) ? Why use both in the same line ? > + sizeof(unsigned int)); > + *inst = ppc_inst_prefix(val, suffix); > + } else { > + *inst = ppc_inst(val); > + } > + return err; > +} > + > +int probe_kernel_read_inst(struct ppc_inst *inst, > + struct ppc_inst *src) > +{ > + unsigned int val, suffix; > + int err; > + > + err = probe_kernel_read(&val, src, sizeof(val)); > + if (err) > + return err; > + if ((val >> 26) == OP_PREFIX) { > + err = probe_kernel_read(&suffix, (void *)src + 4, > + sizeof(unsigned int)); > + *inst = ppc_inst_prefix(val, suffix); > + } else { > + *inst = ppc_inst(val); > + } > + return err; > +} > +#else > int probe_user_read_inst(struct ppc_inst *inst, > struct ppc_inst *nip) > { > @@ -27,3 +67,4 @@ int probe_kernel_read_inst(struct ppc_inst *inst, > *inst = ppc_inst(val); > return err; > } > +#endif /* __powerpc64__ */ > diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c > index 95a56bb1ba3f..ecd756c346fd 100644 > --- a/arch/powerpc/lib/sstep.c > +++ b/arch/powerpc/lib/sstep.c > @@ -1169,10 +1169,12 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, > unsigned long int imm; > unsigned long int val, val2; > unsigned int mb, me, sh; > - unsigned int word; > + unsigned int word, suffix; > long ival; > > word = ppc_inst_val(instr); > + suffix = ppc_inst_suffix(instr); > + > op->type = COMPUTE; > > opcode = ppc_inst_primary_opcode(instr); > diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c > index 4d6980d51456..647b3829c4eb 100644 > --- a/arch/powerpc/xmon/xmon.c > +++ b/arch/powerpc/xmon/xmon.c > @@ -758,8 +758,8 @@ static int xmon_bpt(struct pt_regs *regs) > > /* Are we at the trap at bp->instr[1] for some bp? */ > bp = in_breakpoint_table(regs->nip, &offset); > - if (bp != NULL && offset == 4) { > - regs->nip = bp->address + 4; > + if (bp != NULL && (offset == 4 || offset == 8)) { > + regs->nip = bp->address + offset; > atomic_dec(&bp->ref_count); > return 1; > } > diff --git a/arch/powerpc/xmon/xmon_bpts.S b/arch/powerpc/xmon/xmon_bpts.S > index f3ad0ab50854..69726814cd27 100644 > --- a/arch/powerpc/xmon/xmon_bpts.S > +++ b/arch/powerpc/xmon/xmon_bpts.S > @@ -4,6 +4,8 @@ > #include <asm/asm-offsets.h> > #include "xmon_bpts.h" > > +/* Prefixed instructions can not cross 64 byte boundaries */ > +.align 6 > .global bpt_table > bpt_table: > .space NBPTS * BPT_SIZE > Christophe
On Thursday, 14 May 2020 4:11:43 PM AEST Christophe Leroy wrote: > @@ -249,7 +249,7 @@ int arch_prepare_optimized_kprobe(struct > optimized_kprobe *op, struct kprobe *p) > > * Fixup the template with instructions to: > > * 1. load the address of the actual probepoint > > */ > > - patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX); > > + patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); > > > > /* > > * 2. branch to optimized_callback() and emulate_step() > > @@ -282,7 +282,11 @@ int arch_prepare_optimized_kprobe(struct > > optimized_kprobe *op, struct kprobe *p) /* > > * 3. load instruction to be emulated into relevant register, and > > */ > > - patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX); > > + temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); > > + patch_imm64_load_insns(ppc_inst_val(temp) | > > + ((u64)ppc_inst_suffix(temp) << 32), > > + 4, > > So now we are also using r4 ? Any explanation somewhere on the way it > works ? This change seems unrelated to this patch, nothing in the > description about it. Can we suddenly use a new register without problem ? Unless I missed something there is no change in register usage here that I could see. patch_imm32_load_insns() was/is hardcoded to use register r4. - Alistair
On Thu, May 14, 2020 at 4:12 PM Christophe Leroy <christophe.leroy@csgroup.eu> wrote: > > > > Le 06/05/2020 à 05:40, Jordan Niethe a écrit : > > For powerpc64, redefine the ppc_inst type so both word and prefixed > > instructions can be represented. On powerpc32 the type will remain the > > same. Update places which had assumed instructions to be 4 bytes long. > > > > Reviewed-by: Alistair Popple <alistair@popple.id.au> > > Signed-off-by: Jordan Niethe <jniethe5@gmail.com> > > --- > > v4: New to series > > v5: - Distinguish normal instructions from prefixed instructions with a > > 0xff marker for the suffix. > > - __patch_instruction() using std for prefixed instructions > > v6: - Return false instead of 0 in ppc_inst_prefixed() > > - Fix up types for ppc32 so it compiles > > - remove ppc_inst_write() > > - __patching_instruction(): move flush out of condition > > v8: - style > > - Define and use OP_PREFIX instead of '1' (back from v3) > > - __patch_instruction() fix for big endian > > --- > > arch/powerpc/include/asm/inst.h | 69 ++++++++++++++++++++++++--- > > arch/powerpc/include/asm/kprobes.h | 2 +- > > arch/powerpc/include/asm/ppc-opcode.h | 3 ++ > > arch/powerpc/include/asm/uaccess.h | 40 +++++++++++++++- > > arch/powerpc/include/asm/uprobes.h | 2 +- > > arch/powerpc/kernel/crash_dump.c | 2 +- > > arch/powerpc/kernel/optprobes.c | 42 ++++++++-------- > > arch/powerpc/kernel/optprobes_head.S | 3 ++ > > arch/powerpc/lib/code-patching.c | 19 ++++++-- > > arch/powerpc/lib/feature-fixups.c | 5 +- > > arch/powerpc/lib/inst.c | 41 ++++++++++++++++ > > arch/powerpc/lib/sstep.c | 4 +- > > arch/powerpc/xmon/xmon.c | 4 +- > > arch/powerpc/xmon/xmon_bpts.S | 2 + > > 14 files changed, 200 insertions(+), 38 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h > > index 2f3c9d5bcf7c..7868b80b610e 100644 > > --- a/arch/powerpc/include/asm/inst.h > > +++ b/arch/powerpc/include/asm/inst.h > > @@ -2,29 +2,79 @@ > > #ifndef _ASM_INST_H > > #define _ASM_INST_H > > > > +#include <asm/ppc-opcode.h> > > /* > > * Instruction data type for POWER > > */ > > > > struct ppc_inst { > > u32 val; > > +#ifdef __powerpc64__ > > CONFIG_PPC64 should be used instead. This is also reported by checkpatch. Sure will use that instead. > > > + u32 suffix; > > +#endif /* __powerpc64__ */ > > } __packed; > > > > -#define ppc_inst(x) ((struct ppc_inst){ .val = x }) > > - > > static inline u32 ppc_inst_val(struct ppc_inst x) > > { > > return x.val; > > } > > > > -static inline int ppc_inst_len(struct ppc_inst x) > > +static inline int ppc_inst_primary_opcode(struct ppc_inst x) > > { > > - return sizeof(struct ppc_inst); > > + return ppc_inst_val(x) >> 26; > > What about using get_op() from asm/disassemble.h instead of hardcodiing ? Okay will use it here and the other places you point out. > > > } > > > > -static inline int ppc_inst_primary_opcode(struct ppc_inst x) > > +#ifdef __powerpc64__ > > Use CONFIG_PPC64 > > > +#define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff }) > > + > > +#define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) }) > > + > > +static inline u32 ppc_inst_suffix(struct ppc_inst x) > > { > > - return ppc_inst_val(x) >> 26; > > + return x.suffix; > > +} > > + > > +static inline bool ppc_inst_prefixed(struct ppc_inst x) > > +{ > > + return (ppc_inst_primary_opcode(x) == 1) && ppc_inst_suffix(x) != 0xff; > > +} > > + > > +static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) > > +{ > > + return ppc_inst_prefix(swab32(ppc_inst_val(x)), > > + swab32(ppc_inst_suffix(x))); > > +} > > + > > +static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) > > +{ > > + u32 val, suffix; > > + > > + val = *(u32 *)ptr; > > + if ((val >> 26) == 1) { > > Don't hardcode, use ppc_inst_primary_opcode() and compare it to OP_PREFIX > Or use get_op() from asm/disassemble.h > > > > + suffix = *((u32 *)ptr + 1); > > + return ppc_inst_prefix(val, suffix); > > + } else { > > + return ppc_inst(val); > > + } > > +} > > + > > +static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) > > +{ > > + return *(u64 *)&x == *(u64 *)&y; > > +} > > + > > +#else > > + > > +#define ppc_inst(x) ((struct ppc_inst){ .val = x }) > > + > > +static inline bool ppc_inst_prefixed(struct ppc_inst x) > > +{ > > + return false; > > +} > > + > > +static inline u32 ppc_inst_suffix(struct ppc_inst x) > > +{ > > + return 0; > > } > > > > static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) > > @@ -42,6 +92,13 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) > > return ppc_inst_val(x) == ppc_inst_val(y); > > } > > > > +#endif /* __powerpc64__ */ > > + > > +static inline int ppc_inst_len(struct ppc_inst x) > > +{ > > + return (ppc_inst_prefixed(x)) ? 8 : 4; > > +} > > + > > int probe_user_read_inst(struct ppc_inst *inst, > > struct ppc_inst *nip); > > int probe_kernel_read_inst(struct ppc_inst *inst, > > diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h > > index 66b3f2983b22..4fc0e15e23a5 100644 > > --- a/arch/powerpc/include/asm/kprobes.h > > +++ b/arch/powerpc/include/asm/kprobes.h > > @@ -43,7 +43,7 @@ extern kprobe_opcode_t optprobe_template_ret[]; > > extern kprobe_opcode_t optprobe_template_end[]; > > > > /* Fixed instruction size for powerpc */ > > -#define MAX_INSN_SIZE 1 > > +#define MAX_INSN_SIZE 2 > > #define MAX_OPTIMIZED_LENGTH sizeof(kprobe_opcode_t) /* 4 bytes */ > > #define MAX_OPTINSN_SIZE (optprobe_template_end - optprobe_template_entry) > > #define RELATIVEJUMP_SIZE sizeof(kprobe_opcode_t) /* 4 bytes */ > > diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h > > index c1df75edde44..2a39c716c343 100644 > > --- a/arch/powerpc/include/asm/ppc-opcode.h > > +++ b/arch/powerpc/include/asm/ppc-opcode.h > > @@ -158,6 +158,9 @@ > > /* VMX Vector Store Instructions */ > > #define OP_31_XOP_STVX 231 > > > > +/* Prefixed Instructions */ > > +#define OP_PREFIX 1 > > + > > #define OP_31 31 > > #define OP_LWZ 32 > > #define OP_STFS 52 > > diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h > > index c0a35e4586a5..217897927926 100644 > > --- a/arch/powerpc/include/asm/uaccess.h > > +++ b/arch/powerpc/include/asm/uaccess.h > > @@ -105,11 +105,49 @@ static inline int __access_ok(unsigned long addr, unsigned long size, > > #define __put_user_inatomic(x, ptr) \ > > __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) > > > > +#ifdef __powerpc64__ > > Replace by CONFIG_PPC64 > > > +#define __get_user_instr(x, ptr) \ > > +({ \ > > + long __gui_ret = 0; \ > > + unsigned long __gui_ptr = (unsigned long)ptr; \ > > + struct ppc_inst __gui_inst; \ > > + unsigned int prefix, suffix; \ > > + __gui_ret = __get_user(prefix, (unsigned int __user *)__gui_ptr); \ > > __get_user() can be costly especially with KUAP. I think you should > perform a 64 bits read and fallback on a 32 bits read only if the 64 > bits read failed. Thanks, I will try doing it that way. > > > + if (!__gui_ret && (prefix >> 26) == OP_PREFIX) { \ > > What about using get_op() from asm/disassemble.h instead of hardcodiing ? > > > + __gui_ret = __get_user(suffix, \ > > + (unsigned int __user *)__gui_ptr + 1); \ > > + __gui_inst = ppc_inst_prefix(prefix, suffix); \ > > + } else { \ > > + __gui_inst = ppc_inst(prefix); \ > > + } \ > > + (x) = __gui_inst; \ > > + __gui_ret; \ > > +}) > > + > > +#define __get_user_instr_inatomic(x, ptr) \ > > +({ \ > > + long __gui_ret = 0; \ > > + unsigned long __gui_ptr = (unsigned long)ptr; \ > > + struct ppc_inst __gui_inst; \ > > + unsigned int prefix, suffix; \ > > + __gui_ret = __get_user_inatomic(prefix, (unsigned int __user *)__gui_ptr); \ > > Same > > > + if (!__gui_ret && (prefix >> 26) == OP_PREFIX) { \ > > + __gui_ret = __get_user_inatomic(suffix, \ > > + (unsigned int __user *)__gui_ptr + 1); \ > > + __gui_inst = ppc_inst_prefix(prefix, suffix); \ > > + } else { \ > > + __gui_inst = ppc_inst(prefix); \ > > + } \ > > + (x) = __gui_inst; \ > > + __gui_ret; \ > > +}) > > +#else > > #define __get_user_instr(x, ptr) \ > > __get_user_nocheck((x).val, (u32 *)(ptr), sizeof(u32), true) > > - > > #define __get_user_instr_inatomic(x, ptr) \ > > __get_user_nosleep((x).val, (u32 *)(ptr), sizeof(u32)) > > +#endif > > + > > extern long __put_user_bad(void); > > > > /* > > diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h > > index 7e3b329ba2d3..5bf65f5d44a9 100644 > > --- a/arch/powerpc/include/asm/uprobes.h > > +++ b/arch/powerpc/include/asm/uprobes.h > > @@ -15,7 +15,7 @@ > > > > typedef ppc_opcode_t uprobe_opcode_t; > > > > -#define MAX_UINSN_BYTES 4 > > +#define MAX_UINSN_BYTES 8 > > #define UPROBE_XOL_SLOT_BYTES (MAX_UINSN_BYTES) > > > > /* The following alias is needed for reference from arch-agnostic code */ > > diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c > > index 72bafb47e757..735e89337398 100644 > > --- a/arch/powerpc/kernel/crash_dump.c > > +++ b/arch/powerpc/kernel/crash_dump.c > > @@ -46,7 +46,7 @@ static void __init create_trampoline(unsigned long addr) > > * two instructions it doesn't require any registers. > > */ > > patch_instruction(p, ppc_inst(PPC_INST_NOP)); > > - patch_branch(++p, addr + PHYSICAL_START, 0); > > + patch_branch((void *)p + 4, addr + PHYSICAL_START, 0); > > } > > > > void __init setup_kdump_trampoline(void) > > diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c > > index 52c1ab3f85aa..a8e66603d12b 100644 > > --- a/arch/powerpc/kernel/optprobes.c > > +++ b/arch/powerpc/kernel/optprobes.c > > @@ -162,43 +162,43 @@ void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) > > > > /* > > * Generate instructions to load provided immediate 64-bit value > > - * to register 'r3' and patch these instructions at 'addr'. > > + * to register 'reg' and patch these instructions at 'addr'. > > */ > > -void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr) > > +void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) > > I think this change should go in a separate patch. Okay. > > > { > > - /* lis r3,(op)@highest */ > > + /* lis reg,(op)@highest */ > > patch_instruction((struct ppc_inst *)addr, > > - ppc_inst(PPC_INST_ADDIS | ___PPC_RT(3) | > > + ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) | > > ((val >> 48) & 0xffff))); > > addr++; > > > > - /* ori r3,r3,(op)@higher */ > > + /* ori reg,reg,(op)@higher */ > > patch_instruction((struct ppc_inst *)addr, > > - ppc_inst(PPC_INST_ORI | ___PPC_RA(3) | > > - ___PPC_RS(3) | ((val >> 32) & 0xffff))); > > + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | > > + ___PPC_RS(reg) | ((val >> 32) & 0xffff))); > > addr++; > > > > - /* rldicr r3,r3,32,31 */ > > + /* rldicr reg,reg,32,31 */ > > patch_instruction((struct ppc_inst *)addr, > > - ppc_inst(PPC_INST_RLDICR | ___PPC_RA(3) | > > - ___PPC_RS(3) | __PPC_SH64(32) | __PPC_ME64(31))); > > + ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) | > > + ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31))); > > addr++; > > > > - /* oris r3,r3,(op)@h */ > > + /* oris reg,reg,(op)@h */ > > patch_instruction((struct ppc_inst *)addr, > > - ppc_inst(PPC_INST_ORIS | ___PPC_RA(3) | > > - ___PPC_RS(3) | ((val >> 16) & 0xffff))); > > + ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) | > > + ___PPC_RS(reg) | ((val >> 16) & 0xffff))); > > addr++; > > > > - /* ori r3,r3,(op)@l */ > > + /* ori reg,reg,(op)@l */ > > patch_instruction((struct ppc_inst *)addr, > > - ppc_inst(PPC_INST_ORI | ___PPC_RA(3) | > > - ___PPC_RS(3) | (val & 0xffff))); > > + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | > > + ___PPC_RS(reg) | (val & 0xffff))); > > } > > > > int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) > > { > > - struct ppc_inst branch_op_callback, branch_emulate_step; > > + struct ppc_inst branch_op_callback, branch_emulate_step, temp; > > kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff; > > long b_offset; > > unsigned long nip, size; > > @@ -249,7 +249,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) > > * Fixup the template with instructions to: > > * 1. load the address of the actual probepoint > > */ > > - patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX); > > + patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); > > > > /* > > * 2. branch to optimized_callback() and emulate_step() > > @@ -282,7 +282,11 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) > > /* > > * 3. load instruction to be emulated into relevant register, and > > */ > > - patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX); > > + temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); > > + patch_imm64_load_insns(ppc_inst_val(temp) | > > + ((u64)ppc_inst_suffix(temp) << 32), > > + 4, > > So now we are also using r4 ? Any explanation somewhere on the way it > works ? This change seems unrelated to this patch, nothing in the > description about it. Can we suddenly use a new register without problem ? Sorry it is not very clear, r4 was always being used. patch_imm32_load_insns() hardcoded r4. We now need to load 64 bits as we just introduced prefixed instruction, so are using patch_imm64_load_insns(). That is the connection to the patch. But a separate patch and description would probably make that clearer. > > > + buff + TMPL_INSN_IDX); > > What's the point with splitting this line in 4 lines ? Can't it fit in 2 > lines ? Sure. > > > > > /* > > * 4. branch back from trampoline > > diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S > > index cf383520843f..ff8ba4d3824d 100644 > > --- a/arch/powerpc/kernel/optprobes_head.S > > +++ b/arch/powerpc/kernel/optprobes_head.S > > @@ -94,6 +94,9 @@ optprobe_template_insn: > > /* 2, Pass instruction to be emulated in r4 */ > > nop > > nop > > + nop > > + nop > > + nop > > > > .global optprobe_template_call_emulate > > optprobe_template_call_emulate: > > diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c > > index d946f7d6bb32..58b67b62d5d3 100644 > > --- a/arch/powerpc/lib/code-patching.c > > +++ b/arch/powerpc/lib/code-patching.c > > @@ -24,13 +24,24 @@ static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr > > { > > int err = 0; > > > > - __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw"); > > - if (err) > > - return err; > > + if (!ppc_inst_prefixed(instr)) { > > + __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw"); > > + if (err) > > + return err; > > This test should remain outside of the if/else, it doesn't need to be > duplicated. Okay. > > > + } else { > > +#ifdef CONFIG_CPU_LITTLE_ENDIAN > > + __put_user_asm((u64)ppc_inst_suffix(instr) << 32 | > > + ppc_inst_val(instr), patch_addr, err, "std"); > > +#else > > + __put_user_asm((u64)ppc_inst_val(instr) << 32 | > > + ppc_inst_suffix(instr), patch_addr, err, "std"); > > +#endif /* CONFIG_CPU_LITTLE_ENDIAN */ > > + if (err) > > + return err; > > + } > > > > asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), > > "r" (exec_addr)); > > - > > Why remove the blank line ? Sorry that was by mistake. > > > return 0; > > } > > > > diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c > > index 2bd2b752de4f..a8238eff3a31 100644 > > --- a/arch/powerpc/lib/feature-fixups.c > > +++ b/arch/powerpc/lib/feature-fixups.c > > @@ -84,12 +84,13 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) > > src = alt_start; > > dest = start; > > > > - for (; src < alt_end; src++, dest++) { > > + for (; src < alt_end; src = (void *)src + ppc_inst_len(ppc_inst_read(src)), > > + (dest = (void *)dest + ppc_inst_len(ppc_inst_read(dest)))) { > > Can we do this outside the for() for readability ? You are right, I will make it clearer. > > > if (patch_alt_instruction(src, dest, alt_start, alt_end)) > > return 1; > > } > > > > - for (; dest < end; dest++) > > + for (; dest < end; dest = (void *)dest + ppc_inst_len(ppc_inst(PPC_INST_NOP))) > > raw_patch_instruction(dest, ppc_inst(PPC_INST_NOP)); > > > > return 0; > > diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c > > index 08dedd927268..eb6f9ee28ac6 100644 > > --- a/arch/powerpc/lib/inst.c > > +++ b/arch/powerpc/lib/inst.c > > @@ -3,9 +3,49 @@ > > * Copyright 2020, IBM Corporation. > > */ > > > > +#include <asm/ppc-opcode.h> > > #include <linux/uaccess.h> > > #include <asm/inst.h> > > > > +#ifdef __powerpc64__ > > +int probe_user_read_inst(struct ppc_inst *inst, > > + struct ppc_inst *nip) > > +{ > > + unsigned int val, suffix; > > + int err; > > + > > + err = probe_user_read(&val, nip, sizeof(val)); > > A user read is costly with KUAP. Can we do a 64 bits read and perform a > 32 bits read only when 64 bits read fails ? > > > + if (err) > > + return err; > > + if ((val >> 26) == OP_PREFIX) { > > What about using get_op() from asm/disassemble.h instead of hardcodiing ? > > > + err = probe_user_read(&suffix, (void *)nip + 4, > > 4 or sizeof(unsigned int) ? Why use both in the same line ? True, doesn't really make sense. > > > + sizeof(unsigned int)); > > + *inst = ppc_inst_prefix(val, suffix); > > + } else { > > + *inst = ppc_inst(val); > > + } > > + return err; > > +} > > + > > +int probe_kernel_read_inst(struct ppc_inst *inst, > > + struct ppc_inst *src) > > +{ > > + unsigned int val, suffix; > > + int err; > > + > > + err = probe_kernel_read(&val, src, sizeof(val)); > > + if (err) > > + return err; > > + if ((val >> 26) == OP_PREFIX) { > > + err = probe_kernel_read(&suffix, (void *)src + 4, > > + sizeof(unsigned int)); > > + *inst = ppc_inst_prefix(val, suffix); > > + } else { > > + *inst = ppc_inst(val); > > + } > > + return err; > > +} > > +#else > > int probe_user_read_inst(struct ppc_inst *inst, > > struct ppc_inst *nip) > > { > > @@ -27,3 +67,4 @@ int probe_kernel_read_inst(struct ppc_inst *inst, > > *inst = ppc_inst(val); > > return err; > > } > > +#endif /* __powerpc64__ */ > > diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c > > index 95a56bb1ba3f..ecd756c346fd 100644 > > --- a/arch/powerpc/lib/sstep.c > > +++ b/arch/powerpc/lib/sstep.c > > @@ -1169,10 +1169,12 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, > > unsigned long int imm; > > unsigned long int val, val2; > > unsigned int mb, me, sh; > > - unsigned int word; > > + unsigned int word, suffix; > > long ival; > > > > word = ppc_inst_val(instr); > > + suffix = ppc_inst_suffix(instr); > > + > > op->type = COMPUTE; > > > > opcode = ppc_inst_primary_opcode(instr); > > diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c > > index 4d6980d51456..647b3829c4eb 100644 > > --- a/arch/powerpc/xmon/xmon.c > > +++ b/arch/powerpc/xmon/xmon.c > > @@ -758,8 +758,8 @@ static int xmon_bpt(struct pt_regs *regs) > > > > /* Are we at the trap at bp->instr[1] for some bp? */ > > bp = in_breakpoint_table(regs->nip, &offset); > > - if (bp != NULL && offset == 4) { > > - regs->nip = bp->address + 4; > > + if (bp != NULL && (offset == 4 || offset == 8)) { > > + regs->nip = bp->address + offset; > > atomic_dec(&bp->ref_count); > > return 1; > > } > > diff --git a/arch/powerpc/xmon/xmon_bpts.S b/arch/powerpc/xmon/xmon_bpts.S > > index f3ad0ab50854..69726814cd27 100644 > > --- a/arch/powerpc/xmon/xmon_bpts.S > > +++ b/arch/powerpc/xmon/xmon_bpts.S > > @@ -4,6 +4,8 @@ > > #include <asm/asm-offsets.h> > > #include "xmon_bpts.h" > > > > +/* Prefixed instructions can not cross 64 byte boundaries */ > > +.align 6 > > .global bpt_table > > bpt_table: > > .space NBPTS * BPT_SIZE > > > > Christophe
On Thu, May 14, 2020 at 10:06 PM Alistair Popple <alistair@popple.id.au> wrote: > > On Thursday, 14 May 2020 4:11:43 PM AEST Christophe Leroy wrote: > > @@ -249,7 +249,7 @@ int arch_prepare_optimized_kprobe(struct > > optimized_kprobe *op, struct kprobe *p) > > > * Fixup the template with instructions to: > > > * 1. load the address of the actual probepoint > > > */ > > > - patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX); > > > + patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); > > > > > > /* > > > * 2. branch to optimized_callback() and emulate_step() > > > @@ -282,7 +282,11 @@ int arch_prepare_optimized_kprobe(struct > > > optimized_kprobe *op, struct kprobe *p) /* > > > * 3. load instruction to be emulated into relevant register, and > > > */ > > > - patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX); > > > + temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); > > > + patch_imm64_load_insns(ppc_inst_val(temp) | > > > + ((u64)ppc_inst_suffix(temp) << 32), > > > + 4, > > > > So now we are also using r4 ? Any explanation somewhere on the way it > > works ? This change seems unrelated to this patch, nothing in the > > description about it. Can we suddenly use a new register without problem ? > > Unless I missed something there is no change in register usage here that I > could see. patch_imm32_load_insns() was/is hardcoded to use register r4. Yes, that is right. > > - Alistair > >
Le 14/05/2020 à 14:06, Alistair Popple a écrit : > On Thursday, 14 May 2020 4:11:43 PM AEST Christophe Leroy wrote: >> @@ -249,7 +249,7 @@ int arch_prepare_optimized_kprobe(struct >> optimized_kprobe *op, struct kprobe *p) >>> * Fixup the template with instructions to: >>> * 1. load the address of the actual probepoint >>> */ >>> - patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX); >>> + patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); >>> >>> /* >>> * 2. branch to optimized_callback() and emulate_step() >>> @@ -282,7 +282,11 @@ int arch_prepare_optimized_kprobe(struct >>> optimized_kprobe *op, struct kprobe *p) /* >>> * 3. load instruction to be emulated into relevant register, and >>> */ >>> - patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX); >>> + temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); >>> + patch_imm64_load_insns(ppc_inst_val(temp) | >>> + ((u64)ppc_inst_suffix(temp) << 32), >>> + 4, >> >> So now we are also using r4 ? Any explanation somewhere on the way it >> works ? This change seems unrelated to this patch, nothing in the >> description about it. Can we suddenly use a new register without problem ? > > Unless I missed something there is no change in register usage here that I > could see. patch_imm32_load_insns() was/is hardcoded to use register r4. > Ah ... Euh ... Ok I missed the change from patch_imm32_load_insns() to patch_imm64_load_insns(), I'll check again.
Christophe Leroy <christophe.leroy@csgroup.eu> writes: > Le 06/05/2020 à 05:40, Jordan Niethe a écrit : >> For powerpc64, redefine the ppc_inst type so both word and prefixed >> instructions can be represented. On powerpc32 the type will remain the >> same. Update places which had assumed instructions to be 4 bytes long. ... >> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h >> index c0a35e4586a5..217897927926 100644 >> --- a/arch/powerpc/include/asm/uaccess.h >> +++ b/arch/powerpc/include/asm/uaccess.h >> @@ -105,11 +105,49 @@ static inline int __access_ok(unsigned long addr, unsigned long size, >> #define __put_user_inatomic(x, ptr) \ >> __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) >> >> +#ifdef __powerpc64__ > > Replace by CONFIG_PPC64 > >> +#define __get_user_instr(x, ptr) \ >> +({ \ >> + long __gui_ret = 0; \ >> + unsigned long __gui_ptr = (unsigned long)ptr; \ >> + struct ppc_inst __gui_inst; \ >> + unsigned int prefix, suffix; \ >> + __gui_ret = __get_user(prefix, (unsigned int __user *)__gui_ptr); \ > > __get_user() can be costly especially with KUAP. I think you should > perform a 64 bits read and fallback on a 32 bits read only if the 64 > bits read failed. I worry that might break something. It _should_ be safe, but I'm paranoid. If we think the KUAP overhead is a problem then I think we'd be better off pulling the KUAP disable/enable into this macro. >> diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c >> index 2bd2b752de4f..a8238eff3a31 100644 >> --- a/arch/powerpc/lib/feature-fixups.c >> +++ b/arch/powerpc/lib/feature-fixups.c >> @@ -84,12 +84,13 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) >> src = alt_start; >> dest = start; >> >> - for (; src < alt_end; src++, dest++) { >> + for (; src < alt_end; src = (void *)src + ppc_inst_len(ppc_inst_read(src)), >> + (dest = (void *)dest + ppc_inst_len(ppc_inst_read(dest)))) { > > Can we do this outside the for() for readability ? I have an idea for cleaning these up, will post it as a follow-up to the series. >> diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c >> index 08dedd927268..eb6f9ee28ac6 100644 >> --- a/arch/powerpc/lib/inst.c >> +++ b/arch/powerpc/lib/inst.c >> @@ -3,9 +3,49 @@ >> * Copyright 2020, IBM Corporation. >> */ >> >> +#include <asm/ppc-opcode.h> >> #include <linux/uaccess.h> >> #include <asm/inst.h> >> >> +#ifdef __powerpc64__ >> +int probe_user_read_inst(struct ppc_inst *inst, >> + struct ppc_inst *nip) >> +{ >> + unsigned int val, suffix; >> + int err; >> + >> + err = probe_user_read(&val, nip, sizeof(val)); > > A user read is costly with KUAP. Can we do a 64 bits read and perform a > 32 bits read only when 64 bits read fails ? Same comment as above. cheers
Hey mpe, fixes for the issues highlighted by Christophe, except KUAP as discussed. Will make the optprobe change as a preceding patch. diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -11,9 +11,9 @@ struct ppc_inst { u32 val; -#ifdef __powerpc64__ +#ifdef CONFIG_PPC64 u32 suffix; -#endif /* __powerpc64__ */ +#endif /* CONFIG_PPC64 */ } __packed; static inline u32 ppc_inst_val(struct ppc_inst x) @@ -26,7 +26,7 @@ static inline int ppc_inst_primary_opcode(struct ppc_inst x) return get_op(ppc_inst_val(x)); } -#ifdef __powerpc64__ +#ifdef CONFIG_PPC64 #define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff }) #define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) }) @@ -52,7 +52,7 @@ static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) u32 val, suffix; val = *(u32 *)ptr; - if ((val >> 26) == 1) { + if ((get_op(val)) == OP_PREFIX) { suffix = *((u32 *)ptr + 1); return ppc_inst_prefix(val, suffix); } else { @@ -94,7 +94,7 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) return ppc_inst_val(x) == ppc_inst_val(y); } -#endif /* __powerpc64__ */ +#endif /* CONFIG_PPC64 */ static inline int ppc_inst_len(struct ppc_inst x) { diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index e9027b3c641a..ac36a82321d4 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -105,7 +105,7 @@ static inline int __access_ok(unsigned long addr, unsigned long size, #define __put_user_inatomic(x, ptr) \ __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) -#ifdef __powerpc64__ +#ifdef CONFIG_PPC64 #define __get_user_instr(x, ptr) \ ({ \ long __gui_ret = 0; \ @@ -113,7 +113,7 @@ static inline int __access_ok(unsigned long addr, unsigned long size, struct ppc_inst __gui_inst; \ unsigned int prefix, suffix; \ __gui_ret = __get_user(prefix, (unsigned int __user *)__gui_ptr); \ - if (!__gui_ret && (prefix >> 26) == OP_PREFIX) { \ + if (!__gui_ret && (get_op(prefix)) == OP_PREFIX) { \ __gui_ret = __get_user(suffix, \ (unsigned int __user *)__gui_ptr + 1); \ __gui_inst = ppc_inst_prefix(prefix, suffix); \ @@ -131,7 +131,7 @@ static inline int __access_ok(unsigned long addr, unsigned long size, struct ppc_inst __gui_inst; \ unsigned int prefix, suffix; \ __gui_ret = __get_user_inatomic(prefix, (unsigned int __user *)__gui_ptr); \ - if (!__gui_ret && (prefix >> 26) == OP_PREFIX) { \ + if (!__gui_ret && (get_op(prefix)) == OP_PREFIX) { \ __gui_ret = __get_user_inatomic(suffix, \ (unsigned int __user *)__gui_ptr + 1); \ __gui_inst = ppc_inst_prefix(prefix, suffix); \ diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index a8e66603d12b..3ac105e7faae 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -283,10 +283,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * 3. load instruction to be emulated into relevant register, and */ temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); - patch_imm64_load_insns(ppc_inst_val(temp) | - ((u64)ppc_inst_suffix(temp) << 32), - 4, - buff + TMPL_INSN_IDX); + patch_imm64_load_insns(ppc_inst_val(temp) | ((u64)ppc_inst_suffix(temp) << 32), + 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 58b67b62d5d3..bfd4e1dae0fb 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -26,8 +26,6 @@ static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr if (!ppc_inst_prefixed(instr)) { __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw"); - if (err) - return err; } else { #ifdef CONFIG_CPU_LITTLE_ENDIAN __put_user_asm((u64)ppc_inst_suffix(instr) << 32 | @@ -36,12 +34,13 @@ static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr __put_user_asm((u64)ppc_inst_val(instr) << 32 | ppc_inst_suffix(instr), patch_addr, err, "std"); #endif /* CONFIG_CPU_LITTLE_ENDIAN */ - if (err) - return err; } + if (err) + return err; asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), "r" (exec_addr)); + return 0; } diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index e5e589994097..3c3851ffdb36 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -7,7 +7,7 @@ #include <linux/uaccess.h> #include <asm/inst.h> -#ifdef __powerpc64__ +#ifdef CONFIG_PPC64 int probe_user_read_inst(struct ppc_inst *inst, struct ppc_inst *nip) { @@ -17,9 +17,8 @@ int probe_user_read_inst(struct ppc_inst *inst, err = probe_user_read(&val, nip, sizeof(val)); if (err) return err; - if ((val >> 26) == OP_PREFIX) { - err = probe_user_read(&suffix, (void *)nip + 4, - sizeof(unsigned int)); + if (get_op(val) == OP_PREFIX) { + err = probe_user_read(&suffix, (void *)nip + 4, 4); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); @@ -36,9 +35,8 @@ int probe_kernel_read_inst(struct ppc_inst *inst, err = probe_kernel_read(&val, src, sizeof(val)); if (err) return err; - if ((val >> 26) == OP_PREFIX) { - err = probe_kernel_read(&suffix, (void *)src + 4, - sizeof(unsigned int)); + if (get_op(val) == OP_PREFIX) { + err = probe_kernel_read(&suffix, (void *)src + 4, 4); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); @@ -67,4 +65,4 @@ int probe_kernel_read_inst(struct ppc_inst *inst, *inst = ppc_inst(val); return err; } -#endif /* __powerpc64__ */ +#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 2f3c9d5bcf7c..7868b80b610e 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -2,29 +2,79 @@ #ifndef _ASM_INST_H #define _ASM_INST_H +#include <asm/ppc-opcode.h> /* * Instruction data type for POWER */ struct ppc_inst { u32 val; +#ifdef __powerpc64__ + u32 suffix; +#endif /* __powerpc64__ */ } __packed; -#define ppc_inst(x) ((struct ppc_inst){ .val = x }) - static inline u32 ppc_inst_val(struct ppc_inst x) { return x.val; } -static inline int ppc_inst_len(struct ppc_inst x) +static inline int ppc_inst_primary_opcode(struct ppc_inst x) { - return sizeof(struct ppc_inst); + return ppc_inst_val(x) >> 26; } -static inline int ppc_inst_primary_opcode(struct ppc_inst x) +#ifdef __powerpc64__ +#define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff }) + +#define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) }) + +static inline u32 ppc_inst_suffix(struct ppc_inst x) { - return ppc_inst_val(x) >> 26; + return x.suffix; +} + +static inline bool ppc_inst_prefixed(struct ppc_inst x) +{ + return (ppc_inst_primary_opcode(x) == 1) && ppc_inst_suffix(x) != 0xff; +} + +static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) +{ + return ppc_inst_prefix(swab32(ppc_inst_val(x)), + swab32(ppc_inst_suffix(x))); +} + +static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) +{ + u32 val, suffix; + + val = *(u32 *)ptr; + if ((val >> 26) == 1) { + suffix = *((u32 *)ptr + 1); + return ppc_inst_prefix(val, suffix); + } else { + return ppc_inst(val); + } +} + +static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) +{ + return *(u64 *)&x == *(u64 *)&y; +} + +#else + +#define ppc_inst(x) ((struct ppc_inst){ .val = x }) + +static inline bool ppc_inst_prefixed(struct ppc_inst x) +{ + return false; +} + +static inline u32 ppc_inst_suffix(struct ppc_inst x) +{ + return 0; } static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) @@ -42,6 +92,13 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) return ppc_inst_val(x) == ppc_inst_val(y); } +#endif /* __powerpc64__ */ + +static inline int ppc_inst_len(struct ppc_inst x) +{ + return (ppc_inst_prefixed(x)) ? 8 : 4; +} + int probe_user_read_inst(struct ppc_inst *inst, struct ppc_inst *nip); int probe_kernel_read_inst(struct ppc_inst *inst, diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index 66b3f2983b22..4fc0e15e23a5 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -43,7 +43,7 @@ extern kprobe_opcode_t optprobe_template_ret[]; extern kprobe_opcode_t optprobe_template_end[]; /* Fixed instruction size for powerpc */ -#define MAX_INSN_SIZE 1 +#define MAX_INSN_SIZE 2 #define MAX_OPTIMIZED_LENGTH sizeof(kprobe_opcode_t) /* 4 bytes */ #define MAX_OPTINSN_SIZE (optprobe_template_end - optprobe_template_entry) #define RELATIVEJUMP_SIZE sizeof(kprobe_opcode_t) /* 4 bytes */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index c1df75edde44..2a39c716c343 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -158,6 +158,9 @@ /* VMX Vector Store Instructions */ #define OP_31_XOP_STVX 231 +/* Prefixed Instructions */ +#define OP_PREFIX 1 + #define OP_31 31 #define OP_LWZ 32 #define OP_STFS 52 diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index c0a35e4586a5..217897927926 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -105,11 +105,49 @@ static inline int __access_ok(unsigned long addr, unsigned long size, #define __put_user_inatomic(x, ptr) \ __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#ifdef __powerpc64__ +#define __get_user_instr(x, ptr) \ +({ \ + long __gui_ret = 0; \ + unsigned long __gui_ptr = (unsigned long)ptr; \ + struct ppc_inst __gui_inst; \ + unsigned int prefix, suffix; \ + __gui_ret = __get_user(prefix, (unsigned int __user *)__gui_ptr); \ + if (!__gui_ret && (prefix >> 26) == OP_PREFIX) { \ + __gui_ret = __get_user(suffix, \ + (unsigned int __user *)__gui_ptr + 1); \ + __gui_inst = ppc_inst_prefix(prefix, suffix); \ + } else { \ + __gui_inst = ppc_inst(prefix); \ + } \ + (x) = __gui_inst; \ + __gui_ret; \ +}) + +#define __get_user_instr_inatomic(x, ptr) \ +({ \ + long __gui_ret = 0; \ + unsigned long __gui_ptr = (unsigned long)ptr; \ + struct ppc_inst __gui_inst; \ + unsigned int prefix, suffix; \ + __gui_ret = __get_user_inatomic(prefix, (unsigned int __user *)__gui_ptr); \ + if (!__gui_ret && (prefix >> 26) == OP_PREFIX) { \ + __gui_ret = __get_user_inatomic(suffix, \ + (unsigned int __user *)__gui_ptr + 1); \ + __gui_inst = ppc_inst_prefix(prefix, suffix); \ + } else { \ + __gui_inst = ppc_inst(prefix); \ + } \ + (x) = __gui_inst; \ + __gui_ret; \ +}) +#else #define __get_user_instr(x, ptr) \ __get_user_nocheck((x).val, (u32 *)(ptr), sizeof(u32), true) - #define __get_user_instr_inatomic(x, ptr) \ __get_user_nosleep((x).val, (u32 *)(ptr), sizeof(u32)) +#endif + extern long __put_user_bad(void); /* diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index 7e3b329ba2d3..5bf65f5d44a9 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -15,7 +15,7 @@ typedef ppc_opcode_t uprobe_opcode_t; -#define MAX_UINSN_BYTES 4 +#define MAX_UINSN_BYTES 8 #define UPROBE_XOL_SLOT_BYTES (MAX_UINSN_BYTES) /* The following alias is needed for reference from arch-agnostic code */ diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 72bafb47e757..735e89337398 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -46,7 +46,7 @@ static void __init create_trampoline(unsigned long addr) * two instructions it doesn't require any registers. */ patch_instruction(p, ppc_inst(PPC_INST_NOP)); - patch_branch(++p, addr + PHYSICAL_START, 0); + patch_branch((void *)p + 4, addr + PHYSICAL_START, 0); } void __init setup_kdump_trampoline(void) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 52c1ab3f85aa..a8e66603d12b 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -162,43 +162,43 @@ void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) /* * Generate instructions to load provided immediate 64-bit value - * to register 'r3' and patch these instructions at 'addr'. + * to register 'reg' and patch these instructions at 'addr'. */ -void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr) +void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) { - /* lis r3,(op)@highest */ + /* lis reg,(op)@highest */ patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ADDIS | ___PPC_RT(3) | + ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) | ((val >> 48) & 0xffff))); addr++; - /* ori r3,r3,(op)@higher */ + /* ori reg,reg,(op)@higher */ patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORI | ___PPC_RA(3) | - ___PPC_RS(3) | ((val >> 32) & 0xffff))); + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | + ___PPC_RS(reg) | ((val >> 32) & 0xffff))); addr++; - /* rldicr r3,r3,32,31 */ + /* rldicr reg,reg,32,31 */ patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_RLDICR | ___PPC_RA(3) | - ___PPC_RS(3) | __PPC_SH64(32) | __PPC_ME64(31))); + ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) | + ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31))); addr++; - /* oris r3,r3,(op)@h */ + /* oris reg,reg,(op)@h */ patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORIS | ___PPC_RA(3) | - ___PPC_RS(3) | ((val >> 16) & 0xffff))); + ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) | + ___PPC_RS(reg) | ((val >> 16) & 0xffff))); addr++; - /* ori r3,r3,(op)@l */ + /* ori reg,reg,(op)@l */ patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORI | ___PPC_RA(3) | - ___PPC_RS(3) | (val & 0xffff))); + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | + ___PPC_RS(reg) | (val & 0xffff))); } int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { - struct ppc_inst branch_op_callback, branch_emulate_step; + struct ppc_inst branch_op_callback, branch_emulate_step, temp; kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff; long b_offset; unsigned long nip, size; @@ -249,7 +249,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * Fixup the template with instructions to: * 1. load the address of the actual probepoint */ - patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX); + patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); /* * 2. branch to optimized_callback() and emulate_step() @@ -282,7 +282,11 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 3. load instruction to be emulated into relevant register, and */ - patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX); + temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); + patch_imm64_load_insns(ppc_inst_val(temp) | + ((u64)ppc_inst_suffix(temp) << 32), + 4, + buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index cf383520843f..ff8ba4d3824d 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -94,6 +94,9 @@ optprobe_template_insn: /* 2, Pass instruction to be emulated in r4 */ nop nop + nop + nop + nop .global optprobe_template_call_emulate optprobe_template_call_emulate: diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index d946f7d6bb32..58b67b62d5d3 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -24,13 +24,24 @@ static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr { int err = 0; - __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw"); - if (err) - return err; + if (!ppc_inst_prefixed(instr)) { + __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw"); + if (err) + return err; + } else { +#ifdef CONFIG_CPU_LITTLE_ENDIAN + __put_user_asm((u64)ppc_inst_suffix(instr) << 32 | + ppc_inst_val(instr), patch_addr, err, "std"); +#else + __put_user_asm((u64)ppc_inst_val(instr) << 32 | + ppc_inst_suffix(instr), patch_addr, err, "std"); +#endif /* CONFIG_CPU_LITTLE_ENDIAN */ + if (err) + return err; + } asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), "r" (exec_addr)); - return 0; } diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 2bd2b752de4f..a8238eff3a31 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -84,12 +84,13 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) src = alt_start; dest = start; - for (; src < alt_end; src++, dest++) { + for (; src < alt_end; src = (void *)src + ppc_inst_len(ppc_inst_read(src)), + (dest = (void *)dest + ppc_inst_len(ppc_inst_read(dest)))) { if (patch_alt_instruction(src, dest, alt_start, alt_end)) return 1; } - for (; dest < end; dest++) + for (; dest < end; dest = (void *)dest + ppc_inst_len(ppc_inst(PPC_INST_NOP))) raw_patch_instruction(dest, ppc_inst(PPC_INST_NOP)); return 0; diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index 08dedd927268..eb6f9ee28ac6 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -3,9 +3,49 @@ * Copyright 2020, IBM Corporation. */ +#include <asm/ppc-opcode.h> #include <linux/uaccess.h> #include <asm/inst.h> +#ifdef __powerpc64__ +int probe_user_read_inst(struct ppc_inst *inst, + struct ppc_inst *nip) +{ + unsigned int val, suffix; + int err; + + err = probe_user_read(&val, nip, sizeof(val)); + if (err) + return err; + if ((val >> 26) == OP_PREFIX) { + err = probe_user_read(&suffix, (void *)nip + 4, + sizeof(unsigned int)); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return err; +} + +int probe_kernel_read_inst(struct ppc_inst *inst, + struct ppc_inst *src) +{ + unsigned int val, suffix; + int err; + + err = probe_kernel_read(&val, src, sizeof(val)); + if (err) + return err; + if ((val >> 26) == OP_PREFIX) { + err = probe_kernel_read(&suffix, (void *)src + 4, + sizeof(unsigned int)); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return err; +} +#else int probe_user_read_inst(struct ppc_inst *inst, struct ppc_inst *nip) { @@ -27,3 +67,4 @@ int probe_kernel_read_inst(struct ppc_inst *inst, *inst = ppc_inst(val); return err; } +#endif /* __powerpc64__ */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 95a56bb1ba3f..ecd756c346fd 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1169,10 +1169,12 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, unsigned long int imm; unsigned long int val, val2; unsigned int mb, me, sh; - unsigned int word; + unsigned int word, suffix; long ival; word = ppc_inst_val(instr); + suffix = ppc_inst_suffix(instr); + op->type = COMPUTE; opcode = ppc_inst_primary_opcode(instr); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 4d6980d51456..647b3829c4eb 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -758,8 +758,8 @@ static int xmon_bpt(struct pt_regs *regs) /* Are we at the trap at bp->instr[1] for some bp? */ bp = in_breakpoint_table(regs->nip, &offset); - if (bp != NULL && offset == 4) { - regs->nip = bp->address + 4; + if (bp != NULL && (offset == 4 || offset == 8)) { + regs->nip = bp->address + offset; atomic_dec(&bp->ref_count); return 1; } diff --git a/arch/powerpc/xmon/xmon_bpts.S b/arch/powerpc/xmon/xmon_bpts.S index f3ad0ab50854..69726814cd27 100644 --- a/arch/powerpc/xmon/xmon_bpts.S +++ b/arch/powerpc/xmon/xmon_bpts.S @@ -4,6 +4,8 @@ #include <asm/asm-offsets.h> #include "xmon_bpts.h" +/* Prefixed instructions can not cross 64 byte boundaries */ +.align 6 .global bpt_table bpt_table: .space NBPTS * BPT_SIZE