diff mbox series

[v2,5/5] bpf ppc32: Add instructions for atomic_[cmp]xchg

Message ID 20220610155552.25892-6-hbathini@linux.ibm.com (mailing list archive)
State Accepted
Headers show
Series Atomics support for eBPF on powerpc | expand

Checks

Context Check Description
snowpatch_ozlabs/github-powerpc_selftests success Successfully ran 10 jobs.
snowpatch_ozlabs/github-powerpc_ppctests success Successfully ran 10 jobs.
snowpatch_ozlabs/github-powerpc_clang success Successfully ran 7 jobs.
snowpatch_ozlabs/github-powerpc_sparse success Successfully ran 4 jobs.
snowpatch_ozlabs/github-powerpc_kernel_qemu success Successfully ran 23 jobs.

Commit Message

Hari Bathini June 10, 2022, 3:55 p.m. UTC
This adds two atomic opcodes BPF_XCHG and BPF_CMPXCHG on ppc32, both
of which include the BPF_FETCH flag.  The kernel's atomic_cmpxchg
operation fundamentally has 3 operands, but we only have two register
fields. Therefore the operand we compare against (the kernel's API
calls it 'old') is hard-coded to be BPF_REG_R0. Also, kernel's
atomic_cmpxchg returns the previous value at dst_reg + off. JIT the
same for BPF too with return value put in BPF_REG_0.

  BPF_REG_R0 = atomic_cmpxchg(dst_reg + off, BPF_REG_R0, src_reg);

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---

Changes in v2:
* Moved variable declaration to avoid late declaration error on
  some compilers.
* Tried to make code readable and compact.


 arch/powerpc/net/bpf_jit_comp32.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

Comments

Christophe Leroy June 11, 2022, 5:34 p.m. UTC | #1
Le 10/06/2022 à 17:55, Hari Bathini a écrit :
> This adds two atomic opcodes BPF_XCHG and BPF_CMPXCHG on ppc32, both
> of which include the BPF_FETCH flag.  The kernel's atomic_cmpxchg
> operation fundamentally has 3 operands, but we only have two register
> fields. Therefore the operand we compare against (the kernel's API
> calls it 'old') is hard-coded to be BPF_REG_R0. Also, kernel's
> atomic_cmpxchg returns the previous value at dst_reg + off. JIT the
> same for BPF too with return value put in BPF_REG_0.
> 
>    BPF_REG_R0 = atomic_cmpxchg(dst_reg + off, BPF_REG_R0, src_reg);
> 
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
> 
> Changes in v2:
> * Moved variable declaration to avoid late declaration error on
>    some compilers.
> * Tried to make code readable and compact.
> 
> 
>   arch/powerpc/net/bpf_jit_comp32.c | 25 ++++++++++++++++++++++---
>   1 file changed, 22 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index 28dc6a1a8f2f..43f1c76d48ce 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -297,6 +297,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		u32 ax_reg = bpf_to_ppc(BPF_REG_AX);
>   		u32 tmp_reg = bpf_to_ppc(TMP_REG);
>   		u32 size = BPF_SIZE(code);
> +		u32 save_reg, ret_reg;
>   		s16 off = insn[i].off;
>   		s32 imm = insn[i].imm;
>   		bool func_addr_fixed;
> @@ -799,6 +800,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		 * BPF_STX ATOMIC (atomic ops)
>   		 */
>   		case BPF_STX | BPF_ATOMIC | BPF_W:
> +			save_reg = _R0;
> +			ret_reg = src_reg;
> +
>   			bpf_set_seen_register(ctx, tmp_reg);
>   			bpf_set_seen_register(ctx, ax_reg);
>   
> @@ -829,6 +833,21 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   			case BPF_XOR | BPF_FETCH:
>   				EMIT(PPC_RAW_XOR(_R0, _R0, src_reg));
>   				break;
> +			case BPF_CMPXCHG:
> +				/*
> +				 * Return old value in BPF_REG_0 for BPF_CMPXCHG &
> +				 * in src_reg for other cases.
> +				 */
> +				ret_reg = bpf_to_ppc(BPF_REG_0);
> +
> +				/* Compare with old value in BPF_REG_0 */
> +				EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), _R0));
> +				/* Don't set if different from old value */
> +				PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4);
> +				fallthrough;
> +			case BPF_XCHG:
> +				save_reg = src_reg;

I'm a bit lost, when save_reg is src_reg, don't we expect the upper part 
(ie src_reg - 1) to be explicitely zeroised ?

> +				break;
>   			default:
>   				pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n",
>   						   code, i);
> @@ -836,15 +855,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   			}
>   
>   			/* store new value */
> -			EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg));
> +			EMIT(PPC_RAW_STWCX(save_reg, tmp_reg, dst_reg));
>   			/* we're done if this succeeded */
>   			PPC_BCC_SHORT(COND_NE, tmp_idx);
>   
>   			/* For the BPF_FETCH variant, get old data into src_reg */
>   			if (imm & BPF_FETCH) {
> -				EMIT(PPC_RAW_MR(src_reg, ax_reg));
> +				EMIT(PPC_RAW_MR(ret_reg, ax_reg));
>   				if (!fp->aux->verifier_zext)
> -					EMIT(PPC_RAW_LI(src_reg_h, 0));
> +					EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
>   			}
>   			break;
>
Hari Bathini June 13, 2022, 7:11 p.m. UTC | #2
On 11/06/22 11:04 pm, Christophe Leroy wrote:
> 
> 
> Le 10/06/2022 à 17:55, Hari Bathini a écrit :
>> This adds two atomic opcodes BPF_XCHG and BPF_CMPXCHG on ppc32, both
>> of which include the BPF_FETCH flag.  The kernel's atomic_cmpxchg
>> operation fundamentally has 3 operands, but we only have two register
>> fields. Therefore the operand we compare against (the kernel's API
>> calls it 'old') is hard-coded to be BPF_REG_R0. Also, kernel's
>> atomic_cmpxchg returns the previous value at dst_reg + off. JIT the
>> same for BPF too with return value put in BPF_REG_0.
>>
>>     BPF_REG_R0 = atomic_cmpxchg(dst_reg + off, BPF_REG_R0, src_reg);
>>
>> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
>> ---
>>
>> Changes in v2:
>> * Moved variable declaration to avoid late declaration error on
>>     some compilers.
>> * Tried to make code readable and compact.
>>
>>
>>    arch/powerpc/net/bpf_jit_comp32.c | 25 ++++++++++++++++++++++---
>>    1 file changed, 22 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
>> index 28dc6a1a8f2f..43f1c76d48ce 100644
>> --- a/arch/powerpc/net/bpf_jit_comp32.c
>> +++ b/arch/powerpc/net/bpf_jit_comp32.c
>> @@ -297,6 +297,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>>    		u32 ax_reg = bpf_to_ppc(BPF_REG_AX);
>>    		u32 tmp_reg = bpf_to_ppc(TMP_REG);
>>    		u32 size = BPF_SIZE(code);
>> +		u32 save_reg, ret_reg;
>>    		s16 off = insn[i].off;
>>    		s32 imm = insn[i].imm;
>>    		bool func_addr_fixed;
>> @@ -799,6 +800,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>>    		 * BPF_STX ATOMIC (atomic ops)
>>    		 */
>>    		case BPF_STX | BPF_ATOMIC | BPF_W:
>> +			save_reg = _R0;
>> +			ret_reg = src_reg;
>> +
>>    			bpf_set_seen_register(ctx, tmp_reg);
>>    			bpf_set_seen_register(ctx, ax_reg);
>>    
>> @@ -829,6 +833,21 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>>    			case BPF_XOR | BPF_FETCH:
>>    				EMIT(PPC_RAW_XOR(_R0, _R0, src_reg));
>>    				break;
>> +			case BPF_CMPXCHG:
>> +				/*
>> +				 * Return old value in BPF_REG_0 for BPF_CMPXCHG &
>> +				 * in src_reg for other cases.
>> +				 */
>> +				ret_reg = bpf_to_ppc(BPF_REG_0);
>> +
>> +				/* Compare with old value in BPF_REG_0 */
>> +				EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), _R0));
>> +				/* Don't set if different from old value */
>> +				PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4);
>> +				fallthrough;
>> +			case BPF_XCHG:
>> +				save_reg = src_reg;
> 
> I'm a bit lost, when save_reg is src_reg, don't we expect the upper part
> (ie src_reg - 1) to be explicitely zeroised ?
> 

For BPF_FETCH variants, old value is returned in src_reg (ret_reg).
In all such cases, higher 32-bit is zero'ed. But in case of BPF_CMPXCHG,
src_reg is untouched as BPF_REG_0 is used instead. So, higher 32-bit
remains untouched for that case alone..


>> +				break;
>>    			default:
>>    				pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n",
>>    						   code, i);
>> @@ -836,15 +855,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>>    			}
>>    
>>    			/* store new value */
>> -			EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg));
>> +			EMIT(PPC_RAW_STWCX(save_reg, tmp_reg, dst_reg));
>>    			/* we're done if this succeeded */
>>    			PPC_BCC_SHORT(COND_NE, tmp_idx);
>>    

>>    			/* For the BPF_FETCH variant, get old data into src_reg */

With this commit, this comment is not true for BPF_CMPXCHG. So, this
comment should not be removed..

Thanks
Hari
Hari Bathini June 13, 2022, 7:14 p.m. UTC | #3
On 14/06/22 12:41 am, Hari Bathini wrote:
> 
> 
> On 11/06/22 11:04 pm, Christophe Leroy wrote:
>>
>>
>> Le 10/06/2022 à 17:55, Hari Bathini a écrit :
>>> This adds two atomic opcodes BPF_XCHG and BPF_CMPXCHG on ppc32, both
>>> of which include the BPF_FETCH flag.  The kernel's atomic_cmpxchg
>>> operation fundamentally has 3 operands, but we only have two register
>>> fields. Therefore the operand we compare against (the kernel's API
>>> calls it 'old') is hard-coded to be BPF_REG_R0. Also, kernel's
>>> atomic_cmpxchg returns the previous value at dst_reg + off. JIT the
>>> same for BPF too with return value put in BPF_REG_0.
>>>
>>>     BPF_REG_R0 = atomic_cmpxchg(dst_reg + off, BPF_REG_R0, src_reg);
>>>
>>> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
>>> ---
>>>
>>> Changes in v2:
>>> * Moved variable declaration to avoid late declaration error on
>>>     some compilers.
>>> * Tried to make code readable and compact.
>>>
>>>
>>>    arch/powerpc/net/bpf_jit_comp32.c | 25 ++++++++++++++++++++++---
>>>    1 file changed, 22 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/powerpc/net/bpf_jit_comp32.c 
>>> b/arch/powerpc/net/bpf_jit_comp32.c
>>> index 28dc6a1a8f2f..43f1c76d48ce 100644
>>> --- a/arch/powerpc/net/bpf_jit_comp32.c
>>> +++ b/arch/powerpc/net/bpf_jit_comp32.c
>>> @@ -297,6 +297,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 
>>> *image, struct codegen_context *
>>>            u32 ax_reg = bpf_to_ppc(BPF_REG_AX);
>>>            u32 tmp_reg = bpf_to_ppc(TMP_REG);
>>>            u32 size = BPF_SIZE(code);
>>> +        u32 save_reg, ret_reg;
>>>            s16 off = insn[i].off;
>>>            s32 imm = insn[i].imm;
>>>            bool func_addr_fixed;
>>> @@ -799,6 +800,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 
>>> *image, struct codegen_context *
>>>             * BPF_STX ATOMIC (atomic ops)
>>>             */
>>>            case BPF_STX | BPF_ATOMIC | BPF_W:
>>> +            save_reg = _R0;
>>> +            ret_reg = src_reg;
>>> +
>>>                bpf_set_seen_register(ctx, tmp_reg);
>>>                bpf_set_seen_register(ctx, ax_reg);
>>> @@ -829,6 +833,21 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 
>>> *image, struct codegen_context *
>>>                case BPF_XOR | BPF_FETCH:
>>>                    EMIT(PPC_RAW_XOR(_R0, _R0, src_reg));
>>>                    break;
>>> +            case BPF_CMPXCHG:
>>> +                /*
>>> +                 * Return old value in BPF_REG_0 for BPF_CMPXCHG &
>>> +                 * in src_reg for other cases.
>>> +                 */
>>> +                ret_reg = bpf_to_ppc(BPF_REG_0);
>>> +
>>> +                /* Compare with old value in BPF_REG_0 */
>>> +                EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), _R0));
>>> +                /* Don't set if different from old value */
>>> +                PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4);
>>> +                fallthrough;
>>> +            case BPF_XCHG:
>>> +                save_reg = src_reg;
>>
>> I'm a bit lost, when save_reg is src_reg, don't we expect the upper part
>> (ie src_reg - 1) to be explicitely zeroised ?
>>
> 
> For BPF_FETCH variants, old value is returned in src_reg (ret_reg).
> In all such cases, higher 32-bit is zero'ed. But in case of BPF_CMPXCHG,
> src_reg is untouched as BPF_REG_0 is used instead. So, higher 32-bit
> remains untouched for that case alone..
> 
> 
>>> +                break;
>>>                default:
>>>                    pr_err_ratelimited("eBPF filter atomic op code 
>>> %02x (@%d) unsupported\n",
>>>                               code, i);
>>> @@ -836,15 +855,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 
>>> *image, struct codegen_context *
>>>                }
>>>                /* store new value */
>>> -            EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg));
>>> +            EMIT(PPC_RAW_STWCX(save_reg, tmp_reg, dst_reg));
>>>                /* we're done if this succeeded */
>>>                PPC_BCC_SHORT(COND_NE, tmp_idx);

> 
>>>                /* For the BPF_FETCH variant, get old data into 
>>> src_reg */
> 
> With this commit, this comment is not true for BPF_CMPXCHG. So, this
> comment should not be removed..

Sorry, the above should read:
   "should be removed" instead of "should not be removed"..
Naveen N. Rao June 24, 2022, 10:41 a.m. UTC | #4
Hari Bathini wrote:
> 
> 
> On 14/06/22 12:41 am, Hari Bathini wrote:
>> 
>> 
>> On 11/06/22 11:04 pm, Christophe Leroy wrote:
>>>
>>>
>>> Le 10/06/2022 à 17:55, Hari Bathini a écrit :
>>>> This adds two atomic opcodes BPF_XCHG and BPF_CMPXCHG on ppc32, both
>>>> of which include the BPF_FETCH flag.  The kernel's atomic_cmpxchg
>>>> operation fundamentally has 3 operands, but we only have two register
>>>> fields. Therefore the operand we compare against (the kernel's API
>>>> calls it 'old') is hard-coded to be BPF_REG_R0. Also, kernel's
>>>> atomic_cmpxchg returns the previous value at dst_reg + off. JIT the
>>>> same for BPF too with return value put in BPF_REG_0.
>>>>
>>>>     BPF_REG_R0 = atomic_cmpxchg(dst_reg + off, BPF_REG_R0, src_reg);
>>>>
>>>> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
>>>> ---
>>>>
>>>> Changes in v2:
>>>> * Moved variable declaration to avoid late declaration error on
>>>>     some compilers.
>>>> * Tried to make code readable and compact.
>>>>
>>>>
>>>>    arch/powerpc/net/bpf_jit_comp32.c | 25 ++++++++++++++++++++++---
>>>>    1 file changed, 22 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/net/bpf_jit_comp32.c 
>>>> b/arch/powerpc/net/bpf_jit_comp32.c
>>>> index 28dc6a1a8f2f..43f1c76d48ce 100644
>>>> --- a/arch/powerpc/net/bpf_jit_comp32.c
>>>> +++ b/arch/powerpc/net/bpf_jit_comp32.c
>>>> @@ -297,6 +297,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 
>>>> *image, struct codegen_context *
>>>>            u32 ax_reg = bpf_to_ppc(BPF_REG_AX);
>>>>            u32 tmp_reg = bpf_to_ppc(TMP_REG);
>>>>            u32 size = BPF_SIZE(code);
>>>> +        u32 save_reg, ret_reg;
>>>>            s16 off = insn[i].off;
>>>>            s32 imm = insn[i].imm;
>>>>            bool func_addr_fixed;
>>>> @@ -799,6 +800,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 
>>>> *image, struct codegen_context *
>>>>             * BPF_STX ATOMIC (atomic ops)
>>>>             */
>>>>            case BPF_STX | BPF_ATOMIC | BPF_W:
>>>> +            save_reg = _R0;
>>>> +            ret_reg = src_reg;
>>>> +
>>>>                bpf_set_seen_register(ctx, tmp_reg);
>>>>                bpf_set_seen_register(ctx, ax_reg);
>>>> @@ -829,6 +833,21 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 
>>>> *image, struct codegen_context *
>>>>                case BPF_XOR | BPF_FETCH:
>>>>                    EMIT(PPC_RAW_XOR(_R0, _R0, src_reg));
>>>>                    break;
>>>> +            case BPF_CMPXCHG:
>>>> +                /*
>>>> +                 * Return old value in BPF_REG_0 for BPF_CMPXCHG &
>>>> +                 * in src_reg for other cases.
>>>> +                 */
>>>> +                ret_reg = bpf_to_ppc(BPF_REG_0);
>>>> +
>>>> +                /* Compare with old value in BPF_REG_0 */
>>>> +                EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), _R0));
>>>> +                /* Don't set if different from old value */
>>>> +                PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4);
>>>> +                fallthrough;
>>>> +            case BPF_XCHG:
>>>> +                save_reg = src_reg;
>>>
>>> I'm a bit lost, when save_reg is src_reg, don't we expect the upper part
>>> (ie src_reg - 1) to be explicitely zeroised ?
>>>
>> 
>> For BPF_FETCH variants, old value is returned in src_reg (ret_reg).
>> In all such cases, higher 32-bit is zero'ed. But in case of BPF_CMPXCHG,
>> src_reg is untouched as BPF_REG_0 is used instead. So, higher 32-bit
>> remains untouched for that case alone..
>> 
>> 
>>>> +                break;
>>>>                default:
>>>>                    pr_err_ratelimited("eBPF filter atomic op code 
>>>> %02x (@%d) unsupported\n",
>>>>                               code, i);
>>>> @@ -836,15 +855,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 
>>>> *image, struct codegen_context *
>>>>                }
>>>>                /* store new value */
>>>> -            EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg));
>>>> +            EMIT(PPC_RAW_STWCX(save_reg, tmp_reg, dst_reg));
>>>>                /* we're done if this succeeded */
>>>>                PPC_BCC_SHORT(COND_NE, tmp_idx);
> 
>> 
>>>>                /* For the BPF_FETCH variant, get old data into 
>>>> src_reg */
>> 
>> With this commit, this comment is not true for BPF_CMPXCHG. So, this
>> comment should not be removed..
> 
> Sorry, the above should read:
>    "should be removed" instead of "should not be removed"..
> 

Or, just add BPF_REG_0 at the end:
  /* For the BPF_FETCH variant, get old data into src_reg/BPF_REG_0 */

The comment in CMPXCHG anyway details the difference. In any case, we 
can clean this up subsequently.


- Naveen
diff mbox series

Patch

diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 28dc6a1a8f2f..43f1c76d48ce 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -297,6 +297,7 @@  int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		u32 ax_reg = bpf_to_ppc(BPF_REG_AX);
 		u32 tmp_reg = bpf_to_ppc(TMP_REG);
 		u32 size = BPF_SIZE(code);
+		u32 save_reg, ret_reg;
 		s16 off = insn[i].off;
 		s32 imm = insn[i].imm;
 		bool func_addr_fixed;
@@ -799,6 +800,9 @@  int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		 * BPF_STX ATOMIC (atomic ops)
 		 */
 		case BPF_STX | BPF_ATOMIC | BPF_W:
+			save_reg = _R0;
+			ret_reg = src_reg;
+
 			bpf_set_seen_register(ctx, tmp_reg);
 			bpf_set_seen_register(ctx, ax_reg);
 
@@ -829,6 +833,21 @@  int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			case BPF_XOR | BPF_FETCH:
 				EMIT(PPC_RAW_XOR(_R0, _R0, src_reg));
 				break;
+			case BPF_CMPXCHG:
+				/*
+				 * Return old value in BPF_REG_0 for BPF_CMPXCHG &
+				 * in src_reg for other cases.
+				 */
+				ret_reg = bpf_to_ppc(BPF_REG_0);
+
+				/* Compare with old value in BPF_REG_0 */
+				EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), _R0));
+				/* Don't set if different from old value */
+				PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4);
+				fallthrough;
+			case BPF_XCHG:
+				save_reg = src_reg;
+				break;
 			default:
 				pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n",
 						   code, i);
@@ -836,15 +855,15 @@  int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			}
 
 			/* store new value */
-			EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg));
+			EMIT(PPC_RAW_STWCX(save_reg, tmp_reg, dst_reg));
 			/* we're done if this succeeded */
 			PPC_BCC_SHORT(COND_NE, tmp_idx);
 
 			/* For the BPF_FETCH variant, get old data into src_reg */
 			if (imm & BPF_FETCH) {
-				EMIT(PPC_RAW_MR(src_reg, ax_reg));
+				EMIT(PPC_RAW_MR(ret_reg, ax_reg));
 				if (!fp->aux->verifier_zext)
-					EMIT(PPC_RAW_LI(src_reg_h, 0));
+					EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
 			}
 			break;