[v2,0/8] sparc64: MM/IRQ patch queue.

Message ID 20141014.224028.932832496005778423.davem@davemloft.net
State Accepted
Delegated to: David Miller
Headers show

Commit Message

David Miller Oct. 15, 2014, 2:40 a.m. UTC
From: David Miller <davem@davemloft.net>
Date: Sun, 12 Oct 2014 23:53:09 -0400 (EDT)

> Anyways I'll dig further and fix this.

Bob, I am putting the following fix through some tests, I'll
commit and push everywhere after my tests complete.

Let me know if it works for you too; it should make that bootup
AES test failure disappear.

Thanks!

Comments

Bob Picco Oct. 16, 2014, 12:36 p.m. UTC | #1
Hi,
David Miller wrote:	[Tue Oct 14 2014, 10:40:28PM EDT]
> From: David Miller <davem@davemloft.net>
> Date: Sun, 12 Oct 2014 23:53:09 -0400 (EDT)
> 
> > Anyways I'll dig further and fix this.
> 
> Bob, I am putting the following fix through some tests, I'll
> commit and push everywhere after my tests complete.
> 
> Let me know if it works for you too; it should make that bootup
> AES test failure disappear.
> 
> Thanks!
You're welcome. Thanx for the splendid changelog! Otherwise finding
time to review the fpu paths etc. would have been a challenge to
schedule. Boots and tests without issue on T5-2, though I had to
configure the tests in:
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
I did review the patch modifications too.
David Miller Oct. 16, 2014, 4:12 p.m. UTC | #2
From: Bob Picco <bpicco@meloft.net>
Date: Thu, 16 Oct 2014 08:36:54 -0400

> You're welcome. Thanx for the splendid changelog! Otherwise finding
> time to review the fpu paths etc. would have been a challenge to
> schedule. Boots and tests without issue on T5-2, though I had to
> configure the tests in:
> # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
> I did review the patch modifications too.

Thanks Bob.

I started working on a text document explaining how the FPU saving
stuff works on sparc64, and will use that to sprinkle comments around
and perhaps clean some of it up.

I think I discovered one bug during this documenting process: if we
do a VISEntryHalf, we might not restore the %gsr register on trap
return.  In the rtrap pseudo-code below there are two code paths
which bypass the %gsr load; they stem from two checks which run in
sequence.  The first is:

	!(t->fpsaved[depth] & (FPRS_FEF | FPRS_DU))

and the second is:

	!(t->fpsaved[depth] & FPRS_FEF)

The second check therefore can only trigger if only FPRS_DU is set.
There might be a reason why skipping the %gsr load is kosher in this
case, but I haven't figured it out yet.
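
To make this concrete, here is a small stand-alone C sketch (my own
illustration, not kernel code) that walks every possible fpsaved
value using the V9 FPRS bit values (FPRS_DL=0x1, FPRS_DU=0x2,
FPRS_FEF=0x4) and reports which ones reach a path that bypasses the
%gsr load:

#include <stdio.h>

#define FPRS_DL		0x1
#define FPRS_DU		0x2
#define FPRS_FEF	0x4

int main(void)
{
	unsigned int fpsaved;

	for (fpsaved = 0; fpsaved < 8; fpsaved++) {
		/* First check: neither FEF nor DU saved, so we go to
		 * out_2 and restore nothing; no %gsr load needed. */
		if (!(fpsaved & (FPRS_FEF | FPRS_DU)))
			continue;
		/* Second check: FEF clear, so we take out_5, which
		 * reloads %f32-%f63 but also skips the %gsr load. */
		if (!(fpsaved & FPRS_FEF))
			printf("fpsaved=%#x takes out_5, no %%gsr load\n",
			       fpsaved);
	}
	return 0;
}

This prints only the FPRS_DU and FPRS_DU|FPRS_DL combinations, which
matches the "only FPRS_DU is set" observation above.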

Anyways, here is my doc in progress.

====================

struct thread_info:

       The convention is that slot [0] of the save arrays is for the
       user's FPU state, and slots [1] and later are for kernel FPU
       state.

       Bit zero of thread_info->fpdepth indicates whether there is
       user FPU state saved or not; bits one and higher are the
       kernel FPU save depth.  So user state is saved in slot zero,
       and kernel state is saved in slot "(fpdepth >> 1) + 1".
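
As a quick illustration of this convention (a sketch only; the field
and helper names are stand-ins, not the exact thread_info layout):

/* Bit zero of fpdepth flags saved user state. */
static inline int user_fpu_saved(unsigned int fpdepth)
{
	return fpdepth & 1;
}

/* Bits one and up count kernel depth; the next kernel save goes in
 * slot (fpdepth >> 1) + 1, since slot 0 belongs to the user. */
static inline int next_kernel_fpu_slot(unsigned int fpdepth)
{
	return (fpdepth >> 1) + 1;
}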

VisEntry:
	struct thread_info *t = current_thread_info();

	/* %o5 = %fprs */
	if (t->fpdepth == 0) {
		t->fpsaved[0] = 0;
		t->xfsr[0] = %fsr;
		return;
	}
	if (t->fpdepth == 1) {
	vis1:
		/* Save the user's FPU state into slot 0;
		 * VisEntryHalf's fpdepth == 1 case branches here too. */
	}

VisEntryHalf:
	struct thread_info *t = current_thread_info();

	/* %o5 = %fprs */
	g1 = t->fpdepth >> 1;
	if (t->fpdepth == 0) {
		t->fpsaved[0] = 0;
		t->xfsr[0] = %fsr;
		o5 = 0;
		%fprs = FPRS_FEF;
		return;
	}
	if (t->fpdepth == 1) {
		backtrack_return_pc(8);
		goto vis1;
	}

	t->fpsaved[g1] = o5 & ~FPRS_DU;
	t->gsr[g1] = %gsr;
	if (o5 & FPRS_DL) {
		store_block(f0, t->fpregs + (g1 << 8) + 0x00);
		store_block(f16, t->fpregs + (g1 << 8) + 0x40);
	}
	}
	%fprs = (o5 & FPRS_DU) ^ FPRS_FEF;
	return;
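
To spell out the register-save layout this implies (my reading of the
offsets, stated as an assumption): each save slot owns a 256-byte
region of t->fpregs, split into four 64-byte blocks:

/* Per-slot layout of t->fpregs; g1 = t->fpdepth >> 1. */
#define SLOT_BASE(g1)	((g1) << 8)		/* 256 bytes per slot */
#define F0_OFF(g1)	(SLOT_BASE(g1) + 0x00)	/* %f0  .. %f15 */
#define F16_OFF(g1)	(SLOT_BASE(g1) + 0x40)	/* %f16 .. %f31 */
#define F32_OFF(g1)	(SLOT_BASE(g1) + 0x80)	/* %f32 .. %f47 */
#define F48_OFF(g1)	(SLOT_BASE(g1) + 0xc0)	/* %f48 .. %f63 */

The same 0x80/0xc0 offsets show up in the rtrap loads below.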

VisExit:
	%fprs = 0;

VisExitHalf:
	%fprs = %o5;	/* restore the pre-entry %fprs saved in %o5 */

etrap:
	( from kernel )
	struct thread_info *t = current_thread_info();
	t->fpsaved[(t->fpdepth >> 1) + 1] = 0;
	t->fpdepth += 2;

rtrap:
	( to kernel )
	struct thread_info *t = current_thread_info();
	if (t->fpdepth) {
		l2 = t->fpsaved[t->fpdepth >> 1];
		o0 = t->fpdepth >> 1;
		o1 = t + TI_GSR;
		l6 = l2 & FPRS_DL;
		if (!(l2 & (FPRS_FEF | FPRS_DU)))
			goto out_2;
		o5 = o0 << 3;
		if (!(l2 & FPRS_FEF))
			goto out_5;

		g1 = %fprs;
		%fprs = %g1 ^ FPRS_FEF;

		g1 = t->gsr[o0];
		o2 = o0 << 8;
		if (l6) {
			load_block(f0, t->fpregs + %o2);
			load_block(f16, t->fpregs + 0x40 + %o2);
		}
		%gsr = %g1;
		if (l2 & FPRS_DU) {
			load_block(f32, t->fpregs + 0x80 + %o2);
			load_block(f48, t->fpregs + 0xc0 + %o2);
		}
		}
		/* fallthru */
	out_2:
		t->fpdepth -= 2;
		goto out_99;
	out_5:
		%fprs = FPRS_FEF;
		o2 = o0 << 8;
		load_block(f32, t->fpregs + 0x80 + %o2);
		load_block(f48, t->fpregs + 0xc0 + %o2);
		%fprs = FPRS_DU;
		t->fpdepth -= 2;
		goto out_99;
	}
	out_99:
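
As a sanity check of the depth bookkeeping, here is a toy C model of
the etrap/rtrap pair (the slot count and field names are assumptions;
it tracks only the counters, not the register blocks):

#include <assert.h>

struct ti_model {			/* stand-in for thread_info */
	unsigned char fpdepth;
	unsigned char fpsaved[7];	/* assumed slot count */
};

static void etrap_fpu(struct ti_model *t)
{
	t->fpsaved[(t->fpdepth >> 1) + 1] = 0;	/* fresh kernel slot */
	t->fpdepth += 2;			/* bit zero untouched */
}

static void rtrap_fpu(struct ti_model *t)
{
	if (t->fpdepth)		/* restore logic elided; see above */
		t->fpdepth -= 2;
}

int main(void)
{
	struct ti_model t = { 0, { 0 } };

	etrap_fpu(&t);		/* trap while kernel FPU depth is 0 */
	etrap_fpu(&t);		/* nested trap */
	assert(t.fpdepth == 4);
	rtrap_fpu(&t);
	rtrap_fpu(&t);
	assert(t.fpdepth == 0);
	return 0;
}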

{TSTATE,PSTATE}_PEF management:

	Firstly, as per V9, all traps set PSTATE_PEF in %pstate for
	the trap handler.  The "predefined" state for %pstate upon
	entry to a trap handler is:

		PSTATE.MM = unchanged
		PSTATE.RED = 0
		PSTATE.PEF = 1
		PSTATE.AM = 0
		PSTATE.PRIV = 1
		PSTATE.IE = 0
		PSTATE.AG = 1
		PSTATE.CLE = PSTATE.TLE

	1) start_thread{,32}() clears TSTATE_PEF
	2) All traps into kernel (both from user and kernel) set TSTATE_PEF in
	   %tstate and do a 'done' which propagates it into PSTATE_PEF in
	   %pstate.
	3) fpu_disabled trap sets TSTATE_PEF in %tstate before finishing
	   with 'retry' (which like 'done' propagates it to PSTATE_PEF in
	   %pstate).
	4) If the 64-bit set_context system call saves FPU state, TSTATE_PEF
	   is cleared in the task's registers.
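
As a rough sketch of the bit plumbing (the constants reflect my
reading of the V9 layout, where TSTATE carries a copy of PSTATE at
bits 8 and up; treat them as assumptions):

#include <stdio.h>

#define PSTATE_PEF	0x010UL			/* FPU enable in %pstate */
#define TSTATE_PEF	(PSTATE_PEF << 8)	/* same bit within %tstate */

struct regs_model { unsigned long tstate; };	/* stand-in for pt_regs */

int main(void)
{
	struct regs_model regs = { TSTATE_PEF };

	regs.tstate &= ~TSTATE_PEF;	/* step 1: start_thread() */
	regs.tstate |= TSTATE_PEF;	/* steps 2/3: trap entry sets it;
					 * 'done'/'retry' then copy
					 * TSTATE.PSTATE into %pstate */
	printf("tstate=%#lx\n", regs.tstate);	/* -> 0x1000 */
	return 0;
}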

Patch

====================
[PATCH] sparc64: Fix FPU register corruption with AES crypto offload.

The AES loops in arch/sparc/crypto/aes_glue.c use a scheme where the
key material is preloaded into the FPU registers, and then we loop
over and over doing the crypt operation, reusing those pre-cooked key
registers.

There are intervening blkcipher*() calls between the crypt operation
calls.  And those might perform memcpy() and thus also try to use the
FPU.

The sparc64 kernel FPU usage mechanism is designed to allow such
recursive uses, but with a catch.

There has to be a trap between the two FPU using threads of control.

The mechanism works by, when the FPU is already in use by the kernel,
allocating a slot for FPU saving at trap time.  Then if, within the
trap handler, we try to use the FPU registers, the pre-trap FPU
register state is saved into the slot.  Then at trap return time we
notice this and restore the pre-trap FPU state.

Over the long term there are various more involved ways we can make
this work, but for a quick fix let's take advantage of the fact that
the situation where this happens is very limited.

All sparc64 chips that support the crypto instructions also use the
Niagara4 memcpy routine, and that routine only uses the FPU for
large copies where we can't get the source properly aligned to a
multiple of 8 bytes.

We look to see if the FPU is already in use in this context, and if so
we use the non-large copy path which only uses integer registers.

Furthermore, we limit this special logic to kernel copies, rather
than user copies.
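
In C-level pseudo-code (a sketch of what the assembly below does;
read_fprs(), medium_copy() and the other names are illustrative, not
real kernel helpers), the fast-path test amounts to:

#define FPRS_FEF	0x4

if (read_fprs() & FPRS_FEF) {
	/* The FPU is already owned further up the stack; take the
	 * integer-only .Lmedium path instead of VISEntryHalf. */
	medium_copy(dst, src, len);
} else {
	vis_entry_half();		/* safe to grab the FPU */
	large_unaligned_copy(dst, src, len);
}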

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/visasm.h |  8 ++++++++
 arch/sparc/lib/NG4memcpy.S      | 14 +++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h
index b266737..1f0aa20 100644
--- a/arch/sparc/include/asm/visasm.h
+++ b/arch/sparc/include/asm/visasm.h
@@ -39,6 +39,14 @@ 
 297:	wr		%o5, FPRS_FEF, %fprs;		\
 298:
 
+#define VISEntryHalfFast(fail_label)			\
+	rd		%fprs, %o5;			\
+	andcc		%o5, FPRS_FEF, %g0;		\
+	be,pt		%icc, 297f;			\
+	 nop;						\
+	ba,a,pt		%xcc, fail_label;		\
+297:	wr		%o5, FPRS_FEF, %fprs;
+
 #define VISExitHalf					\
 	wr		%o5, 0, %fprs;
 
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
index 9cf2ee0..140527a 100644
--- a/arch/sparc/lib/NG4memcpy.S
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -41,6 +41,10 @@ 
 #endif
 #endif
 
+#if !defined(EX_LD) && !defined(EX_ST)
+#define NON_USER_COPY
+#endif
+
 #ifndef EX_LD
 #define EX_LD(x)	x
 #endif
@@ -197,9 +201,13 @@  FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	 mov		EX_RETVAL(%o3), %o0
 
 .Llarge_src_unaligned:
+#ifdef NON_USER_COPY
+	VISEntryHalfFast(.Lmedium_vis_entry_fail)
+#else
+	VISEntryHalf
+#endif
 	andn		%o2, 0x3f, %o4
 	sub		%o2, %o4, %o2
-	VISEntryHalf
 	alignaddr	%o1, %g0, %g1
 	add		%o1, %o4, %o1
 	EX_LD(LOAD(ldd, %g1 + 0x00, %f0))
@@ -240,6 +248,10 @@  FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	 nop
 	ba,a,pt		%icc, .Lmedium_unaligned
 
+#ifdef NON_USER_COPY
+.Lmedium_vis_entry_fail:
+	 or		%o0, %o1, %g2
+#endif
 .Lmedium:
 	LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
 	andcc		%g2, 0x7, %g0