diff mbox series

[4/5] ARC: add support for DSP-enabled userspace applications

Message ID 20191227180347.3579-5-Eugeniy.Paltsev@synopsys.com
State New
Headers show
Series ARC: handle DSP presence in HW | expand

Commit Message

Eugeniy Paltsev Dec. 27, 2019, 6:03 p.m. UTC
To be able to run DSP-enabled userspace applications we need to
save and restore following DSP-related registers:
At IRQ/exception entry/exit:
 * ACC0_GLO, ACC0_GHI, DSP_CTRL
 * ACC0_LO, ACC0_HI (we already save them as r58, r59 pair)
At context switch:
 * DSP_BFLY0, DSP_FFT_CTRL

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
---
 arch/arc/Kconfig                   |  7 +++
 arch/arc/include/asm/arcregs.h     |  2 +
 arch/arc/include/asm/dsp-impl.h    | 75 +++++++++++++++++++++++++++++-
 arch/arc/include/asm/dsp.h         | 42 +++++++++++++++++
 arch/arc/include/asm/entry-arcv2.h |  3 ++
 arch/arc/include/asm/processor.h   |  4 ++
 arch/arc/include/asm/ptrace.h      |  4 ++
 arch/arc/include/asm/switch_to.h   |  2 +
 arch/arc/kernel/asm-offsets.c      |  7 +++
 arch/arc/kernel/setup.c            |  2 +-
 10 files changed, 146 insertions(+), 2 deletions(-)
 create mode 100644 arch/arc/include/asm/dsp.h

Comments

Vineet Gupta Jan. 7, 2020, 6:25 p.m. UTC | #1
On 12/27/19 10:03 AM, Eugeniy Paltsev wrote:
> To be able to run DSP-enabled userspace applications we need to
> save and restore following DSP-related registers:
> At IRQ/exception entry/exit:
>  * ACC0_GLO, ACC0_GHI, DSP_CTRL
>  * ACC0_LO, ACC0_HI (we already save them as r58, r59 pair)
> At context switch:
>  * DSP_BFLY0, DSP_FFT_CTRL

Good summary: but the question is this more than needed.
For kernel proper, you've disabled guard bits, saturation etc already. AFAIKS gcc
won't generate complex/fractional math for kernel so your bullet #1 can likely be
considered user space only hence can go to bullet #3. Do you have reasons to
believe otherwise ?

> Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
> ---
>  arch/arc/Kconfig                   |  7 +++
>  arch/arc/include/asm/arcregs.h     |  2 +
>  arch/arc/include/asm/dsp-impl.h    | 75 +++++++++++++++++++++++++++++-
>  arch/arc/include/asm/dsp.h         | 42 +++++++++++++++++
>  arch/arc/include/asm/entry-arcv2.h |  3 ++
>  arch/arc/include/asm/processor.h   |  4 ++
>  arch/arc/include/asm/ptrace.h      |  4 ++
>  arch/arc/include/asm/switch_to.h   |  2 +
>  arch/arc/kernel/asm-offsets.c      |  7 +++
>  arch/arc/kernel/setup.c            |  2 +-
>  10 files changed, 146 insertions(+), 2 deletions(-)
>  create mode 100644 arch/arc/include/asm/dsp.h
>
> diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
> index b9cd7ce3f878..c3210754a3d2 100644
> --- a/arch/arc/Kconfig
> +++ b/arch/arc/Kconfig
> @@ -432,6 +432,13 @@ config ARC_DSP_KERNEL
>  	  DSP extension presence in HW, no support for DSP-enabled userspace
>  	  applications. We don't save / restore DSP registers and only do
>  	  some minimal preparations so userspace won't be able to break kernel
> +
> +config ARC_DSP_USERSPACE
> +	bool "Support DSP for userspace apps"
> +	select ARC_HAS_ACCL_REGS
> +	help
> +	  DSP extension presence in HW, support save / restore DSP registers to
> +	  run DSP-enabled userspace applications
>  endchoice
>  
>  config ARC_IRQ_NO_AUTOSAVE
> diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
> index 0004b1e9b325..a713819cab3c 100644
> --- a/arch/arc/include/asm/arcregs.h
> +++ b/arch/arc/include/asm/arcregs.h
> @@ -118,6 +118,8 @@
>  
>  /*
>   * DSP-related registers
> + * Registers names must correspond to dsp_callee_regs structure fields names
> + * for automatic offset calculation in DSP_AUX_SAVE_RESTORE macros.

good idea for preventing carbon errors.

>   */
>  #define ARC_AUX_DSP_BUILD	0x7A
>  #define ARC_AUX_ACC0_LO		0x580
> diff --git a/arch/arc/include/asm/dsp-impl.h b/arch/arc/include/asm/dsp-impl.h
> index 788093cbe689..7b640a680dfc 100644
> --- a/arch/arc/include/asm/dsp-impl.h
> +++ b/arch/arc/include/asm/dsp-impl.h
> @@ -7,6 +7,8 @@
>  #ifndef __ASM_ARC_DSP_IMPL_H
>  #define __ASM_ARC_DSP_IMPL_H
>  
> +#include <asm/dsp.h>
> +
>  #define DSP_CTRL_DISABLED_ALL		0
>  
>  #ifdef __ASSEMBLY__
> @@ -28,11 +30,82 @@
>  	 * able to break kernel */
>  	mov	r58, DSP_CTRL_DISABLED_ALL
>  	sr	r58, [ARC_AUX_DSP_CTRL]
> -#endif /* ARC_DSP_KERNEL */
> +
> +#elif defined(ARC_DSP_SAVE_RESTORE_REGS)
> +	lr	r58, [ARC_AUX_ACC0_GLO]
> +	lr	r59, [ARC_AUX_ACC0_GHI]
> +	ST2	r58, r59, PT_ACC0_GLO
> +
> +	lr	r58, [ARC_AUX_DSP_CTRL]
> +	st	r58, [sp, PT_DSP_CTRL]
> +
> +#endif
> +.endm
> +
> +/* clobbers r58, r59 registers pair */
> +.macro DSP_RESTORE_REGFILE_IRQ
> +#if defined(ARC_DSP_SAVE_RESTORE_REGS)
> +	LD2	r58, r59, PT_ACC0_GLO
> +	sr	r58, [ARC_AUX_ACC0_GLO]
> +	sr	r59, [ARC_AUX_ACC0_GHI]
> +
> +	ld	r58, [sp, PT_DSP_CTRL]
> +	sr	r58, [ARC_AUX_DSP_CTRL]
> +
> +#endif

Assuming you still need this, consider using different scratch registers if
possible. I understand it gets tricky in restore path but there even more
registers are available to clobber as they will be restored after this code.

> +#ifdef ARC_DSP_SAVE_RESTORE_REGS
> +
> +/*
> + * As we save new and restore old AUX register value in the same place we
> + * can optimize a bit and use AEX instruction (swap contents of an auxiliary
> + * register with a core register) instead of LR + SR pair.
> + */
> +#define AUX_SAVE_RESTORE(_saveto, _readfrom, _offt, _aux, _scratch)	\
> +do {									\
> +	__asm__ __volatile__(						\
> +		"ld	%0, [%2, %4]			\n"		\
> +		"aex	%0, [%3]			\n"		\
> +		"st	%0, [%1, %4]			\n"		\
> +		:							\
> +		  "=&r" (_scratch)	/* must be early clobber */	\

Define the scratch variable locally for clarity and better liveness tracking - for
both code reader and compiler :-)
Also avoids having to initialize it.

> +		:							\
> +		   "r" (_saveto),					\
> +		   "r" (_readfrom),					\
> +		   "I" (_aux),						\
> +		   "I" (_offt)						\
> +		:							\

AEX with "I" constraint will likely be an 8 byte instructions always. Best to give
compiler wiggle room with "Ir"

> +		  "memory"						\
> +	);								\
> +} while (0)
> +
> +#define DSP_AUX_SAVE_RESTORE(_saveto, _readfrom, _aux, _scratch)	\
> +	AUX_SAVE_RESTORE(_saveto, _readfrom,				\
> +		offsetof(struct dsp_callee_regs, _aux),			\
> +		ARC_AUX_##_aux, _scratch)
> +
> +static inline void arc_dsp_save_restore(struct task_struct *prev,
> +					struct task_struct *next)
> +{
> +	long unsigned int *saveto = &prev->thread.dsp.DSP_BFLY0;
> +	long unsigned int *readfrom = &next->thread.dsp.DSP_BFLY0;
> +
> +	long unsigned int zero = 0;

See comment about pushing scratch to the relevant code block.

> +
> +	DSP_AUX_SAVE_RESTORE(saveto, readfrom, DSP_BFLY0, zero);
> +	DSP_AUX_SAVE_RESTORE(saveto, readfrom, DSP_FFT_CTRL, zero);
> +}
> +

[snip]

> +
> +#if defined(CONFIG_ARC_DSP_USERSPACE)
> +#define ARC_DSP_SAVE_RESTORE_REGS	1
> +#endif

I can see u added this for consistency with below which is a really bad idea: see
below.

> +
> +#ifndef __ASSEMBLY__
> +
> +/* some defines to simplify config sanitize in kernel/setup.c */
> +#if defined(CONFIG_ARC_DSP_KERNEL) 	|| \
> +    defined(CONFIG_ARC_DSP_USERSPACE)
> +#define ARC_DSP_HANDLED			1
> +#else
> +#define ARC_DSP_HANDLED			0
> +#endif

This is a really bad idea - u r introducing explicit include dependencies which
can change even outside of arch changes !
We've dealt with enough of these problems with current.h, so best to avoid, even
if there is some code clutter.

> +
> +#if defined(CONFIG_ARC_DSP_USERSPACE)
> +#define ARC_DSP_OPT_NAME		"CONFIG_ARC_DSP_USERSPACE"
> +#else
> +#define ARC_DSP_OPT_NAME		"CONFIG_ARC_DSP_KERNEL"
> +#endif
> +
> +/*
> + * DSP-related saved registers - need to be saved only when you are
> + * scheduled out.
> + * structure fields name must correspond to aux register defenitions for
> + * automatic offset calculation in DSP_AUX_SAVE_RESTORE macros
> + */
> +struct dsp_callee_regs {
> +	unsigned long DSP_BFLY0, DSP_FFT_CTRL;
> +};
> +
> +#endif /* !__ASSEMBLY__ */
> +
> +#endif /* __ASM_ARC_DSP_H */
> diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
> index e3f8bd3e2eba..5d079f0b6243 100644
> --- a/arch/arc/include/asm/entry-arcv2.h
> +++ b/arch/arc/include/asm/entry-arcv2.h
> @@ -192,6 +192,9 @@
>  	ld	r25, [sp, PT_user_r25]
>  #endif
>  
> +	/* clobbers r58, r59 registers pair, so must be before r58, r59 restore */
> +	DSP_RESTORE_REGFILE_IRQ
> +
>  #ifdef CONFIG_ARC_HAS_ACCL_REGS
>  	LD2	r58, r59, PT_r58
>  #endif
> diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
> index 706edeaa5583..1716df0f4472 100644
> --- a/arch/arc/include/asm/processor.h
> +++ b/arch/arc/include/asm/processor.h
> @@ -14,6 +14,7 @@
>  #ifndef __ASSEMBLY__
>  
>  #include <asm/ptrace.h>
> +#include <asm/dsp.h>
>  
>  #ifdef CONFIG_ARC_FPU_SAVE_RESTORE
>  /* These DPFP regs need to be saved/restored across ctx-sw */
> @@ -39,6 +40,9 @@ struct thread_struct {
>  	unsigned long ksp;	/* kernel mode stack pointer */
>  	unsigned long callee_reg;	/* pointer to callee regs */
>  	unsigned long fault_address;	/* dbls as brkpt holder as well */
> +#ifdef ARC_DSP_SAVE_RESTORE_REGS
> +	struct dsp_callee_regs dsp;
> +#endif
>  #ifdef CONFIG_ARC_FPU_SAVE_RESTORE
>  	struct arc_fpu fpu;
>  #endif
> diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
> index ba9854ef39e8..a5851ee141de 100644
> --- a/arch/arc/include/asm/ptrace.h
> +++ b/arch/arc/include/asm/ptrace.h
> @@ -8,6 +8,7 @@
>  #define __ASM_ARC_PTRACE_H
>  
>  #include <uapi/asm/ptrace.h>
> +#include <asm/dsp.h>
>  
>  #ifndef __ASSEMBLY__
>  
> @@ -91,6 +92,9 @@ struct pt_regs {
>  #ifdef CONFIG_ARC_HAS_ACCL_REGS
>  	unsigned long r58, r59;	/* ACCL/ACCH used by FPU / DSP MPY */
>  #endif
> +#ifdef ARC_DSP_SAVE_RESTORE_REGS

Use the Kconfig option name directly or !CONFIG_NO_DSP etc

> +	unsigned long ACC0_GLO, ACC0_GHI, DSP_CTRL;
> +#endif

see comments at top.

>  
>  	/*------- Below list auto saved by h/w -----------*/
>  	unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;

[snip]

>  }
> diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
> index b3995dd673d9..30d31579c51d 100644
> --- a/arch/arc/kernel/setup.c
> +++ b/arch/arc/kernel/setup.c
> @@ -447,7 +447,7 @@ static void arc_chk_core_config(void)
>  		CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present);
>  
>  		present = dsp_exist();
> -		CHK_OPT_STRICT(CONFIG_ARC_DSP_KERNEL, present);
> +		chk_opt_strict(ARC_DSP_OPT_NAME, present, ARC_DSP_HANDLED);

This needs to be reworked given the header fudge is not a good idea.
Eugeniy Paltsev Jan. 9, 2020, 7:01 p.m. UTC | #2
Hi Vineet,

>From: Vineet Gupta <vgupta@synopsys.com>
>On 12/27/19 10:03 AM, Eugeniy Paltsev wrote:
>> To be able to run DSP-enabled userspace applications we need to
>> save and restore following DSP-related registers:
>> At IRQ/exception entry/exit:
>>  * ACC0_GLO, ACC0_GHI, DSP_CTRL
>>  * ACC0_LO, ACC0_HI (we already save them as r58, r59 pair)
>> At context switch:
>>  * DSP_BFLY0, DSP_FFT_CTRL
[snip]
>> +
>> +#ifndef __ASSEMBLY__
>> +
>> +/* some defines to simplify config sanitize in kernel/setup.c */
>> +#if defined(CONFIG_ARC_DSP_KERNEL)    || \
>> +    defined(CONFIG_ARC_DSP_USERSPACE)
>> +#define ARC_DSP_HANDLED                      1
>> +#else
>> +#define ARC_DSP_HANDLED                      0
>> +#endif
>
>This is a really bad idea - u r introducing explicit include dependencies which
>can change even outside of arch changes !
>We've dealt with enough of these problems with current.h, so best to avoid, even
>if there is some code clutter.

Hmm, would it be OK if I add this option as a private kconfig option?
I.E (for ARC_DSP_HANDLED):

---------------->8----------------------
config ARC_DSP_HANDLED
	def_bool n

choice
	prompt "DSP support"
	default ARC_DSP_NONE
	help
	  Depending on the configuration, CPU can contain DSP registers
	  (ACC0_GLO, ACC0_GHI, DSP_BFLY0, DSP_CTRL, DSP_FFT_CTRL).
	  Bellow is options describing how to handle these registers in
	  interrupt entry / exit and in context switch.

config ARC_DSP_NONE
	bool "No DSP extension presence in HW"
	help
	  No DSP extension presence in HW

config ARC_DSP_KERNEL
	bool "DSP extension in HW, no support for userspace"
	select ARC_HAS_ACCL_REGS
	select ARC_DSP_HANDLED
	help
	  DSP extension presence in HW, no support for DSP-enabled userspace
	  applications. We don't save / restore DSP registers and only do
	  some minimal preparations so userspace won't be able to break kernel

config ARC_DSP_USERSPACE
	bool "Support DSP for userspace apps"
	select ARC_HAS_ACCL_REGS
	select ARC_DSP_HANDLED
	help
	  DSP extension presence in HW, support save / restore DSP registers to
	  run DSP-enabled userspace applications
endchoice
---------------->8----------------------

---
 Eugeniy Paltsev
Vineet Gupta Jan. 9, 2020, 10:46 p.m. UTC | #3
On 1/9/20 11:01 AM, Eugeniy Paltsev wrote:
> Hi Vineet,
>
>> From: Vineet Gupta <vgupta@synopsys.com>
>> On 12/27/19 10:03 AM, Eugeniy Paltsev wrote:
>>> To be able to run DSP-enabled userspace applications we need to
>>> save and restore following DSP-related registers:
>>> At IRQ/exception entry/exit:
>>>  * ACC0_GLO, ACC0_GHI, DSP_CTRL
>>>  * ACC0_LO, ACC0_HI (we already save them as r58, r59 pair)
>>> At context switch:
>>>  * DSP_BFLY0, DSP_FFT_CTRL
> [snip]
>>> +
>>> +#ifndef __ASSEMBLY__
>>> +
>>> +/* some defines to simplify config sanitize in kernel/setup.c */
>>> +#if defined(CONFIG_ARC_DSP_KERNEL)    || \
>>> +    defined(CONFIG_ARC_DSP_USERSPACE)
>>> +#define ARC_DSP_HANDLED                      1
>>> +#else
>>> +#define ARC_DSP_HANDLED                      0
>>> +#endif
>> This is a really bad idea - u r introducing explicit include dependencies which
>> can change even outside of arch changes !
>> We've dealt with enough of these problems with current.h, so best to avoid, even
>> if there is some code clutter.
> Hmm, would it be OK if I add this option as a private kconfig option?
> I.E (for ARC_DSP_HANDLED):

Yes that would be good.

-Vineet
Eugeniy Paltsev March 4, 2020, 11:51 a.m. UTC | #4
Hi Vineet,

>From: Vineet Gupta <vgupta@synopsys.com>
>Sent: Tuesday, January 7, 2020 21:25
>To: Eugeniy Paltsev; linux-snps-arc@lists.infradead.org
>Cc: linux-kernel@vger.kernel.org; Alexey Brodkin
>Subject: Re: [PATCH 4/5] ARC: add support for DSP-enabled userspace applications
>> +/*
>> + * As we save new and restore old AUX register value in the same place we
>> + * can optimize a bit and use AEX instruction (swap contents of an auxiliary
>> + * register with a core register) instead of LR + SR pair.
>> + */
>> +#define AUX_SAVE_RESTORE(_saveto, _readfrom, _offt, _aux, _scratch)  \
>> +do {                                                                 \
>> +     __asm__ __volatile__(                                           \
>> +             "ld     %0, [%2, %4]                    \n"             \
>> +             "aex    %0, [%3]                        \n"             \
>> +             "st     %0, [%1, %4]                    \n"             \
>> +             :                                                       \
>> +               "=&r" (_scratch)      /* must be early clobber */     \
>> +             :                                                       \
>> +                "r" (_saveto),                                       \
>> +                "r" (_readfrom),                                     \
>> +                "I" (_aux),                                          \
>> +                "I" (_offt)                                          \
>> +             :                                                       \
>
>AEX with "I" constraint will likely be an 8 byte instructions always. Best to give
>compiler wiggle room with "Ir"

Could you please explain how "Ir" will work in this case?
Does this mean that compiler can pass the value either as constant ('I') or
via register ('r')?

Note that in this case both _aux and _offt are compile-time constants -
_aux comes from define and _offt comes from offsetof().

>> +               "memory"                                              \
>> +     );                                                              \
>> +} while (0)
>> +
Vineet Gupta March 4, 2020, 5:58 p.m. UTC | #5
On 3/4/20 3:51 AM, Eugeniy Paltsev wrote:
>>> +                "r" (_saveto),                                       \
>>> +                "r" (_readfrom),                                     \
>>> +                "I" (_aux),                                          \
>>> +                "I" (_offt)                                          \
>>> +             :                                                       \
>>
>> AEX with "I" constraint will likely be an 8 byte instructions always. Best to give
>> compiler wiggle room with "Ir"
>
> Could you please explain how "Ir" will work in this case?
> Does this mean that compiler can pass the value either as constant ('I') or
> via register ('r')?

Right. If the same compile-time const is used multiple times, say SR of same value
to a reg, if can decide to hoist the value in a register, if you provide it "r" or
"Ir", leading to shorter overall code.

-Vineet
diff mbox series

Patch

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index b9cd7ce3f878..c3210754a3d2 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -432,6 +432,13 @@  config ARC_DSP_KERNEL
 	  DSP extension presence in HW, no support for DSP-enabled userspace
 	  applications. We don't save / restore DSP registers and only do
 	  some minimal preparations so userspace won't be able to break kernel
+
+config ARC_DSP_USERSPACE
+	bool "Support DSP for userspace apps"
+	select ARC_HAS_ACCL_REGS
+	help
+	  DSP extension presence in HW, support save / restore DSP registers to
+	  run DSP-enabled userspace applications
 endchoice
 
 config ARC_IRQ_NO_AUTOSAVE
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 0004b1e9b325..a713819cab3c 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -118,6 +118,8 @@ 
 
 /*
  * DSP-related registers
+ * Registers names must correspond to dsp_callee_regs structure fields names
+ * for automatic offset calculation in DSP_AUX_SAVE_RESTORE macros.
  */
 #define ARC_AUX_DSP_BUILD	0x7A
 #define ARC_AUX_ACC0_LO		0x580
diff --git a/arch/arc/include/asm/dsp-impl.h b/arch/arc/include/asm/dsp-impl.h
index 788093cbe689..7b640a680dfc 100644
--- a/arch/arc/include/asm/dsp-impl.h
+++ b/arch/arc/include/asm/dsp-impl.h
@@ -7,6 +7,8 @@ 
 #ifndef __ASM_ARC_DSP_IMPL_H
 #define __ASM_ARC_DSP_IMPL_H
 
+#include <asm/dsp.h>
+
 #define DSP_CTRL_DISABLED_ALL		0
 
 #ifdef __ASSEMBLY__
@@ -28,11 +30,82 @@ 
 	 * able to break kernel */
 	mov	r58, DSP_CTRL_DISABLED_ALL
 	sr	r58, [ARC_AUX_DSP_CTRL]
-#endif /* ARC_DSP_KERNEL */
+
+#elif defined(ARC_DSP_SAVE_RESTORE_REGS)
+	lr	r58, [ARC_AUX_ACC0_GLO]
+	lr	r59, [ARC_AUX_ACC0_GHI]
+	ST2	r58, r59, PT_ACC0_GLO
+
+	lr	r58, [ARC_AUX_DSP_CTRL]
+	st	r58, [sp, PT_DSP_CTRL]
+
+#endif
+.endm
+
+/* clobbers r58, r59 registers pair */
+.macro DSP_RESTORE_REGFILE_IRQ
+#if defined(ARC_DSP_SAVE_RESTORE_REGS)
+	LD2	r58, r59, PT_ACC0_GLO
+	sr	r58, [ARC_AUX_ACC0_GLO]
+	sr	r59, [ARC_AUX_ACC0_GHI]
+
+	ld	r58, [sp, PT_DSP_CTRL]
+	sr	r58, [ARC_AUX_DSP_CTRL]
+
+#endif
 .endm
 
 #else /* __ASEMBLY__ */
 
+#include <linux/sched.h>
+#include <asm/switch_to.h>
+
+#ifdef ARC_DSP_SAVE_RESTORE_REGS
+
+/*
+ * As we save new and restore old AUX register value in the same place we
+ * can optimize a bit and use AEX instruction (swap contents of an auxiliary
+ * register with a core register) instead of LR + SR pair.
+ */
+#define AUX_SAVE_RESTORE(_saveto, _readfrom, _offt, _aux, _scratch)	\
+do {									\
+	__asm__ __volatile__(						\
+		"ld	%0, [%2, %4]			\n"		\
+		"aex	%0, [%3]			\n"		\
+		"st	%0, [%1, %4]			\n"		\
+		:							\
+		  "=&r" (_scratch)	/* must be early clobber */	\
+		:							\
+		   "r" (_saveto),					\
+		   "r" (_readfrom),					\
+		   "I" (_aux),						\
+		   "I" (_offt)						\
+		:							\
+		  "memory"						\
+	);								\
+} while (0)
+
+#define DSP_AUX_SAVE_RESTORE(_saveto, _readfrom, _aux, _scratch)	\
+	AUX_SAVE_RESTORE(_saveto, _readfrom,				\
+		offsetof(struct dsp_callee_regs, _aux),			\
+		ARC_AUX_##_aux, _scratch)
+
+static inline void arc_dsp_save_restore(struct task_struct *prev,
+					struct task_struct *next)
+{
+	long unsigned int *saveto = &prev->thread.dsp.DSP_BFLY0;
+	long unsigned int *readfrom = &next->thread.dsp.DSP_BFLY0;
+
+	long unsigned int zero = 0;
+
+	DSP_AUX_SAVE_RESTORE(saveto, readfrom, DSP_BFLY0, zero);
+	DSP_AUX_SAVE_RESTORE(saveto, readfrom, DSP_FFT_CTRL, zero);
+}
+
+#else /* !ARC_DSP_SAVE_RESTORE_REGS */
+#define arc_dsp_save_restore(p, n)
+#endif /* ARC_DSP_SAVE_RESTORE_REGS */
+
 static inline bool dsp_exist(void)
 {
 	struct bcr_generic bcr;
diff --git a/arch/arc/include/asm/dsp.h b/arch/arc/include/asm/dsp.h
new file mode 100644
index 000000000000..68507f04dea4
--- /dev/null
+++ b/arch/arc/include/asm/dsp.h
@@ -0,0 +1,42 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+#ifndef __ASM_ARC_DSP_H
+#define __ASM_ARC_DSP_H
+
+#if defined(CONFIG_ARC_DSP_USERSPACE)
+#define ARC_DSP_SAVE_RESTORE_REGS	1
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* some defines to simplify config sanitize in kernel/setup.c */
+#if defined(CONFIG_ARC_DSP_KERNEL) 	|| \
+    defined(CONFIG_ARC_DSP_USERSPACE)
+#define ARC_DSP_HANDLED			1
+#else
+#define ARC_DSP_HANDLED			0
+#endif
+
+#if defined(CONFIG_ARC_DSP_USERSPACE)
+#define ARC_DSP_OPT_NAME		"CONFIG_ARC_DSP_USERSPACE"
+#else
+#define ARC_DSP_OPT_NAME		"CONFIG_ARC_DSP_KERNEL"
+#endif
+
+/*
+ * DSP-related saved registers - need to be saved only when you are
+ * scheduled out.
+ * structure fields name must correspond to aux register defenitions for
+ * automatic offset calculation in DSP_AUX_SAVE_RESTORE macros
+ */
+struct dsp_callee_regs {
+	unsigned long DSP_BFLY0, DSP_FFT_CTRL;
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_ARC_DSP_H */
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
index e3f8bd3e2eba..5d079f0b6243 100644
--- a/arch/arc/include/asm/entry-arcv2.h
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -192,6 +192,9 @@ 
 	ld	r25, [sp, PT_user_r25]
 #endif
 
+	/* clobbers r58, r59 registers pair, so must be before r58, r59 restore */
+	DSP_RESTORE_REGFILE_IRQ
+
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
 	LD2	r58, r59, PT_r58
 #endif
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 706edeaa5583..1716df0f4472 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -14,6 +14,7 @@ 
 #ifndef __ASSEMBLY__
 
 #include <asm/ptrace.h>
+#include <asm/dsp.h>
 
 #ifdef CONFIG_ARC_FPU_SAVE_RESTORE
 /* These DPFP regs need to be saved/restored across ctx-sw */
@@ -39,6 +40,9 @@  struct thread_struct {
 	unsigned long ksp;	/* kernel mode stack pointer */
 	unsigned long callee_reg;	/* pointer to callee regs */
 	unsigned long fault_address;	/* dbls as brkpt holder as well */
+#ifdef ARC_DSP_SAVE_RESTORE_REGS
+	struct dsp_callee_regs dsp;
+#endif
 #ifdef CONFIG_ARC_FPU_SAVE_RESTORE
 	struct arc_fpu fpu;
 #endif
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index ba9854ef39e8..a5851ee141de 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -8,6 +8,7 @@ 
 #define __ASM_ARC_PTRACE_H
 
 #include <uapi/asm/ptrace.h>
+#include <asm/dsp.h>
 
 #ifndef __ASSEMBLY__
 
@@ -91,6 +92,9 @@  struct pt_regs {
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
 	unsigned long r58, r59;	/* ACCL/ACCH used by FPU / DSP MPY */
 #endif
+#ifdef ARC_DSP_SAVE_RESTORE_REGS
+	unsigned long ACC0_GLO, ACC0_GHI, DSP_CTRL;
+#endif
 
 	/*------- Below list auto saved by h/w -----------*/
 	unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h
index 77f123385e96..b49f025136af 100644
--- a/arch/arc/include/asm/switch_to.h
+++ b/arch/arc/include/asm/switch_to.h
@@ -9,6 +9,7 @@ 
 #ifndef __ASSEMBLY__
 
 #include <linux/sched.h>
+#include <asm/dsp-impl.h>
 
 #ifdef CONFIG_ARC_FPU_SAVE_RESTORE
 
@@ -37,6 +38,7 @@  struct task_struct *__switch_to(struct task_struct *p, struct task_struct *n);
 do {					\
 	ARC_EZNPS_DP_PREV(prev, next);	\
 	ARC_FPU_PREV(prev, next);	\
+	arc_dsp_save_restore(prev, next);\
 	last = __switch_to(prev, next);\
 	ARC_FPU_NEXT(next);		\
 	mb();				\
diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
index c783bcd35eb8..f74426a71ba8 100644
--- a/arch/arc/kernel/asm-offsets.c
+++ b/arch/arc/kernel/asm-offsets.c
@@ -11,6 +11,8 @@ 
 #include <linux/ptrace.h>
 #include <asm/hardirq.h>
 #include <asm/page.h>
+#include <asm/dsp.h>
+
 
 int main(void)
 {
@@ -75,6 +77,11 @@  int main(void)
 	OFFSET(PT_r58, pt_regs, r58);
 	OFFSET(PT_r59, pt_regs, r59);
 #endif
+#ifdef ARC_DSP_SAVE_RESTORE_REGS
+	OFFSET(PT_ACC0_GLO, pt_regs, ACC0_GLO);
+	OFFSET(PT_ACC0_GHI, pt_regs, ACC0_GHI);
+	OFFSET(PT_DSP_CTRL, pt_regs, DSP_CTRL);
+#endif
 
 	return 0;
 }
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index b3995dd673d9..30d31579c51d 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -447,7 +447,7 @@  static void arc_chk_core_config(void)
 		CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present);
 
 		present = dsp_exist();
-		CHK_OPT_STRICT(CONFIG_ARC_DSP_KERNEL, present);
+		chk_opt_strict(ARC_DSP_OPT_NAME, present, ARC_DSP_HANDLED);
 	}
 }