Patchwork powerpc: Add Initiate Coprocessor Store Word (icswx) support

login
register
mail settings
Submitter Anton Blanchard
Date May 3, 2011, 6:43 a.m.
Message ID <20110503164304.7690a1b7@kryten>
Download mbox | patch
Permalink /patch/93743/
State Accepted
Commit 851d2e2fe8dbcbe3afcad6fc4569c881d8ad4ce9
Delegated to: Benjamin Herrenschmidt
Headers show

Comments

Anton Blanchard - May 3, 2011, 6:43 a.m.
From: Tseng-Hui (Frank) Lin <thlin@linux.vnet.ibm.com>

Icswx is a PowerPC instruction to send data to a co-processor. On Book-S
processors the LPAR_ID and process ID (PID) of the owning process are
registered in the window context of the co-processor at initialization
time. When the icswx instruction is executed, the L2 generates a cop-req
transaction on PowerBus. The transaction has no address and the
processor does not perform an MMU access to authenticate the transaction.
The co-processor compares the LPAR_ID and the PID included in the
transaction with the LPAR_ID and PID held in the window context to
determine if the process is authorized to generate the transaction.

The OS needs to assign a 16-bit PID to the process. This cop-PID needs
to be updated on every context switch, and it needs to be destroyed
when the context is destroyed.

Signed-off-by: Sonny Rao <sonnyrao@linux.vnet.ibm.com>
Signed-off-by: Tseng-Hui (Frank) Lin <thlin@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
---

Changes:

- Go back to dynamically allocating the spinlock to avoid the mmu_context.h
  and spinlock_types.h include mess.
Benjamin Herrenschmidt - May 3, 2011, 6:47 a.m.
On Tue, 2011-05-03 at 16:43 +1000, Anton Blanchard wrote:
> From: Tseng-Hui (Frank) Lin <thlin@linux.vnet.ibm.com>
> 
> Icswx is a PowerPC instruction to send data to a co-processor. On Book-S
> processors the LPAR_ID and process ID (PID) of the owning process are
> registered in the window context of the co-processor at initialization
> time. When the icswx instruction is executed the L2 generates a cop-reg
> transaction on PowerBus. The transaction has no address and the
> processor does not perform an MMU access to authenticate the transaction.
> The co-processor compares the LPAR_ID and the PID included in the
> transaction and the LPAR_ID and PID held in the window context to
> determine if the process is authorized to generate the transaction.
> 
> The OS needs to assign a 16-bit PID for the process. This cop-PID needs
> to be updated during context switch. The cop-PID needs to be destroyed
> when the context is destroyed.

> Signed-off-by: Sonny Rao <sonnyrao@linux.vnet.ibm.com>
> Signed-off-by: Tseng-Hui (Frank) Lin <thlin@linux.vnet.ibm.com>
> Signed-off-by: Anton Blanchard <anton@samba.org>
> ---
> 
> Changes:
> 
> - Go back to dynamically allocating the spinlock to avoid the mmu_context.h
>   and spinlock_types.h include mess.

My only comment (sorry Anton :-) would have been that we could lazily
allocate the spinlock on the first use_cop() ... or do we have that
potentially called in the wrong context ?

Cheers,
Ben.

> Index: linux-powerpc/arch/powerpc/include/asm/cputable.h
> ===================================================================
> --- linux-powerpc.orig/arch/powerpc/include/asm/cputable.h	2011-05-03 16:39:21.779140935 +1000
> +++ linux-powerpc/arch/powerpc/include/asm/cputable.h	2011-05-03 16:39:26.059220272 +1000
> @@ -197,6 +197,7 @@ extern const char *powerpc_base_platform
>  #define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0200000000000000)
>  #define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0400000000000000)
>  #define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0800000000000000)
> +#define CPU_FTR_ICSWX			LONG_ASM_CONST(0x1000000000000000)
>  
>  #ifndef __ASSEMBLY__
>  
> @@ -418,7 +419,8 @@ extern const char *powerpc_base_platform
>  	    CPU_FTR_COHERENT_ICACHE | \
>  	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
>  	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
> -	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
> +	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
> +	    CPU_FTR_ICSWX)
>  #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
>  	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
>  	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
> Index: linux-powerpc/arch/powerpc/include/asm/mmu-hash64.h
> ===================================================================
> --- linux-powerpc.orig/arch/powerpc/include/asm/mmu-hash64.h	2011-05-03 16:39:21.789141120 +1000
> +++ linux-powerpc/arch/powerpc/include/asm/mmu-hash64.h	2011-05-03 16:39:26.059220272 +1000
> @@ -408,6 +408,7 @@ static inline void subpage_prot_init_new
>  #endif /* CONFIG_PPC_SUBPAGE_PROT */
>  
>  typedef unsigned long mm_context_id_t;
> +struct spinlock;
>  
>  typedef struct {
>  	mm_context_id_t id;
> @@ -423,6 +424,11 @@ typedef struct {
>  #ifdef CONFIG_PPC_SUBPAGE_PROT
>  	struct subpage_prot_table spt;
>  #endif /* CONFIG_PPC_SUBPAGE_PROT */
> +#ifdef CONFIG_PPC_ICSWX
> +	struct spinlock *cop_lockp; /* guard acop and cop_pid */
> +	unsigned long acop;	/* mask of enabled coprocessor types */
> +	unsigned int cop_pid;	/* pid value used with coprocessors */
> +#endif /* CONFIG_PPC_ICSWX */
>  } mm_context_t;
>  
> 
> Index: linux-powerpc/arch/powerpc/include/asm/mmu_context.h
> ===================================================================
> --- linux-powerpc.orig/arch/powerpc/include/asm/mmu_context.h	2011-05-03 16:39:21.779140935 +1000
> +++ linux-powerpc/arch/powerpc/include/asm/mmu_context.h	2011-05-03 16:39:26.059220272 +1000
> @@ -32,6 +32,10 @@ extern void __destroy_context(unsigned l
>  extern void mmu_context_init(void);
>  #endif
>  
> +extern void switch_cop(struct mm_struct *next);
> +extern int use_cop(unsigned long acop, struct mm_struct *mm);
> +extern void drop_cop(unsigned long acop, struct mm_struct *mm);
> +
>  /*
>   * switch_mm is the entry point called from the architecture independent
>   * code in kernel/sched.c
> @@ -55,6 +59,12 @@ static inline void switch_mm(struct mm_s
>  	if (prev == next)
>  		return;
>  
> +#ifdef CONFIG_PPC_ICSWX
> +	/* Switch coprocessor context only if prev or next uses a coprocessor */
> +	if (prev->context.acop || next->context.acop)
> +		switch_cop(next);
> +#endif /* CONFIG_PPC_ICSWX */
> +
>  	/* We must stop all altivec streams before changing the HW
>  	 * context
>  	 */
> Index: linux-powerpc/arch/powerpc/include/asm/reg.h
> ===================================================================
> --- linux-powerpc.orig/arch/powerpc/include/asm/reg.h	2011-05-03 16:39:21.799141306 +1000
> +++ linux-powerpc/arch/powerpc/include/asm/reg.h	2011-05-03 16:39:26.059220272 +1000
> @@ -188,6 +188,7 @@
>  
>  #define SPRN_CTR	0x009	/* Count Register */
>  #define SPRN_DSCR	0x11
> +#define SPRN_ACOP	0x1F	/* Available Coprocessor Register */
>  #define SPRN_CTRLF	0x088
>  #define SPRN_CTRLT	0x098
>  #define   CTRL_CT	0xc0000000	/* current thread */
> Index: linux-powerpc/arch/powerpc/mm/mmu_context_hash64.c
> ===================================================================
> --- linux-powerpc.orig/arch/powerpc/mm/mmu_context_hash64.c	2011-05-03 16:39:21.759140565 +1000
> +++ linux-powerpc/arch/powerpc/mm/mmu_context_hash64.c	2011-05-03 16:39:26.059220272 +1000
> @@ -20,9 +20,205 @@
>  #include <linux/idr.h>
>  #include <linux/module.h>
>  #include <linux/gfp.h>
> +#include <linux/slab.h>
>  
>  #include <asm/mmu_context.h>
>  
> +#ifdef CONFIG_PPC_ICSWX
> +/*
> + * The processor and its L2 cache cause the icswx instruction to
> + * generate a COP_REQ transaction on PowerBus. The transaction has
> + * no address, and the processor does not perform an MMU access
> + * to authenticate the transaction. The command portion of the
> + * PowerBus COP_REQ transaction includes the LPAR_ID (LPID) and
> + * the coprocessor Process ID (PID), which the coprocessor compares
> + * to the authorized LPID and PID held in the coprocessor, to determine
> + * if the process is authorized to generate the transaction.
> + * The data of the COP_REQ transaction is 128-byte or less and is
> + * placed in cacheable memory on a 128-byte cache line boundary.
> + *
> + * The task to use a coprocessor should use use_cop() to allocate
> + * a coprocessor PID before executing icswx instruction. use_cop()
> + * also enables the coprocessor context switching. Drop_cop() is
> + * used to free the coprocessor PID.
> + *
> + * Example:
> + * Host Fabric Interface (HFI) is a PowerPC network coprocessor.
> + * Each HFI have multiple windows. Each HFI window serves as a
> + * network device sending to and receiving from HFI network.
> + * HFI immediate send function uses icswx instruction. The immediate
> + * send function allows small (single cache-line) packets be sent
> + * without using the regular HFI send FIFO and doorbell, which are
> + * much slower than immediate send.
> + *
> + * For each task intending to use HFI immediate send, the HFI driver
> + * calls use_cop() to obtain a coprocessor PID for the task.
> + * The HFI driver then allocate a free HFI window and save the
> + * coprocessor PID to the HFI window to allow the task to use the
> + * HFI window.
> + *
> + * The HFI driver repeatedly creates immediate send packets and
> + * issues icswx instruction to send data through the HFI window.
> + * The HFI compares the coprocessor PID in the CPU PID register
> + * to the PID held in the HFI window to determine if the transaction
> + * is allowed.
> + *
> + * When the task to release the HFI window, the HFI driver calls
> + * drop_cop() to release the coprocessor PID.
> + */
> +
> +#define COP_PID_NONE 0
> +#define COP_PID_MIN (COP_PID_NONE + 1)
> +#define COP_PID_MAX (0xFFFF)
> +
> +static DEFINE_SPINLOCK(mmu_context_acop_lock);
> +static DEFINE_IDA(cop_ida);
> +
> +void switch_cop(struct mm_struct *next)
> +{
> +	mtspr(SPRN_PID, next->context.cop_pid);
> +	mtspr(SPRN_ACOP, next->context.acop);
> +}
> +
> +static int new_cop_pid(struct ida *ida, int min_id, int max_id,
> +		       spinlock_t *lock)
> +{
> +	int index;
> +	int err;
> +
> +again:
> +	if (!ida_pre_get(ida, GFP_KERNEL))
> +		return -ENOMEM;
> +
> +	spin_lock(lock);
> +	err = ida_get_new_above(ida, min_id, &index);
> +	spin_unlock(lock);
> +
> +	if (err == -EAGAIN)
> +		goto again;
> +	else if (err)
> +		return err;
> +
> +	if (index > max_id) {
> +		spin_lock(lock);
> +		ida_remove(ida, index);
> +		spin_unlock(lock);
> +		return -ENOMEM;
> +	}
> +
> +	return index;
> +}
> +
> +static void sync_cop(void *arg)
> +{
> +	struct mm_struct *mm = arg;
> +
> +	if (mm == current->active_mm)
> +		switch_cop(current->active_mm);
> +}
> +
> +/**
> + * Start using a coprocessor.
> + * @acop: mask of coprocessor to be used.
> + * @mm: The mm the coprocessor to associate with. Most likely current mm.
> + *
> + * Return a positive PID if successful. Negative errno otherwise.
> + * The returned PID will be fed to the coprocessor to determine if an
> + * icswx transaction is authenticated.
> + */
> +int use_cop(unsigned long acop, struct mm_struct *mm)
> +{
> +	int ret;
> +
> +	if (!cpu_has_feature(CPU_FTR_ICSWX))
> +		return -ENODEV;
> +
> +	if (!mm || !acop)
> +		return -EINVAL;
> +
> +	/* We need to make sure mm_users doesn't change */
> +	down_read(&mm->mmap_sem);
> +	spin_lock(mm->context.cop_lockp);
> +
> +	if (mm->context.cop_pid == COP_PID_NONE) {
> +		ret = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX,
> +				  &mmu_context_acop_lock);
> +		if (ret < 0)
> +			goto out;
> +
> +		mm->context.cop_pid = ret;
> +	}
> +	mm->context.acop |= acop;
> +
> +	sync_cop(mm);
> +
> +	/*
> +	 * If this is a threaded process then there might be other threads
> +	 * running. We need to send an IPI to force them to pick up any
> +	 * change in PID and ACOP.
> +	 */
> +	if (atomic_read(&mm->mm_users) > 1)
> +		smp_call_function(sync_cop, mm, 1);
> +
> +	ret = mm->context.cop_pid;
> +
> +out:
> +	spin_unlock(mm->context.cop_lockp);
> +	up_read(&mm->mmap_sem);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(use_cop);
> +
> +/**
> + * Stop using a coprocessor.
> + * @acop: mask of coprocessor to be stopped.
> + * @mm: The mm the coprocessor associated with.
> + */
> +void drop_cop(unsigned long acop, struct mm_struct *mm)
> +{
> +	int free_pid = COP_PID_NONE;
> +
> +	if (!cpu_has_feature(CPU_FTR_ICSWX))
> +		return;
> +
> +	if (WARN_ON_ONCE(!mm))
> +		return;
> +
> +	/* We need to make sure mm_users doesn't change */
> +	down_read(&mm->mmap_sem);
> +	spin_lock(mm->context.cop_lockp);
> +
> +	mm->context.acop &= ~acop;
> +
> +	if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) {
> +		free_pid = mm->context.cop_pid;
> +		mm->context.cop_pid = COP_PID_NONE;
> +	}
> +
> +	sync_cop(mm);
> +
> +	/*
> +	 * If this is a threaded process then there might be other threads
> +	 * running. We need to send an IPI to force them to pick up any
> +	 * change in PID and ACOP.
> +	 */
> +	if (atomic_read(&mm->mm_users) > 1)
> +		smp_call_function(sync_cop, mm, 1);
> +
> +	if (free_pid != COP_PID_NONE) {
> +		spin_lock(&mmu_context_acop_lock);
> +		ida_remove(&cop_ida, free_pid);
> +		spin_unlock(&mmu_context_acop_lock);
> +	}
> +
> +	spin_unlock(mm->context.cop_lockp);
> +	up_read(&mm->mmap_sem);
> +}
> +EXPORT_SYMBOL_GPL(drop_cop);
> +
> +#endif /* CONFIG_PPC_ICSWX */
> +
>  static DEFINE_SPINLOCK(mmu_context_lock);
>  static DEFINE_IDA(mmu_context_ida);
>  
> @@ -78,6 +274,16 @@ int init_new_context(struct task_struct
>  		slice_set_user_psize(mm, mmu_virtual_psize);
>  	subpage_prot_init_new_context(mm);
>  	mm->context.id = index;
> +#ifdef CONFIG_PPC_ICSWX
> +	mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
> +	if (!mm->context.cop_lockp) {
> +		__destroy_context(index);
> +		subpage_prot_free(mm);
> +		mm->context.id = NO_CONTEXT;
> +		return -ENOMEM;
> +	}
> +	spin_lock_init(mm->context.cop_lockp);
> +#endif /* CONFIG_PPC_ICSWX */
>  
>  	return 0;
>  }
> @@ -92,6 +298,11 @@ EXPORT_SYMBOL_GPL(__destroy_context);
>  
>  void destroy_context(struct mm_struct *mm)
>  {
> +#ifdef CONFIG_PPC_ICSWX
> +	drop_cop(mm->context.acop, mm);
> +	kfree(mm->context.cop_lockp);
> +	mm->context.cop_lockp = NULL;
> +#endif /* CONFIG_PPC_ICSWX */
>  	__destroy_context(mm->context.id);
>  	subpage_prot_free(mm);
>  	mm->context.id = MMU_NO_CONTEXT;
> Index: linux-powerpc/arch/powerpc/platforms/Kconfig.cputype
> ===================================================================
> --- linux-powerpc.orig/arch/powerpc/platforms/Kconfig.cputype	2011-05-03 16:39:21.809141492 +1000
> +++ linux-powerpc/arch/powerpc/platforms/Kconfig.cputype	2011-05-03 16:39:26.059220272 +1000
> @@ -230,6 +230,24 @@ config VSX
>  
>  	  If in doubt, say Y here.
>  
> +config PPC_ICSWX
> +	bool "Support for PowerPC icswx coprocessor instruction"
> +	depends on POWER4
> +	default n
> +	---help---
> +
> +	  This option enables kernel support for the PowerPC Initiate
> +	  Coprocessor Store Word (icswx) coprocessor instruction on POWER7
> +	  or newer processors.
> +
> +	  This option is only useful if you have a processor that supports
> +	  the icswx coprocessor instruction. It does not have any effect
> +	  on processors without the icswx coprocessor instruction.
> +
> +	  This option slightly increases kernel memory usage.
> +
> +	  If in doubt, say N here.
> +
>  config SPE
>  	bool "SPE Support"
>  	depends on E200 || (E500 && !PPC_E500MC)
Anton Blanchard - May 3, 2011, 6:55 a.m.
Hi Ben,

> My only comment (sorry Anton :-) would have been that we could lazily
> allocate the spinlock on the first use_cop() ... or do we have that
> potentially called in the wrong context ?

I worry about what might happen in a threaded app. It would be a strange
thing to do, but the program may call use_cop from two threads at the
same time. In that case you could corrupt the PID/ACOP values
and leak a spinlock's worth of memory, I think.

Anton
Benjamin Herrenschmidt - May 3, 2011, 8:57 a.m.
On Tue, 2011-05-03 at 16:55 +1000, Anton Blanchard wrote:
> Hi Ben,
> 
> > My only comment (sorry Anton :-) would have been that we could lazily
> > allocate the spinlock on the first use_cop() ... or do we have that
> > potentially called in the wrong context ?
> 
> I worry what might happen in a threaded app. It would be a strange
> thing to do, but the program may call use_cop from two threads at the
> same time. In that case you could corrupt the PID/ACOP values
> and leak a spinlock of memory I think.

OK, it's a bit nasty to solve. I'll put the latest patch in my queue and
if all goes well, it should hit next by the end of the week.

Cheers,
Ben.

Patch

Index: linux-powerpc/arch/powerpc/include/asm/cputable.h
===================================================================
--- linux-powerpc.orig/arch/powerpc/include/asm/cputable.h	2011-05-03 16:39:21.779140935 +1000
+++ linux-powerpc/arch/powerpc/include/asm/cputable.h	2011-05-03 16:39:26.059220272 +1000
@@ -197,6 +197,7 @@  extern const char *powerpc_base_platform
 #define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0200000000000000)
 #define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0400000000000000)
 #define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0800000000000000)
+#define CPU_FTR_ICSWX			LONG_ASM_CONST(0x1000000000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -418,7 +419,8 @@  extern const char *powerpc_base_platform
 	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
 	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
-	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_ICSWX)
 #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
Index: linux-powerpc/arch/powerpc/include/asm/mmu-hash64.h
===================================================================
--- linux-powerpc.orig/arch/powerpc/include/asm/mmu-hash64.h	2011-05-03 16:39:21.789141120 +1000
+++ linux-powerpc/arch/powerpc/include/asm/mmu-hash64.h	2011-05-03 16:39:26.059220272 +1000
@@ -408,6 +408,7 @@  static inline void subpage_prot_init_new
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
 
 typedef unsigned long mm_context_id_t;
+struct spinlock;
 
 typedef struct {
 	mm_context_id_t id;
@@ -423,6 +424,11 @@  typedef struct {
 #ifdef CONFIG_PPC_SUBPAGE_PROT
 	struct subpage_prot_table spt;
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
+#ifdef CONFIG_PPC_ICSWX
+	struct spinlock *cop_lockp; /* guard acop and cop_pid */
+	unsigned long acop;	/* mask of enabled coprocessor types */
+	unsigned int cop_pid;	/* pid value used with coprocessors */
+#endif /* CONFIG_PPC_ICSWX */
 } mm_context_t;
 
 
Index: linux-powerpc/arch/powerpc/include/asm/mmu_context.h
===================================================================
--- linux-powerpc.orig/arch/powerpc/include/asm/mmu_context.h	2011-05-03 16:39:21.779140935 +1000
+++ linux-powerpc/arch/powerpc/include/asm/mmu_context.h	2011-05-03 16:39:26.059220272 +1000
@@ -32,6 +32,10 @@  extern void __destroy_context(unsigned l
 extern void mmu_context_init(void);
 #endif
 
+extern void switch_cop(struct mm_struct *next);
+extern int use_cop(unsigned long acop, struct mm_struct *mm);
+extern void drop_cop(unsigned long acop, struct mm_struct *mm);
+
 /*
  * switch_mm is the entry point called from the architecture independent
  * code in kernel/sched.c
@@ -55,6 +59,12 @@  static inline void switch_mm(struct mm_s
 	if (prev == next)
 		return;
 
+#ifdef CONFIG_PPC_ICSWX
+	/* Switch coprocessor context only if prev or next uses a coprocessor */
+	if (prev->context.acop || next->context.acop)
+		switch_cop(next);
+#endif /* CONFIG_PPC_ICSWX */
+
 	/* We must stop all altivec streams before changing the HW
 	 * context
 	 */
Index: linux-powerpc/arch/powerpc/include/asm/reg.h
===================================================================
--- linux-powerpc.orig/arch/powerpc/include/asm/reg.h	2011-05-03 16:39:21.799141306 +1000
+++ linux-powerpc/arch/powerpc/include/asm/reg.h	2011-05-03 16:39:26.059220272 +1000
@@ -188,6 +188,7 @@ 
 
 #define SPRN_CTR	0x009	/* Count Register */
 #define SPRN_DSCR	0x11
+#define SPRN_ACOP	0x1F	/* Available Coprocessor Register */
 #define SPRN_CTRLF	0x088
 #define SPRN_CTRLT	0x098
 #define   CTRL_CT	0xc0000000	/* current thread */
Index: linux-powerpc/arch/powerpc/mm/mmu_context_hash64.c
===================================================================
--- linux-powerpc.orig/arch/powerpc/mm/mmu_context_hash64.c	2011-05-03 16:39:21.759140565 +1000
+++ linux-powerpc/arch/powerpc/mm/mmu_context_hash64.c	2011-05-03 16:39:26.059220272 +1000
@@ -20,9 +20,205 @@ 
 #include <linux/idr.h>
 #include <linux/module.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
 
 #include <asm/mmu_context.h>
 
+#ifdef CONFIG_PPC_ICSWX
+/*
+ * The processor and its L2 cache cause the icswx instruction to
+ * generate a COP_REQ transaction on PowerBus. The transaction has
+ * no address, and the processor does not perform an MMU access
+ * to authenticate the transaction. The command portion of the
+ * PowerBus COP_REQ transaction includes the LPAR_ID (LPID) and
+ * the coprocessor Process ID (PID), which the coprocessor compares
+ * to the authorized LPID and PID held in the coprocessor, to determine
+ * if the process is authorized to generate the transaction.
+ * The data of the COP_REQ transaction is 128-byte or less and is
+ * placed in cacheable memory on a 128-byte cache line boundary.
+ *
+ * A task that wants to use a coprocessor should call use_cop() to
+ * allocate a coprocessor PID before executing the icswx instruction.
+ * use_cop() also enables coprocessor context switching. drop_cop()
+ * is used to free the coprocessor PID.
+ *
+ * Example:
+ * Host Fabric Interface (HFI) is a PowerPC network coprocessor.
+ * Each HFI has multiple windows. Each HFI window serves as a
+ * network device sending to and receiving from the HFI network.
+ * The HFI immediate send function uses the icswx instruction. It
+ * allows small (single cache-line) packets to be sent without
+ * using the regular HFI send FIFO and doorbell, which are much
+ * slower than immediate send.
+ *
+ * For each task intending to use HFI immediate send, the HFI driver
+ * calls use_cop() to obtain a coprocessor PID for the task.
+ * The HFI driver then allocates a free HFI window and saves the
+ * coprocessor PID to the HFI window to allow the task to use the
+ * HFI window.
+ *
+ * The HFI driver repeatedly creates immediate send packets and
+ * issues icswx instruction to send data through the HFI window.
+ * The HFI compares the coprocessor PID in the CPU PID register
+ * to the PID held in the HFI window to determine if the transaction
+ * is allowed.
+ *
+ * When the task releases the HFI window, the HFI driver calls
+ * drop_cop() to release the coprocessor PID.
+ */
+
+#define COP_PID_NONE 0
+#define COP_PID_MIN (COP_PID_NONE + 1)
+#define COP_PID_MAX (0xFFFF)
+
+static DEFINE_SPINLOCK(mmu_context_acop_lock);
+static DEFINE_IDA(cop_ida);
+
+void switch_cop(struct mm_struct *next)
+{
+	mtspr(SPRN_PID, next->context.cop_pid);
+	mtspr(SPRN_ACOP, next->context.acop);
+}
+
+static int new_cop_pid(struct ida *ida, int min_id, int max_id,
+		       spinlock_t *lock)
+{
+	int index;
+	int err;
+
+again:
+	if (!ida_pre_get(ida, GFP_KERNEL))
+		return -ENOMEM;
+
+	spin_lock(lock);
+	err = ida_get_new_above(ida, min_id, &index);
+	spin_unlock(lock);
+
+	if (err == -EAGAIN)
+		goto again;
+	else if (err)
+		return err;
+
+	if (index > max_id) {
+		spin_lock(lock);
+		ida_remove(ida, index);
+		spin_unlock(lock);
+		return -ENOMEM;
+	}
+
+	return index;
+}
+
+static void sync_cop(void *arg)
+{
+	struct mm_struct *mm = arg;
+
+	if (mm == current->active_mm)
+		switch_cop(current->active_mm);
+}
+
+/**
+ * Start using a coprocessor.
+ * @acop: mask of coprocessor to be used.
+ * @mm: The mm to associate the coprocessor with. Most likely the current mm.
+ *
+ * Return a positive PID if successful. Negative errno otherwise.
+ * The returned PID will be fed to the coprocessor to determine if an
+ * icswx transaction is authenticated.
+ */
+int use_cop(unsigned long acop, struct mm_struct *mm)
+{
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_ICSWX))
+		return -ENODEV;
+
+	if (!mm || !acop)
+		return -EINVAL;
+
+	/* We need to make sure mm_users doesn't change */
+	down_read(&mm->mmap_sem);
+	spin_lock(mm->context.cop_lockp);
+
+	if (mm->context.cop_pid == COP_PID_NONE) {
+		ret = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX,
+				  &mmu_context_acop_lock);
+		if (ret < 0)
+			goto out;
+
+		mm->context.cop_pid = ret;
+	}
+	mm->context.acop |= acop;
+
+	sync_cop(mm);
+
+	/*
+	 * If this is a threaded process then there might be other threads
+	 * running. We need to send an IPI to force them to pick up any
+	 * change in PID and ACOP.
+	 */
+	if (atomic_read(&mm->mm_users) > 1)
+		smp_call_function(sync_cop, mm, 1);
+
+	ret = mm->context.cop_pid;
+
+out:
+	spin_unlock(mm->context.cop_lockp);
+	up_read(&mm->mmap_sem);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(use_cop);
+
+/**
+ * Stop using a coprocessor.
+ * @acop: mask of coprocessor to be stopped.
+ * @mm: The mm the coprocessor is associated with.
+ */
+void drop_cop(unsigned long acop, struct mm_struct *mm)
+{
+	int free_pid = COP_PID_NONE;
+
+	if (!cpu_has_feature(CPU_FTR_ICSWX))
+		return;
+
+	if (WARN_ON_ONCE(!mm))
+		return;
+
+	/* We need to make sure mm_users doesn't change */
+	down_read(&mm->mmap_sem);
+	spin_lock(mm->context.cop_lockp);
+
+	mm->context.acop &= ~acop;
+
+	if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) {
+		free_pid = mm->context.cop_pid;
+		mm->context.cop_pid = COP_PID_NONE;
+	}
+
+	sync_cop(mm);
+
+	/*
+	 * If this is a threaded process then there might be other threads
+	 * running. We need to send an IPI to force them to pick up any
+	 * change in PID and ACOP.
+	 */
+	if (atomic_read(&mm->mm_users) > 1)
+		smp_call_function(sync_cop, mm, 1);
+
+	if (free_pid != COP_PID_NONE) {
+		spin_lock(&mmu_context_acop_lock);
+		ida_remove(&cop_ida, free_pid);
+		spin_unlock(&mmu_context_acop_lock);
+	}
+
+	spin_unlock(mm->context.cop_lockp);
+	up_read(&mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(drop_cop);
+
+#endif /* CONFIG_PPC_ICSWX */
+
 static DEFINE_SPINLOCK(mmu_context_lock);
 static DEFINE_IDA(mmu_context_ida);
 
@@ -78,6 +274,16 @@  int init_new_context(struct task_struct
 		slice_set_user_psize(mm, mmu_virtual_psize);
 	subpage_prot_init_new_context(mm);
 	mm->context.id = index;
+#ifdef CONFIG_PPC_ICSWX
+	mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
+	if (!mm->context.cop_lockp) {
+		__destroy_context(index);
+		subpage_prot_free(mm);
+		mm->context.id = NO_CONTEXT;
+		return -ENOMEM;
+	}
+	spin_lock_init(mm->context.cop_lockp);
+#endif /* CONFIG_PPC_ICSWX */
 
 	return 0;
 }
@@ -92,6 +298,11 @@  EXPORT_SYMBOL_GPL(__destroy_context);
 
 void destroy_context(struct mm_struct *mm)
 {
+#ifdef CONFIG_PPC_ICSWX
+	drop_cop(mm->context.acop, mm);
+	kfree(mm->context.cop_lockp);
+	mm->context.cop_lockp = NULL;
+#endif /* CONFIG_PPC_ICSWX */
 	__destroy_context(mm->context.id);
 	subpage_prot_free(mm);
 	mm->context.id = MMU_NO_CONTEXT;
Index: linux-powerpc/arch/powerpc/platforms/Kconfig.cputype
===================================================================
--- linux-powerpc.orig/arch/powerpc/platforms/Kconfig.cputype	2011-05-03 16:39:21.809141492 +1000
+++ linux-powerpc/arch/powerpc/platforms/Kconfig.cputype	2011-05-03 16:39:26.059220272 +1000
@@ -230,6 +230,24 @@  config VSX
 
 	  If in doubt, say Y here.
 
+config PPC_ICSWX
+	bool "Support for PowerPC icswx coprocessor instruction"
+	depends on POWER4
+	default n
+	---help---
+
+	  This option enables kernel support for the PowerPC Initiate
+	  Coprocessor Store Word (icswx) coprocessor instruction on POWER7
+	  or newer processors.
+
+	  This option is only useful if you have a processor that supports
+	  the icswx coprocessor instruction. It does not have any effect
+	  on processors without the icswx coprocessor instruction.
+
+	  This option slightly increases kernel memory usage.
+
+	  If in doubt, say N here.
+
 config SPE
 	bool "SPE Support"
 	depends on E200 || (E500 && !PPC_E500MC)