@@ -25,17 +25,33 @@ extern atomic_t cxl_use_count;
static inline bool cxl_ctx_in_use(void)
{
- return (atomic_read(&cxl_use_count) != 0);
+ /*
+ * This is called when sending an TLBI, to know whether it
+ * should be global or local.
+ *
+ * We need to make sure the PTE update is happening before
+ * reading the context global flag. Otherwise, reading the
+ * flag may be re-ordered and happen first, and we could end
+ * up in a situation where the old PTE is seen by the device,
+ * but the TLBI is not global.
+ */
+ mb();
+ return (atomic_read(&cxl_use_count) != 0);
}
static inline void cxl_ctx_get(void)
{
- atomic_inc(&cxl_use_count);
+ atomic_inc(&cxl_use_count);
+ /*
+ * Barrier guarantees that the device will receive all TLBIs
+ * from that point on
+ */
+ wmb();
}
static inline void cxl_ctx_put(void)
{
- atomic_dec(&cxl_use_count);
+ atomic_dec(&cxl_use_count);
}
struct cxl_afu *cxl_afu_get(struct cxl_afu *afu);
With the hash memory model, all TLBIs become global when the cxl driver is active, i.e. as soon as one context is open. It is theoretically possible to send a TLBI with the wrong scope as there's currently no memory barrier between when the driver is marked as in use, and attaching a context to the device, therefore we are exposed to re-ordering. It is highly unlikely as the use count for the driver is incremented on open() and the attachment to the device happens on a different system call (ioctl) Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com> --- include/misc/cxl-base.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-)