diff mbox series

[linux,dev-5.10,v3,10/18] ipmi: kcs_bmc: Don't enforce single-open policy in the kernel

Message ID 20210510064955.1704652-11-andrew@aj.id.au
State New
Headers show
Series ipmi: Allow raw access to KCS devices | expand

Commit Message

Andrew Jeffery May 10, 2021, 6:49 a.m. UTC
Soon it will be possible for one KCS device to have multiple associated
chardevs exposed to userspace (for IPMI and raw-style access). However,
don't prevent userspace from:

1. Opening more than one chardev at a time, or
2. Opening the same chardev more than once.

System behaviour is undefined for both classes of multiple access, so
userspace must manage itself accordingly.

The implementation delivers IBF and OBE events to the first chardev
client to associate with the KCS device. An open on a related chardev
cannot associate its client with the KCS device and so will not
receive notification of events. However, any fd on any chardev may still
race with the others when accessing the data and status registers.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
---
 drivers/char/ipmi/kcs_bmc.c         | 34 ++++++++++-------------------
 drivers/char/ipmi/kcs_bmc_aspeed.c  |  3 +--
 drivers/char/ipmi/kcs_bmc_npcm7xx.c |  3 +--
 3 files changed, 14 insertions(+), 26 deletions(-)

Comments

William Kennington May 10, 2021, 8:56 a.m. UTC | #1
Why would we want to change this? I personally think the original
mutual exclusion policy makes even more sense with multiple client
types, so that the upstack programs actually know when they are being
locked out. Having clients that are able to open the fd but remain
broken if they don't do higher level synchronization just feels like a
good way to have hard to understand behavior.

On Sun, May 9, 2021 at 11:56 PM Andrew Jeffery <andrew@aj.id.au> wrote:
>
> Soon it will be possible for one KCS device to have multiple associated
> chardevs exposed to userspace (for IPMI and raw-style access). However,
> don't prevent userspace from:
>
> 1. Opening more than one chardev at a time, or
> 2. Opening the same chardev more than once.
>
> System behaviour is undefined for both classes of multiple access, so
> userspace must manage itself accordingly.
>
> The implementation delivers IBF and OBF events to the first chardev
> client to associate with the KCS device. An open on a related chardev
> cannot associate its client with the KCS device and so will not
> receive notification of events. However, any fd on any chardev may race
> their accesses to the data and status registers.
>
> Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
> ---
>  drivers/char/ipmi/kcs_bmc.c         | 34 ++++++++++-------------------
>  drivers/char/ipmi/kcs_bmc_aspeed.c  |  3 +--
>  drivers/char/ipmi/kcs_bmc_npcm7xx.c |  3 +--
>  3 files changed, 14 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
> index 7081541bb6ce..ad9ff13ba831 100644
> --- a/drivers/char/ipmi/kcs_bmc.c
> +++ b/drivers/char/ipmi/kcs_bmc.c
> @@ -55,24 +55,12 @@ EXPORT_SYMBOL(kcs_bmc_update_status);
>  irqreturn_t kcs_bmc_handle_event(struct kcs_bmc_device *kcs_bmc)
>  {
>         struct kcs_bmc_client *client;
> -       irqreturn_t rc;
> +       irqreturn_t rc = IRQ_NONE;
>
>         spin_lock(&kcs_bmc->lock);
>         client = kcs_bmc->client;
> -       if (client) {
> +       if (client)
>                 rc = client->ops->event(client);
> -       } else {
> -               u8 status;
> -
> -               status = kcs_bmc_read_status(kcs_bmc);
> -               if (status & KCS_BMC_STR_IBF) {
> -                       /* Ack the event by reading the data */
> -                       kcs_bmc_read_data(kcs_bmc);
> -                       rc = IRQ_HANDLED;
> -               } else {
> -                       rc = IRQ_NONE;
> -               }
> -       }
>         spin_unlock(&kcs_bmc->lock);
>
>         return rc;
> @@ -81,26 +69,28 @@ EXPORT_SYMBOL(kcs_bmc_handle_event);
>
>  int kcs_bmc_enable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client *client)
>  {
> -       int rc;
> -
>         spin_lock_irq(&kcs_bmc->lock);
> -       if (kcs_bmc->client) {
> -               rc = -EBUSY;
> -       } else {
> +       if (!kcs_bmc->client) {
> +               u8 mask = KCS_BMC_EVENT_TYPE_IBF;
> +
>                 kcs_bmc->client = client;
> -               rc = 0;
> +               kcs_bmc_update_event_mask(kcs_bmc, mask, mask);
>         }
>         spin_unlock_irq(&kcs_bmc->lock);
>
> -       return rc;
> +       return 0;
>  }
>  EXPORT_SYMBOL(kcs_bmc_enable_device);
>
>  void kcs_bmc_disable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client *client)
>  {
>         spin_lock_irq(&kcs_bmc->lock);
> -       if (client == kcs_bmc->client)
> +       if (client == kcs_bmc->client) {
> +               u8 mask = KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE;
> +
> +               kcs_bmc_update_event_mask(kcs_bmc, mask, 0);
>                 kcs_bmc->client = NULL;
> +       }
>         spin_unlock_irq(&kcs_bmc->lock);
>  }
>  EXPORT_SYMBOL(kcs_bmc_disable_device);
> diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
> index fade0e2faf2c..2c88b34b803c 100644
> --- a/drivers/char/ipmi/kcs_bmc_aspeed.c
> +++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
> @@ -414,8 +414,7 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
>
>         platform_set_drvdata(pdev, priv);
>
> -       aspeed_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE),
> -                                  KCS_BMC_EVENT_TYPE_IBF);
> +       aspeed_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE), 0);
>         aspeed_kcs_enable_channel(kcs_bmc, true);
>
>         kcs_bmc_add_device(&priv->kcs_bmc);
> diff --git a/drivers/char/ipmi/kcs_bmc_npcm7xx.c b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
> index f8b7162fb830..ab4a8caf1270 100644
> --- a/drivers/char/ipmi/kcs_bmc_npcm7xx.c
> +++ b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
> @@ -202,8 +202,7 @@ static int npcm7xx_kcs_probe(struct platform_device *pdev)
>         if (rc)
>                 return rc;
>
> -       npcm7xx_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE),
> -                                   KCS_BMC_EVENT_TYPE_IBF);
> +       npcm7xx_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE), 0);
>         npcm7xx_kcs_enable_channel(kcs_bmc, true);
>
>         pr_info("channel=%u idr=0x%x odr=0x%x str=0x%x\n",
> --
> 2.27.0
>
Andrew Jeffery May 10, 2021, 11:59 p.m. UTC | #2
On Mon, 10 May 2021, at 18:26, William Kennington wrote:
> Why would we want to change this? I personally think the original
> mutual exclusion policy makes even more sense with multiple client
> types, so that the upstack programs actually know when they are being
> locked out. Having clients that are able to open the fd but remain
> broken if they don't do higher level synchronization just feels like a
> good way to have hard to understand behavior.

So there's this from Arnd:

https://lore.kernel.org/lkml/CAK8P3a2e3zNqMJSN-LAAjYmy8Gr=wjn5MMDMinxawOWcMgo7Ww@mail.gmail.com/

> On Wed, Mar 3, 2021 at 2:54 PM Alex Bennée <alex.bennee@linaro.org> wrote:
> >
> > +       /* the rpmb is single open! */
> > +       if (test_and_set_bit(RPMB_DEV_OPEN, &rdev->status))
> > +               return -EBUSY;
> 
> open counters on device nodes are fundamentally broken, because
> they do not stop you from using dup() or sharing the file descriptor
> across a fork. Just remove this.

Assuming Arnd's point didn't stand on its own, trying to prevent 
multiple opens requires two classes of checks in the kernel, where:

1. Multiple file descriptors can be associated with each device node
2. Multiple device nodes can be associated with each device.

Ensuring we don't have multiple-opens via multiple device nodes (2 
above) can be done generally in kcs_bmc.c by associating the device 
node (client) context with the device driver instance and erroring out 
if an association already exists. But addressing 1. requires each 
client (chardev) implementation to enforce the 1-fd-per-node 
requirement as well, which isn't great.

If you squint, the IPMI KCS devices look like a simple UART and so we 
can look to the TTY layer for inspiration. TTYs suffer the same issue 
of hard to understand behaviour in the face of multiple opens, and 
define a single mechanism for avoiding both 1 and 2 above by way of 
userspace lock files:

https://tldp.org/HOWTO/Serial-HOWTO-13.html

The lock should be defined in terms of the underlying device as we can 
have multiple behaviours exposed through multiple chardevs for each 
device. The device can be derived by e.g:

```
# echo $(basename $(realpath /sys$(udevadm info --query=path /dev/raw-kcs4)/device))
1e789114.kcs
```

Given that the kernel currently attempts to prevent multiple open we 
can assume this is something userspace isn't doing, thus it's safe to 
lift the restriction.

If we do have userspace competing for access then it needs to implement 
the locking scheme outlined above, which can be done regardless of 
whether the kernel supports multiple-open or not.

Andrew
Zev Weiss May 21, 2021, 7:43 a.m. UTC | #3
On Mon, May 10, 2021 at 06:59:24PM CDT, Andrew Jeffery wrote:
>On Mon, 10 May 2021, at 18:26, William Kennington wrote:
>> Why would we want to change this? I personally think the original
>> mutual exclusion policy makes even more sense with multiple client
>> types, so that the upstack programs actually know when they are being
>> locked out. Having clients that are able to open the fd but remain
>> broken if they don't do higher level synchronization just feels like a
>> good way to have hard to understand behavior.
>
>So there's this from Arnd:
>
>https://lore.kernel.org/lkml/CAK8P3a2e3zNqMJSN-LAAjYmy8Gr=wjn5MMDMinxawOWcMgo7Ww@mail.gmail.com/
>
>> On Wed, Mar 3, 2021 at 2:54 PM Alex Bennée <alex.bennee@linaro.org> wrote:
>> >
>> > +       /* the rpmb is single open! */
>> > +       if (test_and_set_bit(RPMB_DEV_OPEN, &rdev->status))
>> > +               return -EBUSY;
>>
>> open counters on device nodes are fundamentally broken, because
>> they do not stop you from using dup() or sharing the file descriptor
>> across a fork. Just remove this.
>
>Assuming Arnd's point didn't stand on its own, trying to prevent
>multiple opens requires two classes of checks in the kernel, where:
>
>1. Multiple file descriptors can be associated with each device node
>2. Multiple device nodes can be associated with each device.
>
>Ensuring we don't have multiple-opens via multiple device nodes (2
>above) can be done generally in kcs_bmc.c by associating the device
>node (client) context with the device driver instance and erroring out
>if an association already exists. But addressing 1. requires each
>client (chardev) implementation to enforce the 1-fd-per-node
>requirement as well, which isn't great.
>
>If you squint, the IPMI KCS devices look like a simple UART and so we
>can look to the TTY layer for inspiration. TTYs suffer the same issue
>of hard to understand behaviour in the face of multiple opens, and
>define a single mechanism for avoiding both 1 and 2 above by way of
>userspace lock files:
>
>https://tldp.org/HOWTO/Serial-HOWTO-13.html
>
>The lock should be defined in terms of the underlying device as we can
>have multiple behaviours exposed through multiple chardevs for each
>device. The device can be derived by e.g:
>
>```
># echo $(basename $(realpath /sys$(udevadm info --query=path /dev/raw-kcs4)/device))
>1e789114.kcs
>```
>
>Given that the kernel currently attempts to prevent multiple open we
>can assume this is something userspace isn't doing, thus it's safe to
>lift the restriction.
>
>If we do have userspace competing for access then it needs to implement
>the locking scheme outlined above, which can be done regardless of
>whether the kernel supports multiple-open or not.
>
>Andrew

I guess I'm kind of on the fence on this one, though maybe leaning
slightly in William's direction.  Certainly as Arnd's point illustrates
it's never going to be a totally foolproof safeguard against userspace
that's sufficiently determined to get itself into trouble (e.g. dup(),
fork()), and yes, it does seem fairly analogous to the situation with
ttys...but at the same time, it's not like it compromises the integrity
of the kernel if it does end up happening anyway, and if as your commit
message says multiple-access leads to undefined behavior, it seems like
we might as well make that trap slightly harder to fall into instead of
easier?



Zev
diff mbox series

Patch

diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
index 7081541bb6ce..ad9ff13ba831 100644
--- a/drivers/char/ipmi/kcs_bmc.c
+++ b/drivers/char/ipmi/kcs_bmc.c
@@ -55,24 +55,12 @@  EXPORT_SYMBOL(kcs_bmc_update_status);
 irqreturn_t kcs_bmc_handle_event(struct kcs_bmc_device *kcs_bmc)
 {
 	struct kcs_bmc_client *client;
-	irqreturn_t rc;
+	irqreturn_t rc = IRQ_NONE;
 
 	spin_lock(&kcs_bmc->lock);
 	client = kcs_bmc->client;
-	if (client) {
+	if (client)
 		rc = client->ops->event(client);
-	} else {
-		u8 status;
-
-		status = kcs_bmc_read_status(kcs_bmc);
-		if (status & KCS_BMC_STR_IBF) {
-			/* Ack the event by reading the data */
-			kcs_bmc_read_data(kcs_bmc);
-			rc = IRQ_HANDLED;
-		} else {
-			rc = IRQ_NONE;
-		}
-	}
 	spin_unlock(&kcs_bmc->lock);
 
 	return rc;
@@ -81,26 +69,28 @@  EXPORT_SYMBOL(kcs_bmc_handle_event);
 
 int kcs_bmc_enable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client *client)
 {
-	int rc;
-
 	spin_lock_irq(&kcs_bmc->lock);
-	if (kcs_bmc->client) {
-		rc = -EBUSY;
-	} else {
+	if (!kcs_bmc->client) {
+		u8 mask = KCS_BMC_EVENT_TYPE_IBF;
+
 		kcs_bmc->client = client;
-		rc = 0;
+		kcs_bmc_update_event_mask(kcs_bmc, mask, mask);
 	}
 	spin_unlock_irq(&kcs_bmc->lock);
 
-	return rc;
+	return 0;
 }
 EXPORT_SYMBOL(kcs_bmc_enable_device);
 
 void kcs_bmc_disable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client *client)
 {
 	spin_lock_irq(&kcs_bmc->lock);
-	if (client == kcs_bmc->client)
+	if (client == kcs_bmc->client) {
+		u8 mask = KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE;
+
+		kcs_bmc_update_event_mask(kcs_bmc, mask, 0);
 		kcs_bmc->client = NULL;
+	}
 	spin_unlock_irq(&kcs_bmc->lock);
 }
 EXPORT_SYMBOL(kcs_bmc_disable_device);
diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
index fade0e2faf2c..2c88b34b803c 100644
--- a/drivers/char/ipmi/kcs_bmc_aspeed.c
+++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
@@ -414,8 +414,7 @@  static int aspeed_kcs_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, priv);
 
-	aspeed_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE),
-				   KCS_BMC_EVENT_TYPE_IBF);
+	aspeed_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE), 0);
 	aspeed_kcs_enable_channel(kcs_bmc, true);
 
 	kcs_bmc_add_device(&priv->kcs_bmc);
diff --git a/drivers/char/ipmi/kcs_bmc_npcm7xx.c b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
index f8b7162fb830..ab4a8caf1270 100644
--- a/drivers/char/ipmi/kcs_bmc_npcm7xx.c
+++ b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
@@ -202,8 +202,7 @@  static int npcm7xx_kcs_probe(struct platform_device *pdev)
 	if (rc)
 		return rc;
 
-	npcm7xx_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE),
-				    KCS_BMC_EVENT_TYPE_IBF);
+	npcm7xx_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE), 0);
 	npcm7xx_kcs_enable_channel(kcs_bmc, true);
 
 	pr_info("channel=%u idr=0x%x odr=0x%x str=0x%x\n",