[arm/aspeed/ast2500,v2] ipmi: add an Aspeed KCS IPMI BMC driver

Message ID 1516810009-16353-1-git-send-email-haiyue.wang@linux.intel.com
State Not Applicable, archived
Headers show
Series
  • [arm/aspeed/ast2500,v2] ipmi: add an Aspeed KCS IPMI BMC driver
Related show

Commit Message

Wang, Haiyue Jan. 24, 2018, 4:06 p.m.
The KCS (Keyboard Controller Style) interface is used to perform in-band
IPMI communication between a server host and its BMC (BaseBoard Management
Controllers).

This driver exposes the KCS interface on ASpeed SOCs (AST2400 and AST2500)
as a character device. Such SOCs are commonly used as BMCs and this driver
implements the BMC side of the KCS interface.

Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>

---
v1->v2

- Divide the driver into two parts, one handles the BMC KCS IPMI 2.0 state;
  the other handles the BMC KCS controller such as AST2500 IO accessing. 
- Use the spin lock APIs to handle the device file operations and BMC chip
  IRQ interface for accessing the same KCS BMC data structure.
- Enhanced the phases handling of the KCS BMC.
- Unified the IOCTL definition for IPMI BMC, it will be used by KCS and BT.

---
 .../devicetree/bindings/ipmi/aspeed-kcs-bmc.txt    |  26 ++
 drivers/char/ipmi/Kconfig                          |  20 +
 drivers/char/ipmi/Makefile                         |   2 +
 drivers/char/ipmi/kcs_bmc.c                        | 430 +++++++++++++++++++++
 drivers/char/ipmi/kcs_bmc.h                        |  88 +++++
 drivers/char/ipmi/kcs_bmc_aspeed.c                 | 319 +++++++++++++++
 include/uapi/linux/ipmi_bmc.h                      |  14 +
 7 files changed, 899 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt
 create mode 100644 drivers/char/ipmi/kcs_bmc.c
 create mode 100644 drivers/char/ipmi/kcs_bmc.h
 create mode 100644 drivers/char/ipmi/kcs_bmc_aspeed.c
 create mode 100644 include/uapi/linux/ipmi_bmc.h

Comments

Andy Shevchenko Jan. 24, 2018, 5:05 p.m. | #1
On Thu, 2018-01-25 at 00:06 +0800, Haiyue Wang wrote:
> The KCS (Keyboard Controller Style) interface is used to perform in-
> band
> IPMI communication between a server host and its BMC (BaseBoard
> Management
> Controllers).
> 
> This driver exposes the KCS interface on ASpeed SOCs (AST2400 and
> AST2500)
> as a character device. Such SOCs are commonly used as BMCs and this
> driver
> implements the BMC side of the KCS interface.

> +config IPMI_KCS_BMC
> +	tristate 'IPMI KCS BMC Interface'
> +	help
> +	  Provides a device driver for the KCS (Keyboard Controller
> Style)
> +	  IPMI interface which meets the requirement of the BMC
> (Baseboard
> +	  Management Controllers) side for handling the IPMI request
> from
> +	  host system software.

Now it's time to split this into two patches.

> +config ASPEED_KCS_IPMI_BMC
> +	depends on ARCH_ASPEED || COMPILE_TEST
> +	depends on IPMI_KCS_BMC
> +	select REGMAP_MMIO
> +	tristate "Aspeed KCS IPMI BMC driver"
> +	help
> +	  Provides a driver for the KCS (Keyboard Controller Style)
> IPMI
> +	  interface found on Aspeed SOCs (AST2400 and AST2500).
> +
> +	  The driver implements the BMC side of the KCS contorller,
> it
> +	  provides the access of KCS IO space for BMC side.

> +obj-$(CONFIG_IPMI_KCS_BMC) += kcs_bmc.o
>  obj-$(CONFIG_ASPEED_BT_IPMI_BMC) += bt-bmc.o
> +obj-$(CONFIG_ASPEED_KCS_IPMI_BMC) += kcs_bmc_aspeed.o
> \ No newline at end of file

Please fix your text editor settings: a text file must end with a
newline (\n).

> +/* IPMI 2.0 - Table 9-1, KCS Interface Status Register Bits */
> +#define KCS_STATUS_STATE(state) (state << 6)
> +#define KCS_STATUS_STATE_MASK   KCS_STATUS_STATE(0x3)

GENMASK(7, 6)

> +
> +

Remove extra line in such cases

> +static inline u8 read_data(struct kcs_bmc *kcs_bmc)
> +{
> +	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.idr);
> +}
> +
> +static inline void write_data(struct kcs_bmc *kcs_bmc, u8 data)
> +{
> +	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.odr, data);
> +}
> +
> +static inline u8 read_status(struct kcs_bmc *kcs_bmc)
> +{
> +	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.str);
> +}
> +
> +static inline void write_status(struct kcs_bmc *kcs_bmc, u8 data)
> +{
> +	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.str, data);
> +}
> +
> +static void update_status_bits(struct kcs_bmc *kcs_bmc, u8 mask, u8
> val)
> +{
> +	u8 tmp;
> +
> +	tmp = read_status(kcs_bmc);
> +
> +	tmp &= ~mask;
> +	tmp |= val & mask;
> +
> +	write_status(kcs_bmc, tmp);
> +}

Shouldn't the above use some kind of regmap API?

> +int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
> +{
> +	unsigned long flags;
> +	int ret = 0;
> +	u8 status;
> +
> +	spin_lock_irqsave(&kcs_bmc->lock, flags);
> +
> +	status = read_status(kcs_bmc) & (KCS_STATUS_IBF |
> KCS_STATUS_CMD_DAT);
> +
> +	switch (status) {
> +	case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
> +		kcs_bmc_handle_command(kcs_bmc);
> +		break;
> +
> +	case KCS_STATUS_IBF:
> +		kcs_bmc_handle_data(kcs_bmc);
> +		break;
> +
> +	default:

> +		ret = -1;

Use proper errno.

> +		break;
> +	}
> +
> +	spin_unlock_irqrestore(&kcs_bmc->lock, flags);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(kcs_bmc_handle_event);
> +
> +static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
> +{
> +	return container_of(filp->private_data, struct kcs_bmc,
> miscdev);
> +}

We usually name such helpers to_<smth>(), where <smth> in your case is kcs_bmc.

> +static ssize_t kcs_bmc_write(struct file *filp, const char *buf,
> +			     size_t count, loff_t *offset)
> +{
> +	struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
> +	ssize_t ret = count;
> +
> +	if (count < 1 || count > KCS_MSG_BUFSIZ)
> +		return -EINVAL;

Is the first part even possible?

> +}


> +struct kcs_bmc *kcs_bmc_alloc(struct device *dev, int sizeof_priv,
> u32 channel)
> +{
> +	struct kcs_bmc *kcs_bmc;

> +	int rc;

What does the compiler think about this? (rc is declared but never used.)

> +
> +	kcs_bmc = devm_kzalloc(dev, sizeof(*kcs_bmc) + sizeof_priv,
> GFP_KERNEL);
> +	if (!kcs_bmc)
> +		return NULL;

> +	dev_set_name(dev, "ipmi-kcs%u", channel);
> +
> +	spin_lock_init(&kcs_bmc->lock);
> +	kcs_bmc->channel = channel;
> +
> +	init_waitqueue_head(&kcs_bmc->queue);

> +	kcs_bmc->data_in  = devm_kmalloc(dev, KCS_MSG_BUFSIZ,
> GFP_KERNEL);
> +	kcs_bmc->data_out = devm_kmalloc(dev, KCS_MSG_BUFSIZ,
> GFP_KERNEL);
> +	if (kcs_bmc->data_in == NULL || kcs_bmc->data_out == NULL) {
> +		dev_err(dev, "Failed to allocate data buffers\n");
> +		return NULL;
> +	}

Split checks per allocation.

> +	kcs_bmc->miscdev.minor = MISC_DYNAMIC_MINOR;
> +	kcs_bmc->miscdev.name = dev_name(dev);
> +	kcs_bmc->miscdev.fops = &kcs_bmc_fops;
> +
> +	return kcs_bmc;
> +}
> +EXPORT_SYMBOL(kcs_bmc_alloc);
> 

> +/* Different phases of the KCS BMC module */
> +enum kcs_phases {
> +	/* BMC should not be expecting nor sending any data. */
> +	KCS_PHASE_IDLE,

Perhaps kernel-doc?

> +};


> +/* IPMI 2.0 - 9.5, KCS Interface Registers */
> +struct kcs_ioreg {
> +	u32 idr; /* Input Data Register */
> +	u32 odr; /* Output Data Register */
> +	u32 str; /* Status Register */

kernel-doc

> +};
> +
> +struct kcs_bmc {
> +	spinlock_t lock;
> +
> +	u32 channel;
> +	int running;
> +
> +	/* Setup by BMC KCS controller driver */
> +	struct kcs_ioreg ioreg;
> +	u8 (*io_inputb)(struct kcs_bmc *kcs_bmc, u32 reg);
> +	void (*io_outputb)(struct kcs_bmc *kcs_bmc, u32 reg, u8 b);
> +
> +	enum kcs_phases phase;
> +	enum kcs_errors error;
> +
> +	wait_queue_head_t queue;
> +	bool data_in_avail;
> +	int  data_in_idx;
> +	u8  *data_in;
> +
> +	int  data_out_idx;
> +	int  data_out_len;
> +	u8  *data_out;
> +
> +	struct miscdevice miscdev;
> +
> +	unsigned long long priv[];

unsigned long is enough.

> +};
> +
> +static inline void *kcs_bmc_priv(const struct kcs_bmc *kcs_bmc)
> +{
> +	return kcs_bmc->priv;
> +}
> +
> +extern int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc);
> +extern struct kcs_bmc *kcs_bmc_alloc(struct device *dev, int
> sizeof_priv,
> +					u32 channel);

Drop extern.

> +#endif

The next file can be reviewed once you split this patch into two.
Corey Minyard Jan. 24, 2018, 5:48 p.m. | #2
On 01/24/2018 10:06 AM, Haiyue Wang wrote:
> The KCS (Keyboard Controller Style) interface is used to perform in-band
> IPMI communication between a server host and its BMC (BaseBoard Management
> Controllers).
>
> This driver exposes the KCS interface on ASpeed SOCs (AST2400 and AST2500)
> as a character device. Such SOCs are commonly used as BMCs and this driver
> implements the BMC side of the KCS interface.
>
> Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
>
> ---
> v1->v2
>
> - Divide the driver into two parts, one handles the BMC KCS IPMI 2.0 state;
>    the other handles the BMC KCS controller such as AST2500 IO accessing.
> - Use the spin lock APIs to handle the device file operations and BMC chip
>    IRQ interface for accessing the same KCS BMC data structure.
> - Enhanced the phases handling of the KCS BMC.
> - Unified the IOCTL definition for IPMI BMC, it will be used by KCS and BT.
>
> ---
>   .../devicetree/bindings/ipmi/aspeed-kcs-bmc.txt    |  26 ++
>   drivers/char/ipmi/Kconfig                          |  20 +
>   drivers/char/ipmi/Makefile                         |   2 +
>   drivers/char/ipmi/kcs_bmc.c                        | 430 +++++++++++++++++++++
>   drivers/char/ipmi/kcs_bmc.h                        |  88 +++++
>   drivers/char/ipmi/kcs_bmc_aspeed.c                 | 319 +++++++++++++++
>   include/uapi/linux/ipmi_bmc.h                      |  14 +
>   7 files changed, 899 insertions(+)
>   create mode 100644 Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt
>   create mode 100644 drivers/char/ipmi/kcs_bmc.c
>   create mode 100644 drivers/char/ipmi/kcs_bmc.h
>   create mode 100644 drivers/char/ipmi/kcs_bmc_aspeed.c
>   create mode 100644 include/uapi/linux/ipmi_bmc.h
>
> diff --git a/Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt
> new file mode 100644
> index 0000000..613c34c
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt
> @@ -0,0 +1,26 @@
> +* Aspeed KCS (Keyboard Controller Style) IPMI interface
> +
> +The Aspeed SOCs (AST2400 and AST2500) are commonly used as BMCs
> +(Baseboard Management Controllers) and the KCS interface can be
> +used to perform in-band IPMI communication with their host.
> +
> +Required properties:
> +- compatible : should be one of
> +    "aspeed,ast2400-kcs-bmc"
> +    "aspeed,ast2500-kcs-bmc"
> +- interrupts : interrupt generated by the controller
> +- kcs_chan : The LPC channel number in the controller
> +- kcs_addr : The host CPU IO map address
> +
> +
> +Example:
> +
> +    kcs3: kcs3@0 {
> +        compatible = "aspeed,ast2500-kcs-bmc";
> +        reg = <0x0 0x80>;
> +        interrupts = <8>;
> +        kcs_chan = <3>;
> +        kcs_addr = <0xCA2>;
> +        status = "okay";
> +    };
> +
> diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
> index 3544abc..770def0 100644
> --- a/drivers/char/ipmi/Kconfig
> +++ b/drivers/char/ipmi/Kconfig
> @@ -96,6 +96,26 @@ config IPMI_POWEROFF
>   
>   endif # IPMI_HANDLER
>   
> +config IPMI_KCS_BMC
> +	tristate 'IPMI KCS BMC Interface'
> +	help
> +	  Provides a device driver for the KCS (Keyboard Controller Style)
> +	  IPMI interface which meets the requirement of the BMC (Baseboard
> +	  Management Controllers) side for handling the IPMI request from
> +	  host system software.
> +
> +config ASPEED_KCS_IPMI_BMC
> +	depends on ARCH_ASPEED || COMPILE_TEST
> +	depends on IPMI_KCS_BMC
> +	select REGMAP_MMIO
> +	tristate "Aspeed KCS IPMI BMC driver"
> +	help
> +	  Provides a driver for the KCS (Keyboard Controller Style) IPMI
> +	  interface found on Aspeed SOCs (AST2400 and AST2500).
> +
> +	  The driver implements the BMC side of the KCS contorller, it
> +	  provides the access of KCS IO space for BMC side.
> +
>   config ASPEED_BT_IPMI_BMC
>   	depends on ARCH_ASPEED || COMPILE_TEST
>          depends on REGMAP && REGMAP_MMIO && MFD_SYSCON
> diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile
> index 33b899f..10c591a 100644
> --- a/drivers/char/ipmi/Makefile
> +++ b/drivers/char/ipmi/Makefile
> @@ -21,4 +21,6 @@ obj-$(CONFIG_IPMI_SSIF) += ipmi_ssif.o
>   obj-$(CONFIG_IPMI_POWERNV) += ipmi_powernv.o
>   obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o
>   obj-$(CONFIG_IPMI_POWEROFF) += ipmi_poweroff.o
> +obj-$(CONFIG_IPMI_KCS_BMC) += kcs_bmc.o
>   obj-$(CONFIG_ASPEED_BT_IPMI_BMC) += bt-bmc.o
> +obj-$(CONFIG_ASPEED_KCS_IPMI_BMC) += kcs_bmc_aspeed.o
> \ No newline at end of file
> diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
> new file mode 100644
> index 0000000..4ef3c0b
> --- /dev/null
> +++ b/drivers/char/ipmi/kcs_bmc.c
> @@ -0,0 +1,430 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (c) 2015-2018, Intel Corporation.
> +
> +#include <linux/errno.h>
> +#include <linux/io.h>
> +#include <linux/ipmi_bmc.h>
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/poll.h>
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +
> +#include "kcs_bmc.h"
> +
> +#define KCS_MSG_BUFSIZ    1024
> +
> +#define KCS_ZERO_DATA     0
> +
> +
> +/* IPMI 2.0 - Table 9-1, KCS Interface Status Register Bits */
> +#define KCS_STATUS_STATE(state) (state << 6)
> +#define KCS_STATUS_STATE_MASK   KCS_STATUS_STATE(0x3)
> +#define KCS_STATUS_CMD_DAT      BIT(3)
> +#define KCS_STATUS_SMS_ATN      BIT(2)
> +#define KCS_STATUS_IBF          BIT(1)
> +#define KCS_STATUS_OBF          BIT(0)
> +
> +/* IPMI 2.0 - Table 9-2, KCS Interface State Bits */
> +enum kcs_states {
> +	IDLE_STATE  = 0,
> +	READ_STATE  = 1,
> +	WRITE_STATE = 2,
> +	ERROR_STATE = 3,
> +};
> +
> +/* IPMI 2.0 - Table 9-3, KCS Interface Control Codes */
> +#define KCS_CMD_GET_STATUS      0x60
> +#define KCS_CMD_ABORT           0x60
> +#define KCS_CMD_WRITE_START     0x61
> +#define KCS_CMD_WRITE_END       0x62
> +#define KCS_CMD_READ_BYTE       0x68
> +
> +
> +static inline u8 read_data(struct kcs_bmc *kcs_bmc)
> +{
> +	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.idr);
> +}
> +
> +static inline void write_data(struct kcs_bmc *kcs_bmc, u8 data)
> +{
> +	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.odr, data);
> +}
> +
> +static inline u8 read_status(struct kcs_bmc *kcs_bmc)
> +{
> +	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.str);
> +}
> +
> +static inline void write_status(struct kcs_bmc *kcs_bmc, u8 data)
> +{
> +	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.str, data);
> +}
> +
> +static void update_status_bits(struct kcs_bmc *kcs_bmc, u8 mask, u8 val)
> +{
> +	u8 tmp;
> +
> +	tmp = read_status(kcs_bmc);
> +
> +	tmp &= ~mask;
> +	tmp |= val & mask;
> +
> +	write_status(kcs_bmc, tmp);
> +}
> +
> +static inline void set_state(struct kcs_bmc *kcs_bmc, u8 state)
> +{
> +	update_status_bits(kcs_bmc, KCS_STATUS_STATE_MASK,
> +					KCS_STATUS_STATE(state));
> +}
> +
> +
> +static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
> +{
> +	u8 data;
> +
> +	switch (kcs_bmc->phase) {
> +	case KCS_PHASE_WRITE:
> +		set_state(kcs_bmc, WRITE_STATE);
> +
> +		/* set OBF before reading data */
> +		write_data(kcs_bmc, KCS_ZERO_DATA);
> +
> +		if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
> +			kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
> +						read_data(kcs_bmc);
> +		break;
> +
> +	case KCS_PHASE_WRITE_END:
> +		set_state(kcs_bmc, READ_STATE);
> +
> +		if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
> +			kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
> +						read_data(kcs_bmc);
> +
> +		kcs_bmc->phase = KCS_PHASE_WAIT_READ;
> +		if (kcs_bmc->running) {

Why do you only do this when running is set?  It won't hurt anything if
it's not set.  As it is, you have a race if something opens the device
while this code runs.

Also, don't set the state to wait read until the "write" has finished
(userland has read the data out of the buffer).  More on that later.

> +			kcs_bmc->data_in_avail = true;
> +			wake_up_interruptible(&kcs_bmc->queue);
> +		}
> +		break;
> +
> +	case KCS_PHASE_READ:
> +		if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
> +			set_state(kcs_bmc, IDLE_STATE);
> +
> +		data = read_data(kcs_bmc);
> +		if (data != KCS_CMD_READ_BYTE) {
> +			set_state(kcs_bmc, ERROR_STATE);
> +			write_data(kcs_bmc, KCS_ZERO_DATA);
> +			break;
> +		}
> +
> +		if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
> +			write_data(kcs_bmc, KCS_ZERO_DATA);
> +			kcs_bmc->phase = KCS_PHASE_IDLE;
> +			break;
> +		}
> +
> +		write_data(kcs_bmc,
> +			kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
> +		break;
> +
> +	case KCS_PHASE_ABORT_ERROR1:
> +		set_state(kcs_bmc, READ_STATE);
> +
> +		/* Read the Dummy byte */
> +		read_data(kcs_bmc);
> +
> +		write_data(kcs_bmc, kcs_bmc->error);
> +		kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
> +		break;
> +
> +	case KCS_PHASE_ABORT_ERROR2:
> +		set_state(kcs_bmc, IDLE_STATE);
> +
> +		/* Read the Dummy byte */
> +		read_data(kcs_bmc);
> +
> +		write_data(kcs_bmc, KCS_ZERO_DATA);
> +		kcs_bmc->phase = KCS_PHASE_IDLE;
> +
> +		break;
> +
> +	default:
> +		set_state(kcs_bmc, ERROR_STATE);
> +
> +		/* Read the Dummy byte */
> +		read_data(kcs_bmc);
> +
> +		write_data(kcs_bmc, KCS_ZERO_DATA);
> +		break;
> +	}
> +}
> +
> +static void kcs_bmc_handle_command(struct kcs_bmc *kcs_bmc)
> +{
> +	u8 cmd;
> +
> +	set_state(kcs_bmc, WRITE_STATE);
> +
> +	/* Dummy data to generate OBF */
> +	write_data(kcs_bmc, KCS_ZERO_DATA);
> +
> +	cmd = read_data(kcs_bmc);

Shouldn't you check the phase in all the cases below and do error
handling if the phase isn't correct?

Similar thing if the device here isn't open.  You need to handle
that gracefully.

Also, you should remove data_in_avail and data_in_idx setting from
here, for reasons I will explain later.

> +	switch (cmd) {
> +	case KCS_CMD_WRITE_START:
> +		kcs_bmc->data_in_avail = false;
> +		kcs_bmc->data_in_idx   = 0;
> +		kcs_bmc->phase         = KCS_PHASE_WRITE;
> +		kcs_bmc->error         = KCS_NO_ERROR;
> +		break;
> +
> +	case KCS_CMD_WRITE_END:
> +		kcs_bmc->phase = KCS_PHASE_WRITE_END;
> +		break;
> +
> +	case KCS_CMD_ABORT:
> +		if (kcs_bmc->error == KCS_NO_ERROR)
> +			kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
> +
> +		kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
> +		break;
> +
> +	default:
> +		kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
> +		set_state(kcs_bmc, ERROR_STATE);
> +		write_data(kcs_bmc, kcs_bmc->error);
> +		kcs_bmc->phase = KCS_PHASE_ERROR;
> +		break;
> +	}
> +}
> +
> +int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
> +{
> +	unsigned long flags;
> +	int ret = 0;
> +	u8 status;
> +
> +	spin_lock_irqsave(&kcs_bmc->lock, flags);
> +
> +	status = read_status(kcs_bmc) & (KCS_STATUS_IBF | KCS_STATUS_CMD_DAT);
> +
> +	switch (status) {
> +	case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
> +		kcs_bmc_handle_command(kcs_bmc);
> +		break;
> +
> +	case KCS_STATUS_IBF:
> +		kcs_bmc_handle_data(kcs_bmc);
> +		break;
> +
> +	default:
> +		ret = -1;
> +		break;
> +	}
> +
> +	spin_unlock_irqrestore(&kcs_bmc->lock, flags);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(kcs_bmc_handle_event);
> +
> +static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
> +{
> +	return container_of(filp->private_data, struct kcs_bmc, miscdev);
> +}
> +
> +static int kcs_bmc_open(struct inode *inode, struct file *filp)
> +{
> +	struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
> +	int ret = 0;
> +
> +	spin_lock_irq(&kcs_bmc->lock);
> +
> +	if (!kcs_bmc->running) {
> +		kcs_bmc->running       = 1;
> +		kcs_bmc->phase         = KCS_PHASE_IDLE;
> +		kcs_bmc->data_in_avail = false;

If you do everything right, setting the phase and data_in_avail should not
be necessary here.

> +	} else {
> +		ret = -EBUSY;
> +	}
> +
> +	spin_unlock_irq(&kcs_bmc->lock);
> +
> +	return ret;
> +}
> +
> +static unsigned int kcs_bmc_poll(struct file *filp, poll_table *wait)
> +{
> +	struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
> +	unsigned int mask = 0;
> +
> +	poll_wait(filp, &kcs_bmc->queue, wait);
> +
> +	spin_lock_irq(&kcs_bmc->lock);
> +
> +	if (kcs_bmc->data_in_avail)
> +		mask |= POLLIN;
> +
> +	spin_unlock_irq(&kcs_bmc->lock);
> +
> +	return mask;
> +}
> +
> +static ssize_t kcs_bmc_read(struct file *filp, char *buf,
> +			    size_t count, loff_t *offset)
> +{
> +	struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
> +	ssize_t ret = -EAGAIN;
> +

This function still has some issues.

You can't call copy_to_user() with a spinlock held or interrupts disabled.
To handle readers, you probably need a separate mutex.

Also, this function can return -EAGAIN even if O_NONBLOCK is not set if
kcs_bmc->data_in_avail changes between when you wait on the event
and when you check it under the lock.

You also clear data_in_avail even if the copy_to_user() fails, which is
wrong.

I believe the best way to handle this would be to have the spinlock
protect the inner workings of the state machine and a mutex handle
copying data out, setting/clearing the running flag (thus a mutex
instead of spinlock in open and release) and the ioctl settings (except
for abort where you will need to grab the spinlock).

After the wait event below, grab the mutex.  If data is not available
and O_NONBLOCK is not set, drop the mutex and retry.  Otherwise
this is the only place (besides release) that sets data_in_avail to false.
Do the copy_to_user(), grab the spinlock, clear data_in_avail and
data_in_idx, then release the lock and mutex.  If you are really
adventurous you can do this without grabbing the lock using
barriers, but it's probably not necessary here.

> +	if (!(filp->f_flags & O_NONBLOCK))
> +		wait_event_interruptible(kcs_bmc->queue,
> +					 kcs_bmc->data_in_avail);
> +
> +	spin_lock_irq(&kcs_bmc->lock);
> +
> +	if (kcs_bmc->data_in_avail) {
> +		kcs_bmc->data_in_avail = false;
> +
> +		if (count > kcs_bmc->data_in_idx)
> +			count = kcs_bmc->data_in_idx;
> +
> +		if (!copy_to_user(buf, kcs_bmc->data_in, count))
> +			ret = count;
> +		else
> +			ret = -EFAULT;
> +	}
> +
> +	spin_unlock_irq(&kcs_bmc->lock);
> +
> +	return ret;
> +}
> +
> +static ssize_t kcs_bmc_write(struct file *filp, const char *buf,
> +			     size_t count, loff_t *offset)
> +{
> +	struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
> +	ssize_t ret = count;
> +
> +	if (count < 1 || count > KCS_MSG_BUFSIZ)
> +		return -EINVAL;
> +
> +	spin_lock_irq(&kcs_bmc->lock);
> +
> +	if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
> +		if (copy_from_user(kcs_bmc->data_out, buf, count)) {
> +			spin_unlock_irq(&kcs_bmc->lock);
> +			return -EFAULT;
> +		}
> +
> +		kcs_bmc->phase = KCS_PHASE_READ;
> +		kcs_bmc->data_out_idx = 1;
> +		kcs_bmc->data_out_len = count;
> +		write_data(kcs_bmc, kcs_bmc->data_out[0]);
> +	} else if (kcs_bmc->phase == KCS_PHASE_READ) {
> +		ret = -EBUSY;
> +	} else {
> +		ret = -EINVAL;

Is there a reason you return -EINVAL here?  Why not just -EBUSY in all
cases?  Is there something that userland will need to do differently?

> +	}
> +
> +	spin_unlock_irq(&kcs_bmc->lock);
> +
> +	return ret;
> +}
> +
> +static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
> +			  unsigned long arg)
> +{
> +	struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
> +	long ret = 0;
> +
> +	spin_lock_irq(&kcs_bmc->lock);
> +
> +	switch (cmd) {
> +	case IPMI_BMC_IOCTL_SET_SMS_ATN:
> +		update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
> +						KCS_STATUS_SMS_ATN);
> +		break;
> +
> +	case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
> +		update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
> +						0);
> +		break;
> +
> +	case IPMI_BMC_IOCTL_FORCE_ABORT:
> +		set_state(kcs_bmc, ERROR_STATE);
> +		read_data(kcs_bmc);
> +		write_data(kcs_bmc, KCS_ZERO_DATA);
> +
> +		kcs_bmc->phase = KCS_PHASE_ERROR;
> +		kcs_bmc->data_in_avail = false;
> +		break;
> +
> +	default:
> +		ret = -EINVAL;
> +		break;
> +	}
> +
> +	spin_unlock_irq(&kcs_bmc->lock);
> +
> +	return ret;
> +}
> +
> +static int kcs_bmc_release(struct inode *inode, struct file *filp)
> +{
> +	struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
> +

What happens if the device gets closed in the middle of a transaction?
That's an important case to handle.  If something is in progress, you
need to abort it.

> +	spin_lock_irq(&kcs_bmc->lock);
> +
> +	kcs_bmc->running = 0;
> +
> +	spin_unlock_irq(&kcs_bmc->lock);
> +
> +	return 0;
> +}
> +
> +static const struct file_operations kcs_bmc_fops = {
> +	.owner          = THIS_MODULE,
> +	.open           = kcs_bmc_open,
> +	.read           = kcs_bmc_read,
> +	.write          = kcs_bmc_write,
> +	.release        = kcs_bmc_release,
> +	.poll           = kcs_bmc_poll,
> +	.unlocked_ioctl = kcs_bmc_ioctl,
> +};
> +
> +struct kcs_bmc *kcs_bmc_alloc(struct device *dev, int sizeof_priv, u32 channel)
> +{
> +	struct kcs_bmc *kcs_bmc;
> +	int rc;
> +
> +	kcs_bmc = devm_kzalloc(dev, sizeof(*kcs_bmc) + sizeof_priv, GFP_KERNEL);
> +	if (!kcs_bmc)
> +		return NULL;
> +
> +	dev_set_name(dev, "ipmi-kcs%u", channel);
> +
> +	spin_lock_init(&kcs_bmc->lock);
> +	kcs_bmc->channel = channel;
> +
> +	init_waitqueue_head(&kcs_bmc->queue);
> +	kcs_bmc->data_in  = devm_kmalloc(dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
> +	kcs_bmc->data_out = devm_kmalloc(dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
> +	if (kcs_bmc->data_in == NULL || kcs_bmc->data_out == NULL) {
> +		dev_err(dev, "Failed to allocate data buffers\n");
> +		return NULL;
> +	}
> +
> +	kcs_bmc->miscdev.minor = MISC_DYNAMIC_MINOR;
> +	kcs_bmc->miscdev.name = dev_name(dev);
> +	kcs_bmc->miscdev.fops = &kcs_bmc_fops;
> +
> +	return kcs_bmc;
> +}
> +EXPORT_SYMBOL(kcs_bmc_alloc);
> +
> +MODULE_LICENSE("GPL v2");
> +MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
> +MODULE_DESCRIPTION("KCS BMC to handle the IPMI request from system software");
> diff --git a/drivers/char/ipmi/kcs_bmc.h b/drivers/char/ipmi/kcs_bmc.h
> new file mode 100644
> index 0000000..b56c250
> --- /dev/null
> +++ b/drivers/char/ipmi/kcs_bmc.h
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (c) 2015-2018, Intel Corporation.
> +
> +#ifndef __KCS_BMC_H__
> +#define __KCS_BMC_H__
> +
> +#include <linux/miscdevice.h>
> +
> +/* Different phases of the KCS BMC module */
> +enum kcs_phases {
> +	/* BMC should not be expecting nor sending any data. */
> +	KCS_PHASE_IDLE,
> +
> +	/* BMC is receiving a packet from system software. */
> +	KCS_PHASE_WRITE,
> +
> +	/* BMC is waiting a last data byte from system software. */
> +	KCS_PHASE_WRITE_END,
> +
> +	/* BMC is waiting the response from the upper IPMI service. */
> +	KCS_PHASE_WAIT_READ,
> +
> +	/* BMC is transferring the response to system software. */
> +	KCS_PHASE_READ,
> +
> +	/* BMC is waiting error status request from system software. */
> +	KCS_PHASE_ABORT_ERROR1,
> +
> +	/* BMC is waiting for idle status afer error from system software. */
> +	KCS_PHASE_ABORT_ERROR2,
> +
> +	/* BMC has detected a protocol violation at the interface level. */
> +	KCS_PHASE_ERROR
> +};
> +
> +/* IPMI 2.0 - Table 9-4, KCS Interface Status Codes */
> +enum kcs_errors {
> +	KCS_NO_ERROR                = 0x00,
> +	KCS_ABORTED_BY_COMMAND      = 0x01,
> +	KCS_ILLEGAL_CONTROL_CODE    = 0x02,
> +	KCS_LENGTH_ERROR            = 0x06,
> +	KCS_UNSPECIFIED_ERROR       = 0xFF
> +};
> +
> +/* IPMI 2.0 - 9.5, KCS Interface Registers */
> +struct kcs_ioreg {
> +	u32 idr; /* Input Data Register */
> +	u32 odr; /* Output Data Register */
> +	u32 str; /* Status Register */
> +};
> +
> +struct kcs_bmc {
> +	spinlock_t lock;
> +
> +	u32 channel;
> +	int running;
> +
> +	/* Setup by BMC KCS controller driver */
> +	struct kcs_ioreg ioreg;
> +	u8 (*io_inputb)(struct kcs_bmc *kcs_bmc, u32 reg);
> +	void (*io_outputb)(struct kcs_bmc *kcs_bmc, u32 reg, u8 b);
> +
> +	enum kcs_phases phase;
> +	enum kcs_errors error;
> +
> +	wait_queue_head_t queue;
> +	bool data_in_avail;
> +	int  data_in_idx;
> +	u8  *data_in;
> +
> +	int  data_out_idx;
> +	int  data_out_len;
> +	u8  *data_out;
> +
> +	struct miscdevice miscdev;
> +
> +	unsigned long long priv[];
> +};
> +
> +static inline void *kcs_bmc_priv(const struct kcs_bmc *kcs_bmc)
> +{
> +	return kcs_bmc->priv;
> +}
> +
> +extern int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc);
> +extern struct kcs_bmc *kcs_bmc_alloc(struct device *dev, int sizeof_priv,
> +					u32 channel);
> +#endif
> diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
> new file mode 100644
> index 0000000..5002de3
> --- /dev/null
> +++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
> @@ -0,0 +1,319 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (c) 2015-2018, Intel Corporation.
> +
> +#define pr_fmt(fmt) "aspeed-kcs-bmc: " fmt
> +
> +#include <linux/atomic.h>
> +#include <linux/errno.h>
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/mfd/syscon.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/platform_device.h>
> +#include <linux/poll.h>
> +#include <linux/regmap.h>
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <linux/timer.h>
> +
> +#include "kcs_bmc.h"
> +
> +
> +#define DEVICE_NAME     "ast-kcs-bmc"
> +
> +#define KCS_CHANNEL_MAX     4
> +
> +/* mapped to lpc-bmc@0 IO space */
> +#define LPC_HICR0            0x000
> +#define     LPC_HICR0_LPC3E          BIT(7)
> +#define     LPC_HICR0_LPC2E          BIT(6)
> +#define     LPC_HICR0_LPC1E          BIT(5)
> +#define LPC_HICR2            0x008
> +#define     LPC_HICR2_IBFIF3         BIT(3)
> +#define     LPC_HICR2_IBFIF2         BIT(2)
> +#define     LPC_HICR2_IBFIF1         BIT(1)
> +#define LPC_HICR4            0x010
> +#define     LPC_HICR4_LADR12AS       BIT(7)
> +#define     LPC_HICR4_KCSENBL        BIT(2)
> +#define LPC_LADR3H           0x014
> +#define LPC_LADR3L           0x018
> +#define LPC_LADR12H          0x01C
> +#define LPC_LADR12L          0x020
> +#define LPC_IDR1             0x024
> +#define LPC_IDR2             0x028
> +#define LPC_IDR3             0x02C
> +#define LPC_ODR1             0x030
> +#define LPC_ODR2             0x034
> +#define LPC_ODR3             0x038
> +#define LPC_STR1             0x03C
> +#define LPC_STR2             0x040
> +#define LPC_STR3             0x044
> +
> +/* mapped to lpc-host@80 IO space */
> +#define LPC_HICRB            0x080
> +#define     LPC_HICRB_IBFIF4         BIT(1)
> +#define     LPC_HICRB_LPC4E          BIT(0)
> +#define LPC_LADR4            0x090
> +#define LPC_IDR4             0x094
> +#define LPC_ODR4             0x098
> +#define LPC_STR4             0x09C
> +
> +struct aspeed_kcs_bmc {
> +	struct regmap *map;
> +};
> +
> +
> +static u8 aspeed_kcs_inb(struct kcs_bmc *kcs_bmc, u32 reg)
> +{
> +	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
> +	u32 val = 0;
> +	int rc;
> +
> +	rc = regmap_read(priv->map, reg, &val);
> +	WARN(rc != 0, "regmap_read() failed: %d\n", rc);
> +
> +	return rc == 0 ? (u8) val : 0;
> +}
> +
> +static void aspeed_kcs_outb(struct kcs_bmc *kcs_bmc, u32 reg, u8 data)
> +{
> +	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
> +	int rc;
> +
> +	rc = regmap_write(priv->map, reg, data);
> +	WARN(rc != 0, "regmap_write() failed: %d\n", rc);
> +}
> +
> +
> +/*
> + * AST_usrGuide_KCS.pdf
> + * 2. Background:
> + *   we note D for Data, and C for Cmd/Status, default rules are
> + *     A. KCS1 / KCS2 ( D / C:X / X+4 )
> + *        D / C : CA0h / CA4h
> + *        D / C : CA8h / CACh
> + *     B. KCS3 ( D / C:XX2h / XX3h )
> + *        D / C : CA2h / CA3h
> + *        D / C : CB2h / CB3h
> + *     C. KCS4
> + *        D / C : CA4h / CA5h
> + */
> +static void aspeed_kcs_set_address(struct kcs_bmc *kcs_bmc, u16 addr)
> +{
> +	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
> +
> +	switch (kcs_bmc->channel) {
> +	case 1:
> +		regmap_update_bits(priv->map, LPC_HICR4,
> +				LPC_HICR4_LADR12AS, 0);
> +		regmap_write(priv->map, LPC_LADR12H, addr >> 8);
> +		regmap_write(priv->map, LPC_LADR12L, addr & 0xFF);
> +		break;
> +
> +	case 2:
> +		regmap_update_bits(priv->map, LPC_HICR4,
> +				LPC_HICR4_LADR12AS, LPC_HICR4_LADR12AS);
> +		regmap_write(priv->map, LPC_LADR12H, addr >> 8);
> +		regmap_write(priv->map, LPC_LADR12L, addr & 0xFF);
> +		break;
> +
> +	case 3:
> +		regmap_write(priv->map, LPC_LADR3H, addr >> 8);
> +		regmap_write(priv->map, LPC_LADR3L, addr & 0xFF);
> +		break;
> +
> +	case 4:
> +		regmap_write(priv->map, LPC_LADR4, ((addr + 1) << 16) |
> +			addr);
> +		break;
> +
> +	default:
> +		break;
> +	}
> +}
> +
> +static void aspeed_kcs_enable_channel(struct kcs_bmc *kcs_bmc, bool enable)
> +{
> +	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
> +
> +	switch (kcs_bmc->channel) {
> +	case 1:
> +		if (enable) {
> +			regmap_update_bits(priv->map, LPC_HICR2,
> +					LPC_HICR2_IBFIF1, LPC_HICR2_IBFIF1);
> +			regmap_update_bits(priv->map, LPC_HICR0,
> +					LPC_HICR0_LPC1E, LPC_HICR0_LPC1E);
> +		} else {
> +			regmap_update_bits(priv->map, LPC_HICR0,
> +					LPC_HICR0_LPC1E, 0);
> +			regmap_update_bits(priv->map, LPC_HICR2,
> +					LPC_HICR2_IBFIF1, 0);
> +		}
> +		break;
> +
> +	case 2:
> +		if (enable) {
> +			regmap_update_bits(priv->map, LPC_HICR2,
> +					LPC_HICR2_IBFIF2, LPC_HICR2_IBFIF2);
> +			regmap_update_bits(priv->map, LPC_HICR0,
> +					LPC_HICR0_LPC2E, LPC_HICR0_LPC2E);
> +		} else {
> +			regmap_update_bits(priv->map, LPC_HICR0,
> +					LPC_HICR0_LPC2E, 0);
> +			regmap_update_bits(priv->map, LPC_HICR2,
> +					LPC_HICR2_IBFIF2, 0);
> +		}
> +		break;
> +
> +	case 3:
> +		if (enable) {
> +			regmap_update_bits(priv->map, LPC_HICR2,
> +					LPC_HICR2_IBFIF3, LPC_HICR2_IBFIF3);
> +			regmap_update_bits(priv->map, LPC_HICR0,
> +					LPC_HICR0_LPC3E, LPC_HICR0_LPC3E);
> +			regmap_update_bits(priv->map, LPC_HICR4,
> +					LPC_HICR4_KCSENBL, LPC_HICR4_KCSENBL);
> +		} else {
> +			regmap_update_bits(priv->map, LPC_HICR0,
> +					LPC_HICR0_LPC3E, 0);
> +			regmap_update_bits(priv->map, LPC_HICR4,
> +					LPC_HICR4_KCSENBL, 0);
> +			regmap_update_bits(priv->map, LPC_HICR2,
> +					LPC_HICR2_IBFIF3, 0);
> +		}
> +		break;
> +
> +	case 4:
> +		if (enable)
> +			regmap_update_bits(priv->map, LPC_HICRB,
> +					LPC_HICRB_IBFIF4 | LPC_HICRB_LPC4E,
> +					LPC_HICRB_IBFIF4 | LPC_HICRB_LPC4E);
> +		else
> +			regmap_update_bits(priv->map, LPC_HICRB,
> +					LPC_HICRB_IBFIF4 | LPC_HICRB_LPC4E,
> +					0);
> +		break;
> +
> +	default:
> +		break;
> +	}
> +}
> +
> +static irqreturn_t aspeed_kcs_irq(int irq, void *arg)
> +{
> +	struct kcs_bmc *kcs_bmc = arg;
> +
> +	if (!kcs_bmc_handle_event(kcs_bmc))
> +		return IRQ_HANDLED;
> +
> +	return IRQ_NONE;
> +}
> +
> +static int aspeed_kcs_config_irq(struct kcs_bmc *kcs_bmc,
> +			struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	int irq;
> +
> +	irq = platform_get_irq(pdev, 0);
> +	if (irq < 0)
> +		return irq;
> +
> +	return devm_request_irq(dev, irq, aspeed_kcs_irq, IRQF_SHARED,
> +				dev_name(dev), kcs_bmc);
> +}
> +
> +static const struct kcs_ioreg ast_kcs_bmc_ioregs[KCS_CHANNEL_MAX] = {
> +	{ .idr = LPC_IDR1, .odr = LPC_ODR1, .str = LPC_STR1 },
> +	{ .idr = LPC_IDR2, .odr = LPC_ODR2, .str = LPC_STR2 },
> +	{ .idr = LPC_IDR3, .odr = LPC_ODR3, .str = LPC_STR3 },
> +	{ .idr = LPC_IDR4, .odr = LPC_ODR4, .str = LPC_STR4 },
> +};
> +
> +static int aspeed_kcs_probe(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct aspeed_kcs_bmc *priv;
> +	struct kcs_bmc *kcs_bmc;
> +	u32 chan, addr;
> +	int rc;
> +
> +	rc = of_property_read_u32(dev->of_node, "kcs_chan", &chan);
> +	if ((rc != 0) || (chan == 0 || chan > KCS_CHANNEL_MAX)) {
> +		dev_err(dev, "no valid 'kcs_chan' configured\n");
> +		return -ENODEV;
> +	}
> +
> +	rc = of_property_read_u32(dev->of_node, "kcs_addr", &addr);
> +	if (rc) {
> +		dev_err(dev, "no valid 'kcs_addr' configured\n");
> +		return -ENODEV;
> +	}
> +
> +	kcs_bmc = kcs_bmc_alloc(dev, sizeof(*priv), chan);
> +	if (!kcs_bmc)
> +		return -ENOMEM;
> +
> +	priv = kcs_bmc_priv(kcs_bmc);
> +	priv->map = syscon_node_to_regmap(dev->parent->of_node);
> +	if (IS_ERR(priv->map)) {
> +		dev_err(dev, "Couldn't get regmap\n");
> +		return -ENODEV;
> +	}
> +
> +	kcs_bmc->ioreg = ast_kcs_bmc_ioregs[chan - 1];
> +	kcs_bmc->io_inputb = aspeed_kcs_inb;
> +	kcs_bmc->io_outputb = aspeed_kcs_outb;
> +
> +	dev_set_drvdata(dev, kcs_bmc);
> +
> +	aspeed_kcs_set_address(kcs_bmc, addr);
> +	aspeed_kcs_enable_channel(kcs_bmc, true);
> +	rc = aspeed_kcs_config_irq(kcs_bmc, pdev);
> +	if (rc)
> +		return rc;
> +
> +	rc = misc_register(&kcs_bmc->miscdev);
> +	if (rc) {
> +		dev_err(dev, "Unable to register device\n");
> +		return rc;
> +	}
> +
> +	pr_info("channel=%u address=0x%x idr=0x%x odr=0x%x str=0x%x\n",
> +		chan, addr,
> +		kcs_bmc->ioreg.idr, kcs_bmc->ioreg.odr, kcs_bmc->ioreg.str);
> +
> +	return 0;
> +}
> +
> +static int aspeed_kcs_remove(struct platform_device *pdev)
> +{
> +	struct kcs_bmc *kcs_bmc = dev_get_drvdata(&pdev->dev);
> +
> +	misc_deregister(&kcs_bmc->miscdev);
> +
> +	return 0;
> +}
> +
> +static const struct of_device_id ast_kcs_bmc_match[] = {
> +	{ .compatible = "aspeed,ast2400-kcs-bmc" },
> +	{ .compatible = "aspeed,ast2500-kcs-bmc" },
> +	{ }
> +};
> +
> +static struct platform_driver ast_kcs_bmc_driver = {
> +	.driver = {
> +		.name           = DEVICE_NAME,
> +		.of_match_table = ast_kcs_bmc_match,
> +	},
> +	.probe = aspeed_kcs_probe,
> +	.remove = aspeed_kcs_remove,
> +};
> +
> +module_platform_driver(ast_kcs_bmc_driver);
> +
> +MODULE_DEVICE_TABLE(of, ast_kcs_bmc_match);
> +MODULE_LICENSE("GPL v2");
> +MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
> +MODULE_DESCRIPTION("Aspeed device interface to the KCS BMC device");
> diff --git a/include/uapi/linux/ipmi_bmc.h b/include/uapi/linux/ipmi_bmc.h
> new file mode 100644
> index 0000000..2f9f97e
> --- /dev/null
> +++ b/include/uapi/linux/ipmi_bmc.h
> @@ -0,0 +1,14 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (c) 2015-2018, Intel Corporation.
> +
> +#ifndef _UAPI_LINUX_IPMI_BMC_H
> +#define _UAPI_LINUX_IPMI_BMC_H
> +
> +#include <linux/ioctl.h>
> +
> +#define __IPMI_BMC_IOCTL_MAGIC        0xB1
> +#define IPMI_BMC_IOCTL_SET_SMS_ATN    _IO(__IPMI_BMC_IOCTL_MAGIC, 0x00)
> +#define IPMI_BMC_IOCTL_CLEAR_SMS_ATN  _IO(__IPMI_BMC_IOCTL_MAGIC, 0x01)
> +#define IPMI_BMC_IOCTL_FORCE_ABORT    _IO(__IPMI_BMC_IOCTL_MAGIC, 0x02)
> +
> +#endif /* _UAPI_LINUX_KCS_BMC_H */
Wang, Haiyue Jan. 26, 2018, 5:33 a.m. | #3
On 2018-01-25 01:05, Andy Shevchenko wrote:
> On Thu, 2018-01-25 at 00:06 +0800, Haiyue Wang wrote:
>> The KCS (Keyboard Controller Style) interface is used to perform in-
>> band
>> IPMI communication between a server host and its BMC (BaseBoard
>> Management
>> Controllers).
>>
>>
>> +config ASPEED_KCS_IPMI_BMC
>> +	depends on ARCH_ASPEED || COMPILE_TEST
>> +	depends on IPMI_KCS_BMC
>> +	select REGMAP_MMIO
>> +	tristate "Aspeed KCS IPMI BMC driver"
>> +	help
>> +	  Provides a driver for the KCS (Keyboard Controller Style)
>> IPMI
>> +	  interface found on Aspeed SOCs (AST2400 and AST2500).
>> +
>> +	  The driver implements the BMC side of the KCS contorller,
>> it
>> +	  provides the access of KCS IO space for BMC side.
>> +static inline u8 read_data(struct kcs_bmc *kcs_bmc)
>> +{
>> +	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.idr);
>> +}
>> +
>> +static inline void write_data(struct kcs_bmc *kcs_bmc, u8 data)
>> +{
>> +	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.odr, data);
>> +}
>> +
>> +static inline u8 read_status(struct kcs_bmc *kcs_bmc)
>> +{
>> +	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.str);
>> +}
>> +
>> +static inline void write_status(struct kcs_bmc *kcs_bmc, u8 data)
>> +{
>> +	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.str, data);
>> +}
>> +
>> +static void update_status_bits(struct kcs_bmc *kcs_bmc, u8 mask, u8
>> val)
>> +{
>> +	u8 tmp;
>> +
>> +	tmp = read_status(kcs_bmc);
>> +
>> +	tmp &= ~mask;
>> +	tmp |= val & mask;
>> +
>> +	write_status(kcs_bmc, tmp);
>> +}
> Shouldn't be above some kind of regmap API?
These are the KCS spec defined IO accessors, which hide the low level; 
if the low level supports regmap, it is used there, such as in 
kcs_bmc_aspeed.c (aspeed_kcs_inb & aspeed_kcs_outb).
>
>> +/* Different phases of the KCS BMC module */
>> +enum kcs_phases {
>> +	/* BMC should not be expecting nor sending any data. */
>> +	KCS_PHASE_IDLE,
> Perhaps kernel-doc?
Wouldn't code + inline comments be better than kernel-doc? Or should I 
move the comments out, like:

/* The interface for checksum offload between the stack and networking 
drivers
  * is as follows...
  *
  * A. IP checksum related features
  *
  * Drivers advertise checksum offload capabilities in the features of a 
device.
  * From the stack's point of view these are capabilities offered by the 
driver,
  * a driver typically only advertises features that it is capable of 
offloading
  * to its device.
  *
  * The checksum related features are:
  *
  *    NETIF_F_HW_CSUM    - The driver (or its device) is able to 
compute one
  *              IP (one's complement) checksum for any combination
  *              of protocols or protocol layering. The checksum is
  *              computed and set in a packet per the CHECKSUM_PARTIAL
  *              interface (see below).
  *
  *    NETIF_F_IP_CSUM - Driver (device) is only able to checksum plain
  *              TCP or UDP packets over IPv4. These are specifically
  *              unencapsulated packets of the form IPv4|TCP or
  *              IPv4|UDP where the Protocol field in the IPv4 header
  *              is TCP or UDP. The IPv4 header may contain IP options
  *              This feature cannot be set in features for a device
  *              with NETIF_F_HW_CSUM also set. This feature is being
  *              DEPRECATED (see below).
>> +};
>
>> +/* IPMI 2.0 - 9.5, KCS Interface Registers */
>> +struct kcs_ioreg {
>> +	u32 idr; /* Input Data Register */
>> +	u32 odr; /* Output Data Register */
>> +	u32 str; /* Status Register */
> kernel-doc
>> +};
>> +
>> +static inline void *kcs_bmc_priv(const struct kcs_bmc *kcs_bmc)
>> +{
>> +	return kcs_bmc->priv;
>> +}
>> +
>> +extern int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc);
>> +extern struct kcs_bmc *kcs_bmc_alloc(struct device *dev, int
>> sizeof_priv,
>> +					u32 channel);
> Drop extern.
After dropping extern, it does still compile. Is there any special 
reason to drop 'extern'?
I see the kernel still uses extern in places, like: extern void printk_nmi_init(void);
>> +#endif
> Next one could be reviewed when you split this patch to two.
Got it!
Wang, Haiyue Jan. 26, 2018, 6:08 a.m. | #4
On 2018-01-25 01:48, Corey Minyard wrote:
> On 01/24/2018 10:06 AM, Haiyue Wang wrote:
>> The KCS (Keyboard Controller Style) interface is used to perform in-band
>> IPMI communication between a server host and its BMC (BaseBoard 
>> Management
>> Controllers).
>>
>> This driver exposes the KCS interface on ASpeed SOCs (AST2400 and 
>> AST2500)
>> as a character device. Such SOCs are commonly used as BMCs and this 
>> driver
>> implements the BMC side of the KCS interface.
>>
>> Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
>>
>> ---
>> v1->v2
>>
>> - Divide the driver into two parts, one handles the BMC KCS IPMI 2.0 
>> state;
>>    the other handles the BMC KCS controller such as AST2500 IO 
>> accessing.
>> - Use the spin lock APIs to handle the device file operations and BMC 
>> chip
>>    IRQ inferface for accessing the same KCS BMC data structure.
>> - Enhanced the phases handling of the KCS BMC.
>> - Unified the IOCTL definition for IPMI BMC, it will be used by KCS 
>> and BT.
>>
>> ---
>>
>> +
>> +static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
>> +{
>> +    u8 data;
>> +
>> +    switch (kcs_bmc->phase) {
>> +    case KCS_PHASE_WRITE:
>> +        set_state(kcs_bmc, WRITE_STATE);
>> +
>> +        /* set OBF before reading data */
>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>> +
>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>> +            kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>> +                        read_data(kcs_bmc);
>> +        break;
>> +
>> +    case KCS_PHASE_WRITE_END:
>> +        set_state(kcs_bmc, READ_STATE);
>> +
>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>> +            kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>> +                        read_data(kcs_bmc);
>> +
>> +        kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>> +        if (kcs_bmc->running) {
>
> Why do you only do this when running is set?  It won't hurt anything 
> if it's not
> set.  As it is, you have a race if something opens the device while 
> this code
> runs.
>
> Also, don't set the state to wait read until the "write" has finished 
> (userland has
> read the data out of the buffer.  More on that later.
>
Understood.
>> +            kcs_bmc->data_in_avail = true;
>> +            wake_up_interruptible(&kcs_bmc->queue);
>> +        }
>> +        break;
>> +
>> +    case KCS_PHASE_READ:
>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
>> +            set_state(kcs_bmc, IDLE_STATE);
>> +
>> +        data = read_data(kcs_bmc);
>> +        if (data != KCS_CMD_READ_BYTE) {
>> +            set_state(kcs_bmc, ERROR_STATE);
>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>> +            break;
>> +        }
>> +
>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>> +            kcs_bmc->phase = KCS_PHASE_IDLE;
>> +            break;
>> +        }
>> +
>> +        write_data(kcs_bmc,
>> +            kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
>> +        break;
>> +
>> +    case KCS_PHASE_ABORT_ERROR1:
>> +        set_state(kcs_bmc, READ_STATE);
>> +
>> +        /* Read the Dummy byte */
>> +        read_data(kcs_bmc);
>> +
>> +        write_data(kcs_bmc, kcs_bmc->error);
>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
>> +        break;
>> +
>> +    case KCS_PHASE_ABORT_ERROR2:
>> +        set_state(kcs_bmc, IDLE_STATE);
>> +
>> +        /* Read the Dummy byte */
>> +        read_data(kcs_bmc);
>> +
>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>> +        kcs_bmc->phase = KCS_PHASE_IDLE;
>> +
>> +        break;
>> +
>> +    default:
>> +        set_state(kcs_bmc, ERROR_STATE);
>> +
>> +        /* Read the Dummy byte */
>> +        read_data(kcs_bmc);
>> +
>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>> +        break;
>> +    }
>> +}
>> +
>> +static void kcs_bmc_handle_command(struct kcs_bmc *kcs_bmc)
>> +{
>> +    u8 cmd;
>> +
>> +    set_state(kcs_bmc, WRITE_STATE);
>> +
>> +    /* Dummy data to generate OBF */
>> +    write_data(kcs_bmc, KCS_ZERO_DATA);
>> +
>> +    cmd = read_data(kcs_bmc);
>
> Shouldn't you check the phase in all the cases below and do error
> handling if the phase isn't correct?
>
> Similar thing if the device here isn't open.  You need to handle
> that gracefully.
>
> Also, you should remove data_in_avail and data_in_idx setting from
> here, for reasons I will explain later.
>
If host software sends the data twice, such as a retry before the BMC's 
IPMI service starts,
then the two IPMI requests will be merged into one if data_in_idx is not 
cleared after receiving
KCS_CMD_WRITE_START. Most of the states are driven by host software 
(SMS). :(
>> +    switch (cmd) {
>> +    case KCS_CMD_WRITE_START:
>> +        kcs_bmc->data_in_avail = false;
>> +        kcs_bmc->data_in_idx   = 0;
>> +        kcs_bmc->phase         = KCS_PHASE_WRITE;
>> +        kcs_bmc->error         = KCS_NO_ERROR;
>> +        break;
>> +
>> +    case KCS_CMD_WRITE_END:
>> +        kcs_bmc->phase = KCS_PHASE_WRITE_END;
>> +        break;
>> +
>> +    case KCS_CMD_ABORT:
>> +        if (kcs_bmc->error == KCS_NO_ERROR)
>> +            kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
>> +
>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
>> +        break;
>> +
>> +    default:
>> +        kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
>> +        set_state(kcs_bmc, ERROR_STATE);
>> +        write_data(kcs_bmc, kcs_bmc->error);
>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>> +        break;
>> +    }
>> +}
>> +
>> +int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
>> +{
>> +    unsigned long flags;
>> +    int ret = 0;
>> +    u8 status;
>> +
>> +    spin_lock_irqsave(&kcs_bmc->lock, flags);
>> +
>> +    status = read_status(kcs_bmc) & (KCS_STATUS_IBF | 
>> KCS_STATUS_CMD_DAT);
>> +
>> +    switch (status) {
>> +    case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
>> +        kcs_bmc_handle_command(kcs_bmc);
>> +        break;
>> +
>> +    case KCS_STATUS_IBF:
>> +        kcs_bmc_handle_data(kcs_bmc);
>> +        break;
>> +
>> +    default:
>> +        ret = -1;
>> +        break;
>> +    }
>> +
>> +    spin_unlock_irqrestore(&kcs_bmc->lock, flags);
>> +
>> +    return ret;
>> +}
>> +EXPORT_SYMBOL(kcs_bmc_handle_event);
>> +
>> +static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
>> +{
>> +    return container_of(filp->private_data, struct kcs_bmc, miscdev);
>> +}
>> +
>> +static int kcs_bmc_open(struct inode *inode, struct file *filp)
>> +{
>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>> +    int ret = 0;
>> +
>> +    spin_lock_irq(&kcs_bmc->lock);
>> +
>> +    if (!kcs_bmc->running) {
>> +        kcs_bmc->running       = 1;
>> +        kcs_bmc->phase         = KCS_PHASE_IDLE;
>> +        kcs_bmc->data_in_avail = false;
>
> If you do everything right, setting the phase and data_in_avail should 
> not
> be necessary here.
>
>> +    } else {
>> +        ret = -EBUSY;
>> +    }
>> +
>> +    spin_unlock_irq(&kcs_bmc->lock);
>> +
>> +    return ret;
>> +}
>> +
>> +static unsigned int kcs_bmc_poll(struct file *filp, poll_table *wait)
>> +{
>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>> +    unsigned int mask = 0;
>> +
>> +    poll_wait(filp, &kcs_bmc->queue, wait);
>> +
>> +    spin_lock_irq(&kcs_bmc->lock);
>> +
>> +    if (kcs_bmc->data_in_avail)
>> +        mask |= POLLIN;
>> +
>> +    spin_unlock_irq(&kcs_bmc->lock);
>> +
>> +    return mask;
>> +}
>> +
>> +static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>> +                size_t count, loff_t *offset)
>> +{
>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>> +    ssize_t ret = -EAGAIN;
>> +
>
> This function still has some issues.
>
> You can't call copy_to_user() with a spinlock held or interrupts 
> disabled.
> To handle readers, you probably need a separate mutex.
>
> Also, this function can return -EAGAIN even if O_NONBLOCK is not set if
> kcs_bmc->data_in_avail changes between when you wait on the event
> and when you check it under the lock.
>
> You also clear data_in_avail even if the copy_to_user() fails, which is
> wrong.
>
> I believe the best way to handle this would be to have the spinlock
> protect the inner workings of the state machine and a mutex handle
> copying data out, setting/clearing the running flag (thus a mutex
> instead of spinlock in open and release) and the ioctl settings (except
> for abort where you will need to grab the spinlock).
>
> After the wait event below, grab the mutex.  If data is not available
> and O_NONBLOCK is not set, drop the mutex and retry.  Otherwise
> this is the only place (besides release) that sets data_in_avail to 
> false.
> Do the copy_to_user(), grab the spinlock, clear data_in_avail and
> data_in_idx, then release the lock and mutex.  If you are really
> adventurous you can do this without grabbing the lock using
> barriers, but it's probably not necessary here.
>
>> +    if (!(filp->f_flags & O_NONBLOCK))
>> +        wait_event_interruptible(kcs_bmc->queue,
>> +                     kcs_bmc->data_in_avail);
>> +
>> +    spin_lock_irq(&kcs_bmc->lock);
>> +
>> +    if (kcs_bmc->data_in_avail) {
>> +        kcs_bmc->data_in_avail = false;
>> +
>> +        if (count > kcs_bmc->data_in_idx)
>> +            count = kcs_bmc->data_in_idx;
>> +
>> +        if (!copy_to_user(buf, kcs_bmc->data_in, count))
>> +            ret = count;
>> +        else
>> +            ret = -EFAULT;
>> +    }
>> +
>> +    spin_unlock_irq(&kcs_bmc->lock);
>> +
>> +    return ret;
>> +}
>> +
>> +static ssize_t kcs_bmc_write(struct file *filp, const char *buf,
>> +                 size_t count, loff_t *offset)
>> +{
>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>> +    ssize_t ret = count;
>> +
>> +    if (count < 1 || count > KCS_MSG_BUFSIZ)
>> +        return -EINVAL;
>> +
>> +    spin_lock_irq(&kcs_bmc->lock);
>> +
>> +    if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
>> +        if (copy_from_user(kcs_bmc->data_out, buf, count)) {
>> +            spin_unlock_irq(&kcs_bmc->lock);
>> +            return -EFAULT;
>> +        }
>> +
>> +        kcs_bmc->phase = KCS_PHASE_READ;
>> +        kcs_bmc->data_out_idx = 1;
>> +        kcs_bmc->data_out_len = count;
>> +        write_data(kcs_bmc, kcs_bmc->data_out[0]);
>> +    } else if (kcs_bmc->phase == KCS_PHASE_READ) {
>> +        ret = -EBUSY;
>> +    } else {
>> +        ret = -EINVAL;
>
> Is there a reason you return -EINVAL here?  Why not just -EBUSY in all
> cases?  Is there something that userland will need to do differently?
>
>> +    }
>> +
>> +    spin_unlock_irq(&kcs_bmc->lock);
>> +
>> +    return ret;
>> +}
>> +
>> +static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
>> +              unsigned long arg)
>> +{
>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>> +    long ret = 0;
>> +
>> +    spin_lock_irq(&kcs_bmc->lock);
>> +
>> +    switch (cmd) {
>> +    case IPMI_BMC_IOCTL_SET_SMS_ATN:
>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>> +                        KCS_STATUS_SMS_ATN);
>> +        break;
>> +
>> +    case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>> +                        0);
>> +        break;
>> +
>> +    case IPMI_BMC_IOCTL_FORCE_ABORT:
>> +        set_state(kcs_bmc, ERROR_STATE);
>> +        read_data(kcs_bmc);
>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>> +
>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>> +        kcs_bmc->data_in_avail = false;
>> +        break;
>> +
>> +    default:
>> +        ret = -EINVAL;
>> +        break;
>> +    }
>> +
>> +    spin_unlock_irq(&kcs_bmc->lock);
>> +
>> +    return ret;
>> +}
>> +
>> +static int kcs_bmc_release(struct inode *inode, struct file *filp)
>> +{
>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>> +
>
> What happens if the device gets closed in the middle of a 
> transaction?  That's
> an important case to handle.  If something is in process, you need to 
> abort it.
>
The device just provides the read & write data, the transaction is 
handled in the KCS
controller's IRQ handler.
>> +    spin_lock_irq(&kcs_bmc->lock);
>> +
>> +    kcs_bmc->running = 0;
>> +
>> +    spin_unlock_irq(&kcs_bmc->lock);
>> +
>> +    return 0;
>> +}
>> +
Wang, Haiyue Jan. 26, 2018, 6:26 a.m. | #5
On 2018-01-25 01:48, Corey Minyard wrote:
> On 01/24/2018 10:06 AM, Haiyue Wang wrote:
>> The KCS (Keyboard Controller Style) interface is used to perform in-band
>> IPMI communication between a server host and its BMC (BaseBoard 
>> Management
>> Controllers).
>>
>> This driver exposes the KCS interface on ASpeed SOCs (AST2400 and 
>> AST2500)
>> as a character device. Such SOCs are commonly used as BMCs and this 
>> driver
>> implements the BMC side of the KCS interface.
>>
>> Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
>>
>> ---
>
>> +
>> +static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>> +                size_t count, loff_t *offset)
>> +{
>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>> +    ssize_t ret = -EAGAIN;
>> +
>
> This function still has some issues.
>
> You can't call copy_to_user() with a spinlock held or interrupts 
> disabled.
> To handle readers, you probably need a separate mutex.
>
> Also, this function can return -EAGAIN even if O_NONBLOCK is not set if
> kcs_bmc->data_in_avail changes between when you wait on the event
> and when you check it under the lock.
>
> You also clear data_in_avail even if the copy_to_user() fails, which is
> wrong.
>
> I believe the best way to handle this would be to have the spinlock
> protect the inner workings of the state machine and a mutex handle
> copying data out, setting/clearing the running flag (thus a mutex
> instead of spinlock in open and release) and the ioctl settings (except
> for abort where you will need to grab the spinlock).
>
> After the wait event below, grab the mutex.  If data is not available
> and O_NONBLOCK is not set, drop the mutex and retry.  Otherwise
> this is the only place (besides release) that sets data_in_avail to 
> false.
> Do the copy_to_user(), grab the spinlock, clear data_in_avail and
> data_in_idx, then release the lock and mutex.  If you are really
> adventurous you can do this without grabbing the lock using
> barriers, but it's probably not necessary here.
>
The main race is on the data_in and data_out memory copies between the one 
user-land process (ipmid) and
the irq handler. What if copy_to_user is separated into two parts: first check 
'access_ok(VERIFY_WRITE, to, n)',
and if there are no errors, grab the spinlock with irqs disabled, then call 
'memcpy((void __force *)to, from, n);'. Is that the right
way to call it?

I will add a mutex to avoid using the spinlock as much as possible.
>> +    if (!(filp->f_flags & O_NONBLOCK))
>> +        wait_event_interruptible(kcs_bmc->queue,
>> +                     kcs_bmc->data_in_avail);
>> +
>> +    spin_lock_irq(&kcs_bmc->lock);
>> +
>> +    if (kcs_bmc->data_in_avail) {
>> +        kcs_bmc->data_in_avail = false;
>> +
>> +        if (count > kcs_bmc->data_in_idx)
>> +            count = kcs_bmc->data_in_idx;
>> +
>> +        if (!copy_to_user(buf, kcs_bmc->data_in, count))
>> +            ret = count;
>> +        else
>> +            ret = -EFAULT;
>> +    }
>> +
>> +    spin_unlock_irq(&kcs_bmc->lock);
>> +
>> +    return ret;
>> +}
>> +
>
>> +    }
>> +
>> +    spin_unlock_irq(&kcs_bmc->lock);
>> +
>> +    return ret;
>> +}
>
Corey Minyard Jan. 26, 2018, 2:48 p.m. | #6
On 01/26/2018 12:08 AM, Wang, Haiyue wrote:
>
>
> On 2018-01-25 01:48, Corey Minyard wrote:
>> On 01/24/2018 10:06 AM, Haiyue Wang wrote:
>>> The KCS (Keyboard Controller Style) interface is used to perform 
>>> in-band
>>> IPMI communication between a server host and its BMC (BaseBoard 
>>> Management
>>> Controllers).
>>>
>>> This driver exposes the KCS interface on ASpeed SOCs (AST2400 and 
>>> AST2500)
>>> as a character device. Such SOCs are commonly used as BMCs and this 
>>> driver
>>> implements the BMC side of the KCS interface.
>>>
>>> Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
>>>
>>> ---
>>> v1->v2
>>>
>>> - Divide the driver into two parts, one handles the BMC KCS IPMI 2.0 
>>> state;
>>>    the other handles the BMC KCS controller such as AST2500 IO 
>>> accessing.
>>> - Use the spin lock APIs to handle the device file operations and 
>>> BMC chip
>>>    IRQ inferface for accessing the same KCS BMC data structure.
>>> - Enhanced the phases handling of the KCS BMC.
>>> - Unified the IOCTL definition for IPMI BMC, it will be used by KCS 
>>> and BT.
>>>
>>> ---
>>>
>>> +
>>> +static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
>>> +{
>>> +    u8 data;
>>> +
>>> +    switch (kcs_bmc->phase) {
>>> +    case KCS_PHASE_WRITE:
>>> +        set_state(kcs_bmc, WRITE_STATE);
>>> +
>>> +        /* set OBF before reading data */
>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>> +
>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>> +            kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>> +                        read_data(kcs_bmc);

I missed this earlier, you need to issue a length error if the data is 
too large.

>>> +        break;
>>> +
>>> +    case KCS_PHASE_WRITE_END:
>>> +        set_state(kcs_bmc, READ_STATE);
>>> +
>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>> +            kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>> +                        read_data(kcs_bmc);
>>> +
>>> +        kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>> +        if (kcs_bmc->running) {
>>
>> Why do you only do this when running is set?  It won't hurt anything 
>> if it's not
>> set.  As it is, you have a race if something opens the device while 
>> this code
>> runs.
>>
>> Also, don't set the state to wait read until the "write" has finished 
>> (userland has
>> read the data out of the buffer.  More on that later.
>>
> Understood.
>>> +            kcs_bmc->data_in_avail = true;
>>> +            wake_up_interruptible(&kcs_bmc->queue);
>>> +        }
>>> +        break;
>>> +
>>> +    case KCS_PHASE_READ:
>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
>>> +            set_state(kcs_bmc, IDLE_STATE);
>>> +
>>> +        data = read_data(kcs_bmc);
>>> +        if (data != KCS_CMD_READ_BYTE) {
>>> +            set_state(kcs_bmc, ERROR_STATE);
>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>> +            break;
>>> +        }
>>> +
>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>> +            kcs_bmc->phase = KCS_PHASE_IDLE;
>>> +            break;
>>> +        }
>>> +
>>> +        write_data(kcs_bmc,
>>> + kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
>>> +        break;
>>> +
>>> +    case KCS_PHASE_ABORT_ERROR1:
>>> +        set_state(kcs_bmc, READ_STATE);
>>> +
>>> +        /* Read the Dummy byte */
>>> +        read_data(kcs_bmc);
>>> +
>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
>>> +        break;
>>> +
>>> +    case KCS_PHASE_ABORT_ERROR2:
>>> +        set_state(kcs_bmc, IDLE_STATE);
>>> +
>>> +        /* Read the Dummy byte */
>>> +        read_data(kcs_bmc);
>>> +
>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>> +        kcs_bmc->phase = KCS_PHASE_IDLE;
>>> +
>>> +        break;
>>> +
>>> +    default:
>>> +        set_state(kcs_bmc, ERROR_STATE);
>>> +
>>> +        /* Read the Dummy byte */
>>> +        read_data(kcs_bmc);
>>> +
>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>> +        break;
>>> +    }
>>> +}
>>> +
>>> +static void kcs_bmc_handle_command(struct kcs_bmc *kcs_bmc)
>>> +{
>>> +    u8 cmd;
>>> +
>>> +    set_state(kcs_bmc, WRITE_STATE);
>>> +
>>> +    /* Dummy data to generate OBF */
>>> +    write_data(kcs_bmc, KCS_ZERO_DATA);
>>> +
>>> +    cmd = read_data(kcs_bmc);
>>
>> Shouldn't you check the phase in all the cases below and do error
>> handling if the phase isn't correct?
>>
>> Similar thing if the device here isn't open.  You need to handle
>> that gracefully.
>>
>> Also, you should remove data_in_avail and data_in_idx setting from
>> here, for reasons I will explain later.
>>
> If host software sends the data twice such as a retry before the BMC's 
> IPMI service starts,
> then the two IPMI requests will be merged into one, if not clear 
> data_in_idx after receving
> KCS_CMD_WRITE_START. Most of the states are driven by host software 
> (SMS). :(

True, but what if the host issues WRITE_START or a WRITE_END while this 
driver is in read
state?  The spec is unclear on this, but it really only makes sense for 
the host to issue
WRITE_START in idle state and WRITE_END in write state.  IMHO it should 
go to error
state.  You might make the case that a WRITE_START anywhere restarts the 
transaction,
but the feel of the error state machine kind of goes against that. 
WRITE_END is definitely
wrong anywhere but write state.

I just found the following in the spec (section 9.12):

    Thus, since the interface will allow a command transfer to be
    started or restarted
    at any time when the input buffer is empty, software could elect to
    simply retry
    the command upon detecting an error condition, or issue a ‘known good’
    command in order to clear ERROR_STATE

So a WRITE_START anywhere is ok.  A WRITE_END in the wrong state should 
probably
still go to error state.  This means the user needs to be able to handle 
a write error at
any time.  It also means it's very important to make sure the user does 
a read before
doing a write.  If the host re-issues a WRITE_START and writes a new command
between the time the user reads the data and writes the response, the 
response would
be for the wrong command.

>>> +    switch (cmd) {
>>> +    case KCS_CMD_WRITE_START:
>>> +        kcs_bmc->data_in_avail = false;
>>> +        kcs_bmc->data_in_idx   = 0;
>>> +        kcs_bmc->phase         = KCS_PHASE_WRITE;
>>> +        kcs_bmc->error         = KCS_NO_ERROR;
>>> +        break;
>>> +
>>> +    case KCS_CMD_WRITE_END:
>>> +        kcs_bmc->phase = KCS_PHASE_WRITE_END;
>>> +        break;
>>> +
>>> +    case KCS_CMD_ABORT:
>>> +        if (kcs_bmc->error == KCS_NO_ERROR)
>>> +            kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
>>> +
>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
>>> +        break;
>>> +
>>> +    default:
>>> +        kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
>>> +        set_state(kcs_bmc, ERROR_STATE);
>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>> +        break;
>>> +    }
>>> +}
>>> +
>>> +int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
>>> +{
>>> +    unsigned long flags;
>>> +    int ret = 0;
>>> +    u8 status;
>>> +
>>> +    spin_lock_irqsave(&kcs_bmc->lock, flags);
>>> +
>>> +    status = read_status(kcs_bmc) & (KCS_STATUS_IBF | 
>>> KCS_STATUS_CMD_DAT);
>>> +
>>> +    switch (status) {
>>> +    case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
>>> +        kcs_bmc_handle_command(kcs_bmc);
>>> +        break;
>>> +
>>> +    case KCS_STATUS_IBF:
>>> +        kcs_bmc_handle_data(kcs_bmc);
>>> +        break;
>>> +
>>> +    default:
>>> +        ret = -1;
>>> +        break;
>>> +    }
>>> +
>>> +    spin_unlock_irqrestore(&kcs_bmc->lock, flags);
>>> +
>>> +    return ret;
>>> +}
>>> +EXPORT_SYMBOL(kcs_bmc_handle_event);
>>> +
>>> +static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
>>> +{
>>> +    return container_of(filp->private_data, struct kcs_bmc, miscdev);
>>> +}
>>> +
>>> +static int kcs_bmc_open(struct inode *inode, struct file *filp)
>>> +{
>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>> +    int ret = 0;
>>> +
>>> +    spin_lock_irq(&kcs_bmc->lock);
>>> +
>>> +    if (!kcs_bmc->running) {
>>> +        kcs_bmc->running       = 1;
>>> +        kcs_bmc->phase         = KCS_PHASE_IDLE;
>>> +        kcs_bmc->data_in_avail = false;
>>
>> If you do everything right, setting the phase and data_in_avail 
>> should not
>> be necessary here.
>>
>>> +    } else {
>>> +        ret = -EBUSY;
>>> +    }
>>> +
>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +static unsigned int kcs_bmc_poll(struct file *filp, poll_table *wait)
>>> +{
>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>> +    unsigned int mask = 0;
>>> +
>>> +    poll_wait(filp, &kcs_bmc->queue, wait);
>>> +
>>> +    spin_lock_irq(&kcs_bmc->lock);
>>> +
>>> +    if (kcs_bmc->data_in_avail)
>>> +        mask |= POLLIN;
>>> +
>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>> +
>>> +    return mask;
>>> +}
>>> +
>>> +static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>> +                size_t count, loff_t *offset)
>>> +{
>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>> +    ssize_t ret = -EAGAIN;
>>> +
>>
>> This function still has some issues.
>>
>> You can't call copy_to_user() with a spinlock held or interrupts 
>> disabled.
>> To handle readers, you probably need a separate mutex.
>>
>> Also, this function can return -EAGAIN even if O_NONBLOCK is not set if
>> kcs_bmc->data_in_avail changes between when you wait on the event
>> and when you check it under the lock.
>>
>> You also clear data_in_avail even if the copy_to_user() fails, which is
>> wrong.
>>
>> I believe the best way to handle this would be to have the spinlock
>> protect the inner workings of the state machine and a mutex handle
>> copying data out, setting/clearing the running flag (thus a mutex
>> instead of spinlock in open and release) and the ioctl settings (except
>> for abort where you will need to grab the spinlock).
>>
>> After the wait event below, grab the mutex.  If data is not available
>> and O_NONBLOCK is not set, drop the mutex and retry.  Otherwise
>> this is the only place (besides release) that sets data_in_avail to 
>> false.
>> Do the copy_to_user(), grab the spinlock, clear data_in_avail and
>> data_in_idx, then release the lock and mutex.  If you are really
>> adventurous you can do this without grabbing the lock using
>> barriers, but it's probably not necessary here.
>>

With the state machine being able to be restarted at any time, you need
something a little different here.  You still need the mutex to handle
multiple readers and the copy.  I think the function should be something
like:

    static ssize_t kcs_bmc_read(struct file *filp, char *buf,
                     size_t count, loff_t *offset)
    {
         struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
         ssize_t ret;
         bool avail;
         size_t data_size;
         u8 *data;

         data = kmalloc(KCS_MSG_BUFSIZ, GFP_KERNEL);
         if (!data)
             return -ENOMEM;

    retry:
         ret = -EAGAIN;
         if (!(filp->f_flags & O_NONBLOCK))
             wait_event_interruptible(kcs_bmc->queue,
                          kcs_bmc->data_in_avail);

         mutex_lock(&kcs_bmc->read_mutex);

         spin_lock_irq(&kcs_bmc->lock);
         avail = kcs_bmc->data_in_avail;
         if (avail) {
             memcpy(data, kcs_bmc->data_in, kcs_bmc->data_in_idx);
             data_size = kcs_bmc->data_in_idx;
         }
         spin_unlock_irq(&kcs_bmc->lock);

         if (!avail) {
             if (filp->f_flags & O_NONBLOCK)
                 goto out_mutex_unlock;
             mutex_unlock(&kcs_bmc->read_mutex);
             goto retry;
         }

         if (count < data_size) {
             ret = -EOVERFLOW;
              ? I'm not sure about the error, but userspace needs to know.
             goto out_mutex_unlock;
         }

         if (copy_to_user(buf, data, data_size)) {
             ret = -EFAULT;
             goto out_mutex_unlock;
         }

         ret = data_size;

         spin_lock_irq(&kcs_bmc->lock);

         if (kcs_bmc->phase != KCS_PHASE_WRITE_END_DONE) {
             /* Something aborted or restarted the state machine. */
             ? Maybe restart if O_NONBLOCK is not set and -EAGAIN if it is?
             ret = -EIO;
         } else {
             kcs_bmc->phase = KCS_PHASE_WAIT_READ;
             kcs_bmc->data_in_avail = false;
             kcs_bmc->data_in_idx = 0;
         }

         spin_unlock_irq(&kcs_bmc->lock);

    out_mutex_unlock:
         mutex_unlock(&kcs_bmc->read_mutex);

         kfree(data);

         return ret;
    } 

Note that I added a state, KCS_PHASE_WRITE_END_DONE, which would be
set after the final byte from the host is received.  You want the read here
done before you can do the write below to avoid the race I talked about.

There is a local copy made of the data.  What you *never* want to happen
here is for the state machine to start processing a new write command
while the data is being copied.  It could result in corrupt data being read
and some random operation being done by the BMC.

If you want to avoid the local copy, it could be done, but it's more 
complex.

>>> +    if (!(filp->f_flags & O_NONBLOCK))
>>> +        wait_event_interruptible(kcs_bmc->queue,
>>> +                     kcs_bmc->data_in_avail);
>>> +
>>> +    spin_lock_irq(&kcs_bmc->lock);
>>> +
>>> +    if (kcs_bmc->data_in_avail) {
>>> +        kcs_bmc->data_in_avail = false;
>>> +
>>> +        if (count > kcs_bmc->data_in_idx)
>>> +            count = kcs_bmc->data_in_idx;
>>> +
>>> +        if (!copy_to_user(buf, kcs_bmc->data_in, count))
>>> +            ret = count;
>>> +        else
>>> +            ret = -EFAULT;
>>> +    }
>>> +
>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +static ssize_t kcs_bmc_write(struct file *filp, const char *buf,
>>> +                 size_t count, loff_t *offset)
>>> +{
>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>> +    ssize_t ret = count;
>>> +
>>> +    if (count < 1 || count > KCS_MSG_BUFSIZ)
>>> +        return -EINVAL;
>>> +
>>> +    spin_lock_irq(&kcs_bmc->lock);
>>> +
>>> +    if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
>>> +        if (copy_from_user(kcs_bmc->data_out, buf, count)) {
>>> +            spin_unlock_irq(&kcs_bmc->lock);
>>> +            return -EFAULT;
>>> +        }
>>> +
>>> +        kcs_bmc->phase = KCS_PHASE_READ;
>>> +        kcs_bmc->data_out_idx = 1;
>>> +        kcs_bmc->data_out_len = count;
>>> +        write_data(kcs_bmc, kcs_bmc->data_out[0]);
>>> +    } else if (kcs_bmc->phase == KCS_PHASE_READ) {
>>> +        ret = -EBUSY;
>>> +    } else {
>>> +        ret = -EINVAL;
>>
>> Is there a reason you return -EINVAL here?  Why not just -EBUSY in all
>> cases?  Is there something that userland will need to do differently?
>>
>>> +    }
>>> +
>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
>>> +              unsigned long arg)
>>> +{
>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>> +    long ret = 0;
>>> +
>>> +    spin_lock_irq(&kcs_bmc->lock);
>>> +
>>> +    switch (cmd) {
>>> +    case IPMI_BMC_IOCTL_SET_SMS_ATN:
>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>> +                        KCS_STATUS_SMS_ATN);
>>> +        break;
>>> +
>>> +    case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>> +                        0);
>>> +        break;
>>> +
>>> +    case IPMI_BMC_IOCTL_FORCE_ABORT:
>>> +        set_state(kcs_bmc, ERROR_STATE);
>>> +        read_data(kcs_bmc);
>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>> +
>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>> +        kcs_bmc->data_in_avail = false;
>>> +        break;
>>> +
>>> +    default:
>>> +        ret = -EINVAL;
>>> +        break;
>>> +    }
>>> +
>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +static int kcs_bmc_release(struct inode *inode, struct file *filp)
>>> +{
>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>> +
>>
>> What happens if the device gets closed in the middle of a 
>> transaction?  That's
>> an important case to handle.  If something is in process, you need to 
>> abort it.
>>
> The device just provides the read & write data, the transaction is 
> handled in the KCS
> controller's IRQ handler.

 From the spec, section 9.14:

    The BMC must change the status to ERROR_STATE on any condition where it
    aborts a command transfer in progress.

So you need to do something here.

>>> + spin_lock_irq(&kcs_bmc->lock);
>>> +
>>> +    kcs_bmc->running = 0;
>>> +
>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>> +
>>> +    return 0;
>>> +}
>>> +
Wang, Haiyue Jan. 29, 2018, 1:57 p.m. | #7
On 2018-01-26 22:48, Corey Minyard wrote:
> On 01/26/2018 12:08 AM, Wang, Haiyue wrote:
>>
>>
>> On 2018-01-25 01:48, Corey Minyard wrote:
>>> On 01/24/2018 10:06 AM, Haiyue Wang wrote:
>>>> The KCS (Keyboard Controller Style) interface is used to perform 
>>>> in-band
>>>> IPMI communication between a server host and its BMC (BaseBoard 
>>>> Management
>>>> Controllers).
>>>>
>>>> This driver exposes the KCS interface on ASpeed SOCs (AST2400 and 
>>>> AST2500)
>>>> as a character device. Such SOCs are commonly used as BMCs and this 
>>>> driver
>>>> implements the BMC side of the KCS interface.
>>>>
>>>> Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
>>>>
>>>> ---
>>>> v1->v2
>>>>
>>>> - Divide the driver into two parts, one handles the BMC KCS IPMI 
>>>> 2.0 state;
>>>>    the other handles the BMC KCS controller such as AST2500 IO 
>>>> accessing.
>>>> - Use the spin lock APIs to handle the device file operations and 
>>>> BMC chip
>>>>    IRQ interface for accessing the same KCS BMC data structure.
>>>> - Enhanced the phases handling of the KCS BMC.
>>>> - Unified the IOCTL definition for IPMI BMC, it will be used by KCS 
>>>> and BT.
>>>>
>>>> ---
>>>>
>>>> +
>>>> +static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
>>>> +{
>>>> +    u8 data;
>>>> +
>>>> +    switch (kcs_bmc->phase) {
>>>> +    case KCS_PHASE_WRITE:
>>>> +        set_state(kcs_bmc, WRITE_STATE);
>>>> +
>>>> +        /* set OBF before reading data */
>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>> +
>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>> +            kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>> +                        read_data(kcs_bmc);
>
> I missed this earlier, you need to issue a length error if the data is 
> too large.
>
>>>> +        break;
>>>> +
>>>> +    case KCS_PHASE_WRITE_END:
>>>> +        set_state(kcs_bmc, READ_STATE);
>>>> +
>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>> +            kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>> +                        read_data(kcs_bmc);
>>>> +
>>>> +        kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>> +        if (kcs_bmc->running) {
>>>
>>> Why do you only do this when running is set?  It won't hurt anything 
>>> if it's not
>>> set.  As it is, you have a race if something opens the device while 
>>> this code
>>> runs.
>>>
>>> Also, don't set the state to wait read until the "write" has 
>>> finished (userland has
>>> read the data out of the buffer).  More on that later.
>>>
>> Understood.
>>>> +            kcs_bmc->data_in_avail = true;
>>>> +            wake_up_interruptible(&kcs_bmc->queue);
>>>> +        }
>>>> +        break;
>>>> +
>>>> +    case KCS_PHASE_READ:
>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
>>>> +            set_state(kcs_bmc, IDLE_STATE);
>>>> +
>>>> +        data = read_data(kcs_bmc);
>>>> +        if (data != KCS_CMD_READ_BYTE) {
>>>> +            set_state(kcs_bmc, ERROR_STATE);
>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>> +            kcs_bmc->phase = KCS_PHASE_IDLE;
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        write_data(kcs_bmc,
>>>> + kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
>>>> +        break;
>>>> +
>>>> +    case KCS_PHASE_ABORT_ERROR1:
>>>> +        set_state(kcs_bmc, READ_STATE);
>>>> +
>>>> +        /* Read the Dummy byte */
>>>> +        read_data(kcs_bmc);
>>>> +
>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
>>>> +        break;
>>>> +
>>>> +    case KCS_PHASE_ABORT_ERROR2:
>>>> +        set_state(kcs_bmc, IDLE_STATE);
>>>> +
>>>> +        /* Read the Dummy byte */
>>>> +        read_data(kcs_bmc);
>>>> +
>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>> +        kcs_bmc->phase = KCS_PHASE_IDLE;
>>>> +
>>>> +        break;
>>>> +
>>>> +    default:
>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>> +
>>>> +        /* Read the Dummy byte */
>>>> +        read_data(kcs_bmc);
>>>> +
>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>> +        break;
>>>> +    }
>>>> +}
>>>> +
>>>> +static void kcs_bmc_handle_command(struct kcs_bmc *kcs_bmc)
>>>> +{
>>>> +    u8 cmd;
>>>> +
>>>> +    set_state(kcs_bmc, WRITE_STATE);
>>>> +
>>>> +    /* Dummy data to generate OBF */
>>>> +    write_data(kcs_bmc, KCS_ZERO_DATA);
>>>> +
>>>> +    cmd = read_data(kcs_bmc);
>>>
>>> Shouldn't you check the phase in all the cases below and do error
>>> handling if the phase isn't correct?
>>>
>>> Similar thing if the device here isn't open.  You need to handle
>>> that gracefully.
>>>
>>> Also, you should remove data_in_avail and data_in_idx setting from
>>> here, for reasons I will explain later.
>>>
>> If host software sends the data twice such as a retry before the 
>> BMC's IPMI service starts,
>> then the two IPMI requests will be merged into one, if not clear 
>> data_in_idx after receiving
>> KCS_CMD_WRITE_START. Most of the states are driven by host software 
>> (SMS). :(
>
> True, but what if the host issues WRITE_START or a WRITE_END while 
> this driver is in read
> state?  The spec is unclear on this, but it really only makes sense 
> for the host to issue
> WRITE_START in idle state and WRITE_END in write state.  IMHO it should 
> go to error
> state.  You might make the case that a WRITE_START anywhere restarts 
> the transaction,
> but the feel of the error state machine kind of goes against that. 
> WRITE_END is definitely
> wrong anywhere but write state.
>
> I just found the following in the spec (section 9.12):
>
>    Thus, since the interface will allow a command transfer to be
>    started or restarted
>    at any time when the input buffer is empty, software could elect to
>    simply retry
>    the command upon detecting an error condition, or issue a ‘known good’
>    command in order to clear ERROR_STATE
>
> So a WRITE_START anywhere is ok.  A WRITE_END in the wrong state 
> should probably
> still go to error state.  This means the user needs to be able to 
> handle a write error at
> any time.  It also means it's very important to make sure the user 
> does a read before
> doing a write.  If the host re-issues a WRITE_START and writes a new 
> command
> between the time the user reads the data and writes the response, the 
> response would
> be for the wrong command.
>
>>>> +    switch (cmd) {
>>>> +    case KCS_CMD_WRITE_START:
>>>> +        kcs_bmc->data_in_avail = false;
>>>> +        kcs_bmc->data_in_idx   = 0;
>>>> +        kcs_bmc->phase         = KCS_PHASE_WRITE;
>>>> +        kcs_bmc->error         = KCS_NO_ERROR;
>>>> +        break;
>>>> +
>>>> +    case KCS_CMD_WRITE_END:
>>>> +        kcs_bmc->phase = KCS_PHASE_WRITE_END;
>>>> +        break;
>>>> +
>>>> +    case KCS_CMD_ABORT:
>>>> +        if (kcs_bmc->error == KCS_NO_ERROR)
>>>> +            kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
>>>> +
>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
>>>> +        break;
>>>> +
>>>> +    default:
>>>> +        kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>> +        break;
>>>> +    }
>>>> +}
>>>> +
>>>> +int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
>>>> +{
>>>> +    unsigned long flags;
>>>> +    int ret = 0;
>>>> +    u8 status;
>>>> +
>>>> +    spin_lock_irqsave(&kcs_bmc->lock, flags);
>>>> +
>>>> +    status = read_status(kcs_bmc) & (KCS_STATUS_IBF | 
>>>> KCS_STATUS_CMD_DAT);
>>>> +
>>>> +    switch (status) {
>>>> +    case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
>>>> +        kcs_bmc_handle_command(kcs_bmc);
>>>> +        break;
>>>> +
>>>> +    case KCS_STATUS_IBF:
>>>> +        kcs_bmc_handle_data(kcs_bmc);
>>>> +        break;
>>>> +
>>>> +    default:
>>>> +        ret = -1;
>>>> +        break;
>>>> +    }
>>>> +
>>>> +    spin_unlock_irqrestore(&kcs_bmc->lock, flags);
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +EXPORT_SYMBOL(kcs_bmc_handle_event);
>>>> +
>>>> +static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
>>>> +{
>>>> +    return container_of(filp->private_data, struct kcs_bmc, miscdev);
>>>> +}
>>>> +
>>>> +static int kcs_bmc_open(struct inode *inode, struct file *filp)
>>>> +{
>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>> +    int ret = 0;
>>>> +
>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>> +
>>>> +    if (!kcs_bmc->running) {
>>>> +        kcs_bmc->running       = 1;
>>>> +        kcs_bmc->phase         = KCS_PHASE_IDLE;
>>>> +        kcs_bmc->data_in_avail = false;
>>>
>>> If you do everything right, setting the phase and data_in_avail 
>>> should not
>>> be necessary here.
>>>
>>>> +    } else {
>>>> +        ret = -EBUSY;
>>>> +    }
>>>> +
>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static unsigned int kcs_bmc_poll(struct file *filp, poll_table *wait)
>>>> +{
>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>> +    unsigned int mask = 0;
>>>> +
>>>> +    poll_wait(filp, &kcs_bmc->queue, wait);
>>>> +
>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>> +
>>>> +    if (kcs_bmc->data_in_avail)
>>>> +        mask |= POLLIN;
>>>> +
>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>> +
>>>> +    return mask;
>>>> +}
>>>> +
>>>> +static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>> +                size_t count, loff_t *offset)
>>>> +{
>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>> +    ssize_t ret = -EAGAIN;
>>>> +
>>>
>>> This function still has some issues.
>>>
>>> You can't call copy_to_user() with a spinlock held or interrupts 
>>> disabled.
>>> To handle readers, you probably need a separate mutex.
>>>
>>> Also, this function can return -EAGAIN even if O_NONBLOCK is not set if
>>> kcs_bmc->data_in_avail changes between when you wait on the event
>>> and when you check it under the lock.
>>>
>>> You also clear data_in_avail even if the copy_to_user() fails, which is
>>> wrong.
>>>
>>> I believe the best way to handle this would be to have the spinlock
>>> protect the inner workings of the state machine and a mutex handle
>>> copying data out, setting/clearing the running flag (thus a mutex
>>> instead of spinlock in open and release) and the ioctl settings (except
>>> for abort where you will need to grab the spinlock).
>>>
>>> After the wait event below, grab the mutex.  If data is not available
>>> and O_NONBLOCK is not set, drop the mutex and retry. Otherwise
>>> this is the only place (besides release) that sets data_in_avail to 
>>> false.
>>> Do the copy_to_user(), grab the spinlock, clear data_in_avail and
>>> data_in_idx, then release the lock and mutex.  If you are really
>>> adventurous you can do this without grabbing the lock using
>>> barriers, but it's probably not necessary here.
>>>
>
> With the state machine being able to be restarted at any time, you need
> something a little different here.  You still need the mutex to handle
> multiple readers and the copy.  I think the function should be something
> like:
>
Since KCS is not a multi-reader protocol from the BMC's point of view, this
makes things more complex than they need to be. :-)
>    static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>                     size_t count, loff_t *offset)
>    {
>         struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>         ssize_t ret;
>         bool avail;
>         size_t data_size;
>         u8 *data;
>
>         data = kmalloc(KCS_MSG_BUFSIZ, GFP_KERNEL);
>         if (!data)
>             return -ENOMEM;
>
>    retry:
>         ret = -EAGAIN;
>         if (!(filp->f_flags & O_NONBLOCK))
>             wait_event_interruptible(kcs_bmc->queue,
>                          kcs_bmc->data_in_avail);
>
>         mutex_lock(&kcs_bmc->read_mutex);
>
>         spin_lock_irq(&kcs_bmc->lock);
>         avail = kcs_bmc->data_in_avail;
>         if (avail) {
>             memcpy(data, kcs_bmc->data_in, kcs_bmc->data_in_idx);
>             data_size = kcs_bmc->data_in_idx;
>         }
>         spin_unlock_irq(&kcs_bmc->lock);
>
>         if (!avail) {
>             if (filp->f_flags & O_NONBLOCK)
>                 goto out_mutex_unlock;
>             mutex_unlock(&kcs_bmc->read_mutex);
>             goto retry;
>         }
>
>         if (count < data_size) {
>             ret = -EOVERFLOW;
>              ? I'm not sure about the error, but userspace needs to know.
>             goto out_mutex_unlock;
>         }
>
>         if (!copy_to_user(buf, data, data_size)) {
>             ret = -EFAULT;
>             goto out_mutex_unlock;
>         }
>
>         ret = data_size;
>
>         spin_lock_irq(&kcs_bmc->lock);
>
>         if (kcs_bmc->phase != KCS_PHASE_WRITE_END_DONE)
>             /* Something aborted or restarted the state machine. */
>             ? Maybe restart if O_NONBLOCK is not set and -EAGAIN if it 
> is?
>             ret = -EIO;
>         } else {
>             kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>             kcs_bmc->data_in_avail = false;
>             kcs_bmc->data_in_idx = 0;
>         }
>
>         spin_unlock_irq(&kcs_bmc->lock);
>
>    out_mutex_unlock:
>         mutex_unlock(&kcs_bmc->read_mutex);
>
>         kfree(data);
>
>         return ret;
>    }
> Note that I added a state, KCS_PHASE_WRITE_END_DONE, which would be
> set after the final byte from the host is received.  You want the read 
> here
> done before you can do the write below to avoid the race I talked about.
>
> There is a local copy made of the data.  What you *never* want to happen
> here is for the state machine to start processing a new write command
> while the data is being copied.  It could result in corrupt data being 
> read
> and some random operation being done by the BMC.
>
> If you want to avoid the local copy, it could be done, but it's more 
> complex.
>
>>>> +    if (!(filp->f_flags & O_NONBLOCK))
>>>> +        wait_event_interruptible(kcs_bmc->queue,
>>>> +                     kcs_bmc->data_in_avail);
>>>> +
>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>> +
>>>> +    if (kcs_bmc->data_in_avail) {
>>>> +        kcs_bmc->data_in_avail = false;
>>>> +
>>>> +        if (count > kcs_bmc->data_in_idx)
>>>> +            count = kcs_bmc->data_in_idx;
>>>> +
>>>> +        if (!copy_to_user(buf, kcs_bmc->data_in, count))
>>>> +            ret = count;
>>>> +        else
>>>> +            ret = -EFAULT;
>>>> +    }
>>>> +
>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static ssize_t kcs_bmc_write(struct file *filp, const char *buf,
>>>> +                 size_t count, loff_t *offset)
>>>> +{
>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>> +    ssize_t ret = count;
>>>> +
>>>> +    if (count < 1 || count > KCS_MSG_BUFSIZ)
>>>> +        return -EINVAL;
>>>> +
>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>> +
>>>> +    if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
>>>> +        if (copy_from_user(kcs_bmc->data_out, buf, count)) {
>>>> +            spin_unlock_irq(&kcs_bmc->lock);
>>>> +            return -EFAULT;
>>>> +        }
>>>> +
>>>> +        kcs_bmc->phase = KCS_PHASE_READ;
>>>> +        kcs_bmc->data_out_idx = 1;
>>>> +        kcs_bmc->data_out_len = count;
>>>> +        write_data(kcs_bmc, kcs_bmc->data_out[0]);
>>>> +    } else if (kcs_bmc->phase == KCS_PHASE_READ) {
>>>> +        ret = -EBUSY;
>>>> +    } else {
>>>> +        ret = -EINVAL;
>>>
>>> Is there a reason you return -EINVAL here?  Why not just -EBUSY in all
>>> cases?  Is there something that userland will need to do differently?
>>>
>>>> +    }
>>>> +
>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
>>>> +              unsigned long arg)
>>>> +{
>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>> +    long ret = 0;
>>>> +
>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>> +
>>>> +    switch (cmd) {
>>>> +    case IPMI_BMC_IOCTL_SET_SMS_ATN:
>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>> +                        KCS_STATUS_SMS_ATN);
>>>> +        break;
>>>> +
>>>> +    case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>> +                        0);
>>>> +        break;
>>>> +
>>>> +    case IPMI_BMC_IOCTL_FORCE_ABORT:
>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>> +        read_data(kcs_bmc);
>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>> +
>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>> +        kcs_bmc->data_in_avail = false;
>>>> +        break;
>>>> +
>>>> +    default:
>>>> +        ret = -EINVAL;
>>>> +        break;
>>>> +    }
>>>> +
>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static int kcs_bmc_release(struct inode *inode, struct file *filp)
>>>> +{
>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>> +
>>>
>>> What happens if the device gets closed in the middle of a 
>>> transaction?  That's
>>> an important case to handle.  If something is in process, you need 
>>> to abort it.
>>>
>> The device just provides the read & write data, the transaction is 
>> handled in the KCS
>> controller's IRQ handler.
>
> From the spec, section 9.14:
>
>    The BMC must change the status to ERROR_STATE on any condition 
> where it
>    aborts a command transfer in progress.
>
> So you need to do something here.
>
In practice, we do this in ipmid as the spec says, NOT in the driver. The
driver can't handle anything here, so let's keep it simple, thanks!

Whenever the BMC is reset (from power-on or a hard reset), the State 
Bits are initialized to “11 - Error State”. Doing so
allows SMS to detect that the BMC has been reset and that any message in 
process has been terminated by the BMC.
>>>> + spin_lock_irq(&kcs_bmc->lock);
>>>> +
>>>> +    kcs_bmc->running = 0;
>>>> +
>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>
>
Corey Minyard Jan. 30, 2018, 1:49 p.m. | #8
On 01/29/2018 07:57 AM, Wang, Haiyue wrote:
>
>
> On 2018-01-26 22:48, Corey Minyard wrote:
>> On 01/26/2018 12:08 AM, Wang, Haiyue wrote:
>>>
>>>
>>> On 2018-01-25 01:48, Corey Minyard wrote:
>>>> On 01/24/2018 10:06 AM, Haiyue Wang wrote:
>>>>> The KCS (Keyboard Controller Style) interface is used to perform 
>>>>> in-band
>>>>> IPMI communication between a server host and its BMC (BaseBoard 
>>>>> Management
>>>>> Controllers).
>>>>>
>>>>> This driver exposes the KCS interface on ASpeed SOCs (AST2400 and 
>>>>> AST2500)
>>>>> as a character device. Such SOCs are commonly used as BMCs and 
>>>>> this driver
>>>>> implements the BMC side of the KCS interface.
>>>>>
>>>>> Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
>>>>>
>>>>> ---
>>>>> v1->v2
>>>>>
>>>>> - Divide the driver into two parts, one handles the BMC KCS IPMI 
>>>>> 2.0 state;
>>>>>    the other handles the BMC KCS controller such as AST2500 IO 
>>>>> accessing.
>>>>> - Use the spin lock APIs to handle the device file operations and 
>>>>> BMC chip
>>>>>    IRQ interface for accessing the same KCS BMC data structure.
>>>>> - Enhanced the phases handling of the KCS BMC.
>>>>> - Unified the IOCTL definition for IPMI BMC, it will be used by 
>>>>> KCS and BT.
>>>>>
>>>>> ---
>>>>>
>>>>> +
>>>>> +static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
>>>>> +{
>>>>> +    u8 data;
>>>>> +
>>>>> +    switch (kcs_bmc->phase) {
>>>>> +    case KCS_PHASE_WRITE:
>>>>> +        set_state(kcs_bmc, WRITE_STATE);
>>>>> +
>>>>> +        /* set OBF before reading data */
>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>> +
>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>> +                        read_data(kcs_bmc);
>>
>> I missed this earlier, you need to issue a length error if the data 
>> is too large.
>>
>>>>> +        break;
>>>>> +
>>>>> +    case KCS_PHASE_WRITE_END:
>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>> +
>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>> +                        read_data(kcs_bmc);
>>>>> +
>>>>> +        kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>> +        if (kcs_bmc->running) {
>>>>
>>>> Why do you only do this when running is set?  It won't hurt 
>>>> anything if it's not
>>>> set.  As it is, you have a race if something opens the device while 
>>>> this code
>>>> runs.
>>>>
>>>> Also, don't set the state to wait read until the "write" has 
>>>> finished (userland has
>>>> read the data out of the buffer).  More on that later.
>>>>
>>> Understood.
>>>>> + kcs_bmc->data_in_avail = true;
>>>>> + wake_up_interruptible(&kcs_bmc->queue);
>>>>> +        }
>>>>> +        break;
>>>>> +
>>>>> +    case KCS_PHASE_READ:
>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
>>>>> +            set_state(kcs_bmc, IDLE_STATE);
>>>>> +
>>>>> +        data = read_data(kcs_bmc);
>>>>> +        if (data != KCS_CMD_READ_BYTE) {
>>>>> +            set_state(kcs_bmc, ERROR_STATE);
>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>> +            break;
>>>>> +        }
>>>>> +
>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>> +            kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>> +            break;
>>>>> +        }
>>>>> +
>>>>> +        write_data(kcs_bmc,
>>>>> + kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
>>>>> +        break;
>>>>> +
>>>>> +    case KCS_PHASE_ABORT_ERROR1:
>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>> +
>>>>> +        /* Read the Dummy byte */
>>>>> +        read_data(kcs_bmc);
>>>>> +
>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
>>>>> +        break;
>>>>> +
>>>>> +    case KCS_PHASE_ABORT_ERROR2:
>>>>> +        set_state(kcs_bmc, IDLE_STATE);
>>>>> +
>>>>> +        /* Read the Dummy byte */
>>>>> +        read_data(kcs_bmc);
>>>>> +
>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>> +        kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>> +
>>>>> +        break;
>>>>> +
>>>>> +    default:
>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>> +
>>>>> +        /* Read the Dummy byte */
>>>>> +        read_data(kcs_bmc);
>>>>> +
>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>> +        break;
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +static void kcs_bmc_handle_command(struct kcs_bmc *kcs_bmc)
>>>>> +{
>>>>> +    u8 cmd;
>>>>> +
>>>>> +    set_state(kcs_bmc, WRITE_STATE);
>>>>> +
>>>>> +    /* Dummy data to generate OBF */
>>>>> +    write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>> +
>>>>> +    cmd = read_data(kcs_bmc);
>>>>
>>>> Shouldn't you check the phase in all the cases below and do error
>>>> handling if the phase isn't correct?
>>>>
>>>> Similar thing if the device here isn't open.  You need to handle
>>>> that gracefully.
>>>>
>>>> Also, you should remove data_in_avail and data_in_idx setting from
>>>> here, for reasons I will explain later.
>>>>
>>> If host software sends the data twice such as a retry before the 
>>> BMC's IPMI service starts,
>>> then the two IPMI requests will be merged into one, if not clear 
>>> data_in_idx after receiving
>>> KCS_CMD_WRITE_START. Most of the states are driven by host software 
>>> (SMS). :(
>>
>> True, but what if the host issues WRITE_START or a WRITE_END while 
>> this driver is in read
>> state?  The spec is unclear on this, but it really only makes sense 
>> for the host to issue
>> WRITE_START in idle state and WRITE_END in write state.  IMHO it 
>> should go to error
>> state.  You might make the case that a WRITE_START anywhere restarts 
>> the transaction,
>> but the feel of the error state machine kind of goes against that. 
>> WRITE_END is definitely
>> wrong anywhere but write state.
>>
>> I just found the following in the spec (section 9.12):
>>
>>    Thus, since the interface will allow a command transfer to be
>>    started or restarted
>>    at any time when the input buffer is empty, software could elect to
>>    simply retry
>>    the command upon detecting an error condition, or issue a ‘known 
>> good’
>>    command in order to clear ERROR_STATE
>>
>> So a WRITE_START anywhere is ok.  A WRITE_END in the wrong state 
>> should probably
>> still go to error state.  This means the user needs to be able to 
>> handle a write error at
>> any time.  It also means it's very important to make sure the user 
>> does a read before
>> doing a write.  If the host re-issues a WRITE_START and writes a new 
>> command
>> between the time the user reads the data and writes the response, the 
>> response would
>> be for the wrong command.
>>
>>>>> +    switch (cmd) {
>>>>> +    case KCS_CMD_WRITE_START:
>>>>> +        kcs_bmc->data_in_avail = false;
>>>>> +        kcs_bmc->data_in_idx   = 0;
>>>>> +        kcs_bmc->phase         = KCS_PHASE_WRITE;
>>>>> +        kcs_bmc->error         = KCS_NO_ERROR;
>>>>> +        break;
>>>>> +
>>>>> +    case KCS_CMD_WRITE_END:
>>>>> +        kcs_bmc->phase = KCS_PHASE_WRITE_END;
>>>>> +        break;
>>>>> +
>>>>> +    case KCS_CMD_ABORT:
>>>>> +        if (kcs_bmc->error == KCS_NO_ERROR)
>>>>> +            kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
>>>>> +
>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
>>>>> +        break;
>>>>> +
>>>>> +    default:
>>>>> +        kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>> +        break;
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
>>>>> +{
>>>>> +    unsigned long flags;
>>>>> +    int ret = 0;
>>>>> +    u8 status;
>>>>> +
>>>>> +    spin_lock_irqsave(&kcs_bmc->lock, flags);
>>>>> +
>>>>> +    status = read_status(kcs_bmc) & (KCS_STATUS_IBF | 
>>>>> KCS_STATUS_CMD_DAT);
>>>>> +
>>>>> +    switch (status) {
>>>>> +    case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
>>>>> +        kcs_bmc_handle_command(kcs_bmc);
>>>>> +        break;
>>>>> +
>>>>> +    case KCS_STATUS_IBF:
>>>>> +        kcs_bmc_handle_data(kcs_bmc);
>>>>> +        break;
>>>>> +
>>>>> +    default:
>>>>> +        ret = -1;
>>>>> +        break;
>>>>> +    }
>>>>> +
>>>>> +    spin_unlock_irqrestore(&kcs_bmc->lock, flags);
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +EXPORT_SYMBOL(kcs_bmc_handle_event);
>>>>> +
>>>>> +static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
>>>>> +{
>>>>> +    return container_of(filp->private_data, struct kcs_bmc, 
>>>>> miscdev);
>>>>> +}
>>>>> +
>>>>> +static int kcs_bmc_open(struct inode *inode, struct file *filp)
>>>>> +{
>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>> +    int ret = 0;
>>>>> +
>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>> +
>>>>> +    if (!kcs_bmc->running) {
>>>>> +        kcs_bmc->running       = 1;
>>>>> +        kcs_bmc->phase         = KCS_PHASE_IDLE;
>>>>> +        kcs_bmc->data_in_avail = false;
>>>>
>>>> If you do everything right, setting the phase and data_in_avail 
>>>> should not
>>>> be necessary here.
>>>>
>>>>> +    } else {
>>>>> +        ret = -EBUSY;
>>>>> +    }
>>>>> +
>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +static unsigned int kcs_bmc_poll(struct file *filp, poll_table 
>>>>> *wait)
>>>>> +{
>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>> +    unsigned int mask = 0;
>>>>> +
>>>>> +    poll_wait(filp, &kcs_bmc->queue, wait);
>>>>> +
>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>> +
>>>>> +    if (kcs_bmc->data_in_avail)
>>>>> +        mask |= POLLIN;
>>>>> +
>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>> +
>>>>> +    return mask;
>>>>> +}
>>>>> +
>>>>> +static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>> +                size_t count, loff_t *offset)
>>>>> +{
>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>> +    ssize_t ret = -EAGAIN;
>>>>> +
>>>>
>>>> This function still has some issues.
>>>>
>>>> You can't call copy_to_user() with a spinlock held or interrupts 
>>>> disabled.
>>>> To handle readers, you probably need a separate mutex.
>>>>
>>>> Also, this function can return -EAGAIN even if O_NONBLOCK is not 
>>>> set if
>>>> kcs_bmc->data_in_avail changes between when you wait on the event
>>>> and when you check it under the lock.
>>>>
>>>> You also clear data_in_avail even if the copy_to_user() fails, 
>>>> which is
>>>> wrong.
>>>>
>>>> I believe the best way to handle this would be to have the spinlock
>>>> protect the inner workings of the state machine and a mutex handle
>>>> copying data out, setting/clearing the running flag (thus a mutex
>>>> instead of spinlock in open and release) and the ioctl settings 
>>>> (except
>>>> for abort where you will need to grab the spinlock).
>>>>
>>>> After the wait event below, grab the mutex.  If data is not available
>>>> and O_NONBLOCK is not set, drop the mutex and retry. Otherwise
>>>> this is the only place (besides release) that sets data_in_avail to 
>>>> false.
>>>> Do the copy_to_user(), grab the spinlock, clear data_in_avail and
>>>> data_in_idx, then release the lock and mutex.  If you are really
>>>> adventurous you can do this without grabbing the lock using
>>>> barriers, but it's probably not necessary here.
>>>>
>>
>> With the state machine being able to be restarted at any time, you need
>> something a little different here.  You still need the mutex to handle
>> multiple readers and the copy.  I think the function should be something
>> like:
>>
> Since KCS is not a multi-reader protocol from BMC's view, you make 
> things complex. :-)

No, I don't think you understand.  The primary purpose of the complexity
here is to protect the driver from the host system (on the other side of
the KCS interface).  Without this protection, it is possible for the host
system to start a new write while the user on the BMC side is reading
data out, resulting in corrupt data being read.

I haven't thought too much about this.  There may be a simpler way,
but the protection needs to be there.

And you may not think you need to protect the driver against a
malicious BMC side user code, but you would be wrong.  You can
only have one opener, but with threads or a fork you can have
multiple readers.  And you don't know if a malicious piece of
code has taken over userland.  You always need to protect the
kernel.

>>    static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>                     size_t count, loff_t *offset)
>>    {
>>         struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>         ssize_t ret;
>>         bool avail;
>>         size_t data_size;
>>         u8 *data;
>>
>>         data = kmalloc(KCS_MSG_BUFSIZ, GFP_KERNEL);
>>         if (!data)
>>             return -ENOMEM;
>>
>>    retry:
>>         ret = -EAGAIN;
>>         if (!(filp->f_flags & O_NONBLOCK))
>>             wait_event_interruptible(kcs_bmc->queue,
>>                          kcs_bmc->data_in_avail);
>>
>>         mutex_lock(&kcs_bmc->read_mutex);
>>
>>         spin_lock_irq(&kcs_bmc->lock);
>>         avail = kcs_bmc->data_in_avail;
>>         if (avail) {
>>             memcpy(data, kcs_bmc->data_in, kcs_bmc->data_in_idx);
>>             data_size = kcs_bmc->data_in_idx;
>>         }
>>         spin_unlock_irq(&kcs_bmc->lock);
>>
>>         if (!avail) {
>>             if (filp->f_flags & O_NONBLOCK)
>>                 goto out_mutex_unlock;
>>             mutex_unlock(&kcs_bmc->read_mutex);
>>             goto retry;
>>         }
>>
>>         if (count < data_size) {
>>             ret = -EOVERFLOW;
>>              ? I'm not sure about the error, but userspace needs to 
>> know.
>>             goto out_mutex_unlock;

Maybe a length error to the host side here?

>>         }
>>
>>         if (!copy_to_user(buf, data, data_size)) {
>>             ret = -EFAULT;
>>             goto out_mutex_unlock;
>>         }
>>
>>         ret = data_size;
>>
>>         spin_lock_irq(&kcs_bmc->lock);
>>
>>         if (kcs_bmc->phase != KCS_PHASE_WRITE_END_DONE)
>>             /* Something aborted or restarted the state machine. */
>>             ? Maybe restart if O_NONBLOCK is not set and -EAGAIN if 
>> it is?
>>             ret = -EIO;
>>         } else {
>>             kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>             kcs_bmc->data_in_avail = false;
>>             kcs_bmc->data_in_idx = 0;
>>         }
>>
>>         spin_unlock_irq(&kcs_bmc->lock);
>>
>>    out_mutex_unlock:
>>         mutex_unlock(&kcs_bmc->read_mutex);
>>
>>         kfree(data);
>>
>>         return ret;
>>    }
>> Note that I added a state, KCS_PHASE_WRITE_END_DONE, which would be
>> set after the final byte from the host is received.  You want the 
>> read here
>> done before you can do the write below to avoid the race I talked about.
>>
>> There is a local copy made of the data.  What you *never* want to happen
>> here is for the state machine to start processing a new write command
>> while the data is being copied.  It could result in corrupt data 
>> being read
>> and some random operation being done by the BMC.
>>
>> If you want to avoid the local copy, it could be done, but it's more 
>> complex.
>>
>>>>> +    if (!(filp->f_flags & O_NONBLOCK))
>>>>> +        wait_event_interruptible(kcs_bmc->queue,
>>>>> +                     kcs_bmc->data_in_avail);
>>>>> +
>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>> +
>>>>> +    if (kcs_bmc->data_in_avail) {
>>>>> +        kcs_bmc->data_in_avail = false;
>>>>> +
>>>>> +        if (count > kcs_bmc->data_in_idx)
>>>>> +            count = kcs_bmc->data_in_idx;
>>>>> +
>>>>> +        if (!copy_to_user(buf, kcs_bmc->data_in, count))
>>>>> +            ret = count;
>>>>> +        else
>>>>> +            ret = -EFAULT;
>>>>> +    }
>>>>> +
>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +static ssize_t kcs_bmc_write(struct file *filp, const char *buf,
>>>>> +                 size_t count, loff_t *offset)
>>>>> +{
>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>> +    ssize_t ret = count;
>>>>> +
>>>>> +    if (count < 1 || count > KCS_MSG_BUFSIZ)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>> +
>>>>> +    if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
>>>>> +        if (copy_from_user(kcs_bmc->data_out, buf, count)) {
>>>>> +            spin_unlock_irq(&kcs_bmc->lock);
>>>>> +            return -EFAULT;
>>>>> +        }
>>>>> +
>>>>> +        kcs_bmc->phase = KCS_PHASE_READ;
>>>>> +        kcs_bmc->data_out_idx = 1;
>>>>> +        kcs_bmc->data_out_len = count;
>>>>> +        write_data(kcs_bmc, kcs_bmc->data_out[0]);
>>>>> +    } else if (kcs_bmc->phase == KCS_PHASE_READ) {
>>>>> +        ret = -EBUSY;
>>>>> +    } else {
>>>>> +        ret = -EINVAL;
>>>>
>>>> Is there a reason you return -EINVAL here?  Why not just -EBUSY in all
>>>> cases?  Is there something that userland will need to do differently?
>>>>
>>>>> +    }
>>>>> +
>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
>>>>> +              unsigned long arg)
>>>>> +{
>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>> +    long ret = 0;
>>>>> +
>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>> +
>>>>> +    switch (cmd) {
>>>>> +    case IPMI_BMC_IOCTL_SET_SMS_ATN:
>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>> +                        KCS_STATUS_SMS_ATN);
>>>>> +        break;
>>>>> +
>>>>> +    case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>> +                        0);
>>>>> +        break;
>>>>> +
>>>>> +    case IPMI_BMC_IOCTL_FORCE_ABORT:
>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>> +        read_data(kcs_bmc);
>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>> +
>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>> +        kcs_bmc->data_in_avail = false;
>>>>> +        break;
>>>>> +
>>>>> +    default:
>>>>> +        ret = -EINVAL;
>>>>> +        break;
>>>>> +    }
>>>>> +
>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +static int kcs_bmc_release(struct inode *inode, struct file *filp)
>>>>> +{
>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>> +
>>>>
>>>> What happens if the device gets closed in the middle of a 
>>>> transaction?  That's
>>>> an important case to handle.  If something is in process, you need 
>>>> to abort it.
>>>>
>>> The device just provides the read & write data, the transaction is 
>>> handled in the KCS
>>> controller's IRQ handler.
>>
>> From the spec, section 9.14:
>>
>>    The BMC must change the status to ERROR_STATE on any condition 
>> where it
>>    aborts a command transfer in progress.
>>
>> So you need to do something here.
>>
> In practice, we do this as spec said in ipmid, NOT in driver, driver 
> can't handle anything, let's
> make it simple, thanks!

If ipmid crashes or is killed, how does it accomplish this?

>
> Whenever the BMC is reset (from power-on or a hard reset), the State 
> Bits are initialized to “11 - Error State”. Doing so
> allows SMS to detect that the BMC has been reset and that any message 
> in process has been terminated by the BMC.

Right, that's fine, like it should be.  But we are not talking about a 
reset.

-corey

>>>>> + spin_lock_irq(&kcs_bmc->lock);
>>>>> +
>>>>> +    kcs_bmc->running = 0;
>>>>> +
>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>
>>
>
Wang, Haiyue Jan. 31, 2018, 12:02 a.m. | #9
On 2018-01-30 21:49, Corey Minyard wrote:
> On 01/29/2018 07:57 AM, Wang, Haiyue wrote:
>>
>>
>> On 2018-01-26 22:48, Corey Minyard wrote:
>>> On 01/26/2018 12:08 AM, Wang, Haiyue wrote:
>>>>
>>>>
>>>> On 2018-01-25 01:48, Corey Minyard wrote:
>>>>> On 01/24/2018 10:06 AM, Haiyue Wang wrote:
>>>>>> The KCS (Keyboard Controller Style) interface is used to perform 
>>>>>> in-band
>>>>>> IPMI communication between a server host and its BMC (BaseBoard 
>>>>>> Management
>>>>>> Controllers).
>>>>>>
>>>>>> This driver exposes the KCS interface on ASpeed SOCs (AST2400 and 
>>>>>> AST2500)
>>>>>> as a character device. Such SOCs are commonly used as BMCs and 
>>>>>> this driver
>>>>>> implements the BMC side of the KCS interface.
>>>>>>
>>>>>> Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
>>>>>>
>>>>>> ---
>>>>>> v1->v2
>>>>>>
>>>>>> - Divide the driver into two parts, one handles the BMC KCS IPMI 
>>>>>> 2.0 state;
>>>>>>    the other handles the BMC KCS controller such as AST2500 IO 
>>>>>> accessing.
>>>>>> - Use the spin lock APIs to handle the device file operations and 
>>>>>> BMC chip
>>>>>>    IRQ interface for accessing the same KCS BMC data structure.
>>>>>> - Enhanced the phases handling of the KCS BMC.
>>>>>> - Unified the IOCTL definition for IPMI BMC, it will be used by 
>>>>>> KCS and BT.
>>>>>>
>>>>>> ---
>>>>>>
>>>>>> +
>>>>>> +static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
>>>>>> +{
>>>>>> +    u8 data;
>>>>>> +
>>>>>> +    switch (kcs_bmc->phase) {
>>>>>> +    case KCS_PHASE_WRITE:
>>>>>> +        set_state(kcs_bmc, WRITE_STATE);
>>>>>> +
>>>>>> +        /* set OBF before reading data */
>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>> +
>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>> +                        read_data(kcs_bmc);
>>>
>>> I missed this earlier, you need to issue a length error if the data 
>>> is too large.
>>>
>>>>>> +        break;
>>>>>> +
>>>>>> +    case KCS_PHASE_WRITE_END:
>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>> +
>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>> +                        read_data(kcs_bmc);
>>>>>> +
>>>>>> +        kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>>> +        if (kcs_bmc->running) {
>>>>>
>>>>> Why do you only do this when running is set?  It won't hurt 
>>>>> anything if it's not
>>>>> set.  As it is, you have a race if something opens the device 
>>>>> while this code
>>>>> runs.
>>>>>
>>>>> Also, don't set the state to wait read until the "write" has 
>>>>> finished (userland has
>>>>> read the data out of the buffer).  More on that later.
>>>>>
>>>> Understood.
>>>>>> + kcs_bmc->data_in_avail = true;
>>>>>> + wake_up_interruptible(&kcs_bmc->queue);
>>>>>> +        }
>>>>>> +        break;
>>>>>> +
>>>>>> +    case KCS_PHASE_READ:
>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
>>>>>> +            set_state(kcs_bmc, IDLE_STATE);
>>>>>> +
>>>>>> +        data = read_data(kcs_bmc);
>>>>>> +        if (data != KCS_CMD_READ_BYTE) {
>>>>>> +            set_state(kcs_bmc, ERROR_STATE);
>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>> +            break;
>>>>>> +        }
>>>>>> +
>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>> +            kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>> +            break;
>>>>>> +        }
>>>>>> +
>>>>>> +        write_data(kcs_bmc,
>>>>>> + kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
>>>>>> +        break;
>>>>>> +
>>>>>> +    case KCS_PHASE_ABORT_ERROR1:
>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>> +
>>>>>> +        /* Read the Dummy byte */
>>>>>> +        read_data(kcs_bmc);
>>>>>> +
>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
>>>>>> +        break;
>>>>>> +
>>>>>> +    case KCS_PHASE_ABORT_ERROR2:
>>>>>> +        set_state(kcs_bmc, IDLE_STATE);
>>>>>> +
>>>>>> +        /* Read the Dummy byte */
>>>>>> +        read_data(kcs_bmc);
>>>>>> +
>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>> +        kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>> +
>>>>>> +        break;
>>>>>> +
>>>>>> +    default:
>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>> +
>>>>>> +        /* Read the Dummy byte */
>>>>>> +        read_data(kcs_bmc);
>>>>>> +
>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>> +        break;
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>> +static void kcs_bmc_handle_command(struct kcs_bmc *kcs_bmc)
>>>>>> +{
>>>>>> +    u8 cmd;
>>>>>> +
>>>>>> +    set_state(kcs_bmc, WRITE_STATE);
>>>>>> +
>>>>>> +    /* Dummy data to generate OBF */
>>>>>> +    write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>> +
>>>>>> +    cmd = read_data(kcs_bmc);
>>>>>
>>>>> Shouldn't you check the phase in all the cases below and do error
>>>>> handling if the phase isn't correct?
>>>>>
>>>>> Similar thing if the device here isn't open.  You need to handle
>>>>> that gracefully.
>>>>>
>>>>> Also, you should remove data_in_avail and data_in_idx setting from
>>>>> here, for reasons I will explain later.
>>>>>
>>>> If host software sends the data twice such as a retry before the 
>>>> BMC's IPMI service starts,
>>>> then the two IPMI requests will be merged into one, if not clear 
>>>> data_in_idx after receiving
>>>> KCS_CMD_WRITE_START. Most of the states are driven by host software 
>>>> (SMS). :(
>>>
>>> True, but what if the host issues WRITE_START or a WRITE_END while 
>>> this driver is in read
>>> state?  The spec is unclear on this, but it really only makes sense 
>>> for the host to issue
>>> WRITE_START in idle state and WRITE_END in write state.  IMHO it 
>>> should go to error
>>> state.  You might make the case that a WRITE_START anywhere restarts 
>>> the transaction,
>>> but the feel of the error state machine kind of goes against that. 
>>> WRITE_END is definitely
>>> wrong anywhere but write state.
>>>
>>> I just found the following in the spec (section 9.12):
>>>
>>>    Thus, since the interface will allow a command transfer to be
>>>    started or restarted
>>>    at any time when the input buffer is empty, software could elect to
>>>    simply retry
>>>    the command upon detecting an error condition, or issue a ‘known 
>>> good’
>>>    command in order to clear ERROR_STATE
>>>
>>> So a WRITE_START anywhere is ok.  A WRITE_END in the wrong state 
>>> should probably
>>> still go to error state.  This means the user needs to be able to 
>>> handle a write error at
>>> any time.  It also means it's very important to make sure the user 
>>> does a read before
>>> doing a write.  If the host re-issues a WRITE_START and writes a new 
>>> command
>>> between the time the user reads the data and writes the response, the 
>>> response would
>>> be for the wrong command.
>>>
>>>>>> +    switch (cmd) {
>>>>>> +    case KCS_CMD_WRITE_START:
>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>> +        kcs_bmc->data_in_idx   = 0;
>>>>>> +        kcs_bmc->phase         = KCS_PHASE_WRITE;
>>>>>> +        kcs_bmc->error         = KCS_NO_ERROR;
>>>>>> +        break;
>>>>>> +
>>>>>> +    case KCS_CMD_WRITE_END:
>>>>>> +        kcs_bmc->phase = KCS_PHASE_WRITE_END;
>>>>>> +        break;
>>>>>> +
>>>>>> +    case KCS_CMD_ABORT:
>>>>>> +        if (kcs_bmc->error == KCS_NO_ERROR)
>>>>>> +            kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
>>>>>> +
>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
>>>>>> +        break;
>>>>>> +
>>>>>> +    default:
>>>>>> +        kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>> +        break;
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>> +int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
>>>>>> +{
>>>>>> +    unsigned long flags;
>>>>>> +    int ret = 0;
>>>>>> +    u8 status;
>>>>>> +
>>>>>> +    spin_lock_irqsave(&kcs_bmc->lock, flags);
>>>>>> +
>>>>>> +    status = read_status(kcs_bmc) & (KCS_STATUS_IBF | 
>>>>>> KCS_STATUS_CMD_DAT);
>>>>>> +
>>>>>> +    switch (status) {
>>>>>> +    case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
>>>>>> +        kcs_bmc_handle_command(kcs_bmc);
>>>>>> +        break;
>>>>>> +
>>>>>> +    case KCS_STATUS_IBF:
>>>>>> +        kcs_bmc_handle_data(kcs_bmc);
>>>>>> +        break;
>>>>>> +
>>>>>> +    default:
>>>>>> +        ret = -1;
>>>>>> +        break;
>>>>>> +    }
>>>>>> +
>>>>>> +    spin_unlock_irqrestore(&kcs_bmc->lock, flags);
>>>>>> +
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +EXPORT_SYMBOL(kcs_bmc_handle_event);
>>>>>> +
>>>>>> +static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
>>>>>> +{
>>>>>> +    return container_of(filp->private_data, struct kcs_bmc, 
>>>>>> miscdev);
>>>>>> +}
>>>>>> +
>>>>>> +static int kcs_bmc_open(struct inode *inode, struct file *filp)
>>>>>> +{
>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>> +    int ret = 0;
>>>>>> +
>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>> +
>>>>>> +    if (!kcs_bmc->running) {
>>>>>> +        kcs_bmc->running       = 1;
>>>>>> +        kcs_bmc->phase         = KCS_PHASE_IDLE;
>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>
>>>>> If you do everything right, setting the phase and data_in_avail 
>>>>> should not
>>>>> be necessary here.
>>>>>
>>>>>> +    } else {
>>>>>> +        ret = -EBUSY;
>>>>>> +    }
>>>>>> +
>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>> +
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static unsigned int kcs_bmc_poll(struct file *filp, poll_table 
>>>>>> *wait)
>>>>>> +{
>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>> +    unsigned int mask = 0;
>>>>>> +
>>>>>> +    poll_wait(filp, &kcs_bmc->queue, wait);
>>>>>> +
>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>> +
>>>>>> +    if (kcs_bmc->data_in_avail)
>>>>>> +        mask |= POLLIN;
>>>>>> +
>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>> +
>>>>>> +    return mask;
>>>>>> +}
>>>>>> +
>>>>>> +static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>>> +                size_t count, loff_t *offset)
>>>>>> +{
>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>> +    ssize_t ret = -EAGAIN;
>>>>>> +
>>>>>
>>>>> This function still has some issues.
>>>>>
>>>>> You can't call copy_to_user() with a spinlock held or interrupts 
>>>>> disabled.
>>>>> To handle readers, you probably need a separate mutex.
>>>>>
>>>>> Also, this function can return -EAGAIN even if O_NONBLOCK is not 
>>>>> set if
>>>>> kcs_bmc->data_in_avail changes between when you wait on the event
>>>>> and when you check it under the lock.
>>>>>
>>>>> You also clear data_in_avail even if the copy_to_user() fails, 
>>>>> which is
>>>>> wrong.
>>>>>
>>>>> I believe the best way to handle this would be to have the spinlock
>>>>> protect the inner workings of the state machine and a mutex handle
>>>>> copying data out, setting/clearing the running flag (thus a mutex
>>>>> instead of spinlock in open and release) and the ioctl settings 
>>>>> (except
>>>>> for abort where you will need to grab the spinlock).
>>>>>
>>>>> After the wait event below, grab the mutex.  If data is not available
>>>>> and O_NONBLOCK is not set, drop the mutex and retry. Otherwise
>>>>> this is the only place (besides release) that sets data_in_avail 
>>>>> to false.
>>>>> Do the copy_to_user(), grab the spinlock, clear data_in_avail and
>>>>> data_in_idx, then release the lock and mutex.  If you are really
>>>>> adventurous you can do this without grabbing the lock using
>>>>> barriers, but it's probably not necessary here.
>>>>>
>>>
>>> With the state machine being able to be restarted at any time, you need
>>> something a little different here.  You still need the mutex to handle
>>> multiple readers and the copy.  I think the function should be 
>>> something
>>> like:
>>>
>> Since KCS is not a multi-reader protocol from BMC's view, you make 
>> things complex. :-)
>
> No, I don't think you understand.  The primary purpose of the complexity
> here is to protect the driver from the host system (on the other side of
> the KCS interface).  Without this protection, it is possible for the host
> system to start a new write while the user on the BMC side is reading
> data out, resulting in corrupt data being read.
>
> I haven't thought too much about this.  There may be a simpler way,
> but the protection needs to be there.
>
> And you may not think you need to protect the driver against a
> malicious BMC side user code, but you would be wrong.  You can
> only have one opener, but with threads or a fork you can have
> multiple readers.  And you don't know if a malicious piece of
> code has taken over userland.  You always need to protect the
> kernel.
>
Sure, read/write already protect the critical data area by taking the
lock with IRQs disabled, and I believe these functions are also
thread-safe.

spin_lock_irq(&kcs_bmc->lock);
...
spin_unlock_irq(&kcs_bmc->lock);

>>>    static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>                     size_t count, loff_t *offset)
>>>    {
>>>         struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>         ssize_t ret;
>>>         bool avail;
>>>         size_t data_size;
>>>         u8 *data;
>>>
>>>         data = kmalloc(KCS_MSG_BUFSIZ, GFP_KERNEL);
>>>         if (!data)
>>>             return -ENOMEM;
>>>
>>>    retry:
>>>         ret = -EAGAIN;
>>>         if (!(filp->f_flags & O_NONBLOCK))
>>>             wait_event_interruptible(kcs_bmc->queue,
>>>                          kcs_bmc->data_in_avail);
>>>
>>>         mutex_lock(&kcs_bmc->read_mutex);
>>>
>>>         spin_lock_irq(&kcs_bmc->lock);
>>>         avail = kcs_bmc->data_in_avail;
>>>         if (avail) {
>>>             memcpy(data, kcs_bmc->data_in, kcs_bmc->data_in_idx);
>>>             data_size = kcs_bmc->data_in_idx;
>>>         }
>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>
>>>         if (!avail) {
>>>             if (filp->f_flags & O_NONBLOCK)
>>>                 goto out_mutex_unlock;
>>>             mutex_unlock(&kcs_bmc->read_mutex);
>>>             goto retry;
>>>         }
>>>
>>>         if (count < data_size) {
>>>             ret = -EOVERFLOW;
>>>              ? I'm not sure about the error, but userspace needs to 
>>> know.
>>>             goto out_mutex_unlock;
>
> Maybe a length error to the host side here?
>
>>>         }
>>>
>>>         if (!copy_to_user(buf, data, data_size)) {
>>>             ret = -EFAULT;
>>>             goto out_mutex_unlock;
>>>         }
>>>
>>>         ret = data_size;
>>>
>>>         spin_lock_irq(&kcs_bmc->lock);
>>>
>>>         if (kcs_bmc->phase != KCS_PHASE_WRITE_END_DONE)
>>>             /* Something aborted or restarted the state machine. */
>>>             ? Maybe restart if O_NONBLOCK is not set and -EAGAIN if 
>>> it is?
>>>             ret = -EIO;
>>>         } else {
>>>             kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>             kcs_bmc->data_in_avail = false;
>>>             kcs_bmc->data_in_idx = 0;
>>>         }
>>>
>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>
>>>    out_mutex_unlock:
>>>         mutex_unlock(&kcs_bmc->read_mutex);
>>>
>>>         kfree(data);
>>>
>>>         return ret;
>>>    }
>>> Note that I added a state, KCS_PHASE_WRITE_END_DONE, which would be
>>> set after the final byte from the host is received.  You want the 
>>> read here
>>> done before you can do the write below to avoid the race I talked 
>>> about.
>>>
>>> There is a local copy made of the data.  What you *never* want to 
>>> happen
>>> here is for the state machine to start processing a new write command
>>> while the data is being copied.  It could result in corrupt data 
>>> being read
>>> and some random operation being done by the BMC.
>>>
>>> If you want to avoid the local copy, it could be done, but it's more 
>>> complex.
>>>
>>>>>> +    if (!(filp->f_flags & O_NONBLOCK))
>>>>>> +        wait_event_interruptible(kcs_bmc->queue,
>>>>>> +                     kcs_bmc->data_in_avail);
>>>>>> +
>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>> +
>>>>>> +    if (kcs_bmc->data_in_avail) {
>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>> +
>>>>>> +        if (count > kcs_bmc->data_in_idx)
>>>>>> +            count = kcs_bmc->data_in_idx;
>>>>>> +
>>>>>> +        if (!copy_to_user(buf, kcs_bmc->data_in, count))
>>>>>> +            ret = count;
>>>>>> +        else
>>>>>> +            ret = -EFAULT;
>>>>>> +    }
>>>>>> +
>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>> +
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static ssize_t kcs_bmc_write(struct file *filp, const char *buf,
>>>>>> +                 size_t count, loff_t *offset)
>>>>>> +{
>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>> +    ssize_t ret = count;
>>>>>> +
>>>>>> +    if (count < 1 || count > KCS_MSG_BUFSIZ)
>>>>>> +        return -EINVAL;
>>>>>> +
>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>> +
>>>>>> +    if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
>>>>>> +        if (copy_from_user(kcs_bmc->data_out, buf, count)) {
>>>>>> +            spin_unlock_irq(&kcs_bmc->lock);
>>>>>> +            return -EFAULT;
>>>>>> +        }
>>>>>> +
>>>>>> +        kcs_bmc->phase = KCS_PHASE_READ;
>>>>>> +        kcs_bmc->data_out_idx = 1;
>>>>>> +        kcs_bmc->data_out_len = count;
>>>>>> +        write_data(kcs_bmc, kcs_bmc->data_out[0]);
>>>>>> +    } else if (kcs_bmc->phase == KCS_PHASE_READ) {
>>>>>> +        ret = -EBUSY;
>>>>>> +    } else {
>>>>>> +        ret = -EINVAL;
>>>>>
>>>>> Is there a reason you return -EINVAL here?  Why not just -EBUSY in 
>>>>> all
>>>>> cases?  Is there something that userland will need to do differently?
>>>>>
>>>>>> +    }
>>>>>> +
>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>> +
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
>>>>>> +              unsigned long arg)
>>>>>> +{
>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>> +    long ret = 0;
>>>>>> +
>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>> +
>>>>>> +    switch (cmd) {
>>>>>> +    case IPMI_BMC_IOCTL_SET_SMS_ATN:
>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>> +                        KCS_STATUS_SMS_ATN);
>>>>>> +        break;
>>>>>> +
>>>>>> +    case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>> +                        0);
>>>>>> +        break;
>>>>>> +
>>>>>> +    case IPMI_BMC_IOCTL_FORCE_ABORT:
>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>> +        read_data(kcs_bmc);
>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>> +
>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>> +        break;
>>>>>> +
>>>>>> +    default:
>>>>>> +        ret = -EINVAL;
>>>>>> +        break;
>>>>>> +    }
>>>>>> +
>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>> +
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static int kcs_bmc_release(struct inode *inode, struct file *filp)
>>>>>> +{
>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>> +
>>>>>
>>>>> What happens if the device gets closed in the middle of a 
>>>>> transaction?  That's
>>>>> an important case to handle.  If something is in process, you need 
>>>>> to abort it.
>>>>>
>>>> The device just provides the read & write data, the transaction is 
>>>> handled in the KCS
>>>> controller's IRQ handler.
>>>
>>> From the spec, section 9.14:
>>>
>>>    The BMC must change the status to ERROR_STATE on any condition 
>>> where it
>>>    aborts a command transfer in progress.
>>>
>>> So you need to do something here.
>>>
>> In practice, we do this as spec said in ipmid, NOT in driver, driver 
>> can't handle anything, let's
>> make it simple, thanks!
>
> If ipmid crashes or is killed, how does it accomplish this?
>
Every time ipmid (or kcsd) crashes or is killed, on startup it needs to call 
FORCE_ABORT first, to sync with
host side software.
>>
>> Whenever the BMC is reset (from power-on or a hard reset), the State 
>> Bits are initialized to “11 - Error State”. Doing so
>> allows SMS to detect that the BMC has been reset and that any message 
>> in process has been terminated by the BMC.
>
> Right, that's fine, like it should be.  But we are not talking about a 
> reset.
>
I think the final error handling solution is that kcsd (user land) runs; 
otherwise, the host software side still gets stuck. We have met
this kind of issue, so in general, we just don't handle some minor 
errors in the driver; then in kcsd, when it can provide the real
IPMI service, it will reset the channel first to sync with host side 
software.
> -corey
>
>>>>>> + spin_lock_irq(&kcs_bmc->lock);
>>>>>> +
>>>>>> +    kcs_bmc->running = 0;
>>>>>> +
>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>
>>>
>>
>
Corey Minyard Jan. 31, 2018, 12:52 a.m. | #10
On 01/30/2018 06:02 PM, Wang, Haiyue wrote:
>
>
> On 2018-01-30 21:49, Corey Minyard wrote:
>> On 01/29/2018 07:57 AM, Wang, Haiyue wrote:
>>>
>>>
>>> On 2018-01-26 22:48, Corey Minyard wrote:
>>>> On 01/26/2018 12:08 AM, Wang, Haiyue wrote:
>>>>>
>>>>>
>>>>> On 2018-01-25 01:48, Corey Minyard wrote:
>>>>>> On 01/24/2018 10:06 AM, Haiyue Wang wrote:
>>>>>>> The KCS (Keyboard Controller Style) interface is used to perform 
>>>>>>> in-band
>>>>>>> IPMI communication between a server host and its BMC (BaseBoard 
>>>>>>> Management
>>>>>>> Controllers).
>>>>>>>
>>>>>>> This driver exposes the KCS interface on ASpeed SOCs (AST2400 
>>>>>>> and AST2500)
>>>>>>> as a character device. Such SOCs are commonly used as BMCs and 
>>>>>>> this driver
>>>>>>> implements the BMC side of the KCS interface.
>>>>>>>
>>>>>>> Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
>>>>>>>
>>>>>>> ---
>>>>>>> v1->v2
>>>>>>>
>>>>>>> - Divide the driver into two parts, one handles the BMC KCS IPMI 
>>>>>>> 2.0 state;
>>>>>>>    the other handles the BMC KCS controller such as AST2500 IO 
>>>>>>> accessing.
>>>>>>> - Use the spin lock APIs to handle the device file operations 
>>>>>>> and BMC chip
>>>>>>>    IRQ interface for accessing the same KCS BMC data structure.
>>>>>>> - Enhanced the phases handling of the KCS BMC.
>>>>>>> - Unified the IOCTL definition for IPMI BMC, it will be used by 
>>>>>>> KCS and BT.
>>>>>>>
>>>>>>> ---
>>>>>>>
>>>>>>> +
>>>>>>> +static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
>>>>>>> +{
>>>>>>> +    u8 data;
>>>>>>> +
>>>>>>> +    switch (kcs_bmc->phase) {
>>>>>>> +    case KCS_PHASE_WRITE:
>>>>>>> +        set_state(kcs_bmc, WRITE_STATE);
>>>>>>> +
>>>>>>> +        /* set OBF before reading data */
>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>> +
>>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>>> +                        read_data(kcs_bmc);
>>>>
>>>> I missed this earlier, you need to issue a length error if the data 
>>>> is too large.
>>>>
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    case KCS_PHASE_WRITE_END:
>>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>>> +
>>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>>> +                        read_data(kcs_bmc);
>>>>>>> +
>>>>>>> +        kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>>>> +        if (kcs_bmc->running) {
>>>>>>
>>>>>> Why do you only do this when running is set?  It won't hurt 
>>>>>> anything if it's not
>>>>>> set.  As it is, you have a race if something opens the device 
>>>>>> while this code
>>>>>> runs.
>>>>>>
>>>>>> Also, don't set the state to wait read until the "write" has 
>>>>>> finished (userland has
>>>>>> read the data out of the buffer).  More on that later.
>>>>>>
>>>>> Understood.
>>>>>>> + kcs_bmc->data_in_avail = true;
>>>>>>> + wake_up_interruptible(&kcs_bmc->queue);
>>>>>>> +        }
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    case KCS_PHASE_READ:
>>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
>>>>>>> +            set_state(kcs_bmc, IDLE_STATE);
>>>>>>> +
>>>>>>> +        data = read_data(kcs_bmc);
>>>>>>> +        if (data != KCS_CMD_READ_BYTE) {
>>>>>>> +            set_state(kcs_bmc, ERROR_STATE);
>>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>> +            break;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
>>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>> +            kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>>> +            break;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        write_data(kcs_bmc,
>>>>>>> + kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    case KCS_PHASE_ABORT_ERROR1:
>>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>>> +
>>>>>>> +        /* Read the Dummy byte */
>>>>>>> +        read_data(kcs_bmc);
>>>>>>> +
>>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    case KCS_PHASE_ABORT_ERROR2:
>>>>>>> +        set_state(kcs_bmc, IDLE_STATE);
>>>>>>> +
>>>>>>> +        /* Read the Dummy byte */
>>>>>>> +        read_data(kcs_bmc);
>>>>>>> +
>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>> +        kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>>> +
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    default:
>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>> +
>>>>>>> +        /* Read the Dummy byte */
>>>>>>> +        read_data(kcs_bmc);
>>>>>>> +
>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>> +        break;
>>>>>>> +    }
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void kcs_bmc_handle_command(struct kcs_bmc *kcs_bmc)
>>>>>>> +{
>>>>>>> +    u8 cmd;
>>>>>>> +
>>>>>>> +    set_state(kcs_bmc, WRITE_STATE);
>>>>>>> +
>>>>>>> +    /* Dummy data to generate OBF */
>>>>>>> +    write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>> +
>>>>>>> +    cmd = read_data(kcs_bmc);
>>>>>>
>>>>>> Shouldn't you check the phase in all the cases below and do error
>>>>>> handling if the phase isn't correct?
>>>>>>
>>>>>> Similar thing if the device here isn't open.  You need to handle
>>>>>> that gracefully.
>>>>>>
>>>>>> Also, you should remove data_in_avail and data_in_idx setting from
>>>>>> here, for reasons I will explain later.
>>>>>>
>>>>> If host software sends the data twice, such as a retry before the 
>>>>> BMC's IPMI service starts,
>>>>> then the two IPMI requests will be merged into one if data_in_idx 
>>>>> is not cleared after receiving
>>>>> KCS_CMD_WRITE_START. Most of the states are driven by host 
>>>>> software (SMS). :(
>>>>
>>>> True, but what if the host issues WRITE_START or a WRITE_END while 
>>>> this driver is in read
>>>> state?  The spec is unclear on this, but it really only makes sense 
>>>> for the host to issue
>>>> WRITE_START in idle state and WRITE_END in write state.  IMHO it 
>>>> should go to error
>>>> state.  You might make the case that a WRITE_START anywhere 
>>>> restarts the transaction,
>>>> but the feel of the error state machine kind of goes against that. 
>>>> WRITE_END is definitely
>>>> wrong anywhere but write state.
>>>>
>>>> I just found the following in the spec (section 9.12):
>>>>
>>>>    Thus, since the interface will allow a command transfer to be
>>>>    started or restarted
>>>>    at any time when the input buffer is empty, software could elect to
>>>>    simply retry
>>>>    the command upon detecting an error condition, or issue a ‘known 
>>>> good’
>>>>    command in order to clear ERROR_STATE
>>>>
>>>> So a WRITE_START anywhere is ok.  A WRITE_END in the wrong state 
>>>> should probably
>>>> still go to error state.  This means the user needs to be able to 
>>>> handle a write error at
>>>> any time.  It also means it's very important to make sure the user 
>>>> does a read before
>>>> doing a write.  If the host re-issues a WRITE_START and writes a 
>>>> new command
>>>> between the time the user reads the data and writes the response, 
>>>> the response would
>>>> be for the wrong command.
>>>>
>>>>>>> +    switch (cmd) {
>>>>>>> +    case KCS_CMD_WRITE_START:
>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>> +        kcs_bmc->data_in_idx   = 0;
>>>>>>> +        kcs_bmc->phase         = KCS_PHASE_WRITE;
>>>>>>> +        kcs_bmc->error         = KCS_NO_ERROR;
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    case KCS_CMD_WRITE_END:
>>>>>>> +        kcs_bmc->phase = KCS_PHASE_WRITE_END;
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    case KCS_CMD_ABORT:
>>>>>>> +        if (kcs_bmc->error == KCS_NO_ERROR)
>>>>>>> +            kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
>>>>>>> +
>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    default:
>>>>>>> +        kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>>> +        break;
>>>>>>> +    }
>>>>>>> +}
>>>>>>> +
>>>>>>> +int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
>>>>>>> +{
>>>>>>> +    unsigned long flags;
>>>>>>> +    int ret = 0;
>>>>>>> +    u8 status;
>>>>>>> +
>>>>>>> +    spin_lock_irqsave(&kcs_bmc->lock, flags);
>>>>>>> +
>>>>>>> +    status = read_status(kcs_bmc) & (KCS_STATUS_IBF | 
>>>>>>> KCS_STATUS_CMD_DAT);
>>>>>>> +
>>>>>>> +    switch (status) {
>>>>>>> +    case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
>>>>>>> +        kcs_bmc_handle_command(kcs_bmc);
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    case KCS_STATUS_IBF:
>>>>>>> +        kcs_bmc_handle_data(kcs_bmc);
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    default:
>>>>>>> +        ret = -1;
>>>>>>> +        break;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    spin_unlock_irqrestore(&kcs_bmc->lock, flags);
>>>>>>> +
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +EXPORT_SYMBOL(kcs_bmc_handle_event);
>>>>>>> +
>>>>>>> +static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
>>>>>>> +{
>>>>>>> +    return container_of(filp->private_data, struct kcs_bmc, 
>>>>>>> miscdev);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int kcs_bmc_open(struct inode *inode, struct file *filp)
>>>>>>> +{
>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>> +    int ret = 0;
>>>>>>> +
>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>> +
>>>>>>> +    if (!kcs_bmc->running) {
>>>>>>> +        kcs_bmc->running       = 1;
>>>>>>> +        kcs_bmc->phase         = KCS_PHASE_IDLE;
>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>
>>>>>> If you do everything right, setting the phase and data_in_avail 
>>>>>> should not
>>>>>> be necessary here.
>>>>>>
>>>>>>> +    } else {
>>>>>>> +        ret = -EBUSY;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>> +
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static unsigned int kcs_bmc_poll(struct file *filp, poll_table 
>>>>>>> *wait)
>>>>>>> +{
>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>> +    unsigned int mask = 0;
>>>>>>> +
>>>>>>> +    poll_wait(filp, &kcs_bmc->queue, wait);
>>>>>>> +
>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>> +
>>>>>>> +    if (kcs_bmc->data_in_avail)
>>>>>>> +        mask |= POLLIN;
>>>>>>> +
>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>> +
>>>>>>> +    return mask;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>>>> +                size_t count, loff_t *offset)
>>>>>>> +{
>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>> +    ssize_t ret = -EAGAIN;
>>>>>>> +
>>>>>>
>>>>>> This function still has some issues.
>>>>>>
>>>>>> You can't call copy_to_user() with a spinlock held or interrupts 
>>>>>> disabled.
>>>>>> To handle readers, you probably need a separate mutex.
>>>>>>
>>>>>> Also, this function can return -EAGAIN even if O_NONBLOCK is not 
>>>>>> set if
>>>>>> kcs_bmc->data_in_avail changes between when you wait on the event
>>>>>> and when you check it under the lock.
>>>>>>
>>>>>> You also clear data_in_avail even if the copy_to_user() fails, 
>>>>>> which is
>>>>>> wrong.
>>>>>>
>>>>>> I believe the best way to handle this would be to have the spinlock
>>>>>> protect the inner workings of the state machine and a mutex handle
>>>>>> copying data out, setting/clearing the running flag (thus a mutex
>>>>>> instead of spinlock in open and release) and the ioctl settings 
>>>>>> (except
>>>>>> for abort where you will need to grab the spinlock).
>>>>>>
>>>>>> After the wait event below, grab the mutex.  If data is not 
>>>>>> available
>>>>>> and O_NONBLOCK is not set, drop the mutex and retry. Otherwise
>>>>>> this is the only place (besides release) that sets data_in_avail 
>>>>>> to false.
>>>>>> Do the copy_to_user(), grab the spinlock, clear data_in_avail and
>>>>>> data_in_idx, then release the lock and mutex.  If you are really
>>>>>> adventurous you can do this without grabbing the lock using
>>>>>> barriers, but it's probably not necessary here.
>>>>>>
>>>>
>>>> With the state machine being able to be restarted at any time, you 
>>>> need
>>>> something a little different here.  You still need the mutex to handle
>>>> multiple readers and the copy.  I think the function should be 
>>>> something
>>>> like:
>>>>
>>> Since KCS is not a multi-reader protocol from the BMC's view, you are making 
>>> things complex. :-)
>>
>> No, I don't think you understand.  The primary purpose of the complexity
>> here is to protect the driver from the host system (on the other side of
>> the KCS interface).  Without this protection, it is possible for the 
>> host
>> system to start a new write while the user on the BMC side is reading
>> data out, resulting in corrupt data being read.
>>
>> I haven't thought too much about this.  There may be a simpler way,
>> but the protection needs to be there.
>>
>> And you may not think you need to protect the driver against a
>> malicious BMC side user code, but you would be wrong.  You can
>> only have one opener, but with threads or a fork you can have
>> multiple readers.  And you don't know if a malicious piece of
>> code has taken over userland.  You always need to protect the
>> kernel.
>>
> Sure, the read/write have protected the critical data area with IRQ, 
> and also, these
> functions should be thread local safe I believe.
>
> spin_lock_irq(&kcs_bmc->lock);
> ...
> spin_unlock_irq(&kcs_bmc->lock);
>

But remember, you can't call copy_to_user() when IRQs are off or when 
you are holding
a spinlock.  That is an absolute no.  It can crash the kernel.

So you need a design that takes this into account, but will not result 
in the possibility
of bad data being read.

>>>>    static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>                     size_t count, loff_t *offset)
>>>>    {
>>>>         struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>         ssize_t ret;
>>>>         bool avail;
>>>>         size_t data_size;
>>>>         u8 *data;
>>>>
>>>>         data = kmalloc(KCS_MSG_BUFSIZ, GFP_KERNEL);
>>>>         if (!data)
>>>>             return -ENOMEM;
>>>>
>>>>    retry:
>>>>         ret = -EAGAIN;
>>>>         if (!(filp->f_flags & O_NONBLOCK))
>>>>             wait_event_interruptible(kcs_bmc->queue,
>>>>                          kcs_bmc->data_in_avail);
>>>>
>>>>         mutex_lock(&kcs_bmc->read_mutex);
>>>>
>>>>         spin_lock_irq(&kcs_bmc->lock);
>>>>         avail = kcs_bmc->data_in_avail;
>>>>         if (avail) {
>>>>             memcpy(data, kcs_bmc->data_in, kcs_bmc->data_in_idx);
>>>>             data_size = kcs_bmc->data_in_idx;
>>>>         }
>>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>>
>>>>         if (!avail) {
>>>>             if (filp->f_flags & O_NONBLOCK)
>>>>                 goto out_mutex_unlock;
>>>>             mutex_unlock(&kcs_bmc->read_mutex);
>>>>             goto retry;
>>>>         }
>>>>
>>>>         if (count < data_size) {
>>>>             ret = -EOVERFLOW;
>>>>              ? I'm not sure about the error, but userspace needs to 
>>>> know.
>>>>             goto out_mutex_unlock;
>>
>> Maybe a length error to the host side here?

You didn't comment on this or the other length error.  That needs to be
handled.

>>
>>>>         }
>>>>
>>>>         if (!copy_to_user(buf, data, data_size)) {
>>>>             ret = -EFAULT;
>>>>             goto out_mutex_unlock;
>>>>         }
>>>>
>>>>         ret = data_size;
>>>>
>>>>         spin_lock_irq(&kcs_bmc->lock);
>>>>
>>>>         if (kcs_bmc->phase != KCS_PHASE_WRITE_END_DONE)
>>>>             /* Something aborted or restarted the state machine. */
>>>>             ? Maybe restart if O_NONBLOCK is not set and -EAGAIN if 
>>>> it is?
>>>>             ret = -EIO;
>>>>         } else {
>>>>             kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>             kcs_bmc->data_in_avail = false;
>>>>             kcs_bmc->data_in_idx = 0;
>>>>         }
>>>>
>>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>>
>>>>    out_mutex_unlock:
>>>>         mutex_unlock(&kcs_bmc->read_mutex);
>>>>
>>>>         kfree(data);
>>>>
>>>>         return ret;
>>>>    }
>>>> Note that I added a state, KCS_PHASE_WRITE_END_DONE, which would be
>>>> set after the final byte from the host is received.  You want the 
>>>> read here
>>>> done before you can do the write below to avoid the race I talked 
>>>> about.
>>>>
>>>> There is a local copy made of the data.  What you *never* want to 
>>>> happen
>>>> here is for the state machine to start processing a new write command
>>>> while the data is being copied.  It could result in corrupt data 
>>>> being read
>>>> and some random operation being done by the BMC.
>>>>
>>>> If you want to avoid the local copy, it could be done, but it's 
>>>> more complex.
>>>>
>>>>>>> +    if (!(filp->f_flags & O_NONBLOCK))
>>>>>>> +        wait_event_interruptible(kcs_bmc->queue,
>>>>>>> +                     kcs_bmc->data_in_avail);
>>>>>>> +
>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>> +
>>>>>>> +    if (kcs_bmc->data_in_avail) {
>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>> +
>>>>>>> +        if (count > kcs_bmc->data_in_idx)
>>>>>>> +            count = kcs_bmc->data_in_idx;
>>>>>>> +
>>>>>>> +        if (!copy_to_user(buf, kcs_bmc->data_in, count))
>>>>>>> +            ret = count;
>>>>>>> +        else
>>>>>>> +            ret = -EFAULT;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>> +
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static ssize_t kcs_bmc_write(struct file *filp, const char *buf,
>>>>>>> +                 size_t count, loff_t *offset)
>>>>>>> +{
>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>> +    ssize_t ret = count;
>>>>>>> +
>>>>>>> +    if (count < 1 || count > KCS_MSG_BUFSIZ)
>>>>>>> +        return -EINVAL;
>>>>>>> +
>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>> +
>>>>>>> +    if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
>>>>>>> +        if (copy_from_user(kcs_bmc->data_out, buf, count)) {
>>>>>>> +            spin_unlock_irq(&kcs_bmc->lock);
>>>>>>> +            return -EFAULT;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        kcs_bmc->phase = KCS_PHASE_READ;
>>>>>>> +        kcs_bmc->data_out_idx = 1;
>>>>>>> +        kcs_bmc->data_out_len = count;
>>>>>>> +        write_data(kcs_bmc, kcs_bmc->data_out[0]);
>>>>>>> +    } else if (kcs_bmc->phase == KCS_PHASE_READ) {
>>>>>>> +        ret = -EBUSY;
>>>>>>> +    } else {
>>>>>>> +        ret = -EINVAL;
>>>>>>
>>>>>> Is there a reason you return -EINVAL here?  Why not just -EBUSY 
>>>>>> in all
>>>>>> cases?  Is there something that userland will need to do 
>>>>>> differently?
>>>>>>
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>> +
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
>>>>>>> +              unsigned long arg)
>>>>>>> +{
>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>> +    long ret = 0;
>>>>>>> +
>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>> +
>>>>>>> +    switch (cmd) {
>>>>>>> +    case IPMI_BMC_IOCTL_SET_SMS_ATN:
>>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>>> +                        KCS_STATUS_SMS_ATN);
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
>>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>>> +                        0);
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    case IPMI_BMC_IOCTL_FORCE_ABORT:
>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>> +        read_data(kcs_bmc);
>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>> +
>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    default:
>>>>>>> +        ret = -EINVAL;
>>>>>>> +        break;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>> +
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int kcs_bmc_release(struct inode *inode, struct file *filp)
>>>>>>> +{
>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>> +
>>>>>>
>>>>>> What happens if the device gets closed in the middle of a 
>>>>>> transaction?  That's
>>>>>> an important case to handle.  If something is in process, you 
>>>>>> need to abort it.
>>>>>>
>>>>> The device just provides the read & write data, the transaction is 
>>>>> handled in the KCS
>>>>> controller's IRQ handler.
>>>>
>>>> From the spec, section 9.14:
>>>>
>>>>    The BMC must change the status to ERROR_STATE on any condition 
>>>> where it
>>>>    aborts a command transfer in progress.
>>>>
>>>> So you need to do something here.
>>>>
>>> In practice, we do this as spec said in ipmid, NOT in driver, driver 
>>> can't handle anything, let's
>>> make it simple, thanks!
>>
>> If ipmid crashes or is killed, how does it accomplish this?
>>
> Every time ipmid (or kcsd) crashes or is killed, on startup it needs to call 
> FORCE_ABORT first, to sync with
> host side software.
>>>
>>> Whenever the BMC is reset (from power-on or a hard reset), the State 
>>> Bits are initialized to “11 - Error State”. Doing so
>>> allows SMS to detect that the BMC has been reset and that any 
>>> message in process has been terminated by the BMC.
>>
>> Right, that's fine, like it should be.  But we are not talking about 
>> a reset.
>>
> I think the final error handling solution is that kcsd (user land) 
> runs; otherwise, the host software side still gets stuck. We have met
> this kind of issue, so in general, we just don't handle some minor 
> errors in the driver; then in kcsd, when it can provide the real
> IPMI service, it will reset the channel first to sync with host side 
> software.

"Userland will do the right thing" is not very convincing to a kernel 
developer.

Plus if the above is true, I would think that you would just want to 
hold the device
in an error state when it wasn't opened.

-corey

>> -corey
>>
>>>>>>> + spin_lock_irq(&kcs_bmc->lock);
>>>>>>> +
>>>>>>> +    kcs_bmc->running = 0;
>>>>>>> +
>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +
>>>>
>>>>
>>>
>>
>
Wang, Haiyue Jan. 31, 2018, 1:02 a.m. | #11
On 2018-01-31 08:52, Corey Minyard wrote:
> On 01/30/2018 06:02 PM, Wang, Haiyue wrote:
>>
>>
>> On 2018-01-30 21:49, Corey Minyard wrote:
>>> On 01/29/2018 07:57 AM, Wang, Haiyue wrote:
>>>>
>>>>
>>>> On 2018-01-26 22:48, Corey Minyard wrote:
>>>>> On 01/26/2018 12:08 AM, Wang, Haiyue wrote:
>>>>>>
>>>>>>
>>>>>> On 2018-01-25 01:48, Corey Minyard wrote:
>>>>>>> On 01/24/2018 10:06 AM, Haiyue Wang wrote:
>>>>>>>> The KCS (Keyboard Controller Style) interface is used to 
>>>>>>>> perform in-band
>>>>>>>> IPMI communication between a server host and its BMC (BaseBoard 
>>>>>>>> Management
>>>>>>>> Controllers).
>>>>>>>>
>>>>>>>> This driver exposes the KCS interface on ASpeed SOCs (AST2400 
>>>>>>>> and AST2500)
>>>>>>>> as a character device. Such SOCs are commonly used as BMCs and 
>>>>>>>> this driver
>>>>>>>> implements the BMC side of the KCS interface.
>>>>>>>>
>>>>>>>> Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
>>>>>>>>
>>>>>>>> ---
>>>>>>>> v1->v2
>>>>>>>>
>>>>>>>> - Divide the driver into two parts, one handles the BMC KCS 
>>>>>>>> IPMI 2.0 state;
>>>>>>>>    the other handles the BMC KCS controller such as AST2500 IO 
>>>>>>>> accessing.
>>>>>>>> - Use the spin lock APIs to handle the device file operations 
>>>>>>>> and BMC chip
>>>>>>>>    IRQ interface for accessing the same KCS BMC data structure.
>>>>>>>> - Enhanced the phases handling of the KCS BMC.
>>>>>>>> - Unified the IOCTL definition for IPMI BMC, it will be used by 
>>>>>>>> KCS and BT.
>>>>>>>>
>>>>>>>> ---
>>>>>>>>
>>>>>>>> +
>>>>>>>> +static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
>>>>>>>> +{
>>>>>>>> +    u8 data;
>>>>>>>> +
>>>>>>>> +    switch (kcs_bmc->phase) {
>>>>>>>> +    case KCS_PHASE_WRITE:
>>>>>>>> +        set_state(kcs_bmc, WRITE_STATE);
>>>>>>>> +
>>>>>>>> +        /* set OBF before reading data */
>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>> +
>>>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>>>> +                        read_data(kcs_bmc);
>>>>>
>>>>> I missed this earlier, you need to issue a length error if the 
>>>>> data is too large.
>>>>>
>>>>>>>> +        break;
>>>>>>>> +
>>>>>>>> +    case KCS_PHASE_WRITE_END:
>>>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>>>> +
>>>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>>>> +                        read_data(kcs_bmc);
>>>>>>>> +
>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>>>>> +        if (kcs_bmc->running) {
>>>>>>>
>>>>>>> Why do you only do this when running is set?  It won't hurt 
>>>>>>> anything if it's not
>>>>>>> set.  As it is, you have a race if something opens the device 
>>>>>>> while this code
>>>>>>> runs.
>>>>>>>
>>>>>>> Also, don't set the state to wait read until the "write" has 
>>>>>>> finished (userland has
>>>>>>> read the data out of the buffer).  More on that later.
>>>>>>>
>>>>>> Understood.
>>>>>>>> + kcs_bmc->data_in_avail = true;
>>>>>>>> + wake_up_interruptible(&kcs_bmc->queue);
>>>>>>>> +        }
>>>>>>>> +        break;
>>>>>>>> +
>>>>>>>> +    case KCS_PHASE_READ:
>>>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
>>>>>>>> +            set_state(kcs_bmc, IDLE_STATE);
>>>>>>>> +
>>>>>>>> +        data = read_data(kcs_bmc);
>>>>>>>> +        if (data != KCS_CMD_READ_BYTE) {
>>>>>>>> +            set_state(kcs_bmc, ERROR_STATE);
>>>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>> +            break;
>>>>>>>> +        }
>>>>>>>> +
>>>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
>>>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>> +            kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>>>> +            break;
>>>>>>>> +        }
>>>>>>>> +
>>>>>>>> +        write_data(kcs_bmc,
>>>>>>>> + kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
>>>>>>>> +        break;
>>>>>>>> +
>>>>>>>> +    case KCS_PHASE_ABORT_ERROR1:
>>>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>>>> +
>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>> +
>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
>>>>>>>> +        break;
>>>>>>>> +
>>>>>>>> +    case KCS_PHASE_ABORT_ERROR2:
>>>>>>>> +        set_state(kcs_bmc, IDLE_STATE);
>>>>>>>> +
>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>> +
>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>>>> +
>>>>>>>> +        break;
>>>>>>>> +
>>>>>>>> +    default:
>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>> +
>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>> +
>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>> +        break;
>>>>>>>> +    }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void kcs_bmc_handle_command(struct kcs_bmc *kcs_bmc)
>>>>>>>> +{
>>>>>>>> +    u8 cmd;
>>>>>>>> +
>>>>>>>> +    set_state(kcs_bmc, WRITE_STATE);
>>>>>>>> +
>>>>>>>> +    /* Dummy data to generate OBF */
>>>>>>>> +    write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>> +
>>>>>>>> +    cmd = read_data(kcs_bmc);
>>>>>>>
>>>>>>> Shouldn't you check the phase in all the cases below and do error
>>>>>>> handling if the phase isn't correct?
>>>>>>>
>>>>>>> Similar thing if the device here isn't open.  You need to handle
>>>>>>> that gracefully.
>>>>>>>
>>>>>>> Also, you should remove data_in_avail and data_in_idx setting from
>>>>>>> here, for reasons I will explain later.
>>>>>>>
>>>>>> If host software sends the data twice such as a retry before the 
>>>>>> BMC's IPMI service starts,
>>>>>> then the two IPMI requests will be merged into one, if not clear 
>>>>>> data_in_idx after receiving
>>>>>> KCS_CMD_WRITE_START. Most of the states are driven by host 
>>>>>> software (SMS). :(
>>>>>
>>>>> True, but what if the host issues WRITE_START or a WRITE_END while 
>>>>> this driver is in read
>>>>> state?  The spec is unclear on this, but it really only makes 
>>>>> sense for the host to issue
>>>>> WRITE_START in idle state and WRITE_END in write state. IMHO it 
>>>>> should go to error
>>>>> state.  You might make the case that a WRITE_START anywhere 
>>>>> restarts the transaction,
>>>>> but the feel of the error state machine kind of goes against that. 
>>>>> WRITE_END is definitely
>>>>> wrong anywhere but write state.
>>>>>
>>>>> I just found the following in the spec (section 9.12):
>>>>>
>>>>>    Thus, since the interface will allow a command transfer to be
>>>>>    started or restarted
>>>>>    at any time when the input buffer is empty, software could 
>>>>> elect to
>>>>>    simply retry
>>>>>    the command upon detecting an error condition, or issue a 
>>>>> ‘known good’
>>>>>    command in order to clear ERROR_STATE
>>>>>
>>>>> So a WRITE_START anywhere is ok.  A WRITE_END in the wrong state 
>>>>> should probably
>>>>> still go to error state.  This means the user needs to be able to 
>>>>> handle a write error at
>>>>> any time.  It also means it's very important to make sure the user 
>>>>> does a read before
>>>>> doing a write.  If the host re-issues a WRITE_START and writes a 
>>>>> new command
>>>>> between the time the user reads the data and writes the response, 
>>>>> the response would
>>>>> be for the wrong command.
>>>>>
>>>>>>>> +    switch (cmd) {
>>>>>>>> +    case KCS_CMD_WRITE_START:
>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>> +        kcs_bmc->data_in_idx   = 0;
>>>>>>>> +        kcs_bmc->phase         = KCS_PHASE_WRITE;
>>>>>>>> +        kcs_bmc->error         = KCS_NO_ERROR;
>>>>>>>> +        break;
>>>>>>>> +
>>>>>>>> +    case KCS_CMD_WRITE_END:
>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_WRITE_END;
>>>>>>>> +        break;
>>>>>>>> +
>>>>>>>> +    case KCS_CMD_ABORT:
>>>>>>>> +        if (kcs_bmc->error == KCS_NO_ERROR)
>>>>>>>> +            kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
>>>>>>>> +
>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
>>>>>>>> +        break;
>>>>>>>> +
>>>>>>>> +    default:
>>>>>>>> +        kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>>>> +        break;
>>>>>>>> +    }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
>>>>>>>> +{
>>>>>>>> +    unsigned long flags;
>>>>>>>> +    int ret = 0;
>>>>>>>> +    u8 status;
>>>>>>>> +
>>>>>>>> +    spin_lock_irqsave(&kcs_bmc->lock, flags);
>>>>>>>> +
>>>>>>>> +    status = read_status(kcs_bmc) & (KCS_STATUS_IBF | 
>>>>>>>> KCS_STATUS_CMD_DAT);
>>>>>>>> +
>>>>>>>> +    switch (status) {
>>>>>>>> +    case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
>>>>>>>> +        kcs_bmc_handle_command(kcs_bmc);
>>>>>>>> +        break;
>>>>>>>> +
>>>>>>>> +    case KCS_STATUS_IBF:
>>>>>>>> +        kcs_bmc_handle_data(kcs_bmc);
>>>>>>>> +        break;
>>>>>>>> +
>>>>>>>> +    default:
>>>>>>>> +        ret = -1;
>>>>>>>> +        break;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    spin_unlock_irqrestore(&kcs_bmc->lock, flags);
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +EXPORT_SYMBOL(kcs_bmc_handle_event);
>>>>>>>> +
>>>>>>>> +static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
>>>>>>>> +{
>>>>>>>> +    return container_of(filp->private_data, struct kcs_bmc, 
>>>>>>>> miscdev);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static int kcs_bmc_open(struct inode *inode, struct file *filp)
>>>>>>>> +{
>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>> +    int ret = 0;
>>>>>>>> +
>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>> +
>>>>>>>> +    if (!kcs_bmc->running) {
>>>>>>>> +        kcs_bmc->running       = 1;
>>>>>>>> +        kcs_bmc->phase         = KCS_PHASE_IDLE;
>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>
>>>>>>> If you do everything right, setting the phase and data_in_avail 
>>>>>>> should not
>>>>>>> be necessary here.
>>>>>>>
>>>>>>>> +    } else {
>>>>>>>> +        ret = -EBUSY;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static unsigned int kcs_bmc_poll(struct file *filp, poll_table 
>>>>>>>> *wait)
>>>>>>>> +{
>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>> +    unsigned int mask = 0;
>>>>>>>> +
>>>>>>>> +    poll_wait(filp, &kcs_bmc->queue, wait);
>>>>>>>> +
>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>> +
>>>>>>>> +    if (kcs_bmc->data_in_avail)
>>>>>>>> +        mask |= POLLIN;
>>>>>>>> +
>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>> +
>>>>>>>> +    return mask;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>>>>> +                size_t count, loff_t *offset)
>>>>>>>> +{
>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>> +    ssize_t ret = -EAGAIN;
>>>>>>>> +
>>>>>>>
>>>>>>> This function still has some issues.
>>>>>>>
>>>>>>> You can't call copy_to_user() with a spinlock held or interrupts 
>>>>>>> disabled.
>>>>>>> To handle readers, you probably need a separate mutex.
>>>>>>>
>>>>>>> Also, this function can return -EAGAIN even if O_NONBLOCK is not 
>>>>>>> set if
>>>>>>> kcs_bmc->data_in_avail changes between when you wait on the event
>>>>>>> and when you check it under the lock.
>>>>>>>
>>>>>>> You also clear data_in_avail even if the copy_to_user() fails, 
>>>>>>> which is
>>>>>>> wrong.
>>>>>>>
>>>>>>> I believe the best way to handle this would be to have the spinlock
>>>>>>> protect the inner workings of the state machine and a mutex handle
>>>>>>> copying data out, setting/clearing the running flag (thus a mutex
>>>>>>> instead of spinlock in open and release) and the ioctl settings 
>>>>>>> (except
>>>>>>> for abort where you will need to grab the spinlock).
>>>>>>>
>>>>>>> After the wait event below, grab the mutex.  If data is not 
>>>>>>> available
>>>>>>> and O_NONBLOCK is not set, drop the mutex and retry. Otherwise
>>>>>>> this is the only place (besides release) that sets data_in_avail 
>>>>>>> to false.
>>>>>>> Do the copy_to_user(), grab the spinlock, clear data_in_avail and
>>>>>>> data_in_idx, then release the lock and mutex.  If you are really
>>>>>>> adventurous you can do this without grabbing the lock using
>>>>>>> barriers, but it's probably not necessary here.
>>>>>>>
>>>>>
>>>>> With the state machine being able to be restarted at any time, you 
>>>>> need
>>>>> something a little different here.  You still need the mutex to 
>>>>> handle
>>>>> multiple readers and the copy.  I think the function should be 
>>>>> something
>>>>> like:
>>>>>
>>>> Since KCS is not a multi-reader protocol from BMC's view, you make 
>>>> things complex. :-)
>>>
>>> No, I don't think you understand.  The primary purpose of the 
>>> complexity
>>> here is to protect the driver from the host system (on the other 
>>> side of
>>> the KCS interface).  Without this protection, it is possible for the 
>>> host
>>> system to start a new write while the user on the BMC side is reading
>>> data out, resulting in corrupt data being read.
>>>
>>> I haven't thought too much about this.  There may be a simpler way,
>>> but the protection needs to be there.
>>>
>>> And you may not think you need to protect the driver against a
>>> malicious BMC side user code, but you would be wrong.  You can
>>> only have one opener, but with threads or a fork you can have
>>> multiple readers.  And you don't know if a malicious piece of
>>> code has taken over userland.  You always need to protect the
>>> kernel.
>>>
>> Sure, the read/write have protected the critical data area with IRQ, 
>> and also, these
>> functions should be thread local safe I believe.
>>
>> spin_lock_irq(&kcs_bmc->lock);
>> ...
>> spin_unlock_irq(&kcs_bmc->lock);
>>
>
> But remember, you can't call copy_to_user() when IRQs are off or when 
> you are holding
> a spinlock.  That is an absolute no.  It can crash the kernel.
>
> So you need a design that takes this into account, but will not result 
> in the possibility
> of bad data being read.
>
Yes, sure, as I said before: access_ok(VERIFY_WRITE, to, n), then memcpy 
in spin_lock.
>>>>>    static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>>                     size_t count, loff_t *offset)
>>>>>    {
>>>>>         struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>         ssize_t ret;
>>>>>         bool avail;
>>>>>         size_t data_size;
>>>>>         u8 *data;
>>>>>
>>>>>         data = kmalloc(KCS_MSG_BUFSIZ, GFP_KERNEL);
>>>>>         if (!data)
>>>>>             return -ENOMEM;
>>>>>
>>>>>    retry:
>>>>>         ret = -EAGAIN;
>>>>>         if (!(filp->f_flags & O_NONBLOCK))
>>>>>             wait_event_interruptible(kcs_bmc->queue,
>>>>>                          kcs_bmc->data_in_avail);
>>>>>
>>>>>         mutex_lock(&kcs_bmc->read_mutex);
>>>>>
>>>>>         spin_lock_irq(&kcs_bmc->lock);
>>>>>         avail = kcs_bmc->data_in_avail;
>>>>>         if (avail) {
>>>>>             memcpy(data, kcs_bmc->data_in, kcs_bmc->data_in_idx);
>>>>>             data_size = kcs_bmc->data_in_idx;
>>>>>         }
>>>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>>>
>>>>>         if (!avail) {
>>>>>             if (filp->f_flags & O_NONBLOCK)
>>>>>                 goto out_mutex_unlock;
>>>>>             mutex_unlock(&kcs_bmc->read_mutex);
>>>>>             goto retry;
>>>>>         }
>>>>>
>>>>>         if (count < data_size) {
>>>>>             ret = -EOVERFLOW;
>>>>>              ? I'm not sure about the error, but userspace needs 
>>>>> to know.
>>>>>             goto out_mutex_unlock;
>>>
>>> Maybe a length error to the host side here?
>
> You didn't comment on this or the other length error.  That needs to be
> handled.
>
Yes, will send a length error by following KCS spec.
>>>
>>>>>         }
>>>>>
>>>>>         if (!copy_to_user(buf, data, data_size)) {
>>>>>             ret = -EFAULT;
>>>>>             goto out_mutex_unlock;
>>>>>         }
>>>>>
>>>>>         ret = data_size;
>>>>>
>>>>>         spin_lock_irq(&kcs_bmc->lock);
>>>>>
>>>>>         if (kcs_bmc->phase != KCS_PHASE_WRITE_END_DONE)
>>>>>             /* Something aborted or restarted the state machine. */
>>>>>             ? Maybe restart if O_NONBLOCK is not set and -EAGAIN 
>>>>> if it is?
>>>>>             ret = -EIO;
>>>>>         } else {
>>>>>             kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>>             kcs_bmc->data_in_avail = false;
>>>>>             kcs_bmc->data_in_idx = 0;
>>>>>         }
>>>>>
>>>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>>>
>>>>>    out_mutex_unlock:
>>>>>         mutex_unlock(&kcs_bmc->read_mutex);
>>>>>
>>>>>         kfree(data);
>>>>>
>>>>>         return ret;
>>>>>    }
>>>>> Note that I added a state, KCS_PHASE_WRITE_END_DONE, which would be
>>>>> set after the final byte from the host is received.  You want the 
>>>>> read here
>>>>> done before you can do the write below to avoid the race I talked 
>>>>> about.
>>>>>
>>>>> There is a local copy made of the data.  What you *never* want to 
>>>>> happen
>>>>> here is for the state machine to start processing a new write command
>>>>> while the data is being copied.  It could result in corrupt data 
>>>>> being read
>>>>> and some random operation being done by the BMC.
>>>>>
>>>>> If you want to avoid the local copy, it could be done, but it's 
>>>>> more complex.
>>>>>
>>>>>>>> +    if (!(filp->f_flags & O_NONBLOCK))
>>>>>>>> +        wait_event_interruptible(kcs_bmc->queue,
>>>>>>>> +                     kcs_bmc->data_in_avail);
>>>>>>>> +
>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>> +
>>>>>>>> +    if (kcs_bmc->data_in_avail) {
>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>> +
>>>>>>>> +        if (count > kcs_bmc->data_in_idx)
>>>>>>>> +            count = kcs_bmc->data_in_idx;
>>>>>>>> +
>>>>>>>> +        if (!copy_to_user(buf, kcs_bmc->data_in, count))
>>>>>>>> +            ret = count;
>>>>>>>> +        else
>>>>>>>> +            ret = -EFAULT;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static ssize_t kcs_bmc_write(struct file *filp, const char *buf,
>>>>>>>> +                 size_t count, loff_t *offset)
>>>>>>>> +{
>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>> +    ssize_t ret = count;
>>>>>>>> +
>>>>>>>> +    if (count < 1 || count > KCS_MSG_BUFSIZ)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>> +
>>>>>>>> +    if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
>>>>>>>> +        if (copy_from_user(kcs_bmc->data_out, buf, count)) {
>>>>>>>> +            spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>> +            return -EFAULT;
>>>>>>>> +        }
>>>>>>>> +
>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_READ;
>>>>>>>> +        kcs_bmc->data_out_idx = 1;
>>>>>>>> +        kcs_bmc->data_out_len = count;
>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->data_out[0]);
>>>>>>>> +    } else if (kcs_bmc->phase == KCS_PHASE_READ) {
>>>>>>>> +        ret = -EBUSY;
>>>>>>>> +    } else {
>>>>>>>> +        ret = -EINVAL;
>>>>>>>
>>>>>>> Is there a reason you return -EINVAL here?  Why not just -EBUSY 
>>>>>>> in all
>>>>>>> cases?  Is there something that userland will need to do 
>>>>>>> differently?
>>>>>>>
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
>>>>>>>> +              unsigned long arg)
>>>>>>>> +{
>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>> +    long ret = 0;
>>>>>>>> +
>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>> +
>>>>>>>> +    switch (cmd) {
>>>>>>>> +    case IPMI_BMC_IOCTL_SET_SMS_ATN:
>>>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>>>> +                        KCS_STATUS_SMS_ATN);
>>>>>>>> +        break;
>>>>>>>> +
>>>>>>>> +    case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
>>>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>>>> +                        0);
>>>>>>>> +        break;
>>>>>>>> +
>>>>>>>> +    case IPMI_BMC_IOCTL_FORCE_ABORT:
>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>> +
>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>> +        break;
>>>>>>>> +
>>>>>>>> +    default:
>>>>>>>> +        ret = -EINVAL;
>>>>>>>> +        break;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static int kcs_bmc_release(struct inode *inode, struct file 
>>>>>>>> *filp)
>>>>>>>> +{
>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>> +
>>>>>>>
>>>>>>> What happens if the device gets closed in the middle of a 
>>>>>>> transaction?  That's
>>>>>>> an important case to handle.  If something is in process, you 
>>>>>>> need to abort it.
>>>>>>>
>>>>>> The device just provides the read & write data, the transaction 
>>>>>> is handled in the KCS
>>>>>> controller's IRQ handler.
>>>>>
>>>>> From the spec, section 9.14:
>>>>>
>>>>>    The BMC must change the status to ERROR_STATE on any condition 
>>>>> where it
>>>>>    aborts a command transfer in progress.
>>>>>
>>>>> So you need to do something here.
>>>>>
>>>> In practice, we do this as spec said in ipmid, NOT in driver, 
>>>> driver can't handle anything, let's
>>>> make it simple, thanks!
>>>
>>> If ipmid crashes or is killed, how does it accomplish this?
>>>
>> Every time ipmid (or kcsd) crashes or is killed, it needs to call 
>> FORCE_ABORT first when it restarts, to sync with
>> host side software.
>>>>
>>>> Whenever the BMC is reset (from power-on or a hard reset), the 
>>>> State Bits are initialized to “11 - Error State”. Doing so
>>>> allows SMS to detect that the BMC has been reset and that any 
>>>> message in process has been terminated by the BMC.
>>>
>>> Right, that's fine, like it should be.  But we are not talking about 
>>> a reset.
>>>
>> I think the final error handling solution is that kcsd (user land) 
>> runs, otherwise, the host software side still got stuck. We meet
>> this kind of issue, so in general, we just don't handle some minor 
>> errors in driver, then in kcsd, when it can provide the real
>> IPMI service, it will reset the channel firstly to sync with host 
>> side software.
>
> "Userland will do the right thing" is not very convincing to a kernel 
> developer.
>
> Plus if the above is true, I would think that you would just want to 
> hold the device
> in an error state when it wasn't opened.
>
I understand your concern; of course, the driver needs to handle things well. 
But in fact, if a user app is truly a bad boy, it still can hang
the host side: set SMS_ATN, but no message returned when software host 
side requests, then host open-ipmi driver will hang, we
meet this kind of error to hang the customer's host. :) In my 
understanding, kcs-bmc should do the right thing about read and write,
the real transaction should be handled correctly by the kcsd.

And if no kcsd starts, then this kind of BMC can't be sold out. :)
> -corey
>
>>> -corey
>>>
>>>>>>>> + spin_lock_irq(&kcs_bmc->lock);
>>>>>>>> +
>>>>>>>> +    kcs_bmc->running = 0;
>>>>>>>> +
>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>> +
>>>>>>>> +    return 0;
>>>>>>>> +}
>>>>>>>> +
>>>>>
>>>>>
>>>>
>>>
>>
>
Corey Minyard Jan. 31, 2018, 1:25 a.m. | #12
On 01/30/2018 07:02 PM, Wang, Haiyue wrote:
>
>
> On 2018-01-31 08:52, Corey Minyard wrote:
>> On 01/30/2018 06:02 PM, Wang, Haiyue wrote:
>>>
>>>
>>> On 2018-01-30 21:49, Corey Minyard wrote:
>>>> On 01/29/2018 07:57 AM, Wang, Haiyue wrote:
>>>>>
>>>>>
>>>>> On 2018-01-26 22:48, Corey Minyard wrote:
>>>>>> On 01/26/2018 12:08 AM, Wang, Haiyue wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 2018-01-25 01:48, Corey Minyard wrote:
>>>>>>>> On 01/24/2018 10:06 AM, Haiyue Wang wrote:
>>>>>>>>> The KCS (Keyboard Controller Style) interface is used to 
>>>>>>>>> perform in-band
>>>>>>>>> IPMI communication between a server host and its BMC 
>>>>>>>>> (BaseBoard Management
>>>>>>>>> Controllers).
>>>>>>>>>
>>>>>>>>> This driver exposes the KCS interface on ASpeed SOCs (AST2400 
>>>>>>>>> and AST2500)
>>>>>>>>> as a character device. Such SOCs are commonly used as BMCs and 
>>>>>>>>> this driver
>>>>>>>>> implements the BMC side of the KCS interface.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
>>>>>>>>>
>>>>>>>>> ---
>>>>>>>>> v1->v2
>>>>>>>>>
>>>>>>>>> - Divide the driver into two parts, one handles the BMC KCS 
>>>>>>>>> IPMI 2.0 state;
>>>>>>>>>    the other handles the BMC KCS controller such as AST2500 IO 
>>>>>>>>> accessing.
>>>>>>>>> - Use the spin lock APIs to handle the device file operations 
>>>>>>>>> and BMC chip
>>>>>>>>>    IRQ interface for accessing the same KCS BMC data structure.
>>>>>>>>> - Enhanced the phases handling of the KCS BMC.
>>>>>>>>> - Unified the IOCTL definition for IPMI BMC, it will be used 
>>>>>>>>> by KCS and BT.
>>>>>>>>>
>>>>>>>>> ---
>>>>>>>>>
>>>>>>>>> +
>>>>>>>>> +static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
>>>>>>>>> +{
>>>>>>>>> +    u8 data;
>>>>>>>>> +
>>>>>>>>> +    switch (kcs_bmc->phase) {
>>>>>>>>> +    case KCS_PHASE_WRITE:
>>>>>>>>> +        set_state(kcs_bmc, WRITE_STATE);
>>>>>>>>> +
>>>>>>>>> +        /* set OBF before reading data */
>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>> +
>>>>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>>>>> +                        read_data(kcs_bmc);
>>>>>>
>>>>>> I missed this earlier, you need to issue a length error if the 
>>>>>> data is too large.
>>>>>>
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    case KCS_PHASE_WRITE_END:
>>>>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>>>>> +
>>>>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>>>>> +                        read_data(kcs_bmc);
>>>>>>>>> +
>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>>>>>> +        if (kcs_bmc->running) {
>>>>>>>>
>>>>>>>> Why do you only do this when running is set?  It won't hurt 
>>>>>>>> anything if it's not
>>>>>>>> set.  As it is, you have a race if something opens the device 
>>>>>>>> while this code
>>>>>>>> runs.
>>>>>>>>
>>>>>>>> Also, don't set the state to wait read until the "write" has 
>>>>>>>> finished (userland has
>>>>>>>> read the data out of the buffer).  More on that later.
>>>>>>>>
>>>>>>> Understood.
>>>>>>>>> + kcs_bmc->data_in_avail = true;
>>>>>>>>> + wake_up_interruptible(&kcs_bmc->queue);
>>>>>>>>> +        }
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    case KCS_PHASE_READ:
>>>>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
>>>>>>>>> +            set_state(kcs_bmc, IDLE_STATE);
>>>>>>>>> +
>>>>>>>>> +        data = read_data(kcs_bmc);
>>>>>>>>> +        if (data != KCS_CMD_READ_BYTE) {
>>>>>>>>> +            set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>> +            break;
>>>>>>>>> +        }
>>>>>>>>> +
>>>>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
>>>>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>> +            kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>>>>> +            break;
>>>>>>>>> +        }
>>>>>>>>> +
>>>>>>>>> +        write_data(kcs_bmc,
>>>>>>>>> + kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    case KCS_PHASE_ABORT_ERROR1:
>>>>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>>>>> +
>>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>> +
>>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    case KCS_PHASE_ABORT_ERROR2:
>>>>>>>>> +        set_state(kcs_bmc, IDLE_STATE);
>>>>>>>>> +
>>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>> +
>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>>>>> +
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    default:
>>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>> +
>>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>> +
>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>> +        break;
>>>>>>>>> +    }
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static void kcs_bmc_handle_command(struct kcs_bmc *kcs_bmc)
>>>>>>>>> +{
>>>>>>>>> +    u8 cmd;
>>>>>>>>> +
>>>>>>>>> +    set_state(kcs_bmc, WRITE_STATE);
>>>>>>>>> +
>>>>>>>>> +    /* Dummy data to generate OBF */
>>>>>>>>> +    write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>> +
>>>>>>>>> +    cmd = read_data(kcs_bmc);
>>>>>>>>
>>>>>>>> Shouldn't you check the phase in all the cases below and do error
>>>>>>>> handling if the phase isn't correct?
>>>>>>>>
>>>>>>>> Similar thing if the device here isn't open.  You need to handle
>>>>>>>> that gracefully.
>>>>>>>>
>>>>>>>> Also, you should remove data_in_avail and data_in_idx setting from
>>>>>>>> here, for reasons I will explain later.
>>>>>>>>
>>>>>>> If host software sends the data twice such as a retry before the 
>>>>>>> BMC's IPMI service starts,
>>>>>>> then the two IPMI requests will be merged into one, if not clear 
>>>>>>> data_in_idx after receiving
>>>>>>> KCS_CMD_WRITE_START. Most of the states are driven by host 
>>>>>>> software (SMS). :(
>>>>>>
>>>>>> True, but what if the host issues WRITE_START or a WRITE_END 
>>>>>> while this driver is in read
>>>>>> state?  The spec is unclear on this, but it really only makes 
>>>>>> sense for the host to issue
>>>>>> WRITE_START in idle state and WRITE_END in write state. IMHO it 
>>>>>> should go to error
>>>>>> state.  You might make the case that a WRITE_START anywhere 
>>>>>> restarts the transaction,
>>>>>> but the feel of the error state machine kind of goes against 
>>>>>> that. WRITE_END is definitely
>>>>>> wrong anywhere but write state.
>>>>>>
>>>>>> I just found the following in the spec (section 9.12):
>>>>>>
>>>>>>    Thus, since the interface will allow a command transfer to be
>>>>>>    started or restarted
>>>>>>    at any time when the input buffer is empty, software could 
>>>>>> elect to
>>>>>>    simply retry
>>>>>>    the command upon detecting an error condition, or issue a 
>>>>>> ‘known good’
>>>>>>    command in order to clear ERROR_STATE
>>>>>>
>>>>>> So a WRITE_START anywhere is ok.  A WRITE_END in the wrong state 
>>>>>> should probably
>>>>>> still go to error state.  This means the user needs to be able to 
>>>>>> handle a write error at
>>>>>> any time.  It also means it's very important to make sure the 
>>>>>> user does a read before
>>>>>> doing a write.  If the host re-issues a WRITE_START and writes a 
>>>>>> new command
>>>>>> between the time the user reads the data and writes the response, 
>>>>>> the response would
>>>>>> be for the wrong command.
>>>>>>
>>>>>>>>> +    switch (cmd) {
>>>>>>>>> +    case KCS_CMD_WRITE_START:
>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>> +        kcs_bmc->data_in_idx   = 0;
>>>>>>>>> +        kcs_bmc->phase         = KCS_PHASE_WRITE;
>>>>>>>>> +        kcs_bmc->error         = KCS_NO_ERROR;
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    case KCS_CMD_WRITE_END:
>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_WRITE_END;
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    case KCS_CMD_ABORT:
>>>>>>>>> +        if (kcs_bmc->error == KCS_NO_ERROR)
>>>>>>>>> +            kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
>>>>>>>>> +
>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    default:
>>>>>>>>> +        kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
>>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>>>>> +        break;
>>>>>>>>> +    }
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
>>>>>>>>> +{
>>>>>>>>> +    unsigned long flags;
>>>>>>>>> +    int ret = 0;
>>>>>>>>> +    u8 status;
>>>>>>>>> +
>>>>>>>>> +    spin_lock_irqsave(&kcs_bmc->lock, flags);
>>>>>>>>> +
>>>>>>>>> +    status = read_status(kcs_bmc) & (KCS_STATUS_IBF | 
>>>>>>>>> KCS_STATUS_CMD_DAT);
>>>>>>>>> +
>>>>>>>>> +    switch (status) {
>>>>>>>>> +    case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
>>>>>>>>> +        kcs_bmc_handle_command(kcs_bmc);
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    case KCS_STATUS_IBF:
>>>>>>>>> +        kcs_bmc_handle_data(kcs_bmc);
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    default:
>>>>>>>>> +        ret = -1;
>>>>>>>>> +        break;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    spin_unlock_irqrestore(&kcs_bmc->lock, flags);
>>>>>>>>> +
>>>>>>>>> +    return ret;
>>>>>>>>> +}
>>>>>>>>> +EXPORT_SYMBOL(kcs_bmc_handle_event);
>>>>>>>>> +
>>>>>>>>> +static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
>>>>>>>>> +{
>>>>>>>>> +    return container_of(filp->private_data, struct kcs_bmc, 
>>>>>>>>> miscdev);
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static int kcs_bmc_open(struct inode *inode, struct file *filp)
>>>>>>>>> +{
>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>> +    int ret = 0;
>>>>>>>>> +
>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>> +
>>>>>>>>> +    if (!kcs_bmc->running) {
>>>>>>>>> +        kcs_bmc->running       = 1;
>>>>>>>>> +        kcs_bmc->phase         = KCS_PHASE_IDLE;
>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>
>>>>>>>> If you do everything right, setting the phase and data_in_avail 
>>>>>>>> should not
>>>>>>>> be necessary here.
>>>>>>>>
>>>>>>>>> +    } else {
>>>>>>>>> +        ret = -EBUSY;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>> +
>>>>>>>>> +    return ret;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static unsigned int kcs_bmc_poll(struct file *filp, 
>>>>>>>>> poll_table *wait)
>>>>>>>>> +{
>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>> +    unsigned int mask = 0;
>>>>>>>>> +
>>>>>>>>> +    poll_wait(filp, &kcs_bmc->queue, wait);
>>>>>>>>> +
>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>> +
>>>>>>>>> +    if (kcs_bmc->data_in_avail)
>>>>>>>>> +        mask |= POLLIN;
>>>>>>>>> +
>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>> +
>>>>>>>>> +    return mask;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>>>>>> +                size_t count, loff_t *offset)
>>>>>>>>> +{
>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>> +    ssize_t ret = -EAGAIN;
>>>>>>>>> +
>>>>>>>>
>>>>>>>> This function still has some issues.
>>>>>>>>
>>>>>>>> You can't call copy_to_user() with a spinlock held or 
>>>>>>>> interrupts disabled.
>>>>>>>> To handle readers, you probably need a separate mutex.
>>>>>>>>
>>>>>>>> Also, this function can return -EAGAIN even if O_NONBLOCK is 
>>>>>>>> not set if
>>>>>>>> kcs_bmc->data_in_avail changes between when you wait on the event
>>>>>>>> and when you check it under the lock.
>>>>>>>>
>>>>>>>> You also clear data_in_avail even if the copy_to_user() fails, 
>>>>>>>> which is
>>>>>>>> wrong.
>>>>>>>>
>>>>>>>> I believe the best way to handle this would be to have the 
>>>>>>>> spinlock
>>>>>>>> protect the inner workings of the state machine and a mutex handle
>>>>>>>> copying data out, setting/clearing the running flag (thus a mutex
>>>>>>>> instead of spinlock in open and release) and the ioctl settings 
>>>>>>>> (except
>>>>>>>> for abort where you will need to grab the spinlock).
>>>>>>>>
>>>>>>>> After the wait event below, grab the mutex.  If data is not 
>>>>>>>> available
>>>>>>>> and O_NONBLOCK is not set, drop the mutex and retry. Otherwise
>>>>>>>> this is the only place (besides release) that sets 
>>>>>>>> data_in_avail to false.
>>>>>>>> Do the copy_to_user(), grab the spinlock, clear data_in_avail and
>>>>>>>> data_in_idx, then release the lock and mutex.  If you are really
>>>>>>>> adventurous you can do this without grabbing the lock using
>>>>>>>> barriers, but it's probably not necessary here.
>>>>>>>>
>>>>>>
>>>>>> With the state machine being able to be restarted at any time, 
>>>>>> you need
>>>>>> something a little different here.  You still need the mutex to 
>>>>>> handle
>>>>>> multiple readers and the copy.  I think the function should be 
>>>>>> something
>>>>>> like:
>>>>>>
>>>>> Since KCS is not a multi-reader protocol from BMC's view, you 
>>>>> makes things complex. :-)
>>>>
>>>> No, I don't think you understand.  The primary purpose of the 
>>>> complexity
>>>> here is to protect the driver from the host system (on the other 
>>>> side of
>>>> the KCS interface).  Without this protection, it is possible for 
>>>> the host
>>>> system to start a new write while the user on the BMC side is reading
>>>> data out, resulting in corrupt data being read.
>>>>
>>>> I haven't thought too much about this.  There may be a simpler way,
>>>> but the protection needs to be there.
>>>>
>>>> And you may not think you need to protect the driver against a
>>>> malicious BMC side user code, but you would be wrong.  You can
>>>> only have one opener, but with threads or a fork you can have
>>>> multiple readers.  And you don't know if a malicious piece of
>>>> code has taken over userland.  You always need to protect the
>>>> kernel.
>>>>
>>> Sure, the read/write have protected the critical data area with IRQ, 
>>> and also, these
>>> functions should be thread local safe I believe.
>>>
>>> spin_lock_irq(&kcs_bmc->lock);
>>> ...
>>> spin_unlock_irq(&kcs_bmc->lock);
>>>
>>
>> But remember, you can't call copy_to_user() when IRQs are off or when 
>> you are holding
>> a spinlock.  That is an absolute no.  It can crash the kernel.
>>
>> So you need a design that takes this into account, but will not 
>> result in the possibility
>> of bad data being read.
>>
> Yes, sure, as I said before: access_ok(VERIFY_WRITE, to, n), then 
> memcpy in spin_lock.

Where did you get the idea that this was ok?  It's not.  access_ok() is 
not actually very
useful, since the permissions on memory can change at any time unless 
you are holding
the mm lock, which is also not an ok thing to do.  It is entirely 
possible for access_ok()
to pass and copy_to_user() to fail.

I'm not exactly sure what you are saying, though.  In any event, a 
well-designed read()/write()
operation should leave the system unchanged if it gets an error.

-corey

>>>>>>    static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>>>                     size_t count, loff_t *offset)
>>>>>>    {
>>>>>>         struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>         ssize_t ret;
>>>>>>         bool avail;
>>>>>>         size_t data_size;
>>>>>>         u8 *data;
>>>>>>
>>>>>>         data = kmalloc(KCS_MSG_BUFSIZ, GFP_KERNEL);
>>>>>>         if (!data)
>>>>>>             return -ENOMEM;
>>>>>>
>>>>>>    retry:
>>>>>>         ret = -EAGAIN;
>>>>>>         if (!(filp->f_flags & O_NONBLOCK))
>>>>>>             wait_event_interruptible(kcs_bmc->queue,
>>>>>>                          kcs_bmc->data_in_avail);
>>>>>>
>>>>>>         mutex_lock(&kcs_bmc->read_mutex);
>>>>>>
>>>>>>         spin_lock_irq(&kcs_bmc->lock);
>>>>>>         avail = kcs_bmc->data_in_avail;
>>>>>>         if (avail) {
>>>>>>             memcpy(data, kcs_bmc->data_in, kcs_bmc->data_in_idx);
>>>>>>             data_size = kcs_bmc->data_in_idx;
>>>>>>         }
>>>>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>>>>
>>>>>>         if (!avail) {
>>>>>>             if (filp->f_flags & O_NONBLOCK)
>>>>>>                 goto out_mutex_unlock;
>>>>>>             mutex_unlock(&kcs_bmc->read_mutex);
>>>>>>             goto retry;
>>>>>>         }
>>>>>>
>>>>>>         if (count < data_size) {
>>>>>>             ret = -EOVERFLOW;
>>>>>>              ? I'm not sure about the error, but userspace needs 
>>>>>> to know.
>>>>>>             goto out_mutex_unlock;
>>>>
>>>> Maybe a length error to the host side here?
>>
>> You didn't comment on this or the other length error.  That needs to be
>> handled.
>>
> Yes, will send a length error by following KCS spec.
>>>>
>>>>>>         }
>>>>>>
>>>>>>         if (!copy_to_user(buf, data, data_size)) {
>>>>>>             ret = -EFAULT;
>>>>>>             goto out_mutex_unlock;
>>>>>>         }
>>>>>>
>>>>>>         ret = data_size;
>>>>>>
>>>>>>         spin_lock_irq(&kcs_bmc->lock);
>>>>>>
>>>>>>         if (kcs_bmc->phase != KCS_PHASE_WRITE_END_DONE)
>>>>>>             /* Something aborted or restarted the state machine. */
>>>>>>             ? Maybe restart if O_NONBLOCK is not set and -EAGAIN 
>>>>>> if it is?
>>>>>>             ret = -EIO;
>>>>>>         } else {
>>>>>>             kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>>>             kcs_bmc->data_in_avail = false;
>>>>>>             kcs_bmc->data_in_idx = 0;
>>>>>>         }
>>>>>>
>>>>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>>>>
>>>>>>    out_mutex_unlock:
>>>>>>         mutex_unlock(&kcs_bmc->read_mutex);
>>>>>>
>>>>>>         kfree(data);
>>>>>>
>>>>>>         return ret;
>>>>>>    }
>>>>>> Note that I added a state, KCS_PHASE_WRITE_END_DONE, which would be
>>>>>> set after the final byte from the host is received.  You want the 
>>>>>> read here
>>>>>> done before you can do the write below to avoid the race I talked 
>>>>>> about.
>>>>>>
>>>>>> There is a local copy made of the data.  What you *never* want to 
>>>>>> happen
>>>>>> here is for the state machine to start processing a new write 
>>>>>> command
>>>>>> while the data is being copied.  It could result in corrupt data 
>>>>>> being read
>>>>>> and some random operation being done by the BMC.
>>>>>>
>>>>>> If you want to avoid the local copy, it could be done, but it's 
>>>>>> more complex.
>>>>>>
>>>>>>>>> +    if (!(filp->f_flags & O_NONBLOCK))
>>>>>>>>> + wait_event_interruptible(kcs_bmc->queue,
>>>>>>>>> +                     kcs_bmc->data_in_avail);
>>>>>>>>> +
>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>> +
>>>>>>>>> +    if (kcs_bmc->data_in_avail) {
>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>> +
>>>>>>>>> +        if (count > kcs_bmc->data_in_idx)
>>>>>>>>> +            count = kcs_bmc->data_in_idx;
>>>>>>>>> +
>>>>>>>>> +        if (!copy_to_user(buf, kcs_bmc->data_in, count))
>>>>>>>>> +            ret = count;
>>>>>>>>> +        else
>>>>>>>>> +            ret = -EFAULT;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>> +
>>>>>>>>> +    return ret;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static ssize_t kcs_bmc_write(struct file *filp, const char *buf,
>>>>>>>>> +                 size_t count, loff_t *offset)
>>>>>>>>> +{
>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>> +    ssize_t ret = count;
>>>>>>>>> +
>>>>>>>>> +    if (count < 1 || count > KCS_MSG_BUFSIZ)
>>>>>>>>> +        return -EINVAL;
>>>>>>>>> +
>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>> +
>>>>>>>>> +    if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
>>>>>>>>> +        if (copy_from_user(kcs_bmc->data_out, buf, count)) {
>>>>>>>>> + spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>> +            return -EFAULT;
>>>>>>>>> +        }
>>>>>>>>> +
>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_READ;
>>>>>>>>> +        kcs_bmc->data_out_idx = 1;
>>>>>>>>> +        kcs_bmc->data_out_len = count;
>>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->data_out[0]);
>>>>>>>>> +    } else if (kcs_bmc->phase == KCS_PHASE_READ) {
>>>>>>>>> +        ret = -EBUSY;
>>>>>>>>> +    } else {
>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>
>>>>>>>> Is there a reason you return -EINVAL here?  Why not just -EBUSY 
>>>>>>>> in all
>>>>>>>> cases?  Is there something that userland will need to do 
>>>>>>>> differently?
>>>>>>>>
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>> +
>>>>>>>>> +    return ret;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
>>>>>>>>> +              unsigned long arg)
>>>>>>>>> +{
>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>> +    long ret = 0;
>>>>>>>>> +
>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>> +
>>>>>>>>> +    switch (cmd) {
>>>>>>>>> +    case IPMI_BMC_IOCTL_SET_SMS_ATN:
>>>>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>>>>> +                        KCS_STATUS_SMS_ATN);
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
>>>>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>>>>> +                        0);
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    case IPMI_BMC_IOCTL_FORCE_ABORT:
>>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>> +
>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    default:
>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>> +        break;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>> +
>>>>>>>>> +    return ret;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static int kcs_bmc_release(struct inode *inode, struct file 
>>>>>>>>> *filp)
>>>>>>>>> +{
>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>> +
>>>>>>>>
>>>>>>>> What happens if the device gets closed in the middle of a 
>>>>>>>> transaction?  That's
>>>>>>>> an important case to handle.  If something is in process, you 
>>>>>>>> need to abort it.
>>>>>>>>
>>>>>>> The device just provides the read & write data, the transaction 
>>>>>>> is handled in the KCS
>>>>>>> controller's IRQ handler.
>>>>>>
>>>>>> From the spec, section 9.14:
>>>>>>
>>>>>>    The BMC must change the status to ERROR_STATE on any condition 
>>>>>> where it
>>>>>>    aborts a command transfer in progress.
>>>>>>
>>>>>> So you need to do something here.
>>>>>>
>>>>> In practice, we do this as spec said in ipmid, NOT in driver, 
>>>>> driver can't handle anything, let's
>>>>> make it simple, thanks!
>>>>
>>>> If ipmid crashes or is killed, how does it accomplish this?
>>>>
>>> Every time ipmids (or kcsd) crashed or killed, it needs start to 
>>> call FORCE_ABORT firstly, to sync with
>>> host side software.
>>>>>
>>>>> Whenever the BMC is reset (from power-on or a hard reset), the 
>>>>> State Bits are initialized to “11 - Error State”. Doing so
>>>>> allows SMS to detect that the BMC has been reset and that any 
>>>>> message in process has been terminated by the BMC.
>>>>
>>>> Right, that's fine, like it should be.  But we are not talking 
>>>> about a reset.
>>>>
>>> I think the final error handling solution is that kcsd (user land) 
>>> runs, otherwise, the host software side still got stuck. We meet
>>> this kind of issue, so in general, we just don't handle some 
>>> minor errors in driver, then in kcsd, when it can provide the real
>>> IPMI service, it will reset the channel firstly to sync with host 
>>> side software.
>>
>> "Userland will do the right thing" is not very convincing to a kernel 
>> developer.
>>
>> Plus if the above is true, I would think that you would just want to 
>> hold the device
>> in an error state when it wasn't opened.
>>
> I understand your concern, of course, driver need handles things well. 
> But in fact, if a user app is truly a bad boy, it still can hang
> the host side: set SMS_ATN, but no message returned when software host 
> side requests, then host open-ipmi driver will hang, we
> meet this kind of error to hang the customer's host. :) In my 
> understanding, kcs-bmc should do the right thing about read and write,
> the real transaction should be handled correctly by the kcsd.
>
> And if no kcsd starts, then this kind of BMC can't be sold out. :)

True.  I'm not as concerned about this sort of thing.  It's nicer to the 
host side if
it can detect problems quickly, but it will eventually time out.

From what I can tell from the current design, if the BMC userland is 
not running,
the driver will step through the state machine until it hits read state, 
then it
will sit there until the host times out and aborts the operation.

IMHO, it would be better for the host side if the driver just stayed in 
error state
if nothing had it open.  I would think the spec says that in the quote 
I referenced
above, but that quote, like many things in the IPMI spec, is fairly 
vague and could
be interpreted many ways.

-corey


>> -corey
>>
>>>> -corey
>>>>
>>>>>>>>> + spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>> +
>>>>>>>>> +    kcs_bmc->running = 0;
>>>>>>>>> +
>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>> +
>>>>>>>>> +    return 0;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
>
Wang, Haiyue Jan. 31, 2018, 1:37 a.m. | #13
On 2018-01-31 09:25, Corey Minyard wrote:
> On 01/30/2018 07:02 PM, Wang, Haiyue wrote:
>>
>>
>> On 2018-01-31 08:52, Corey Minyard wrote:
>>> On 01/30/2018 06:02 PM, Wang, Haiyue wrote:
>>>>
>>>>
>>>> On 2018-01-30 21:49, Corey Minyard wrote:
>>>>> On 01/29/2018 07:57 AM, Wang, Haiyue wrote:
>>>>>>
>>>>>>
>>>>>> On 2018-01-26 22:48, Corey Minyard wrote:
>>>>>>> On 01/26/2018 12:08 AM, Wang, Haiyue wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> On 2018-01-25 01:48, Corey Minyard wrote:
>>>>>>>>> On 01/24/2018 10:06 AM, Haiyue Wang wrote:
>>>>>>>>>> The KCS (Keyboard Controller Style) interface is used to 
>>>>>>>>>> perform in-band
>>>>>>>>>> IPMI communication between a server host and its BMC 
>>>>>>>>>> (BaseBoard Management
>>>>>>>>>> Controllers).
>>>>>>>>>>
>>>>>>>>>> This driver exposes the KCS interface on ASpeed SOCs (AST2400 
>>>>>>>>>> and AST2500)
>>>>>>>>>> as a character device. Such SOCs are commonly used as BMCs 
>>>>>>>>>> and this driver
>>>>>>>>>> implements the BMC side of the KCS interface.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
>>>>>>>>>>
>>>>>>>>>> ---
>>>>>>>>>> v1->v2
>>>>>>>>>>
>>>>>>>>>> - Divide the driver into two parts, one handles the BMC KCS 
>>>>>>>>>> IPMI 2.0 state;
>>>>>>>>>>    the other handles the BMC KCS controller such as AST2500 
>>>>>>>>>> IO accessing.
>>>>>>>>>> - Use the spin lock APIs to handle the device file operations 
>>>>>>>>>> and BMC chip
>>>>>>>>>>    IRQ inferface for accessing the same KCS BMC data structure.
>>>>>>>>>> - Enhanced the phases handling of the KCS BMC.
>>>>>>>>>> - Unified the IOCTL definition for IPMI BMC, it will be used 
>>>>>>>>>> by KCS and BT.
>>>>>>>>>>
>>>>>>>>>> ---
>>>>>>>>>>
>>>>>>>>>> +
>>>>>>>>>> +static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
>>>>>>>>>> +{
>>>>>>>>>> +    u8 data;
>>>>>>>>>> +
>>>>>>>>>> +    switch (kcs_bmc->phase) {
>>>>>>>>>> +    case KCS_PHASE_WRITE:
>>>>>>>>>> +        set_state(kcs_bmc, WRITE_STATE);
>>>>>>>>>> +
>>>>>>>>>> +        /* set OBF before reading data */
>>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>> +
>>>>>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>>>>>> +                        read_data(kcs_bmc);
>>>>>>>
>>>>>>> I missed this earlier, you need to issue a length error if the 
>>>>>>> data is too large.
>>>>>>>
>>>>>>>>>> +        break;
>>>>>>>>>> +
>>>>>>>>>> +    case KCS_PHASE_WRITE_END:
>>>>>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>>>>>> +
>>>>>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>>>>>> +                        read_data(kcs_bmc);
>>>>>>>>>> +
>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>>>>>>> +        if (kcs_bmc->running) {
>>>>>>>>>
>>>>>>>>> Why do you only do this when running is set?  It won't hurt 
>>>>>>>>> anything if it's not
>>>>>>>>> set.  As it is, you have a race if something opens the device 
>>>>>>>>> while this code
>>>>>>>>> runs.
>>>>>>>>>
>>>>>>>>> Also, don't set the state to wait read until the "write" has 
>>>>>>>>> finished (userland has
>>>>>>>>> read the data out of the buffer).  More on that later.
>>>>>>>>>
>>>>>>>> Understood.
>>>>>>>>>> + kcs_bmc->data_in_avail = true;
>>>>>>>>>> + wake_up_interruptible(&kcs_bmc->queue);
>>>>>>>>>> +        }
>>>>>>>>>> +        break;
>>>>>>>>>> +
>>>>>>>>>> +    case KCS_PHASE_READ:
>>>>>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
>>>>>>>>>> +            set_state(kcs_bmc, IDLE_STATE);
>>>>>>>>>> +
>>>>>>>>>> +        data = read_data(kcs_bmc);
>>>>>>>>>> +        if (data != KCS_CMD_READ_BYTE) {
>>>>>>>>>> +            set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>> +            break;
>>>>>>>>>> +        }
>>>>>>>>>> +
>>>>>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
>>>>>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>> +            kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>>>>>> +            break;
>>>>>>>>>> +        }
>>>>>>>>>> +
>>>>>>>>>> +        write_data(kcs_bmc,
>>>>>>>>>> + kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
>>>>>>>>>> +        break;
>>>>>>>>>> +
>>>>>>>>>> +    case KCS_PHASE_ABORT_ERROR1:
>>>>>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>>>>>> +
>>>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>>> +
>>>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
>>>>>>>>>> +        break;
>>>>>>>>>> +
>>>>>>>>>> +    case KCS_PHASE_ABORT_ERROR2:
>>>>>>>>>> +        set_state(kcs_bmc, IDLE_STATE);
>>>>>>>>>> +
>>>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>>> +
>>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>>>>>> +
>>>>>>>>>> +        break;
>>>>>>>>>> +
>>>>>>>>>> +    default:
>>>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>>> +
>>>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>>> +
>>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>> +        break;
>>>>>>>>>> +    }
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static void kcs_bmc_handle_command(struct kcs_bmc *kcs_bmc)
>>>>>>>>>> +{
>>>>>>>>>> +    u8 cmd;
>>>>>>>>>> +
>>>>>>>>>> +    set_state(kcs_bmc, WRITE_STATE);
>>>>>>>>>> +
>>>>>>>>>> +    /* Dummy data to generate OBF */
>>>>>>>>>> +    write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>> +
>>>>>>>>>> +    cmd = read_data(kcs_bmc);
>>>>>>>>>
>>>>>>>>> Shouldn't you check the phase in all the cases below and do error
>>>>>>>>> handling if the phase isn't correct?
>>>>>>>>>
>>>>>>>>> Similar thing if the device here isn't open.  You need to handle
>>>>>>>>> that gracefully.
>>>>>>>>>
>>>>>>>>> Also, you should remove data_in_avail and data_in_idx setting 
>>>>>>>>> from
>>>>>>>>> here, for reasons I will explain later.
>>>>>>>>>
>>>>>>>> If host software sends the data twice such as a retry before 
>>>>>>>> the BMC's IPMI service starts,
>>>>>>>> then the two IPMI requests will be merged into one, if not 
>>>>>>>> clear data_in_idx after receving
>>>>>>>> KCS_CMD_WRITE_START. Most of the states are driven by host 
>>>>>>>> software (SMS). :(
>>>>>>>
>>>>>>> True, but what if the host issues WRITE_START or a WRITE_END 
>>>>>>> while this driver is in read
>>>>>>> state?  The spec is unclear on this, but it really only makes 
>>>>>>> sense for the host to issue
>>>>>>> WRITE_START in idle state and WRITE_END in write state. IMHO it 
>>>>>>> should go to error
>>>>>>> state.  You might make the case that a WRITE_START anywhere 
>>>>>>> restarts the transaction,
>>>>>>> but the feel of the error state machine kind of goes against 
>>>>>>> that. WRITE_END is definitely
>>>>>>> wrong anywhere but write state.
>>>>>>>
>>>>>>> I just found the following in the spec (section 9.12):
>>>>>>>
>>>>>>>    Thus, since the interface will allow a command transfer to be
>>>>>>>    started or restarted
>>>>>>>    at any time when the input buffer is empty, software could 
>>>>>>> elect to
>>>>>>>    simply retry
>>>>>>>    the command upon detecting an error condition, or issue a 
>>>>>>> ‘known good’
>>>>>>>    command in order to clear ERROR_STATE
>>>>>>>
>>>>>>> So a WRITE_START anywhere is ok.  A WRITE_END in the wrong state 
>>>>>>> should probably
>>>>>>> still go to error state.  This means the user needs to be able 
>>>>>>> to handle a write error at
>>>>>>> any time.  It also means it's very important to make sure the 
>>>>>>> user does a read before
>>>>>>> doing a write.  If the host re-issues a WRITE_START and writes a 
>>>>>>> new command
>>>>>>> between the time the user reads the data and writes the response, 
>>>>>>> the response would
>>>>>>> be for the wrong command.
>>>>>>>
>>>>>>>>>> +    switch (cmd) {
>>>>>>>>>> +    case KCS_CMD_WRITE_START:
>>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>>> +        kcs_bmc->data_in_idx   = 0;
>>>>>>>>>> +        kcs_bmc->phase         = KCS_PHASE_WRITE;
>>>>>>>>>> +        kcs_bmc->error         = KCS_NO_ERROR;
>>>>>>>>>> +        break;
>>>>>>>>>> +
>>>>>>>>>> +    case KCS_CMD_WRITE_END:
>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_WRITE_END;
>>>>>>>>>> +        break;
>>>>>>>>>> +
>>>>>>>>>> +    case KCS_CMD_ABORT:
>>>>>>>>>> +        if (kcs_bmc->error == KCS_NO_ERROR)
>>>>>>>>>> +            kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
>>>>>>>>>> +
>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
>>>>>>>>>> +        break;
>>>>>>>>>> +
>>>>>>>>>> +    default:
>>>>>>>>>> +        kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
>>>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>>>>>> +        break;
>>>>>>>>>> +    }
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
>>>>>>>>>> +{
>>>>>>>>>> +    unsigned long flags;
>>>>>>>>>> +    int ret = 0;
>>>>>>>>>> +    u8 status;
>>>>>>>>>> +
>>>>>>>>>> +    spin_lock_irqsave(&kcs_bmc->lock, flags);
>>>>>>>>>> +
>>>>>>>>>> +    status = read_status(kcs_bmc) & (KCS_STATUS_IBF | 
>>>>>>>>>> KCS_STATUS_CMD_DAT);
>>>>>>>>>> +
>>>>>>>>>> +    switch (status) {
>>>>>>>>>> +    case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
>>>>>>>>>> +        kcs_bmc_handle_command(kcs_bmc);
>>>>>>>>>> +        break;
>>>>>>>>>> +
>>>>>>>>>> +    case KCS_STATUS_IBF:
>>>>>>>>>> +        kcs_bmc_handle_data(kcs_bmc);
>>>>>>>>>> +        break;
>>>>>>>>>> +
>>>>>>>>>> +    default:
>>>>>>>>>> +        ret = -1;
>>>>>>>>>> +        break;
>>>>>>>>>> +    }
>>>>>>>>>> +
>>>>>>>>>> + spin_unlock_irqrestore(&kcs_bmc->lock, flags);
>>>>>>>>>> +
>>>>>>>>>> +    return ret;
>>>>>>>>>> +}
>>>>>>>>>> +EXPORT_SYMBOL(kcs_bmc_handle_event);
>>>>>>>>>> +
>>>>>>>>>> +static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
>>>>>>>>>> +{
>>>>>>>>>> +    return container_of(filp->private_data, struct kcs_bmc, 
>>>>>>>>>> miscdev);
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static int kcs_bmc_open(struct inode *inode, struct file *filp)
>>>>>>>>>> +{
>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>> +    int ret = 0;
>>>>>>>>>> +
>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>> +
>>>>>>>>>> +    if (!kcs_bmc->running) {
>>>>>>>>>> +        kcs_bmc->running       = 1;
>>>>>>>>>> +        kcs_bmc->phase         = KCS_PHASE_IDLE;
>>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>>
>>>>>>>>> If you do everything right, setting the phase and 
>>>>>>>>> data_in_avail should not
>>>>>>>>> be necessary here.
>>>>>>>>>
>>>>>>>>>> +    } else {
>>>>>>>>>> +        ret = -EBUSY;
>>>>>>>>>> +    }
>>>>>>>>>> +
>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>> +
>>>>>>>>>> +    return ret;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static unsigned int kcs_bmc_poll(struct file *filp, 
>>>>>>>>>> poll_table *wait)
>>>>>>>>>> +{
>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>> +    unsigned int mask = 0;
>>>>>>>>>> +
>>>>>>>>>> +    poll_wait(filp, &kcs_bmc->queue, wait);
>>>>>>>>>> +
>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>> +
>>>>>>>>>> +    if (kcs_bmc->data_in_avail)
>>>>>>>>>> +        mask |= POLLIN;
>>>>>>>>>> +
>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>> +
>>>>>>>>>> +    return mask;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>>>>>>> +                size_t count, loff_t *offset)
>>>>>>>>>> +{
>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>> +    ssize_t ret = -EAGAIN;
>>>>>>>>>> +
>>>>>>>>>
>>>>>>>>> This function still has some issues.
>>>>>>>>>
>>>>>>>>> You can't call copy_to_user() with a spinlock held or 
>>>>>>>>> interrupts disabled.
>>>>>>>>> To handle readers, you probably need a separate mutex.
>>>>>>>>>
>>>>>>>>> Also, this function can return -EAGAIN even if O_NONBLOCK is 
>>>>>>>>> not set if
>>>>>>>>> kcs_bmc->data_in_avail changes between when you wait on the event
>>>>>>>>> and when you check it under the lock.
>>>>>>>>>
>>>>>>>>> You also clear data_in_avail even if the copy_to_user() fails, 
>>>>>>>>> which is
>>>>>>>>> wrong.
>>>>>>>>>
>>>>>>>>> I believe the best way to handle this would be to have the 
>>>>>>>>> spinlock
>>>>>>>>> protect the inner workings of the state machine and a mutex 
>>>>>>>>> handle
>>>>>>>>> copying data out, setting/clearing the running flag (thus a mutex
>>>>>>>>> instead of spinlock in open and release) and the ioctl 
>>>>>>>>> settings (except
>>>>>>>>> for abort where you will need to grab the spinlock).
>>>>>>>>>
>>>>>>>>> After the wait event below, grab the mutex.  If data is not 
>>>>>>>>> available
>>>>>>>>> and O_NONBLOCK is not set, drop the mutex and retry. Otherwise
>>>>>>>>> this is the only place (besides release) that sets 
>>>>>>>>> data_in_avail to false.
>>>>>>>>> Do the copy_to_user(), grab the spinlock, clear data_in_avail and
>>>>>>>>> data_in_idx, then release the lock and mutex.  If you are really
>>>>>>>>> adventurous you can do this without grabbing the lock using
>>>>>>>>> barriers, but it's probably not necessary here.
>>>>>>>>>
>>>>>>>
>>>>>>> With the state machine being able to be restarted at any time, 
>>>>>>> you need
>>>>>>> something a little different here.  You still need the mutex to 
>>>>>>> handle
>>>>>>> multiple readers and the copy.  I think the function should be 
>>>>>>> something
>>>>>>> like:
>>>>>>>
>>>>>> Since KCS is not a multi-reader protocol from BMC's view, you 
>>>>>> makes things complex. :-)
>>>>>
>>>>> No, I don't think you understand.  The primary purpose of the 
>>>>> complexity
>>>>> here is to protect the driver from the host system (on the other 
>>>>> side of
>>>>> the KCS interface).  Without this protection, it is possible for 
>>>>> the host
>>>>> system to start a new write while the user on the BMC side is reading
>>>>> data out, resulting in corrupt data being read.
>>>>>
>>>>> I haven't thought too much about this.  There may be a simpler way,
>>>>> but the protection needs to be there.
>>>>>
>>>>> And you may not think you need to protect the driver against a
>>>>> malicious BMC side user code, but you would be wrong.  You can
>>>>> only have one opener, but with threads or a fork you can have
>>>>> multiple readers.  And you don't know if a malicious piece of
>>>>> code has taken over userland.  You always need to protect the
>>>>> kernel.
>>>>>
>>>> Sure, the read/write have protected the critical data area with 
>>>> IRQ, and also, these
>>>> functions should be thread local safe I believe.
>>>>
>>>> spin_lock_irq(&kcs_bmc->lock);
>>>> ...
>>>> spin_unlock_irq(&kcs_bmc->lock);
>>>>
>>>
>>> But remember, you can't call copy_to_user() when IRQs are off or 
>>> when you are holding
>>> a spinlock.  That is an absolute no.  It can crash the kernel.
>>>
>>> So you need a design that takes this into account, but will not 
>>> result in the possibility
>>> of bad data being read.
>>>
>> Yes, sure, as I said before: access_ok(VERIFY_WRITE, to, n), then 
>> memcpy in spin_lock.
>
> Where did you get the idea that this was ok?  It's not. access_ok() is 
> not actually very
> useful, since the permissions on memory can change at any time unless 
> you are holding
> the mm lock, which is also not an ok thing to do.  It is entirely 
> possible for access_ok()
> to pass and copy_to_user() to fail.
>
I thought memcpy will not fail. :(
> I'm not exactly sure what you are saying, though.  In any event, a 
> well-designed read()/write()
> operation should leave the system unchanged if it gets an error.
>
I saw that BT uses a local buffer. If I change the '#define KCS_MSG_BUFSIZ
1024' to '... 512', would it be OK to do the same
as BT?

static ssize_t bt_bmc_read(struct file *file, char __user *buf,
                size_t count, loff_t *ppos)
{
     struct bt_bmc *bt_bmc = file_bt_bmc(file);
     u8 len;
     int len_byte = 1;
     u8 kbuffer[BT_BMC_BUFFER_SIZE];  --> #define BT_BMC_BUFFER_SIZE 256

> -corey
>
>>>>>>>    static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>>>>                     size_t count, loff_t *offset)
>>>>>>>    {
>>>>>>>         struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>         ssize_t ret;
>>>>>>>         bool avail;
>>>>>>>         size_t data_size;
>>>>>>>         u8 *data;
>>>>>>>
>>>>>>>         data = kmalloc(KCS_MSG_BUFSIZ, GFP_KERNEL);
>>>>>>>         if (!data)
>>>>>>>             return -ENOMEM;
>>>>>>>
>>>>>>>    retry:
>>>>>>>         ret = -EAGAIN;
>>>>>>>         if (!(filp->f_flags & O_NONBLOCK))
>>>>>>> wait_event_interruptible(kcs_bmc->queue,
>>>>>>>                          kcs_bmc->data_in_avail);
>>>>>>>
>>>>>>>         mutex_lock(&kcs_bmc->read_mutex);
>>>>>>>
>>>>>>>         spin_lock_irq(&kcs_bmc->lock);
>>>>>>>         avail = kcs_bmc->data_in_avail;
>>>>>>>         if (avail) {
>>>>>>>             memcpy(data, kcs_bmc->data_in, kcs_bmc->data_in_idx);
>>>>>>>             data_size = kcs_bmc->data_in_idx;
>>>>>>>         }
>>>>>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>
>>>>>>>         if (!avail) {
>>>>>>>             if (filp->f_flags & O_NONBLOCK)
>>>>>>>                 goto out_mutex_unlock;
>>>>>>>             mutex_unlock(&kcs_bmc->read_mutex);
>>>>>>>             goto retry;
>>>>>>>         }
>>>>>>>
>>>>>>>         if (count < data_size) {
>>>>>>>             ret = -EOVERFLOW;
>>>>>>>              ? I'm not sure about the error, but userspace needs 
>>>>>>> to know.
>>>>>>>             goto out_mutex_unlock;
>>>>>
>>>>> Maybe a length error to the host side here?
>>>
>>> You didn't comment on this or the other length error.  That needs to be
>>> handled.
>>>
>> Yes, will send a length error by following KCS spec.
>>>>>
>>>>>>>         }
>>>>>>>
>>>>>>>         if (!copy_to_user(buf, data, data_size)) {
>>>>>>>             ret = -EFAULT;
>>>>>>>             goto out_mutex_unlock;
>>>>>>>         }
>>>>>>>
>>>>>>>         ret = data_size;
>>>>>>>
>>>>>>>         spin_lock_irq(&kcs_bmc->lock);
>>>>>>>
>>>>>>>         if (kcs_bmc->phase != KCS_PHASE_WRITE_END_DONE)
>>>>>>>             /* Something aborted or restarted the state machine. */
>>>>>>>             ? Maybe restart if O_NONBLOCK is not set and -EAGAIN 
>>>>>>> if it is?
>>>>>>>             ret = -EIO;
>>>>>>>         } else {
>>>>>>>             kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>>>>             kcs_bmc->data_in_avail = false;
>>>>>>>             kcs_bmc->data_in_idx = 0;
>>>>>>>         }
>>>>>>>
>>>>>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>
>>>>>>>    out_mutex_unlock:
>>>>>>>         mutex_unlock(&kcs_bmc->read_mutex);
>>>>>>>
>>>>>>>         kfree(data);
>>>>>>>
>>>>>>>         return ret;
>>>>>>>    }
>>>>>>> Note that I added a state, KCS_PHASE_WRITE_END_DONE, which would be
>>>>>>> set after the final byte from the host is received. You want the 
>>>>>>> read here
>>>>>>> done before you can do the write below to avoid the race I 
>>>>>>> talked about.
>>>>>>>
>>>>>>> There is a local copy made of the data.  What you *never* want 
>>>>>>> to happen
>>>>>>> here is for the state machine to start processing a new write 
>>>>>>> command
>>>>>>> while the data is being copied.  It could result in corrupt data 
>>>>>>> being read
>>>>>>> and some random operation being done by the BMC.
>>>>>>>
>>>>>>> If you want to avoid the local copy, it could be done, but it's 
>>>>>>> more complex.
>>>>>>>
>>>>>>>>>> +    if (!(filp->f_flags & O_NONBLOCK))
>>>>>>>>>> + wait_event_interruptible(kcs_bmc->queue,
>>>>>>>>>> + kcs_bmc->data_in_avail);
>>>>>>>>>> +
>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>> +
>>>>>>>>>> +    if (kcs_bmc->data_in_avail) {
>>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>>> +
>>>>>>>>>> +        if (count > kcs_bmc->data_in_idx)
>>>>>>>>>> +            count = kcs_bmc->data_in_idx;
>>>>>>>>>> +
>>>>>>>>>> +        if (!copy_to_user(buf, kcs_bmc->data_in, count))
>>>>>>>>>> +            ret = count;
>>>>>>>>>> +        else
>>>>>>>>>> +            ret = -EFAULT;
>>>>>>>>>> +    }
>>>>>>>>>> +
>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>> +
>>>>>>>>>> +    return ret;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static ssize_t kcs_bmc_write(struct file *filp, const char 
>>>>>>>>>> *buf,
>>>>>>>>>> +                 size_t count, loff_t *offset)
>>>>>>>>>> +{
>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>> +    ssize_t ret = count;
>>>>>>>>>> +
>>>>>>>>>> +    if (count < 1 || count > KCS_MSG_BUFSIZ)
>>>>>>>>>> +        return -EINVAL;
>>>>>>>>>> +
>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>> +
>>>>>>>>>> +    if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
>>>>>>>>>> +        if (copy_from_user(kcs_bmc->data_out, buf, count)) {
>>>>>>>>>> + spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>> +            return -EFAULT;
>>>>>>>>>> +        }
>>>>>>>>>> +
>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_READ;
>>>>>>>>>> +        kcs_bmc->data_out_idx = 1;
>>>>>>>>>> +        kcs_bmc->data_out_len = count;
>>>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->data_out[0]);
>>>>>>>>>> +    } else if (kcs_bmc->phase == KCS_PHASE_READ) {
>>>>>>>>>> +        ret = -EBUSY;
>>>>>>>>>> +    } else {
>>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>>
>>>>>>>>> Is there a reason you return -EINVAL here?  Why not just 
>>>>>>>>> -EBUSY in all
>>>>>>>>> cases?  Is there something that userland will need to do 
>>>>>>>>> differently?
>>>>>>>>>
>>>>>>>>>> +    }
>>>>>>>>>> +
>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>> +
>>>>>>>>>> +    return ret;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
>>>>>>>>>> +              unsigned long arg)
>>>>>>>>>> +{
>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>> +    long ret = 0;
>>>>>>>>>> +
>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>> +
>>>>>>>>>> +    switch (cmd) {
>>>>>>>>>> +    case IPMI_BMC_IOCTL_SET_SMS_ATN:
>>>>>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>>>>>> +                        KCS_STATUS_SMS_ATN);
>>>>>>>>>> +        break;
>>>>>>>>>> +
>>>>>>>>>> +    case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
>>>>>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>>>>>> +                        0);
>>>>>>>>>> +        break;
>>>>>>>>>> +
>>>>>>>>>> +    case IPMI_BMC_IOCTL_FORCE_ABORT:
>>>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>> +
>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>>> +        break;
>>>>>>>>>> +
>>>>>>>>>> +    default:
>>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>>> +        break;
>>>>>>>>>> +    }
>>>>>>>>>> +
>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>> +
>>>>>>>>>> +    return ret;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static int kcs_bmc_release(struct inode *inode, struct file 
>>>>>>>>>> *filp)
>>>>>>>>>> +{
>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>> +
>>>>>>>>>
>>>>>>>>> What happens if the device gets closed in the middle of a 
>>>>>>>>> transaction?  That's
>>>>>>>>> an important case to handle.  If something is in process, you 
>>>>>>>>> need to abort it.
>>>>>>>>>
>>>>>>>> The device just provides the read & write data, the transaction 
>>>>>>>> is handled in the KCS
>>>>>>>> controller's IRQ handler.
>>>>>>>
>>>>>>> From the spec, section 9.14:
>>>>>>>
>>>>>>>    The BMC must change the status to ERROR_STATE on any 
>>>>>>> condition where it
>>>>>>>    aborts a command transfer in progress.
>>>>>>>
>>>>>>> So you need to do something here.
>>>>>>>
>>>>>> In practice, we do this as spec said in ipmid, NOT in driver, 
>>>>>> driver can't handle anything, let's
>>>>>> make it simple, thanks!
>>>>>
>>>>> If ipmid crashes or is killed, how does it accomplish this?
>>>>>
>>>> Every time ipmid (or kcsd) crashes or is killed, it needs to start by
>>>> calling FORCE_ABORT first, to sync with
>>>> host side software.
>>>>>>
>>>>>> Whenever the BMC is reset (from power-on or a hard reset), the 
>>>>>> State Bits are initialized to “11 - Error State”. Doing so
>>>>>> allows SMS to detect that the BMC has been reset and that any 
>>>>>> message in process has been terminated by the BMC.
>>>>>
>>>>> Right, that's fine, like it should be.  But we are not talking 
>>>>> about a reset.
>>>>>
>>>> I think the final error handling solution is that kcsd (user land)
>>>> runs; otherwise, the host software side still gets stuck. We have met
>>>> this kind of issue, so in general, we just don't handle some
>>>> minor errors in the driver; then in kcsd, when it can provide the real
>>>> IPMI service, it will reset the channel first to sync with host
>>>> side software.
>>>
>>> "Userland will do the right thing" is not very convincing to a 
>>> kernel developer.
>>>
>>> Plus if the above is true, I would think that you would just want to 
>>> hold the device
>>> in an error state when it wasn't opened.
>>>
>> I understand your concern, of course, driver need handles things 
>> well. But in fact, if a user app is truly a bad boy, it still can hang
>> the host side: set SMS_ATN, but no message returned when software 
>> host side requests, then host open-ipmi driver will hang, we
>> meet this kind of error to hang the customer's host. :) In my 
>> understanding, kcs-bmc should do the right thing about read and write,
>> the real transaction should be handled correctly by the kcsd.
>>
>> And if no kcsd starts, then this kind of BMC can't be sold out. :)
>
> True.  I'm not as concerned about this sort of thing.  It's nicer to 
> the host side if
> it can detect problems quickly, but it will eventually time out.
>
> From what I can tell from the current design, if the BMC userland is 
> not running,
> the driver will step through the state machine until it hits read 
> state, then it
> will sit there until the host times out and aborts the operation.
>
> IMHO, it would be better for the host side if the driver just stayed 
> in error state
> if nothing had it open.  I would think the spec says that in the
> quote I referenced
> above, but that quote, like many things in the IPMI spec, is fairly 
> vague and could
> be interpreted many ways.
>
Well, I will try to fix these errors as far as possible.
> -corey
>
>
>>> -corey
>>>
>>>>> -corey
>>>>>
>>>>>>>>>> + spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>> +
>>>>>>>>>> +    kcs_bmc->running = 0;
>>>>>>>>>> +
>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>> +
>>>>>>>>>> +    return 0;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
>
Corey Minyard Jan. 31, 2018, 1:52 a.m. | #14
On 01/30/2018 07:37 PM, Wang, Haiyue wrote:
>
>
> On 2018-01-31 09:25, Corey Minyard wrote:
>> On 01/30/2018 07:02 PM, Wang, Haiyue wrote:
>>>
>>>
>>> On 2018-01-31 08:52, Corey Minyard wrote:
>>>> On 01/30/2018 06:02 PM, Wang, Haiyue wrote:
>>>>>
>>>>>
>>>>> On 2018-01-30 21:49, Corey Minyard wrote:
>>>>>> On 01/29/2018 07:57 AM, Wang, Haiyue wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 2018-01-26 22:48, Corey Minyard wrote:
>>>>>>>> On 01/26/2018 12:08 AM, Wang, Haiyue wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 2018-01-25 01:48, Corey Minyard wrote:
>>>>>>>>>> On 01/24/2018 10:06 AM, Haiyue Wang wrote:
>>>>>>>>>>> The KCS (Keyboard Controller Style) interface is used to 
>>>>>>>>>>> perform in-band
>>>>>>>>>>> IPMI communication between a server host and its BMC 
>>>>>>>>>>> (BaseBoard Management
>>>>>>>>>>> Controllers).
>>>>>>>>>>>
>>>>>>>>>>> This driver exposes the KCS interface on ASpeed SOCs 
>>>>>>>>>>> (AST2400 and AST2500)
>>>>>>>>>>> as a character device. Such SOCs are commonly used as BMCs 
>>>>>>>>>>> and this driver
>>>>>>>>>>> implements the BMC side of the KCS interface.
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
>>>>>>>>>>>
>>>>>>>>>>> ---
>>>>>>>>>>> v1->v2
>>>>>>>>>>>
>>>>>>>>>>> - Divide the driver into two parts, one handles the BMC KCS 
>>>>>>>>>>> IPMI 2.0 state;
>>>>>>>>>>>    the other handles the BMC KCS controller such as AST2500 
>>>>>>>>>>> IO accessing.
>>>>>>>>>>> - Use the spin lock APIs to handle the device file 
>>>>>>>>>>> operations and BMC chip
>>>>>>>>>>>    IRQ interface for accessing the same KCS BMC data structure.
>>>>>>>>>>> - Enhanced the phases handling of the KCS BMC.
>>>>>>>>>>> - Unified the IOCTL definition for IPMI BMC, it will be used 
>>>>>>>>>>> by KCS and BT.
>>>>>>>>>>>
>>>>>>>>>>> ---
>>>>>>>>>>>
>>>>>>>>>>> +
>>>>>>>>>>> +static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
>>>>>>>>>>> +{
>>>>>>>>>>> +    u8 data;
>>>>>>>>>>> +
>>>>>>>>>>> +    switch (kcs_bmc->phase) {
>>>>>>>>>>> +    case KCS_PHASE_WRITE:
>>>>>>>>>>> +        set_state(kcs_bmc, WRITE_STATE);
>>>>>>>>>>> +
>>>>>>>>>>> +        /* set OBF before reading data */
>>>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>> +
>>>>>>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>>>>>>> +                        read_data(kcs_bmc);
>>>>>>>>
>>>>>>>> I missed this earlier, you need to issue a length error if the 
>>>>>>>> data is too large.
>>>>>>>>
>>>>>>>>>>> +        break;
>>>>>>>>>>> +
>>>>>>>>>>> +    case KCS_PHASE_WRITE_END:
>>>>>>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>>>>>>> +
>>>>>>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>>>>>>> +                        read_data(kcs_bmc);
>>>>>>>>>>> +
>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>>>>>>>> +        if (kcs_bmc->running) {
>>>>>>>>>>
>>>>>>>>>> Why do you only do this when running is set?  It won't hurt 
>>>>>>>>>> anything if it's not
>>>>>>>>>> set.  As it is, you have a race if something opens the device 
>>>>>>>>>> while this code
>>>>>>>>>> runs.
>>>>>>>>>>
>>>>>>>>>> Also, don't set the state to wait read until the "write" has 
>>>>>>>>>> finished (userland has
>>>>>>>>>> read the data out of the buffer).  More on that later.
>>>>>>>>>>
>>>>>>>>> Understood.
>>>>>>>>>>> + kcs_bmc->data_in_avail = true;
>>>>>>>>>>> + wake_up_interruptible(&kcs_bmc->queue);
>>>>>>>>>>> +        }
>>>>>>>>>>> +        break;
>>>>>>>>>>> +
>>>>>>>>>>> +    case KCS_PHASE_READ:
>>>>>>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
>>>>>>>>>>> +            set_state(kcs_bmc, IDLE_STATE);
>>>>>>>>>>> +
>>>>>>>>>>> +        data = read_data(kcs_bmc);
>>>>>>>>>>> +        if (data != KCS_CMD_READ_BYTE) {
>>>>>>>>>>> +            set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>> +            break;
>>>>>>>>>>> +        }
>>>>>>>>>>> +
>>>>>>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
>>>>>>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>> +            kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>>>>>>> +            break;
>>>>>>>>>>> +        }
>>>>>>>>>>> +
>>>>>>>>>>> +        write_data(kcs_bmc,
>>>>>>>>>>> + kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
>>>>>>>>>>> +        break;
>>>>>>>>>>> +
>>>>>>>>>>> +    case KCS_PHASE_ABORT_ERROR1:
>>>>>>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>>>>>>> +
>>>>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>>>> +
>>>>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
>>>>>>>>>>> +        break;
>>>>>>>>>>> +
>>>>>>>>>>> +    case KCS_PHASE_ABORT_ERROR2:
>>>>>>>>>>> +        set_state(kcs_bmc, IDLE_STATE);
>>>>>>>>>>> +
>>>>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>>>> +
>>>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>>>>>>> +
>>>>>>>>>>> +        break;
>>>>>>>>>>> +
>>>>>>>>>>> +    default:
>>>>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>>>> +
>>>>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>>>> +
>>>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>> +        break;
>>>>>>>>>>> +    }
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +static void kcs_bmc_handle_command(struct kcs_bmc *kcs_bmc)
>>>>>>>>>>> +{
>>>>>>>>>>> +    u8 cmd;
>>>>>>>>>>> +
>>>>>>>>>>> +    set_state(kcs_bmc, WRITE_STATE);
>>>>>>>>>>> +
>>>>>>>>>>> +    /* Dummy data to generate OBF */
>>>>>>>>>>> +    write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>> +
>>>>>>>>>>> +    cmd = read_data(kcs_bmc);
>>>>>>>>>>
>>>>>>>>>> Shouldn't you check the phase in all the cases below and do 
>>>>>>>>>> error
>>>>>>>>>> handling if the phase isn't correct?
>>>>>>>>>>
>>>>>>>>>> Similar thing if the device here isn't open. You need to handle
>>>>>>>>>> that gracefully.
>>>>>>>>>>
>>>>>>>>>> Also, you should remove data_in_avail and data_in_idx setting 
>>>>>>>>>> from
>>>>>>>>>> here, for reasons I will explain later.
>>>>>>>>>>
>>>>>>>>> If host software sends the data twice, such as a retry before
>>>>>>>>> the BMC's IPMI service starts,
>>>>>>>>> then the two IPMI requests will be merged into one if
>>>>>>>>> data_in_idx is not cleared after receiving
>>>>>>>>> KCS_CMD_WRITE_START. Most of the states are driven by host
>>>>>>>>> software (SMS). :(
>>>>>>>>
>>>>>>>> True, but what if the host issues WRITE_START or a WRITE_END 
>>>>>>>> while this driver is in read
>>>>>>>> state?  The spec is unclear on this, but it really only makes 
>>>>>>>> sense for the host to issue
>>>>>>>> WRITE_START in idle state and WRITE_END in write state. IMHO it
>>>>>>>> should go to error
>>>>>>>> state.  You might make the case that a WRITE_START anywhere 
>>>>>>>> restarts the transaction,
>>>>>>>> but the feel of the error state machine kind of goes against 
>>>>>>>> that. WRITE_END is definitely
>>>>>>>> wrong anywhere but write state.
>>>>>>>>
>>>>>>>> I just found the following in the spec (section 9.12):
>>>>>>>>
>>>>>>>>    Thus, since the interface will allow a command transfer to be
>>>>>>>>    started or restarted
>>>>>>>>    at any time when the input buffer is empty, software could 
>>>>>>>> elect to
>>>>>>>>    simply retry
>>>>>>>>    the command upon detecting an error condition, or issue a 
>>>>>>>> ‘known good’
>>>>>>>>    command in order to clear ERROR_STATE
>>>>>>>>
>>>>>>>> So a WRITE_START anywhere is ok.  A WRITE_END in the wrong 
>>>>>>>> state should probably
>>>>>>>> still go to error state.  This means the user needs to be able 
>>>>>>>> to handle a write error at
>>>>>>>> any time.  It also means it's very important to make sure the 
>>>>>>>> user does a read before
>>>>>>>> doing a write.  If the host re-issues a WRITE_START and writes 
>>>>>>>> a new command
>>>>>>>> between the time the user reads the data and writes the
>>>>>>>> response, the response would
>>>>>>>> be for the wrong command.
>>>>>>>>
>>>>>>>>>>> +    switch (cmd) {
>>>>>>>>>>> +    case KCS_CMD_WRITE_START:
>>>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>>>> +        kcs_bmc->data_in_idx   = 0;
>>>>>>>>>>> +        kcs_bmc->phase         = KCS_PHASE_WRITE;
>>>>>>>>>>> +        kcs_bmc->error         = KCS_NO_ERROR;
>>>>>>>>>>> +        break;
>>>>>>>>>>> +
>>>>>>>>>>> +    case KCS_CMD_WRITE_END:
>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_WRITE_END;
>>>>>>>>>>> +        break;
>>>>>>>>>>> +
>>>>>>>>>>> +    case KCS_CMD_ABORT:
>>>>>>>>>>> +        if (kcs_bmc->error == KCS_NO_ERROR)
>>>>>>>>>>> +            kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
>>>>>>>>>>> +
>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
>>>>>>>>>>> +        break;
>>>>>>>>>>> +
>>>>>>>>>>> +    default:
>>>>>>>>>>> +        kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
>>>>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>>>>>>> +        break;
>>>>>>>>>>> +    }
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
>>>>>>>>>>> +{
>>>>>>>>>>> +    unsigned long flags;
>>>>>>>>>>> +    int ret = 0;
>>>>>>>>>>> +    u8 status;
>>>>>>>>>>> +
>>>>>>>>>>> +    spin_lock_irqsave(&kcs_bmc->lock, flags);
>>>>>>>>>>> +
>>>>>>>>>>> +    status = read_status(kcs_bmc) & (KCS_STATUS_IBF | 
>>>>>>>>>>> KCS_STATUS_CMD_DAT);
>>>>>>>>>>> +
>>>>>>>>>>> +    switch (status) {
>>>>>>>>>>> +    case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
>>>>>>>>>>> +        kcs_bmc_handle_command(kcs_bmc);
>>>>>>>>>>> +        break;
>>>>>>>>>>> +
>>>>>>>>>>> +    case KCS_STATUS_IBF:
>>>>>>>>>>> +        kcs_bmc_handle_data(kcs_bmc);
>>>>>>>>>>> +        break;
>>>>>>>>>>> +
>>>>>>>>>>> +    default:
>>>>>>>>>>> +        ret = -1;
>>>>>>>>>>> +        break;
>>>>>>>>>>> +    }
>>>>>>>>>>> +
>>>>>>>>>>> + spin_unlock_irqrestore(&kcs_bmc->lock, flags);
>>>>>>>>>>> +
>>>>>>>>>>> +    return ret;
>>>>>>>>>>> +}
>>>>>>>>>>> +EXPORT_SYMBOL(kcs_bmc_handle_event);
>>>>>>>>>>> +
>>>>>>>>>>> +static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
>>>>>>>>>>> +{
>>>>>>>>>>> +    return container_of(filp->private_data, struct kcs_bmc, 
>>>>>>>>>>> miscdev);
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +static int kcs_bmc_open(struct inode *inode, struct file 
>>>>>>>>>>> *filp)
>>>>>>>>>>> +{
>>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>>> +    int ret = 0;
>>>>>>>>>>> +
>>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>>> +
>>>>>>>>>>> +    if (!kcs_bmc->running) {
>>>>>>>>>>> +        kcs_bmc->running       = 1;
>>>>>>>>>>> +        kcs_bmc->phase         = KCS_PHASE_IDLE;
>>>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>>>
>>>>>>>>>> If you do everything right, setting the phase and 
>>>>>>>>>> data_in_avail should not
>>>>>>>>>> be necessary here.
>>>>>>>>>>
>>>>>>>>>>> +    } else {
>>>>>>>>>>> +        ret = -EBUSY;
>>>>>>>>>>> +    }
>>>>>>>>>>> +
>>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>> +
>>>>>>>>>>> +    return ret;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +static unsigned int kcs_bmc_poll(struct file *filp, 
>>>>>>>>>>> poll_table *wait)
>>>>>>>>>>> +{
>>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>>> +    unsigned int mask = 0;
>>>>>>>>>>> +
>>>>>>>>>>> +    poll_wait(filp, &kcs_bmc->queue, wait);
>>>>>>>>>>> +
>>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>>> +
>>>>>>>>>>> +    if (kcs_bmc->data_in_avail)
>>>>>>>>>>> +        mask |= POLLIN;
>>>>>>>>>>> +
>>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>> +
>>>>>>>>>>> +    return mask;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>>>>>>>> +                size_t count, loff_t *offset)
>>>>>>>>>>> +{
>>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>>> +    ssize_t ret = -EAGAIN;
>>>>>>>>>>> +
>>>>>>>>>>
>>>>>>>>>> This function still has some issues.
>>>>>>>>>>
>>>>>>>>>> You can't call copy_to_user() with a spinlock held or 
>>>>>>>>>> interrupts disabled.
>>>>>>>>>> To handle readers, you probably need a separate mutex.
>>>>>>>>>>
>>>>>>>>>> Also, this function can return -EAGAIN even if O_NONBLOCK is 
>>>>>>>>>> not set if
>>>>>>>>>> kcs_bmc->data_in_avail changes between when you wait on the 
>>>>>>>>>> event
>>>>>>>>>> and when you check it under the lock.
>>>>>>>>>>
>>>>>>>>>> You also clear data_in_avail even if the copy_to_user() 
>>>>>>>>>> fails, which is
>>>>>>>>>> wrong.
>>>>>>>>>>
>>>>>>>>>> I believe the best way to handle this would be to have the 
>>>>>>>>>> spinlock
>>>>>>>>>> protect the inner workings of the state machine and a mutex 
>>>>>>>>>> handle
>>>>>>>>>> copying data out, setting/clearing the running flag (thus a 
>>>>>>>>>> mutex
>>>>>>>>>> instead of spinlock in open and release) and the ioctl 
>>>>>>>>>> settings (except
>>>>>>>>>> for abort where you will need to grab the spinlock).
>>>>>>>>>>
>>>>>>>>>> After the wait event below, grab the mutex.  If data is not 
>>>>>>>>>> available
>>>>>>>>>> and O_NONBLOCK is not set, drop the mutex and retry. Otherwise
>>>>>>>>>> this is the only place (besides release) that sets 
>>>>>>>>>> data_in_avail to false.
>>>>>>>>>> Do the copy_to_user(), grab the spinlock, clear data_in_avail 
>>>>>>>>>> and
>>>>>>>>>> data_in_idx, then release the lock and mutex. If you are really
>>>>>>>>>> adventurous you can do this without grabbing the lock using
>>>>>>>>>> barriers, but it's probably not necessary here.
>>>>>>>>>>
>>>>>>>>
>>>>>>>> With the state machine being able to be restarted at any time, 
>>>>>>>> you need
>>>>>>>> something a little different here.  You still need the mutex to 
>>>>>>>> handle
>>>>>>>> multiple readers and the copy.  I think the function should be 
>>>>>>>> something
>>>>>>>> like:
>>>>>>>>
>>>>>>> Since KCS is not a multi-reader protocol from BMC's view, you 
>>>>>>> makes things complex. :-)
>>>>>>
>>>>>> No, I don't think you understand.  The primary purpose of the 
>>>>>> complexity
>>>>>> here is to protect the driver from the host system (on the other 
>>>>>> side of
>>>>>> the KCS interface).  Without this protection, it is possible for 
>>>>>> the host
>>>>>> system to start a new write while the user on the BMC side is 
>>>>>> reading
>>>>>> data out, resulting in corrupt data being read.
>>>>>>
>>>>>> I haven't thought too much about this.  There may be a simpler way,
>>>>>> but the protection needs to be there.
>>>>>>
>>>>>> And you may not think you need to protect the driver against a
>>>>>> malicious BMC side user code, but you would be wrong. You can
>>>>>> only have one opener, but with threads or a fork you can have
>>>>>> multiple readers.  And you don't know if a malicious piece of
>>>>>> code has taken over userland.  You always need to protect the
>>>>>> kernel.
>>>>>>
>>>>> Sure, the read/write have protected the critical data area with 
>>>>> IRQ, and also, these
>>>>> functions should be thread local safe I believe.
>>>>>
>>>>> spin_lock_irq(&kcs_bmc->lock);
>>>>> ...
>>>>> spin_unlock_irq(&kcs_bmc->lock);
>>>>>
>>>>
>>>> But remember, you can't call copy_to_user() when IRQs are off or 
>>>> when you are holding
>>>> a spinlock.  That is an absolute no.  It can crash the kernel.
>>>>
>>>> So you need a design that takes this into account, but will not 
>>>> result in the possibility
>>>> of bad data being read.
>>>>
>>> Yes, sure, as I said before: access_ok(VERIFY_WRITE, to, n), then 
>>> memcpy in spin_lock.
>>
>> Where did you get the idea that this was ok?  It's not. access_ok() 
>> is not actually very
>> useful, since the permissions on memory can change at any time unless 
>> you are holding
>> the mm lock, which is also not an ok thing to do.  It is entirely 
>> possible for access_ok()
>> to pass and copy_to_user() to fail.
>>
> I thought memcpy will not fail. :(

Oh, memcpy won't fail as long as the source and destination are kernel
memory.
I was a little confused by the access_ok() thing, it's common for people to
assume that if they do access_ok(), that copy_to_user() won't fail.

>> I'm not exactly sure what you are saying, though.  In any event, a 
>> well-designed read()/write()
>> operation should leave the system unchanged if it gets an error.
>>
> I saw BT use a local buffer, If I change the '#define 
> KCS_MSG_BUFSIZ    1024' to ".. 512", should it be OK
> as BT ?
>
> static ssize_t bt_bmc_read(struct file *file, char __user *buf,
>                size_t count, loff_t *ppos)
> {
>     struct bt_bmc *bt_bmc = file_bt_bmc(file);
>     u8 len;
>     int len_byte = 1;
>     u8 kbuffer[BT_BMC_BUFFER_SIZE];  --> #define BT_BMC_BUFFER_SIZE 256

It's good practice to keep larger things off the stack, which is why I 
dynamically
allocated it.  But if you have a mutex, you can put that buffer in 
struct bt_bmc
since it would only be accessed when holding the mutex.

>
>> -corey
>>
>>>>>>>>    static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>>>>>                     size_t count, loff_t *offset)
>>>>>>>>    {
>>>>>>>>         struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>         ssize_t ret;
>>>>>>>>         bool avail;
>>>>>>>>         size_t data_size;
>>>>>>>>         u8 *data;
>>>>>>>>
>>>>>>>>         data = kmalloc(KCS_MSG_BUFSIZ, GFP_KERNEL);
>>>>>>>>         if (!data)
>>>>>>>>             return -ENOMEM;
>>>>>>>>
>>>>>>>>    retry:
>>>>>>>>         ret = -EAGAIN;
>>>>>>>>         if (!(filp->f_flags & O_NONBLOCK))
>>>>>>>> wait_event_interruptible(kcs_bmc->queue,
>>>>>>>>                          kcs_bmc->data_in_avail);
>>>>>>>>
>>>>>>>>         mutex_lock(&kcs_bmc->read_mutex);
>>>>>>>>
>>>>>>>>         spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>         avail = kcs_bmc->data_in_avail;
>>>>>>>>         if (avail) {
>>>>>>>>             memcpy(data, kcs_bmc->data_in, kcs_bmc->data_in_idx);
>>>>>>>>             data_size = kcs_bmc->data_in_idx;
>>>>>>>>         }
>>>>>>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>
>>>>>>>>         if (!avail) {
>>>>>>>>             if (filp->f_flags & O_NONBLOCK)
>>>>>>>>                 goto out_mutex_unlock;
>>>>>>>> mutex_unlock(&kcs_bmc->read_mutex);
>>>>>>>>             goto retry;
>>>>>>>>         }
>>>>>>>>
>>>>>>>>         if (count < data_size) {
>>>>>>>>             ret = -EOVERFLOW;
>>>>>>>>              ? I'm not sure about the error, but userspace 
>>>>>>>> needs to know.
>>>>>>>>             goto out_mutex_unlock;
>>>>>>
>>>>>> Maybe a length error to the host side here?
>>>>
>>>> You didn't comment on this or the other length error.  That needs 
>>>> to be
>>>> handled.
>>>>
>>> Yes, will send a length error by following KCS spec.
>>>>>>
>>>>>>>>         }
>>>>>>>>
>>>>>>>>         if (!copy_to_user(buf, data, data_size)) {
>>>>>>>>             ret = -EFAULT;
>>>>>>>>             goto out_mutex_unlock;
>>>>>>>>         }
>>>>>>>>
>>>>>>>>         ret = data_size;
>>>>>>>>
>>>>>>>>         spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>
>>>>>>>>         if (kcs_bmc->phase != KCS_PHASE_WRITE_END_DONE)
>>>>>>>>             /* Something aborted or restarted the state 
>>>>>>>> machine. */
>>>>>>>>             ? Maybe restart if O_NONBLOCK is not set and 
>>>>>>>> -EAGAIN if it is?
>>>>>>>>             ret = -EIO;
>>>>>>>>         } else {
>>>>>>>>             kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>>>>>             kcs_bmc->data_in_avail = false;
>>>>>>>>             kcs_bmc->data_in_idx = 0;
>>>>>>>>         }
>>>>>>>>
>>>>>>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>
>>>>>>>>    out_mutex_unlock:
>>>>>>>>         mutex_unlock(&kcs_bmc->read_mutex);
>>>>>>>>
>>>>>>>>         kfree(data);
>>>>>>>>
>>>>>>>>         return ret;
>>>>>>>>    }
>>>>>>>> Note that I added a state, KCS_PHASE_WRITE_END_DONE, which 
>>>>>>>> would be
>>>>>>>> set after the final byte from the host is received. You want 
>>>>>>>> the read here
>>>>>>>> done before you can do the write below to avoid the race I 
>>>>>>>> talked about.
>>>>>>>>
>>>>>>>> There is a local copy made of the data.  What you *never* want 
>>>>>>>> to happen
>>>>>>>> here is for the state machine to start processing a new write 
>>>>>>>> command
>>>>>>>> while the data is being copied.  It could result in corrupt 
>>>>>>>> data being read
>>>>>>>> and some random operation being done by the BMC.
>>>>>>>>
>>>>>>>> If you want to avoid the local copy, it could be done, but it's 
>>>>>>>> more complex.
>>>>>>>>
>>>>>>>>>>> +    if (!(filp->f_flags & O_NONBLOCK))
>>>>>>>>>>> + wait_event_interruptible(kcs_bmc->queue,
>>>>>>>>>>> + kcs_bmc->data_in_avail);
>>>>>>>>>>> +
>>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>>> +
>>>>>>>>>>> +    if (kcs_bmc->data_in_avail) {
>>>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>>>> +
>>>>>>>>>>> +        if (count > kcs_bmc->data_in_idx)
>>>>>>>>>>> +            count = kcs_bmc->data_in_idx;
>>>>>>>>>>> +
>>>>>>>>>>> +        if (!copy_to_user(buf, kcs_bmc->data_in, count))
>>>>>>>>>>> +            ret = count;
>>>>>>>>>>> +        else
>>>>>>>>>>> +            ret = -EFAULT;
>>>>>>>>>>> +    }
>>>>>>>>>>> +
>>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>> +
>>>>>>>>>>> +    return ret;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +static ssize_t kcs_bmc_write(struct file *filp, const char 
>>>>>>>>>>> *buf,
>>>>>>>>>>> +                 size_t count, loff_t *offset)
>>>>>>>>>>> +{
>>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>>> +    ssize_t ret = count;
>>>>>>>>>>> +
>>>>>>>>>>> +    if (count < 1 || count > KCS_MSG_BUFSIZ)
>>>>>>>>>>> +        return -EINVAL;
>>>>>>>>>>> +
>>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>>> +
>>>>>>>>>>> +    if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
>>>>>>>>>>> +        if (copy_from_user(kcs_bmc->data_out, buf, count)) {
>>>>>>>>>>> + spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>> +            return -EFAULT;
>>>>>>>>>>> +        }
>>>>>>>>>>> +
>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_READ;
>>>>>>>>>>> +        kcs_bmc->data_out_idx = 1;
>>>>>>>>>>> +        kcs_bmc->data_out_len = count;
>>>>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->data_out[0]);
>>>>>>>>>>> +    } else if (kcs_bmc->phase == KCS_PHASE_READ) {
>>>>>>>>>>> +        ret = -EBUSY;
>>>>>>>>>>> +    } else {
>>>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>>>
>>>>>>>>>> Is there a reason you return -EINVAL here?  Why not just 
>>>>>>>>>> -EBUSY in all
>>>>>>>>>> cases?  Is there something that userland will need to do 
>>>>>>>>>> differently?
>>>>>>>>>>
>>>>>>>>>>> +    }
>>>>>>>>>>> +
>>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>> +
>>>>>>>>>>> +    return ret;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
>>>>>>>>>>> +              unsigned long arg)
>>>>>>>>>>> +{
>>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>>> +    long ret = 0;
>>>>>>>>>>> +
>>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>>> +
>>>>>>>>>>> +    switch (cmd) {
>>>>>>>>>>> +    case IPMI_BMC_IOCTL_SET_SMS_ATN:
>>>>>>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>>>>>>> +                        KCS_STATUS_SMS_ATN);
>>>>>>>>>>> +        break;
>>>>>>>>>>> +
>>>>>>>>>>> +    case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
>>>>>>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>>>>>>> +                        0);
>>>>>>>>>>> +        break;
>>>>>>>>>>> +
>>>>>>>>>>> +    case IPMI_BMC_IOCTL_FORCE_ABORT:
>>>>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>> +
>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>>>> +        break;
>>>>>>>>>>> +
>>>>>>>>>>> +    default:
>>>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>>>> +        break;
>>>>>>>>>>> +    }
>>>>>>>>>>> +
>>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>> +
>>>>>>>>>>> +    return ret;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +static int kcs_bmc_release(struct inode *inode, struct file 
>>>>>>>>>>> *filp)
>>>>>>>>>>> +{
>>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>>> +
>>>>>>>>>>
>>>>>>>>>> What happens if the device gets closed in the middle of a 
>>>>>>>>>> transaction?  That's
>>>>>>>>>> an important case to handle.  If something is in process, you 
>>>>>>>>>> need to abort it.
>>>>>>>>>>
>>>>>>>>> The device just provides the read & write data, the 
>>>>>>>>> transaction is handled in the KCS
>>>>>>>>> controller's IRQ handler.
>>>>>>>>
>>>>>>>> From the spec, section 9.14:
>>>>>>>>
>>>>>>>>    The BMC must change the status to ERROR_STATE on any 
>>>>>>>> condition where it
>>>>>>>>    aborts a command transfer in progress.
>>>>>>>>
>>>>>>>> So you need to do something here.
>>>>>>>>
>>>>>>> In practice, we do this as spec said in ipmid, NOT in driver, 
>>>>>>> driver can't handle anything, let's
>>>>>>> make it simple, thanks!
>>>>>>
>>>>>> If ipmid crashes or is killed, how does it accomplish this?
>>>>>>
>>>>> Every time ipmid (or kcsd) crashes or is killed, it needs to start
>>>>> by calling FORCE_ABORT first, to sync with
>>>>> host side software.
>>>>>>>
>>>>>>> Whenever the BMC is reset (from power-on or a hard reset), the 
>>>>>>> State Bits are initialized to “11 - Error State”. Doing so
>>>>>>> allows SMS to detect that the BMC has been reset and that any 
>>>>>>> message in process has been terminated by the BMC.
>>>>>>
>>>>>> Right, that's fine, like it should be.  But we are not talking 
>>>>>> about a reset.
>>>>>>
>>>>> I think the final error handling solution is that kcsd (user land)
>>>>> runs; otherwise, the host software side still gets stuck. We have met
>>>>> this kind of issue, so in general, we just don't handle some
>>>>> minor errors in the driver; then in kcsd, when it can provide the real
>>>>> IPMI service, it will reset the channel first to sync with host
>>>>> side software.
>>>>
>>>> "Userland will do the right thing" is not very convincing to a 
>>>> kernel developer.
>>>>
>>>> Plus if the above is true, I would think that you would just want 
>>>> to hold the device
>>>> in an error state when it wasn't opened.
>>>>
>>> I understand your concern; of course, the driver needs to handle things
>>> well. But in fact, if a user app is truly a bad boy, it can still hang
>>> the host side: set SMS_ATN, but return no message when the host-side
>>> software requests one; then the host open-ipmi driver will hang. We have
>>> met this kind of error hanging a customer's host. :) In my
>>> understanding, kcs-bmc should do the right thing about read and write;
>>> the real transaction should be handled correctly by kcsd.
>>>
>>> And if no kcsd starts, then this kind of BMC can't be sold out. :)
>>
>> True.  I'm not as concerned about this sort of thing.  It's nicer to 
>> the host side if
>> it can detect problems quickly, but it will eventually time out.
>>
>> From what I can tell from the current design, if the BMC userland is 
>> not running,
>> the driver will step through the state machine until it hits read 
>> state, then it
>> will sit there until the host times out and aborts the operation.
>>
>> IMHO, it would be better for the host side if the driver just stayed 
>> in error state
>> if nothing had it open.  I would think the spec says that in the
>> quote I referenced
>> above, but that quote, like many things in the IPMI spec, is fairly 
>> vague and could
>> be interpreted many ways.
>>
> Well, I will try to fix these errors as far as possible.
>> -corey
>>
>>
>>>> -corey
>>>>
>>>>>> -corey
>>>>>>
>>>>>>>>>>> + spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>>> +
>>>>>>>>>>> +    kcs_bmc->running = 0;
>>>>>>>>>>> +
>>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>> +
>>>>>>>>>>> +    return 0;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
>
Wang, Haiyue Jan. 31, 2018, 2:01 a.m. | #15
On 2018-01-31 09:52, Corey Minyard wrote:
> On 01/30/2018 07:37 PM, Wang, Haiyue wrote:
>>
>>
>> On 2018-01-31 09:25, Corey Minyard wrote:
>>> On 01/30/2018 07:02 PM, Wang, Haiyue wrote:
>>>>
>>>>
>>>> On 2018-01-31 08:52, Corey Minyard wrote:
>>>>> On 01/30/2018 06:02 PM, Wang, Haiyue wrote:
>>>>>>
>>>>>>
>>>>>> On 2018-01-30 21:49, Corey Minyard wrote:
>>>>>>> On 01/29/2018 07:57 AM, Wang, Haiyue wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> On 2018-01-26 22:48, Corey Minyard wrote:
>>>>>>>>> On 01/26/2018 12:08 AM, Wang, Haiyue wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> On 2018-01-25 01:48, Corey Minyard wrote:
>>>>>>>>>>> On 01/24/2018 10:06 AM, Haiyue Wang wrote:
>>>>>>>>>>>> The KCS (Keyboard Controller Style) interface is used to 
>>>>>>>>>>>> perform in-band
>>>>>>>>>>>> IPMI communication between a server host and its BMC 
>>>>>>>>>>>> (BaseBoard Management
>>>>>>>>>>>> Controllers).
>>>>>>>>>>>>
>>>>>>>>>>>> This driver exposes the KCS interface on ASpeed SOCs 
>>>>>>>>>>>> (AST2400 and AST2500)
>>>>>>>>>>>> as a character device. Such SOCs are commonly used as BMCs 
>>>>>>>>>>>> and this driver
>>>>>>>>>>>> implements the BMC side of the KCS interface.
>>>>>>>>>>>>
>>>>>>>>>>>> Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
>>>>>>>>>>>>
>>>>>>>>>>>> ---
>>>>>>>>>>>> v1->v2
>>>>>>>>>>>>
>>>>>>>>>>>> - Divide the driver into two parts, one handles the BMC KCS 
>>>>>>>>>>>> IPMI 2.0 state;
>>>>>>>>>>>>    the other handles the BMC KCS controller such as AST2500 
>>>>>>>>>>>> IO accessing.
>>>>>>>>>>>> - Use the spin lock APIs to handle the device file 
>>>>>>>>>>>> operations and BMC chip
>>>>>>>>>>>>    IRQ interface for accessing the same KCS BMC data
>>>>>>>>>>>> structure.
>>>>>>>>>>>> - Enhanced the phases handling of the KCS BMC.
>>>>>>>>>>>> - Unified the IOCTL definition for IPMI BMC, it will be 
>>>>>>>>>>>> used by KCS and BT.
>>>>>>>>>>>>
>>>>>>>>>>>> ---
>>>>>>>>>>>>
>>>>>>>>>>>> +
>>>>>>>>>>>> +static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    u8 data;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    switch (kcs_bmc->phase) {
>>>>>>>>>>>> +    case KCS_PHASE_WRITE:
>>>>>>>>>>>> +        set_state(kcs_bmc, WRITE_STATE);
>>>>>>>>>>>> +
>>>>>>>>>>>> +        /* set OBF before reading data */
>>>>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>>> +
>>>>>>>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>>>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>>>>>>>> +                        read_data(kcs_bmc);
>>>>>>>>>
>>>>>>>>> I missed this earlier, you need to issue a length error if the 
>>>>>>>>> data is too large.
>>>>>>>>>
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    case KCS_PHASE_WRITE_END:
>>>>>>>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>>>>>>>> +
>>>>>>>>>>>> +        if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
>>>>>>>>>>>> + kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
>>>>>>>>>>>> +                        read_data(kcs_bmc);
>>>>>>>>>>>> +
>>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>>>>>>>>> +        if (kcs_bmc->running) {
>>>>>>>>>>>
>>>>>>>>>>> Why do you only do this when running is set? It won't hurt 
>>>>>>>>>>> anything if it's not
>>>>>>>>>>> set.  As it is, you have a race if something opens the 
>>>>>>>>>>> device while this code
>>>>>>>>>>> runs.
>>>>>>>>>>>
>>>>>>>>>>> Also, don't set the state to wait read until the "write" has 
>>>>>>>>>>> finished (userland has
>>>>>>>>>>> read the data out of the buffer).  More on that later.
>>>>>>>>>>>
>>>>>>>>>> Understood.
>>>>>>>>>>>> + kcs_bmc->data_in_avail = true;
>>>>>>>>>>>> + wake_up_interruptible(&kcs_bmc->queue);
>>>>>>>>>>>> +        }
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    case KCS_PHASE_READ:
>>>>>>>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
>>>>>>>>>>>> +            set_state(kcs_bmc, IDLE_STATE);
>>>>>>>>>>>> +
>>>>>>>>>>>> +        data = read_data(kcs_bmc);
>>>>>>>>>>>> +        if (data != KCS_CMD_READ_BYTE) {
>>>>>>>>>>>> +            set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>>> +            break;
>>>>>>>>>>>> +        }
>>>>>>>>>>>> +
>>>>>>>>>>>> +        if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
>>>>>>>>>>>> +            write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>>> +            kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>>>>>>>> +            break;
>>>>>>>>>>>> +        }
>>>>>>>>>>>> +
>>>>>>>>>>>> +        write_data(kcs_bmc,
>>>>>>>>>>>> + kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    case KCS_PHASE_ABORT_ERROR1:
>>>>>>>>>>>> +        set_state(kcs_bmc, READ_STATE);
>>>>>>>>>>>> +
>>>>>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>>>>> +
>>>>>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    case KCS_PHASE_ABORT_ERROR2:
>>>>>>>>>>>> +        set_state(kcs_bmc, IDLE_STATE);
>>>>>>>>>>>> +
>>>>>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>>>>> +
>>>>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_IDLE;
>>>>>>>>>>>> +
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    default:
>>>>>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>>>>> +
>>>>>>>>>>>> +        /* Read the Dummy byte */
>>>>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>>>>> +
>>>>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +    }
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>> +static void kcs_bmc_handle_command(struct kcs_bmc *kcs_bmc)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    u8 cmd;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    set_state(kcs_bmc, WRITE_STATE);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    /* Dummy data to generate OBF */
>>>>>>>>>>>> +    write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    cmd = read_data(kcs_bmc);
>>>>>>>>>>>
>>>>>>>>>>> Shouldn't you check the phase in all the cases below and do 
>>>>>>>>>>> error
>>>>>>>>>>> handling if the phase isn't correct?
>>>>>>>>>>>
>>>>>>>>>>> Similar thing if the device here isn't open. You need to handle
>>>>>>>>>>> that gracefully.
>>>>>>>>>>>
>>>>>>>>>>> Also, you should remove data_in_avail and data_in_idx 
>>>>>>>>>>> setting from
>>>>>>>>>>> here, for reasons I will explain later.
>>>>>>>>>>>
>>>>>>>>>> If host software sends the data twice such as a retry before 
>>>>>>>>>> the BMC's IPMI service starts,
>>>>>>>>>> then the two IPMI requests will be merged into one, if
>>>>>>>>>> data_in_idx is not cleared after receiving
>>>>>>>>>> KCS_CMD_WRITE_START. Most of the states are driven by host 
>>>>>>>>>> software (SMS). :(
>>>>>>>>>
>>>>>>>>> True, but what if the host issues WRITE_START or a WRITE_END 
>>>>>>>>> while this driver is in read
>>>>>>>>> state?  The spec is unclear on this, but it really only makes 
>>>>>>>>> sense for the host to issue
>>>>>>>>> WRITE_START in idle state and WRITE_END in write state. IMHO it
>>>>>>>>> should go to error
>>>>>>>>> state.  You might make the case that a WRITE_START anywhere 
>>>>>>>>> restarts the transaction,
>>>>>>>>> but the feel of the error state machine kind of goes against 
>>>>>>>>> that. WRITE_END is definitely
>>>>>>>>> wrong anywhere but write state.
>>>>>>>>>
>>>>>>>>> I just found the following in the spec (section 9.12):
>>>>>>>>>
>>>>>>>>>    Thus, since the interface will allow a command transfer to be
>>>>>>>>>    started or restarted
>>>>>>>>>    at any time when the input buffer is empty, software could 
>>>>>>>>> elect to
>>>>>>>>>    simply retry
>>>>>>>>>    the command upon detecting an error condition, or issue a 
>>>>>>>>> ‘known good’
>>>>>>>>>    command in order to clear ERROR_STATE
>>>>>>>>>
>>>>>>>>> So a WRITE_START anywhere is ok.  A WRITE_END in the wrong 
>>>>>>>>> state should probably
>>>>>>>>> still go to error state.  This means the user needs to be able 
>>>>>>>>> to handle a write error at
>>>>>>>>> any time.  It also means it's very important to make sure the 
>>>>>>>>> user does a read before
>>>>>>>>> doing a write.  If the host re-issues a WRITE_START and writes 
>>>>>>>>> a new command
>>>>>>>>> between the time the user reads the data and writes the
>>>>>>>>> response, the response would
>>>>>>>>> be for the wrong command.
>>>>>>>>>
>>>>>>>>>>>> +    switch (cmd) {
>>>>>>>>>>>> +    case KCS_CMD_WRITE_START:
>>>>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>>>>> +        kcs_bmc->data_in_idx   = 0;
>>>>>>>>>>>> +        kcs_bmc->phase         = KCS_PHASE_WRITE;
>>>>>>>>>>>> +        kcs_bmc->error         = KCS_NO_ERROR;
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    case KCS_CMD_WRITE_END:
>>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_WRITE_END;
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    case KCS_CMD_ABORT:
>>>>>>>>>>>> +        if (kcs_bmc->error == KCS_NO_ERROR)
>>>>>>>>>>>> +            kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
>>>>>>>>>>>> +
>>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    default:
>>>>>>>>>>>> +        kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
>>>>>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->error);
>>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +    }
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>> +int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    unsigned long flags;
>>>>>>>>>>>> +    int ret = 0;
>>>>>>>>>>>> +    u8 status;
>>>>>>>>>>>> +
>>>>>>>>>>>> + spin_lock_irqsave(&kcs_bmc->lock, flags);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    status = read_status(kcs_bmc) & (KCS_STATUS_IBF | 
>>>>>>>>>>>> KCS_STATUS_CMD_DAT);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    switch (status) {
>>>>>>>>>>>> +    case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
>>>>>>>>>>>> +        kcs_bmc_handle_command(kcs_bmc);
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    case KCS_STATUS_IBF:
>>>>>>>>>>>> +        kcs_bmc_handle_data(kcs_bmc);
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    default:
>>>>>>>>>>>> +        ret = -1;
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +    }
>>>>>>>>>>>> +
>>>>>>>>>>>> + spin_unlock_irqrestore(&kcs_bmc->lock, flags);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    return ret;
>>>>>>>>>>>> +}
>>>>>>>>>>>> +EXPORT_SYMBOL(kcs_bmc_handle_event);
>>>>>>>>>>>> +
>>>>>>>>>>>> +static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    return container_of(filp->private_data, struct 
>>>>>>>>>>>> kcs_bmc, miscdev);
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>> +static int kcs_bmc_open(struct inode *inode, struct file 
>>>>>>>>>>>> *filp)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>>>> +    int ret = 0;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    if (!kcs_bmc->running) {
>>>>>>>>>>>> +        kcs_bmc->running       = 1;
>>>>>>>>>>>> +        kcs_bmc->phase         = KCS_PHASE_IDLE;
>>>>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>>>>
>>>>>>>>>>> If you do everything right, setting the phase and 
>>>>>>>>>>> data_in_avail should not
>>>>>>>>>>> be necessary here.
>>>>>>>>>>>
>>>>>>>>>>>> +    } else {
>>>>>>>>>>>> +        ret = -EBUSY;
>>>>>>>>>>>> +    }
>>>>>>>>>>>> +
>>>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    return ret;
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>> +static unsigned int kcs_bmc_poll(struct file *filp, 
>>>>>>>>>>>> poll_table *wait)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>>>> +    unsigned int mask = 0;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    poll_wait(filp, &kcs_bmc->queue, wait);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    if (kcs_bmc->data_in_avail)
>>>>>>>>>>>> +        mask |= POLLIN;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    return mask;
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>> +static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>>>>>>>>> +                size_t count, loff_t *offset)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>>>> +    ssize_t ret = -EAGAIN;
>>>>>>>>>>>> +
>>>>>>>>>>>
>>>>>>>>>>> This function still has some issues.
>>>>>>>>>>>
>>>>>>>>>>> You can't call copy_to_user() with a spinlock held or 
>>>>>>>>>>> interrupts disabled.
>>>>>>>>>>> To handle readers, you probably need a separate mutex.
>>>>>>>>>>>
>>>>>>>>>>> Also, this function can return -EAGAIN even if O_NONBLOCK is 
>>>>>>>>>>> not set if
>>>>>>>>>>> kcs_bmc->data_in_avail changes between when you wait on the 
>>>>>>>>>>> event
>>>>>>>>>>> and when you check it under the lock.
>>>>>>>>>>>
>>>>>>>>>>> You also clear data_in_avail even if the copy_to_user() 
>>>>>>>>>>> fails, which is
>>>>>>>>>>> wrong.
>>>>>>>>>>>
>>>>>>>>>>> I believe the best way to handle this would be to have the 
>>>>>>>>>>> spinlock
>>>>>>>>>>> protect the inner workings of the state machine and a mutex 
>>>>>>>>>>> handle
>>>>>>>>>>> copying data out, setting/clearing the running flag (thus a 
>>>>>>>>>>> mutex
>>>>>>>>>>> instead of spinlock in open and release) and the ioctl 
>>>>>>>>>>> settings (except
>>>>>>>>>>> for abort where you will need to grab the spinlock).
>>>>>>>>>>>
>>>>>>>>>>> After the wait event below, grab the mutex. If data is not 
>>>>>>>>>>> available
>>>>>>>>>>> and O_NONBLOCK is not set, drop the mutex and retry. Otherwise
>>>>>>>>>>> this is the only place (besides release) that sets 
>>>>>>>>>>> data_in_avail to false.
>>>>>>>>>>> Do the copy_to_user(), grab the spinlock, clear 
>>>>>>>>>>> data_in_avail and
>>>>>>>>>>> data_in_idx, then release the lock and mutex. If you are really
>>>>>>>>>>> adventurous you can do this without grabbing the lock using
>>>>>>>>>>> barriers, but it's probably not necessary here.
>>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> With the state machine being able to be restarted at any time, 
>>>>>>>>> you need
>>>>>>>>> something a little different here.  You still need the mutex 
>>>>>>>>> to handle
>>>>>>>>> multiple readers and the copy.  I think the function should be 
>>>>>>>>> something
>>>>>>>>> like:
>>>>>>>>>
>>>>>>>> Since KCS is not a multi-reader protocol from the BMC's view, you
>>>>>>>> make things complex. :-)
>>>>>>>
>>>>>>> No, I don't think you understand.  The primary purpose of the 
>>>>>>> complexity
>>>>>>> here is to protect the driver from the host system (on the other 
>>>>>>> side of
>>>>>>> the KCS interface).  Without this protection, it is possible for 
>>>>>>> the host
>>>>>>> system to start a new write while the user on the BMC side is 
>>>>>>> reading
>>>>>>> data out, resulting in corrupt data being read.
>>>>>>>
>>>>>>> I haven't thought too much about this.  There may be a simpler way,
>>>>>>> but the protection needs to be there.
>>>>>>>
>>>>>>> And you may not think you need to protect the driver against a
>>>>>>> malicious BMC side user code, but you would be wrong. You can
>>>>>>> only have one opener, but with threads or a fork you can have
>>>>>>> multiple readers.  And you don't know if a malicious piece of
>>>>>>> code has taken over userland.  You always need to protect the
>>>>>>> kernel.
>>>>>>>
>>>>>> Sure, the read/write have protected the critical data area with 
>>>>>> IRQ, and also, these
>>>>>> functions should be thread local safe I believe.
>>>>>>
>>>>>> spin_lock_irq(&kcs_bmc->lock);
>>>>>> ...
>>>>>> spin_unlock_irq(&kcs_bmc->lock);
>>>>>>
>>>>>
>>>>> But remember, you can't call copy_to_user() when IRQs are off or 
>>>>> when you are holding
>>>>> a spinlock.  That is an absolute no.  It can crash the kernel.
>>>>>
>>>>> So you need a design that takes this into account, but will not 
>>>>> result in the possibility
>>>>> of bad data being read.
>>>>>
>>>> Yes, sure, as I said before: access_ok(VERIFY_WRITE, to, n), then 
>>>> memcpy in spin_lock.
>>>
>>> Where did you get the idea that this was ok?  It's not. access_ok() 
>>> is not actually very
>>> useful, since the permissions on memory can change at any time 
>>> unless you are holding
>>> the mm lock, which is also not an ok thing to do.  It is entirely 
>>> possible for access_ok()
>>> to pass and copy_to_user() to fail.
>>>
>> I thought memcpy will not fail. :(
>
> Oh, memcpy won't fail as long as the source and destination is kernel 
> memory.
> I was a little confused by the access_ok() thing, it's common for 
> people to
> assume that if they do access_ok(), that copy_to_user() won't fail.
>
Yes, it is commonly misunderstood; I didn't fully understand the hidden things
that the kernel does for memory
management.
>>> I'm not exactly sure what you are saying, though.  In any event, a 
>>> well-designed read()/write()
>>> operation should leave the system unchanged if it gets an error.
>>>
>> I saw BT use a local buffer, If I change the '#define 
>> KCS_MSG_BUFSIZ    1024' to ".. 512", should it be OK
>> as BT ?
>>
>> static ssize_t bt_bmc_read(struct file *file, char __user *buf,
>>                size_t count, loff_t *ppos)
>> {
>>     struct bt_bmc *bt_bmc = file_bt_bmc(file);
>>     u8 len;
>>     int len_byte = 1;
>>     u8 kbuffer[BT_BMC_BUFFER_SIZE];  --> #define BT_BMC_BUFFER_SIZE 256
>
> It's good practice to keep larger things off the stack, which is why I 
> dynamically
> allocated it.  But if you have a mutex, you can put that buffer in 
> struct bt_bmc
> since it would only be accessed when holding the mutex.
>
Got it, looks like this is the best idea. I will rewrite the driver 
again, hope I can catch all of your code review
comments. :-)
>>
>>> -corey
>>>
>>>>>>>>>    static ssize_t kcs_bmc_read(struct file *filp, char *buf,
>>>>>>>>>                     size_t count, loff_t *offset)
>>>>>>>>>    {
>>>>>>>>>         struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>         ssize_t ret;
>>>>>>>>>         bool avail;
>>>>>>>>>         size_t data_size;
>>>>>>>>>         u8 *data;
>>>>>>>>>
>>>>>>>>>         data = kmalloc(KCS_MSG_BUFSIZ, GFP_KERNEL);
>>>>>>>>>         if (!data)
>>>>>>>>>             return -ENOMEM;
>>>>>>>>>
>>>>>>>>>    retry:
>>>>>>>>>         ret = -EAGAIN;
>>>>>>>>>         if (!(filp->f_flags & O_NONBLOCK))
>>>>>>>>> wait_event_interruptible(kcs_bmc->queue,
>>>>>>>>> kcs_bmc->data_in_avail);
>>>>>>>>>
>>>>>>>>>         mutex_lock(&kcs_bmc->read_mutex);
>>>>>>>>>
>>>>>>>>>         spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>         avail = kcs_bmc->data_in_avail;
>>>>>>>>>         if (avail) {
>>>>>>>>>             memcpy(data, kcs_bmc->data_in, kcs_bmc->data_in_idx);
>>>>>>>>>             data_size = kcs_bmc->data_in_idx;
>>>>>>>>>         }
>>>>>>>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>
>>>>>>>>>         if (!avail) {
>>>>>>>>>             if (filp->f_flags & O_NONBLOCK)
>>>>>>>>>                 goto out_mutex_unlock;
>>>>>>>>> mutex_unlock(&kcs_bmc->read_mutex);
>>>>>>>>>             goto retry;
>>>>>>>>>         }
>>>>>>>>>
>>>>>>>>>         if (count < data_size) {
>>>>>>>>>             ret = -EOVERFLOW;
>>>>>>>>>              ? I'm not sure about the error, but userspace 
>>>>>>>>> needs to know.
>>>>>>>>>             goto out_mutex_unlock;
>>>>>>>
>>>>>>> Maybe a length error to the host side here?
>>>>>
>>>>> You didn't comment on this or the other length error. That needs 
>>>>> to be
>>>>> handled.
>>>>>
>>>> Yes, will send a length error by following KCS spec.
>>>>>>>
>>>>>>>>>         }
>>>>>>>>>
>>>>>>>>>         if (!copy_to_user(buf, data, data_size)) {
>>>>>>>>>             ret = -EFAULT;
>>>>>>>>>             goto out_mutex_unlock;
>>>>>>>>>         }
>>>>>>>>>
>>>>>>>>>         ret = data_size;
>>>>>>>>>
>>>>>>>>>         spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>
>>>>>>>>>         if (kcs_bmc->phase != KCS_PHASE_WRITE_END_DONE)
>>>>>>>>>             /* Something aborted or restarted the state 
>>>>>>>>> machine. */
>>>>>>>>>             ? Maybe restart if O_NONBLOCK is not set and 
>>>>>>>>> -EAGAIN if it is?
>>>>>>>>>             ret = -EIO;
>>>>>>>>>         } else {
>>>>>>>>>             kcs_bmc->phase = KCS_PHASE_WAIT_READ;
>>>>>>>>>             kcs_bmc->data_in_avail = false;
>>>>>>>>>             kcs_bmc->data_in_idx = 0;
>>>>>>>>>         }
>>>>>>>>>
>>>>>>>>>         spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>
>>>>>>>>>    out_mutex_unlock:
>>>>>>>>>         mutex_unlock(&kcs_bmc->read_mutex);
>>>>>>>>>
>>>>>>>>>         kfree(data);
>>>>>>>>>
>>>>>>>>>         return ret;
>>>>>>>>>    }
>>>>>>>>> Note that I added a state, KCS_PHASE_WRITE_END_DONE, which 
>>>>>>>>> would be
>>>>>>>>> set after the final byte from the host is received. You want 
>>>>>>>>> the read here
>>>>>>>>> done before you can do the write below to avoid the race I 
>>>>>>>>> talked about.
>>>>>>>>>
>>>>>>>>> There is a local copy made of the data.  What you *never* want 
>>>>>>>>> to happen
>>>>>>>>> here is for the state machine to start processing a new write 
>>>>>>>>> command
>>>>>>>>> while the data is being copied.  It could result in corrupt 
>>>>>>>>> data being read
>>>>>>>>> and some random operation being done by the BMC.
>>>>>>>>>
>>>>>>>>> If you want to avoid the local copy, it could be done, but 
>>>>>>>>> it's more complex.
>>>>>>>>>
>>>>>>>>>>>> +    if (!(filp->f_flags & O_NONBLOCK))
>>>>>>>>>>>> + wait_event_interruptible(kcs_bmc->queue,
>>>>>>>>>>>> + kcs_bmc->data_in_avail);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    if (kcs_bmc->data_in_avail) {
>>>>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>>>>> +
>>>>>>>>>>>> +        if (count > kcs_bmc->data_in_idx)
>>>>>>>>>>>> +            count = kcs_bmc->data_in_idx;
>>>>>>>>>>>> +
>>>>>>>>>>>> +        if (!copy_to_user(buf, kcs_bmc->data_in, count))
>>>>>>>>>>>> +            ret = count;
>>>>>>>>>>>> +        else
>>>>>>>>>>>> +            ret = -EFAULT;
>>>>>>>>>>>> +    }
>>>>>>>>>>>> +
>>>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    return ret;
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>> +static ssize_t kcs_bmc_write(struct file *filp, const char 
>>>>>>>>>>>> *buf,
>>>>>>>>>>>> +                 size_t count, loff_t *offset)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>>>> +    ssize_t ret = count;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    if (count < 1 || count > KCS_MSG_BUFSIZ)
>>>>>>>>>>>> +        return -EINVAL;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
>>>>>>>>>>>> +        if (copy_from_user(kcs_bmc->data_out, buf, count)) {
>>>>>>>>>>>> + spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>>> +            return -EFAULT;
>>>>>>>>>>>> +        }
>>>>>>>>>>>> +
>>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_READ;
>>>>>>>>>>>> +        kcs_bmc->data_out_idx = 1;
>>>>>>>>>>>> +        kcs_bmc->data_out_len = count;
>>>>>>>>>>>> +        write_data(kcs_bmc, kcs_bmc->data_out[0]);
>>>>>>>>>>>> +    } else if (kcs_bmc->phase == KCS_PHASE_READ) {
>>>>>>>>>>>> +        ret = -EBUSY;
>>>>>>>>>>>> +    } else {
>>>>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>>>>
>>>>>>>>>>> Is there a reason you return -EINVAL here? Why not just 
>>>>>>>>>>> -EBUSY in all
>>>>>>>>>>> cases?  Is there something that userland will need to do 
>>>>>>>>>>> differently?
>>>>>>>>>>>
>>>>>>>>>>>> +    }
>>>>>>>>>>>> +
>>>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    return ret;
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>> +static long kcs_bmc_ioctl(struct file *filp, unsigned int 
>>>>>>>>>>>> cmd,
>>>>>>>>>>>> +              unsigned long arg)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>>>> +    long ret = 0;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    switch (cmd) {
>>>>>>>>>>>> +    case IPMI_BMC_IOCTL_SET_SMS_ATN:
>>>>>>>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>>>>>>>> + KCS_STATUS_SMS_ATN);
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
>>>>>>>>>>>> +        update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
>>>>>>>>>>>> +                        0);
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    case IPMI_BMC_IOCTL_FORCE_ABORT:
>>>>>>>>>>>> +        set_state(kcs_bmc, ERROR_STATE);
>>>>>>>>>>>> +        read_data(kcs_bmc);
>>>>>>>>>>>> +        write_data(kcs_bmc, KCS_ZERO_DATA);
>>>>>>>>>>>> +
>>>>>>>>>>>> +        kcs_bmc->phase = KCS_PHASE_ERROR;
>>>>>>>>>>>> +        kcs_bmc->data_in_avail = false;
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    default:
>>>>>>>>>>>> +        ret = -EINVAL;
>>>>>>>>>>>> +        break;
>>>>>>>>>>>> +    }
>>>>>>>>>>>> +
>>>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    return ret;
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>> +static int kcs_bmc_release(struct inode *inode, struct 
>>>>>>>>>>>> file *filp)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +    struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
>>>>>>>>>>>> +
>>>>>>>>>>>
>>>>>>>>>>> What happens if the device gets closed in the middle of a 
>>>>>>>>>>> transaction?  That's
>>>>>>>>>>> an important case to handle.  If something is in process, 
>>>>>>>>>>> you need to abort it.
>>>>>>>>>>>
>>>>>>>>>> The device just provides the read & write data, the 
>>>>>>>>>> transaction is handled in the KCS
>>>>>>>>>> controller's IRQ handler.
>>>>>>>>>
>>>>>>>>> From the spec, section 9.14:
>>>>>>>>>
>>>>>>>>>    The BMC must change the status to ERROR_STATE on any 
>>>>>>>>> condition where it
>>>>>>>>>    aborts a command transfer in progress.
>>>>>>>>>
>>>>>>>>> So you need to do something here.
>>>>>>>>>
>>>>>>>> In practice, we do this as spec said in ipmid, NOT in driver, 
>>>>>>>> driver can't handle anything, let's
>>>>>>>> make it simple, thanks!
>>>>>>>
>>>>>>> If ipmid crashes or is killed, how does it accomplish this?
>>>>>>>
>>>>>> Every time ipmid (or kcsd) crashes or is killed, it needs to call 
>>>>>> FORCE_ABORT first when it starts again, to sync with
>>>>>> host side software.
>>>>>>>>
>>>>>>>> Whenever the BMC is reset (from power-on or a hard reset), the 
>>>>>>>> State Bits are initialized to “11 - Error State”. Doing so
>>>>>>>> allows SMS to detect that the BMC has been reset and that any 
>>>>>>>> message in process has been terminated by the BMC.
>>>>>>>
>>>>>>> Right, that's fine, like it should be.  But we are not talking 
>>>>>>> about a reset.
>>>>>>>
>>>>>> I think the final error handling solution is that kcsd (user 
>>>>>> land) runs, otherwise, the host software side still got stuck. We 
>>>>>> meet
>>>>>> this kind of issue, so in general, we just don't handle some 
>>>>>> minor errors in the driver, then in kcsd, when it can provide the real
>>>>>> IPMI service, it will reset the channel firstly to sync with host 
>>>>>> side software.
>>>>>
>>>>> "Userland will do the right thing" is not very convincing to a 
>>>>> kernel developer.
>>>>>
>>>>> Plus if the above is true, I would think that you would just want 
>>>>> to hold the device
>>>>> in an error state when it wasn't opened.
>>>>>
>>>> I understand your concern, of course, driver need handles things 
>>>> well. But in fact, if a user app is truly a bad boy, it still can hang
>>>> the host side: set SMS_ATN, but no message returned when software 
>>>> host side requests, then host open-ipmi driver will hang, we
>>>> meet this kind of error to hang the customer's host. :) In my 
>>>> understanding, kcs-bmc should do the right thing about read and write,
>>>> the real transaction should be handled correctly by the kcsd.
>>>>
>>>> And if no kcsd starts, then this kind of BMC can't be sold out. :)
>>>
>>> True.  I'm not as concerned about this sort of thing.  It's nicer to 
>>> the host side if
>>> it can detect problems quickly, but it will eventually time out.
>>>
>>> From what I can tell from the current design, if the BMC userland is 
>>> not running,
>>> the driver will step through the state machine until it hits read 
>>> state, then it
>>> will sit there until the host times out and aborts the operation.
>>>
>>> IMHO, it would be better for the host side if the driver just stayed 
>>> in error state
>>> if nothing had it open.  I would think the spec says that in the 
>>> quote I referenced
>>> above, but that quote, like many things in the IPMI spec, is fairly 
>>> vague and could
>>> be interpreted many ways.
>>>
>> Well, I will try to fix these errors as far as possible.
>>> -corey
>>>
>>>
>>>>> -corey
>>>>>
>>>>>>> -corey
>>>>>>>
>>>>>>>>>>>> + spin_lock_irq(&kcs_bmc->lock);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    kcs_bmc->running = 0;
>>>>>>>>>>>> +
>>>>>>>>>>>> +    spin_unlock_irq(&kcs_bmc->lock);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    return 0;
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>
>>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
>

Patch

diff --git a/Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt
new file mode 100644
index 0000000..613c34c
--- /dev/null
+++ b/Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt
@@ -0,0 +1,26 @@ 
+* Aspeed KCS (Keyboard Controller Style) IPMI interface
+
+The Aspeed SOCs (AST2400 and AST2500) are commonly used as BMCs
+(Baseboard Management Controllers) and the KCS interface can be
+used to perform in-band IPMI communication with their host.
+
+Required properties:
+- compatible : should be one of
+    "aspeed,ast2400-kcs-bmc"
+    "aspeed,ast2500-kcs-bmc"
+- interrupts : interrupt generated by the controller
+- kcs_chan : The LPC channel number in the controller
+- kcs_addr : The host CPU IO map address
+
+
+Example:
+
+    kcs3: kcs3@0 {
+        compatible = "aspeed,ast2500-kcs-bmc";
+        reg = <0x0 0x80>;
+        interrupts = <8>;
+        kcs_chan = <3>;
+        kcs_addr = <0xCA2>;
+        status = "okay";
+    };
+
diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
index 3544abc..770def0 100644
--- a/drivers/char/ipmi/Kconfig
+++ b/drivers/char/ipmi/Kconfig
@@ -96,6 +96,26 @@  config IPMI_POWEROFF
 
 endif # IPMI_HANDLER
 
+config IPMI_KCS_BMC
+	tristate 'IPMI KCS BMC Interface'
+	help
+	  Provides a device driver for the KCS (Keyboard Controller Style)
+	  IPMI interface which meets the requirement of the BMC (Baseboard
+	  Management Controllers) side for handling the IPMI request from
+	  host system software.
+
+config ASPEED_KCS_IPMI_BMC
+	depends on ARCH_ASPEED || COMPILE_TEST
+	depends on IPMI_KCS_BMC
+	select REGMAP_MMIO
+	tristate "Aspeed KCS IPMI BMC driver"
+	help
+	  Provides a driver for the KCS (Keyboard Controller Style) IPMI
+	  interface found on Aspeed SOCs (AST2400 and AST2500).
+
+	  The driver implements the BMC side of the KCS controller, it
+	  provides the access of KCS IO space for BMC side.
+
 config ASPEED_BT_IPMI_BMC
 	depends on ARCH_ASPEED || COMPILE_TEST
        depends on REGMAP && REGMAP_MMIO && MFD_SYSCON
diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile
index 33b899f..10c591a 100644
--- a/drivers/char/ipmi/Makefile
+++ b/drivers/char/ipmi/Makefile
@@ -21,4 +21,6 @@  obj-$(CONFIG_IPMI_SSIF) += ipmi_ssif.o
 obj-$(CONFIG_IPMI_POWERNV) += ipmi_powernv.o
 obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o
 obj-$(CONFIG_IPMI_POWEROFF) += ipmi_poweroff.o
+obj-$(CONFIG_IPMI_KCS_BMC) += kcs_bmc.o
 obj-$(CONFIG_ASPEED_BT_IPMI_BMC) += bt-bmc.o
+obj-$(CONFIG_ASPEED_KCS_IPMI_BMC) += kcs_bmc_aspeed.o
\ No newline at end of file
diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
new file mode 100644
index 0000000..4ef3c0b
--- /dev/null
+++ b/drivers/char/ipmi/kcs_bmc.c
@@ -0,0 +1,430 @@ 
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2015-2018, Intel Corporation.
+
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/ipmi_bmc.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "kcs_bmc.h"
+
+#define KCS_MSG_BUFSIZ    1024
+
+#define KCS_ZERO_DATA     0
+
+
+/* IPMI 2.0 - Table 9-1, KCS Interface Status Register Bits */
+#define KCS_STATUS_STATE(state) (state << 6)
+#define KCS_STATUS_STATE_MASK   KCS_STATUS_STATE(0x3)
+#define KCS_STATUS_CMD_DAT      BIT(3)
+#define KCS_STATUS_SMS_ATN      BIT(2)
+#define KCS_STATUS_IBF          BIT(1)
+#define KCS_STATUS_OBF          BIT(0)
+
+/* IPMI 2.0 - Table 9-2, KCS Interface State Bits */
+enum kcs_states {
+	IDLE_STATE  = 0,
+	READ_STATE  = 1,
+	WRITE_STATE = 2,
+	ERROR_STATE = 3,
+};
+
+/* IPMI 2.0 - Table 9-3, KCS Interface Control Codes */
+#define KCS_CMD_GET_STATUS      0x60
+#define KCS_CMD_ABORT           0x60
+#define KCS_CMD_WRITE_START     0x61
+#define KCS_CMD_WRITE_END       0x62
+#define KCS_CMD_READ_BYTE       0x68
+
+
+static inline u8 read_data(struct kcs_bmc *kcs_bmc)
+{
+	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.idr);
+}
+
+static inline void write_data(struct kcs_bmc *kcs_bmc, u8 data)
+{
+	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.odr, data);
+}
+
+static inline u8 read_status(struct kcs_bmc *kcs_bmc)
+{
+	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.str);
+}
+
+static inline void write_status(struct kcs_bmc *kcs_bmc, u8 data)
+{
+	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.str, data);
+}
+
+static void update_status_bits(struct kcs_bmc *kcs_bmc, u8 mask, u8 val)
+{
+	u8 tmp;
+
+	tmp = read_status(kcs_bmc);
+
+	tmp &= ~mask;
+	tmp |= val & mask;
+
+	write_status(kcs_bmc, tmp);
+}
+
+static inline void set_state(struct kcs_bmc *kcs_bmc, u8 state)
+{
+	update_status_bits(kcs_bmc, KCS_STATUS_STATE_MASK,
+					KCS_STATUS_STATE(state));
+}
+
+
+static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
+{
+	u8 data;
+
+	switch (kcs_bmc->phase) {
+	case KCS_PHASE_WRITE:
+		set_state(kcs_bmc, WRITE_STATE);
+
+		/* set OBF before reading data */
+		write_data(kcs_bmc, KCS_ZERO_DATA);
+
+		if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
+			kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
+						read_data(kcs_bmc);
+		break;
+
+	case KCS_PHASE_WRITE_END:
+		set_state(kcs_bmc, READ_STATE);
+
+		if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ)
+			kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
+						read_data(kcs_bmc);
+
+		kcs_bmc->phase = KCS_PHASE_WAIT_READ;
+		if (kcs_bmc->running) {
+			kcs_bmc->data_in_avail = true;
+			wake_up_interruptible(&kcs_bmc->queue);
+		}
+		break;
+
+	case KCS_PHASE_READ:
+		if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
+			set_state(kcs_bmc, IDLE_STATE);
+
+		data = read_data(kcs_bmc);
+		if (data != KCS_CMD_READ_BYTE) {
+			set_state(kcs_bmc, ERROR_STATE);
+			write_data(kcs_bmc, KCS_ZERO_DATA);
+			break;
+		}
+
+		if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
+			write_data(kcs_bmc, KCS_ZERO_DATA);
+			kcs_bmc->phase = KCS_PHASE_IDLE;
+			break;
+		}
+
+		write_data(kcs_bmc,
+			kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
+		break;
+
+	case KCS_PHASE_ABORT_ERROR1:
+		set_state(kcs_bmc, READ_STATE);
+
+		/* Read the Dummy byte */
+		read_data(kcs_bmc);
+
+		write_data(kcs_bmc, kcs_bmc->error);
+		kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
+		break;
+
+	case KCS_PHASE_ABORT_ERROR2:
+		set_state(kcs_bmc, IDLE_STATE);
+
+		/* Read the Dummy byte */
+		read_data(kcs_bmc);
+
+		write_data(kcs_bmc, KCS_ZERO_DATA);
+		kcs_bmc->phase = KCS_PHASE_IDLE;
+
+		break;
+
+	default:
+		set_state(kcs_bmc, ERROR_STATE);
+
+		/* Read the Dummy byte */
+		read_data(kcs_bmc);
+
+		write_data(kcs_bmc, KCS_ZERO_DATA);
+		break;
+	}
+}
+
+static void kcs_bmc_handle_command(struct kcs_bmc *kcs_bmc)
+{
+	u8 cmd;
+
+	set_state(kcs_bmc, WRITE_STATE);
+
+	/* Dummy data to generate OBF */
+	write_data(kcs_bmc, KCS_ZERO_DATA);
+
+	cmd = read_data(kcs_bmc);
+	switch (cmd) {
+	case KCS_CMD_WRITE_START:
+		kcs_bmc->data_in_avail = false;
+		kcs_bmc->data_in_idx   = 0;
+		kcs_bmc->phase         = KCS_PHASE_WRITE;
+		kcs_bmc->error         = KCS_NO_ERROR;
+		break;
+
+	case KCS_CMD_WRITE_END:
+		kcs_bmc->phase = KCS_PHASE_WRITE_END;
+		break;
+
+	case KCS_CMD_ABORT:
+		if (kcs_bmc->error == KCS_NO_ERROR)
+			kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
+
+		kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
+		break;
+
+	default:
+		kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
+		set_state(kcs_bmc, ERROR_STATE);
+		write_data(kcs_bmc, kcs_bmc->error);
+		kcs_bmc->phase = KCS_PHASE_ERROR;
+		break;
+	}
+}
+
+int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
+{
+	unsigned long flags;
+	int ret = 0;
+	u8 status;
+
+	spin_lock_irqsave(&kcs_bmc->lock, flags);
+
+	status = read_status(kcs_bmc) & (KCS_STATUS_IBF | KCS_STATUS_CMD_DAT);
+
+	switch (status) {
+	case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
+		kcs_bmc_handle_command(kcs_bmc);
+		break;
+
+	case KCS_STATUS_IBF:
+		kcs_bmc_handle_data(kcs_bmc);
+		break;
+
+	default:
+		ret = -1;
+		break;
+	}
+
+	spin_unlock_irqrestore(&kcs_bmc->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(kcs_bmc_handle_event);
+
+static inline struct kcs_bmc *file_kcs_bmc(struct file *filp)
+{
+	return container_of(filp->private_data, struct kcs_bmc, miscdev);
+}
+
+static int kcs_bmc_open(struct inode *inode, struct file *filp)
+{
+	struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
+	int ret = 0;
+
+	spin_lock_irq(&kcs_bmc->lock);
+
+	if (!kcs_bmc->running) {
+		kcs_bmc->running       = 1;
+		kcs_bmc->phase         = KCS_PHASE_IDLE;
+		kcs_bmc->data_in_avail = false;
+	} else {
+		ret = -EBUSY;
+	}
+
+	spin_unlock_irq(&kcs_bmc->lock);
+
+	return ret;
+}
+
+static unsigned int kcs_bmc_poll(struct file *filp, poll_table *wait)
+{
+	struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
+	unsigned int mask = 0;
+
+	poll_wait(filp, &kcs_bmc->queue, wait);
+
+	spin_lock_irq(&kcs_bmc->lock);
+
+	if (kcs_bmc->data_in_avail)
+		mask |= POLLIN;
+
+	spin_unlock_irq(&kcs_bmc->lock);
+
+	return mask;
+}
+
+static ssize_t kcs_bmc_read(struct file *filp, char *buf,
+			    size_t count, loff_t *offset)
+{
+	struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
+	ssize_t ret = -EAGAIN;
+
+	if (!(filp->f_flags & O_NONBLOCK))
+		wait_event_interruptible(kcs_bmc->queue,
+					 kcs_bmc->data_in_avail);
+
+	spin_lock_irq(&kcs_bmc->lock);
+
+	if (kcs_bmc->data_in_avail) {
+		kcs_bmc->data_in_avail = false;
+
+		if (count > kcs_bmc->data_in_idx)
+			count = kcs_bmc->data_in_idx;
+
+		if (!copy_to_user(buf, kcs_bmc->data_in, count))
+			ret = count;
+		else
+			ret = -EFAULT;
+	}
+
+	spin_unlock_irq(&kcs_bmc->lock);
+
+	return ret;
+}
+
+static ssize_t kcs_bmc_write(struct file *filp, const char *buf,
+			     size_t count, loff_t *offset)
+{
+	struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
+	ssize_t ret = count;
+
+	if (count < 1 || count > KCS_MSG_BUFSIZ)
+		return -EINVAL;
+
+	spin_lock_irq(&kcs_bmc->lock);
+
+	if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
+		if (copy_from_user(kcs_bmc->data_out, buf, count)) {
+			spin_unlock_irq(&kcs_bmc->lock);
+			return -EFAULT;
+		}
+
+		kcs_bmc->phase = KCS_PHASE_READ;
+		kcs_bmc->data_out_idx = 1;
+		kcs_bmc->data_out_len = count;
+		write_data(kcs_bmc, kcs_bmc->data_out[0]);
+	} else if (kcs_bmc->phase == KCS_PHASE_READ) {
+		ret = -EBUSY;
+	} else {
+		ret = -EINVAL;
+	}
+
+	spin_unlock_irq(&kcs_bmc->lock);
+
+	return ret;
+}
+
+static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
+	long ret = 0;
+
+	spin_lock_irq(&kcs_bmc->lock);
+
+	switch (cmd) {
+	case IPMI_BMC_IOCTL_SET_SMS_ATN:
+		update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
+						KCS_STATUS_SMS_ATN);
+		break;
+
+	case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
+		update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
+						0);
+		break;
+
+	case IPMI_BMC_IOCTL_FORCE_ABORT:
+		set_state(kcs_bmc, ERROR_STATE);
+		read_data(kcs_bmc);
+		write_data(kcs_bmc, KCS_ZERO_DATA);
+
+		kcs_bmc->phase = KCS_PHASE_ERROR;
+		kcs_bmc->data_in_avail = false;
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	spin_unlock_irq(&kcs_bmc->lock);
+
+	return ret;
+}
+
+static int kcs_bmc_release(struct inode *inode, struct file *filp)
+{
+	struct kcs_bmc *kcs_bmc = file_kcs_bmc(filp);
+
+	spin_lock_irq(&kcs_bmc->lock);
+
+	kcs_bmc->running = 0;
+
+	spin_unlock_irq(&kcs_bmc->lock);
+
+	return 0;
+}
+
+static const struct file_operations kcs_bmc_fops = {
+	.owner          = THIS_MODULE,
+	.open           = kcs_bmc_open,
+	.read           = kcs_bmc_read,
+	.write          = kcs_bmc_write,
+	.release        = kcs_bmc_release,
+	.poll           = kcs_bmc_poll,
+	.unlocked_ioctl = kcs_bmc_ioctl,
+};
+
+struct kcs_bmc *kcs_bmc_alloc(struct device *dev, int sizeof_priv, u32 channel)
+{
+	struct kcs_bmc *kcs_bmc;
+	int rc;
+
+	kcs_bmc = devm_kzalloc(dev, sizeof(*kcs_bmc) + sizeof_priv, GFP_KERNEL);
+	if (!kcs_bmc)
+		return NULL;
+
+	dev_set_name(dev, "ipmi-kcs%u", channel);
+
+	spin_lock_init(&kcs_bmc->lock);
+	kcs_bmc->channel = channel;
+
+	init_waitqueue_head(&kcs_bmc->queue);
+	kcs_bmc->data_in  = devm_kmalloc(dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
+	kcs_bmc->data_out = devm_kmalloc(dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
+	if (kcs_bmc->data_in == NULL || kcs_bmc->data_out == NULL) {
+		dev_err(dev, "Failed to allocate data buffers\n");
+		return NULL;
+	}
+
+	kcs_bmc->miscdev.minor = MISC_DYNAMIC_MINOR;
+	kcs_bmc->miscdev.name = dev_name(dev);
+	kcs_bmc->miscdev.fops = &kcs_bmc_fops;
+
+	return kcs_bmc;
+}
+EXPORT_SYMBOL(kcs_bmc_alloc);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
+MODULE_DESCRIPTION("KCS BMC to handle the IPMI request from system software");
diff --git a/drivers/char/ipmi/kcs_bmc.h b/drivers/char/ipmi/kcs_bmc.h
new file mode 100644
index 0000000..b56c250
--- /dev/null
+++ b/drivers/char/ipmi/kcs_bmc.h
@@ -0,0 +1,88 @@ 
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2015-2018, Intel Corporation.
+
+#ifndef __KCS_BMC_H__
+#define __KCS_BMC_H__
+
+#include <linux/miscdevice.h>
+
+/* Different phases of the KCS BMC module */
+enum kcs_phases {
+	/* BMC should not be expecting nor sending any data. */
+	KCS_PHASE_IDLE,
+
+	/* BMC is receiving a packet from system software. */
+	KCS_PHASE_WRITE,
+
+	/* BMC is waiting for the last data byte from system software. */
+	KCS_PHASE_WRITE_END,
+
+	/* BMC is waiting for the response from the upper IPMI service. */
+	KCS_PHASE_WAIT_READ,
+
+	/* BMC is transferring the response to system software. */
+	KCS_PHASE_READ,
+
+	/* BMC is waiting error status request from system software. */
+	KCS_PHASE_ABORT_ERROR1,
+
+	/* BMC is waiting for idle status after error from system software. */
+	KCS_PHASE_ABORT_ERROR2,
+
+	/* BMC has detected a protocol violation at the interface level. */
+	KCS_PHASE_ERROR
+};
+
+/* IPMI 2.0 - Table 9-4, KCS Interface Status Codes */
+enum kcs_errors {
+	KCS_NO_ERROR                = 0x00,
+	KCS_ABORTED_BY_COMMAND      = 0x01,
+	KCS_ILLEGAL_CONTROL_CODE    = 0x02,
+	KCS_LENGTH_ERROR            = 0x06,
+	KCS_UNSPECIFIED_ERROR       = 0xFF
+};
+
+/* IPMI 2.0 - 9.5, KCS Interface Registers */
+struct kcs_ioreg {
+	u32 idr; /* Input Data Register */
+	u32 odr; /* Output Data Register */
+	u32 str; /* Status Register */
+};
+
+struct kcs_bmc {
+	spinlock_t lock;
+
+	u32 channel;
+	int running;
+
+	/* Setup by BMC KCS controller driver */
+	struct kcs_ioreg ioreg;
+	u8 (*io_inputb)(struct kcs_bmc *kcs_bmc, u32 reg);
+	void (*io_outputb)(struct kcs_bmc *kcs_bmc, u32 reg, u8 b);
+
+	enum kcs_phases phase;
+	enum kcs_errors error;
+
+	wait_queue_head_t queue;
+	bool data_in_avail;
+	int  data_in_idx;
+	u8  *data_in;
+
+	int  data_out_idx;
+	int  data_out_len;
+	u8  *data_out;
+
+	struct miscdevice miscdev;
+
+	unsigned long long priv[];
+};
+
+static inline void *kcs_bmc_priv(const struct kcs_bmc *kcs_bmc)
+{
+	return kcs_bmc->priv;
+}
+
+extern int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc);
+extern struct kcs_bmc *kcs_bmc_alloc(struct device *dev, int sizeof_priv,
+					u32 channel);
+#endif
diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
new file mode 100644
index 0000000..5002de3
--- /dev/null
+++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
@@ -0,0 +1,319 @@ 
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2015-2018, Intel Corporation.
+
+#define pr_fmt(fmt) "aspeed-kcs-bmc: " fmt
+
+#include <linux/atomic.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/regmap.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+
+#include "kcs_bmc.h"
+
+
+#define DEVICE_NAME     "ast-kcs-bmc"
+
+#define KCS_CHANNEL_MAX     4
+
+/* mapped to lpc-bmc@0 IO space */
+#define LPC_HICR0            0x000
+#define     LPC_HICR0_LPC3E          BIT(7)
+#define     LPC_HICR0_LPC2E          BIT(6)
+#define     LPC_HICR0_LPC1E          BIT(5)
+#define LPC_HICR2            0x008
+#define     LPC_HICR2_IBFIF3         BIT(3)
+#define     LPC_HICR2_IBFIF2         BIT(2)
+#define     LPC_HICR2_IBFIF1         BIT(1)
+#define LPC_HICR4            0x010
+#define     LPC_HICR4_LADR12AS       BIT(7)
+#define     LPC_HICR4_KCSENBL        BIT(2)
+#define LPC_LADR3H           0x014
+#define LPC_LADR3L           0x018
+#define LPC_LADR12H          0x01C
+#define LPC_LADR12L          0x020
+#define LPC_IDR1             0x024
+#define LPC_IDR2             0x028
+#define LPC_IDR3             0x02C
+#define LPC_ODR1             0x030
+#define LPC_ODR2             0x034
+#define LPC_ODR3             0x038
+#define LPC_STR1             0x03C
+#define LPC_STR2             0x040
+#define LPC_STR3             0x044
+
+/* mapped to lpc-host@80 IO space */
+#define LPC_HICRB            0x080
+#define     LPC_HICRB_IBFIF4         BIT(1)
+#define     LPC_HICRB_LPC4E          BIT(0)
+#define LPC_LADR4            0x090
+#define LPC_IDR4             0x094
+#define LPC_ODR4             0x098
+#define LPC_STR4             0x09C
+
+struct aspeed_kcs_bmc {
+	struct regmap *map;
+};
+
+
+static u8 aspeed_kcs_inb(struct kcs_bmc *kcs_bmc, u32 reg)
+{
+	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	u32 val = 0;
+	int rc;
+
+	rc = regmap_read(priv->map, reg, &val);
+	WARN(rc != 0, "regmap_read() failed: %d\n", rc);
+
+	return rc == 0 ? (u8) val : 0;
+}
+
+static void aspeed_kcs_outb(struct kcs_bmc *kcs_bmc, u32 reg, u8 data)
+{
+	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	int rc;
+
+	rc = regmap_write(priv->map, reg, data);
+	WARN(rc != 0, "regmap_write() failed: %d\n", rc);
+}
+
+
+/*
+ * AST_usrGuide_KCS.pdf
+ * 2. Background:
+ *   we note D for Data, and C for Cmd/Status, default rules are
+ *     A. KCS1 / KCS2 ( D / C:X / X+4 )
+ *        D / C : CA0h / CA4h
+ *        D / C : CA8h / CACh
+ *     B. KCS3 ( D / C:XX2h / XX3h )
+ *        D / C : CA2h / CA3h
+ *        D / C : CB2h / CB3h
+ *     C. KCS4
+ *        D / C : CA4h / CA5h
+ */
+static void aspeed_kcs_set_address(struct kcs_bmc *kcs_bmc, u16 addr)
+{
+	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+
+	switch (kcs_bmc->channel) {
+	case 1:
+		regmap_update_bits(priv->map, LPC_HICR4,
+				LPC_HICR4_LADR12AS, 0);
+		regmap_write(priv->map, LPC_LADR12H, addr >> 8);
+		regmap_write(priv->map, LPC_LADR12L, addr & 0xFF);
+		break;
+
+	case 2:
+		regmap_update_bits(priv->map, LPC_HICR4,
+				LPC_HICR4_LADR12AS, LPC_HICR4_LADR12AS);
+		regmap_write(priv->map, LPC_LADR12H, addr >> 8);
+		regmap_write(priv->map, LPC_LADR12L, addr & 0xFF);
+		break;
+
+	case 3:
+		regmap_write(priv->map, LPC_LADR3H, addr >> 8);
+		regmap_write(priv->map, LPC_LADR3L, addr & 0xFF);
+		break;
+
+	case 4:
+		regmap_write(priv->map, LPC_LADR4, ((addr + 1) << 16) |
+			addr);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void aspeed_kcs_enable_channel(struct kcs_bmc *kcs_bmc, bool enable)
+{
+	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+
+	switch (kcs_bmc->channel) {
+	case 1:
+		if (enable) {
+			regmap_update_bits(priv->map, LPC_HICR2,
+					LPC_HICR2_IBFIF1, LPC_HICR2_IBFIF1);
+			regmap_update_bits(priv->map, LPC_HICR0,
+					LPC_HICR0_LPC1E, LPC_HICR0_LPC1E);
+		} else {
+			regmap_update_bits(priv->map, LPC_HICR0,
+					LPC_HICR0_LPC1E, 0);
+			regmap_update_bits(priv->map, LPC_HICR2,
+					LPC_HICR2_IBFIF1, 0);
+		}
+		break;
+
+	case 2:
+		if (enable) {
+			regmap_update_bits(priv->map, LPC_HICR2,
+					LPC_HICR2_IBFIF2, LPC_HICR2_IBFIF2);
+			regmap_update_bits(priv->map, LPC_HICR0,
+					LPC_HICR0_LPC2E, LPC_HICR0_LPC2E);
+		} else {
+			regmap_update_bits(priv->map, LPC_HICR0,
+					LPC_HICR0_LPC2E, 0);
+			regmap_update_bits(priv->map, LPC_HICR2,
+					LPC_HICR2_IBFIF2, 0);
+		}
+		break;
+
+	case 3:
+		if (enable) {
+			regmap_update_bits(priv->map, LPC_HICR2,
+					LPC_HICR2_IBFIF3, LPC_HICR2_IBFIF3);
+			regmap_update_bits(priv->map, LPC_HICR0,
+					LPC_HICR0_LPC3E, LPC_HICR0_LPC3E);
+			regmap_update_bits(priv->map, LPC_HICR4,
+					LPC_HICR4_KCSENBL, LPC_HICR4_KCSENBL);
+		} else {
+			regmap_update_bits(priv->map, LPC_HICR0,
+					LPC_HICR0_LPC3E, 0);
+			regmap_update_bits(priv->map, LPC_HICR4,
+					LPC_HICR4_KCSENBL, 0);
+			regmap_update_bits(priv->map, LPC_HICR2,
+					LPC_HICR2_IBFIF3, 0);
+		}
+		break;
+
+	case 4:
+		if (enable)
+			regmap_update_bits(priv->map, LPC_HICRB,
+					LPC_HICRB_IBFIF4 | LPC_HICRB_LPC4E,
+					LPC_HICRB_IBFIF4 | LPC_HICRB_LPC4E);
+		else
+			regmap_update_bits(priv->map, LPC_HICRB,
+					LPC_HICRB_IBFIF4 | LPC_HICRB_LPC4E,
+					0);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static irqreturn_t aspeed_kcs_irq(int irq, void *arg)
+{
+	struct kcs_bmc *kcs_bmc = arg;
+
+	if (!kcs_bmc_handle_event(kcs_bmc))
+		return IRQ_HANDLED;
+
+	return IRQ_NONE;
+}
+
+static int aspeed_kcs_config_irq(struct kcs_bmc *kcs_bmc,
+			struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	int irq;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	return devm_request_irq(dev, irq, aspeed_kcs_irq, IRQF_SHARED,
+				dev_name(dev), kcs_bmc);
+}
+
+static const struct kcs_ioreg ast_kcs_bmc_ioregs[KCS_CHANNEL_MAX] = {
+	{ .idr = LPC_IDR1, .odr = LPC_ODR1, .str = LPC_STR1 },
+	{ .idr = LPC_IDR2, .odr = LPC_ODR2, .str = LPC_STR2 },
+	{ .idr = LPC_IDR3, .odr = LPC_ODR3, .str = LPC_STR3 },
+	{ .idr = LPC_IDR4, .odr = LPC_ODR4, .str = LPC_STR4 },
+};
+
+static int aspeed_kcs_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct aspeed_kcs_bmc *priv;
+	struct kcs_bmc *kcs_bmc;
+	u32 chan, addr;
+	int rc;
+
+	rc = of_property_read_u32(dev->of_node, "kcs_chan", &chan);
+	if ((rc != 0) || (chan == 0 || chan > KCS_CHANNEL_MAX)) {
+		dev_err(dev, "no valid 'kcs_chan' configured\n");
+		return -ENODEV;
+	}
+
+	rc = of_property_read_u32(dev->of_node, "kcs_addr", &addr);
+	if (rc) {
+		dev_err(dev, "no valid 'kcs_addr' configured\n");
+		return -ENODEV;
+	}
+
+	kcs_bmc = kcs_bmc_alloc(dev, sizeof(*priv), chan);
+	if (!kcs_bmc)
+		return -ENOMEM;
+
+	priv = kcs_bmc_priv(kcs_bmc);
+	priv->map = syscon_node_to_regmap(dev->parent->of_node);
+	if (IS_ERR(priv->map)) {
+		dev_err(dev, "Couldn't get regmap\n");
+		return -ENODEV;
+	}
+
+	kcs_bmc->ioreg = ast_kcs_bmc_ioregs[chan - 1];
+	kcs_bmc->io_inputb = aspeed_kcs_inb;
+	kcs_bmc->io_outputb = aspeed_kcs_outb;
+
+	dev_set_drvdata(dev, kcs_bmc);
+
+	aspeed_kcs_set_address(kcs_bmc, addr);
+	aspeed_kcs_enable_channel(kcs_bmc, true);
+	rc = aspeed_kcs_config_irq(kcs_bmc, pdev);
+	if (rc)
+		return rc;
+
+	rc = misc_register(&kcs_bmc->miscdev);
+	if (rc) {
+		dev_err(dev, "Unable to register device\n");
+		return rc;
+	}
+
+	pr_info("channel=%u address=0x%x idr=0x%x odr=0x%x str=0x%x\n",
+		chan, addr,
+		kcs_bmc->ioreg.idr, kcs_bmc->ioreg.odr, kcs_bmc->ioreg.str);
+
+	return 0;
+}
+
+static int aspeed_kcs_remove(struct platform_device *pdev)
+{
+	struct kcs_bmc *kcs_bmc = dev_get_drvdata(&pdev->dev);
+
+	misc_deregister(&kcs_bmc->miscdev);
+
+	return 0;
+}
+
+static const struct of_device_id ast_kcs_bmc_match[] = {
+	{ .compatible = "aspeed,ast2400-kcs-bmc" },
+	{ .compatible = "aspeed,ast2500-kcs-bmc" },
+	{ }
+};
+
+static struct platform_driver ast_kcs_bmc_driver = {
+	.driver = {
+		.name           = DEVICE_NAME,
+		.of_match_table = ast_kcs_bmc_match,
+	},
+	.probe = aspeed_kcs_probe,
+	.remove = aspeed_kcs_remove,
+};
+
+module_platform_driver(ast_kcs_bmc_driver);
+
+MODULE_DEVICE_TABLE(of, ast_kcs_bmc_match);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
+MODULE_DESCRIPTION("Aspeed device interface to the KCS BMC device");
diff --git a/include/uapi/linux/ipmi_bmc.h b/include/uapi/linux/ipmi_bmc.h
new file mode 100644
index 0000000..2f9f97e
--- /dev/null
+++ b/include/uapi/linux/ipmi_bmc.h
@@ -0,0 +1,14 @@ 
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2015-2018, Intel Corporation.
+
+#ifndef _UAPI_LINUX_IPMI_BMC_H
+#define _UAPI_LINUX_IPMI_BMC_H
+
+#include <linux/ioctl.h>
+
+#define __IPMI_BMC_IOCTL_MAGIC        0xB1
+#define IPMI_BMC_IOCTL_SET_SMS_ATN    _IO(__IPMI_BMC_IOCTL_MAGIC, 0x00)
+#define IPMI_BMC_IOCTL_CLEAR_SMS_ATN  _IO(__IPMI_BMC_IOCTL_MAGIC, 0x01)
+#define IPMI_BMC_IOCTL_FORCE_ABORT    _IO(__IPMI_BMC_IOCTL_MAGIC, 0x02)
+
+#endif /* _UAPI_LINUX_IPMI_BMC_H */