diff mbox

[linux,dev-4.7,7/8] ipmi: maintain a request expiry list

Message ID 1477465067-19034-8-git-send-email-clg@kaod.org
State Changes Requested, archived
Delegated to: Joel Stanley
Headers show

Commit Message

Cédric Le Goater Oct. 26, 2016, 6:57 a.m. UTC
Regarding the response expiration handling, the IPMI spec says :

   The BMC must not return a given response once the corresponding
   Request-to-Response interval has passed. The BMC can ensure this
   by maintaining its own internal list of outstanding requests through
   the interface. The BMC could age and expire the entries in the list
   by expiring the entries at an interval that is somewhat shorter than
   the specified Request-to-Response interval....

To handle such case, we maintain list of received requests using the
seq number of the BT message to identify them. The list is updated
each time a request is received and a response is sent. The expiration
of the reponses is handled at each updates but also with a timer.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 drivers/char/ipmi/bt-bmc.c | 132 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

Comments

Cyril Bur Nov. 3, 2016, 12:26 a.m. UTC | #1
On Wed, 2016-10-26 at 08:57 +0200, Cédric Le Goater wrote:
> Regarding the response expiration handling, the IPMI spec says :
> 
>    The BMC must not return a given response once the corresponding
>    Request-to-Response interval has passed. The BMC can ensure this
>    by maintaining its own internal list of outstanding requests
> through
>    the interface. The BMC could age and expire the entries in the
> list
>    by expiring the entries at an interval that is somewhat shorter
> than
>    the specified Request-to-Response interval....
> 
> To handle such case, we maintain list of received requests using the
> seq number of the BT message to identify them. The list is updated
> each time a request is received and a response is sent. The
> expiration
> of the reponses is handled at each updates but also with a timer.
> 

I agree that the BMC kernel is most logical place to do this, at the
moment btbridged does attempt something similar no?

Should we patch btbridged to not? Should I least remove duplicated
logic? 

> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  drivers/char/ipmi/bt-bmc.c | 132
> +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 132 insertions(+)
> 
> diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c
> index b49e61320952..228ecdb689de 100644
> --- a/drivers/char/ipmi/bt-bmc.c
> +++ b/drivers/char/ipmi/bt-bmc.c
> @@ -17,6 +17,7 @@
>  #include <linux/platform_device.h>
>  #include <linux/poll.h>
>  #include <linux/sched.h>
> +#include <linux/slab.h>
>  #include <linux/timer.h>
>  
>  /*
> @@ -57,6 +58,15 @@
>  
>  #define BT_BMC_BUFFER_SIZE 256
>  
> +#define BT_BMC_RESPONSE_TIME	5 /* seconds */
> +
> +struct ipmi_request {
> +	struct list_head list;
> +
> +	u8 seq;
> +	unsigned long expires;
> +};
> +
>  struct bt_bmc {
>  	struct device		dev;
>  	struct miscdevice	miscdev;
> @@ -65,6 +75,11 @@ struct bt_bmc {
>  	wait_queue_head_t	queue;
>  	struct timer_list	poll_timer;
>  	struct mutex		mutex;
> +
> +	unsigned int		response_time;
> +	struct timer_list	requests_timer;
> +	spinlock_t              requests_lock;
> +	struct list_head        requests;
>  };
>  
>  static atomic_t open_count = ATOMIC_INIT(0);
> @@ -163,6 +178,94 @@ static int bt_bmc_open(struct inode *inode,
> struct file *file)
>  }
>  
>  /*
> + * lock should be held
> + */
> +static void drop_expired_requests(struct bt_bmc *bt_bmc)
> +{
> +	unsigned long flags = 0;
> +	struct ipmi_request *req, *next;
> +	unsigned long next_expires = 0;
> +	int start_timer = 0;
> +
> +	spin_lock_irqsave(&bt_bmc->requests_lock, flags);
> +	list_for_each_entry_safe(req, next, &bt_bmc->requests, list)
> {
> +		if (time_after_eq(jiffies, req->expires)) {
> +			pr_warn("BT: request seq:%d has expired.
> dropping\n",
> +				req->seq);
> +			list_del(&req->list);
> +			kfree(req);
> +			continue;
> +		}
> +		if (!start_timer) {
> +			start_timer = 1;
> +			next_expires = req->expires;
> +		} else {
> +			next_expires = min(next_expires, req-
> >expires);
> +		}
> +	}
> +	spin_unlock_irqrestore(&bt_bmc->requests_lock, flags);
> +
> +	/* and possibly restart */
> +	if (start_timer)
> +		mod_timer(&bt_bmc->requests_timer, next_expires);
> +}
> +
> +static void requests_timer(unsigned long data)
> +{
> +	struct bt_bmc *bt_bmc = (void *)data;
> +
> +	drop_expired_requests(bt_bmc);
> +}
> +
> +static int bt_bmc_add_request(struct bt_bmc *bt_bmc, u8 seq)
> +{
> +	struct ipmi_request *req;
> +
> +	req = kmalloc(sizeof(*req), GFP_KERNEL);
> +	if (!req)
> +		return -ENOMEM;
> +
> +	req->seq = seq;
> +	req->expires = jiffies +
> +		msecs_to_jiffies(bt_bmc->response_time * 1000);
> +
> +	spin_lock(&bt_bmc->requests_lock);
> +	list_add(&req->list, &bt_bmc->requests);
> +	spin_unlock(&bt_bmc->requests_lock);
> +
> +	drop_expired_requests(bt_bmc);
> +	return 0;
> +}
> +
> +static int bt_bmc_remove_request(struct bt_bmc *bt_bmc, u8 seq)
> +{
> +	struct ipmi_request *req, *next;
> +	int ret = -EBADRQC; /* Invalid request code */
> +
> +	spin_lock(&bt_bmc->requests_lock);
> +	list_for_each_entry_safe(req, next, &bt_bmc->requests, list)
> {
> +		/*
> +		 * The sequence number should be unique, so remove
> the
> +		 * first matching request found. If there are
> others,
> +		 * they should expire
> +		 */
> +		if (req->seq == seq) {
> +			list_del(&req->list);
> +			kfree(req);
> +			ret = 0;
> +			break;
> +		}
> +	}
> +	spin_unlock(&bt_bmc->requests_lock);
> +
> +	if (ret)
> +		pr_warn("BT: request seq:%d is invalid\n", seq);
> +
> +	drop_expired_requests(bt_bmc);
> +	return ret;
> +}
> +
> +/*
>   * The BT (Block Transfer) interface means that entire messages are
>   * buffered by the host before a notification is sent to the BMC
> that
>   * there is data to be read. The first byte is the length and the
> @@ -231,6 +334,13 @@ static ssize_t bt_bmc_read(struct file *file,
> char __user *buf,
>  		len_byte = 0;
>  	}
>  
> +	if (ret > 0) {
> +		int ret2 = bt_bmc_add_request(bt_bmc, kbuffer[2]);
> +
> +		if (ret2)
> +			ret = ret2;
> +	}
> +
>  	clr_b_busy(bt_bmc);
>  
>  out_unlock:
> @@ -283,12 +393,20 @@ static ssize_t bt_bmc_write(struct file *file,
> const char __user *buf,
>  	clr_wr_ptr(bt_bmc);
>  
>  	while (count) {
> +		int ret2;
> +
>  		nwritten = min_t(ssize_t, count, sizeof(kbuffer));
>  		if (copy_from_user(&kbuffer, buf, nwritten)) {
>  			ret = -EFAULT;
>  			break;
>  		}
>  
> +		ret2 = bt_bmc_remove_request(bt_bmc, kbuffer[2]);
> +		if (ret2) {
> +			ret = ret2;
> +			break;
> +		}
> +
>  		bt_writen(bt_bmc, kbuffer, nwritten);
>  
>  		count -= nwritten;
> @@ -438,6 +556,8 @@ static int bt_bmc_probe(struct platform_device
> *pdev)
>  
>  	mutex_init(&bt_bmc->mutex);
>  	init_waitqueue_head(&bt_bmc->queue);
> +	INIT_LIST_HEAD(&bt_bmc->requests);
> +	spin_lock_init(&bt_bmc->requests_lock);
>  
>  	bt_bmc->miscdev.minor	= MISC_DYNAMIC_MINOR,
>  		bt_bmc->miscdev.name	= DEVICE_NAME,
> @@ -451,6 +571,8 @@ static int bt_bmc_probe(struct platform_device
> *pdev)
>  
>  	bt_bmc_config_irq(bt_bmc, pdev);
>  
> +	bt_bmc->response_time = BT_BMC_RESPONSE_TIME;
> +
>  	if (bt_bmc->irq) {
>  		dev_info(dev, "Using IRQ %d\n", bt_bmc->irq);
>  	} else {
> @@ -468,6 +590,9 @@ static int bt_bmc_probe(struct platform_device
> *pdev)
>  		  BT_CR0_ENABLE_IBT,
>  		  bt_bmc->base + BT_CR0);
>  
> +	setup_timer(&bt_bmc->requests_timer, requests_timer,
> +		    (unsigned long)bt_bmc);
> +
>  	clr_b_busy(bt_bmc);
>  
>  	return 0;
> @@ -476,10 +601,17 @@ static int bt_bmc_probe(struct platform_device
> *pdev)
>  static int bt_bmc_remove(struct platform_device *pdev)
>  {
>  	struct bt_bmc *bt_bmc = dev_get_drvdata(&pdev->dev);
> +	struct ipmi_request *req, *next;
>  
>  	misc_deregister(&bt_bmc->miscdev);
>  	if (!bt_bmc->irq)
>  		del_timer_sync(&bt_bmc->poll_timer);
> +
> +	del_timer_sync(&bt_bmc->requests_timer);
> +	list_for_each_entry_safe(req, next, &bt_bmc->requests, list)
> {
> +		list_del(&req->list);
> +		kfree(req);
> +	}
>  	return 0;
>  }
>
Cédric Le Goater Nov. 3, 2016, 9:06 a.m. UTC | #2
On 11/03/2016 01:26 AM, Cyril Bur wrote:
> On Wed, 2016-10-26 at 08:57 +0200, Cédric Le Goater wrote:
>> Regarding the response expiration handling, the IPMI spec says :
>>
>>    The BMC must not return a given response once the corresponding
>>    Request-to-Response interval has passed. The BMC can ensure this
>>    by maintaining its own internal list of outstanding requests
>> through
>>    the interface. The BMC could age and expire the entries in the
>> list
>>    by expiring the entries at an interval that is somewhat shorter
>> than
>>    the specified Request-to-Response interval....
>>
>> To handle such case, we maintain list of received requests using the
>> seq number of the BT message to identify them. The list is updated
>> each time a request is received and a response is sent. The
>> expiration
>> of the reponses is handled at each updates but also with a timer.
>>
> 
> I agree that the BMC kernel is most logical place to do this, at the
> moment btbridged does attempt something similar no?
>
> 
> Should we patch btbridged to not? Should I least remove duplicated
> logic? 

yes probably. Let's see what mainline says about it first. 
The patch has a minor flaw: struct ipmi_request should include 
the NetFn and Command values. Corey might ask for a resend.


More globally speaking, the patch raises the question on how we 
manage the BT interface capabilities, the in/out buffer message 
sizes, allowed retries, etc. For now, this is a bit foggy, there
are bits in the kernel, in btbridged and in phosphor-host-ipmid.

I think we should configure the driver BT capabilities when 
btbridged is started. I have sent a patch using sysfs but maybe 
we will switch to an ioctl as Joel proposed. if btbridged knows 
about the driver configuration, it should probably handle the 
"Get BT Interface Capabilities" and not phosphor-host-ipmid.


May be I should open an issue on that topic ? 

C.


>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  drivers/char/ipmi/bt-bmc.c | 132
>> +++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 132 insertions(+)
>>
>> diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c
>> index b49e61320952..228ecdb689de 100644
>> --- a/drivers/char/ipmi/bt-bmc.c
>> +++ b/drivers/char/ipmi/bt-bmc.c
>> @@ -17,6 +17,7 @@
>>  #include <linux/platform_device.h>
>>  #include <linux/poll.h>
>>  #include <linux/sched.h>
>> +#include <linux/slab.h>
>>  #include <linux/timer.h>
>>  
>>  /*
>> @@ -57,6 +58,15 @@
>>  
>>  #define BT_BMC_BUFFER_SIZE 256
>>  
>> +#define BT_BMC_RESPONSE_TIME	5 /* seconds */
>> +
>> +struct ipmi_request {
>> +	struct list_head list;
>> +
>> +	u8 seq;
>> +	unsigned long expires;
>> +};
>> +
>>  struct bt_bmc {
>>  	struct device		dev;
>>  	struct miscdevice	miscdev;
>> @@ -65,6 +75,11 @@ struct bt_bmc {
>>  	wait_queue_head_t	queue;
>>  	struct timer_list	poll_timer;
>>  	struct mutex		mutex;
>> +
>> +	unsigned int		response_time;
>> +	struct timer_list	requests_timer;
>> +	spinlock_t              requests_lock;
>> +	struct list_head        requests;
>>  };
>>  
>>  static atomic_t open_count = ATOMIC_INIT(0);
>> @@ -163,6 +178,94 @@ static int bt_bmc_open(struct inode *inode,
>> struct file *file)
>>  }
>>  
>>  /*
>> + * lock should be held
>> + */
>> +static void drop_expired_requests(struct bt_bmc *bt_bmc)
>> +{
>> +	unsigned long flags = 0;
>> +	struct ipmi_request *req, *next;
>> +	unsigned long next_expires = 0;
>> +	int start_timer = 0;
>> +
>> +	spin_lock_irqsave(&bt_bmc->requests_lock, flags);
>> +	list_for_each_entry_safe(req, next, &bt_bmc->requests, list)
>> {
>> +		if (time_after_eq(jiffies, req->expires)) {
>> +			pr_warn("BT: request seq:%d has expired.
>> dropping\n",
>> +				req->seq);
>> +			list_del(&req->list);
>> +			kfree(req);
>> +			continue;
>> +		}
>> +		if (!start_timer) {
>> +			start_timer = 1;
>> +			next_expires = req->expires;
>> +		} else {
>> +			next_expires = min(next_expires, req-
>>> expires);
>> +		}
>> +	}
>> +	spin_unlock_irqrestore(&bt_bmc->requests_lock, flags);
>> +
>> +	/* and possibly restart */
>> +	if (start_timer)
>> +		mod_timer(&bt_bmc->requests_timer, next_expires);
>> +}
>> +
>> +static void requests_timer(unsigned long data)
>> +{
>> +	struct bt_bmc *bt_bmc = (void *)data;
>> +
>> +	drop_expired_requests(bt_bmc);
>> +}
>> +
>> +static int bt_bmc_add_request(struct bt_bmc *bt_bmc, u8 seq)
>> +{
>> +	struct ipmi_request *req;
>> +
>> +	req = kmalloc(sizeof(*req), GFP_KERNEL);
>> +	if (!req)
>> +		return -ENOMEM;
>> +
>> +	req->seq = seq;
>> +	req->expires = jiffies +
>> +		msecs_to_jiffies(bt_bmc->response_time * 1000);
>> +
>> +	spin_lock(&bt_bmc->requests_lock);
>> +	list_add(&req->list, &bt_bmc->requests);
>> +	spin_unlock(&bt_bmc->requests_lock);
>> +
>> +	drop_expired_requests(bt_bmc);
>> +	return 0;
>> +}
>> +
>> +static int bt_bmc_remove_request(struct bt_bmc *bt_bmc, u8 seq)
>> +{
>> +	struct ipmi_request *req, *next;
>> +	int ret = -EBADRQC; /* Invalid request code */
>> +
>> +	spin_lock(&bt_bmc->requests_lock);
>> +	list_for_each_entry_safe(req, next, &bt_bmc->requests, list)
>> {
>> +		/*
>> +		 * The sequence number should be unique, so remove
>> the
>> +		 * first matching request found. If there are
>> others,
>> +		 * they should expire
>> +		 */
>> +		if (req->seq == seq) {
>> +			list_del(&req->list);
>> +			kfree(req);
>> +			ret = 0;
>> +			break;
>> +		}
>> +	}
>> +	spin_unlock(&bt_bmc->requests_lock);
>> +
>> +	if (ret)
>> +		pr_warn("BT: request seq:%d is invalid\n", seq);
>> +
>> +	drop_expired_requests(bt_bmc);
>> +	return ret;
>> +}
>> +
>> +/*
>>   * The BT (Block Transfer) interface means that entire messages are
>>   * buffered by the host before a notification is sent to the BMC
>> that
>>   * there is data to be read. The first byte is the length and the
>> @@ -231,6 +334,13 @@ static ssize_t bt_bmc_read(struct file *file,
>> char __user *buf,
>>  		len_byte = 0;
>>  	}
>>  
>> +	if (ret > 0) {
>> +		int ret2 = bt_bmc_add_request(bt_bmc, kbuffer[2]);
>> +
>> +		if (ret2)
>> +			ret = ret2;
>> +	}
>> +
>>  	clr_b_busy(bt_bmc);
>>  
>>  out_unlock:
>> @@ -283,12 +393,20 @@ static ssize_t bt_bmc_write(struct file *file,
>> const char __user *buf,
>>  	clr_wr_ptr(bt_bmc);
>>  
>>  	while (count) {
>> +		int ret2;
>> +
>>  		nwritten = min_t(ssize_t, count, sizeof(kbuffer));
>>  		if (copy_from_user(&kbuffer, buf, nwritten)) {
>>  			ret = -EFAULT;
>>  			break;
>>  		}
>>  
>> +		ret2 = bt_bmc_remove_request(bt_bmc, kbuffer[2]);
>> +		if (ret2) {
>> +			ret = ret2;
>> +			break;
>> +		}
>> +
>>  		bt_writen(bt_bmc, kbuffer, nwritten);
>>  
>>  		count -= nwritten;
>> @@ -438,6 +556,8 @@ static int bt_bmc_probe(struct platform_device
>> *pdev)
>>  
>>  	mutex_init(&bt_bmc->mutex);
>>  	init_waitqueue_head(&bt_bmc->queue);
>> +	INIT_LIST_HEAD(&bt_bmc->requests);
>> +	spin_lock_init(&bt_bmc->requests_lock);
>>  
>>  	bt_bmc->miscdev.minor	= MISC_DYNAMIC_MINOR,
>>  		bt_bmc->miscdev.name	= DEVICE_NAME,
>> @@ -451,6 +571,8 @@ static int bt_bmc_probe(struct platform_device
>> *pdev)
>>  
>>  	bt_bmc_config_irq(bt_bmc, pdev);
>>  
>> +	bt_bmc->response_time = BT_BMC_RESPONSE_TIME;
>> +
>>  	if (bt_bmc->irq) {
>>  		dev_info(dev, "Using IRQ %d\n", bt_bmc->irq);
>>  	} else {
>> @@ -468,6 +590,9 @@ static int bt_bmc_probe(struct platform_device
>> *pdev)
>>  		  BT_CR0_ENABLE_IBT,
>>  		  bt_bmc->base + BT_CR0);
>>  
>> +	setup_timer(&bt_bmc->requests_timer, requests_timer,
>> +		    (unsigned long)bt_bmc);
>> +
>>  	clr_b_busy(bt_bmc);
>>  
>>  	return 0;
>> @@ -476,10 +601,17 @@ static int bt_bmc_probe(struct platform_device
>> *pdev)
>>  static int bt_bmc_remove(struct platform_device *pdev)
>>  {
>>  	struct bt_bmc *bt_bmc = dev_get_drvdata(&pdev->dev);
>> +	struct ipmi_request *req, *next;
>>  
>>  	misc_deregister(&bt_bmc->miscdev);
>>  	if (!bt_bmc->irq)
>>  		del_timer_sync(&bt_bmc->poll_timer);
>> +
>> +	del_timer_sync(&bt_bmc->requests_timer);
>> +	list_for_each_entry_safe(req, next, &bt_bmc->requests, list)
>> {
>> +		list_del(&req->list);
>> +		kfree(req);
>> +	}
>>  	return 0;
>>  }
>>
Patrick Williams Nov. 3, 2016, 6:23 p.m. UTC | #3
On Thu, Nov 03, 2016 at 11:26:09AM +1100, Cyril Bur wrote:
> On Wed, 2016-10-26 at 08:57 +0200, Cédric Le Goater wrote:
> > Regarding the response expiration handling, the IPMI spec says :
> > 
> >    The BMC must not return a given response once the corresponding
> >    Request-to-Response interval has passed. The BMC can ensure this
> >    by maintaining its own internal list of outstanding requests
> > through
> >    the interface. The BMC could age and expire the entries in the
> > list
> >    by expiring the entries at an interval that is somewhat shorter
> > than
> >    the specified Request-to-Response interval....
> > 
> > To handle such case, we maintain list of received requests using the
> > seq number of the BT message to identify them. The list is updated
> > each time a request is received and a response is sent. The
> > expiration
> > of the reponses is handled at each updates but also with a timer.
> > 
> 
> I agree that the BMC kernel is most logical place to do this, at the
> moment btbridged does attempt something similar no?
> 
> Should we patch btbridged to not? Should I least remove duplicated
> logic? 
> 

Brendan,

Are you paying attention to this discussion?  I think you had some
opposition to the kernel doing any additional work in this space because
you wanted to run a non-IPMI protocol over the IPMI bridge.
Cédric Le Goater Nov. 3, 2016, 6:46 p.m. UTC | #4
On 11/03/2016 07:23 PM, Patrick Williams wrote:
> On Thu, Nov 03, 2016 at 11:26:09AM +1100, Cyril Bur wrote:
>> On Wed, 2016-10-26 at 08:57 +0200, Cédric Le Goater wrote:
>>> Regarding the response expiration handling, the IPMI spec says :
>>>
>>>    The BMC must not return a given response once the corresponding
>>>    Request-to-Response interval has passed. The BMC can ensure this
>>>    by maintaining its own internal list of outstanding requests
>>> through
>>>    the interface. The BMC could age and expire the entries in the
>>> list
>>>    by expiring the entries at an interval that is somewhat shorter
>>> than
>>>    the specified Request-to-Response interval....
>>>
>>> To handle such case, we maintain list of received requests using the
>>> seq number of the BT message to identify them. The list is updated
>>> each time a request is received and a response is sent. The
>>> expiration
>>> of the reponses is handled at each updates but also with a timer.
>>>
>>
>> I agree that the BMC kernel is most logical place to do this, at the
>> moment btbridged does attempt something similar no?
>>
>> Should we patch btbridged to not? Should I least remove duplicated
>> logic? 
>>
> 
> Brendan,
> 
> Are you paying attention to this discussion?  I think you had some
> opposition to the kernel doing any additional work in this space because
> you wanted to run a non-IPMI protocol over the IPMI bridge.

We will then need a new kernel driver for the non-IPMI interface. 
This one is for the iBT interface which is IPMI oriented. 

I am sure we can find some common points though. Hopefully, the 
userspace interface would be the same. For the moment we only
have an ioctl for the SMS ATN interrupt but we should be adding
some more.

Thanks,

C.
Patrick Williams Nov. 4, 2016, 1:55 a.m. UTC | #5
On Thu, Nov 03, 2016 at 07:46:28PM +0100, Cédric Le Goater wrote:
> On 11/03/2016 07:23 PM, Patrick Williams wrote:
> > On Thu, Nov 03, 2016 at 11:26:09AM +1100, Cyril Bur wrote:
> >> On Wed, 2016-10-26 at 08:57 +0200, Cédric Le Goater wrote:
> >>> Regarding the response expiration handling, the IPMI spec says :
> >>>
> >>>    The BMC must not return a given response once the corresponding
> >>>    Request-to-Response interval has passed. The BMC can ensure this
> >>>    by maintaining its own internal list of outstanding requests
> >>> through
> >>>    the interface. The BMC could age and expire the entries in the
> >>> list
> >>>    by expiring the entries at an interval that is somewhat shorter
> >>> than
> >>>    the specified Request-to-Response interval....
> >>>
> >>> To handle such case, we maintain list of received requests using the
> >>> seq number of the BT message to identify them. The list is updated
> >>> each time a request is received and a response is sent. The
> >>> expiration
> >>> of the reponses is handled at each updates but also with a timer.
> >>>
> >>
> >> I agree that the BMC kernel is most logical place to do this, at the
> >> moment btbridged does attempt something similar no?
> >>
> >> Should we patch btbridged to not? Should I least remove duplicated
> >> logic? 
> >>
> > 
> > Brendan,
> > 
> > Are you paying attention to this discussion?  I think you had some
> > opposition to the kernel doing any additional work in this space because
> > you wanted to run a non-IPMI protocol over the IPMI bridge.
> 
> We will then need a new kernel driver for the non-IPMI interface. 
> This one is for the iBT interface which is IPMI oriented. 
> 
> I am sure we can find some common points though. Hopefully, the 
> userspace interface would be the same. For the moment we only
> have an ioctl for the SMS ATN interrupt but we should be adding
> some more.
> 

I believe Brendan was keeping the IPMI protocol but implementing
different flow control on top of it.  That means, specifically, that
having the kernel track commands and timeouts would not be what he
wanted.

> Thanks,
> 
> C.  
>
Cédric Le Goater Nov. 4, 2016, 8:09 a.m. UTC | #6
On 11/04/2016 02:55 AM, Patrick Williams wrote:
> On Thu, Nov 03, 2016 at 07:46:28PM +0100, Cédric Le Goater wrote:
>> On 11/03/2016 07:23 PM, Patrick Williams wrote:
>>> On Thu, Nov 03, 2016 at 11:26:09AM +1100, Cyril Bur wrote:
>>>> On Wed, 2016-10-26 at 08:57 +0200, Cédric Le Goater wrote:
>>>>> Regarding the response expiration handling, the IPMI spec says :
>>>>>
>>>>>    The BMC must not return a given response once the corresponding
>>>>>    Request-to-Response interval has passed. The BMC can ensure this
>>>>>    by maintaining its own internal list of outstanding requests
>>>>> through
>>>>>    the interface. The BMC could age and expire the entries in the
>>>>> list
>>>>>    by expiring the entries at an interval that is somewhat shorter
>>>>> than
>>>>>    the specified Request-to-Response interval....
>>>>>
>>>>> To handle such case, we maintain list of received requests using the
>>>>> seq number of the BT message to identify them. The list is updated
>>>>> each time a request is received and a response is sent. The
>>>>> expiration
>>>>> of the reponses is handled at each updates but also with a timer.
>>>>>
>>>>
>>>> I agree that the BMC kernel is most logical place to do this, at the
>>>> moment btbridged does attempt something similar no?
>>>>
>>>> Should we patch btbridged to not? Should I least remove duplicated
>>>> logic? 
>>>>
>>>
>>> Brendan,
>>>
>>> Are you paying attention to this discussion?  I think you had some
>>> opposition to the kernel doing any additional work in this space because
>>> you wanted to run a non-IPMI protocol over the IPMI bridge.
>>
>> We will then need a new kernel driver for the non-IPMI interface. 
>> This one is for the iBT interface which is IPMI oriented. 
>>
>> I am sure we can find some common points though. Hopefully, the 
>> userspace interface would be the same. For the moment we only
>> have an ioctl for the SMS ATN interrupt but we should be adding
>> some more.
>>
> 
> I believe Brendan was keeping the IPMI protocol but implementing
> different flow control on top of it.  That means, specifically, that
> having the kernel track commands and timeouts would not be what he
> wanted.

ok. I have lost track of this. May be we could start a wiki on 
btbridge to hold all these ideas, development progress, etc. 
Like we have done for qemu and u-boot ?

Thanks,

C.
Brendan Higgins Nov. 4, 2016, 6:22 p.m. UTC | #7
Thanks for roping me in,

Patrick summed up my intention perfectly. IPMI messages will be structured
the same and use compatible fields, but would allow for different flow
control. In particular, the goal is to allow multiple requests and
responses under a single sequence number.
Cédric Le Goater Nov. 7, 2016, 5:25 a.m. UTC | #8
Hello,

On 11/04/2016 07:22 PM, Brendan Higgins wrote:
> Thanks for roping me in,
> 
> Patrick summed up my intention perfectly. IPMI messages will be structured 
> the same and use compatible fields, but would allow for different flow control. 
> In particular, the goal is to allow multiple requests and responses under a 
> single sequence number.

So if the combinations of { Seq, Command, NetFn } values are unique, we 
should be fine. The expiry list patch needs to be more precise when doing 
matching though.

Thanks,

C.
Patrick Williams Nov. 7, 2016, 3:02 p.m. UTC | #9
On Mon, Nov 07, 2016 at 06:25:41AM +0100, Cédric Le Goater wrote:
> Hello,
> 
> On 11/04/2016 07:22 PM, Brendan Higgins wrote:
> > Thanks for roping me in,
> > 
> > Patrick summed up my intention perfectly. IPMI messages will be structured 
> > the same and use compatible fields, but would allow for different flow control. 
> > In particular, the goal is to allow multiple requests and responses under a 
> > single sequence number.
> 
> So if the combinations of { Seq, Command, NetFn } values are unique, we 
> should be fine. The expiry list patch needs to be more precise when doing 
> matching though.

Brendan was implying that this is incorrect for OEM commands.

Brendan can you clarify?
Brendan Higgins Nov. 7, 2016, 7:29 p.m. UTC | #10
Patrick, you are correct.

The { Seq, Command, NetFn } set does not necessarily uniquely identify a
message. The the OEM/Group extension messages as defined in the IPMI v2.0
Specification Document Revision 1.1, Section 5.1 Network Function Codes,
Table 5, under the name OEM/Group defines unique message endpoints by the
following set: { NetFn, Payload{0, 1, 2}, Command }; where Payload{0, 1, 2}
are the first three bytes of a message payload.

In addition, I am working on an OEM/Group extension that would allow a
single transaction to take place across multiple messages, and in
particular would allow multiple IPMI requests to be sent, potentially only
sending (a) response(s) at the end of the transaction; in particular, this
would allow the following:

Master     | Slave
Request -> |
Request -> |
Request -> |
           | <- Response
           | <- Response

Where Request and Response have the same sequence number. Note: the change
I am making would also make the above example valid if there were only one
request and two responses.

@Cédric I see two remedies for your patch to allow this:

1) Allow users to opt out of the expiry list.

2) Update the keep alive list when *either* a new request or a new response
is sent with a given sequence number (change the list to be a hashmap of
sequence numbers to timeouts).

I will try to get a design doc for discussion describing this extension out
hopefully sometime this week; I think that might clear some things up.

Thanks!
Cédric Le Goater Nov. 8, 2016, 9:58 a.m. UTC | #11
Hello Brendan,

On 11/07/2016 08:29 PM, Brendan Higgins wrote:
> Patrick, you are correct.
> 
> The { Seq, Command, NetFn } set does not necessarily uniquely identify a message. 
> The the OEM/Group extension messages as defined in the IPMI v2.0 Specification 
> Document Revision 1.1, Section 5.1 Network Function Codes, Table 5, under the 
> name OEM/Group defines unique message endpoints by the following set: 
> 
>    { NetFn, Payload{0, 1, 2}, Command }; 
>
> where Payload{0, 1, 2} are the first three bytes of a message payload.

OK. I missed that part. I was referring to : 

  11.1 BT Interface-BMC Request Message Format

    ...
    The Seq field is used in combination with the NetFn and Command fields to 
    form a unique value. I.e. the same Seq value could be used in multiple 
    outstanding requests, as long as the combinations of Seq value, NetFn, 
    and Command were unique among the requests.
    ...
and

  11.3 Using the Seq Field
  11.4 Response Expiration Handling

    ...
    The BMC can define its own internal Seq value or tracking number for
    this purpose, or it could use the Seq, NetFn, and Command values in the
    same manner as SMS.
    ...

> In addition, I am working on an OEM/Group extension that would allow a single 
> transaction to take place across multiple messages, and in particular would 
> allow multiple IPMI requests to be sent, potentially only sending (a) 
> response(s) at the end of the transaction; in particular, this would allow 
> the following:
> 
> Master     | Slave
> Request -> |
> Request -> |
> Request -> |
>            | <- Response
>            | <- Response
> 
> Where Request and Response have the same sequence number. Note: the change 
> I am making would also make the above example valid if there were only one 
> request and two responses.

I am wondering how that fits with the BT Interface Capabilities which can 
recommend retries.

> @Cédric I see two remedies for your patch to allow this:
> 
> 1) Allow users to opt out of the expiry list.

yes that's should not be too hard.

> 2) Update the keep alive list when *either* a new request or a new response 
> is sent with a given sequence number (change the list to be a hashmap of 
> sequence numbers to timeouts).

The BMC driver could also define its own 'Seq' identifier (using the tuple 
above) to do the request/response matching instead of using the 'Seq' byte.

> I will try to get a design doc for discussion describing this extension out 
> hopefully sometime this week; I think that might clear some things up.

OK but we should be having this discussion on :
 
	openipmi-developer@lists.sourceforge.net

where the patches are being discussed. Corey would have some very valuable
input. Could you join that thread ? 

	http://lists.infradead.org/pipermail/linux-arm-kernel/2016-November/465131.html
	
Or I can copy you, it might be easier and then you can start another thread
with your design.

Thanks!

C.
Cédric Le Goater Nov. 10, 2016, 7:53 a.m. UTC | #12
On 11/03/2016 10:06 AM, Cédric Le Goater wrote:
> On 11/03/2016 01:26 AM, Cyril Bur wrote:
>> On Wed, 2016-10-26 at 08:57 +0200, Cédric Le Goater wrote:
>>> Regarding the response expiration handling, the IPMI spec says :
>>>
>>>    The BMC must not return a given response once the corresponding
>>>    Request-to-Response interval has passed. The BMC can ensure this
>>>    by maintaining its own internal list of outstanding requests through
>>>    the interface. The BMC could age and expire the entries in the list
>>>    by expiring the entries at an interval that is somewhat shorter than
>>>    the specified Request-to-Response interval....
>>>
>>> To handle such case, we maintain list of received requests using the
>>> seq number of the BT message to identify them. The list is updated
>>> each time a request is received and a response is sent. The
>>> expirationof the reponses is handled at each updates but also 
>>> with a timer.
>>>
>>
>> I agree that the BMC kernel is most logical place to do this, at the
>> moment btbridged does attempt something similar no?
>>
>>
>> Should we patch btbridged to not? Should I least remove duplicated
>> logic? 
> 
> yes probably. Let's see what mainline says about it first. 
> The patch has a minor flaw: struct ipmi_request should include 
> the NetFn and Command values. Corey might ask for a resend.

So it seems this patch is raising more questions than solving 
problems. Let's leave it aside for the moment and keep the list
in btbridged.

Thanks,

C.
diff mbox

Patch

diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c
index b49e61320952..228ecdb689de 100644
--- a/drivers/char/ipmi/bt-bmc.c
+++ b/drivers/char/ipmi/bt-bmc.c
@@ -17,6 +17,7 @@ 
 #include <linux/platform_device.h>
 #include <linux/poll.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/timer.h>
 
 /*
@@ -57,6 +58,15 @@ 
 
 #define BT_BMC_BUFFER_SIZE 256
 
+#define BT_BMC_RESPONSE_TIME	5 /* seconds */
+
+struct ipmi_request {
+	struct list_head list;
+
+	u8 seq;
+	unsigned long expires;
+};
+
 struct bt_bmc {
 	struct device		dev;
 	struct miscdevice	miscdev;
@@ -65,6 +75,11 @@  struct bt_bmc {
 	wait_queue_head_t	queue;
 	struct timer_list	poll_timer;
 	struct mutex		mutex;
+
+	unsigned int		response_time;
+	struct timer_list	requests_timer;
+	spinlock_t              requests_lock;
+	struct list_head        requests;
 };
 
 static atomic_t open_count = ATOMIC_INIT(0);
@@ -163,6 +178,94 @@  static int bt_bmc_open(struct inode *inode, struct file *file)
 }
 
 /*
+ * lock should be held
+ */
+static void drop_expired_requests(struct bt_bmc *bt_bmc)
+{
+	unsigned long flags = 0;
+	struct ipmi_request *req, *next;
+	unsigned long next_expires = 0;
+	int start_timer = 0;
+
+	spin_lock_irqsave(&bt_bmc->requests_lock, flags);
+	list_for_each_entry_safe(req, next, &bt_bmc->requests, list) {
+		if (time_after_eq(jiffies, req->expires)) {
+			pr_warn("BT: request seq:%d has expired. dropping\n",
+				req->seq);
+			list_del(&req->list);
+			kfree(req);
+			continue;
+		}
+		if (!start_timer) {
+			start_timer = 1;
+			next_expires = req->expires;
+		} else {
+			next_expires = min(next_expires, req->expires);
+		}
+	}
+	spin_unlock_irqrestore(&bt_bmc->requests_lock, flags);
+
+	/* and possibly restart */
+	if (start_timer)
+		mod_timer(&bt_bmc->requests_timer, next_expires);
+}
+
+static void requests_timer(unsigned long data)
+{
+	struct bt_bmc *bt_bmc = (void *)data;
+
+	drop_expired_requests(bt_bmc);
+}
+
+static int bt_bmc_add_request(struct bt_bmc *bt_bmc, u8 seq)
+{
+	struct ipmi_request *req;
+
+	req = kmalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	req->seq = seq;
+	req->expires = jiffies +
+		msecs_to_jiffies(bt_bmc->response_time * 1000);
+
+	spin_lock(&bt_bmc->requests_lock);
+	list_add(&req->list, &bt_bmc->requests);
+	spin_unlock(&bt_bmc->requests_lock);
+
+	drop_expired_requests(bt_bmc);
+	return 0;
+}
+
+static int bt_bmc_remove_request(struct bt_bmc *bt_bmc, u8 seq)
+{
+	struct ipmi_request *req, *next;
+	int ret = -EBADRQC; /* Invalid request code */
+
+	spin_lock(&bt_bmc->requests_lock);
+	list_for_each_entry_safe(req, next, &bt_bmc->requests, list) {
+		/*
+		 * The sequence number should be unique, so remove the
+		 * first matching request found. If there are others,
+		 * they should expire
+		 */
+		if (req->seq == seq) {
+			list_del(&req->list);
+			kfree(req);
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock(&bt_bmc->requests_lock);
+
+	if (ret)
+		pr_warn("BT: request seq:%d is invalid\n", seq);
+
+	drop_expired_requests(bt_bmc);
+	return ret;
+}
+
+/*
  * The BT (Block Transfer) interface means that entire messages are
  * buffered by the host before a notification is sent to the BMC that
  * there is data to be read. The first byte is the length and the
@@ -231,6 +334,13 @@  static ssize_t bt_bmc_read(struct file *file, char __user *buf,
 		len_byte = 0;
 	}
 
+	if (ret > 0) {
+		int ret2 = bt_bmc_add_request(bt_bmc, kbuffer[2]);
+
+		if (ret2)
+			ret = ret2;
+	}
+
 	clr_b_busy(bt_bmc);
 
 out_unlock:
@@ -283,12 +393,20 @@  static ssize_t bt_bmc_write(struct file *file, const char __user *buf,
 	clr_wr_ptr(bt_bmc);
 
 	while (count) {
+		int ret2;
+
 		nwritten = min_t(ssize_t, count, sizeof(kbuffer));
 		if (copy_from_user(&kbuffer, buf, nwritten)) {
 			ret = -EFAULT;
 			break;
 		}
 
+		ret2 = bt_bmc_remove_request(bt_bmc, kbuffer[2]);
+		if (ret2) {
+			ret = ret2;
+			break;
+		}
+
 		bt_writen(bt_bmc, kbuffer, nwritten);
 
 		count -= nwritten;
@@ -438,6 +556,8 @@  static int bt_bmc_probe(struct platform_device *pdev)
 
 	mutex_init(&bt_bmc->mutex);
 	init_waitqueue_head(&bt_bmc->queue);
+	INIT_LIST_HEAD(&bt_bmc->requests);
+	spin_lock_init(&bt_bmc->requests_lock);
 
 	bt_bmc->miscdev.minor	= MISC_DYNAMIC_MINOR,
 		bt_bmc->miscdev.name	= DEVICE_NAME,
@@ -451,6 +571,8 @@  static int bt_bmc_probe(struct platform_device *pdev)
 
 	bt_bmc_config_irq(bt_bmc, pdev);
 
+	bt_bmc->response_time = BT_BMC_RESPONSE_TIME;
+
 	if (bt_bmc->irq) {
 		dev_info(dev, "Using IRQ %d\n", bt_bmc->irq);
 	} else {
@@ -468,6 +590,9 @@  static int bt_bmc_probe(struct platform_device *pdev)
 		  BT_CR0_ENABLE_IBT,
 		  bt_bmc->base + BT_CR0);
 
+	setup_timer(&bt_bmc->requests_timer, requests_timer,
+		    (unsigned long)bt_bmc);
+
 	clr_b_busy(bt_bmc);
 
 	return 0;
@@ -476,10 +601,17 @@  static int bt_bmc_probe(struct platform_device *pdev)
 static int bt_bmc_remove(struct platform_device *pdev)
 {
 	struct bt_bmc *bt_bmc = dev_get_drvdata(&pdev->dev);
+	struct ipmi_request *req, *next;
 
 	misc_deregister(&bt_bmc->miscdev);
 	if (!bt_bmc->irq)
 		del_timer_sync(&bt_bmc->poll_timer);
+
+	del_timer_sync(&bt_bmc->requests_timer);
+	list_for_each_entry_safe(req, next, &bt_bmc->requests, list) {
+		list_del(&req->list);
+		kfree(req);
+	}
 	return 0;
 }