diff mbox

[OpenWrt-Devel] Fix 'Dropping frame due to full tx queue' for Ralink wifi get stuck.

Message ID 68668337.pM24Mi1f2Q@nl-ubook
State Changes Requested
Headers show

Commit Message

N.Leiten Sept. 11, 2015, 11:33 a.m. UTC
Fix instability of Ralink WiFi general queue management on high load.
rt2x00 driver logs in dmesg "Dropping frame due to full queue ..." several times and at some point get stuck.

Solutions in patch:
1) Increasing number of frames in each TX queue helps with speed and decreases queue overflows. Actually 256 frames can be increased to 512 (this number of frames used in proprietary drivers for every queue).
2) Setting number of frames in TX/RX queues to equal values resolves async speed behaviour with better RX on AP-side (uplink from STAs), where it needs to be at least equal or better on TX queue on AP (download to STA).
3) In rt2x00mac.c additional check for queue full added and reassignment in this case, so interface will not drop frame.
4) Fixes in queue initialization. Default values for AC_BK, AC_BE, AC_VI, AC_VO set from WMM.

Tested on RT3883, RT5350, MT7620 SoCs and on RT3092 pcie interface for 10 days.

I'm planning to send this patch to mac80211 soon, but need to be sure that it works on other Ralink/Mediatek platforms and it's appropriate to do so.


Signed-off-by: Nick Leiten <nickleiten@gmail.com>

Comments

John Crispin Sept. 11, 2015, 11:39 a.m. UTC | #1
Hi,

before looking at this in detail, could please send a properly formatted
patch. the patch you sent is not using the unified option when
generating it. upstream linux-wireless wont accept it otherwise and we
wont either.

also please prefix the email with "mac80211: " so that we know which
subsystem it applies to.

that being said, the bug has been annoying me for eons and i never had
time to fix it so thanks for the time debugging this :)

	John


On 11/09/2015 13:33, N.Leiten wrote:
> Fix instability of Ralink WiFi general queue management on high load.
> rt2x00 driver logs in dmesg "Dropping frame due to full queue ..." several times and at some point get stuck.
> 
> Solutions in patch:
> 1) Increasing number of frames in each TX queue helps with speed and decreases queue overflows. Actually 256 frames can be increased to 512 (this number of frames used in proprietary drivers for every queue).
> 2) Setting number of frames in TX/RX queues to equal values resolves async speed behaviour with better RX on AP-side (uplink from STAs), where it needs to be at least equal or better on TX queue on AP (download to STA).
> 3) In rt2x00mac.c additional check for queue full added and reassignment in this case, so interface will not drop frame.
> 4) Fixes in queue initialization. Default values for AC_BK, AC_BE, AC_VI, AC_VO set from WMM.
> 
> Tested on RT3883, RT5350, MT7620 SoCs and on RT3092 pcie interface for 10 days.
> 
> I'm planning to send this patch to mac80211 soon, but need to be sure that it works on other Ralink/Mediatek platforms and it's appropriate to do so.
> 
> 
> Signed-off-by: Nick Leiten <nickleiten@gmail.com>
> diff --git a/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
> new file mode 100644
> index 0000000..9239bec
> --- /dev/null
> +++ b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
> @@ -0,0 +1,142 @@
> +Only in compat-wireless-2015-03-09/drivers/net/wireless/rt2x00: limit
> +diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c
> +*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-06-16 13:02:30.000000000 +0300
> +--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-09-04 11:50:09.665148666 +0300
> +***************
> +*** 700,706 ****
> +  
> +  	switch (queue->qid) {
> +  	case QID_RX:
> +! 		queue->limit = 128;
> +  		queue->data_size = AGGREGATION_SIZE;
> +  		queue->desc_size = RXD_DESC_SIZE;
> +  		queue->winfo_size = rxwi_size;
> +--- 700,706 ----
> +  
> +  	switch (queue->qid) {
> +  	case QID_RX:
> +! 		queue->limit = 256;
> +  		queue->data_size = AGGREGATION_SIZE;
> +  		queue->desc_size = RXD_DESC_SIZE;
> +  		queue->winfo_size = rxwi_size;
> +***************
> +*** 711,717 ****
> +  	case QID_AC_VI:
> +  	case QID_AC_BE:
> +  	case QID_AC_BK:
> +! 		queue->limit = 64;
> +  		queue->data_size = AGGREGATION_SIZE;
> +  		queue->desc_size = TXD_DESC_SIZE;
> +  		queue->winfo_size = txwi_size;
> +--- 711,717 ----
> +  	case QID_AC_VI:
> +  	case QID_AC_BE:
> +  	case QID_AC_BK:
> +! 		queue->limit = 256;
> +  		queue->data_size = AGGREGATION_SIZE;
> +  		queue->desc_size = TXD_DESC_SIZE;
> +  		queue->winfo_size = txwi_size;
> +diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2x00mac.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2x00mac.c
> +*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2x00mac.c	2015-06-16 13:02:30.000000000 +0300
> +--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2x00mac.c	2015-09-04 11:47:45.845449209 +0300
> +***************
> +*** 26,31 ****
> +--- 26,32 ----
> +  
> +  #include "rt2x00.h"
> +  #include "rt2x00lib.h"
> ++ #include "rt2x00queue.h"
> +  
> +  static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
> +  				struct data_queue *queue,
> +***************
> +*** 115,120 ****
> +--- 116,141 ----
> +  	if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags))
> +  		goto exit_free_skb;
> +  
> ++ 	/* Dirty hack for Queue overrun protection,
> ++ 	 * if AC_VO/AC_VI/AC_BE is full, use next queue.
> ++ 	 * if AC_BK is full use previous queue.
> ++ 	 */
> ++ 	if (qid < 4) {
> ++ 		queue = rt2x00queue_get_tx_queue(rt2x00dev,qid);
> ++ 		if (unlikely(rt2x00queue_full(queue))){
> ++ 			switch(qid){
> ++ 				case 0: /* QID_AC_VO */
> ++ 				case 1: /* QID_AC_VI */
> ++ 				case 2: /* QID_AC_BE */
> ++ 					qid++;
> ++ 					break;
> ++ 				case 3: /* QID_AC_BK */
> ++ 					qid--;
> ++ 					break;
> ++ 			}
> ++ 		}
> ++ 	}
> ++ 	
> +  	/*
> +  	 * Use the ATIM queue if appropriate and present.
> +  	 */
> +diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2x00queue.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2x00queue.c
> +*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2x00queue.c	2015-03-10 05:37:16.000000000 +0200
> +--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2x00queue.c	2015-09-04 11:49:37.911446071 +0300
> +***************
> +*** 1213,1228 ****
> +  static void rt2x00queue_init(struct rt2x00_dev *rt2x00dev,
> +  			     struct data_queue *queue, enum data_queue_qid qid)
> +  {
> +  	mutex_init(&queue->status_lock);
> +  	spin_lock_init(&queue->tx_lock);
> +  	spin_lock_init(&queue->index_lock);
> +  
> +  	queue->rt2x00dev = rt2x00dev;
> +  	queue->qid = qid;
> +! 	queue->txop = 0;
> +! 	queue->aifs = 2;
> +! 	queue->cw_min = 5;
> +! 	queue->cw_max = 10;
> +  
> +  	rt2x00dev->ops->queue_init(queue);
> +  
> +--- 1213,1252 ----
> +  static void rt2x00queue_init(struct rt2x00_dev *rt2x00dev,
> +  			     struct data_queue *queue, enum data_queue_qid qid)
> +  {
> ++ 	/* Use default values for each queue type */
> ++ 	unsigned short cwmin = 5, cwmax = 10, aifs = 2, txop = 0;
> ++ 	
> ++ 	switch (qid) {
> ++ 		case QID_AC_VO:
> ++ 			cwmin = 2;
> ++ 			cwmax = 3;
> ++ 			txop = 47;
> ++ 			break;
> ++ 		case QID_AC_VI:
> ++ 			cwmin = 3;
> ++ 			cwmax = 4;
> ++ 			txop = 94;
> ++ 			break;
> ++ 		case QID_AC_BE:
> ++ 			cwmin = 4;
> ++ 			aifs = 3;
> ++ 			break;
> ++ 		case QID_AC_BK:
> ++ 			cwmin = 4;
> ++ 			aifs = 7;
> ++ 			break;
> ++ 	}
> ++ 	
> +  	mutex_init(&queue->status_lock);
> +  	spin_lock_init(&queue->tx_lock);
> +  	spin_lock_init(&queue->index_lock);
> +  
> +  	queue->rt2x00dev = rt2x00dev;
> +  	queue->qid = qid;
> +! 	queue->txop = txop;
> +! 	queue->aifs = aifs;
> +! 	queue->cw_min = cwmin;
> +! 	queue->cw_max = cwmax;
> +  
> +  	rt2x00dev->ops->queue_init(queue);
> +  
> _______________________________________________
> openwrt-devel mailing list
> openwrt-devel@lists.openwrt.org
> https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel
>
Felix Fietkau Sept. 11, 2015, 11:49 a.m. UTC | #2
On 2015-09-11 13:33, N.Leiten wrote:
> Fix instability of Ralink WiFi general queue management on high load.
> rt2x00 driver logs in dmesg "Dropping frame due to full queue ..." several times and at some point get stuck.
> 
> Solutions in patch:
> 1) Increasing number of frames in each TX queue helps with speed and
> decreases queue overflows. Actually 256 frames can be increased to
> 512 (this number of frames used in proprietary drivers for every
> queue).
512 frames seems to be overly excessive. Ever heard of bufferbloat?
It seems to me that the driver should simply call ieee80211_stop_queue
earlier to ensure that mac80211 does not attempt to fill the queues as
much. The driver queue length should probably be around 64 or less.

> 2) Setting number of frames in TX/RX queues to equal values
> resolves async speed behaviour with better RX on AP-side (uplink from
> STAs), where it needs to be at least equal or better on TX queue on
> AP (download to STA).
That also doesn't make much sense to me as queueing behavior is
completely different between rx and tx. Rx queue processing speed is
determined by CPU speed, Tx queue processing speed is determined by
shared medium speed.

> 3) In rt2x00mac.c additional check for queue
> full added and reassignment in this case, so interface will not drop
> frame.
Why? If the queues are full, it's better to just drop packets instead of
making the problem worse.

> 4) Fixes in queue initialization. Default values for AC_BK,
> AC_BE, AC_VI, AC_VO set from WMM.
Why do you hardcode that stuff inside the driver? What difference do
these values make?

> Tested on RT3883, RT5350, MT7620 SoCs and on RT3092 pcie interface for 10 days.
> 
> I'm planning to send this patch to mac80211 soon, but need to be sure that it works on other Ralink/Mediatek platforms and it's appropriate to do so.
> 
> 
> Signed-off-by: Nick Leiten <nickleiten@gmail.com>
> diff --git a/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
> new file mode 100644
> index 0000000..9239bec
> --- /dev/null
> +++ b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
> @@ -0,0 +1,142 @@
> +Only in compat-wireless-2015-03-09/drivers/net/wireless/rt2x00: limit
> +diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c
> +*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-06-16 13:02:30.000000000 +0300
> +--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-09-04 11:50:09.665148666 +0300
> +***************
> +*** 700,706 ****
> +  
> +  	switch (queue->qid) {
> +  	case QID_RX:
> +! 		queue->limit = 128;
> +  		queue->data_size = AGGREGATION_SIZE;
> +  		queue->desc_size = RXD_DESC_SIZE;
> +  		queue->winfo_size = rxwi_size;
> +--- 700,706 ----
> +  
> +  	switch (queue->qid) {
> +  	case QID_RX:
> +! 		queue->limit = 256;
> +  		queue->data_size = AGGREGATION_SIZE;
> +  		queue->desc_size = RXD_DESC_SIZE;
> +  		queue->winfo_size = rxwi_size;
> +***************
> +*** 711,717 ****
> +  	case QID_AC_VI:
> +  	case QID_AC_BE:
> +  	case QID_AC_BK:
> +! 		queue->limit = 64;
> +  		queue->data_size = AGGREGATION_SIZE;
> +  		queue->desc_size = TXD_DESC_SIZE;
> +  		queue->winfo_size = txwi_size;
> +--- 711,717 ----
> +  	case QID_AC_VI:
> +  	case QID_AC_BE:
> +  	case QID_AC_BK:
> +! 		queue->limit = 256;
> +  		queue->data_size = AGGREGATION_SIZE;
> +  		queue->desc_size = TXD_DESC_SIZE;
> +  		queue->winfo_size = txwi_size;
Wrong patch style, please run make package/mac80211/refresh.

- Felix
N.Leiten Sept. 11, 2015, 12:08 p.m. UTC | #3
Hi,

Ok, I'll do it.

In email dated Пятница - 11 сентября 2015 13:39:54 user John Crispin wrote:
> Hi,
> 
> before looking at this in detail, could please send a properly formatted
> patch. the patch you sent is not using the unified option when
> generating it. upstream linux-wireless wont accept it otherwise and we
> wont either.
> 
> also please prefix the email with "mac80211: " so that we know which
> subsystem it applies to.
> 
> that being said, the bug has been annoying me for eons and i never had
> time to fix it so thanks for the time debugging this :)
> 
> 	John
> 
> 
> On 11/09/2015 13:33, N.Leiten wrote:
> > Fix instability of Ralink WiFi general queue management on high load.
> > rt2x00 driver logs in dmesg "Dropping frame due to full queue ..." several times and at some point get stuck.
> > 
> > Solutions in patch:
> > 1) Increasing number of frames in each TX queue helps with speed and decreases queue overflows. Actually 256 frames can be increased to 512 (this number of frames used in proprietary drivers for every queue).
> > 2) Setting number of frames in TX/RX queues to equal values resolves async speed behaviour with better RX on AP-side (uplink from STAs), where it needs to be at least equal or better on TX queue on AP (download to STA).
> > 3) In rt2x00mac.c additional check for queue full added and reassignment in this case, so interface will not drop frame.
> > 4) Fixes in queue initialization. Default values for AC_BK, AC_BE, AC_VI, AC_VO set from WMM.
> > 
> > Tested on RT3883, RT5350, MT7620 SoCs and on RT3092 pcie interface for 10 days.
> > 
> > I'm planning to send this patch to mac80211 soon, but need to be sure that it works on other Ralink/Mediatek platforms and it's appropriate to do so.
> > 
> > 
> > Signed-off-by: Nick Leiten <nickleiten@gmail.com>
> > diff --git a/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
> > new file mode 100644
> > index 0000000..9239bec
> > --- /dev/null
> > +++ b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
> > @@ -0,0 +1,142 @@
> > +Only in compat-wireless-2015-03-09/drivers/net/wireless/rt2x00: limit
> > +diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c
> > +*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-06-16 13:02:30.000000000 +0300
> > +--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-09-04 11:50:09.665148666 +0300
> > +***************
> > +*** 700,706 ****
> > +  
> > +  	switch (queue->qid) {
> > +  	case QID_RX:
> > +! 		queue->limit = 128;
> > +  		queue->data_size = AGGREGATION_SIZE;
> > +  		queue->desc_size = RXD_DESC_SIZE;
> > +  		queue->winfo_size = rxwi_size;
> > +--- 700,706 ----
> > +  
> > +  	switch (queue->qid) {
> > +  	case QID_RX:
> > +! 		queue->limit = 256;
> > +  		queue->data_size = AGGREGATION_SIZE;
> > +  		queue->desc_size = RXD_DESC_SIZE;
> > +  		queue->winfo_size = rxwi_size;
> > +***************
> > +*** 711,717 ****
> > +  	case QID_AC_VI:
> > +  	case QID_AC_BE:
> > +  	case QID_AC_BK:
> > +! 		queue->limit = 64;
> > +  		queue->data_size = AGGREGATION_SIZE;
> > +  		queue->desc_size = TXD_DESC_SIZE;
> > +  		queue->winfo_size = txwi_size;
> > +--- 711,717 ----
> > +  	case QID_AC_VI:
> > +  	case QID_AC_BE:
> > +  	case QID_AC_BK:
> > +! 		queue->limit = 256;
> > +  		queue->data_size = AGGREGATION_SIZE;
> > +  		queue->desc_size = TXD_DESC_SIZE;
> > +  		queue->winfo_size = txwi_size;
> > +diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2x00mac.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2x00mac.c
> > +*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2x00mac.c	2015-06-16 13:02:30.000000000 +0300
> > +--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2x00mac.c	2015-09-04 11:47:45.845449209 +0300
> > +***************
> > +*** 26,31 ****
> > +--- 26,32 ----
> > +  
> > +  #include "rt2x00.h"
> > +  #include "rt2x00lib.h"
> > ++ #include "rt2x00queue.h"
> > +  
> > +  static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
> > +  				struct data_queue *queue,
> > +***************
> > +*** 115,120 ****
> > +--- 116,141 ----
> > +  	if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags))
> > +  		goto exit_free_skb;
> > +  
> > ++ 	/* Dirty hack for Queue overrun protection,
> > ++ 	 * if AC_VO/AC_VI/AC_BE is full, use next queue.
> > ++ 	 * if AC_BK is full use previous queue.
> > ++ 	 */
> > ++ 	if (qid < 4) {
> > ++ 		queue = rt2x00queue_get_tx_queue(rt2x00dev,qid);
> > ++ 		if (unlikely(rt2x00queue_full(queue))){
> > ++ 			switch(qid){
> > ++ 				case 0: /* QID_AC_VO */
> > ++ 				case 1: /* QID_AC_VI */
> > ++ 				case 2: /* QID_AC_BE */
> > ++ 					qid++;
> > ++ 					break;
> > ++ 				case 3: /* QID_AC_BK */
> > ++ 					qid--;
> > ++ 					break;
> > ++ 			}
> > ++ 		}
> > ++ 	}
> > ++ 	
> > +  	/*
> > +  	 * Use the ATIM queue if appropriate and present.
> > +  	 */
> > +diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2x00queue.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2x00queue.c
> > +*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2x00queue.c	2015-03-10 05:37:16.000000000 +0200
> > +--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2x00queue.c	2015-09-04 11:49:37.911446071 +0300
> > +***************
> > +*** 1213,1228 ****
> > +  static void rt2x00queue_init(struct rt2x00_dev *rt2x00dev,
> > +  			     struct data_queue *queue, enum data_queue_qid qid)
> > +  {
> > +  	mutex_init(&queue->status_lock);
> > +  	spin_lock_init(&queue->tx_lock);
> > +  	spin_lock_init(&queue->index_lock);
> > +  
> > +  	queue->rt2x00dev = rt2x00dev;
> > +  	queue->qid = qid;
> > +! 	queue->txop = 0;
> > +! 	queue->aifs = 2;
> > +! 	queue->cw_min = 5;
> > +! 	queue->cw_max = 10;
> > +  
> > +  	rt2x00dev->ops->queue_init(queue);
> > +  
> > +--- 1213,1252 ----
> > +  static void rt2x00queue_init(struct rt2x00_dev *rt2x00dev,
> > +  			     struct data_queue *queue, enum data_queue_qid qid)
> > +  {
> > ++ 	/* Use default values for each queue type */
> > ++ 	unsigned short cwmin = 5, cwmax = 10, aifs = 2, txop = 0;
> > ++ 	
> > ++ 	switch (qid) {
> > ++ 		case QID_AC_VO:
> > ++ 			cwmin = 2;
> > ++ 			cwmax = 3;
> > ++ 			txop = 47;
> > ++ 			break;
> > ++ 		case QID_AC_VI:
> > ++ 			cwmin = 3;
> > ++ 			cwmax = 4;
> > ++ 			txop = 94;
> > ++ 			break;
> > ++ 		case QID_AC_BE:
> > ++ 			cwmin = 4;
> > ++ 			aifs = 3;
> > ++ 			break;
> > ++ 		case QID_AC_BK:
> > ++ 			cwmin = 4;
> > ++ 			aifs = 7;
> > ++ 			break;
> > ++ 	}
> > ++ 	
> > +  	mutex_init(&queue->status_lock);
> > +  	spin_lock_init(&queue->tx_lock);
> > +  	spin_lock_init(&queue->index_lock);
> > +  
> > +  	queue->rt2x00dev = rt2x00dev;
> > +  	queue->qid = qid;
> > +! 	queue->txop = txop;
> > +! 	queue->aifs = aifs;
> > +! 	queue->cw_min = cwmin;
> > +! 	queue->cw_max = cwmax;
> > +  
> > +  	rt2x00dev->ops->queue_init(queue);
> > +  
> > _______________________________________________
> > openwrt-devel mailing list
> > openwrt-devel@lists.openwrt.org
> > https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel
> > 
> _______________________________________________
> openwrt-devel mailing list
> openwrt-devel@lists.openwrt.org
> https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel
N.Leiten Sept. 11, 2015, 12:43 p.m. UTC | #4
In email dated Пятница - 11 сентября 2015 13:49:26 user Felix Fietkau wrote:
> On 2015-09-11 13:33, N.Leiten wrote:
> > Fix instability of Ralink WiFi general queue management on high load.
> > rt2x00 driver logs in dmesg "Dropping frame due to full queue ..." several times and at some point get stuck.
> > 
> > Solutions in patch:
> > 1) Increasing number of frames in each TX queue helps with speed and
> > decreases queue overflows. Actually 256 frames can be increased to
> > 512 (this number of frames used in proprietary drivers for every
> > queue).
> 512 frames seems to be overly excessive. Ever heard of bufferbloat?
> It seems to me that the driver should simply call ieee80211_stop_queue
> earlier to ensure that mac80211 does not attempt to fill the queues as
> much. The driver queue length should probably be around 64 or less.
> 

I agree. The patch was made for internal use on own hardware reference design in our company, so I had to look through proprietary and opensource drivers to see the difference. The point was to use recent versions of OpenWRT with mac80211 stack because proprietary still uses linux-2.6. And it makes porting full of pain in nails and bloody eyes reading that piece of code. For now mac80211 is preferrable.
As to bufferbloat - yes I'm aware of it. Maybe I'm wrong in observations, but simply increasing queue num to 128 frames make driver more stable but not enough, so I started digging in ralink drivers for maximum value, tried it and it increased stability even more, but still hang at some point. So I decreased it to 256. Well, I'll try to remove this part and test again.

> > 2) Setting number of frames in TX/RX queues to equal values
> > resolves async speed behaviour with better RX on AP-side (uplink from
> > STAs), where it needs to be at least equal or better on TX queue on
> > AP (download to STA).
> That also doesn't make much sense to me as queueing behavior is
> completely different between rx and tx. Rx queue processing speed is
> determined by CPU speed, Tx queue processing speed is determined by
> shared medium speed.
> 

I agree, that was another step in digging. As I can understand it's better even use NAPI for RX queue. But I found only one mention of it in mac80211 sources, need to explore this.
I'll remove it, test again and make new patch. Can you direct me with this kind of behaviour? On rt3092 (2T2R) we can get 30-40Mbit/s download and 80-120Mbit/s upload speeds in separate tests, on 1T1R and 2T2R stations.

> > 3) In rt2x00mac.c additional check for queue
> > full added and reassignment in this case, so interface will not drop
> > frame.
> Why? If the queues are full, it's better to just drop packets instead of
> making the problem worse.
> 

It was the simpliest solution at this point. I'll try to use ieee80211_stop_queue instead, but don't know how much time it'll consume.
Maybe there's problem in "kick/pause" queue mechanism? I managed to explore behaviour after hang-point. It seems that only QID_AC_* queues stuck, station assoc/auth goes fine but actual data not transmitted on AP-side, but it receives DHCP-requests from station and it looks like no network access on hanged AP.

> > 4) Fixes in queue initialization. Default values for AC_BK,
> > AC_BE, AC_VI, AC_VO set from WMM.
> Why do you hardcode that stuff inside the driver? What difference do
> these values make?
> 
> > Tested on RT3883, RT5350, MT7620 SoCs and on RT3092 pcie interface for 10 days.
> > 
> > I'm planning to send this patch to mac80211 soon, but need to be sure that it works on other Ralink/Mediatek platforms and it's appropriate to do so.
> > 
> > 
> > Signed-off-by: Nick Leiten <nickleiten@gmail.com>
> > diff --git a/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
> > new file mode 100644
> > index 0000000..9239bec
> > --- /dev/null
> > +++ b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
> > @@ -0,0 +1,142 @@
> > +Only in compat-wireless-2015-03-09/drivers/net/wireless/rt2x00: limit
> > +diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c
> > +*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-06-16 13:02:30.000000000 +0300
> > +--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-09-04 11:50:09.665148666 +0300
> > +***************
> > +*** 700,706 ****
> > +  
> > +  	switch (queue->qid) {
> > +  	case QID_RX:
> > +! 		queue->limit = 128;
> > +  		queue->data_size = AGGREGATION_SIZE;
> > +  		queue->desc_size = RXD_DESC_SIZE;
> > +  		queue->winfo_size = rxwi_size;
> > +--- 700,706 ----
> > +  
> > +  	switch (queue->qid) {
> > +  	case QID_RX:
> > +! 		queue->limit = 256;
> > +  		queue->data_size = AGGREGATION_SIZE;
> > +  		queue->desc_size = RXD_DESC_SIZE;
> > +  		queue->winfo_size = rxwi_size;
> > +***************
> > +*** 711,717 ****
> > +  	case QID_AC_VI:
> > +  	case QID_AC_BE:
> > +  	case QID_AC_BK:
> > +! 		queue->limit = 64;
> > +  		queue->data_size = AGGREGATION_SIZE;
> > +  		queue->desc_size = TXD_DESC_SIZE;
> > +  		queue->winfo_size = txwi_size;
> > +--- 711,717 ----
> > +  	case QID_AC_VI:
> > +  	case QID_AC_BE:
> > +  	case QID_AC_BK:
> > +! 		queue->limit = 256;
> > +  		queue->data_size = AGGREGATION_SIZE;
> > +  		queue->desc_size = TXD_DESC_SIZE;
> > +  		queue->winfo_size = txwi_size;
> Wrong patch style, please run make package/mac80211/refresh.
> 
Ok, will fix it.
> - Felix
Mikko Hissa Sept. 11, 2015, 2:16 p.m. UTC | #5
> On 11 Sep 2015, at 15:43, N.Leiten <nickleiten@gmail.com> wrote:
> 
> In email dated Пятница - 11 сентября 2015 13:49:26 user Felix Fietkau wrote:
>> On 2015-09-11 13:33, N.Leiten wrote:
>>> Fix instability of Ralink WiFi general queue management on high load.
>>> rt2x00 driver logs in dmesg "Dropping frame due to full queue ..." several times and at some point get stuck.
>>> 
>>> Solutions in patch:
>>> 1) Increasing number of frames in each TX queue helps with speed and
>>> decreases queue overflows. Actually 256 frames can be increased to
>>> 512 (this number of frames used in proprietary drivers for every
>>> queue).
>> 512 frames seems to be overly excessive. Ever heard of bufferbloat?
>> It seems to me that the driver should simply call ieee80211_stop_queue
>> earlier to ensure that mac80211 does not attempt to fill the queues as
>> much. The driver queue length should probably be around 64 or less.
>> 
> 
> I agree. The patch was made for internal use on own hardware reference design in our company, so I had to look through proprietary and opensource drivers to see the difference. The point was to use recent versions of OpenWRT with mac80211 stack because proprietary still uses linux-2.6. And it makes porting full of pain in nails and bloody eyes reading that piece of code. For now mac80211 is preferrable.
> As to bufferbloat - yes I'm aware of it. Maybe I'm wrong in observations, but simply increasing queue num to 128 frames make driver more stable but not enough, so I started digging in ralink drivers for maximum value, tried it and it increased stability even more, but still hang at some point. So I decreased it to 256. Well, I'll try to remove this part and test again.

I think I solved the stuck tx-queue issue a few years ago by increasing the size of the txstatus fifo. Now looking at the code, I suggest you to try the following. 

in rt2800mmio.c:
	static void rt2800mmio_txstatus_interrupt(struct rt2x00_dev *rt2x00dev…
		I don’t know to what end was this added here but anyways remove the read limit, since it’s obvious that there might be more than just one tx-queue being used. Exiting the interrupt handler here with half full hw fifo (16 entries) will lead to lost txstatuses for sure:
		if (++i >= rt2x00dev->tx->limit)
                        break;

in rt2x00dev.c
	static int rt2x00lib_probe_hw(struct rt2x00_dev *rt2x00dev)…
		I used a static value here, but it seems that the hw might generate excess txstatuses (who knows why). So bigger is better!
		-int kfifo_size = roundup_pow_of_two(rt2x00dev->ops->tx_queues * rt2x00dev->tx->limit * sizeof(u32));
		+int kfifo_size = roundup_pow_of_two(2048 * sizeof(u32));

With these you should no longer be experiencing stuck tx-queues!

> 
>>> 2) Setting number of frames in TX/RX queues to equal values
>>> resolves async speed behaviour with better RX on AP-side (uplink from
>>> STAs), where it needs to be at least equal or better on TX queue on
>>> AP (download to STA).
>> That also doesn't make much sense to me as queueing behavior is
>> completely different between rx and tx. Rx queue processing speed is
>> determined by CPU speed, Tx queue processing speed is determined by
>> shared medium speed.
>> 
> 
> I agree, that was another step in digging. As I can understand it's better even use NAPI for RX queue. But I found only one mention of it in mac80211 sources, need to explore this.

The driver already uses tasklets and rx-interrupts stay disabled until the driver "catches up” with the rx-queue. So, there’s little to none performance gain here with napi. Although, I did see a noticeable performance increase by enabling delayed rx-interrupts from the hw. As if the hw is sending a BA every time when RX_CRX_IDX register is written? 

> I'll remove it, test again and make new patch. Can you direct me with this kind of behaviour? On rt3092 (2T2R) we can get 30-40Mbit/s download and 80-120Mbit/s upload speeds in separate tests, on 1T1R and 2T2R stations.
> 
>>> 3) In rt2x00mac.c additional check for queue
>>> full added and reassignment in this case, so interface will not drop
>>> frame.
>> Why? If the queues are full, it's better to just drop packets instead of
>> making the problem worse.
>> 
> 
> It was the simpliest solution at this point. I'll try to use ieee80211_stop_queue instead, but don't know how much time it'll consume.
> Maybe there's problem in "kick/pause" queue mechanism? I managed to explore behaviour after hang-point. It seems that only QID_AC_* queues stuck, station assoc/auth goes fine but actual data not transmitted on AP-side, but it receives DHCP-requests from station and it looks like no network access on hanged AP.

When the queue is stuck, you can see that the queue is filled to it’s threshold and it’s paused. So, the kick&pause works as it should.

> 
>>> 4) Fixes in queue initialization. Default values for AC_BK,
>>> AC_BE, AC_VI, AC_VO set from WMM.
>> Why do you hardcode that stuff inside the driver? What difference do
>> these values make?
>> 
>>> Tested on RT3883, RT5350, MT7620 SoCs and on RT3092 pcie interface for 10 days.
>>> 
>>> I'm planning to send this patch to mac80211 soon, but need to be sure that it works on other Ralink/Mediatek platforms and it's appropriate to do so.
>>> 
>>> 
>>> Signed-off-by: Nick Leiten <nickleiten@gmail.com>
>>> diff --git a/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
>>> new file mode 100644
>>> index 0000000..9239bec
>>> --- /dev/null
>>> +++ b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
>>> @@ -0,0 +1,142 @@
>>> +Only in compat-wireless-2015-03-09/drivers/net/wireless/rt2x00: limit
>>> +diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c
>>> +*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-06-16 13:02:30.000000000 +0300
>>> +--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-09-04 11:50:09.665148666 +0300
>>> +***************
>>> +*** 700,706 ****
>>> +  
>>> +  	switch (queue->qid) {
>>> +  	case QID_RX:
>>> +! 		queue->limit = 128;
>>> +  		queue->data_size = AGGREGATION_SIZE;
>>> +  		queue->desc_size = RXD_DESC_SIZE;
>>> +  		queue->winfo_size = rxwi_size;
>>> +--- 700,706 ----
>>> +  
>>> +  	switch (queue->qid) {
>>> +  	case QID_RX:
>>> +! 		queue->limit = 256;
>>> +  		queue->data_size = AGGREGATION_SIZE;
>>> +  		queue->desc_size = RXD_DESC_SIZE;
>>> +  		queue->winfo_size = rxwi_size;
>>> +***************
>>> +*** 711,717 ****
>>> +  	case QID_AC_VI:
>>> +  	case QID_AC_BE:
>>> +  	case QID_AC_BK:
>>> +! 		queue->limit = 64;
>>> +  		queue->data_size = AGGREGATION_SIZE;
>>> +  		queue->desc_size = TXD_DESC_SIZE;
>>> +  		queue->winfo_size = txwi_size;
>>> +--- 711,717 ----
>>> +  	case QID_AC_VI:
>>> +  	case QID_AC_BE:
>>> +  	case QID_AC_BK:
>>> +! 		queue->limit = 256;
>>> +  		queue->data_size = AGGREGATION_SIZE;
>>> +  		queue->desc_size = TXD_DESC_SIZE;
>>> +  		queue->winfo_size = txwi_size;
>> Wrong patch style, please run make package/mac80211/refresh.
>> 
> Ok, will fix it.
>> - Felix
> _______________________________________________
> openwrt-devel mailing list
> openwrt-devel@lists.openwrt.org <mailto:openwrt-devel@lists.openwrt.org>
> https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel <https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel>
N.Leiten Sept. 11, 2015, 6:32 p.m. UTC | #6
Hi.

In email dated Пятница - 11 сентября 2015 17:16:35 user Mikko Hissa wrote:
> 
> > On 11 Sep 2015, at 15:43, N.Leiten <nickleiten@gmail.com> wrote:
> > 
> > In email dated Пятница - 11 сентября 2015 13:49:26 user Felix Fietkau wrote:
> >> On 2015-09-11 13:33, N.Leiten wrote:
> >>> Fix instability of Ralink WiFi general queue management on high load.
> >>> rt2x00 driver logs in dmesg "Dropping frame due to full queue ..." several times and at some point get stuck.
> >>> 
> >>> Solutions in patch:
> >>> 1) Increasing number of frames in each TX queue helps with speed and
> >>> decreases queue overflows. Actually 256 frames can be increased to
> >>> 512 (this number of frames used in proprietary drivers for every
> >>> queue).
> >> 512 frames seems to be overly excessive. Ever heard of bufferbloat?
> >> It seems to me that the driver should simply call ieee80211_stop_queue
> >> earlier to ensure that mac80211 does not attempt to fill the queues as
> >> much. The driver queue length should probably be around 64 or less.
> >> 
> > 
> > I agree. The patch was made for internal use on own hardware reference design in our company, so I had to look through proprietary and opensource drivers to see the difference. The point was to use recent versions of OpenWRT with mac80211 stack because proprietary still uses linux-2.6. And it makes porting full of pain in nails and bloody eyes reading that piece of code. For now mac80211 is preferrable.
> > As to bufferbloat - yes I'm aware of it. Maybe I'm wrong in observations, but simply increasing queue num to 128 frames make driver more stable but not enough, so I started digging in ralink drivers for maximum value, tried it and it increased stability even more, but still hang at some point. So I decreased it to 256. Well, I'll try to remove this part and test again.
> 
> I think I solved the stuck tx-queue issue a few years ago by increasing the size of the txstatus fifo. Now looking at the code, I suggest you to try the following. 
> 
> in rt2800mmio.c:
> 	static void rt2800mmio_txstatus_interrupt(struct rt2x00_dev *rt2x00dev…
> 		I don’t know to what end was this added here but anyways remove the read limit, since it’s obvious that there might be more than just one tx-queue being used. Exiting the interrupt handler here with half full hw fifo (16 entries) will lead to lost txstatuses for sure:
> 		if (++i >= rt2x00dev->tx->limit)
>                         break;
> 
Ok, I'll try that.

> in rt2x00dev.c
> 	static int rt2x00lib_probe_hw(struct rt2x00_dev *rt2x00dev)…
> 		I used a static value here, but it seems that the hw might generate excess txstatuses (who knows why). So bigger is better!
> 		-int kfifo_size = roundup_pow_of_two(rt2x00dev->ops->tx_queues * rt2x00dev->tx->limit * sizeof(u32));
> 		+int kfifo_size = roundup_pow_of_two(2048 * sizeof(u32));
> 
> With these you should no longer be experiencing stuck tx-queues!
> 

Actually,
   int kfifo_size = roundup_pow_of_two(2048 * sizeof(u32));
is the same way I go, but I increase rt2x00dev->tx->limit to 512 at first place and after tests decreased it to 256. So in math we got the same value  there (4 queues * 512 frames = 2048).

> > 
> >>> 2) Setting number of frames in TX/RX queues to equal values
> >>> resolves async speed behaviour with better RX on AP-side (uplink from
> >>> STAs), where it needs to be at least equal or better on TX queue on
> >>> AP (download to STA).
> >> That also doesn't make much sense to me as queueing behavior is
> >> completely different between rx and tx. Rx queue processing speed is
> >> determined by CPU speed, Tx queue processing speed is determined by
> >> shared medium speed.
> >> 
> > 
> > I agree, that was another step in digging. As I can understand it's better even use NAPI for RX queue. But I found only one mention of it in mac80211 sources, need to explore this.
> 
> The driver already uses tasklets and rx-interrupts stay disabled until the driver "catches up” with the rx-queue. So, there’s little to none performance gain here with napi. Although, I did see a noticeable performance increase by enabling delayed rx-interrupts from the hw. As if the hw is sending a BA every time when RX_CRX_IDX register is written? 
> 
> > I'll remove it, test again and make new patch. Can you direct me with this kind of behaviour? On rt3092 (2T2R) we can get 30-40Mbit/s download and 80-120Mbit/s upload speeds in separate tests, on 1T1R and 2T2R stations.
> > 
> >>> 3) In rt2x00mac.c additional check for queue
> >>> full added and reassignment in this case, so interface will not drop
> >>> frame.
> >> Why? If the queues are full, it's better to just drop packets instead of
> >> making the problem worse.
> >> 
> > 
> > It was the simpliest solution at this point. I'll try to use ieee80211_stop_queue instead, but don't know how much time it'll consume.
> > Maybe there's problem in "kick/pause" queue mechanism? I managed to explore behaviour after hang-point. It seems that only QID_AC_* queues stuck, station assoc/auth goes fine but actual data not transmitted on AP-side, but it receives DHCP-requests from station and it looks like no network access on hanged AP.
> 
> When the queue is stuck, you can see that the queue is filled to it’s threshold and it’s paused. So, the kick&pause works as it should.
> 
> > 
> >>> 4) Fixes in queue initialization. Default values for AC_BK,
> >>> AC_BE, AC_VI, AC_VO set from WMM.
> >> Why do you hardcode that stuff inside the driver? What difference do
> >> these values make?
> >> 
> >>> Tested on RT3883, RT5350, MT7620 SoCs and on RT3092 pcie interface for 10 days.
> >>> 
> >>> I'm planning to send this patch to mac80211 soon, but need to be sure that it works on other Ralink/Mediatek platforms and it's appropriate to do so.
> >>> 
> >>> 
> >>> Signed-off-by: Nick Leiten <nickleiten@gmail.com>
> >>> diff --git a/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
> >>> new file mode 100644
> >>> index 0000000..9239bec
> >>> --- /dev/null
> >>> +++ b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
> >>> @@ -0,0 +1,142 @@
> >>> +Only in compat-wireless-2015-03-09/drivers/net/wireless/rt2x00: limit
> >>> +diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c
> >>> +*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-06-16 13:02:30.000000000 +0300
> >>> +--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-09-04 11:50:09.665148666 +0300
> >>> +***************
> >>> +*** 700,706 ****
> >>> +  
> >>> +  	switch (queue->qid) {
> >>> +  	case QID_RX:
> >>> +! 		queue->limit = 128;
> >>> +  		queue->data_size = AGGREGATION_SIZE;
> >>> +  		queue->desc_size = RXD_DESC_SIZE;
> >>> +  		queue->winfo_size = rxwi_size;
> >>> +--- 700,706 ----
> >>> +  
> >>> +  	switch (queue->qid) {
> >>> +  	case QID_RX:
> >>> +! 		queue->limit = 256;
> >>> +  		queue->data_size = AGGREGATION_SIZE;
> >>> +  		queue->desc_size = RXD_DESC_SIZE;
> >>> +  		queue->winfo_size = rxwi_size;
> >>> +***************
> >>> +*** 711,717 ****
> >>> +  	case QID_AC_VI:
> >>> +  	case QID_AC_BE:
> >>> +  	case QID_AC_BK:
> >>> +! 		queue->limit = 64;
> >>> +  		queue->data_size = AGGREGATION_SIZE;
> >>> +  		queue->desc_size = TXD_DESC_SIZE;
> >>> +  		queue->winfo_size = txwi_size;
> >>> +--- 711,717 ----
> >>> +  	case QID_AC_VI:
> >>> +  	case QID_AC_BE:
> >>> +  	case QID_AC_BK:
> >>> +! 		queue->limit = 256;
> >>> +  		queue->data_size = AGGREGATION_SIZE;
> >>> +  		queue->desc_size = TXD_DESC_SIZE;
> >>> +  		queue->winfo_size = txwi_size;
> >> Wrong patch style, please run make package/mac80211/refresh.
> >> 
> > Ok, will fix it.
> >> - Felix
> > _______________________________________________
> > openwrt-devel mailing list
> > openwrt-devel@lists.openwrt.org <mailto:openwrt-devel@lists.openwrt.org>
> > https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel <https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel>
Mikko Hissa Sept. 11, 2015, 8 p.m. UTC | #7
> On 11 Sep 2015, at 21:32, N.Leiten <nickleiten@gmail.com> wrote:
> 
> Hi.
> 
> In email dated Пятница - 11 сентября 2015 17:16:35 user Mikko Hissa wrote:
>> 
>>> On 11 Sep 2015, at 15:43, N.Leiten <nickleiten@gmail.com> wrote:
>>> 
>>> In email dated Пятница - 11 сентября 2015 13:49:26 user Felix Fietkau wrote:
>>>> On 2015-09-11 13:33, N.Leiten wrote:
>>>>> Fix instability of Ralink WiFi general queue management on high load.
>>>>> rt2x00 driver logs in dmesg "Dropping frame due to full queue ..." several times and at some point get stuck.
>>>>> 
>>>>> Solutions in patch:
>>>>> 1) Increasing number of frames in each TX queue helps with speed and
>>>>> decreases queue overflows. Actually 256 frames can be increased to
>>>>> 512 (this number of frames used in proprietary drivers for every
>>>>> queue).
>>>> 512 frames seems to be overly excessive. Ever heard of bufferbloat?
>>>> It seems to me that the driver should simply call ieee80211_stop_queue
>>>> earlier to ensure that mac80211 does not attempt to fill the queues as
>>>> much. The driver queue length should probably be around 64 or less.
>>>> 
>>> 
>>> I agree. The patch was made for internal use on own hardware reference design in our company, so I had to look through proprietary and opensource drivers to see the difference. The point was to use recent versions of OpenWRT with mac80211 stack because proprietary still uses linux-2.6. And it makes porting full of pain in nails and bloody eyes reading that piece of code. For now mac80211 is preferrable.
>>> As to bufferbloat - yes I'm aware of it. Maybe I'm wrong in observations, but simply increasing queue num to 128 frames make driver more stable but not enough, so I started digging in ralink drivers for maximum value, tried it and it increased stability even more, but still hang at some point. So I decreased it to 256. Well, I'll try to remove this part and test again.
>> 
>> I think I solved the stuck tx-queue issue a few years ago by increasing the size of the txstatus fifo. Now looking at the code, I suggest you to try the following. 
>> 
>> in rt2800mmio.c:
>> 	static void rt2800mmio_txstatus_interrupt(struct rt2x00_dev *rt2x00dev…
>> 		I don’t know to what end was this added here but anyways remove the read limit, since it’s obvious that there might be more than just one tx-queue being used. Exiting the interrupt handler here with half full hw fifo (16 entries) will lead to lost txstatuses for sure:
>> 		if (++i >= rt2x00dev->tx->limit)
>>                        break;
>> 
> Ok, I'll try that.
> 
>> in rt2x00dev.c
>> 	static int rt2x00lib_probe_hw(struct rt2x00_dev *rt2x00dev)…
>> 		I used a static value here, but it seems that the hw might generate excess txstatuses (who knows why). So bigger is better!
>> 		-int kfifo_size = roundup_pow_of_two(rt2x00dev->ops->tx_queues * rt2x00dev->tx->limit * sizeof(u32));
>> 		+int kfifo_size = roundup_pow_of_two(2048 * sizeof(u32));
>> 
>> With these you should no longer be experiencing stuck tx-queues!
>> 
> 
> Actually,
>   int kfifo_size = roundup_pow_of_two(2048 * sizeof(u32));
> is the same way I go, but I increase rt2x00dev->tx->limit to 512 at first place and after tests decreased it to 256. So in math we got the same value  there (4 queues * 512 frames = 2048).

Exactly, 2048 is not enough if you are using that kind of queue size. That 2048 fifo size was meant for the default queue sizes.

> 
>>> 
>>>>> 2) Setting number of frames in TX/RX queues to equal values
>>>>> resolves async speed behaviour with better RX on AP-side (uplink from
>>>>> STAs), where it needs to be at least equal or better on TX queue on
>>>>> AP (download to STA).
>>>> That also doesn't make much sense to me as queueing behavior is
>>>> completely different between rx and tx. Rx queue processing speed is
>>>> determined by CPU speed, Tx queue processing speed is determined by
>>>> shared medium speed.
>>>> 
>>> 
>>> I agree, that was another step in digging. As I can understand it's better even use NAPI for RX queue. But I found only one mention of it in mac80211 sources, need to explore this.
>> 
>> The driver already uses tasklets and rx-interrupts stay disabled until the driver "catches up” with the rx-queue. So, there’s little to none performance gain here with napi. Although, I did see a noticeable performance increase by enabling delayed rx-interrupts from the hw. As if the hw is sending a BA every time when RX_CRX_IDX register is written? 
>> 
>>> I'll remove it, test again and make new patch. Can you direct me with this kind of behaviour? On rt3092 (2T2R) we can get 30-40Mbit/s download and 80-120Mbit/s upload speeds in separate tests, on 1T1R and 2T2R stations.
>>> 
>>>>> 3) In rt2x00mac.c additional check for queue
>>>>> full added and reassignment in this case, so interface will not drop
>>>>> frame.
>>>> Why? If the queues are full, it's better to just drop packets instead of
>>>> making the problem worse.
>>>> 
>>> 
>>> It was the simpliest solution at this point. I'll try to use ieee80211_stop_queue instead, but don't know how much time it'll consume.
>>> Maybe there's problem in "kick/pause" queue mechanism? I managed to explore behaviour after hang-point. It seems that only QID_AC_* queues stuck, station assoc/auth goes fine but actual data not transmitted on AP-side, but it receives DHCP-requests from station and it looks like no network access on hanged AP.
>> 
>> When the queue is stuck, you can see that the queue is filled to it’s threshold and it’s paused. So, the kick&pause works as it should.
>> 
>>> 
>>>>> 4) Fixes in queue initialization. Default values for AC_BK,
>>>>> AC_BE, AC_VI, AC_VO set from WMM.
>>>> Why do you hardcode that stuff inside the driver? What difference do
>>>> these values make?
>>>> 
>>>>> Tested on RT3883, RT5350, MT7620 SoCs and on RT3092 pcie interface for 10 days.
>>>>> 
>>>>> I'm planning to send this patch to mac80211 soon, but need to be sure that it works on other Ralink/Mediatek platforms and it's appropriate to do so.
>>>>> 
>>>>> 
>>>>> Signed-off-by: Nick Leiten <nickleiten@gmail.com>
>>>>> diff --git a/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
>>>>> new file mode 100644
>>>>> index 0000000..9239bec
>>>>> --- /dev/null
>>>>> +++ b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
>>>>> @@ -0,0 +1,142 @@
>>>>> +Only in compat-wireless-2015-03-09/drivers/net/wireless/rt2x00: limit
>>>>> +diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c
>>>>> +*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-06-16 13:02:30.000000000 +0300
>>>>> +--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-09-04 11:50:09.665148666 +0300
>>>>> +***************
>>>>> +*** 700,706 ****
>>>>> +  
>>>>> +  	switch (queue->qid) {
>>>>> +  	case QID_RX:
>>>>> +! 		queue->limit = 128;
>>>>> +  		queue->data_size = AGGREGATION_SIZE;
>>>>> +  		queue->desc_size = RXD_DESC_SIZE;
>>>>> +  		queue->winfo_size = rxwi_size;
>>>>> +--- 700,706 ----
>>>>> +  
>>>>> +  	switch (queue->qid) {
>>>>> +  	case QID_RX:
>>>>> +! 		queue->limit = 256;
>>>>> +  		queue->data_size = AGGREGATION_SIZE;
>>>>> +  		queue->desc_size = RXD_DESC_SIZE;
>>>>> +  		queue->winfo_size = rxwi_size;
>>>>> +***************
>>>>> +*** 711,717 ****
>>>>> +  	case QID_AC_VI:
>>>>> +  	case QID_AC_BE:
>>>>> +  	case QID_AC_BK:
>>>>> +! 		queue->limit = 64;
>>>>> +  		queue->data_size = AGGREGATION_SIZE;
>>>>> +  		queue->desc_size = TXD_DESC_SIZE;
>>>>> +  		queue->winfo_size = txwi_size;
>>>>> +--- 711,717 ----
>>>>> +  	case QID_AC_VI:
>>>>> +  	case QID_AC_BE:
>>>>> +  	case QID_AC_BK:
>>>>> +! 		queue->limit = 256;
>>>>> +  		queue->data_size = AGGREGATION_SIZE;
>>>>> +  		queue->desc_size = TXD_DESC_SIZE;
>>>>> +  		queue->winfo_size = txwi_size;
>>>> Wrong patch style, please run make package/mac80211/refresh.
>>>> 
>>> Ok, will fix it.
>>>> - Felix
>>> _______________________________________________
>>> openwrt-devel mailing list
>>> openwrt-devel@lists.openwrt.org <mailto:openwrt-devel@lists.openwrt.org> <mailto:openwrt-devel@lists.openwrt.org <mailto:openwrt-devel@lists.openwrt.org>>
>>> https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel <https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel> <https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel <https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel>>
> _______________________________________________
> openwrt-devel mailing list
> openwrt-devel@lists.openwrt.org <mailto:openwrt-devel@lists.openwrt.org>
> https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel <https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel>
diff mbox

Patch

diff --git a/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
new file mode 100644
index 0000000..9239bec
--- /dev/null
+++ b/package/kernel/mac80211/patches/999-rt2x00-queue-update.patch
@@ -0,0 +1,142 @@ 
+Only in compat-wireless-2015-03-09/drivers/net/wireless/rt2x00: limit
+diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c
+*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-06-16 13:02:30.000000000 +0300
+--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2800mmio.c	2015-09-04 11:50:09.665148666 +0300
+***************
+*** 700,706 ****
+  
+  	switch (queue->qid) {
+  	case QID_RX:
+! 		queue->limit = 128;
+  		queue->data_size = AGGREGATION_SIZE;
+  		queue->desc_size = RXD_DESC_SIZE;
+  		queue->winfo_size = rxwi_size;
+--- 700,706 ----
+  
+  	switch (queue->qid) {
+  	case QID_RX:
+! 		queue->limit = 256;
+  		queue->data_size = AGGREGATION_SIZE;
+  		queue->desc_size = RXD_DESC_SIZE;
+  		queue->winfo_size = rxwi_size;
+***************
+*** 711,717 ****
+  	case QID_AC_VI:
+  	case QID_AC_BE:
+  	case QID_AC_BK:
+! 		queue->limit = 64;
+  		queue->data_size = AGGREGATION_SIZE;
+  		queue->desc_size = TXD_DESC_SIZE;
+  		queue->winfo_size = txwi_size;
+--- 711,717 ----
+  	case QID_AC_VI:
+  	case QID_AC_BE:
+  	case QID_AC_BK:
+! 		queue->limit = 256;
+  		queue->data_size = AGGREGATION_SIZE;
+  		queue->desc_size = TXD_DESC_SIZE;
+  		queue->winfo_size = txwi_size;
+diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2x00mac.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2x00mac.c
+*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2x00mac.c	2015-06-16 13:02:30.000000000 +0300
+--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2x00mac.c	2015-09-04 11:47:45.845449209 +0300
+***************
+*** 26,31 ****
+--- 26,32 ----
+  
+  #include "rt2x00.h"
+  #include "rt2x00lib.h"
++ #include "rt2x00queue.h"
+  
+  static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
+  				struct data_queue *queue,
+***************
+*** 115,120 ****
+--- 116,141 ----
+  	if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags))
+  		goto exit_free_skb;
+  
++ 	/* Dirty hack for Queue overrun protection,
++ 	 * if AC_VO/AC_VI/AC_BE is full, use next queue.
++ 	 * if AC_BK is full use previous queue.
++ 	 */
++ 	if (qid < 4) {
++ 		queue = rt2x00queue_get_tx_queue(rt2x00dev,qid);
++ 		if (unlikely(rt2x00queue_full(queue))){
++ 			switch(qid){
++ 				case 0: /* QID_AC_VO */
++ 				case 1: /* QID_AC_VI */
++ 				case 2: /* QID_AC_BE */
++ 					qid++;
++ 					break;
++ 				case 3: /* QID_AC_BK */
++ 					qid--;
++ 					break;
++ 			}
++ 		}
++ 	}
++ 	
+  	/*
+  	 * Use the ATIM queue if appropriate and present.
+  	 */
+diff -c -r compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2x00queue.c compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2x00queue.c
+*** compat-wireless-2015-03-09-orig/drivers/net/wireless/rt2x00/rt2x00queue.c	2015-03-10 05:37:16.000000000 +0200
+--- compat-wireless-2015-03-09/drivers/net/wireless/rt2x00/rt2x00queue.c	2015-09-04 11:49:37.911446071 +0300
+***************
+*** 1213,1228 ****
+  static void rt2x00queue_init(struct rt2x00_dev *rt2x00dev,
+  			     struct data_queue *queue, enum data_queue_qid qid)
+  {
+  	mutex_init(&queue->status_lock);
+  	spin_lock_init(&queue->tx_lock);
+  	spin_lock_init(&queue->index_lock);
+  
+  	queue->rt2x00dev = rt2x00dev;
+  	queue->qid = qid;
+! 	queue->txop = 0;
+! 	queue->aifs = 2;
+! 	queue->cw_min = 5;
+! 	queue->cw_max = 10;
+  
+  	rt2x00dev->ops->queue_init(queue);
+  
+--- 1213,1252 ----
+  static void rt2x00queue_init(struct rt2x00_dev *rt2x00dev,
+  			     struct data_queue *queue, enum data_queue_qid qid)
+  {
++ 	/* Use default values for each queue type */
++ 	unsigned short cwmin = 5, cwmax = 10, aifs = 2, txop = 0;
++ 	
++ 	switch (qid) {
++ 		case QID_AC_VO:
++ 			cwmin = 2;
++ 			cwmax = 3;
++ 			txop = 47;
++ 			break;
++ 		case QID_AC_VI:
++ 			cwmin = 3;
++ 			cwmax = 4;
++ 			txop = 94;
++ 			break;
++ 		case QID_AC_BE:
++ 			cwmin = 4;
++ 			aifs = 3;
++ 			break;
++ 		case QID_AC_BK:
++ 			cwmin = 4;
++ 			aifs = 7;
++ 			break;
++ 	}
++ 	
+  	mutex_init(&queue->status_lock);
+  	spin_lock_init(&queue->tx_lock);
+  	spin_lock_init(&queue->index_lock);
+  
+  	queue->rt2x00dev = rt2x00dev;
+  	queue->qid = qid;
+! 	queue->txop = txop;
+! 	queue->aifs = aifs;
+! 	queue->cw_min = cwmin;
+! 	queue->cw_max = cwmax;
+  
+  	rt2x00dev->ops->queue_init(queue);
+