diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt
index 579994a..f454564 100644
--- a/Documentation/networking/scaling.txt
+++ b/Documentation/networking/scaling.txt
@@ -135,6 +135,18 @@ packets have been queued to their backlog queue. The IPI wakes backlog
 processing on the remote CPU, and any queued packets are then processed
 up the networking stack.
 
+==== RPS Overflow Protection
+
+Because RPS selects the same CPU from the RPS cpuset for every packet of
+a flow, it causes load imbalance when input flows are not uniformly
+distributed. In the extreme case of a single flow, all packets are
+handled on one CPU, which limits the throughput of the machine to the
+throughput of that CPU. RPS has optional overflow protection, which
+disables flow affinity when an RPS CPU becomes saturated: during overload,
+its packets are sent to the least loaded other CPU in the RPS cpuset. To
+enable this option, set sysctl net.core.netdev_max_rps_backlog lower than
+net.core.netdev_max_backlog. Setting it to half is a reasonable heuristic.
+
 ==== RPS Configuration
 
 RPS requires a kernel compiled with the CONFIG_RPS kconfig symbol (on
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 18c5dc9..84624fa 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2609,6 +2609,9 @@ extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
 				    const struct net_device_stats *netdev_stats);
 
 extern int		netdev_max_backlog;
+#ifdef CONFIG_RPS
+extern int		netdev_max_rps_backlog;
+#endif
 extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
 extern int		bpf_jit_enable;
diff --git a/net/core/dev.c b/net/core/dev.c
index 2f94df2..08c99ad 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2734,6 +2734,9 @@ EXPORT_SYMBOL(dev_queue_xmit);
 int netdev_max_backlog __read_mostly = 1000;
 EXPORT_SYMBOL(netdev_max_backlog);
 
+#ifdef CONFIG_RPS
+int netdev_max_rps_backlog __read_mostly = 1000; /* RPS overflow threshold */
+#endif
 int netdev_tstamp_prequeue __read_mostly = 1;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;            /* old backlog weight */
@@ -2834,6 +2837,36 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	return rflow;
 }
 
+/*
+ * Return @cpu unless its backlog has reached netdev_max_rps_backlog; then pick
+ * the online CPU in @map with the shortest backlog (@cpu if none qualifies).
+ */
+static int get_rps_overflow_cpu(int cpu, const struct rps_map *map)
+{
+	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
+	unsigned int qlen, tcpu, min, i;
+
+	if (!map ||
+	    skb_queue_len(&sd->input_pkt_queue) < netdev_max_rps_backlog)
+		return cpu;
+
+	/* Keep 1/8 headroom for flows that rxhash steers to the target. */
+	min = netdev_max_rps_backlog;
+	min -= min >> 3;
+	for (i = 0; i < map->len; i++) {
+		tcpu = map->cpus[i];
+		if (!cpu_online(tcpu))
+			continue;
+		sd = &per_cpu(softnet_data, tcpu);
+		qlen = skb_queue_len(&sd->input_pkt_queue);
+		if (qlen < min) {
+			min = qlen;
+			cpu = tcpu;
+		}
+	}
+	return cpu;
+}
+
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
@@ -2912,7 +2945,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 
 		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
 			*rflowp = rflow;
-			cpu = tcpu;
+			cpu = get_rps_overflow_cpu(tcpu, map);
 			goto done;
 		}
 	}
@@ -2921,7 +2954,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
 
 		if (cpu_online(tcpu)) {
-			cpu = tcpu;
+			cpu = get_rps_overflow_cpu(tcpu, map);
 			goto done;
 		}
 	}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index d1b0804..c1b7829 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -129,6 +129,15 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+#ifdef CONFIG_RPS
+	{
+		.procname	= "netdev_max_rps_backlog",
+		.data		= &netdev_max_rps_backlog,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+#endif
 #ifdef CONFIG_BPF_JIT
 	{
 		.procname	= "bpf_jit_enable",
