@@ -943,8 +943,9 @@ EXPORT_SYMBOL(congestion_wait);
* jiffies for either a BDI to exit congestion of the given @sync queue
* or a write to complete.
*
- * In the absence of zone congestion, cond_resched() is called to yield
- * the processor if necessary but otherwise does not sleep.
+ * In the absence of zone congestion, a short sleep or a cond_resched is
+ * performed to yield the processor and to allow other subsystems to make
+ * a forward progress.
*
* The return value is 0 if the sleep is for the full timeout. Otherwise,
* it is the number of jiffies that were still remaining when the function
@@ -964,7 +965,19 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
*/
if (atomic_read(&nr_wb_congested[sync]) == 0 ||
!test_bit(ZONE_CONGESTED, &zone->flags)) {
- cond_resched();
+
+ /*
+ * Memory allocation/reclaim might be called from a WQ
+ * context and the current implementation of the WQ
+ * concurrency control doesn't recognize that a particular
+ * WQ is congested if the worker thread is looping without
+ * ever sleeping. Therefore we have to do a short sleep
+ * here rather than calling cond_resched().
+ */
+ if (current->flags & PF_WQ_WORKER)
+ schedule_timeout(1);
+ else
+ cond_resched();
/* In case we scheduled, work out time remaining */
ret = timeout - (jiffies - start);
@@ -1357,6 +1357,7 @@ static const struct file_operations proc_vmstat_file_operations = {
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_SMP
+static struct workqueue_struct *vmstat_wq;
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;
static cpumask_var_t cpu_stat_off;
@@ -1369,7 +1370,8 @@ static void vmstat_update(struct work_struct *w)
* to occur in the future. Keep on running the
* update worker thread.
*/
- schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+ queue_delayed_work(vmstat_wq,
+ this_cpu_ptr(&vmstat_work),
round_jiffies_relative(sysctl_stat_interval));
else {
/*
@@ -1437,7 +1439,7 @@ static void vmstat_shepherd(struct work_struct *w)
if (need_update(cpu) &&
cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
- schedule_delayed_work_on(cpu,
+ queue_delayed_work_on(cpu, vmstat_wq,
&per_cpu(vmstat_work, cpu), 0);
put_online_cpus();
@@ -1526,6 +1528,7 @@ static int __init setup_vmstat(void)
start_shepherd_timer();
cpu_notifier_register_done();
+ vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
#endif
#ifdef CONFIG_PROC_FS
proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);