--- /dev/null
+--- linux-2.6.28.orig/mm/vmscan.c 2008-12-25 00:26:37.000000000 +0100
++++ linux-2.6.28/mm/vmscan.c 2010-12-08 12:32:23.203333383 +0100
+@@ -72,6 +72,12 @@ struct scan_control {
+
+ int order;
+
++ /*
++ * Intend to reclaim enough contiguous memory rather than to reclaim
++ * a sufficient amount of memory. I.e., it's the mode for high-order allocation.
++ */
++ bool lumpy_reclaim_mode;
++
+ /* Which cgroup do we reclaim from */
+ struct mem_cgroup *mem_cgroup;
+
+@@ -1024,6 +1030,47 @@ int isolate_lru_page(struct page *page)
+ }
+
+ /*
++ * Returns true if the caller should wait to clean dirty/writeback pages.
++ *
++ * If we are direct reclaiming for contiguous pages and we do not reclaim
++ * everything in the list, try again and wait for writeback IO to complete.
++ * This will stall high-order allocations noticeably. Only do that when we
++ * really need to free the pages under high memory pressure.
++ */
++static inline bool should_reclaim_stall(unsigned long nr_taken,
++ unsigned long nr_freed,
++ int priority,
++ struct scan_control *sc)
++{
++ int lumpy_stall_priority;
++
++ /* kswapd should not stall on sync IO */
++ if (current_is_kswapd())
++ return false;
++
++ /* Only stall on lumpy reclaim */
++ if (!sc->lumpy_reclaim_mode)
++ return false;
++
++ /* If we have reclaimed everything on the isolated list, no stall */
++ if (nr_freed == nr_taken)
++ return false;
++
++ /*
++ * For high-order allocations, there are two stall thresholds.
++ * High-cost allocations stall immediately where as lower
++ * order allocations such as stacks require the scanning
++ * priority to be much higher before stalling.
++ */
++ if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
++ lumpy_stall_priority = DEF_PRIORITY;
++ else
++ lumpy_stall_priority = DEF_PRIORITY / 3;
++
++ return priority <= lumpy_stall_priority;
++}
++
++/*
+ * shrink_inactive_list() is a helper for shrink_zone(). It returns the number
+ * of reclaimed pages
+ */
+@@ -1047,7 +1094,6 @@ static unsigned long shrink_inactive_lis
+ unsigned long nr_freed;
+ unsigned long nr_active;
+ unsigned int count[NR_LRU_LISTS] = { 0, };
+- int mode = ISOLATE_INACTIVE;
+
+ /*
+ * If we need a large contiguous chunk of memory, or have
+@@ -1056,13 +1103,11 @@ static unsigned long shrink_inactive_lis
+ *
+ * We use the same threshold as pageout congestion_wait below.
+ */
+- if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+- mode = ISOLATE_BOTH;
+- else if (sc->order && priority < DEF_PRIORITY - 2)
+- mode = ISOLATE_BOTH;
+
+ nr_taken = sc->isolate_pages(sc->swap_cluster_max,
+- &page_list, &nr_scan, sc->order, mode,
++ &page_list, &nr_scan, sc->order,
++ sc->lumpy_reclaim_mode ?
++ ISOLATE_BOTH : ISOLATE_INACTIVE,
+ zone, sc->mem_cgroup, 0, file);
+ nr_active = clear_active_flags(&page_list, count);
+ __count_vm_events(PGDEACTIVATE, nr_active);
+@@ -1088,16 +1133,8 @@ static unsigned long shrink_inactive_lis
+ nr_scanned += nr_scan;
+ nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
+
+- /*
+- * If we are direct reclaiming for contiguous pages and we do
+- * not reclaim everything in the list, try again and wait
+- * for IO to complete. This will stall high-order allocations
+- * but that should be acceptable to the caller
+- */
+- if (nr_freed < nr_taken && !current_is_kswapd() &&
+- sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+- congestion_wait(WRITE, HZ/10);
+-
++ /* Check if we should synchronously wait for writeback */
++ if (should_reclaim_stall(nr_taken, nr_freed, priority, sc)) {
+ /*
+ * The attempt at page out may have made some
+ * of the pages active, mark them inactive again.
+@@ -1404,6 +1441,20 @@ static void get_scan_ratio(struct zone *
+ percent[1] = 100 - percent[0];
+ }
+
++static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc)
++{
++ /*
++ * If we need a large contiguous chunk of memory, or have
++ * trouble getting a small set of contiguous pages, we
++ * will reclaim both active and inactive pages.
++ */
++ if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
++ sc->lumpy_reclaim_mode = 1;
++ else if (sc->order && priority < DEF_PRIORITY - 2)
++ sc->lumpy_reclaim_mode = 1;
++ else
++ sc->lumpy_reclaim_mode = 0;
++}
+
+ /*
+ * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
+@@ -1419,6 +1470,8 @@ static unsigned long shrink_zone(int pri
+
+ get_scan_ratio(zone, sc, percent);
+
++ set_lumpy_reclaim_mode(priority, sc);
++
+ for_each_evictable_lru(l) {
+ if (scan_global_lru(sc)) {
+ int file = is_file_lru(l);