1 --- kernel-2.6.28/mm/vmscan.c.orig 2009-05-02 14:54:43.000000000 -0400
2 +++ kernel-2.6.28/mm/vmscan.c 2010-11-11 12:06:49.955635002 -0500
3 @@ -72,6 +72,12 @@ struct scan_control {
8 + * Intend to reclaim enough contiguous memory rather than to reclaim
9 + * enough amount of memory. I.e., it's the mode for high order allocation.
11 + bool lumpy_reclaim_mode;
13 /* Which cgroup do we reclaim from */
14 struct mem_cgroup *mem_cgroup;
16 @@ -1024,6 +1030,47 @@ int isolate_lru_page(struct page *page)
20 + * Returns true if the caller should wait to clean dirty/writeback pages.
22 + * If we are direct reclaiming for contiguous pages and we do not reclaim
23 + * everything in the list, try again and wait for writeback IO to complete.
24 + * This will stall high-order allocations noticeably. Only do that when we
25 + * really need to free the pages under high memory pressure.
27 +static inline bool should_reclaim_stall(unsigned long nr_taken,
28 + unsigned long nr_freed,
30 + struct scan_control *sc)
32 + int lumpy_stall_priority;
34 + /* kswapd should not stall on sync IO */
35 + if (current_is_kswapd())
38 + /* Only stall on lumpy reclaim */
39 + if (!sc->lumpy_reclaim_mode)
42 + /* If we have reclaimed everything on the isolated list, no stall */
43 + if (nr_freed == nr_taken)
47 + * For high-order allocations, there are two stall thresholds.
48 + * High-cost allocations stall immediately whereas lower
49 + * order allocations such as stacks require the scanning
50 + * priority to be much higher before stalling.
52 + if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
53 + lumpy_stall_priority = DEF_PRIORITY;
55 + lumpy_stall_priority = DEF_PRIORITY / 3;
57 + return priority <= lumpy_stall_priority;
61 * shrink_inactive_list() is a helper for shrink_zone(). It returns the number
64 @@ -1047,7 +1094,7 @@ static unsigned long shrink_inactive_lis
65 unsigned long nr_freed;
66 unsigned long nr_active;
67 unsigned int count[NR_LRU_LISTS] = { 0, };
68 - int mode = ISOLATE_INACTIVE;
69 +// use lumpy int mode = ISOLATE_INACTIVE;
72 * If we need a large contiguous chunk of memory, or have
73 @@ -1056,13 +1103,11 @@ static unsigned long shrink_inactive_lis
75 * We use the same threshold as pageout congestion_wait below.
77 - if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
78 - mode = ISOLATE_BOTH;
79 - else if (sc->order && priority < DEF_PRIORITY - 2)
80 - mode = ISOLATE_BOTH;
82 nr_taken = sc->isolate_pages(sc->swap_cluster_max,
83 - &page_list, &nr_scan, sc->order, mode,
84 + &page_list, &nr_scan, sc->order,
85 + sc->lumpy_reclaim_mode ?
86 + ISOLATE_BOTH : ISOLATE_INACTIVE,
87 zone, sc->mem_cgroup, 0, file);
88 nr_active = clear_active_flags(&page_list, count);
89 __count_vm_events(PGDEACTIVATE, nr_active);
90 @@ -1088,16 +1133,8 @@ static unsigned long shrink_inactive_lis
91 nr_scanned += nr_scan;
92 nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
95 - * If we are direct reclaiming for contiguous pages and we do
96 - * not reclaim everything in the list, try again and wait
97 - * for IO to complete. This will stall high-order allocations
98 - * but that should be acceptable to the caller
100 - if (nr_freed < nr_taken && !current_is_kswapd() &&
101 - sc->order > PAGE_ALLOC_COSTLY_ORDER) {
102 - congestion_wait(WRITE, HZ/10);
104 + /* Check if we should synchronously wait for writeback */
105 + if (should_reclaim_stall(nr_taken, nr_freed, priority, sc)) {
107 * The attempt at page out may have made some
108 * of the pages active, mark them inactive again.
109 @@ -1404,6 +1441,20 @@ static void get_scan_ratio(struct zone *
110 percent[1] = 100 - percent[0];
113 +static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc)
116 + * If we need a large contiguous chunk of memory, or have
117 + * trouble getting a small set of contiguous pages, we
118 + * will reclaim both active and inactive pages.
120 + if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
121 + sc->lumpy_reclaim_mode = 1;
122 + else if (sc->order && priority < DEF_PRIORITY - 2)
123 + sc->lumpy_reclaim_mode = 1;
125 + sc->lumpy_reclaim_mode = 0;
129 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
130 @@ -1419,6 +1470,8 @@ static unsigned long shrink_zone(int pri
132 get_scan_ratio(zone, sc, percent);
134 + set_lumpy_reclaim_mode(priority, sc);
136 for_each_evictable_lru(l) {
137 if (scan_global_lru(sc)) {
138 int file = is_file_lru(l);