Changes to series and rx51_defconfig file for BFQ
[kernel-bfs] / kernel-bfs-2.6.28 / debian / patches / vanilla-2.6.28-anti-io-stalling.patch
1 --- linux-2.6.28.orig/mm/vmscan.c       2008-12-25 00:26:37.000000000 +0100
2 +++ linux-2.6.28/mm/vmscan.c    2010-12-08 12:32:23.203333383 +0100
3 @@ -72,6 +72,12 @@ struct scan_control {
4  
5         int order;
6  
7 +       /*
8 +        * Intend to reclaim enough contiguous memory rather than to reclaim
9 +        * a sufficient amount of memory, i.e. the mode for high-order allocation.
10 +        */
11 +       bool lumpy_reclaim_mode;
12 +
13         /* Which cgroup do we reclaim from */
14         struct mem_cgroup *mem_cgroup;
15  
16 @@ -1024,6 +1030,47 @@ int isolate_lru_page(struct page *page)
17  }
18  
19  /*
20 + * Returns true if the caller should wait to clean dirty/writeback pages.
21 + *
22 + * If we are direct reclaiming for contiguous pages and we do not reclaim
23 + * everything in the list, try again and wait for writeback IO to complete.
24 + * This will stall high-order allocations noticeably. Only do that when really
25 + * need to free the pages under high memory pressure.
26 + */
27 +static inline bool should_reclaim_stall(unsigned long nr_taken,
28 +                                       unsigned long nr_freed,
29 +                                       int priority,
30 +                                       struct scan_control *sc)
31 +{
32 +       int lumpy_stall_priority;
33 +
34 +       /* kswapd should not stall on sync IO */
35 +       if (current_is_kswapd())
36 +               return false;
37 +
38 +       /* Only stall on lumpy reclaim */
39 +       if (!sc->lumpy_reclaim_mode)
40 +               return false;
41 +
42 +       /* If we have reclaimed everything on the isolated list, no stall */
43 +       if (nr_freed == nr_taken)
44 +               return false;
45 +
46 +       /*
47 +        * For high-order allocations, there are two stall thresholds.
48 +        * High-cost allocations stall immediately whereas lower
49 +        * order allocations such as stacks require the scanning
50 +        * priority to be much higher before stalling.
51 +        */
52 +       if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
53 +               lumpy_stall_priority = DEF_PRIORITY;
54 +       else
55 +               lumpy_stall_priority = DEF_PRIORITY / 3;
56 +
57 +       return priority <= lumpy_stall_priority;
58 +}
59 +
60 +/*
61   * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
62   * of reclaimed pages
63   */
64 @@ -1047,7 +1094,7 @@ static unsigned long shrink_inactive_lis
65                 unsigned long nr_freed;
66                 unsigned long nr_active;
67                 unsigned int count[NR_LRU_LISTS] = { 0, };
68 -               int mode = ISOLATE_INACTIVE;
69 +               /* isolation mode is derived from sc->lumpy_reclaim_mode below */
70  
71                 /*
72                  * If we need a large contiguous chunk of memory, or have
73 @@ -1056,13 +1103,11 @@ static unsigned long shrink_inactive_lis
74                  *
75                  * We use the same threshold as pageout congestion_wait below.
76                  */
77 -               if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
78 -                       mode = ISOLATE_BOTH;
79 -               else if (sc->order && priority < DEF_PRIORITY - 2)
80 -                       mode = ISOLATE_BOTH;
81  
82                 nr_taken = sc->isolate_pages(sc->swap_cluster_max,
83 -                            &page_list, &nr_scan, sc->order, mode,
84 +                            &page_list, &nr_scan, sc->order,
85 +                               sc->lumpy_reclaim_mode ?
86 +                                       ISOLATE_BOTH : ISOLATE_INACTIVE,
87                                 zone, sc->mem_cgroup, 0, file);
88                 nr_active = clear_active_flags(&page_list, count);
89                 __count_vm_events(PGDEACTIVATE, nr_active);
90 @@ -1088,16 +1133,8 @@ static unsigned long shrink_inactive_lis
91                 nr_scanned += nr_scan;
92                 nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
93  
94 -               /*
95 -                * If we are direct reclaiming for contiguous pages and we do
96 -                * not reclaim everything in the list, try again and wait
97 -                * for IO to complete. This will stall high-order allocations
98 -                * but that should be acceptable to the caller
99 -                */
100 -               if (nr_freed < nr_taken && !current_is_kswapd() &&
101 -                                       sc->order > PAGE_ALLOC_COSTLY_ORDER) {
102 -                       congestion_wait(WRITE, HZ/10);
103 -
104 +               /* Check if we should synchronously wait for writeback */
105 +               if (should_reclaim_stall(nr_taken, nr_freed, priority, sc)) {
106                         /*
107                          * The attempt at page out may have made some
108                          * of the pages active, mark them inactive again.
109 @@ -1404,6 +1441,20 @@ static void get_scan_ratio(struct zone *
110         percent[1] = 100 - percent[0];
111  }
112  
113 +static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc)
114 +{
115 +       /*
116 +       * If we need a large contiguous chunk of memory, or have
117 +       * trouble getting a small set of contiguous pages, we
118 +       * will reclaim both active and inactive pages.
119 +       */
120 +       if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
121 +               sc->lumpy_reclaim_mode = 1;
122 +       else if (sc->order && priority < DEF_PRIORITY - 2)
123 +               sc->lumpy_reclaim_mode = 1;
124 +       else
125 +               sc->lumpy_reclaim_mode = 0;
126 +}
127  
128  /*
129   * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
130 @@ -1419,6 +1470,8 @@ static unsigned long shrink_zone(int pri
131  
132         get_scan_ratio(zone, sc, percent);
133  
134 +       set_lumpy_reclaim_mode(priority, sc);
135 +
136         for_each_evictable_lru(l) {
137                 if (scan_global_lru(sc)) {
138                         int file = is_file_lru(l);