add anti-I/O-stalling patch by Adam Polkosnik (disabled by default)
authorDennis Groenen <dennis_groenen@hotmail.com>
Fri, 28 Jan 2011 20:33:50 +0000 (21:33 +0100)
committerDennis Groenen <dennis_groenen@hotmail.com>
Fri, 28 Jan 2011 20:33:50 +0000 (21:33 +0100)
kernel-bfs-2.6.28/debian/patches/series
kernel-bfs-2.6.28/debian/patches/vanilla-2.6.28-anti-io-stalling.patch [new file with mode: 0644]

diff --git a/kernel-bfs-2.6.28/debian/patches/series b/kernel-bfs-2.6.28/debian/patches/series
index c7d0ab6..8af8cba 100644
--- a/kernel-bfs-2.6.28/debian/patches/series
+++ b/kernel-bfs-2.6.28/debian/patches/series
@@ -46,3 +46,4 @@ radio-bcm2048.diff
 i2c-battery.diff
 usbhostmode.diff
 bt-mice.diff
+#vanilla-2.6.28-anti-io-stalling.patch
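
Note: the entry is added to debian/patches/series with a leading '#', so the patch is not applied during a build. Assuming the series file is consumed by quilt (or a quilt-compatible loop in debian/rules), where a line starting with '#' is a comment, enabling the patch amounts to dropping the '#' so the entry reads simply:

    vanilla-2.6.28-anti-io-stalling.patch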
diff --git a/kernel-bfs-2.6.28/debian/patches/vanilla-2.6.28-anti-io-stalling.patch b/kernel-bfs-2.6.28/debian/patches/vanilla-2.6.28-anti-io-stalling.patch
new file mode 100644
index 0000000..f66781b
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/vanilla-2.6.28-anti-io-stalling.patch
@@ -0,0 +1,138 @@
+--- linux-2.6.28.orig/mm/vmscan.c      2008-12-25 00:26:37.000000000 +0100
++++ linux-2.6.28/mm/vmscan.c   2010-12-08 12:32:23.203333383 +0100
+@@ -72,6 +72,12 @@ struct scan_control {
+       int order;
++      /*
++       * Intend to reclaim enough contiguous memory rather than just enough
++       * memory, i.e. this is the mode for high-order allocations.
++       */
++      bool lumpy_reclaim_mode;
++
+       /* Which cgroup do we reclaim from */
+       struct mem_cgroup *mem_cgroup;
+@@ -1024,6 +1030,47 @@ int isolate_lru_page(struct page *page)
+ }
+ /*
++ * Returns true if the caller should wait to clean dirty/writeback pages.
++ *
++ * If we are direct reclaiming for contiguous pages and we do not reclaim
++ * everything in the list, try again and wait for writeback IO to complete.
++ * This will stall high-order allocations noticeably. Only do that when we
++ * really need to free pages under high memory pressure.
++ */
++static inline bool should_reclaim_stall(unsigned long nr_taken,
++                                      unsigned long nr_freed,
++                                      int priority,
++                                      struct scan_control *sc)
++{
++      int lumpy_stall_priority;
++
++      /* kswapd should not stall on sync IO */
++      if (current_is_kswapd())
++              return false;
++
++      /* Only stall on lumpy reclaim */
++      if (!sc->lumpy_reclaim_mode)
++              return false;
++
++      /* If we have reclaimed everything on the isolated list, no stall */
++      if (nr_freed == nr_taken)
++              return false;
++
++      /*
++       * For high-order allocations, there are two stall thresholds.
++       * High-cost allocations stall immediately whereas lower
++       * order allocations such as stacks require the scanning
++       * priority to be much higher before stalling.
++       */
++      if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
++              lumpy_stall_priority = DEF_PRIORITY;
++      else
++              lumpy_stall_priority = DEF_PRIORITY / 3;
++
++      return priority <= lumpy_stall_priority;
++}
++
++/*
+  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
+  * of reclaimed pages
+  */
+@@ -1047,7 +1094,7 @@ static unsigned long shrink_inactive_lis
+               unsigned long nr_freed;
+               unsigned long nr_active;
+               unsigned int count[NR_LRU_LISTS] = { 0, };
+-              int mode = ISOLATE_INACTIVE;
++              /* use lumpy: isolation mode now comes from sc->lumpy_reclaim_mode */
+               /*
+                * If we need a large contiguous chunk of memory, or have
+@@ -1056,13 +1103,11 @@ static unsigned long shrink_inactive_lis
+                *
+                * We use the same threshold as pageout congestion_wait below.
+                */
+-              if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+-                      mode = ISOLATE_BOTH;
+-              else if (sc->order && priority < DEF_PRIORITY - 2)
+-                      mode = ISOLATE_BOTH;
+               nr_taken = sc->isolate_pages(sc->swap_cluster_max,
+-                           &page_list, &nr_scan, sc->order, mode,
++                           &page_list, &nr_scan, sc->order, 
++                              sc->lumpy_reclaim_mode ?
++                                      ISOLATE_BOTH : ISOLATE_INACTIVE,
+                               zone, sc->mem_cgroup, 0, file);
+               nr_active = clear_active_flags(&page_list, count);
+               __count_vm_events(PGDEACTIVATE, nr_active);
+@@ -1088,16 +1133,8 @@ static unsigned long shrink_inactive_lis
+               nr_scanned += nr_scan;
+               nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
+-              /*
+-               * If we are direct reclaiming for contiguous pages and we do
+-               * not reclaim everything in the list, try again and wait
+-               * for IO to complete. This will stall high-order allocations
+-               * but that should be acceptable to the caller
+-               */
+-              if (nr_freed < nr_taken && !current_is_kswapd() &&
+-                                      sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+-                      congestion_wait(WRITE, HZ/10);
+-
++              /* Check if we should synchronously wait for writeback */
++              if (should_reclaim_stall(nr_taken, nr_freed, priority, sc)) {
+                       /*
+                        * The attempt at page out may have made some
+                        * of the pages active, mark them inactive again.
+@@ -1404,6 +1441,20 @@ static void get_scan_ratio(struct zone *
+       percent[1] = 100 - percent[0];
+ }
++static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc)
++{
++      /*
++       * If we need a large contiguous chunk of memory, or have
++       * trouble getting a small set of contiguous pages, we
++       * will reclaim both active and inactive pages.
++       */
++      if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
++              sc->lumpy_reclaim_mode = 1;
++      else if (sc->order && priority < DEF_PRIORITY - 2)
++              sc->lumpy_reclaim_mode = 1;
++      else
++              sc->lumpy_reclaim_mode = 0;
++}
+ /*
+  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
+@@ -1419,6 +1470,8 @@ static unsigned long shrink_zone(int pri
+       get_scan_ratio(zone, sc, percent);
++      set_lumpy_reclaim_mode(priority, sc);
++
+       for_each_evictable_lru(l) {
+               if (scan_global_lru(sc)) {
+                       int file = is_file_lru(l);
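
For reference, a minimal userspace sketch of the stall decision this patch introduces, assuming the usual 2.6.28 values DEF_PRIORITY == 12 and PAGE_ALLOC_COSTLY_ORDER == 3 (restated below only so the sketch is self-contained); would_stall() is a hypothetical stand-in for should_reclaim_stall() as seen by a non-kswapd caller already in lumpy reclaim mode:

#include <stdbool.h>
#include <stdio.h>

/* Values as in 2.6.28; duplicated here purely for illustration. */
#define DEF_PRIORITY            12
#define PAGE_ALLOC_COSTLY_ORDER 3

/* Mirrors the stall decision for a non-kswapd caller in lumpy mode. */
static bool would_stall(unsigned long nr_taken, unsigned long nr_freed,
                        int priority, int order)
{
        int lumpy_stall_priority;

        /* Everything on the isolated list was reclaimed: no need to stall. */
        if (nr_freed == nr_taken)
                return false;

        /* Costly orders stall right away, cheaper orders only under pressure. */
        if (order > PAGE_ALLOC_COSTLY_ORDER)
                lumpy_stall_priority = DEF_PRIORITY;
        else
                lumpy_stall_priority = DEF_PRIORITY / 3;        /* == 4 */

        return priority <= lumpy_stall_priority;
}

int main(void)
{
        int priority;

        /* An order-2 allocation that only reclaimed half of what it isolated
         * starts stalling once the scan priority has dropped to 4 or below. */
        for (priority = DEF_PRIORITY; priority >= 0; priority--)
                printf("order 2, priority %2d -> %s\n", priority,
                       would_stall(32, 16, priority, 2) ? "stall" : "no stall");

        return 0;
}

With the code this patch removes from shrink_inactive_list(), such an order-2 request would never have waited on writeback (only sc->order > PAGE_ALLOC_COSTLY_ORDER did); after the patch it can do so once the scan priority has fallen to DEF_PRIORITY / 3, while kswapd itself still never stalls.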