From: Peter Hunt
Date: Thu, 26 May 2011 11:24:34 +0000 (+0000)
Subject: Another patchset from CK to lighten vm load
X-Git-Url: http://vcs.maemo.org/git/?p=kernel-bfs;a=commitdiff_plain;h=6b62552431c7be94512dc8ec2f4d3b77cd4d2938

Another patchset from CK to lighten vm load
---

diff --git a/kernel-bfs-2.6.28/debian/patches/mm-background_scan-2.patch b/kernel-bfs-2.6.28/debian/patches/mm-background_scan-2.patch
new file mode 100644
index 0000000..80286f8
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/mm-background_scan-2.patch
@@ -0,0 +1,138 @@
+Add a background scanning timer that restores the watermarks to the pages_lots
+level, and only fire it if kswapd has not been called upon for the last 5
+seconds. This allows us to balance all zones to the more generous pages_lots
+watermark at a time unrelated to page allocation, thus leading to lighter
+levels of vm load when kswapd is called upon during page allocation.
+
+Signed-off-by: Con Kolivas
+
+The -ck patches modify mm/vmscan.c and add a timer to wake up kswapd every 5
+seconds. This timer is initialized after the creation of the kswapd thread.
+
+The kswapd() thread function calls mod_timer() at the front of its infinite
+service loop (to reset the timer to 5 seconds in the future). mod_timer()
+includes a BUG_ON() to assert that the timer's callback function is set.
+
+Since the wakeup timer is initialized after the kswapd thread is created, if
+kswapd gets scheduled before kswapd_run() has prepared the timer, the
+BUG_ON() check will throw a stack trace and immediately terminate the kswapd
+thread.
+
+This patch modifies the kswapd_run() function in mm/vmscan.c to initialize the
+watermark timer before starting the kswapd thread.
+
+Signed-off-by: Chase Venters
+
+ include/linux/mmzone.h |    2 ++
+ mm/vmscan.c            |   42 ++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 44 insertions(+)
+
+Index: linux-2.6.22-ck1/include/linux/mmzone.h
+===================================================================
+--- linux-2.6.22-ck1.orig/include/linux/mmzone.h	2007-07-09 18:44:40.000000000 +1000
++++ linux-2.6.22-ck1/include/linux/mmzone.h	2007-07-09 18:44:40.000000000 +1000
+@@ -13,6 +13,7 @@
+ #include
+ #include
+ #include
++#include <linux/timer.h>
+ #include
+ #include
+ #include
+@@ -452,6 +453,7 @@ typedef struct pglist_data {
+ 	wait_queue_head_t kswapd_wait;
+ 	struct task_struct *kswapd;
+ 	int kswapd_max_order;
++	struct timer_list watermark_timer;
+ } pg_data_t;
+ 
+ #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
+Index: linux-2.6.22-ck1/mm/vmscan.c
+===================================================================
+--- linux-2.6.22-ck1.orig/mm/vmscan.c	2007-07-09 18:44:40.000000000 +1000
++++ linux-2.6.22-ck1/mm/vmscan.c	2007-07-09 18:44:40.000000000 +1000
+@@ -37,6 +37,7 @@
+ #include
+ #include
+ #include
++#include <linux/timer.h>
+ #include
+ #include
+ #include
+@@ -1330,6 +1331,8 @@ out:
+ 	return nr_reclaimed;
+ }
+ 
++#define WT_EXPIRY	(HZ * 5)	/* Time to wakeup watermark_timer */
++
+ /*
+  * The background pageout daemon, started as a kernel thread
+  * from the init process.
+@@ -1377,6 +1380,8 @@ static int kswapd(void *p)
+ 	for ( ; ; ) {
+ 		unsigned long new_order;
+ 
++		/* kswapd has been busy so delay watermark_timer */
++		mod_timer(&pgdat->watermark_timer, jiffies + WT_EXPIRY);
+ 		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
+ 		new_order = pgdat->kswapd_max_order;
+ 		pgdat->kswapd_max_order = 0;
+@@ -1604,20 +1609,57 @@ static int __devinit cpu_callback(struct
+ }
+ 
+ /*
++ * We wake up kswapd every WT_EXPIRY till free ram is above pages_lots
++ */
++static void watermark_wakeup(unsigned long data)
++{
++	pg_data_t *pgdat = (pg_data_t *)data;
++	struct timer_list *wt = &pgdat->watermark_timer;
++	int i;
++
++	if (!waitqueue_active(&pgdat->kswapd_wait) || above_background_load())
++		goto out;
++	for (i = pgdat->nr_zones - 1; i >= 0; i--) {
++		struct zone *z = pgdat->node_zones + i;
++
++		if (!populated_zone(z) || is_highmem(z)) {
++			/* We are better off leaving highmem full */
++			continue;
++		}
++		if (!zone_watermark_ok(z, 0, z->pages_lots, 0, 0)) {
++			wake_up_interruptible(&pgdat->kswapd_wait);
++			goto out;
++		}
++	}
++out:
++	mod_timer(wt, jiffies + WT_EXPIRY);
++	return;
++}
++
++/*
+  * This kswapd start function will be called by init and node-hot-add.
+  * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
+  */
+ int kswapd_run(int nid)
+ {
+ 	pg_data_t *pgdat = NODE_DATA(nid);
++	struct timer_list *wt;
+ 	int ret = 0;
+ 
+ 	if (pgdat->kswapd)
+ 		return 0;
+ 
++	wt = &pgdat->watermark_timer;
++	init_timer(wt);
++	wt->data = (unsigned long)pgdat;
++	wt->function = watermark_wakeup;
++	wt->expires = jiffies + WT_EXPIRY;
++	add_timer(wt);
++
+ 	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
+ 	if (IS_ERR(pgdat->kswapd)) {
+ 		/* failure at boot is fatal */
++		del_timer(wt);
+ 		BUG_ON(system_state == SYSTEM_BOOTING);
+ 		printk("Failed to start kswapd on node %d\n",nid);
+ 		ret = -1;
+
diff --git a/kernel-bfs-2.6.28/debian/patches/mm-lots_watermark.diff b/kernel-bfs-2.6.28/debian/patches/mm-lots_watermark.diff
new file mode 100644
index 0000000..c756bd6
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/mm-lots_watermark.diff
@@ -0,0 +1,108 @@
+The vm currently performs scanning when allocating ram once the watermarks
+are below the pages_low value and tries to restore them to the pages_high
+watermark. The disadvantage of this is that we are scanning most aggressively
+at the same time we are allocating ram, regardless of the stress the vm is
+under. Add a pages_lots watermark and allow the watermark to be relaxed
+according to the stress the vm is under at the time (according to the priority
+value). Thus we have more in reserve next time we are allocating ram and end
+up scanning less aggressively. Note the actual pages_lots isn't used directly
+in this code.
+
+Signed-off-by: Con Kolivas
+
+ include/linux/mmzone.h |    2 +-
+ mm/page_alloc.c        |    3 +++
+ mm/vmscan.c            |   17 ++++++++++++++---
+ 3 files changed, 18 insertions(+), 4 deletions(-)
+
+Index: linux-2.6.22-ck1/include/linux/mmzone.h
+===================================================================
+--- linux-2.6.22-ck1.orig/include/linux/mmzone.h	2007-07-09 18:44:34.000000000 +1000
++++ linux-2.6.22-ck1/include/linux/mmzone.h	2007-07-09 18:44:39.000000000 +1000
+@@ -181,7 +181,7 @@ enum zone_type {
+ 
+ struct zone {
+ 	/* Fields commonly accessed by the page allocator */
+-	unsigned long pages_min, pages_low, pages_high;
++	unsigned long pages_min, pages_low, pages_high, pages_lots;
+ 	/*
+ 	 * We don't know if the memory that we're going to allocate will be freeable
+ 	 * or/and it will be released eventually, so to avoid totally wasting several
+Index: linux-2.6.22-ck1/mm/page_alloc.c
+===================================================================
+--- linux-2.6.22-ck1.orig/mm/page_alloc.c	2007-07-09 18:44:34.000000000 +1000
++++ linux-2.6.22-ck1/mm/page_alloc.c	2007-07-09 18:44:39.000000000 +1000
+@@ -1570,6 +1570,7 @@ void show_free_areas(void)
+ 			" min:%lukB"
+ 			" low:%lukB"
+ 			" high:%lukB"
++			" lots:%lukB"
+ 			" active_anon:%lukB"
+ 			" inactive_anon:%lukB"
+ 			" active_file:%lukB"
+@@ -1581,6 +1582,7 @@ void show_free_areas(void)
+ 			K(zone->pages_min),
+ 			K(zone->pages_low),
+ 			K(zone->pages_high),
++			K(zone->pages_lots),
+ 			K(zone_page_state(zone, NR_ACTIVE_ANON)),
+ 			K(zone_page_state(zone, NR_INACTIVE_ANON)),
+ 			K(zone_page_state(zone, NR_ACTIVE_FILE)),
+@@ -3142,6 +3144,7 @@ void setup_per_zone_pages_min(void)
+ 
+ 		zone->pages_low  = zone->pages_min + (tmp >> 2);
+ 		zone->pages_high = zone->pages_min + (tmp >> 1);
++		zone->pages_lots = zone->pages_min + tmp;
+ 		setup_zone_migrate_reserve(zone);
+ 		spin_unlock_irqrestore(&zone->lock, flags);
+ 	}
+Index: linux-2.6.22-ck1/mm/vmscan.c
+===================================================================
+--- linux-2.6.22-ck1.orig/mm/vmscan.c	2007-07-09 18:44:39.000000000 +1000
++++ linux-2.6.22-ck1/mm/vmscan.c	2007-07-09 18:44:39.000000000 +1000
+@@ -1171,6 +1171,7 @@ loop_again:
+ 		 */
+ 		for (i = pgdat->nr_zones - 1; i >= 0; i--) {
+ 			struct zone *zone = pgdat->node_zones + i;
++			unsigned long watermark;
+ 
+ 			if (!populated_zone(zone))
+ 				continue;
+@@ -1178,8 +1179,14 @@ loop_again:
+ 				shrink_active_list(SWAP_CLUSTER_MAX, zone,
+ 							&sc, priority, 0);
+ 
+-			if (!zone_watermark_ok(zone, order, zone->pages_high,
+-					       0, 0)) {
++			/*
++			 * The watermark is relaxed depending on the
++			 * level of "priority" till it drops to
++			 * pages_high.
++			 */
++			watermark = zone->pages_high + (zone->pages_high *
++				priority / DEF_PRIORITY);
++			if (!zone_watermark_ok(zone, order, watermark, 0, 0)) {
+ 				end_zone = i;
+ 				break;
+ 			}
+@@ -1206,6 +1213,7 @@ loop_again:
+ 		for (i = 0; i <= end_zone; i++) {
+ 			struct zone *zone = pgdat->node_zones + i;
+ 			int nr_slab;
++			unsigned long watermark;
+ 
+ 			if (!populated_zone(zone))
+ 				continue;
+@@ -1213,7 +1221,10 @@ loop_again:
+ 			    priority != DEF_PRIORITY)
+ 				continue;
+ 
+-			if (!zone_watermark_ok(zone, order, zone->pages_high,
++			watermark = zone->pages_high + (zone->pages_high *
++				priority / DEF_PRIORITY);
++
++			if (!zone_watermark_ok(zone, order, watermark,
+ 					       end_zone, 0))
+ 				all_zones_ok = 0;
+ 			temp_priority[i] = priority;
+
diff --git a/kernel-bfs-2.6.28/debian/patches/series b/kernel-bfs-2.6.28/debian/patches/series
index bc219dd..3339ffd 100644
--- a/kernel-bfs-2.6.28/debian/patches/series
+++ b/kernel-bfs-2.6.28/debian/patches/series
@@ -38,8 +38,10 @@ sched-add-above-background-load-function.patch
 mm-make_swappiness_really_mean_it.patch
 mm-enable_swaptoken_only_when_swap_full.patch
 mm-drop_swap_cache_aggressively.patch
+mm-lots_watermark.diff
 mm-kswapd_inherit_prio-1.patch
 mm-idleprio_prio-1.patch
+mm-background_scan-2.patch
 mm-lru_cache_add_lru_tail.patch
 hz-raise_max.patch
 cpufreq-bfs_tweaks.patch
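
For reference, the watermark arithmetic these two patches rely on can be
sketched as a small userspace C program. This is only an illustration under
assumed numbers (the pages_min and tmp values of 1024 pages are invented;
DEF_PRIORITY is 12 in this kernel), not kernel code: setup_per_zone_pages_min()
derives pages_low, pages_high and the new pages_lots from the per-zone tmp
value, and balance_pgdat() relaxes its target from roughly 2 * pages_high at
DEF_PRIORITY down to pages_high as the reclaim priority falls, which is why
scanning is gentler when the vm is not under stress.

/*
 * Illustration only: recomputes the -ck watermark arithmetic in userspace
 * with invented zone numbers; nothing here is kernel code.
 */
#include <stdio.h>

#define DEF_PRIORITY 12			/* as in mm/vmscan.c */

int main(void)
{
	/* hypothetical zone: pages_min and the "tmp" scaling value, in pages */
	unsigned long pages_min = 1024;
	unsigned long tmp = 1024;

	unsigned long pages_low  = pages_min + (tmp >> 2);	/* min + 25%  */
	unsigned long pages_high = pages_min + (tmp >> 1);	/* min + 50%  */
	unsigned long pages_lots = pages_min + tmp;		/* min + 100% */
	int priority;

	printf("min=%lu low=%lu high=%lu lots=%lu\n",
	       pages_min, pages_low, pages_high, pages_lots);

	/*
	 * balance_pgdat(): the target relaxes from 2*pages_high at
	 * DEF_PRIORITY down to pages_high as priority reaches 0
	 * (i.e. as vm stress grows).
	 */
	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
		unsigned long watermark = pages_high +
			(pages_high * priority / DEF_PRIORITY);
		printf("priority=%2d watermark=%lu\n", priority, watermark);
	}
	return 0;
}

Compiled and run, it prints the four per-zone watermarks and the relaxed
target for each priority level, from 2 * pages_high down to pages_high.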