From: Peter Hunt
Date: Wed, 25 May 2011 21:12:06 +0000 (+0000)
Subject: Several kernel patches by Con Kolivas to complement BFS, adapted for 2.6.28
X-Git-Url: http://vcs.maemo.org/git/?p=kernel-bfs;a=commitdiff_plain;h=5d0f568d98ef9abe3f6f361733ca98603afa2b80

Several kernel patches by Con Kolivas to complement BFS, adapted for 2.6.28
---

diff --git a/kernel-bfs-2.6.28/debian/patches/cpufreq-bfs_tweaks.patch b/kernel-bfs-2.6.28/debian/patches/cpufreq-bfs_tweaks.patch
new file mode 100644
index 0000000..877b6bf
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/cpufreq-bfs_tweaks.patch
@@ -0,0 +1,41 @@
+Because of the way BFS works it needs to transition up in frequency more
+aggressively and down more conservatively.
+
+-ck
+
+---
+ drivers/cpufreq/cpufreq_ondemand.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+Index: linux-2.6.34-ck1/drivers/cpufreq/cpufreq_ondemand.c
+===================================================================
+--- linux-2.6.34-ck1.orig/drivers/cpufreq/cpufreq_ondemand.c 2010-02-25 21:51:48.000000000 +1100
++++ linux-2.6.34-ck1/drivers/cpufreq/cpufreq_ondemand.c 2010-05-18 12:26:18.124319654 +1000
+@@ -28,10 +28,10 @@
+ * It helps to keep variable names smaller, simpler
+ */
+
+-#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10)
+-#define DEF_FREQUENCY_UP_THRESHOLD (80)
++#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (17)
++#define DEF_FREQUENCY_UP_THRESHOLD (63)
+ #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3)
+-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
++#define MICRO_FREQUENCY_UP_THRESHOLD (80)
+ #define MIN_FREQUENCY_UP_THRESHOLD (11)
+ #define MAX_FREQUENCY_UP_THRESHOLD (100)
+
+@@ -455,10 +455,10 @@ static void dbs_check_cpu(struct cpu_dbs
+
+ /*
+ * Every sampling_rate, we check, if current idle time is less
+- * than 20% (default), then we try to increase frequency
++ * than 37% (default), then we try to increase frequency
+ * Every sampling_rate, we look for a the lowest
+ * frequency which can sustain the load while keeping idle time over
+- * 30%. If such a frequency exist, we try to decrease to this frequency.
++ * 50%. If such a frequency exist, we try to decrease to this frequency.
+ *
+ * Any frequency increase takes it to the maximum frequency.
+ * Frequency reduction happens at minimum steps of
+
diff --git a/kernel-bfs-2.6.28/debian/patches/mm-drop_swap_cache_aggressively.patch b/kernel-bfs-2.6.28/debian/patches/mm-drop_swap_cache_aggressively.patch
new file mode 100644
index 0000000..6443346
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/mm-drop_swap_cache_aggressively.patch
@@ -0,0 +1,65 @@
+While it may be nice to have a copy of pages on swap once written there, the
+more garbage we leave in the swapspace the slower any further writes and
+reads to and from it are. Just free swapcache whenever we can.
+
+-ck
+
+---
+ include/linux/swap.h | 2 +-
+ mm/memory.c | 2 +-
+ mm/swapfile.c | 9 ++++-----
+ mm/vmscan.c | 2 +-
+ 4 files changed, 7 insertions(+), 8 deletions(-)
+
+Index: linux-2.6.34-ck1/mm/memory.c
+===================================================================
+--- linux-2.6.34-ck1.orig/mm/memory.c 2010-05-18 12:24:33.852194874 +1000
++++ linux-2.6.34-ck1/mm/memory.c 2010-05-18 12:26:16.646319673 +1000
+@@ -2713,7 +2713,7 @@ static int do_swap_page(struct mm_struct
+ page_add_anon_rmap(page, vma, address);
+
+ swap_free(entry);
+- if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
++ if ((vma->vm_flags & VM_LOCKED) || PageMlocked(page))
+ remove_exclusive_swap_page(page);
+ unlock_page(page);
+
+Index: linux-2.6.34-ck1/mm/swapfile.c
+===================================================================
+@@ -712,8 +712,7 @@ int free_swap_and_cache(swp_entry_t entr
+ one_user = (page_count(page) == 2);
+ /* Only cache user (+us), or swap space full? Free it! */
+ /* Also recheck PageSwapCache after page is locked (above) */
+- if (PageSwapCache(page) && !PageWriteback(page) &&
+- (one_user || vm_swap_full())) {
++ if (PageSwapCache(page) && !PageWriteback(page)) {
+ delete_from_swap_cache(page);
+ SetPageDirty(page);
+ }
+Index: linux-2.6.34-ck1/mm/vmscan.c
+===================================================================
+--- linux-2.6.34-ck1.orig/mm/vmscan.c 2010-05-18 12:26:16.371569589 +1000
++++ linux-2.6.34-ck1/mm/vmscan.c 2010-05-18 12:26:16.647319427 +1000
+@@ -821,7 +821,7 @@ cull_mlocked:
+
+ activate_locked:
+ /* Not a candidate for swapping, so reclaim swap space. */
+- if (PageSwapCache(page) && vm_swap_full())
++ if (PageSwapCache(page))
+ remove_exclusive_swap_page_ref(page);
+ VM_BUG_ON(PageActive(page));
+ SetPageActive(page);
+Index: linux-2.6.34-ck1/include/linux/swap.h
+===================================================================
+--- linux-2.6.34-ck1.orig/include/linux/swap.h 2010-05-18 12:26:16.508569731 +1000
++++ linux-2.6.34-ck1/include/linux/swap.h 2010-05-18 12:26:16.647319427 +1000
+@@ -189,7 +189,7 @@ struct swap_list_t {
+ int next; /* swapfile to be used next */
+ };
+
+-/* Swap 50% full? Release swapcache more aggressively.. */
++/* Swap 50% full? */
+ #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
+
+ /* linux/mm/page_alloc.c */
+
diff --git a/kernel-bfs-2.6.28/debian/patches/mm-enable_swaptoken_only_when_swap_full.patch b/kernel-bfs-2.6.28/debian/patches/mm-enable_swaptoken_only_when_swap_full.patch
new file mode 100644
index 0000000..ab73900
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/mm-enable_swaptoken_only_when_swap_full.patch
@@ -0,0 +1,26 @@
+The swap token is only useful in conditions of swap thrash, and actually
+worsens the common case by causing more swapping. Make it only have an effect
+when swap is more than half full.
+
+-ck
+
+---
+ include/linux/swap.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+Index: linux-2.6.39-ck1/include/linux/swap.h
+===================================================================
+--- linux-2.6.39-ck1.orig/include/linux/swap.h 2011-05-19 16:29:16.642275387 +1000
++++ linux-2.6.39-ck1/include/linux/swap.h 2011-05-19 19:36:35.917273667 +1000
+@@ -358,9 +358,10 @@ extern struct mm_struct *swap_token_mm;
+ extern void grab_swap_token(struct mm_struct *);
+ extern void __put_swap_token(struct mm_struct *);
+
++/* Only allow swap token to have effect if swap is full */
+ static inline int has_swap_token(struct mm_struct *mm)
+ {
+- return (mm == swap_token_mm);
++ return (mm == swap_token_mm && vm_swap_full());
+ }
+
+ static inline void put_swap_token(struct mm_struct *mm)
diff --git a/kernel-bfs-2.6.28/debian/patches/mm-idleprio_prio-1.patch b/kernel-bfs-2.6.28/debian/patches/mm-idleprio_prio-1.patch
new file mode 100644
index 0000000..e80dad7
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/mm-idleprio_prio-1.patch
@@ -0,0 +1,38 @@
+Set the effective priority of idleprio tasks to that of nice 19 tasks when
+modifying vm reclaim behaviour.
+
+Signed-off-by: Con Kolivas
+
+ include/linux/sched.h | 2 +-
+ mm/vmscan.c | 2 ++
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+Index: linux-2.6.34-ck1/mm/vmscan.c
+===================================================================
+--- linux-2.6.34-ck1.orig/mm/vmscan.c 2010-05-18 12:26:16.942194964 +1000
++++ linux-2.6.34-ck1/mm/vmscan.c 2010-05-18 12:26:17.090444482 +1000
+@@ -1711,6 +1711,8 @@ static inline int effective_sc_prio(stru
+ if (likely(p->mm)) {
+ if (rt_task(p))
+ return -20;
++ if (p->policy == SCHED_IDLEPRIO)
++ return 19;
+ return task_nice(p);
+ }
+ return 0;
+Index: linux-2.6.34-ck1/include/linux/sched.h
+===================================================================
+--- linux-2.6.34-ck1.orig/include/linux/sched.h 2010-05-18 12:26:16.086194917 +1000
++++ linux-2.6.34-ck1/include/linux/sched.h 2010-05-18 12:26:17.091445870 +1000
+@@ -38,9 +38,9 @@
+ #define SCHED_BATCH 3
+ /* SCHED_ISO: Implemented on BFS only */
+ #define SCHED_IDLE 5
++#define SCHED_IDLEPRIO SCHED_IDLE
+ #ifdef CONFIG_SCHED_BFS
+ #define SCHED_ISO 4
+-#define SCHED_IDLEPRIO SCHED_IDLE
+ #define SCHED_MAX (SCHED_IDLEPRIO)
+ #define SCHED_RANGE(policy) ((policy) <= SCHED_MAX)
+ #endif
+
diff --git a/kernel-bfs-2.6.28/debian/patches/mm-kswapd_inherit_prio-1.patch b/kernel-bfs-2.6.28/debian/patches/mm-kswapd_inherit_prio-1.patch
new file mode 100644
index 0000000..0467603
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/mm-kswapd_inherit_prio-1.patch
@@ -0,0 +1,75 @@
+When kswapd is awoken due to reclaim by a running task, set the priority of
+kswapd to that of the calling task thus making memory reclaim cpu activity
+affected by nice level.
+
+Signed-off-by: Con Kolivas
+
+ mm/vmscan.c | 33 ++++++++++++++++++++++++++++++++-
+ 1 file changed, 32 insertions(+), 1 deletion(-)
+
+Index: linux-2.6.34-ck1/mm/vmscan.c
+===================================================================
+--- linux-2.6.34-ck1.orig/mm/vmscan.c 2010-05-18 12:26:16.647319427 +1000
++++ linux-2.6.34-ck1/mm/vmscan.c 2010-05-18 12:26:16.805569620 +1000
+@@ -1697,6 +1697,33 @@ static void shrink_zone(int priority, st
+ }
+
+ /*
++ * Helper functions to adjust nice level of kswapd, based on the priority of
++ * the task (p) that called it. If it is already higher priority we do not
++ * demote its nice level since it is still working on behalf of a higher
++ * priority task. With kernel threads we leave it at nice 0.
++ *
++ * We don't ever run kswapd real time, so if a real time task calls kswapd we
++ * set it to highest SCHED_NORMAL priority.
++ */
++static inline int effective_sc_prio(struct task_struct *p)
++{
++ if (likely(p->mm)) {
++ if (rt_task(p))
++ return -20;
++ return task_nice(p);
++ }
++ return 0;
++}
++
++static void set_kswapd_nice(struct task_struct *kswapd, int active)
++{
++ long nice = effective_sc_prio(current);
++
++ if (task_nice(kswapd) > nice || !active)
++ set_user_nice(kswapd, nice);
++}
++
++/*
+ * This is the direct reclaim path, for page-allocating processes. We only
+ * try to reclaim pages from zones which will satisfy the caller's allocation
+ * request.
+@@ -2294,6 +2321,7 @@ static int kswapd(void *p)
+ }
+ }
+
++ set_user_nice(tsk, 0);
+ order = pgdat->kswapd_max_order;
+ }
+ finish_wait(&pgdat->kswapd_wait, &wait);
+@@ -2318,6 +2346,7 @@ static int kswapd(void *p)
+ void wakeup_kswapd(struct zone *zone, int order)
+ {
+ pg_data_t *pgdat;
++ int active;
+
+ if (!populated_zone(zone))
+ return;
+@@ -2329,7 +2358,9 @@ void wakeup_kswapd(struct zone *zone, in
+ pgdat->kswapd_max_order = order;
+ if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
+ return;
+- if (!waitqueue_active(&pgdat->kswapd_wait))
++ active = waitqueue_active(&pgdat->kswapd_wait);
++ set_kswapd_nice(pgdat->kswapd, active);
++ if (!active)
+ return;
+ wake_up_interruptible(&pgdat->kswapd_wait);
+ }
+
diff --git a/kernel-bfs-2.6.28/debian/patches/mm-make_swappiness_really_mean_it.patch b/kernel-bfs-2.6.28/debian/patches/mm-make_swappiness_really_mean_it.patch
new file mode 100644
index 0000000..16a2ac0
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/mm-make_swappiness_really_mean_it.patch
@@ -0,0 +1,35 @@
+Swappiness the tunable lies. It doesn't respect swappiness because it alters
+the value when we're more than lightly loaded in the vm. Change it to -really-
+mean swappiness unless we're about to go out of memory.
+
+-ck
+---
+ mm/vmscan.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+Index: linux-2.6.34-ck1/mm/vmscan.c
+===================================================================
+--- linux-2.6.34-ck1.orig/mm/vmscan.c 2010-05-18 12:24:33.974319780 +1000
++++ linux-2.6.34-ck1/mm/vmscan.c 2010-05-18 12:26:16.233444880 +1000
+@@ -1633,6 +1633,7 @@ static void shrink_zone(int priority, st
+ unsigned long nr_reclaimed = 0;
+ unsigned long percent[2]; /* anon @ 0; file @ 1 */
+ enum lru_list l;
++ int tmp_priority;
+
+ get_scan_ratio(zone, sc, percent);
+
+@@ -1648,7 +1649,11 @@ static void shrink_zone(int priority, st
+
+ scan = zone_page_state(zone, NR_LRU_BASE + l);
+ if (priority) {
+- scan >>= priority;
++ tmp_priority = priority;
++
++ if (file && priority > 0)
++ tmp_priority = DEF_PRIORITY;
++ scan >>= tmp_priority;
+ scan = (scan * percent[file]) / 100;
+ }
+ zone->lru[l].nr_scan += scan;
+
diff --git a/kernel-bfs-2.6.28/debian/patches/sched-add-above-background-load-function.patch b/kernel-bfs-2.6.28/debian/patches/sched-add-above-background-load-function.patch
new file mode 100644
index 0000000..5163565
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/sched-add-above-background-load-function.patch
@@ -0,0 +1,66 @@
+Add an "above background load" function which can be used for background
+tasks elsewhere (e.g. VM).
+
+-ck
+---
+ include/linux/sched.h | 7 +++++++
+ kernel/sched_bfs.c | 20 ++++++++++++++++++++
+ 2 files changed, 27 insertions(+)
+
+Index: linux-2.6.39-ck1/include/linux/sched.h
+===================================================================
+--- linux-2.6.39-ck1.orig/include/linux/sched.h 2011-05-19 19:36:35.115273667 +1000
++++ linux-2.6.39-ck1/include/linux/sched.h 2011-05-19 19:36:35.551273667 +1000
+@@ -1590,6 +1590,7 @@ static inline int iso_task(struct task_s
+ {
+ return (p->policy == SCHED_ISO);
+ }
++extern int above_background_load(void);
+ #else /* CFS */
+ extern int runqueue_is_locked(int cpu);
+ extern void task_rq_unlock_wait(struct task_struct *p);
+@@ -1620,6 +1621,12 @@ static inline int iso_task(struct task_s
+ {
+ return 0;
+ }
++
++/* Anyone feel like implementing this? */
++static inline int above_background_load(void)
++{
++ return 1;
++}
+ #endif
+
+ /*
+Index: linux-2.6.39-ck1/kernel/sched_bfs.c
+===================================================================
+--- linux-2.6.39-ck1.orig/kernel/sched_bfs.c 2011-05-19 19:36:35.121273667 +1000
++++ linux-2.6.39-ck1/kernel/sched_bfs.c 2011-05-19 19:36:35.553273667 +1000
+@@ -563,6 +563,26 @@ static inline void __task_grq_unlock(voi
+ grq_unlock();
+ }
+
++/*
++ * Look for any tasks *anywhere* that are running nice 0 or better. We do
++ * this lockless for overhead reasons since the occasional wrong result
++ * is harmless.
++ */
++int above_background_load(void)
++{
++ struct task_struct *cpu_curr;
++ unsigned long cpu;
++
++ for_each_online_cpu(cpu) {
++ cpu_curr = cpu_rq(cpu)->curr;
++ if (unlikely(!cpu_curr))
++ continue;
++ if (PRIO_TO_NICE(cpu_curr->static_prio) < 1)
++ return 1;
++ }
++ return 0;
++}
++
+ #ifndef __ARCH_WANT_UNLOCKED_CTXSW
+ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
+ {
+
diff --git a/kernel-bfs-2.6.28/debian/patches/series b/kernel-bfs-2.6.28/debian/patches/series
index e349195..b1faea9 100644
--- a/kernel-bfs-2.6.28/debian/patches/series
+++ b/kernel-bfs-2.6.28/debian/patches/series
@@ -34,6 +34,13 @@ bfs-363-to-400.patch
 bfs-400-to-401.patch
 bfs401-penalise_fork_depth_account_threads.patch
 bfs-401-to-404.patch
+sched-add-above-background-load-function.patch
+mm-make_swappiness_really_mean_it.patch
+mm-enable_swaptoken_only_when_swap_full.patch
+mm-drop_swap_cache_aggressively.patch
+mm-kswapd_inherit_prio-1.patch
+mm-idleprio_prio-1.patch
+cpufreq-bfs_tweaks.patch
 voltage_scaling_1.diff
 voltage_scaling_0.diff
 arm-proc-v7.diff
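
For reference, a minimal standalone sketch (not part of the commit, and not how the governor is structured internally) of what the retuned ondemand defaults in cpufreq-bfs_tweaks.patch mean in practice: with DEF_FREQUENCY_UP_THRESHOLD at 63 and DEF_FREQUENCY_DOWN_DIFFERENTIAL at 17, frequency jumps to the maximum once busy time exceeds 63% (idle below 37%), and a lower frequency is only considered while busy time stays under roughly 63 - 17 = 46%.

/* Illustrative sketch only -- plain userspace C, not taken from the commit.
 * It mirrors the up/down thresholds that cpufreq-bfs_tweaks.patch sets as
 * the ondemand defaults, so the retuning is easy to reason about. */
#include <stdio.h>

#define UP_THRESHOLD      63	/* busy > 63% -> jump to maximum frequency  */
#define DOWN_DIFFERENTIAL 17	/* busy < 46% -> look for a lower frequency */

static int ondemand_decision(unsigned int busy_pct)
{
	if (busy_pct > UP_THRESHOLD)
		return 1;	/* scale up aggressively */
	if (busy_pct < UP_THRESHOLD - DOWN_DIFFERENTIAL)
		return -1;	/* scale down conservatively */
	return 0;		/* hold the current frequency */
}

int main(void)
{
	const unsigned int samples[] = { 20, 50, 70, 95 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("busy %u%% -> %d\n", samples[i],
		       ondemand_decision(samples[i]));
	return 0;
}

Compiled as ordinary C this just prints the decision for a few sample loads; the in-kernel dbs_check_cpu() additionally weighs the measured load against the current and candidate frequencies rather than a flat percentage.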