From e7fdb862157be305c57e01db39690c0edb9d1752 Mon Sep 17 00:00:00 2001 From: Peter Hunt Date: Thu, 26 May 2011 10:23:48 +0000 Subject: [PATCH] A couple more patches from CK for BFS --- .../debian/patches/hz-raise_max.patch | 180 ++++++++++++++ .../debian/patches/mm-lru_cache_add_lru_tail.patch | 254 ++++++++++++++++++++ kernel-bfs-2.6.28/debian/patches/series | 2 + 3 files changed, 436 insertions(+) create mode 100644 kernel-bfs-2.6.28/debian/patches/hz-raise_max.patch create mode 100644 kernel-bfs-2.6.28/debian/patches/mm-lru_cache_add_lru_tail.patch diff --git a/kernel-bfs-2.6.28/debian/patches/hz-raise_max.patch b/kernel-bfs-2.6.28/debian/patches/hz-raise_max.patch new file mode 100644 index 0000000..a52c8b3 --- /dev/null +++ b/kernel-bfs-2.6.28/debian/patches/hz-raise_max.patch @@ -0,0 +1,180 @@ +There's some really badly broken software out there that is entirely +dependent on HZ for its maximum performance. Raise the maximum HZ value +to some higher and slightly unreasonable values up to some higher and +completely obscene values. + +Signed-off-by: Con Kolivas + +--- + arch/x86/kernel/cpu/proc.c | 2 - + arch/x86/kernel/smpboot.c | 2 - + include/linux/nfsd/stats.h | 4 +- + include/net/inet_timewait_sock.h | 10 ++++-- + init/calibrate.c | 2 - + kernel/Kconfig.hz | 64 +++++++++++++++++++++++++++++++++++++++ + 6 files changed, 76 insertions(+), 8 deletions(-) + +Index: linux-2.6.32-ck1/kernel/Kconfig.hz +=================================================================== +--- linux-2.6.32-ck1.orig/kernel/Kconfig.hz 2009-12-10 23:00:22.485001550 +1100 ++++ linux-2.6.32-ck1/kernel/Kconfig.hz 2009-12-10 23:00:38.850376050 +1100 +@@ -48,6 +48,63 @@ choice + can also benefit from this choice without sacrificing battery life + if dynticks is also enabled. + ++ config HZ_1500 ++ bool "1500 HZ" ++ help ++ 1500 Hz is an insane value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. 
++ ++ config HZ_2000 ++ bool "2000 HZ" ++ help ++ 2000 Hz is an insane value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_3000 ++ bool "3000 HZ" ++ help ++ 3000 Hz is an insane value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_4000 ++ bool "4000 HZ" ++ help ++ 4000 Hz is an insane value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_5000 ++ bool "5000 HZ" ++ help ++ 5000 Hz is an obscene value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_7500 ++ bool "7500 HZ" ++ help ++ 7500 Hz is an obscene value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_10000 ++ bool "10000 HZ" ++ help ++ 10000 Hz is an obscene value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ + endchoice + + config HZ +@@ -56,6 +113,13 @@ config HZ + default 250 if HZ_250_NODEFAULT + default 300 if HZ_300 + default 1000 if HZ_1000 ++ default 1500 if HZ_1500 ++ default 2000 if HZ_2000 ++ default 3000 if HZ_3000 ++ default 4000 if HZ_4000 ++ default 5000 if HZ_5000 ++ default 7500 if HZ_7500 ++ default 10000 if HZ_10000 + + config SCHED_HRTICK + def_bool HIGH_RES_TIMERS && (!SMP || USE_GENERIC_SMP_HELPERS) +Index: linux-2.6.32-ck1/include/net/inet_timewait_sock.h +=================================================================== +--- linux-2.6.32-ck1.orig/include/net/inet_timewait_sock.h 2009-12-10 20:16:33.291376025 +1100 ++++ linux-2.6.32-ck1/include/net/inet_timewait_sock.h 2009-12-10 23:00:38.851376102 +1100 +@@ -39,8 +39,8 @@ struct inet_hashinfo; + * If time > 4sec, it is "slow" path, no recycling is required, + * so that we select tick to get range about 4 seconds. 
+ */ +-#if HZ <= 16 || HZ > 4096 +-# error Unsupported: HZ <= 16 or HZ > 4096 ++#if HZ <= 16 || HZ > 16384 ++# error Unsupported: HZ <= 16 or HZ > 16384 + #elif HZ <= 32 + # define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) + #elif HZ <= 64 +@@ -55,8 +55,12 @@ struct inet_hashinfo; + # define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) + #elif HZ <= 2048 + # define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +-#else ++#elif HZ <= 4096 + # define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) ++#elif HZ <= 8192 ++# define INET_TWDR_RECYCLE_TICK (13 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) ++#else ++# define INET_TWDR_RECYCLE_TICK (14 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) + #endif + + /* TIME_WAIT reaping mechanism. */ +Index: linux-2.6.32-ck1/init/calibrate.c +=================================================================== +--- linux-2.6.32-ck1.orig/init/calibrate.c 2009-12-10 20:16:33.332001658 +1100 ++++ linux-2.6.32-ck1/init/calibrate.c 2009-12-10 23:00:38.851376102 +1100 +@@ -172,5 +172,5 @@ void __cpuinit calibrate_delay(void) + } + printk(KERN_CONT "%lu.%02lu BogoMIPS (lpj=%lu)\n", + loops_per_jiffy/(500000/HZ), +- (loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy); ++ (loops_per_jiffy * 10 /(50000/HZ)) % 100, loops_per_jiffy); + } +Index: linux-2.6.32-ck1/arch/x86/kernel/cpu/proc.c +=================================================================== +--- linux-2.6.32-ck1.orig/arch/x86/kernel/cpu/proc.c 2009-12-10 20:16:33.301376557 +1100 ++++ linux-2.6.32-ck1/arch/x86/kernel/cpu/proc.c 2009-12-10 23:00:38.851376102 +1100 +@@ -109,7 +109,7 @@ static int show_cpuinfo(struct seq_file + + seq_printf(m, "\nbogomips\t: %lu.%02lu\n", + c->loops_per_jiffy/(500000/HZ), +- (c->loops_per_jiffy/(5000/HZ)) % 100); ++ (c->loops_per_jiffy * 10 /(50000/HZ)) % 100); + + #ifdef CONFIG_X86_64 + if (c->x86_tlbsize > 0) +Index: linux-2.6.32-ck1/arch/x86/kernel/smpboot.c 
+=================================================================== +--- linux-2.6.32-ck1.orig/arch/x86/kernel/smpboot.c 2009-12-10 20:16:33.312001306 +1100 ++++ linux-2.6.32-ck1/arch/x86/kernel/smpboot.c 2009-12-10 23:00:38.852376639 +1100 +@@ -457,7 +457,7 @@ static void impress_friends(void) + "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + num_online_cpus(), + bogosum/(500000/HZ), +- (bogosum/(5000/HZ))%100); ++ (bogosum * 10/(50000/HZ))%100); + + pr_debug("Before bogocount - setting activated=1.\n"); + } +Index: linux-2.6.32-ck1/include/linux/nfsd/stats.h +=================================================================== +--- linux-2.6.32-ck1.orig/include/linux/nfsd/stats.h 2009-12-10 20:16:33.281376047 +1100 ++++ linux-2.6.32-ck1/include/linux/nfsd/stats.h 2009-12-10 23:00:38.853376827 +1100 +@@ -11,8 +11,8 @@ + + #include + +-/* thread usage wraps very million seconds (approx one fortnight) */ +-#define NFSD_USAGE_WRAP (HZ*1000000) ++/* thread usage wraps every one hundred thousand seconds (approx one day) */ ++#define NFSD_USAGE_WRAP (HZ*100000) + + #ifdef __KERNEL__ + + diff --git a/kernel-bfs-2.6.28/debian/patches/mm-lru_cache_add_lru_tail.patch b/kernel-bfs-2.6.28/debian/patches/mm-lru_cache_add_lru_tail.patch new file mode 100644 index 0000000..6509618 --- /dev/null +++ b/kernel-bfs-2.6.28/debian/patches/mm-lru_cache_add_lru_tail.patch @@ -0,0 +1,254 @@ +When reading from large files through the generic file read functions into +page cache we can detect when a file is so large that it is unlikely to be +fully cached in ram. If that happens we can put it on the tail end of the +inactive lru list so it can be the first thing evicted next time we need ram. + +Do lots of funny buggers with underscores to preserve all the existing APIs. 
+ +-ck + +--- + include/linux/mm_inline.h | 14 ++++++++++-- + include/linux/swap.h | 5 ++-- + mm/filemap.c | 51 +++++++++++++++++++++++++++++++++++++++------- + mm/swap.c | 29 ++++++++++++++++++++------ + 4 files changed, 82 insertions(+), 17 deletions(-) + +Index: linux-2.6.32-ck1/include/linux/mm_inline.h +=================================================================== +--- linux-2.6.32-ck1.orig/include/linux/mm_inline.h 2009-12-10 20:47:13.927251742 +1100 ++++ linux-2.6.32-ck1/include/linux/mm_inline.h 2009-12-10 22:45:33.041376670 +1100 +@@ -20,13 +20,23 @@ static inline int page_is_file_cache(str + } + + static inline void +-add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l) ++__add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l, int tail) + { +- list_add(&page->lru, &zone->lru[l].list); ++ /* See if this should be added to the tail of this lru list */ ++ if (tail) ++ list_add_tail(&page->lru, &zone->lru[l].list); ++ else ++ list_add(&page->lru, &zone->lru[l].list); + __inc_zone_state(zone, NR_LRU_BASE + l); + } + + static inline void ++add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l) ++{ ++ __add_page_to_lru_list(zone, page, l, 0); ++} ++ ++static inline void + del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l) + { + list_del(&page->lru); +Index: linux-2.6.32-ck1/include/linux/swap.h +=================================================================== +--- linux-2.6.32-ck1.orig/include/linux/swap.h 2009-12-10 20:45:55.306251536 +1100 ++++ linux-2.6.32-ck1/include/linux/swap.h 2009-12-10 22:47:38.415251020 +1100 +@@ -198,6 +198,7 @@ extern unsigned int nr_free_pagecache_pa + + + /* linux/mm/swap.c */ ++extern void ____lru_cache_add(struct page *, enum lru_list lru, int tail); + extern void __lru_cache_add(struct page *, enum lru_list lru); + extern void lru_cache_add_lru(struct page *, enum lru_list lru); + extern void activate_page(struct page *); +@@ 
-223,9 +224,9 @@ static inline void lru_cache_add_active_ + __lru_cache_add(page, LRU_ACTIVE_ANON); + } + +-static inline void lru_cache_add_file(struct page *page) ++static inline void lru_cache_add_file(struct page *page, int tail) + { +- __lru_cache_add(page, LRU_INACTIVE_FILE); ++ ____lru_cache_add(page, LRU_INACTIVE_FILE, tail); + } + + static inline void lru_cache_add_active_file(struct page *page) +Index: linux-2.6.32-ck1/mm/filemap.c +=================================================================== +--- linux-2.6.32-ck1.orig/mm/filemap.c 2009-12-10 20:52:17.597126805 +1100 ++++ linux-2.6.32-ck1/mm/filemap.c 2009-12-10 22:41:11.812251151 +1100 +@@ -454,8 +454,8 @@ out: + } + EXPORT_SYMBOL(add_to_page_cache_locked); + +-int add_to_page_cache_lru(struct page *page, struct address_space *mapping, +- pgoff_t offset, gfp_t gfp_mask) ++int __add_to_page_cache_lru(struct page *page, struct address_space *mapping, ++ pgoff_t offset, gfp_t gfp_mask, int tail) + { + int ret; + +@@ -471,12 +471,19 @@ int add_to_page_cache_lru(struct page *p + ret = add_to_page_cache(page, mapping, offset, gfp_mask); + if (ret == 0) { + if (page_is_file_cache(page)) +- lru_cache_add_file(page); ++ lru_cache_add_file(page, tail); + else + lru_cache_add_active_anon(page); + } + return ret; + } ++ ++int add_to_page_cache_lru(struct page *page, struct address_space *mapping, ++ pgoff_t offset, gfp_t gfp_mask) ++{ ++ return __add_to_page_cache_lru(page, mapping, offset, gfp_mask, 0); ++} ++ + + #ifdef CONFIG_NUMA + struct page *__page_cache_alloc(gfp_t gfp) +@@ -970,6 +977,28 @@ static void shrink_readahead_size_eio(st + } + EXPORT_SYMBOL(find_get_pages); + ++static inline int nr_mapped(void) ++{ ++ return global_page_state(NR_FILE_MAPPED) + ++ global_page_state(NR_ANON_PAGES); ++} ++ ++/* ++ * This examines how large in pages a file size is and returns 1 if it is ++ * more than half the unmapped ram. 
Avoid doing read_page_state which is ++ * expensive unless we already know it is likely to be large enough. ++ */ ++static int large_isize(unsigned long nr_pages) ++{ ++ if (nr_pages * 6 > vm_total_pages) { ++ unsigned long unmapped_ram = vm_total_pages - nr_mapped(); ++ ++ if (nr_pages * 2 > unmapped_ram) ++ return 1; ++ } ++ return 0; ++} ++ + /** + * do_generic_file_read - generic file read routine + * @filp: the file to read +@@ -994,7 +1023,7 @@ static void do_generic_file_read(struct + pgoff_t prev_index; + unsigned long offset; /* offset into pagecache page */ + unsigned int prev_offset; +- int error; ++ int error, tail = 0; + + index = *ppos >> PAGE_CACHE_SHIFT; + prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; +@@ -1005,7 +1034,7 @@ static void do_generic_file_read(struct + for (;;) { + struct page *page; + pgoff_t end_index; +- loff_t isize; ++ loff_t isize = 0; + unsigned long nr, ret; + + cond_resched(); +@@ -1170,8 +1199,16 @@ no_cached_page: + desc->error = -ENOMEM; + goto out; + } +- error = add_to_page_cache_lru(page, mapping, +- index, GFP_KERNEL); ++ /* ++ * If we know the file is large we add the pages read to the ++ * end of the lru as we're unlikely to be able to cache the ++ * whole file in ram so make those pages the first to be ++ * dropped if not referenced soon. 
++ */ ++ if (large_isize(end_index)) ++ tail = 1; ++ error = __add_to_page_cache_lru(page, mapping, ++ index, GFP_KERNEL, tail); + if (error) { + page_cache_release(page); + if (error == -EEXIST) +Index: linux-2.6.32-ck1/mm/swap.c +=================================================================== +--- linux-2.6.32-ck1.orig/mm/swap.c 2009-12-10 20:45:55.320251262 +1100 ++++ linux-2.6.32-ck1/mm/swap.c 2009-12-10 22:53:10.138009481 +1100 +@@ -214,22 +214,29 @@ void mark_page_accessed(struct page *pag + + EXPORT_SYMBOL(mark_page_accessed); + +-void __lru_cache_add(struct page *page, enum lru_list lru) ++void ______pagevec_lru_add(struct pagevec *pvec, enum lru_list lru, int tail); ++ ++void ____lru_cache_add(struct page *page, enum lru_list lru, int tail) + { + struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru]; + + page_cache_get(page); + if (!pagevec_add(pvec, page)) +- ____pagevec_lru_add(pvec, lru); ++ ______pagevec_lru_add(pvec, lru, tail); + put_cpu_var(lru_add_pvecs); + } + ++void __lru_cache_add(struct page *page, enum lru_list lru) ++{ ++ ____lru_cache_add(page, lru, 0); ++} ++ + /** + * lru_cache_add_lru - add a page to a page list + * @page: the page to be added to the LRU. + * @lru: the LRU list to which the page is added. + */ +-void lru_cache_add_lru(struct page *page, enum lru_list lru) ++void __lru_cache_add_lru(struct page *page, enum lru_list lru, int tail) + { + if (PageActive(page)) { + VM_BUG_ON(PageUnevictable(page)); +@@ -240,7 +247,12 @@ void lru_cache_add_lru(struct page *page + } + + VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page)); +- __lru_cache_add(page, lru); ++ ____lru_cache_add(page, lru, tail); ++} ++ ++void lru_cache_add_lru(struct page *page, enum lru_list lru) ++{ ++ __lru_cache_add_lru(page, lru, 0); + } + + /** +@@ -400,7 +412,7 @@ EXPORT_SYMBOL(__pagevec_release); + * Add the passed pages to the LRU, then drop the caller's refcount + * on them. Reinitialises the caller's pagevec. 
+ */ +-void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) ++void ______pagevec_lru_add(struct pagevec *pvec, enum lru_list lru, int tail) + { + int i; + struct zone *zone = NULL; +@@ -428,7 +440,7 @@ void ____pagevec_lru_add(struct pagevec + SetPageActive(page); + zone->recent_rotated[file]++; + } +- add_page_to_lru_list(zone, page, lru); ++ __add_page_to_lru_list(zone, page, lru, tail); + } + if (zone) + spin_unlock_irq(&zone->lru_lock); +@@ -436,6 +448,11 @@ void ____pagevec_lru_add(struct pagevec + pagevec_reinit(pvec); + } + ++void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) ++{ ++ ______pagevec_lru_add(pvec, lru, 0); ++} ++ + EXPORT_SYMBOL(____pagevec_lru_add); + + /* + diff --git a/kernel-bfs-2.6.28/debian/patches/series b/kernel-bfs-2.6.28/debian/patches/series index e651c7b..bc219dd 100644 --- a/kernel-bfs-2.6.28/debian/patches/series +++ b/kernel-bfs-2.6.28/debian/patches/series @@ -40,6 +40,8 @@ mm-enable_swaptoken_only_when_swap_full.patch mm-drop_swap_cache_aggressively.patch mm-kswapd_inherit_prio-1.patch mm-idleprio_prio-1.patch +mm-lru_cache_add_lru_tail.patch +hz-raise_max.patch cpufreq-bfs_tweaks.patch voltage_scaling_1.diff voltage_scaling_0.diff -- 1.7.9.5