+Don't unnecessarily preempt for a task on the wrong CPU.
+
+Cope with worker threads that try to wake themselves up after shifting CPUs
+on suspend by reactivating them instead of hitting the BUG_ON.
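+
+A minimal sketch of the idea (the relevant hunk isn't quoted in this
+excerpt, so the placement and helper names here are only illustrative, not
+the patch's actual code):
+
+	/* Wakeup path: the waking worker may find it *is* the wakee after
+	 * its CPU shifted across suspend. Requeue it instead of BUG_ON. */
+	if (unlikely(rq->curr == p)) {
+		if (!task_queued(p))
+			activate_task(p, rq);
+		goto out;
+	}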
+
+Wrap timer jiffies at 10 seconds after boot instead of 5 minutes, since
+32-bit load averages don't work until the first timer wrap.
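+
+As a standalone illustration of the wrap arithmetic (plain userspace C, not
+kernel code; HZ=1000 is an assumption):
+
+	#include <stdio.h>
+	#define HZ 1000
+
+	int main(void)
+	{
+		/* Mirrors the new INITIAL_JIFFIES value below: the 32-bit
+		 * counter starts 10*HZ ticks short of overflow. */
+		unsigned int jiffies_32 = (unsigned int)(-10 * HZ);
+
+		printf("jiffies at boot: %u\n", jiffies_32);
+		printf("seconds until wrap: %u\n", (0u - jiffies_32) / HZ);
+		return 0;
+	}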
+
+Remove the last_task logic as it wasn't providing any significant performance
+advantage.
+
+Change the locality logic to try to reschedule on the exact same logical core
+instead of assuming that scheduling on a sibling core or sibling thread is
+equivalent. This lets CPUs with a "turbo" mode (such as the Intel i7) engage
+it more often by keeping work on one CPU rather than spreading it out, and
+allows ondemand CPU frequency scaling to ramp up more easily when a task
+stays on the same CPU. It increases throughput on threaded CPUs when lightly
+loaded, and may offer both performance and power-saving advantages on all SMP
+topologies with CPU frequency scaling.
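+
+For reference, the locality values set up in sched_init_smp() below now mean:
+
+	0: same logical core
+	1: SMT sibling thread (SD_LV_SIBLING)
+	2: sibling core sharing cache (SD_LV_MC)
+	3: same NUMA node (SD_LV_NODE)
+	4: different NUMA node (the default between unrelated CPUs)
+
+so the reworked cache_distance() only stretches the virtual deadline for
+localities above 2: a task whose home runqueue is on a different node
+(locality 4) is offset by task_timeslice(p) << 2, i.e. 4 timeslices,
+matching the "4 times longer" comment in the hunk below.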
+
+-ck
+
+---
+ include/linux/jiffies.h | 2 -
+ include/linux/sched.h | 2 -
+ kernel/sched_bfs.c | 89 ++++++++++++++++++++++--------------------------
+ 3 files changed, 43 insertions(+), 50 deletions(-)
+
+Index: linux-2.6.28/include/linux/jiffies.h
+===================================================================
+--- linux-2.6.28.orig/include/linux/jiffies.h 2010-12-14 22:13:10.975304692 +1100
++++ linux-2.6.28/include/linux/jiffies.h 2010-12-14 22:14:03.530569735 +1100
+@@ -154,7 +154,7 @@ static inline u64 get_jiffies_64(void)
+ * Have the 32 bit jiffies value wrap 5 minutes after boot
+ * so jiffies wrap bugs show up earlier.
+ */
+-#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
++#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ))
+
+ /*
+ * Change timeval to jiffies, trying to avoid the
+Index: linux-2.6.28/include/linux/sched.h
+===================================================================
+--- linux-2.6.28.orig/include/linux/sched.h 2010-12-14 22:13:10.965304640 +1100
++++ linux-2.6.28/include/linux/sched.h 2010-12-14 22:14:03.524569704 +1100
+@@ -1426,7 +1426,7 @@ static inline void tsk_cpus_current(stru
+
+ static inline void print_scheduler_version(void)
+ {
+- printk(KERN_INFO"BFS CPU scheduler v0.357 by Con Kolivas.\n");
++ printk(KERN_INFO"BFS CPU scheduler v0.360 by Con Kolivas.\n");
+ }
+
+ static inline int iso_task(struct task_struct *p)
+Index: linux-2.6.28/kernel/sched_bfs.c
+===================================================================
+--- linux-2.6.28.orig/kernel/sched_bfs.c 2010-12-14 22:13:10.983304734 +1100
++++ linux-2.6.28/kernel/sched_bfs.c 2010-12-14 22:14:54.061814177 +1100
+@@ -204,7 +204,6 @@ struct rq {
+ #ifdef CONFIG_NO_HZ
+ unsigned char in_nohz_recently;
+ #endif
+- struct task_struct *last_task;
+ #endif
+
+ struct task_struct *curr, *idle;
+@@ -733,19 +732,12 @@ static int suitable_idle_cpus(struct tas
+
+ static void resched_task(struct task_struct *p);
+
+-/*
+- * last_task stores the last non-idle task scheduled on the local rq for
+- * cache warmth testing.
+- */
+-static inline void set_last_task(struct rq *rq, struct task_struct *p)
+-{
+- rq->last_task = p;
+-}
+-
+-#define CPUIDLE_CACHE_BUSY (1)
+-#define CPUIDLE_DIFF_CPU (2)
+-#define CPUIDLE_THREAD_BUSY (4)
+-#define CPUIDLE_DIFF_NODE (8)
++#define CPUIDLE_DIFF_THREAD (1)
++#define CPUIDLE_DIFF_CORE (2)
++#define CPUIDLE_CACHE_BUSY (4)
++#define CPUIDLE_DIFF_CPU (8)
++#define CPUIDLE_THREAD_BUSY (16)
++#define CPUIDLE_DIFF_NODE (32)
+
+ /*
+ * The best idle CPU is chosen according to the CPUIDLE ranking above where the
+@@ -798,27 +790,28 @@ static void resched_best_idle(struct tas
+ }
+ tmp_rq = cpu_rq(cpu_tmp);
+
+- if (rq->cpu_locality[cpu_tmp]) {
+- /* Check rq->last_task hasn't been dereferenced */
+- if (rq->last_task && p != rq->last_task) {
+ #ifdef CONFIG_NUMA
+- if (rq->cpu_locality[cpu_tmp] > 1)
+- ranking |= CPUIDLE_DIFF_NODE;
++ if (rq->cpu_locality[cpu_tmp] > 3)
++ ranking |= CPUIDLE_DIFF_NODE;
++ else
+ #endif
+- ranking |= CPUIDLE_DIFF_CPU;
+- }
+- }
++ if (rq->cpu_locality[cpu_tmp] > 2)
++ ranking |= CPUIDLE_DIFF_CPU;
+ #ifdef CONFIG_SCHED_MC
++ if (rq->cpu_locality[cpu_tmp] == 2)
++ ranking |= CPUIDLE_DIFF_CORE;
+ if (!(tmp_rq->cache_idle(cpu_tmp)))
+ ranking |= CPUIDLE_CACHE_BUSY;
+ #endif
+ #ifdef CONFIG_SCHED_SMT
++ if (rq->cpu_locality[cpu_tmp] == 1)
++ ranking |= CPUIDLE_DIFF_THREAD;
+ if (!(tmp_rq->siblings_idle(cpu_tmp)))
+ ranking |= CPUIDLE_THREAD_BUSY;
+ #endif
+ if (ranking < best_ranking) {
+ best_cpu = cpu_tmp;
+- if (ranking <= 1)
++ if (ranking == 0)
+ break;
+ best_ranking = ranking;
+ }
+@@ -835,11 +828,11 @@ static inline void resched_suitable_idle
+
+ /*
+ * The cpu cache locality difference between CPUs is used to determine how far
+- * to offset the virtual deadline. "One" difference in locality means that one
++ * to offset the virtual deadline. <2 difference in locality means that one
+ * timeslice difference is allowed longer for the cpu local tasks. This is
+ * enough in the common case when tasks are up to 2* number of CPUs to keep
+ * tasks within their shared cache CPUs only. CPUs on different nodes or not
+- * even in this domain (NUMA) have "3" difference, allowing 4 times longer
++ * even in this domain (NUMA) have "4" difference, allowing 4 times longer
+ * deadlines before being taken onto another cpu, allowing for 2* the double
+ * seen by separate CPUs above.
+ * Simple summary: Virtual deadlines are equal on shared cache CPUs, double
+@@ -848,12 +841,11 @@ static inline void resched_suitable_idle
+ static inline int
+ cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
+ {
+- /* Check rq->last_task hasn't been dereferenced */
+- if (likely(rq->last_task)) {
+- if (rq->last_task == p)
+- return 0;
+- }
+- return rq->cpu_locality[cpu_of(task_rq)] * task_timeslice(p);
++ int locality = rq->cpu_locality[cpu_of(task_rq)] - 2;
++
++ if (locality > 0)
++ return task_timeslice(p) << locality;
++ return 0;
+ }
+ #else /* CONFIG_SMP */
+ static inline void inc_qnr(void)
+@@ -892,10 +884,6 @@ cache_distance(struct rq *task_rq, struc
+ {
+ return 0;
+ }
+-
+-static inline void set_last_task(struct rq *rq, struct task_struct *p)
+-{
+-}
+ #endif /* CONFIG_SMP */
+
+ /*
+@@ -1287,10 +1275,10 @@ static void try_preempt(struct task_stru
+ return;
+ }
+
+- if (online_cpus(p))
++ if (likely(online_cpus(p)))
+ cpus_and(tmp, cpu_online_map, p->cpus_allowed);
+ else
+- (cpumask_copy(&tmp, &cpu_online_map));
++ return;
+
+ latest_deadline = 0;
+ highest_prio = -1;
+@@ -2597,7 +2585,7 @@ need_resched_nonpreemptible:
+ prev->last_ran = rq->clock;
+
+ /* Task changed affinity off this CPU */
+- if (needs_other_cpu(prev, cpu))
++ if (unlikely(!cpu_isset(cpu, prev->cpus_allowed)))
+ resched_suitable_idle(prev);
+ else if (!deactivate) {
+ if (!queued_notrunning()) {
+@@ -2639,8 +2627,6 @@ need_resched_nonpreemptible:
+ if (likely(prev != next)) {
+ sched_info_switch(prev, next);
+
+- if (prev != idle)
+- set_last_task(rq, prev);
+ set_rq_task(rq, next);
+ grq.nr_switches++;
+ prev->oncpu = 0;
+@@ -6054,10 +6040,12 @@ void __init sched_init_smp(void)
+ cpu_set(other_cpu, rq->cache_siblings);
+ }
+ #endif
+- if (sd->level <= SD_LV_MC)
+- locality = 0;
+- else if (sd->level <= SD_LV_NODE)
++ if (sd->level <= SD_LV_SIBLING)
+ locality = 1;
++ else if (sd->level <= SD_LV_MC)
++ locality = 2;
++ else if (sd->level <= SD_LV_NODE)
++ locality = 3;
+ else
+ continue;
+
+@@ -6160,7 +6148,7 @@ void __init sched_init(void)
+ if (i == j)
+ rq->cpu_locality[j] = 0;
+ else
+- rq->cpu_locality[j] = 3;
++ rq->cpu_locality[j] = 4;
+ }
+ }
+ #endif