Don't unnecessarily preempt for a task on the wrong CPU.

Cope with worker threads trying to wake themselves up due to shifting CPUs on
suspend by reactivating them, instead of hitting the BUG_ON.

Wrap timer jiffies at 10 seconds instead of 5 minutes, since 32-bit load
averages don't work until the first timer wrap.
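
For illustration only, a minimal user-space sketch of the wrap arithmetic
(HZ is assumed to be 100 here; the INITIAL_JIFFIES line mirrors the new
kernel define). Starting the 32-bit counter ten seconds short of overflow
means the load average code sees its first wrap almost immediately after
boot:

#include <stdio.h>

#define HZ 100	/* assumed tick rate for this sketch */
/* Mirrors the patched kernel define: start 10 seconds before the wrap. */
#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ))

int main(void)
{
	unsigned int jiffies = INITIAL_JIFFIES;

	printf("at boot:            %u\n", jiffies);		/* 4294966296 */
	printf("after 10s of ticks: %u\n", jiffies + 10*HZ);	/* 0: wrapped */
	return 0;
}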

Remove the last_task logic as it wasn't providing any significant performance
benefit.

Change the locality logic to try to reschedule on the exact same logical core
instead of assuming that scheduling on a sibling core or sibling thread is
equivalent. This allows CPUs with a "turbo" mode (such as the i7) to engage it
more often, since work concentrates on one CPU instead of spreading out, and
allows ondemand cpu frequency scaling to ramp up more easily when a task stays
on the same CPU. It increases throughput on threaded CPUs when lightly loaded,
and may offer both performance and power saving advantages on all SMP
topologies with cpu frequency scaling.
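
To make the new ranking concrete, here is a minimal sketch of how the patch
scores each candidate idle CPU. The CPUIDLE_* values and the locality
encoding follow the patch; rank_idle_cpu() is a hypothetical stand-in for
the loop in resched_best_idle(), and the CPUIDLE_CACHE_BUSY and
CPUIDLE_THREAD_BUSY tests on the candidate's actual idle state are omitted.
Lower scores win, so an idle CPU that is the task's own logical core always
beats a sibling thread or sibling core:

#include <stdio.h>

/* Penalty bits, ordered so a smaller total means a cache-warmer target. */
#define CPUIDLE_DIFF_THREAD	(1)	/* sibling thread of the same core */
#define CPUIDLE_DIFF_CORE	(2)	/* different core sharing this cache */
#define CPUIDLE_CACHE_BUSY	(4)
#define CPUIDLE_DIFF_CPU	(8)	/* different physical package */
#define CPUIDLE_THREAD_BUSY	(16)
#define CPUIDLE_DIFF_NODE	(32)	/* different NUMA node */

/*
 * Hypothetical helper: score one candidate by its locality distance from
 * the task's CPU, as set up in sched_init_smp(): 0 = same logical core,
 * 1 = sibling thread, 2 = sibling core, 3 = same node, 4 = other node.
 */
static int rank_idle_cpu(int locality)
{
	int ranking = 0;

	if (locality > 3)
		ranking |= CPUIDLE_DIFF_NODE;
	if (locality > 2)
		ranking |= CPUIDLE_DIFF_CPU;
	if (locality == 2)
		ranking |= CPUIDLE_DIFF_CORE;
	if (locality == 1)
		ranking |= CPUIDLE_DIFF_THREAD;
	return ranking;
}

int main(void)
{
	int locality;

	/* Only locality 0 scores 0, so the same logical core always wins. */
	for (locality = 0; locality <= 4; locality++)
		printf("locality %d -> ranking %d\n",
		       locality, rank_idle_cpu(locality));
	return 0;
}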

 include/linux/jiffies.h |    2 +-
 include/linux/sched.h   |    2 +-
 kernel/sched_bfs.c      |   89 ++++++++++++++++++++++--------------------------
 3 files changed, 43 insertions(+), 50 deletions(-)

Index: linux-2.6.28/include/linux/jiffies.h
===================================================================
--- linux-2.6.28.orig/include/linux/jiffies.h	2010-12-14 22:13:10.975304692 +1100
+++ linux-2.6.28/include/linux/jiffies.h	2010-12-14 22:14:03.530569735 +1100
@@ -154,7 +154,7 @@ static inline u64 get_jiffies_64(void)
- * Have the 32 bit jiffies value wrap 5 minutes after boot
+ * Have the 32 bit jiffies value wrap 10 seconds after boot
  * so jiffies wrap bugs show up earlier.
  */
-#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
+#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ))

 /*
  * Change timeval to jiffies, trying to avoid the
Index: linux-2.6.28/include/linux/sched.h
===================================================================
--- linux-2.6.28.orig/include/linux/sched.h	2010-12-14 22:13:10.965304640 +1100
+++ linux-2.6.28/include/linux/sched.h	2010-12-14 22:14:03.524569704 +1100
@@ -1426,7 +1426,7 @@ static inline void tsk_cpus_current(stru
 static inline void print_scheduler_version(void)
 {
-	printk(KERN_INFO"BFS CPU scheduler v0.357 by Con Kolivas.\n");
+	printk(KERN_INFO"BFS CPU scheduler v0.360 by Con Kolivas.\n");
 }

 static inline int iso_task(struct task_struct *p)
Index: linux-2.6.28/kernel/sched_bfs.c
===================================================================
--- linux-2.6.28.orig/kernel/sched_bfs.c	2010-12-14 22:13:10.983304734 +1100
+++ linux-2.6.28/kernel/sched_bfs.c	2010-12-14 22:14:54.061814177 +1100
@@ -204,7 +204,6 @@ struct rq {
 	unsigned char in_nohz_recently;
-	struct task_struct *last_task;
 	struct task_struct *curr, *idle;
@@ -733,19 +732,12 @@ static int suitable_idle_cpus(struct tas
 static void resched_task(struct task_struct *p);
- * last_task stores the last non-idle task scheduled on the local rq for
- * cache warmth testing.
-static inline void set_last_task(struct rq *rq, struct task_struct *p)
-#define CPUIDLE_CACHE_BUSY	(1)
-#define CPUIDLE_DIFF_CPU	(2)
-#define CPUIDLE_THREAD_BUSY	(4)
-#define CPUIDLE_DIFF_NODE	(8)
+#define CPUIDLE_DIFF_THREAD	(1)
+#define CPUIDLE_DIFF_CORE	(2)
+#define CPUIDLE_CACHE_BUSY	(4)
+#define CPUIDLE_DIFF_CPU	(8)
+#define CPUIDLE_THREAD_BUSY	(16)
+#define CPUIDLE_DIFF_NODE	(32)
  * The best idle CPU is chosen according to the CPUIDLE ranking above where the
@@ -798,27 +790,28 @@ static void resched_best_idle(struct tas
 		tmp_rq = cpu_rq(cpu_tmp);
-		if (rq->cpu_locality[cpu_tmp]) {
-			/* Check rq->last_task hasn't been dereferenced */
-			if (rq->last_task && p != rq->last_task) {
-				if (rq->cpu_locality[cpu_tmp] > 1)
-					ranking |= CPUIDLE_DIFF_NODE;
+		if (rq->cpu_locality[cpu_tmp] > 3)
+			ranking |= CPUIDLE_DIFF_NODE;
-				ranking |= CPUIDLE_DIFF_CPU;
+		if (rq->cpu_locality[cpu_tmp] > 2)
+			ranking |= CPUIDLE_DIFF_CPU;
 #ifdef CONFIG_SCHED_MC
+		if (rq->cpu_locality[cpu_tmp] == 2)
+			ranking |= CPUIDLE_DIFF_CORE;
 		if (!(tmp_rq->cache_idle(cpu_tmp)))
 			ranking |= CPUIDLE_CACHE_BUSY;
 #ifdef CONFIG_SCHED_SMT
+		if (rq->cpu_locality[cpu_tmp] == 1)
+			ranking |= CPUIDLE_DIFF_THREAD;
 		if (!(tmp_rq->siblings_idle(cpu_tmp)))
 			ranking |= CPUIDLE_THREAD_BUSY;
 		if (ranking < best_ranking) {
 			best_ranking = ranking;
@@ -835,11 +828,11 @@ static inline void resched_suitable_idle
  * The cpu cache locality difference between CPUs is used to determine how far
- * to offset the virtual deadline. "One" difference in locality means that one
+ * to offset the virtual deadline. <2 difference in locality means that one
  * timeslice difference is allowed longer for the cpu local tasks. This is
  * enough in the common case when tasks are up to 2* number of CPUs to keep
  * tasks within their shared cache CPUs only. CPUs on different nodes or not
- * even in this domain (NUMA) have "3" difference, allowing 4 times longer
+ * even in this domain (NUMA) have "4" difference, allowing 4 times longer
  * deadlines before being taken onto another cpu, allowing for 2* the double
  * seen by separate CPUs above.
  * Simple summary: Virtual deadlines are equal on shared cache CPUs, double
@@ -848,12 +841,11 @@ static inline void resched_suitable_idle
 cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
-	/* Check rq->last_task hasn't been dereferenced */
-	if (likely(rq->last_task)) {
-		if (rq->last_task == p)
-	return rq->cpu_locality[cpu_of(task_rq)] * task_timeslice(p);
+	int locality = rq->cpu_locality[cpu_of(task_rq)] - 2;
+	return task_timeslice(p) << locality;
 #else	/* CONFIG_SMP */
 static inline void inc_qnr(void)
@@ -892,10 +884,6 @@ cache_distance(struct rq *task_rq, struc
-static inline void set_last_task(struct rq *rq, struct task_struct *p)
 #endif	/* CONFIG_SMP */
@@ -1287,10 +1275,10 @@ static void try_preempt(struct task_stru
-	if (online_cpus(p))
+	if (likely(online_cpus(p)))
 		cpus_and(tmp, cpu_online_map, p->cpus_allowed);
-		(cpumask_copy(&tmp, &cpu_online_map));
@@ -2597,7 +2585,7 @@ need_resched_nonpreemptible:
 	prev->last_ran = rq->clock;
 	/* Task changed affinity off this CPU */
-	if (needs_other_cpu(prev, cpu))
+	if (unlikely(!cpu_isset(cpu, prev->cpus_allowed)))
 		resched_suitable_idle(prev);
 	else if (!deactivate) {
 		if (!queued_notrunning()) {
@@ -2639,8 +2627,6 @@ need_resched_nonpreemptible:
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
-		set_last_task(rq, prev);
 		set_rq_task(rq, next);
@@ -6054,10 +6040,12 @@ void __init sched_init_smp(void)
 			cpu_set(other_cpu, rq->cache_siblings);
-			if (sd->level <= SD_LV_MC)
-			else if (sd->level <= SD_LV_NODE)
+			if (sd->level <= SD_LV_SIBLING)
+			else if (sd->level <= SD_LV_MC)
+			else if (sd->level <= SD_LV_NODE)
@@ -6160,7 +6148,7 @@ void __init sched_init(void)
 				rq->cpu_locality[j] = 0;
-				rq->cpu_locality[j] = 3;
+				rq->cpu_locality[j] = 4;