kernel-bfs-2.6.28/debian/patches/bfs-357-to-360.patch

Don't unnecessarily preempt for a task on the wrong CPU.

Cope with worker threads trying to wake themselves up due to shifting CPUs on
suspend by reactivating them instead of hitting the BUG_ON.

Wrap timer jiffies at 10 seconds instead of 5 minutes, since 32-bit load
averages don't work until the first timer wrap.
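
(Illustration only, not part of the patch: a small user-space sketch of the
arithmetic behind the new INITIAL_JIFFIES value. HZ=1000 is assumed purely for
the example; the 32-bit counter starts 10*HZ ticks below zero, so it now wraps
about ten seconds after boot instead of five minutes.)

  #include <stdio.h>

  #define HZ 1000        /* assumed tick rate, illustration only */
  #define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ))

  int main(void)
  {
          unsigned int jiffies_32 = (unsigned int)INITIAL_JIFFIES;
          /* starts 10*HZ below zero, so it overflows back to 0 early */
          unsigned int ticks_to_wrap = 0u - jiffies_32;

          printf("32 bit jiffies wraps after %u ticks (%u s at HZ=%d)\n",
                 ticks_to_wrap, ticks_to_wrap / HZ, HZ);
          return 0;
  }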

Remove the last_task logic, as it wasn't providing any significant performance
advantage.

Change the locality logic to try to reschedule on the exact same logical core
instead of assuming that scheduling on a sibling core or sibling thread is
equivalent. This lets CPUs with a "turbo" mode (such as the i7) engage it more
often by loading one CPU rather than spreading work out, and allows ondemand
CPU frequency scaling to ramp up more easily when a task stays on the same
CPU. It increases throughput on hyperthreaded CPUs when lightly loaded, and
may offer both performance and power-saving advantages on all SMP topologies
with CPU frequency scaling.
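
(Again illustration only, not part of the patch: a sketch of how the reworked
cache_distance() maps the new locality values onto virtual deadline offsets.
The timeslice constant is a stand-in for task_timeslice(p), which lives in
sched_bfs.c.)

  #include <stdio.h>

  #define TIMESLICE_MS 6        /* stand-in for task_timeslice(p) */

  static int deadline_offset(int cpu_locality)
  {
          /* new scale: 0 = same CPU, 1 = SMT sibling, 2 = shared-cache
           * core, 3 = same NUMA node, 4 = different node (the default) */
          int locality = cpu_locality - 2;

          if (locality > 0)
                  return TIMESLICE_MS << locality; /* 2x at 3, 4x at 4 */
          return 0;
  }

  int main(void)
  {
          int l;

          for (l = 0; l <= 4; l++)
                  printf("locality %d -> deadline offset %d ms\n",
                         l, deadline_offset(l));
          return 0;
  }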

-ck

---
 include/linux/jiffies.h |    2 -
 include/linux/sched.h   |    2 -
 kernel/sched_bfs.c      |   89 ++++++++++++++++++++++--------------------------
 3 files changed, 43 insertions(+), 50 deletions(-)

Index: linux-2.6.28/include/linux/jiffies.h
===================================================================
--- linux-2.6.28.orig/include/linux/jiffies.h   2010-12-14 22:13:10.975304692 +1100
+++ linux-2.6.28/include/linux/jiffies.h        2010-12-14 22:14:03.530569735 +1100
@@ -154,7 +154,7 @@ static inline u64 get_jiffies_64(void)
  * Have the 32 bit jiffies value wrap 5 minutes after boot
  * so jiffies wrap bugs show up earlier.
  */
-#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
+#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ))
 
 /*
  * Change timeval to jiffies, trying to avoid the
Index: linux-2.6.28/include/linux/sched.h
===================================================================
--- linux-2.6.28.orig/include/linux/sched.h     2010-12-14 22:13:10.965304640 +1100
+++ linux-2.6.28/include/linux/sched.h  2010-12-14 22:14:03.524569704 +1100
@@ -1426,7 +1426,7 @@ static inline void tsk_cpus_current(stru
 
 static inline void print_scheduler_version(void)
 {
-       printk(KERN_INFO"BFS CPU scheduler v0.357 by Con Kolivas.\n");
+       printk(KERN_INFO"BFS CPU scheduler v0.360 by Con Kolivas.\n");
 }
 
 static inline int iso_task(struct task_struct *p)
Index: linux-2.6.28/kernel/sched_bfs.c
===================================================================
--- linux-2.6.28.orig/kernel/sched_bfs.c        2010-12-14 22:13:10.983304734 +1100
+++ linux-2.6.28/kernel/sched_bfs.c     2010-12-14 22:14:54.061814177 +1100
@@ -204,7 +204,6 @@ struct rq {
 #ifdef CONFIG_NO_HZ
        unsigned char in_nohz_recently;
 #endif
-       struct task_struct *last_task;
 #endif
 
        struct task_struct *curr, *idle;
@@ -733,19 +732,12 @@ static int suitable_idle_cpus(struct tas
 
 static void resched_task(struct task_struct *p);
 
-/*
- * last_task stores the last non-idle task scheduled on the local rq for
- * cache warmth testing.
- */
-static inline void set_last_task(struct rq *rq, struct task_struct *p)
-{
-       rq->last_task = p;
-}
-
-#define CPUIDLE_CACHE_BUSY     (1)
-#define CPUIDLE_DIFF_CPU       (2)
-#define CPUIDLE_THREAD_BUSY    (4)
-#define CPUIDLE_DIFF_NODE      (8)
+#define CPUIDLE_DIFF_THREAD    (1)
+#define CPUIDLE_DIFF_CORE      (2)
+#define CPUIDLE_CACHE_BUSY     (4)
+#define CPUIDLE_DIFF_CPU       (8)
+#define CPUIDLE_THREAD_BUSY    (16)
+#define CPUIDLE_DIFF_NODE      (32)
 
 /*
  * The best idle CPU is chosen according to the CPUIDLE ranking above where the
@@ -798,27 +790,28 @@ static void resched_best_idle(struct tas
                }
                tmp_rq = cpu_rq(cpu_tmp);
 
-               if (rq->cpu_locality[cpu_tmp]) {
-                       /* Check rq->last_task hasn't been dereferenced */
-                       if (rq->last_task && p != rq->last_task) {
 #ifdef CONFIG_NUMA
-                               if (rq->cpu_locality[cpu_tmp] > 1)
-                                       ranking |= CPUIDLE_DIFF_NODE;
+               if (rq->cpu_locality[cpu_tmp] > 3)
+                       ranking |= CPUIDLE_DIFF_NODE;
+               else
 #endif
-                               ranking |= CPUIDLE_DIFF_CPU;
-                       }
-               }
+               if (rq->cpu_locality[cpu_tmp] > 2)
+                       ranking |= CPUIDLE_DIFF_CPU;
 #ifdef CONFIG_SCHED_MC
+               if (rq->cpu_locality[cpu_tmp] == 2)
+                       ranking |= CPUIDLE_DIFF_CORE;
                if (!(tmp_rq->cache_idle(cpu_tmp)))
                        ranking |= CPUIDLE_CACHE_BUSY;
 #endif
 #ifdef CONFIG_SCHED_SMT
+               if (rq->cpu_locality[cpu_tmp] == 1)
+                       ranking |= CPUIDLE_DIFF_THREAD;
                if (!(tmp_rq->siblings_idle(cpu_tmp)))
                        ranking |= CPUIDLE_THREAD_BUSY;
 #endif
                if (ranking < best_ranking) {
                        best_cpu = cpu_tmp;
-                       if (ranking <= 1)
+                       if (ranking == 0)
                                break;
                        best_ranking = ranking;
                }
@@ -835,11 +828,11 @@ static inline void resched_suitable_idle
 
 /*
  * The cpu cache locality difference between CPUs is used to determine how far
- * to offset the virtual deadline. "One" difference in locality means that one
+ * to offset the virtual deadline. <2 difference in locality means that one
  * timeslice difference is allowed longer for the cpu local tasks. This is
  * enough in the common case when tasks are up to 2* number of CPUs to keep
  * tasks within their shared cache CPUs only. CPUs on different nodes or not
- * even in this domain (NUMA) have "3" difference, allowing 4 times longer
+ * even in this domain (NUMA) have "4" difference, allowing 4 times longer
  * deadlines before being taken onto another cpu, allowing for 2* the double
 * seen by separate CPUs above.
  * Simple summary: Virtual deadlines are equal on shared cache CPUs, double
@@ -848,12 +841,11 @@ static inline void resched_suitable_idle
 static inline int
 cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
 {
-       /* Check rq->last_task hasn't been dereferenced */
-       if (likely(rq->last_task)) {
-               if (rq->last_task == p)
-                       return 0;
-       }
-       return rq->cpu_locality[cpu_of(task_rq)] * task_timeslice(p);
+       int locality = rq->cpu_locality[cpu_of(task_rq)] - 2;
+
+       if (locality > 0)
+               return task_timeslice(p) << locality;
+       return 0;
 }
 #else /* CONFIG_SMP */
 static inline void inc_qnr(void)
@@ -892,10 +884,6 @@ cache_distance(struct rq *task_rq, struc
 {
        return 0;
 }
-
-static inline void set_last_task(struct rq *rq, struct task_struct *p)
-{
-}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1287,10 +1275,10 @@ static void try_preempt(struct task_stru
                return;
        }
 
-       if (online_cpus(p))
+       if (likely(online_cpus(p)))
                cpus_and(tmp, cpu_online_map, p->cpus_allowed);
        else
-               (cpumask_copy(&tmp, &cpu_online_map));
+               return;
 
        latest_deadline = 0;
        highest_prio = -1;
@@ -2597,7 +2585,7 @@ need_resched_nonpreemptible:
                prev->last_ran = rq->clock;
 
                /* Task changed affinity off this CPU */
-               if (needs_other_cpu(prev, cpu))
+               if (unlikely(!cpu_isset(cpu, prev->cpus_allowed)))
                        resched_suitable_idle(prev);
                else if (!deactivate) {
                        if (!queued_notrunning()) {
@@ -2639,8 +2627,6 @@ need_resched_nonpreemptible:
        if (likely(prev != next)) {
                sched_info_switch(prev, next);
 
-               if (prev != idle)
-                       set_last_task(rq, prev);
                set_rq_task(rq, next);
                grq.nr_switches++;
                prev->oncpu = 0;
@@ -6054,10 +6040,12 @@ void __init sched_init_smp(void)
                                        cpu_set(other_cpu, rq->cache_siblings);
                        }
 #endif
-                       if (sd->level <= SD_LV_MC)
-                               locality = 0;
-                       else if (sd->level <= SD_LV_NODE)
+                       if (sd->level <= SD_LV_SIBLING)
                                locality = 1;
+                       else if (sd->level <= SD_LV_MC)
+                               locality = 2;
+                       else if (sd->level <= SD_LV_NODE)
+                               locality = 3;
                        else
                                continue;
 
@@ -6160,7 +6148,7 @@ void __init sched_init(void)
                        if (i == j)
                                rq->cpu_locality[j] = 0;
                        else
-                               rq->cpu_locality[j] = 3;
+                               rq->cpu_locality[j] = 4;
                }
        }
 #endif
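
(Illustration only, appended after the diff rather than inside it: a sketch of
how the new CPUIDLE_* ranking bits from the resched_best_idle() hunk order
candidate idle CPUs. Lower rankings win, and a ranking of 0, an idle CPU that
is the task's own logical core, ends the scan immediately. The candidate list
below is hypothetical.)

  #include <stdio.h>

  /* ranking bits as introduced above; higher bits mean a worse candidate */
  #define CPUIDLE_DIFF_THREAD    (1)
  #define CPUIDLE_DIFF_CORE      (2)
  #define CPUIDLE_CACHE_BUSY     (4)
  #define CPUIDLE_DIFF_CPU       (8)
  #define CPUIDLE_THREAD_BUSY    (16)
  #define CPUIDLE_DIFF_NODE      (32)

  int main(void)
  {
          struct { const char *what; int ranking; } cpu[] = {
                  { "other package, cache busy", CPUIDLE_DIFF_CPU | CPUIDLE_CACHE_BUSY },
                  { "SMT sibling of the task's core", CPUIDLE_DIFF_THREAD },
                  { "task's own logical core", 0 },
          };
          int i, best = 0, best_ranking = CPUIDLE_DIFF_NODE * 2;

          for (i = 0; i < 3; i++) {
                  if (cpu[i].ranking < best_ranking) {
                          best = i;
                          best_ranking = cpu[i].ranking;
                          if (best_ranking == 0)
                                  break;  /* idle CPU on the same core: stop */
                  }
          }
          printf("best idle CPU: %s (ranking %d)\n", cpu[best].what, best_ranking);
          return 0;
  }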