Make CPU offlining more robust by simply removing all affinity for processes
that no longer have any CPUs they can run on. This allows the machine stop
thread to complete offlining CPUs and makes for a little less overhead in
hotplugging.
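
What the new break_sole_affinity() helper further down boils down to is the
following sketch (locking, braces and the ratelimited printk are only in the
full hunk; every name used here is taken from it):

	do_each_thread(t, p) {
		if (!online_cpus(p))
			/* no online CPU left in the mask: allow them all */
			cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
	} while_each_thread(t, p);

It is called from sched_idle_next() with the grq lock held (last sched_bfs.c
hunk), which is what lets the machine stop thread finish the offline.
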
Allow SCHED_IDLEPRIO tasks to wake up idle CPUs in try_preempt. The previous
behaviour would have caused minor slowdowns for IDLEPRIO tasks, though only
on relatively quiescent systems.
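
The fix is simply a reordering of the early exits in try_preempt() so the
idle-CPU path is considered before the IDLEPRIO bail-out. A sketch of the
resulting order (resched_best_idle() and the returns are assumed here; only
the two tests appear in the quoted hunk):

	if (suitable_idle_cpus(p)) {
		resched_best_idle(p);	/* assumed idle-wake helper */
		return;
	}

	/* IDLEPRIO tasks never preempt anything, but they can now wake an
	 * idle CPU above before this check is reached */
	if (p->policy == SCHED_IDLEPRIO)
		return;
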
Remove inappropriate likely()/unlikely() annotations.
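
For example, the context_switch() hunk below drops the annotations on the
kernel-thread tests, presumably because tasks without an mm are not rare
there; the replacement lines (implied by the hunk rather than quoted in it)
are just the plain tests:

	-	if (unlikely(!mm)) {
	+	if (!mm) {

	-	if (unlikely(!prev->mm)) {
	+	if (!prev->mm) {
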
Update cpustat accounting for irq; interrupt load may previously have been
under-reported.
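
The change mirrors the existing softirq_pc percentage accumulator for hard
irq context. In outline (a sketch: the += and %= lines for irq_pc are implied
by the hunk rather than quoted, and tmp comes from the surrounding function):

	if (hardirq_count() - hardirq_offset) {
		rq->irq_pc += pc;
		if (rq->irq_pc >= 100) {
			rq->irq_pc %= 100;
			cpustat->irq = cputime64_add(cpustat->irq, tmp);
		}
	} else if (softirq_count()) {
		/* existing softirq_pc handling, unchanged */
		...
	}
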
 include/linux/sched.h |    2 +-
 kernel/sched_bfs.c    |   59 ++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 46 insertions(+), 15 deletions(-)
Index: linux-2.6.28/kernel/sched_bfs.c
===================================================================
--- linux-2.6.28.orig/kernel/sched_bfs.c 2011-01-01 14:44:45.863309853 +1100
+++ linux-2.6.28/kernel/sched_bfs.c 2011-01-01 14:45:09.246874529 +1100
#define NS_TO_MS(TIME) ((TIME) >> 20)
#define NS_TO_US(TIME) ((TIME) >> 10)
-#define RESCHED_US (100) /* Reschedule if less than this many us left */
+#define RESCHED_US (100) /* Reschedule if less than this many μs left */
@@ -1249,7 +1249,7 @@ static inline int online_cpus(struct tas
static inline int needs_other_cpu(struct task_struct *p, int cpu)
- if (unlikely(!cpu_isset(cpu, p->cpus_allowed) && online_cpus(p)))
+ if (unlikely(!cpu_isset(cpu, p->cpus_allowed)))
@@ -1266,15 +1266,15 @@ static void try_preempt(struct task_stru
- /* IDLEPRIO tasks never preempt anything */
- if (p->policy == SCHED_IDLEPRIO)
if (suitable_idle_cpus(p)) {
+ /* IDLEPRIO tasks never preempt anything */
+ if (p->policy == SCHED_IDLEPRIO)
if (likely(online_cpus(p)))
cpus_and(tmp, cpu_online_map, p->cpus_allowed);
@@ -1720,14 +1720,14 @@ context_switch(struct rq *rq, struct tas
arch_enter_lazy_cpu_mode();
- if (unlikely(!mm)) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
switch_mm(oldmm, mm, next);
- if (unlikely(!prev->mm)) {
prev->active_mm = NULL;
@@ -1853,9 +1853,13 @@ pc_system_time(struct rq *rq, struct tas
- if (hardirq_count() - hardirq_offset)
+ if (hardirq_count() - hardirq_offset) {
- else if (softirq_count()) {
+ if (rq->irq_pc >= 100) {
+ cpustat->irq = cputime64_add(cpustat->irq, tmp);
+ } else if (softirq_count()) {
if (rq->softirq_pc >= 100) {
rq->softirq_pc %= 100;
@@ -2245,7 +2249,7 @@ static void task_running_tick(struct rq
* Tasks that were scheduled in the first half of a tick are not
* allowed to run into the 2nd half of the next tick if they will
* run out of time slice in the interim. Otherwise, if they have
- * less than 100us of time slice left they will be rescheduled.
+ * less than RESCHED_US μs of time slice left they will be rescheduled.
if (rq->rq_time_slice > HALF_JIFFY_US)
@@ -2585,7 +2589,7 @@ need_resched_nonpreemptible:
prev->last_ran = rq->clock;
/* Task changed affinity off this CPU */
- if (unlikely(!cpu_isset(cpu, prev->cpus_allowed)))
+ if (needs_other_cpu(prev, cpu))
resched_suitable_idle(prev);
else if (!deactivate) {
if (!queued_notrunning()) {
@@ -3288,8 +3292,8 @@ recheck:
if (param->sched_priority < 0 ||
- (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
- (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
+ (p->mm && param->sched_priority > MAX_USER_RT_PRIO - 1) ||
+ (!p->mm && param->sched_priority > MAX_RT_PRIO - 1))
if (is_rt_policy(policy) != (param->sched_priority != 0))
@@ -3999,7 +4003,10 @@ void init_idle(struct task_struct *idle,
idle->prio = PRIO_LIMIT;
set_rq_task(rq, idle);
idle->cpus_allowed = cpumask_of_cpu(cpu);
+ /* Silence PROVE_RCU */
set_task_cpu(idle, cpu);
rq->curr = rq->idle = idle;
set_cpuidle_map(cpu);
@@ -4218,6 +4225,29 @@ void move_task_off_dead_cpu(int dead_cpu
+/* Run through task list and find tasks affined to just the dead cpu, then
+ * allocate a new affinity */
+static void break_sole_affinity(int src_cpu)
+ struct task_struct *p, *t;
+ do_each_thread(t, p) {
+ if (!online_cpus(p)) {
+ cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
+ * Don't tell them about moving exiting tasks or
+ * kernel threads (both mm NULL), since they never
+ if (p->mm && printk_ratelimit()) {
+ printk(KERN_INFO "process %d (%s) no "
+ "longer affine to cpu %d\n",
+ task_pid_nr(p), p->comm, src_cpu);
+ } while_each_thread(t, p);
* Schedules idle task to be the next runnable task on current CPU.
* It does so by boosting its priority to highest possible.
@@ -4238,6 +4268,7 @@ void sched_idle_next(void)
* and interrupts disabled on the current cpu.
grq_lock_irqsave(&flags);
+ break_sole_affinity(this_cpu);
__setscheduler(idle, rq, SCHED_FIFO, MAX_RT_PRIO - 1);
Index: linux-2.6.28/include/linux/sched.h
===================================================================
--- linux-2.6.28.orig/include/linux/sched.h 2010-12-16 15:43:24.006131284 +1100
+++ linux-2.6.28/include/linux/sched.h 2011-01-01 14:45:09.250874451 +1100
@@ -1426,7 +1426,7 @@ static inline void tsk_cpus_current(stru
static inline void print_scheduler_version(void)
- printk(KERN_INFO"BFS CPU scheduler v0.360 by Con Kolivas.\n");
+ printk(KERN_INFO"BFS CPU scheduler v0.363 by Con Kolivas.\n");
static inline int iso_task(struct task_struct *p)