From: Corey O'Connor Date: Thu, 2 Sep 2010 20:10:09 +0000 (-0700) Subject: adding patch for BFS 318 to 330 X-Git-Url: https://vcs.maemo.org/git/?p=kernel-bfs;a=commitdiff_plain;h=36791cfdf23884115a5689b7f668d8f7ce5cd69e adding patch for BFS 318 to 330 --- diff --git a/kernel-power-2.6.28/debian/patches/bfs-318-to-330.patch b/kernel-power-2.6.28/debian/patches/bfs-318-to-330.patch new file mode 100644 index 0000000..7c9a557 --- /dev/null +++ b/kernel-power-2.6.28/debian/patches/bfs-318-to-330.patch @@ -0,0 +1,581 @@ +--- + Documentation/scheduler/sched-BFS.txt | 9 - + include/linux/sched.h | 5 + kernel/sched_bfs.c | 286 ++++++++++++++++++---------------- + 3 files changed, 155 insertions(+), 145 deletions(-) + +Index: kernel-2.6.28/include/linux/sched.h +=================================================================== +--- kernel-2.6.28.orig/include/linux/sched.h ++++ kernel-2.6.28/include/linux/sched.h +@@ -1152,9 +1152,6 @@ struct task_struct { + + unsigned int policy; + cpumask_t cpus_allowed; +-#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_SCHED_BFS) +- cpumask_t unplugged_mask; +-#endif + + #ifdef CONFIG_PREEMPT_RCU + int rcu_read_lock_nesting; +@@ -1422,7 +1419,7 @@ static inline void tsk_cpus_current(stru + + static inline void print_scheduler_version(void) + { +- printk(KERN_INFO"BFS CPU scheduler v0.318 by Con Kolivas ported by ToAsTcfh.\n"); ++ printk(KERN_INFO"BFS CPU scheduler v0.330 by Con Kolivas ported by ToAsTcfh.\n"); + } + + static inline int iso_task(struct task_struct *p) +Index: kernel-2.6.28/kernel/sched_bfs.c +=================================================================== +--- kernel-2.6.28.orig/kernel/sched_bfs.c ++++ kernel-2.6.28/kernel/sched_bfs.c +@@ -172,6 +172,11 @@ struct global_rq { + #ifdef CONFIG_SMP + unsigned long qnr; /* queued not running */ + cpumask_t cpu_idle_map; ++ int idle_cpus; ++#endif ++#if BITS_PER_LONG < 64 ++ unsigned long jiffies; ++ u64 jiffies_64; + #endif + }; + +@@ -188,6 +193,7 @@ struct rq { + unsigned char in_nohz_recently; + #endif + #endif ++ unsigned int skip_clock_update; + + struct task_struct *curr, *idle; + struct mm_struct *prev_mm; +@@ -198,6 +204,7 @@ struct rq { + int rq_time_slice; + u64 rq_last_ran; + int rq_prio; ++ int rq_running; /* There is a task running */ + + /* Accurate timekeeping data */ + u64 timekeep_clock; +@@ -331,7 +338,8 @@ static struct rq *uprq; + */ + static inline void update_rq_clock(struct rq *rq) + { +- rq->clock = sched_clock_cpu(cpu_of(rq)); ++ if (!rq->skip_clock_update) ++ rq->clock = sched_clock_cpu(cpu_of(rq)); + } + + static inline int task_running(struct task_struct *p) +@@ -506,6 +514,43 @@ static inline void finish_lock_switch(st + #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ + + /* ++ * In order to have a monotonic clock that does not wrap we have a 64 bit ++ * unsigned long that's protected by grq.lock used in place of jiffies on ++ * 32 bit builds. ++ */ ++#if BITS_PER_LONG < 64 ++static inline void update_gjiffies(void) ++{ ++ if (grq.jiffies != jiffies) { ++ grq_lock(); ++ grq.jiffies = jiffies; ++ grq.jiffies_64++; ++ grq_unlock(); ++ } ++} ++ ++#define gjiffies (grq.jiffies_64) ++ ++#else /* BITS_PER_LONG < 64 */ ++static inline void update_gjiffies(void) ++{ ++} ++ ++#define gjiffies jiffies ++ ++#endif /* BITS_PER_LONG < 64 */ ++ ++static inline int deadline_before(u64 deadline, u64 time) ++{ ++ return (deadline < time); ++} ++ ++static inline int deadline_after(u64 deadline, u64 time) ++{ ++ return (deadline > time); ++} ++ ++/* + * A task that is queued but not running will be on the grq run list. + * A task that is not running or queued will not be on the grq run list. + * A task that is currently running will have ->oncpu set but not on the +@@ -628,21 +673,28 @@ static inline int queued_notrunning(void + } + + /* +- * The cpu_idle_map stores a bitmap of all the cpus currently idle to +- * allow easy lookup of whether any suitable idle cpus are available. ++ * The cpu_idle_map stores a bitmap of all the CPUs currently idle to ++ * allow easy lookup of whether any suitable idle CPUs are available. ++ * It's cheaper to maintain a binary yes/no if there are any idle CPUs on the ++ * idle_cpus variable than to do a full bitmask check when we are busy. + */ + static inline void set_cpuidle_map(unsigned long cpu) + { + cpu_set(cpu, grq.cpu_idle_map); ++ grq.idle_cpus = 1; + } + + static inline void clear_cpuidle_map(unsigned long cpu) + { + cpu_clear(cpu, grq.cpu_idle_map); ++ if (cpus_empty(grq.cpu_idle_map)) ++ grq.idle_cpus = 0; + } + + static int suitable_idle_cpus(struct task_struct *p) + { ++ if (!grq.idle_cpus) ++ return 0; + return (cpus_intersects(p->cpus_allowed, grq.cpu_idle_map)); + } + +@@ -1107,6 +1159,25 @@ void kick_process(struct task_struct *p) + #define rq_idle(rq) ((rq)->rq_prio == PRIO_LIMIT) + #define task_idle(p) ((p)->prio == PRIO_LIMIT) + ++#ifdef CONFIG_HOTPLUG_CPU ++/* ++ * Check to see if there is a task that is affined only to offline CPUs but ++ * still wants runtime. This happens to kernel threads during suspend/halt and ++ * disabling of CPUs. ++ */ ++static inline int online_cpus(struct task_struct *p) ++{ ++ return (likely(cpus_intersects(cpu_online_map, p->cpus_allowed))); ++} ++#else /* CONFIG_HOTPLUG_CPU */ ++/* All available CPUs are always online without hotplug. */ ++static inline int online_cpus(struct task_struct *p) ++{ ++ return 1; ++} ++#endif ++ ++ + /* + * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the + * basis of earlier deadlines. SCHED_BATCH, ISO and IDLEPRIO don't preempt +@@ -1128,7 +1199,11 @@ static void try_preempt(struct task_stru + return; + } + +- cpus_and(tmp, cpu_online_map, p->cpus_allowed); ++ if (online_cpus(p)) ++ cpus_and(tmp, cpu_online_map, p->cpus_allowed); ++ else ++ (cpumask_copy(&tmp, &cpu_online_map)); ++ + latest_deadline = 0; + highest_prio = -1; + +@@ -1146,7 +1221,7 @@ static void try_preempt(struct task_stru + cache_distance(this_rq, rq, p); + + if (rq_prio > highest_prio || +- (time_after(offset_deadline, latest_deadline) || ++ (deadline_after(offset_deadline, latest_deadline) || + (offset_deadline == latest_deadline && this_rq == rq))) { + latest_deadline = offset_deadline; + highest_prio = rq_prio; +@@ -1155,21 +1230,21 @@ static void try_preempt(struct task_stru + } + + if (p->prio > highest_prio || (p->prio == highest_prio && +- p->policy == SCHED_NORMAL && !time_before(p->deadline, latest_deadline))) +- return; ++ p->policy == SCHED_NORMAL && ++ !deadline_before(p->deadline, latest_deadline))) ++ return; + + /* p gets to preempt highest_prio_rq->curr */ + resched_task(highest_prio_rq->curr); +- return; ++ highest_prio_rq->skip_clock_update = 1; + } + #else /* CONFIG_SMP */ + static void try_preempt(struct task_struct *p, struct rq *this_rq) + { + if (p->prio < uprq->rq_prio || + (p->prio == uprq->rq_prio && p->policy == SCHED_NORMAL && +- time_before(p->deadline, uprq->rq_deadline))) ++ deadline_before(p->deadline, uprq->rq_deadline))) + resched_task(uprq->curr); +- return; + } + #endif /* CONFIG_SMP */ + +@@ -1335,9 +1410,9 @@ void wake_up_new_task(struct task_struct + struct rq *rq; + + rq = task_grq_lock(p, &flags); ; ++ p->state = TASK_RUNNING; + parent = p->parent; +- BUG_ON(p->state != TASK_RUNNING); +- /* Unnecessary but small chance that the parent changed cpus */ ++ /* Unnecessary but small chance that the parent changed CPU */ + set_task_cpu(p, task_cpu(parent)); + activate_task(p, rq); + trace_mark(kernel_sched_wakeup_new, +@@ -2015,15 +2090,16 @@ static void clear_iso_refractory(void) + /* + * Test if SCHED_ISO tasks have run longer than their alloted period as RT + * tasks and set the refractory flag if necessary. There is 10% hysteresis +- * for unsetting the flag. ++ * for unsetting the flag. 115/128 is ~90/100 as a fast shift instead of a ++ * slow division. + */ + static unsigned int test_ret_isorefractory(struct rq *rq) + { + if (likely(!grq.iso_refractory)) { +- if (grq.iso_ticks / ISO_PERIOD > sched_iso_cpu) ++ if (grq.iso_ticks > ISO_PERIOD * sched_iso_cpu) + set_iso_refractory(); + } else { +- if (grq.iso_ticks / ISO_PERIOD < (sched_iso_cpu * 90 / 100)) ++ if (grq.iso_ticks < ISO_PERIOD * (sched_iso_cpu * 115 / 128)) + clear_iso_refractory(); + } + return grq.iso_refractory; +@@ -2042,8 +2118,8 @@ static inline void no_iso_tick(void) + if (grq.iso_ticks) { + grq_lock(); + grq.iso_ticks -= grq.iso_ticks / ISO_PERIOD + 1; +- if (unlikely(grq.iso_refractory && grq.iso_ticks / +- ISO_PERIOD < (sched_iso_cpu * 90 / 100))) ++ if (unlikely(grq.iso_refractory && grq.iso_ticks < ++ ISO_PERIOD * (sched_iso_cpu * 115 / 128))) + clear_iso_refractory(); + grq_unlock(); + } +@@ -2110,6 +2186,7 @@ void scheduler_tick(void) + sched_clock_tick(); + update_rq_clock(rq); + update_cpu_clock(rq, rq->curr, 1); ++ update_gjiffies(); + if (!rq_idle(rq)) + task_running_tick(rq); + else +@@ -2175,7 +2252,7 @@ EXPORT_SYMBOL(sub_preempt_count); + #endif + + /* +- * Deadline is "now" in jiffies + (offset by priority). Setting the deadline ++ * Deadline is "now" in gjiffies + (offset by priority). Setting the deadline + * is the key to everything. It distributes cpu fairly amongst tasks of the + * same nice value, it proportions cpu according to nice level, it means the + * task that last woke up the longest ago has the earliest deadline, thus +@@ -2211,7 +2288,7 @@ static inline void time_slice_expired(st + { + reset_first_time_slice(p); + p->time_slice = timeslice(); +- p->deadline = jiffies + task_deadline_diff(p); ++ p->deadline = gjiffies + task_deadline_diff(p); + } + + static inline void check_deadline(struct task_struct *p) +@@ -2237,22 +2314,16 @@ static inline void check_deadline(struct + * earliest deadline. + * Finally if no SCHED_NORMAL tasks are found, SCHED_IDLEPRIO tasks are + * selected by the earliest deadline. +- * Once deadlines are expired (jiffies has passed it) tasks are chosen in FIFO +- * order. Note that very few tasks will be FIFO for very long because they +- * only end up that way if they sleep for long or if if there are enough fully +- * cpu bound tasks to push the load to ~8 higher than the number of CPUs for +- * nice 0. + */ + static inline struct + task_struct *earliest_deadline_task(struct rq *rq, struct task_struct *idle) + { + unsigned long dl, earliest_deadline = 0; /* Initialise to silence compiler */ +- struct task_struct *p, *edt; ++ struct task_struct *p, *edt = idle; + unsigned int cpu = cpu_of(rq); + struct list_head *queue; + int idx = 0; + +- edt = idle; + retry: + idx = find_next_bit(grq.prio_bitmap, PRIO_LIMIT, idx); + if (idx >= PRIO_LIMIT) +@@ -2260,7 +2331,7 @@ retry: + queue = grq.queue + idx; + list_for_each_entry(p, queue, run_list) { + /* Make sure cpu affinity is ok */ +- if (!cpu_isset(cpu, p->cpus_allowed)) ++ if (online_cpus(p) && !cpu_isset(cpu, p->cpus_allowed)) + continue; + if (idx < MAX_RT_PRIO) { + /* We found an rt task */ +@@ -2271,21 +2342,12 @@ retry: + dl = p->deadline + cache_distance(task_rq(p), rq, p); + + /* +- * Look for tasks with old deadlines and pick them in FIFO +- * order, taking the first one found. +- */ +- if (time_is_before_jiffies(dl)) { +- edt = p; +- goto out_take; +- } +- +- /* + * No rt tasks. Find the earliest deadline task. Now we're in + * O(n) territory. This is what we silenced the compiler for: + * edt will always start as idle. + */ + if (edt == idle || +- time_before(dl, earliest_deadline)) { ++ deadline_before(dl, earliest_deadline)) { + earliest_deadline = dl; + edt = p; + } +@@ -2358,6 +2420,10 @@ static inline void set_rq_task(struct rq + rq->rq_last_ran = p->last_ran; + rq->rq_policy = p->policy; + rq->rq_prio = p->prio; ++ if (p != rq->idle) ++ rq->rq_running = 1; ++ else ++ rq->rq_running = 0; + } + + static void reset_rq_task(struct rq *rq, struct task_struct *p) +@@ -2395,6 +2461,7 @@ need_resched_nonpreemptible: + local_irq_disable(); + update_rq_clock(rq); + update_cpu_clock(rq, prev, 0); ++ rq->skip_clock_update = 0; + + grq_lock(); + clear_tsk_need_resched(prev); +@@ -2415,8 +2482,10 @@ need_resched_nonpreemptible: + return_task(prev, deactivate); + /* Task changed affinity off this cpu */ + if (unlikely(!cpus_intersects(prev->cpus_allowed, +- cpumask_of_cpu(cpu)))) +- resched_suitable_idle(prev); ++ cpumask_of_cpu(cpu)))) { ++ if (online_cpus(prev)) ++ resched_suitable_idle(prev); ++ } + } + + if (likely(queued_notrunning())) { +@@ -2461,7 +2530,7 @@ need_resched_nonpreemptible: + goto need_resched_nonpreemptible; + preempt_enable_no_resched(); + if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) +- goto need_resched; ++ goto need_resched; + } + EXPORT_SYMBOL(schedule); + +@@ -2829,7 +2898,7 @@ void rt_mutex_setprio(struct task_struct + + BUG_ON(prio < 0 || prio > MAX_PRIO); + +- rq = time_task_grq_lock(p, &flags); ++ rq = task_grq_lock(p, &flags); + + oldprio = p->prio; + queued = task_queued(p); +@@ -2976,7 +3045,8 @@ int task_prio(const struct task_struct * + if (prio <= 0) + goto out; + +- delta = (p->deadline - jiffies) * 40 / longest_deadline_diff(); ++ delta = p->deadline - gjiffies; ++ delta = delta * 40 / longest_deadline_diff(); + if (delta > 0 && delta <= 80) + prio += delta; + if (idleprio_task(p)) +@@ -3126,7 +3196,7 @@ recheck: + if (policy == SCHED_NORMAL) + break; + if (policy != SCHED_IDLEPRIO) +- return -EPERM; ++ return -EPERM; + break; + case SCHED_IDLEPRIO: + if (policy == SCHED_IDLEPRIO) +@@ -3794,9 +3864,6 @@ void init_idle(struct task_struct *idle, + rq->curr = rq->idle = idle; + idle->oncpu = 1; + set_cpuidle_map(cpu); +-#ifdef CONFIG_HOTPLUG_CPU +- idle->unplugged_mask = CPU_MASK_NONE; +-#endif + grq_unlock_irqrestore(&flags); + + /* Set the preempt count _outside_ the spinlocks! */ +@@ -3975,8 +4042,11 @@ int set_cpus_allowed_ptr(struct task_str + + if (task_running(p)) { + /* Task is running on the wrong cpu now, reschedule it. */ +- set_tsk_need_resched(p); +- running_wrong = 1; ++ if (rq == this_rq()) { ++ set_tsk_need_resched(p); ++ running_wrong = 1; ++ } else ++ resched_task(p); + } else + set_task_cpu(p, any_online_cpu(*new_mask)); + +@@ -3993,7 +4063,24 @@ out: + EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); + + #ifdef CONFIG_HOTPLUG_CPU +-/* Schedules idle task to be the next runnable task on current CPU. ++/* ++ * Reschedule a task if it's on a dead CPU. ++ */ ++void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) ++{ ++ unsigned long flags; ++ struct rq *rq, *dead_rq; ++ ++ dead_rq = cpu_rq(dead_cpu); ++ rq = task_grq_lock(p, &flags); ++ if (rq == dead_rq && task_running(p)) ++ resched_task(p); ++ task_grq_unlock(&flags); ++ ++} ++ ++/* ++ * Schedules idle task to be the next runnable task on current CPU. + * It does so by boosting its priority to highest possible. + * Used by CPU offline code. + */ +@@ -4011,7 +4098,7 @@ void sched_idle_next(void) + * Strictly not necessary since rest of the CPUs are stopped by now + * and interrupts disabled on the current cpu. + */ +- time_grq_lock(rq, &flags); ++ grq_lock_irqsave(&flags); + + __setscheduler(idle, rq, SCHED_FIFO, MAX_RT_PRIO - 1); + +@@ -4295,7 +4382,7 @@ migration_call(struct notifier_block *nf + struct task_struct *idle; + int cpu = (long)hcpu; + unsigned long flags; +- struct rq *rq; ++ struct rq *rq = cpu_rq(cpu); + + switch (action) { + +@@ -4306,14 +4393,12 @@ migration_call(struct notifier_block *nf + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + /* Update our root-domain */ +- rq = cpu_rq(cpu); + grq_lock_irqsave(&flags); + if (rq->rd) { + BUG_ON(!cpu_isset(cpu, rq->rd->span)); + + set_rq_online(rq); + } +- add_cpu(cpu); + grq_unlock_irqrestore(&flags); + break; + +@@ -4325,11 +4410,9 @@ migration_call(struct notifier_block *nf + case CPU_DEAD: + case CPU_DEAD_FROZEN: + cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */ +- rq = cpu_rq(cpu); + idle = rq->idle; + /* Idle task back to normal (off runqueue, low prio) */ + grq_lock_irq(); +- remove_cpu(cpu); + return_task(idle, 1); + idle->static_prio = MAX_PRIO; + __setscheduler(idle, rq, SCHED_NORMAL, 0); +@@ -4342,7 +4425,7 @@ migration_call(struct notifier_block *nf + + case CPU_DYING: + case CPU_DYING_FROZEN: +- rq = cpu_rq(cpu); ++ /* Update our root-domain */ + grq_lock_irqsave(&flags); + if (rq->rd) { + BUG_ON(!cpu_isset(cpu, rq->rd->span)); +@@ -5752,7 +5835,7 @@ static int cache_cpu_idle(unsigned long + void __init sched_init_smp(void) + { + struct sched_domain *sd; +- int cpu, i, cpu_scale; ++ int cpu, cpus; + + cpumask_t non_isolated_cpus; + +@@ -5784,16 +5867,11 @@ void __init sched_init_smp(void) + + /* + * Assume that every added cpu gives us slightly less overall latency +- * allowing us to increase the base rr_interval, but in a non linear +- * fashion. ++ * allowing us to increase the base rr_interval, non-linearly and with ++ * an upper bound. + */ +- cpu_scale = ilog2(num_online_cpus()); +- rr_interval *= 100; +- for (i = 0; i < cpu_scale; i++) { +- rr_interval *= 3; +- rr_interval /= 2; +- } +- rr_interval /= 100; ++ cpus = num_online_cpus(); ++ rr_interval = rr_interval * (4 * cpus + 4) / (cpus + 6); + + grq_lock_irq(); + /* +@@ -5874,8 +5952,12 @@ void __init sched_init(void) + prio_ratios[i] = prio_ratios[i - 1] * 11 / 10; + + spin_lock_init(&grq.lock); ++ grq.nr_running = grq.nr_uninterruptible = grq.nr_switches = 0; ++ grq.iso_ticks = grq.iso_refractory = 0; + #ifdef CONFIG_SMP + init_defrootdomain(); ++ grq.qnr = grq.idle_cpus = 0; ++ cpumask_clear(&grq.cpu_idle_map); + #else + uprq = &per_cpu(runqueues, 0); + #endif +@@ -5994,7 +6076,6 @@ void normalize_rt_tasks(void) + + spin_lock_irqsave(&p->pi_lock, flags); + rq = __task_grq_lock(p); +- update_rq_clock(rq); + + queued = task_queued(p); + if (queued) +@@ -6103,6 +6184,10 @@ cputime_t task_stime(struct task_struct + } + #endif + ++void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) ++{ ++} ++ + inline cputime_t task_gtime(struct task_struct *p) + { + return p->gtime;