From c8e511560ce9bdd38d3e8bbfe35a63a9b4114172 Mon Sep 17 00:00:00 2001
From: Peter Hunt
Date: Mon, 29 Aug 2011 09:44:10 +0000
Subject: [PATCH] BFS prerequisites and tidy up of resched functions

---
 .../debian/patches/bfs/bfs-implement_prereqs.patch | 169 +++++
 .../debian/patches/bfs/bfs-setup_prereqs.patch     | 703 ++++++++++++++++++++
 .../debian/patches/bfs/bfs-tidy_up_resched.patch   |  84 +++
 .../tick_sched-set_inidle_unconditionally.patch    |  30 +
 kernel-bfs-2.6.28/debian/patches/series            |   4 +
 5 files changed, 990 insertions(+)
 create mode 100644 kernel-bfs-2.6.28/debian/patches/bfs/bfs-implement_prereqs.patch
 create mode 100644 kernel-bfs-2.6.28/debian/patches/bfs/bfs-setup_prereqs.patch
 create mode 100644 kernel-bfs-2.6.28/debian/patches/bfs/bfs-tidy_up_resched.patch
 create mode 100644 kernel-bfs-2.6.28/debian/patches/bfs/tick_sched-set_inidle_unconditionally.patch

diff --git a/kernel-bfs-2.6.28/debian/patches/bfs/bfs-implement_prereqs.patch b/kernel-bfs-2.6.28/debian/patches/bfs/bfs-implement_prereqs.patch
new file mode 100644
index 0000000..23f3e9f
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/bfs/bfs-implement_prereqs.patch
@@ -0,0 +1,169 @@
+--- linux-2.6.28.orig/kernel/sched_bfs.c	2011-06-18 18:46:15.009219526 +0200
++++ linux-2.6.28.new/kernel/sched_bfs.c	2011-06-18 19:06:34.087896512 +0200
+@@ -229,9 +229,6 @@ struct rq {
+ 	struct sched_info rq_sched_info;
+ 
+ 	/* sys_sched_yield() stats */
+-	unsigned int yld_exp_empty;
+-	unsigned int yld_act_empty;
+-	unsigned int yld_both_empty;
+ 	unsigned int yld_count;
+ 
+ 	/* schedule() stats */
+@@ -2585,6 +2582,46 @@ void __wake_up_locked(wait_queue_head_t
+ 	__wake_up_common(q, mode, 1, 0, NULL);
+ }
+ 
++void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
++{
++	__wake_up_common(q, mode, 1, 0, key);
++}
++
++/**
++ * __wake_up_sync_key - wake up threads blocked on a waitqueue.
++ * @q: the waitqueue
++ * @mode: which threads
++ * @nr_exclusive: how many wake-one or wake-many threads to wake up
++ * @key: opaque value to be passed to wakeup targets
++ *
++ * The sync wakeup differs that the waker knows that it will schedule
++ * away soon, so while the target thread will be woken up, it will not
++ * be migrated to another CPU - ie. the two threads are 'synchronised'
++ * with each other. This can prevent needless bouncing between CPUs.
++ *
++ * On UP it can prevent extra preemption.
++ *
++ * It may be assumed that this function implies a write memory barrier before
++ * changing the task state if and only if any tasks are woken up.
++ */
++void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
++			int nr_exclusive, void *key)
++{
++	unsigned long flags;
++	int sync = 1;
++
++	if (unlikely(!q))
++		return;
++
++	if (unlikely(!nr_exclusive))
++		sync = 0;
++
++	spin_lock_irqsave(&q->lock, flags);
++	__wake_up_common(q, mode, nr_exclusive, sync, key);
++	spin_unlock_irqrestore(&q->lock, flags);
++}
++EXPORT_SYMBOL_GPL(__wake_up_sync_key);
++
+ /**
+  * __wake_up_sync - wake up threads blocked on a waitqueue.
+  * @q: the waitqueue
+@@ -2615,6 +2652,18 @@ void __wake_up_sync(wait_queue_head_t *q
+ }
+ EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
+ 
++/**
++ * complete: - signals a single thread waiting on this completion
++ * @x:  holds the state of this particular completion
++ *
++ * This will wake up a single thread waiting on this completion. Threads will be
++ * awakened in the same order in which they were queued.
++ *
++ * See also complete_all(), wait_for_completion() and related routines.
++ *
++ * It may be assumed that this function implies a write memory barrier before
++ * changing the task state if and only if any tasks are woken up.
++ */
+ void complete(struct completion *x)
+ {
+ 	unsigned long flags;
+@@ -2626,6 +2675,15 @@ void complete(struct completion *x)
+ }
+ EXPORT_SYMBOL(complete);
+ 
++/**
++ * complete_all: - signals all threads waiting on this completion
++ * @x:  holds the state of this particular completion
++ *
++ * This will wake up all threads waiting on this particular completion event.
++ *
++ * It may be assumed that this function implies a write memory barrier before
++ * changing the task state if and only if any tasks are woken up.
++ */
+ void complete_all(struct completion *x)
+ {
+ 	unsigned long flags;
+@@ -2677,12 +2735,31 @@ wait_for_common(struct completion *x, lo
+ 	return timeout;
+ }
+ 
++/**
++ * wait_for_completion: - waits for completion of a task
++ * @x:  holds the state of this particular completion
++ *
++ * This waits to be signaled for completion of a specific task. It is NOT
++ * interruptible and there is no timeout.
++ *
++ * See also similar routines (i.e. wait_for_completion_timeout()) with timeout
++ * and interrupt capability. Also see complete().
++ */
+ void __sched wait_for_completion(struct completion *x)
+ {
+ 	wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
+ }
+ EXPORT_SYMBOL(wait_for_completion);
+ 
++/**
++ * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
++ * @x:  holds the state of this particular completion
++ * @timeout:  timeout value in jiffies
++ *
++ * This waits for either a completion of a specific task to be signaled or for a
++ * specified timeout to expire. The timeout is in jiffies. It is not
++ * interruptible.
++ */
+ unsigned long __sched
+ wait_for_completion_timeout(struct completion *x, unsigned long timeout)
+ {
+@@ -2690,6 +2767,13 @@ wait_for_completion_timeout(struct compl
+ }
+ EXPORT_SYMBOL(wait_for_completion_timeout);
+ 
++/**
++ * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
++ * @x:  holds the state of this particular completion
++ *
++ * This waits for completion of a specific task to be signaled. It is
++ * interruptible.
++ */
+ int __sched wait_for_completion_interruptible(struct completion *x)
+ {
+ 	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
+@@ -2699,6 +2783,14 @@ int __sched wait_for_completion_interrup
+ }
+ EXPORT_SYMBOL(wait_for_completion_interruptible);
+ 
++/**
++ * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
++ * @x:  holds the state of this particular completion
++ * @timeout:  timeout value in jiffies
++ *
++ * This waits for either a completion of a specific task to be signaled or for a
++ * specified timeout to expire. It is interruptible. The timeout is in jiffies.
++ */
+ unsigned long __sched
+ wait_for_completion_interruptible_timeout(struct completion *x,
+ 					  unsigned long timeout)
+ {
+@@ -2707,6 +2799,13 @@ wait_for_completion_interruptible_timeou
+ }
+ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
+ 
++/**
++ * wait_for_completion_killable: - waits for completion of a task (killable)
++ * @x:  holds the state of this particular completion
++ *
++ * This waits to be signaled for completion of a specific task. It can be
++ * interrupted by a kill signal.
++ */
+ int __sched wait_for_completion_killable(struct completion *x)
+ {
+ 	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
diff --git a/kernel-bfs-2.6.28/debian/patches/bfs/bfs-setup_prereqs.patch b/kernel-bfs-2.6.28/debian/patches/bfs/bfs-setup_prereqs.patch
new file mode 100644
index 0000000..a95045e
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/bfs/bfs-setup_prereqs.patch
@@ -0,0 +1,703 @@
+diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
+index 994b4d3..1e1dc49 100644
+--- a/arch/arm/kernel/process.c
++++ b/arch/arm/kernel/process.c
+@@ -373,7 +373,7 @@ void release_thread(struct task_struct *dead_task)
+ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+ 
+ int
+-copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start,
++copy_thread(unsigned long clone_flags, unsigned long stack_start,
+ 	unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs)
+ {
+ 	struct thread_info *thread = task_thread_info(p);
+diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
+index bc84e12..99bf9b0 100644
+--- a/drivers/char/tty_io.c
++++ b/drivers/char/tty_io.c
+@@ -2682,7 +2682,7 @@ void __do_SAK(struct tty_struct *tty)
+ 	/* Kill the entire session */
+ 	do_each_pid_task(session, PIDTYPE_SID, p) {
+ 		printk(KERN_NOTICE "SAK: killed process %d"
+-			" (%s): task_session_nr(p)==tty->session\n",
++			" (%s): task_session(p)==tty->session\n",
+ 			task_pid_nr(p), p->comm);
+ 		send_sig(SIGKILL, p, 1);
+ 	} while_each_pid_task(session, PIDTYPE_SID, p);
+@@ -2692,7 +2692,7 @@ void __do_SAK(struct tty_struct *tty)
+ 	do_each_thread(g, p) {
+ 		if (p->signal->tty == tty) {
+ 			printk(KERN_NOTICE "SAK: killed process %d"
+-				" (%s): task_session_nr(p)==tty->session\n",
++				" (%s): task_session(p)==tty->session\n",
+ 				task_pid_nr(p), p->comm);
+ 			send_sig(SIGKILL, p, 1);
+ 			continue;
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 011db2f..802d144 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -538,25 +539,8 @@ struct signal_struct {
+ 
+ 	struct list_head cpu_timers[3];
+ 
+-	/* job control IDs */
+-
+-	/*
+-	 * pgrp and session fields are deprecated.
+-	 * use the task_session_Xnr and task_pgrp_Xnr routines below
+-	 */
+-
+-	union {
+-		pid_t pgrp __deprecated;
+-		pid_t __pgrp;
+-	};
+-
+ 	struct pid *tty_old_pgrp;
+ 
+-	union {
+-		pid_t session __deprecated;
+-		pid_t __session;
+-	};
+-
+ 	/* boolean value for session group leader */
+ 	int leader;
+ 
+@@ -1453,16 +1437,6 @@ static inline int rt_task(struct task_struct *p)
+ 	return rt_prio(p->prio);
+ }
+ 
+-static inline void set_task_session(struct task_struct *tsk, pid_t session)
+-{
+-	tsk->signal->__session = session;
+-}
+-
+-static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp)
+-{
+-	tsk->signal->__pgrp = pgrp;
+-}
+-
+ static inline struct pid *task_pid(struct task_struct *task)
+ {
+ 	return task->pids[PIDTYPE_PID].pid;
+@@ -1473,6 +1447,11 @@ static inline struct pid *task_tgid(struct task_struct *task)
+ 	return task->group_leader->pids[PIDTYPE_PID].pid;
+ }
+ 
++/*
++ * Without tasklist or rcu lock it is not safe to dereference
++ * the result of task_pgrp/task_session even if task == current,
++ * we can race with another thread doing sys_setsid/sys_setpgid.
++ */
+ static inline struct pid *task_pgrp(struct task_struct *task)
+ {
+ 	return task->group_leader->pids[PIDTYPE_PGID].pid;
+@@ -1498,17 +1477,23 @@ struct pid_namespace;
+  *
+  * see also pid_nr() etc in include/linux/pid.h
+  */
++pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
++			struct pid_namespace *ns);
+ 
+ static inline pid_t task_pid_nr(struct task_struct *tsk)
+ {
+ 	return tsk->pid;
+ }
+ 
+-pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
++static inline pid_t task_pid_nr_ns(struct task_struct *tsk,
++					struct pid_namespace *ns)
++{
++	return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
++}
+ 
+ static inline pid_t task_pid_vnr(struct task_struct *tsk)
+ {
+-	return pid_vnr(task_pid(tsk));
++	return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
+ }
+ 
+ 
+@@ -1525,31 +1510,34 @@ static inline pid_t task_tgid_vnr(struct task_struct *tsk)
+ }
+ 
+ 
+-static inline pid_t task_pgrp_nr(struct task_struct *tsk)
++static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk,
++					struct pid_namespace *ns)
+ {
+-	return tsk->signal->__pgrp;
++	return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns);
+ }
+ 
+-pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+-
+ static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
+ {
+-	return pid_vnr(task_pgrp(tsk));
++	return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL);
+ }
+ 
+ 
+-static inline pid_t task_session_nr(struct task_struct *tsk)
++static inline pid_t task_session_nr_ns(struct task_struct *tsk,
++					struct pid_namespace *ns)
+ {
+-	return tsk->signal->__session;
++	return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns);
+ }
+ 
+-pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+-
+ static inline pid_t task_session_vnr(struct task_struct *tsk)
+ {
+-	return pid_vnr(task_session(tsk));
++	return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
+ }
+ 
++/* obsolete, do not use */
++static inline pid_t task_pgrp_nr(struct task_struct *tsk)
++{
++	return task_pgrp_nr_ns(tsk, &init_pid_ns);
++}
+ 
+ /**
+  * pid_alive - check that a task structure is not stale
+@@ -1949,7 +1937,8 @@ extern void mm_release(struct task_struct *, struct mm_struct *);
+ /* Allocate a new mm structure and copy contents from tsk->mm */
+ extern struct mm_struct *dup_mm(struct task_struct *tsk);
+ 
+-extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
++extern int copy_thread(unsigned long, unsigned long, unsigned long,
++			struct task_struct *, struct pt_regs *);
+ extern void flush_thread(void);
+ extern void exit_thread(void);
+ 
+diff --git a/include/linux/wait.h b/include/linux/wait.h
+index a210ede..0d2eeb0 100644
+--- a/include/linux/wait.h
++++ b/include/linux/wait.h
+@@ -135,8 +135,11 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
+ void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+ 			int nr_exclusive, int sync, void *key);
+ void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
+-extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
+-extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
++void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
++void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
++			void *key);
++void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
++void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
+ void __wake_up_bit(wait_queue_head_t *, void *, int);
+ int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
+ int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
+diff --git a/kernel/exit.c b/kernel/exit.c
+index efd30cc..ca734c6f 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -362,16 +362,12 @@ static void reparent_to_kthreadd(void)
+ void __set_special_pids(struct pid *pid)
+ {
+ 	struct task_struct *curr = current->group_leader;
+-	pid_t nr = pid_nr(pid);
+ 
+-	if (task_session(curr) != pid) {
++	if (task_session(curr) != pid)
+ 		change_pid(curr, PIDTYPE_SID, pid);
+-		set_task_session(curr, nr);
+-	}
+-	if (task_pgrp(curr) != pid) {
++
++	if (task_pgrp(curr) != pid)
+ 		change_pid(curr, PIDTYPE_PGID, pid);
+-		set_task_pgrp(curr, nr);
+-	}
+ }
+ 
+ static void set_special_pids(struct pid *pid)
+@@ -815,33 +811,44 @@ static void ptrace_exit_finish(struct task_struct *parent,
+ 	}
+ }
+ 
+-static void reparent_thread(struct task_struct *p, struct task_struct *father)
++/* Returns nonzero if the child should be released. */
++static int reparent_thread(struct task_struct *p, struct task_struct *father)
+ {
++	int dead;
++
+ 	if (p->pdeath_signal)
+ 		/* We already hold the tasklist_lock here. */
+ 		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+ 
+ 	list_move_tail(&p->sibling, &p->real_parent->children);
+ 
++	if (task_detached(p))
++		return 0;
+ 	/* If this is a threaded reparent there is no need to
+ 	 * notify anyone anything has happened.
+ 	 */
+ 	if (same_thread_group(p->real_parent, father))
+-		return;
++		return 0;
+ 
+ 	/* We don't want people slaying init. */
+-	if (!task_detached(p))
+-		p->exit_signal = SIGCHLD;
++	p->exit_signal = SIGCHLD;
+ 
+ 	/* If we'd notified the old parent about this child's death,
+ 	 * also notify the new parent.
+ 	 */
+-	if (!ptrace_reparented(p) &&
+-	    p->exit_state == EXIT_ZOMBIE &&
+-	    !task_detached(p) && thread_group_empty(p))
++	dead = 0;
++	if (!p->ptrace &&
++	    p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
+ 		do_notify_parent(p, p->exit_signal);
++		if (task_detached(p)) {
++			p->exit_state = EXIT_DEAD;
++			dead = 1;
++		}
++	}
+ 
+ 	kill_orphaned_pgrp(p, father);
++
++	return dead;
+ }
+ 
+ /*
+@@ -901,7 +908,8 @@ static void forget_original_parent(struct task_struct *father)
+ 			BUG_ON(p->ptrace);
+ 			p->parent = p->real_parent;
+ 		}
+-		reparent_thread(p, father);
++		if (reparent_thread(p, father))
++			list_add(&p->ptrace_entry, &ptrace_dead);
+ 	}
+ 
+ 	write_unlock_irq(&tasklist_lock);
+@@ -1420,6 +1428,18 @@ static int wait_task_zombie(struct task_struct *p, int options,
+ 	return retval;
+ }
+ 
++static int *task_stopped_code(struct task_struct *p, bool ptrace)
++{
++	if (ptrace) {
++		if (task_is_stopped_or_traced(p))
++			return &p->exit_code;
++	} else {
++		if (p->signal->flags & SIGNAL_STOP_STOPPED)
++			return &p->signal->group_exit_code;
++	}
++	return NULL;
++}
++
+ /*
+  * Handle sys_wait4 work for one task in state TASK_STOPPED. We hold
+  * read_lock(&tasklist_lock) on entry. If we return zero, we still hold
+@@ -1430,7 +1450,7 @@ static int wait_task_stopped(int ptrace, struct task_struct *p,
+ 			int options, struct siginfo __user *infop,
+ 			int __user *stat_addr, struct rusage __user *ru)
+ {
+-	int retval, exit_code, why;
++	int retval, exit_code, *p_code, why;
+ 	uid_t uid = 0; /* unneeded, required by compiler */
+ 	pid_t pid;
+ 
+@@ -1440,22 +1460,16 @@ static int wait_task_stopped(int ptrace, struct task_struct *p,
+ 	exit_code = 0;
+ 	spin_lock_irq(&p->sighand->siglock);
+ 
+-	if (unlikely(!task_is_stopped_or_traced(p)))
+-		goto unlock_sig;
+-
+-	if (!ptrace && p->signal->group_stop_count > 0)
+-		/*
+-		 * A group stop is in progress and this is the group leader.
+-		 * We won't report until all threads have stopped.
+-		 */
++	p_code = task_stopped_code(p, ptrace);
++	if (unlikely(!p_code))
+ 		goto unlock_sig;
+ 
+-	exit_code = p->exit_code;
++	exit_code = *p_code;
+ 	if (!exit_code)
+ 		goto unlock_sig;
+ 
+ 	if (!unlikely(options & WNOWAIT))
+-		p->exit_code = 0;
++		*p_code = 0;
+ 
+ 	/* don't need the RCU readlock here as we're holding a spinlock */
+ 	uid = __task_cred(p)->uid;
+@@ -1611,7 +1625,7 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
+ 	 */
+ 	*notask_error = 0;
+ 
+-	if (task_is_stopped_or_traced(p))
++	if (task_stopped_code(p, ptrace))
+ 		return wait_task_stopped(ptrace, p, options,
+ 					infop, stat_addr, ru);
+ 
+@@ -1811,7 +1825,7 @@ asmlinkage long sys_wait4(pid_t upid, in
+ 		pid = find_get_pid(-upid);
+ 	} else if (upid == 0) {
+ 		type = PIDTYPE_PGID;
+-		pid = get_pid(task_pgrp(current));
++		pid = get_task_pid(current, PIDTYPE_PGID);
+ 	} else /* upid > 0 */ {
+ 		type = PIDTYPE_PID;
+ 		pid = find_get_pid(upid);
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 4854c2c..cf9f156 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1120,7 +1120,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
+ 		goto bad_fork_cleanup_mm;
+ 	if ((retval = copy_io(clone_flags, p)))
+ 		goto bad_fork_cleanup_namespaces;
+-	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
++	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
+ 	if (retval)
+ 		goto bad_fork_cleanup_io;
+ 
+@@ -1258,8 +1258,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
+ 			p->signal->leader_pid = pid;
+ 			tty_kref_put(p->signal->tty);
+ 			p->signal->tty = tty_kref_get(current->signal->tty);
+-			set_task_pgrp(p, task_pgrp_nr(current));
+-			set_task_session(p, task_session_nr(current));
+ 			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
+ 			attach_pid(p, PIDTYPE_SID, task_session(current));
+ 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
+diff --git a/kernel/pid.c b/kernel/pid.c
+index 1b3586f..8582d4e 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -403,6 +403,8 @@ struct pid *get_task_pid(struct task_str
+ {
+ 	struct pid *pid;
+ 	rcu_read_lock();
++	if (type != PIDTYPE_PID)
++		task = task->group_leader;
+ 	pid = get_pid(task->pids[type].pid);
+ 	rcu_read_unlock();
+ 	return pid;
+@@ -450,11 +452,24 @@ pid_t pid_vnr(struct pid *pid)
+ }
+ EXPORT_SYMBOL_GPL(pid_vnr);
+ 
+-pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
++pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
++			struct pid_namespace *ns)
+ {
+-	return pid_nr_ns(task_pid(tsk), ns);
++	pid_t nr = 0;
++
++	rcu_read_lock();
++	if (!ns)
++		ns = current->nsproxy->pid_ns;
++	if (likely(pid_alive(task))) {
++		if (type != PIDTYPE_PID)
++			task = task->group_leader;
++		nr = pid_nr_ns(task->pids[type].pid, ns);
++	}
++	rcu_read_unlock();
++
++	return nr;
+ }
+-EXPORT_SYMBOL(task_pid_nr_ns);
++EXPORT_SYMBOL(__task_pid_nr_ns);
+ 
+ pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+ {
+@@ -462,18 +477,6 @@ pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+ }
+ EXPORT_SYMBOL(task_tgid_nr_ns);
+ 
+-pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+-{
+-	return pid_nr_ns(task_pgrp(tsk), ns);
+-}
+-EXPORT_SYMBOL(task_pgrp_nr_ns);
+-
+-pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+-{
+-	return pid_nr_ns(task_session(tsk), ns);
+-}
+-EXPORT_SYMBOL(task_session_nr_ns);
+-
+ /*
+  * Used by proc to find the first pid that is greater then or equal to nr.
+  *
+diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
+index 476607f..0df323a 100644
+--- a/kernel/posix-cpu-timers.c
++++ b/kernel/posix-cpu-timers.c
+@@ -1371,7 +1372,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
+ 		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
+ 			return 1;
+ 	}
+-	return 0;
++
++	return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY;
+ }
+ 
+ /*
+@@ -1419,19 +1421,19 @@ void run_posix_cpu_timers(struct task_struct *tsk)
+ 	 * timer call will interfere.
+ 	 */
+ 	list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
+-		int firing;
++		int cpu_firing;
++
+ 		spin_lock(&timer->it_lock);
+ 		list_del_init(&timer->it.cpu.entry);
+-		firing = timer->it.cpu.firing;
++		cpu_firing = timer->it.cpu.firing;
+ 		timer->it.cpu.firing = 0;
+ 		/*
+ 		 * The firing flag is -1 if we collided with a reset
+ 		 * of the timer, which already reported this
+ 		 * almost-firing as an overrun. So don't generate an event.
+ 		 */
+-		if (likely(firing >= 0)) {
++		if (likely(cpu_firing >= 0))
+ 			cpu_timer_fire(timer);
+-		}
+ 		spin_unlock(&timer->it_lock);
+ 	}
+ }
+diff --git a/kernel/sched.c b/kernel/sched.c
+index f1e8560..b0cdc3a 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -618,9 +618,6 @@ struct rq {
+ 	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
+ 
+ 	/* sys_sched_yield() stats */
+-	unsigned int yld_exp_empty;
+-	unsigned int yld_act_empty;
+-	unsigned int yld_both_empty;
+ 	unsigned int yld_count;
+ 
+ 	/* schedule() stats */
+@@ -2750,7 +2747,40 @@ unsigned long nr_iowait(void)
+ 	return sum;
+ }
+ 
+-unsigned long nr_active(void)
++/* Variables and functions for calc_load */
++static atomic_long_t calc_load_tasks;
++static unsigned long calc_load_update;
++unsigned long avenrun[3];
++EXPORT_SYMBOL(avenrun);
++
++/**
++ * get_avenrun - get the load average array
++ * @loads:	pointer to dest load array
++ * @offset:	offset to add
++ * @shift:	shift count to shift the result left
++ *
++ * These values are estimates at best, so no need for locking.
++ */
++void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
++{
++	loads[0] = (avenrun[0] + offset) << shift;
++	loads[1] = (avenrun[1] + offset) << shift;
++	loads[2] = (avenrun[2] + offset) << shift;
++}
++
++static unsigned long
++calc_load(unsigned long load, unsigned long exp, unsigned long active)
++{
++	load *= exp;
++	load += active * (FIXED_1 - exp);
++	return load >> FSHIFT;
++}
++
++/*
++ * calc_load - update the avenrun load estimates 10 ticks after the
++ * CPUs have updated calc_load_tasks.
++ */
++void calc_global_load(void)
+ {
+ 	unsigned long i, running = 0, uninterruptible = 0;
+ 
+@@ -4781,11 +4811,17 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
+ 	__wake_up_common(q, mode, 1, 0, NULL);
+ }
+ 
++void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
++{
++	__wake_up_common(q, mode, 1, 0, key);
++}
++
+ /**
+- * __wake_up_sync - wake up threads blocked on a waitqueue.
++ * __wake_up_sync_key - wake up threads blocked on a waitqueue.
+  * @q: the waitqueue
+  * @mode: which threads
+  * @nr_exclusive: how many wake-one or wake-many threads to wake up
++ * @key: opaque value to be passed to wakeup targets
+  *
+  * The sync wakeup differs that the waker knows that it will schedule
+  * away soon, so while the target thread will be woken up, it will not
+@@ -4794,8 +4830,8 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
+  *
+  * On UP it can prevent extra preemption.
+  */
+-void
+-__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
++void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
++			int nr_exclusive, void *key)
+ {
+ 	unsigned long flags;
+ 	int sync = 1;
+@@ -4807,9 +4843,18 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+ 		sync = 0;
+ 
+ 	spin_lock_irqsave(&q->lock, flags);
+-	__wake_up_common(q, mode, nr_exclusive, sync, NULL);
++	__wake_up_common(q, mode, nr_exclusive, sync, key);
+ 	spin_unlock_irqrestore(&q->lock, flags);
+ }
++EXPORT_SYMBOL_GPL(__wake_up_sync_key);
++
++/*
++ * __wake_up_sync - see __wake_up_sync_key()
++ */
++void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
++{
++	__wake_up_sync_key(q, mode, nr_exclusive, NULL);
++}
+ EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
+ 
+ /**
+diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
+index 16eeba4e..bdf57bc 100644
+--- a/kernel/sched_debug.c
++++ b/kernel/sched_debug.c
+@@ -287,9 +287,6 @@ static void print_cpu(struct seq_file *m, int cpu)
+ #ifdef CONFIG_SCHEDSTATS
+ #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
+ 
+-	P(yld_exp_empty);
+-	P(yld_act_empty);
+-	P(yld_both_empty);
+ 	P(yld_count);
+ 
+ 	P(sched_switch);
+@@ -314,7 +311,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
+ 	u64 now = ktime_to_ns(ktime_get());
+ 	int cpu;
+ 
+-	SEQ_printf(m, "Sched Debug Version: v0.07, %s %.*s\n",
++	SEQ_printf(m, "Sched Debug Version: v0.09, %s %.*s\n",
+ 		init_utsname()->release,
+ 		(int)strcspn(init_utsname()->version, " "),
+ 		init_utsname()->version);
+diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
+index a8f93dd..32d2bd4 100644
+--- a/kernel/sched_stats.h
++++ b/kernel/sched_stats.h
+@@ -4,7 +4,7 @@
+  * bump this up when changing the output format or the meaning of an existing
+  * format, so that tools can adapt (or abort)
+  */
+-#define SCHEDSTAT_VERSION 14
++#define SCHEDSTAT_VERSION 15
+ 
+ static int show_schedstat(struct seq_file *seq, void *v)
+ {
+@@ -26,9 +26,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
+ 
+ 		/* runqueue-specific stats */
+ 		seq_printf(seq,
+-		    "cpu%d %u %u %u %u %u %u %u %u %u %llu %llu %lu",
+-		    cpu, rq->yld_both_empty,
+-		    rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
++		    "cpu%d %u %u %u %u %u %u %llu %llu %lu",
++		    cpu, rq->yld_count,
+ 		    rq->sched_switch, rq->sched_count, rq->sched_goidle,
+ 		    rq->ttwu_count, rq->ttwu_local,
+ 		    rq->rq_cpu_time,
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 37f458e..742cefa 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1013,10 +1013,8 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
+ 	if (err)
+ 		goto out;
+ 
+-	if (task_pgrp(p) != pgrp) {
++	if (task_pgrp(p) != pgrp)
+ 		change_pid(p, PIDTYPE_PGID, pgrp);
+-		set_task_pgrp(p, pid_nr(pgrp));
+-	}
+ 
+ 	err = 0;
+ out:
+diff --git a/kernel/workqueue.c b/kernel/workqueue.c
+index 1f0c509..6e9b6d1 100644
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -972,17 +972,19 @@ undo:
+ 
+ #ifdef CONFIG_SMP
+ struct work_for_cpu {
+-	struct work_struct work;
++	struct completion completion;
+ 	long (*fn)(void *);
+ 	void *arg;
+ 	long ret;
+ };
+ 
+-static void do_work_for_cpu(struct work_struct *w)
++static int do_work_for_cpu(void *_wfc)
+ {
+-	struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
++	struct work_for_cpu *wfc = _wfc;
+ 
+ 	wfc->ret = wfc->fn(wfc->arg);
++	complete(&wfc->completion);
++	return 0;
+ }
+ 
+ /**
+@@ -996,20 +998,19 @@ static void do_work_for_cpu(struct work_
+  */
+ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
+ {
+-	struct work_for_cpu wfc;
+-
+-	INIT_WORK(&wfc.work, do_work_for_cpu);
+-	wfc.fn = fn;
+-	wfc.arg = arg;
+-	get_online_cpus();
+-	if (unlikely(!cpu_online(cpu)))
+-		wfc.ret = -EINVAL;
+-	else {
+-		schedule_work_on(cpu, &wfc.work);
+-		flush_work(&wfc.work);
+-	}
+-	put_online_cpus();
+-
++	struct task_struct *sub_thread;
++	struct work_for_cpu wfc = {
++		.completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
++		.fn = fn,
++		.arg = arg,
++	};
++
++	sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
++	if (IS_ERR(sub_thread))
++		return PTR_ERR(sub_thread);
++	kthread_bind(sub_thread, cpu);
++	wake_up_process(sub_thread);
++	wait_for_completion(&wfc.completion);
+ 	return wfc.ret;
+ }
+ EXPORT_SYMBOL_GPL(work_on_cpu);
diff --git a/kernel-bfs-2.6.28/debian/patches/bfs/bfs-tidy_up_resched.patch b/kernel-bfs-2.6.28/debian/patches/bfs/bfs-tidy_up_resched.patch
new file mode 100644
index 0000000..d1ff566
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/bfs/bfs-tidy_up_resched.patch
@@ -0,0 +1,84 @@
+--- linux-2.6.28/kernel/sched_bfs.c	2011-06-17 23:09:25.884488799 +0200
++++ linux-2.6.28.new/kernel/sched_bfs.c	2011-06-17 23:15:51.483825482 +0200
+@@ -2459,7 +2459,7 @@ need_resched_nonpreemptible:
+ 	if (unlikely(reacquire_kernel_lock(current) < 0))
+ 		goto need_resched_nonpreemptible;
+ 	preempt_enable_no_resched();
+-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
++	if (need_resched())
+ 		goto need_resched;
+ }
+ EXPORT_SYMBOL(schedule);
+@@ -2491,7 +2491,7 @@ asmlinkage void __sched preempt_schedule
+ 		 * between schedule and now.
+ 		 */
+ 		barrier();
+-	} while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
++	} while (need_resched());
+ }
+ EXPORT_SYMBOL(preempt_schedule);
+ 
+@@ -2520,7 +2520,7 @@ asmlinkage void __sched preempt_schedule
+ 		 * between schedule and now.
+ 		 */
+ 		barrier();
+-	} while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
++	} while (need_resched());
+ }
+ 
+ #endif /* CONFIG_PREEMPT */
+@@ -3489,6 +3489,11 @@ asmlinkage long sys_sched_yield(void)
+ 	return 0;
+ }
+ 
++static inline int should_resched(void)
++{
++	return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
++}
++
+ static void __cond_resched(void)
+ {
+ 	/* NOT a real fix but will make voluntary preempt work. A foolish thing. */
+@@ -3511,8 +3516,7 @@ static void __cond_resched(void)
+ 
+ int __sched _cond_resched(void)
+ {
+-	if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
+-			system_state == SYSTEM_RUNNING) {
++	if (should_resched()) {
+ 		__cond_resched();
+ 		return 1;
+ 	}
+@@ -3530,12 +3534,12 @@ EXPORT_SYMBOL(_cond_resched);
+  */
+ int cond_resched_lock(spinlock_t *lock)
+ {
+-	int resched = need_resched() && system_state == SYSTEM_RUNNING;
++	int resched = should_resched();
+ 	int ret = 0;
+ 
+ 	if (spin_needbreak(lock) || resched) {
+ 		spin_unlock(lock);
+-		if (resched && need_resched())
++		if (resched)
+ 			__cond_resched();
+ 		else
+ 			cpu_relax();
+@@ -3550,7 +3554,7 @@ int __sched cond_resched_softirq(void)
+ {
+ 	BUG_ON(!in_softirq());
+ 
+-	if (need_resched() && system_state == SYSTEM_RUNNING) {
++	if (should_resched()) {
+ 		local_bh_enable();
+ 		__cond_resched();
+ 		local_bh_disable();
+@@ -3919,7 +3923,7 @@ void wake_up_idle_cpu(int cpu)
+ 	 * lockless. The worst case is that the other CPU runs the
+ 	 * idle task through an additional NOOP schedule()
+ 	 */
+-	set_tsk_thread_flag(idle, TIF_NEED_RESCHED);
++	set_tsk_need_resched(idle);
+ 
+ 	/* NEED_RESCHED must be visible before we test polling */
+ 	smp_mb();
diff --git a/kernel-bfs-2.6.28/debian/patches/bfs/tick_sched-set_inidle_unconditionally.patch b/kernel-bfs-2.6.28/debian/patches/bfs/tick_sched-set_inidle_unconditionally.patch
new file mode 100644
index 0000000..a20d728
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/bfs/tick_sched-set_inidle_unconditionally.patch
@@ -0,0 +1,30 @@
+--- linux-2.6.28/kernel/time/tick-sched.c	2011-06-20 00:00:22.673390790 +0200
++++ linux-2.6.28.new/kernel/time/tick-sched.c	2011-06-20 12:12:30.374080397 +0200
+@@ -231,6 +231,13 @@ void tick_nohz_stop_sched_tick(int inidl
+ 	if (!inidle && !ts->inidle)
+ 		goto end;
+ 
++	/*
++	 * Set ts->inidle unconditionally. Even if the system did not
++	 * switch to NOHZ mode the cpu frequency governors rely on the
++	 * update of the idle time accounting in tick_nohz_start_idle().
++	 */
++	ts->inidle = 1;
++
+ 	now = tick_nohz_start_idle(ts);
+ 
+ 	/*
+@@ -248,12 +255,10 @@ void tick_nohz_stop_sched_tick(int inidl
+ 	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
+ 		goto end;
+ 
+-	ts->inidle = 1;
+-
+ 	if (need_resched())
+ 		goto end;
+ 
+-	if (unlikely(local_softirq_pending())) {
++	if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
+ 		static int ratelimit;
+ 
+ 		if (ratelimit < 10) {
diff --git a/kernel-bfs-2.6.28/debian/patches/series b/kernel-bfs-2.6.28/debian/patches/series
index 1ae8215..5b3ae99 100644
--- a/kernel-bfs-2.6.28/debian/patches/series
+++ b/kernel-bfs-2.6.28/debian/patches/series
@@ -50,7 +50,11 @@ class10sd_dto14_fix.diff
 #################################
 # BFS Patches
+bfs/tick_sched-set_inidle_unconditionally.patch
+bfs/bfs-setup_prereqs.patch
 bfs/bfs.patch
+bfs/bfs-implement_prereqs.patch
+bfs/bfs-tidy_up_resched.patch
 bfs/bfs-316-to-318.patch
 bfs/bfs-318-to-330.patch
 bfs/sched_reset_on_fork.diff
-- 
1.7.9.5