BFS prerequisites and tidy-up of resched functions
author Peter Hunt <peter_j_hunt@hotmail.com>
Mon, 29 Aug 2011 09:44:10 +0000 (09:44 +0000)
committer Peter Hunt <peter_j_hunt@hotmail.com>
Mon, 29 Aug 2011 09:44:10 +0000 (09:44 +0000)
kernel-bfs-2.6.28/debian/patches/bfs/bfs-implement_prereqs.patch [new file with mode: 0644]
kernel-bfs-2.6.28/debian/patches/bfs/bfs-setup_prereqs.patch [new file with mode: 0644]
kernel-bfs-2.6.28/debian/patches/bfs/bfs-tidy_up_resched.patch [new file with mode: 0644]
kernel-bfs-2.6.28/debian/patches/bfs/tick_sched-set_inidle_unconditionally.patch [new file with mode: 0644]
kernel-bfs-2.6.28/debian/patches/series

diff --git a/kernel-bfs-2.6.28/debian/patches/bfs/bfs-implement_prereqs.patch b/kernel-bfs-2.6.28/debian/patches/bfs/bfs-implement_prereqs.patch
new file mode 100644
index 0000000..23f3e9f
--- /dev/null
@@ -0,0 +1,169 @@
+--- linux-2.6.28.orig/kernel/sched_bfs.c       2011-06-18 18:46:15.009219526 +0200
++++ linux-2.6.28.new/kernel/sched_bfs.c        2011-06-18 19:06:34.087896512 +0200
+@@ -229,9 +229,6 @@ struct rq {
+       struct sched_info rq_sched_info;
+       /* sys_sched_yield() stats */
+-      unsigned int yld_exp_empty;
+-      unsigned int yld_act_empty;
+-      unsigned int yld_both_empty;
+       unsigned int yld_count;
+       /* schedule() stats */
+@@ -2585,6 +2582,46 @@ void __wake_up_locked(wait_queue_head_t 
+       __wake_up_common(q, mode, 1, 0, NULL);
+ }
++void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
++{
++      __wake_up_common(q, mode, 1, 0, key);
++}
++
++/**
++ * __wake_up_sync_key - wake up threads blocked on a waitqueue.
++ * @q: the waitqueue
++ * @mode: which threads
++ * @nr_exclusive: how many wake-one or wake-many threads to wake up
++ * @key: opaque value to be passed to wakeup targets
++ *
++ * The sync wakeup differs that the waker knows that it will schedule
++ * away soon, so while the target thread will be woken up, it will not
++ * be migrated to another CPU - ie. the two threads are 'synchronised'
++ * with each other. This can prevent needless bouncing between CPUs.
++ *
++ * On UP it can prevent extra preemption.
++ *
++ * It may be assumed that this function implies a write memory barrier before
++ * changing the task state if and only if any tasks are woken up.
++ */
++void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
++                      int nr_exclusive, void *key)
++{
++      unsigned long flags;
++      int sync = 1;
++
++      if (unlikely(!q))
++              return;
++
++      if (unlikely(!nr_exclusive))
++              sync = 0;
++
++      spin_lock_irqsave(&q->lock, flags);
++      __wake_up_common(q, mode, nr_exclusive, sync, key);
++      spin_unlock_irqrestore(&q->lock, flags);
++}
++EXPORT_SYMBOL_GPL(__wake_up_sync_key);
++
+ /**
+  * __wake_up_sync - wake up threads blocked on a waitqueue.
+  * @q: the waitqueue
+@@ -2615,6 +2652,18 @@ void __wake_up_sync(wait_queue_head_t *q
+ }
+ EXPORT_SYMBOL_GPL(__wake_up_sync);    /* For internal use only */
++/**
++ * complete: - signals a single thread waiting on this completion
++ * @x:  holds the state of this particular completion
++ *
++ * This will wake up a single thread waiting on this completion. Threads will be
++ * awakened in the same order in which they were queued.
++ *
++ * See also complete_all(), wait_for_completion() and related routines.
++ *
++ * It may be assumed that this function implies a write memory barrier before
++ * changing the task state if and only if any tasks are woken up.
++ */
+ void complete(struct completion *x)
+ {
+       unsigned long flags;
+@@ -2626,6 +2675,15 @@ void complete(struct completion *x)
+ }
+ EXPORT_SYMBOL(complete);
++/**
++ * complete_all: - signals all threads waiting on this completion
++ * @x:  holds the state of this particular completion
++ *
++ * This will wake up all threads waiting on this particular completion event.
++ *
++ * It may be assumed that this function implies a write memory barrier before
++ * changing the task state if and only if any tasks are woken up.
++ */
+ void complete_all(struct completion *x)
+ {
+       unsigned long flags;
+@@ -2677,12 +2735,31 @@ wait_for_common(struct completion *x, lo
+       return timeout;
+ }
++/**
++ * wait_for_completion: - waits for completion of a task
++ * @x:  holds the state of this particular completion
++ *
++ * This waits to be signaled for completion of a specific task. It is NOT
++ * interruptible and there is no timeout.
++ *
++ * See also similar routines (i.e. wait_for_completion_timeout()) with timeout
++ * and interrupt capability. Also see complete().
++ */
+ void __sched wait_for_completion(struct completion *x)
+ {
+       wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
+ }
+ EXPORT_SYMBOL(wait_for_completion);
++/**
++ * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
++ * @x:  holds the state of this particular completion
++ * @timeout:  timeout value in jiffies
++ *
++ * This waits for either a completion of a specific task to be signaled or for a
++ * specified timeout to expire. The timeout is in jiffies. It is not
++ * interruptible.
++ */
+ unsigned long __sched
+ wait_for_completion_timeout(struct completion *x, unsigned long timeout)
+ {
+@@ -2690,6 +2767,13 @@ wait_for_completion_timeout(struct compl
+ }
+ EXPORT_SYMBOL(wait_for_completion_timeout);
++/**
++ * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
++ * @x:  holds the state of this particular completion
++ *
++ * This waits for completion of a specific task to be signaled. It is
++ * interruptible.
++ */
+ int __sched wait_for_completion_interruptible(struct completion *x)
+ {
+       long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
+@@ -2699,6 +2783,14 @@ int __sched wait_for_completion_interrup
+ }
+ EXPORT_SYMBOL(wait_for_completion_interruptible);
++/**
++ * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
++ * @x:  holds the state of this particular completion
++ * @timeout:  timeout value in jiffies
++ *
++ * This waits for either a completion of a specific task to be signaled or for a
++ * specified timeout to expire. It is interruptible. The timeout is in jiffies.
++ */
+ unsigned long __sched
+ wait_for_completion_interruptible_timeout(struct completion *x,
+                                         unsigned long timeout)
+@@ -2707,6 +2799,13 @@ wait_for_completion_interruptible_timeou
+ }
+ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
++/**
++ * wait_for_completion_killable: - waits for completion of a task (killable)
++ * @x:  holds the state of this particular completion
++ *
++ * This waits to be signaled for completion of a specific task. It can be
++ * interrupted by a kill signal.
++ */
+ int __sched wait_for_completion_killable(struct completion *x)
+ {
+       long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
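The kernel-doc blocks added above document the completion and wakeup helpers that BFS's sched_bfs.c now carries in parallel with mainline. As a reminder of the calling convention those comments describe, here is a minimal sketch of the complete()/wait_for_completion_timeout() pairing; the my_* names and the 5-second timeout are illustrative only and are not part of the patch.

    #include <linux/completion.h>
    #include <linux/err.h>
    #include <linux/errno.h>
    #include <linux/jiffies.h>
    #include <linux/kthread.h>

    static DECLARE_COMPLETION(my_done);     /* hypothetical completion object */

    static int my_worker(void *unused)
    {
            /* ... perform the deferred work ... */
            complete(&my_done);             /* wakes exactly one waiter, in FIFO order */
            return 0;
    }

    static int my_trigger_and_wait(void)
    {
            struct task_struct *tsk = kthread_run(my_worker, NULL, "my_worker");

            if (IS_ERR(tsk))
                    return PTR_ERR(tsk);

            /* sleep (uninterruptibly) until my_worker() signals completion,
             * or give up after 5 seconds; the timeout argument is in jiffies */
            if (!wait_for_completion_timeout(&my_done, 5 * HZ))
                    return -ETIMEDOUT;
            return 0;
    }
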
diff --git a/kernel-bfs-2.6.28/debian/patches/bfs/bfs-setup_prereqs.patch b/kernel-bfs-2.6.28/debian/patches/bfs/bfs-setup_prereqs.patch
new file mode 100644
index 0000000..a95045e
--- /dev/null
@@ -0,0 +1,703 @@
+diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
+index 994b4d3..1e1dc49 100644
+--- a/arch/arm/kernel/process.c
++++ b/arch/arm/kernel/process.c
+@@ -373,7 +373,7 @@ void release_thread(struct task_struct *dead_task)
+ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+ int
+-copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start,
++copy_thread(unsigned long clone_flags, unsigned long stack_start,
+           unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs)
+ {
+       struct thread_info *thread = task_thread_info(p);
+diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
+index bc84e12..99bf9b0 100644
+--- a/drivers/char/tty_io.c
++++ b/drivers/char/tty_io.c
+@@ -2682,7 +2682,7 @@ void __do_SAK(struct tty_struct *tty)
+       /* Kill the entire session */
+       do_each_pid_task(session, PIDTYPE_SID, p) {
+               printk(KERN_NOTICE "SAK: killed process %d"
+-                      " (%s): task_session_nr(p)==tty->session\n",
++                      " (%s): task_session(p)==tty->session\n",
+                       task_pid_nr(p), p->comm);
+               send_sig(SIGKILL, p, 1);
+       } while_each_pid_task(session, PIDTYPE_SID, p);
+@@ -2692,7 +2692,7 @@ void __do_SAK(struct tty_struct *tty)
+       do_each_thread(g, p) {
+               if (p->signal->tty == tty) {
+                       printk(KERN_NOTICE "SAK: killed process %d"
+-                          " (%s): task_session_nr(p)==tty->session\n",
++                          " (%s): task_session(p)==tty->session\n",
+                           task_pid_nr(p), p->comm);
+                       send_sig(SIGKILL, p, 1);
+                       continue;
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 011db2f..802d144 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -538,25 +539,8 @@ struct signal_struct {
+       struct list_head cpu_timers[3];
+-      /* job control IDs */
+-
+-      /*
+-       * pgrp and session fields are deprecated.
+-       * use the task_session_Xnr and task_pgrp_Xnr routines below
+-       */
+-
+-      union {
+-              pid_t pgrp __deprecated;
+-              pid_t __pgrp;
+-      };
+-
+       struct pid *tty_old_pgrp;
+-      union {
+-              pid_t session __deprecated;
+-              pid_t __session;
+-      };
+-
+       /* boolean value for session group leader */
+       int leader;
+@@ -1453,16 +1437,6 @@ static inline int rt_task(struct task_struct *p)
+       return rt_prio(p->prio);
+ }
+-static inline void set_task_session(struct task_struct *tsk, pid_t session)
+-{
+-      tsk->signal->__session = session;
+-}
+-
+-static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp)
+-{
+-      tsk->signal->__pgrp = pgrp;
+-}
+-
+ static inline struct pid *task_pid(struct task_struct *task)
+ {
+       return task->pids[PIDTYPE_PID].pid;
+@@ -1473,6 +1447,11 @@ static inline struct pid *task_tgid(struct task_struct *task)
+       return task->group_leader->pids[PIDTYPE_PID].pid;
+ }
++/*
++ * Without tasklist or rcu lock it is not safe to dereference
++ * the result of task_pgrp/task_session even if task == current,
++ * we can race with another thread doing sys_setsid/sys_setpgid.
++ */
+ static inline struct pid *task_pgrp(struct task_struct *task)
+ {
+       return task->group_leader->pids[PIDTYPE_PGID].pid;
+@@ -1498,17 +1477,23 @@ struct pid_namespace;
+  *
+  * see also pid_nr() etc in include/linux/pid.h
+  */
++pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
++                      struct pid_namespace *ns);
+ static inline pid_t task_pid_nr(struct task_struct *tsk)
+ {
+       return tsk->pid;
+ }
+-pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
++static inline pid_t task_pid_nr_ns(struct task_struct *tsk,
++                                      struct pid_namespace *ns)
++{
++      return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
++}
+ static inline pid_t task_pid_vnr(struct task_struct *tsk)
+ {
+-      return pid_vnr(task_pid(tsk));
++      return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
+ }
+@@ -1525,31 +1510,34 @@ static inline pid_t task_tgid_vnr(struct task_struct *tsk)
+ }
+-static inline pid_t task_pgrp_nr(struct task_struct *tsk)
++static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk,
++                                      struct pid_namespace *ns)
+ {
+-      return tsk->signal->__pgrp;
++      return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns);
+ }
+-pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+-
+ static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
+ {
+-      return pid_vnr(task_pgrp(tsk));
++      return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL);
+ }
+-static inline pid_t task_session_nr(struct task_struct *tsk)
++static inline pid_t task_session_nr_ns(struct task_struct *tsk,
++                                      struct pid_namespace *ns)
+ {
+-      return tsk->signal->__session;
++      return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns);
+ }
+-pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+-
+ static inline pid_t task_session_vnr(struct task_struct *tsk)
+ {
+-      return pid_vnr(task_session(tsk));
++      return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
+ }
++/* obsolete, do not use */
++static inline pid_t task_pgrp_nr(struct task_struct *tsk)
++{
++      return task_pgrp_nr_ns(tsk, &init_pid_ns);
++}
+ /**
+  * pid_alive - check that a task structure is not stale
+@@ -1949,7 +1937,8 @@ extern void mm_release(struct task_struct *, struct mm_struct *);
+ /* Allocate a new mm structure and copy contents from tsk->mm */
+ extern struct mm_struct *dup_mm(struct task_struct *tsk);
+-extern int  copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
++extern int copy_thread(unsigned long, unsigned long, unsigned long,
++                      struct task_struct *, struct pt_regs *);
+ extern void flush_thread(void);
+ extern void exit_thread(void);
+diff --git a/include/linux/wait.h b/include/linux/wait.h
+index a210ede..0d2eeb0 100644
+--- a/include/linux/wait.h
++++ b/include/linux/wait.h
+@@ -135,8 +135,11 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
+ void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+                       int nr_exclusive, int sync, void *key);
+ void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
+-extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
+-extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
++void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
++void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
++                      void *key);
++void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
++void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
+ void __wake_up_bit(wait_queue_head_t *, void *, int);
+ int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
+ int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
+diff --git a/kernel/exit.c b/kernel/exit.c
+index efd30cc..ca734c6f 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -362,16 +362,12 @@ static void reparent_to_kthreadd(void)
+ void __set_special_pids(struct pid *pid)
+ {
+       struct task_struct *curr = current->group_leader;
+-      pid_t nr = pid_nr(pid);
+-      if (task_session(curr) != pid) {
++      if (task_session(curr) != pid)
+               change_pid(curr, PIDTYPE_SID, pid);
+-              set_task_session(curr, nr);
+-      }
+-      if (task_pgrp(curr) != pid) {
++
++      if (task_pgrp(curr) != pid)
+               change_pid(curr, PIDTYPE_PGID, pid);
+-              set_task_pgrp(curr, nr);
+-      }
+ }
+ static void set_special_pids(struct pid *pid)
+@@ -815,33 +811,44 @@ static void ptrace_exit_finish(struct task_struct *parent,
+       }
+ }
+-static void reparent_thread(struct task_struct *p, struct task_struct *father)
++/* Returns nonzero if the child should be released. */
++static int reparent_thread(struct task_struct *p, struct task_struct *father)
+ {
++      int dead;
++
+       if (p->pdeath_signal)
+               /* We already hold the tasklist_lock here.  */
+               group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+       list_move_tail(&p->sibling, &p->real_parent->children);
++      if (task_detached(p))
++              return 0;
+       /* If this is a threaded reparent there is no need to
+        * notify anyone anything has happened.
+        */
+       if (same_thread_group(p->real_parent, father))
+-              return;
++              return 0;
+       /* We don't want people slaying init.  */
+-      if (!task_detached(p))
+-              p->exit_signal = SIGCHLD;
++      p->exit_signal = SIGCHLD;
+       /* If we'd notified the old parent about this child's death,
+        * also notify the new parent.
+        */
+-      if (!ptrace_reparented(p) &&
+-          p->exit_state == EXIT_ZOMBIE &&
+-          !task_detached(p) && thread_group_empty(p))
++      dead = 0;
++      if (!p->ptrace &&
++          p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
+               do_notify_parent(p, p->exit_signal);
++              if (task_detached(p)) {
++                      p->exit_state = EXIT_DEAD;
++                      dead = 1;
++              }
++      }
+       kill_orphaned_pgrp(p, father);
++
++      return dead;
+ }
+ /*
+@@ -901,7 +908,8 @@ static void forget_original_parent(struct task_struct *father)
+                       BUG_ON(p->ptrace);
+                       p->parent = p->real_parent;
+               }
+-              reparent_thread(p, father);
++              if (reparent_thread(p, father))
++                      list_add(&p->ptrace_entry, &ptrace_dead);
+       }
+       write_unlock_irq(&tasklist_lock);
+@@ -1420,6 +1428,18 @@ static int wait_task_zombie(struct task_struct *p, int options,
+       return retval;
+ }
++static int *task_stopped_code(struct task_struct *p, bool ptrace)
++{
++      if (ptrace) {
++              if (task_is_stopped_or_traced(p))
++                      return &p->exit_code;
++      } else {
++              if (p->signal->flags & SIGNAL_STOP_STOPPED)
++                      return &p->signal->group_exit_code;
++      }
++      return NULL;
++}
++
+ /*
+  * Handle sys_wait4 work for one task in state TASK_STOPPED.  We hold
+  * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
+@@ -1430,7 +1450,7 @@ static int wait_task_stopped(int ptrace, struct task_struct *p,
+                            int options, struct siginfo __user *infop,
+                            int __user *stat_addr, struct rusage __user *ru)
+ {
+-      int retval, exit_code, why;
++      int retval, exit_code, *p_code, why;
+       uid_t uid = 0; /* unneeded, required by compiler */
+       pid_t pid;
+@@ -1440,22 +1460,16 @@ static int wait_task_stopped(int ptrace, struct task_struct *p,
+       exit_code = 0;
+       spin_lock_irq(&p->sighand->siglock);
+-      if (unlikely(!task_is_stopped_or_traced(p)))
+-              goto unlock_sig;
+-
+-      if (!ptrace && p->signal->group_stop_count > 0)
+-              /*
+-               * A group stop is in progress and this is the group leader.
+-               * We won't report until all threads have stopped.
+-               */
++      p_code = task_stopped_code(p, ptrace);
++      if (unlikely(!p_code))
+               goto unlock_sig;
+-      exit_code = p->exit_code;
++      exit_code = *p_code;
+       if (!exit_code)
+               goto unlock_sig;
+       if (!unlikely(options & WNOWAIT))
+-              p->exit_code = 0;
++              *p_code = 0;
+       /* don't need the RCU readlock here as we're holding a spinlock */
+       uid = __task_cred(p)->uid;
+@@ -1611,7 +1625,7 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
+        */
+       *notask_error = 0;
+-      if (task_is_stopped_or_traced(p))
++      if (task_stopped_code(p, ptrace))
+               return wait_task_stopped(ptrace, p, options,
+                                        infop, stat_addr, ru);
+@@ -1811,7 +1825,7 @@  asmlinkage long sys_wait4(pid_t upid, in
+               pid = find_get_pid(-upid);
+       } else if (upid == 0) {
+               type = PIDTYPE_PGID;
+-              pid = get_pid(task_pgrp(current));
++              pid = get_task_pid(current, PIDTYPE_PGID);
+       } else /* upid > 0 */ {
+               type = PIDTYPE_PID;
+               pid = find_get_pid(upid);
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 4854c2c..cf9f156 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1120,7 +1120,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
+               goto bad_fork_cleanup_mm;
+       if ((retval = copy_io(clone_flags, p)))
+               goto bad_fork_cleanup_namespaces;
+-      retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
++      retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
+       if (retval)
+               goto bad_fork_cleanup_io;
+@@ -1258,8 +1258,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
+                       p->signal->leader_pid = pid;
+                       tty_kref_put(p->signal->tty);
+                       p->signal->tty = tty_kref_get(current->signal->tty);
+-                      set_task_pgrp(p, task_pgrp_nr(current));
+-                      set_task_session(p, task_session_nr(current));
+                       attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
+                       attach_pid(p, PIDTYPE_SID, task_session(current));
+                       list_add_tail_rcu(&p->tasks, &init_task.tasks);
+diff --git a/kernel/pid.c b/kernel/pid.c
+index 1b3586f..8582d4e 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -403,6 +403,8 @@  struct pid *get_task_pid(struct task_str
+ {
+       struct pid *pid;
+       rcu_read_lock();
++      if (type != PIDTYPE_PID)
++              task = task->group_leader;
+       pid = get_pid(task->pids[type].pid);
+       rcu_read_unlock();
+       return pid;
+@@ -450,11 +452,24 @@ pid_t pid_vnr(struct pid *pid)
+ }
+ EXPORT_SYMBOL_GPL(pid_vnr);
+-pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
++pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
++                      struct pid_namespace *ns)
+ {
+-      return pid_nr_ns(task_pid(tsk), ns);
++      pid_t nr = 0;
++
++      rcu_read_lock();
++      if (!ns)
++              ns = current->nsproxy->pid_ns;
++      if (likely(pid_alive(task))) {
++              if (type != PIDTYPE_PID)
++                      task = task->group_leader;
++              nr = pid_nr_ns(task->pids[type].pid, ns);
++      }
++      rcu_read_unlock();
++
++      return nr;
+ }
+-EXPORT_SYMBOL(task_pid_nr_ns);
++EXPORT_SYMBOL(__task_pid_nr_ns);
+ pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+ {
+@@ -462,18 +477,6 @@ pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+ }
+ EXPORT_SYMBOL(task_tgid_nr_ns);
+-pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+-{
+-      return pid_nr_ns(task_pgrp(tsk), ns);
+-}
+-EXPORT_SYMBOL(task_pgrp_nr_ns);
+-
+-pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+-{
+-      return pid_nr_ns(task_session(tsk), ns);
+-}
+-EXPORT_SYMBOL(task_session_nr_ns);
+-
+ /*
+  * Used by proc to find the first pid that is greater then or equal to nr.
+  *
+diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
+index 476607f..0df323a 100644
+--- a/kernel/posix-cpu-timers.c
++++ b/kernel/posix-cpu-timers.c
+@@ -1371,7 +1372,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
+               if (task_cputime_expired(&group_sample, &sig->cputime_expires))
+                       return 1;
+       }
+-      return 0;
++
++      return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY;
+ }
+ /*
+@@ -1419,19 +1421,19 @@ void run_posix_cpu_timers(struct task_struct *tsk)
+        * timer call will interfere.
+        */
+       list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
+-              int firing;
++              int cpu_firing;
++
+               spin_lock(&timer->it_lock);
+               list_del_init(&timer->it.cpu.entry);
+-              firing = timer->it.cpu.firing;
++              cpu_firing = timer->it.cpu.firing;
+               timer->it.cpu.firing = 0;
+               /*
+                * The firing flag is -1 if we collided with a reset
+                * of the timer, which already reported this
+                * almost-firing as an overrun.  So don't generate an event.
+                */
+-              if (likely(firing >= 0)) {
++              if (likely(cpu_firing >= 0))
+                       cpu_timer_fire(timer);
+-              }
+               spin_unlock(&timer->it_lock);
+       }
+ }
+diff --git a/kernel/sched.c b/kernel/sched.c
+index f1e8560..b0cdc3a 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -618,9 +618,6 @@ struct rq {
+       /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
+       /* sys_sched_yield() stats */
+-      unsigned int yld_exp_empty;
+-      unsigned int yld_act_empty;
+-      unsigned int yld_both_empty;
+       unsigned int yld_count;
+       /* schedule() stats */
+@@ -2750,7 +2747,40 @@ unsigned long nr_iowait(void)
+       return sum;
+ }
+-unsigned long nr_active(void)
++/* Variables and functions for calc_load */
++static atomic_long_t calc_load_tasks;
++static unsigned long calc_load_update;
++unsigned long avenrun[3];
++EXPORT_SYMBOL(avenrun);
++
++/**
++ * get_avenrun - get the load average array
++ * @loads:    pointer to dest load array
++ * @offset:   offset to add
++ * @shift:    shift count to shift the result left
++ *
++ * These values are estimates at best, so no need for locking.
++ */
++void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
++{
++      loads[0] = (avenrun[0] + offset) << shift;
++      loads[1] = (avenrun[1] + offset) << shift;
++      loads[2] = (avenrun[2] + offset) << shift;
++}
++
++static unsigned long
++calc_load(unsigned long load, unsigned long exp, unsigned long active)
++{
++      load *= exp;
++      load += active * (FIXED_1 - exp);
++      return load >> FSHIFT;
++}
++
++/*
++ * calc_load - update the avenrun load estimates 10 ticks after the
++ * CPUs have updated calc_load_tasks.
++ */
++void calc_global_load(void)
+ {
+       unsigned long i, running = 0, uninterruptible = 0;
+@@ -4781,11 +4811,17 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
+       __wake_up_common(q, mode, 1, 0, NULL);
+ }
++void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
++{
++      __wake_up_common(q, mode, 1, 0, key);
++}
++
+ /**
+- * __wake_up_sync - wake up threads blocked on a waitqueue.
++ * __wake_up_sync_key - wake up threads blocked on a waitqueue.
+  * @q: the waitqueue
+  * @mode: which threads
+  * @nr_exclusive: how many wake-one or wake-many threads to wake up
++ * @key: opaque value to be passed to wakeup targets
+  *
+  * The sync wakeup differs that the waker knows that it will schedule
+  * away soon, so while the target thread will be woken up, it will not
+@@ -4794,8 +4830,8 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
+  *
+  * On UP it can prevent extra preemption.
+  */
+-void
+-__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
++void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
++                      int nr_exclusive, void *key)
+ {
+       unsigned long flags;
+       int sync = 1;
+@@ -4807,9 +4843,18 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+               sync = 0;
+       spin_lock_irqsave(&q->lock, flags);
+-      __wake_up_common(q, mode, nr_exclusive, sync, NULL);
++      __wake_up_common(q, mode, nr_exclusive, sync, key);
+       spin_unlock_irqrestore(&q->lock, flags);
+ }
++EXPORT_SYMBOL_GPL(__wake_up_sync_key);
++
++/*
++ * __wake_up_sync - see __wake_up_sync_key()
++ */
++void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
++{
++      __wake_up_sync_key(q, mode, nr_exclusive, NULL);
++}
+ EXPORT_SYMBOL_GPL(__wake_up_sync);    /* For internal use only */
+ /**
+diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
+index 16eeba4e..bdf57bc 100644
+--- a/kernel/sched_debug.c
++++ b/kernel/sched_debug.c
+@@ -287,9 +287,6 @@ static void print_cpu(struct seq_file *m, int cpu)
+ #ifdef CONFIG_SCHEDSTATS
+ #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
+-      P(yld_exp_empty);
+-      P(yld_act_empty);
+-      P(yld_both_empty);
+       P(yld_count);
+       P(sched_switch);
+@@ -314,7 +311,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
+       u64 now = ktime_to_ns(ktime_get());
+       int cpu;
+-      SEQ_printf(m, "Sched Debug Version: v0.07, %s %.*s\n",
++      SEQ_printf(m, "Sched Debug Version: v0.09, %s %.*s\n",
+               init_utsname()->release,
+               (int)strcspn(init_utsname()->version, " "),
+               init_utsname()->version);
+diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
+index a8f93dd..32d2bd4 100644
+--- a/kernel/sched_stats.h
++++ b/kernel/sched_stats.h
+@@ -4,7 +4,7 @@
+  * bump this up when changing the output format or the meaning of an existing
+  * format, so that tools can adapt (or abort)
+  */
+-#define SCHEDSTAT_VERSION 14
++#define SCHEDSTAT_VERSION 15
+ static int show_schedstat(struct seq_file *seq, void *v)
+ {
+@@ -26,9 +26,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
+               /* runqueue-specific stats */
+               seq_printf(seq,
+-                  "cpu%d %u %u %u %u %u %u %u %u %u %llu %llu %lu",
+-                  cpu, rq->yld_both_empty,
+-                  rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
++                  "cpu%d %u %u %u %u %u %u %llu %llu %lu",
++                  cpu, rq->yld_count,
+                   rq->sched_switch, rq->sched_count, rq->sched_goidle,
+                   rq->ttwu_count, rq->ttwu_local,
+                   rq->rq_cpu_time,
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 37f458e..742cefa 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -1013,10 +1013,8 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
+       if (err)
+               goto out;
+-      if (task_pgrp(p) != pgrp) {
++      if (task_pgrp(p) != pgrp)
+               change_pid(p, PIDTYPE_PGID, pgrp);
+-              set_task_pgrp(p, pid_nr(pgrp));
+-      }
+       err = 0;
+ out:
+diff --git a/kernel/workqueue.c b/kernel/workqueue.c
+index 1f0c509..6e9b6d1 100644
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -972,17 +972,19 @@ undo:
+ #ifdef CONFIG_SMP
+ struct work_for_cpu {
+-      struct work_struct work;
++      struct completion completion;
+       long (*fn)(void *);
+       void *arg;
+       long ret;
+ };
+-static void do_work_for_cpu(struct work_struct *w)
++static int do_work_for_cpu(void *_wfc)
+ {
+-      struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
++      struct work_for_cpu *wfc = _wfc;
+       wfc->ret = wfc->fn(wfc->arg);
++      complete(&wfc->completion);
++      return 0;
+ }
+ /**
+@@ -996,20 +998,19 @@ static void do_work_for_cpu(struct work_
+  */
+ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
+ {
+-      struct work_for_cpu wfc;
+-
+-      INIT_WORK(&wfc.work, do_work_for_cpu);
+-      wfc.fn = fn;
+-      wfc.arg = arg;
+-      get_online_cpus();
+-      if (unlikely(!cpu_online(cpu)))
+-              wfc.ret = -EINVAL;
+-      else {
+-              schedule_work_on(cpu, &wfc.work);
+-              flush_work(&wfc.work);
+-      }
+-      put_online_cpus();
+-
++      struct task_struct *sub_thread;
++      struct work_for_cpu wfc = {
++              .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
++              .fn = fn,
++              .arg = arg,
++      };
++
++      sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
++      if (IS_ERR(sub_thread))
++              return PTR_ERR(sub_thread);
++      kthread_bind(sub_thread, cpu);
++      wake_up_process(sub_thread);
++      wait_for_completion(&wfc.completion);
+       return wfc.ret;
+ }
+ EXPORT_SYMBOL_GPL(work_on_cpu);
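The last hunk above backports the mainline rework of work_on_cpu(): instead of queueing a work item, it spawns a kthread bound to the target CPU and waits on an on-stack completion, so fn() may sleep without deadlocking the per-CPU workqueue. A minimal caller sketch follows; my_query_cpu()/my_query_on() are hypothetical, and the caller remains responsible for keeping the CPU online (hence get_online_cpus()).

    #include <linux/cpu.h>
    #include <linux/smp.h>
    #include <linux/workqueue.h>

    /* hypothetical helper that must execute on a specific CPU */
    static long my_query_cpu(void *arg)
    {
            unsigned int *where = arg;

            /* safe: work_on_cpu() bound this kthread to a single CPU */
            *where = smp_processor_id();
            return 0;
    }

    static long my_query_on(unsigned int cpu)
    {
            unsigned int where = 0;
            long ret;

            get_online_cpus();              /* keep @cpu from going away under us */
            ret = work_on_cpu(cpu, my_query_cpu, &where);
            put_online_cpus();

            return ret ? ret : where;
    }
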
diff --git a/kernel-bfs-2.6.28/debian/patches/bfs/bfs-tidy_up_resched.patch b/kernel-bfs-2.6.28/debian/patches/bfs/bfs-tidy_up_resched.patch
new file mode 100644
index 0000000..d1ff566
--- /dev/null
@@ -0,0 +1,84 @@
+--- linux-2.6.28/kernel/sched_bfs.c    2011-06-17 23:09:25.884488799 +0200
++++ linux-2.6.28.new/kernel/sched_bfs.c        2011-06-17 23:15:51.483825482 +0200
+@@ -2459,7 +2459,7 @@ need_resched_nonpreemptible:
+       if (unlikely(reacquire_kernel_lock(current) < 0))
+               goto need_resched_nonpreemptible;
+       preempt_enable_no_resched();
+-      if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
++      if (need_resched())
+               goto need_resched;
+ }
+ EXPORT_SYMBOL(schedule);
+@@ -2491,7 +2491,7 @@ asmlinkage void __sched preempt_schedule
+                * between schedule and now.
+                */
+               barrier();
+-      } while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
++      } while (need_resched());
+ }
+ EXPORT_SYMBOL(preempt_schedule);
+@@ -2520,7 +2520,7 @@ asmlinkage void __sched preempt_schedule
+                * between schedule and now.
+                */
+               barrier();
+-      } while (unlikely(test_thread_flag(TIF_NEED_RESCHED)));
++      } while (need_resched());
+ }
+ #endif /* CONFIG_PREEMPT */
+@@ -3489,6 +3489,11 @@ asmlinkage long sys_sched_yield(void)
+       return 0;
+ }
++static inline int should_resched(void)
++{
++      return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
++}
++
+ static void __cond_resched(void)
+ {
+       /* NOT a real fix but will make voluntary preempt work. 馬鹿な事 */
+@@ -3511,8 +3516,7 @@ static void __cond_resched(void)
+ int __sched _cond_resched(void)
+ {
+-      if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
+-                                      system_state == SYSTEM_RUNNING) {
++      if (should_resched()) {
+               __cond_resched();
+               return 1;
+       }
+@@ -3530,12 +3534,12 @@ EXPORT_SYMBOL(_cond_resched);
+  */
+ int cond_resched_lock(spinlock_t *lock)
+ {
+-      int resched = need_resched() && system_state == SYSTEM_RUNNING;
++      int resched = should_resched();
+       int ret = 0;
+       if (spin_needbreak(lock) || resched) {
+               spin_unlock(lock);
+-              if (resched && need_resched())
++              if (resched)
+                       __cond_resched();
+               else
+                       cpu_relax();
+@@ -3550,7 +3554,7 @@ int __sched cond_resched_softirq(void)
+ {
+       BUG_ON(!in_softirq());
+-      if (need_resched() && system_state == SYSTEM_RUNNING) {
++      if (should_resched()) {
+               local_bh_enable();
+               __cond_resched();
+               local_bh_disable();
+@@ -3919,7 +3923,7 @@ void wake_up_idle_cpu(int cpu)
+        * lockless. The worst case is that the other CPU runs the
+        * idle task through an additional NOOP schedule()
+        */
+-      set_tsk_thread_flag(idle, TIF_NEED_RESCHED);
++      set_tsk_need_resched(idle);
+       /* NEED_RESCHED must be visible before we test polling */
+       smp_mb();
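The common theme of this patch is routing every reschedule test through need_resched()/set_tsk_need_resched() and the new should_resched() helper instead of poking TIF_NEED_RESCHED by hand, and dropping the stale system_state == SYSTEM_RUNNING checks. For reference, these helpers end up behind the usual cond_resched() pattern in long process-context loops, roughly like this hypothetical my_scan_table():

    #include <linux/sched.h>

    /* hypothetical: clear a large table in process context without
     * monopolising the CPU on !CONFIG_PREEMPT kernels */
    static void my_scan_table(unsigned long *table, unsigned long entries)
    {
            unsigned long i;

            for (i = 0; i < entries; i++) {
                    table[i] = 0;
                    /* yields only when TIF_NEED_RESCHED is set and we are not
                     * already inside a preemption (PREEMPT_ACTIVE not set) */
                    cond_resched();
            }
    }
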
diff --git a/kernel-bfs-2.6.28/debian/patches/bfs/tick_sched-set_inidle_unconditionally.patch b/kernel-bfs-2.6.28/debian/patches/bfs/tick_sched-set_inidle_unconditionally.patch
new file mode 100644
index 0000000..a20d728
--- /dev/null
@@ -0,0 +1,30 @@
+--- linux-2.6.28/kernel/time/tick-sched.c      2011-06-20 00:00:22.673390790 +0200
++++ linux-2.6.28.new/kernel/time/tick-sched.c  2011-06-20 12:12:30.374080397 +0200
+@@ -231,6 +231,13 @@ void tick_nohz_stop_sched_tick(int inidl
+       if (!inidle && !ts->inidle)
+               goto end;
++      /*
++       * Set ts->inidle unconditionally. Even if the system did not
++       * switch to NOHZ mode the cpu frequency governers rely on the
++       * update of the idle time accounting in tick_nohz_start_idle().
++       */
++      ts->inidle = 1;
++
+       now = tick_nohz_start_idle(ts);
+       /*
+@@ -248,12 +255,10 @@ void tick_nohz_stop_sched_tick(int inidl
+       if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
+               goto end;
+-      ts->inidle = 1;
+-
+       if (need_resched())
+               goto end;
+-      if (unlikely(local_softirq_pending())) {
++      if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
+               static int ratelimit;
+               if (ratelimit < 10) {
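The comment added above matters because tick_nohz_stop_sched_tick() has two callers: the architecture idle loops pass inidle=1, while irq_exit() passes 0. Setting ts->inidle before the NOHZ_MODE_INACTIVE bail-out keeps the idle-time accounting done in tick_nohz_start_idle() visible to the cpufreq governors even when the tick is never actually stopped. A simplified, hypothetical idle-loop sketch (modelled on the ARM one; cpu_relax() stands in for the arch-specific wait instruction):

    #include <linux/sched.h>
    #include <linux/tick.h>

    /* illustration only -- the real loop lives in arch/<arch>/kernel/process.c */
    static void my_idle_loop(void)
    {
            while (1) {
                    tick_nohz_stop_sched_tick(1);   /* inidle=1: called by the idle task */
                    while (!need_resched())
                            cpu_relax();            /* placeholder for arch_idle()/wfi */
                    tick_nohz_restart_sched_tick();
                    preempt_enable_no_resched();
                    schedule();
                    preempt_disable();
            }
    }
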
diff --git a/kernel-bfs-2.6.28/debian/patches/series b/kernel-bfs-2.6.28/debian/patches/series
index 1ae8215..5b3ae99 100644
@@ -50,7 +50,11 @@ class10sd_dto14_fix.diff
 
 #################################
 # BFS Patches
+bfs/tick_sched-set_inidle_unconditionally.patch
+bfs/bfs-setup_prereqs.patch
 bfs/bfs.patch
+bfs/bfs-implement_prereqs.patch
+bfs/bfs-tidy_up_resched.patch
 bfs/bfs-316-to-318.patch
 bfs/bfs-318-to-330.patch
 bfs/sched_reset_on_fork.diff