Add a new syscall latnice for "latency nice" with 4 levels, 1 for default, 0 for...
authorPeter Hunt <peter_j_hunt@hotmail.com>
Mon, 30 May 2011 13:44:19 +0000 (13:44 +0000)
committerPeter Hunt <peter_j_hunt@hotmail.com>
Mon, 30 May 2011 13:44:19 +0000 (13:44 +0000)
kernel-bfs-2.6.28/debian/patches/sched-latnice.patch [new file with mode: 0644]
kernel-bfs-2.6.28/debian/patches/series

diff --git a/kernel-bfs-2.6.28/debian/patches/sched-latnice.patch b/kernel-bfs-2.6.28/debian/patches/sched-latnice.patch
new file mode 100644 (file)
index 0000000..404d448
--- /dev/null
@@ -0,0 +1,282 @@
+diff -uprN linux-2.6.28/arch/arm/include/asm/unistd.h linux-2.6.28.new/arch/arm/include/asm/unistd.h
+--- linux-2.6.28/arch/arm/include/asm/unistd.h 2011-05-28 17:55:10.888715216 +0200
++++ linux-2.6.28.new/arch/arm/include/asm/unistd.h     2011-05-28 19:23:05.739661865 +0200
+@@ -387,6 +387,9 @@
+ #define __NR_dup3                     (__NR_SYSCALL_BASE+358)
+ #define __NR_pipe2                    (__NR_SYSCALL_BASE+359)
+ #define __NR_inotify_init1            (__NR_SYSCALL_BASE+360)
++#define __NR_latnice                  (__NR_SYSCALL_BASE+361)
++#define __NR_setlatnice               (__NR_SYSCALL_BASE+362)
++#define __NR_getlatnice               (__NR_SYSCALL_BASE+363)
+ /*
+  * The following SWIs are ARM private.
+diff -uprN linux-2.6.28/arch/arm/kernel/calls.S linux-2.6.28.new/arch/arm/kernel/calls.S
+--- linux-2.6.28/arch/arm/kernel/calls.S       2011-05-28 17:54:42.354377786 +0200
++++ linux-2.6.28.new/arch/arm/kernel/calls.S   2011-05-28 18:00:41.705628818 +0200
+@@ -370,6 +370,9 @@
+               CALL(sys_dup3)
+               CALL(sys_pipe2)
+ /* 360 */     CALL(sys_inotify_init1)
++              CALL(sys_latnice)
++              CALL(sys_setlatnice)
++              CALL(sys_getlatnice)
+ #ifndef syscalls_counted
+ .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
+ #define syscalls_counted
+diff -uprN linux-2.6.28/include/linux/init_task.h linux-2.6.28.new/include/linux/init_task.h
+--- linux-2.6.28/include/linux/init_task.h     2011-05-28 18:08:36.442148441 +0200
++++ linux-2.6.28.new/include/linux/init_task.h 2011-05-28 18:07:40.471745225 +0200
+@@ -133,6 +133,7 @@ extern struct group_info init_groups;
+       .prio           = NORMAL_PRIO,                                  \
+       .static_prio    = MAX_PRIO-20,                                  \
+       .normal_prio    = NORMAL_PRIO,                                  \
++      .latnice        = 1,                                            \
+       .deadline       = 0,                                            \
+       .policy         = SCHED_NORMAL,                                 \
+       .cpus_allowed   = CPU_MASK_ALL,                                 \
+diff -uprN linux-2.6.28/include/linux/sched.h linux-2.6.28.new/include/linux/sched.h
+--- linux-2.6.28/include/linux/sched.h 2011-05-28 18:09:18.241185206 +0200
++++ linux-2.6.28.new/include/linux/sched.h     2011-05-28 19:16:33.081566466 +0200
+@@ -1118,7 +1118,7 @@ struct task_struct {
+       int oncpu;
+ #endif
+-      int prio, static_prio, normal_prio;
++      int prio, static_prio, normal_prio, latnice;
+       unsigned int rt_priority;
+ #ifdef CONFIG_SCHED_BFS
+       int time_slice;
+diff -uprN linux-2.6.28/include/linux/syscalls.h linux-2.6.28.new/include/linux/syscalls.h
+--- linux-2.6.28/include/linux/syscalls.h      2011-05-28 18:08:46.013191142 +0200
++++ linux-2.6.28.new/include/linux/syscalls.h  2011-05-28 19:16:05.936280778 +0200
+@@ -207,6 +207,9 @@ asmlinkage long sys_clock_nanosleep(cloc
+                               struct timespec __user *rmtp);
+ asmlinkage long sys_nice(int increment);
++asmlinkage long sys_latnice(int latnice);
++asmlinkage long sys_setlatnice(pid_t pid, int latnice);
++asmlinkage long sys_getlatnice(pid_t pid);
+ asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
+                                       struct sched_param __user *param);
+ asmlinkage long sys_sched_setparam(pid_t pid,
+diff -uprN linux-2.6.28/kernel/sched_bfs.c linux-2.6.28.new/kernel/sched_bfs.c
+--- linux-2.6.28/kernel/sched_bfs.c    2011-05-28 18:10:39.803808798 +0200
++++ linux-2.6.28.new/kernel/sched_bfs.c        2011-05-28 20:33:37.485040832 +0200
+@@ -153,6 +153,8 @@ int rr_interval __read_mostly = 6;
+  */
+ int sched_iso_cpu __read_mostly = 70;
++int sched_interactive __read_mostly = 2;
++
+ /*
+  * group_thread_accounting - sysctl to decide whether to treat whole thread
+  * groups as a single entity for the purposes of CPU distribution.
+@@ -2669,14 +2671,22 @@ EXPORT_SYMBOL(sub_preempt_count);
+  * proportion works out to the square of the virtual deadline difference, so
+  * this equation will give nice 19 3% CPU compared to nice 0.
+  */
+-static inline u64 prio_deadline_diff(int user_prio)
++static inline u64 prio_deadline_diff(int user_prio, int latnice)
+ {
+-      return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128));
++      u64 pdd = prio_ratios[user_prio] * rr_interval;
++
++      pdd *= MS_TO_NS(1) / 128;
++      if (!latnice)
++              pdd <<= 1;              /* latnice 0: double the deadline offset */
++      else if (latnice > 1)
++              pdd >>= latnice - 1;    /* latnice 2 halves it, 3 quarters it; 1 is unchanged */
++
++      return pdd;
+ }
+ static inline u64 __task_deadline_diff(struct task_struct *p)
+ {
+-      return prio_deadline_diff(TASK_USER_PRIO(p));
++      return prio_deadline_diff(TASK_USER_PRIO(p), p->latnice);        /* scaled by the task's own latnice */
+ }
+ static inline u64 task_deadline_diff(struct task_struct *p)
+@@ -2690,12 +2700,12 @@ static inline u64 task_deadline_diff(str
+ static inline u64 static_deadline_diff(int static_prio)
+ {
+-      return prio_deadline_diff(USER_PRIO(static_prio));
++      return prio_deadline_diff(USER_PRIO(static_prio), 1);    /* latnice 1 applies no shift */
+ }
+ static inline int longest_deadline_diff(void)
+ {
+-      return prio_deadline_diff(39);
++      return prio_deadline_diff(39, 0);        /* latnice 0 doubles the offset, giving the largest value */
+ }
+ static inline int ms_longest_deadline_diff(void)
+@@ -2710,6 +2720,8 @@ static inline int ms_longest_deadline_di
+ static void time_slice_expired(struct task_struct *p)
+ {
+       u64 tdd = task_deadline_diff(p);
++      unsigned long ts = timeslice();
++      int latnice = p->latnice;
+       /*
+        * We proportionately increase the deadline according to how many
+@@ -2724,7 +2736,24 @@ static void time_slice_expired(struct ta
+               if (*threads_running > 1)
+                       tdd += *threads_running * __task_deadline_diff(p);
+       }
+-      p->time_slice = timeslice();
++
++      switch (sched_interactive) {
++              case 0:
++                      if (!latnice)
++                              ts <<= 1;
++                      else if (latnice > 1)
++                              ts >>= latnice - 1;
++                      break;
++              case 1:
++                      break;
++              case 2:
++                      if (!latnice)
++                              ts >>= 1;
++                      else if (latnice > 1)
++                              ts <<= latnice - 1;
++                      break;
++      }
++      p->time_slice = ts;
+       p->deadline_niffy = grq.niffies;
+       p->deadline = grq.niffies + tdd;
+ }
+@@ -3524,6 +3553,104 @@ asmlinkage long sys_nice(int increment)
+ #endif
++/* Does current's euid match @p's euid or uid? Caller holds rcu_read_lock(). */
++static bool __check_same_owner(struct task_struct *p)
++{
++      uid_t euid = current_euid();
++
++      /*
++       * Do not take rcu_read_lock() here: nothing would ever release it.
++       */
++      return euid == task_euid(p) || euid == task_uid(p);
++}
++
++static bool check_same_owner(struct task_struct *p)
++{
++      bool match;
++      /* Run the ownership test inside an RCU read-side section of our own. */
++      rcu_read_lock();
++      match = __check_same_owner(p);
++      rcu_read_unlock();
++      return match;
++}
++
++SYSCALL_DEFINE1(latnice, int, latnice)
++{
++      struct task_struct *p = current;        /* always acts on the calling task */
++      unsigned long flags;
++      struct rq *rq;
++
++      if (latnice < 0)                /* out-of-range requests are clamped to 0..3 */
++              latnice = 0;
++      if (latnice > 3)
++              latnice = 3;
++
++      if (!check_same_owner(p))
++              return -EPERM;
++
++      rq = time_task_grq_lock(p, &flags);
++      p->latnice = latnice;
++      task_grq_unlock(&flags);
++
++      return 0;
++}
++
++static inline struct task_struct *find_process_by_pid(pid_t pid);
++
++/* Set @pid's latnice. Returns 0, or -EINVAL, -ESRCH, -EPERM on failure. */
++SYSCALL_DEFINE2(setlatnice, pid_t, pid, int, latnice)
++{
++      struct task_struct *p;
++      int retval = -EINVAL;
++      unsigned long flags;
++      struct rq *rq;
++
++      if (pid < 0)
++              goto out_nounlock;
++      /* Clamp to 0..3 like sys_latnice; latnice ends up as a shift count. */
++      latnice = latnice < 0 ? 0 : (latnice > 3 ? 3 : latnice);
++      retval = -ESRCH;
++      rcu_read_lock();
++      p = find_process_by_pid(pid);
++      if (!p)
++              goto out_unlock;
++      if (!__check_same_owner(p)) {
++              retval = -EPERM;
++              goto out_unlock;
++      }
++
++      rq = time_task_grq_lock(p, &flags);
++      p->latnice = latnice;
++      task_grq_unlock(&flags);
++      retval = 0;
++out_unlock:
++      rcu_read_unlock();
++out_nounlock:
++      return retval;
++}
++
++SYSCALL_DEFINE1(getlatnice, pid_t, pid)
++{
++      struct task_struct *p;
++      int retval = -EINVAL;           /* returned as-is when pid is negative */
++
++      if (pid < 0)
++              goto out_nounlock;
++
++      retval = -ESRCH;                /* returned when the lookup yields no task */
++      rcu_read_lock();
++      p = find_process_by_pid(pid);
++      if (!p)
++              goto out_unlock;
++      retval = p->latnice;            /* success: the task's latnice; no ownership check is made */
++
++out_unlock:
++      rcu_read_unlock();
++out_nounlock:
++      return retval;
++
++}
++
+ /**
+  * task_prio - return the priority value of a given task.
+  * @p: the task in question.
+diff -uprN linux-2.6.28/kernel/sysctl.c linux-2.6.28.new/kernel/sysctl.c
+--- linux-2.6.28/kernel/sysctl.c       2011-05-28 18:10:57.710020361 +0200
++++ linux-2.6.28.new/kernel/sysctl.c   2011-05-28 19:15:03.708077897 +0200
+@@ -102,6 +102,7 @@ static int __read_mostly one_hundred = 1
+ #ifdef CONFIG_SCHED_BFS
+ extern int rr_interval;
+ extern int sched_iso_cpu;
++extern int sched_interactive;
+ extern int group_thread_accounting;
+ extern int fork_depth_penalty;
+ static int __read_mostly one_thousand = 1000;
+@@ -748,6 +749,15 @@ static struct ctl_table kern_table[] = {
+               .extra2         = &one_hundred,
+       },
+       {
++              .procname       = "interactive",
++              .data           = &sched_interactive,
++              .maxlen         = sizeof(int),
++              .mode           = 0644,
++              .proc_handler   = &proc_dointvec_minmax,
++              .extra1         = &zero,
++              .extra2         = &two,
++      },
++      {
+               .procname       = "group_thread_accounting",
+               .data           = &group_thread_accounting,
+               .maxlen         = sizeof (int),
index f07fad5..90fc740 100644 (file)
@@ -34,6 +34,7 @@ bfs-363-to-400.patch
 bfs-400-to-401.patch
 bfs401-penalise_fork_depth_account_threads.patch
 bfs-401-to-404.patch
+sched-latnice.patch
 sched-add-above-background-load-function.patch
 mm-make_swappiness_really_mean_it.patch
 mm-enable_swaptoken_only_when_swap_full.patch