1 I forgot about an awful lot of longs and ints that will now overflow on 32-bit
2 with u64 deadlines. Fix them.
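To make the failure mode concrete, here is a minimal userspace sketch (not part of the patch; uint32_t stands in for unsigned long on a 32-bit build) showing how a nanosecond-resolution u64 deadline silently truncates when stored in a 32-bit long:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Nanosecond jiffies ("niffies") pass 2^32 after roughly 4.3 seconds
	 * of uptime, so any ns-resolution deadline quickly exceeds 32 bits. */
	uint64_t deadline = 10ULL * 1000000000ULL;	/* 10 seconds in ns */

	/* uint32_t stands in for unsigned long on a 32-bit kernel build:
	 * the store silently truncates and deadline comparisons go wrong. */
	uint32_t truncated = (uint32_t)deadline;

	printf("u64 deadline : %llu ns\n", (unsigned long long)deadline);
	printf("32-bit long  : %u ns (wrapped)\n", truncated);
	return 0;
}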
4 Add some macro tidiness.
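For anyone puzzled by the shift-based helpers touched below: they deliberately approximate decimal factors with powers of two to avoid multiplies and divides. A standalone illustration (mirroring the sched_bfs.c macros, not the kernel code itself):

#include <stdio.h>

/* Approximate, shift-based conversions as used in sched_bfs.c:
 * 1 "ms" is treated as 2^20 ns and 1 "us" as 2^10 ns. */
#define MS_TO_NS(TIME)	((TIME) << 20)
#define US_TO_NS(TIME)	((TIME) << 10)
#define NS_TO_US(TIME)	((TIME) >> 10)

int main(void)
{
	printf("MS_TO_NS(1) = %d (exact would be 1000000)\n", MS_TO_NS(1));
	printf("US_TO_NS(1) = %d (exact would be 1000)\n", US_TO_NS(1));
	printf("NS_TO_US(2000000) = %d (exact would be 2000)\n", NS_TO_US(2000000));
	return 0;
}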
6 Make sched_clock sanity checking robust and standardised, using the jiffy
7 difference as an upper limit, and fall back to a nominal 1us when the difference cannot be trusted.
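A minimal userspace sketch of that clamping logic, for anyone who wants to poke at it outside the kernel (HZ and the helper names here are stand-ins, not the kernel's):

#include <stdint.h>
#include <stdio.h>

#define HZ		1000			/* stand-in; config-dependent in the kernel */
#define JIFFY_NS	(1000000000 / HZ)
#define US_TO_NS(x)	((x) << 10)		/* approximate shift, as in sched_bfs.c */

/* Clamp a measured sched_clock delta: it must be positive and no larger
 * than the elapsed jiffies (rounded up one tick) could account for;
 * otherwise fall back to a nominal 1us. */
static void niffy_diff(int64_t *niff_diff, int jiff_diff)
{
	int64_t max_diff = (int64_t)(jiff_diff + 1) * JIFFY_NS;

	if (*niff_diff < 1 || *niff_diff > max_diff)
		*niff_diff = US_TO_NS(1);
}

int main(void)
{
	int64_t delta = -5000;			/* e.g. sched_clock went backwards */

	niffy_diff(&delta, 1);
	printf("clamped delta: %lld ns\n", (long long)delta);
	return 0;
}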
15 include/linux/sched.h | 2 +-
16 kernel/sched_bfs.c | 76 +++++++++++++++++++++++++++-----------------------
17 2 files changed, 43 insertions(+), 35 deletions(-)
19 Index: linux-2.6.28/kernel/sched_bfs.c
20 ===================================================================
21 --- linux-2.6.28.orig/kernel/sched_bfs.c 2010-10-03 21:29:08.421363441 +1100
22 +++ linux-2.6.28/kernel/sched_bfs.c 2010-10-04 11:39:08.027283891 +1100
24 * approximate multiples of ten for less overhead.
26 #define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ))
27 +#define JIFFY_NS (1000000000 / HZ)
28 #define HALF_JIFFY_NS (1000000000 / HZ / 2)
29 #define HALF_JIFFY_US (1000000 / HZ / 2)
30 #define MS_TO_NS(TIME) ((TIME) << 20)
31 #define MS_TO_US(TIME) ((TIME) << 10)
32 +#define US_TO_NS(TIME) ((TIME) << 10)
33 #define NS_TO_MS(TIME) ((TIME) >> 20)
34 #define NS_TO_US(TIME) ((TIME) >> 10)
36 @@ -182,8 +184,8 @@ struct global_rq {
37 cpumask_t cpu_idle_map;
40 - /* Nanosecond jiffies */
42 + u64 niffies; /* Nanosecond jiffies */
43 + unsigned long last_jiffy; /* Last jiffy we updated niffies */
47 @@ -209,7 +211,7 @@ struct rq {
48 struct mm_struct *prev_mm;
50 /* Stored data about rq->curr to work outside grq lock */
51 - unsigned long rq_deadline;
52 + u64 rq_deadline;
53 unsigned int rq_policy;
56 @@ -315,6 +317,23 @@ static struct root_domain def_root_domai
58 static inline void update_rq_clock(struct rq *rq);
61 + * Sanity check should sched_clock return bogus values. We make sure it does
62 + * not appear to go backwards, and use jiffies to determine the maximum it
63 + * could possibly have increased. At least 1us will have always passed so we
64 + * use that when we don't trust the difference.
66 +static inline void niffy_diff(s64 *niff_diff, int jiff_diff)
68 + unsigned long max_diff;
70 + /* Round up to the nearest tick for maximum */
71 + max_diff = JIFFIES_TO_NS(jiff_diff + 1);
73 + if (unlikely(*niff_diff < 1 || *niff_diff > max_diff))
74 + *niff_diff = US_TO_NS(1);
78 #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
79 #define this_rq() (&__get_cpu_var(runqueues))
80 @@ -335,18 +354,16 @@ static inline int cpu_of(struct rq *rq)
81 static inline void update_clocks(struct rq *rq)
87 ndiff = rq->clock - rq->old_clock;
88 /* old_clock is only updated when we are updating niffies */
89 rq->old_clock = rq->clock;
90 ndiff -= grq.niffies - rq->last_niffy;
92 - * Sanity check should sched_clock return bogus values or be limited to
93 - * just jiffy resolution. Some time will always have passed.
95 - if (unlikely(ndiff < 1 || ndiff > MS_TO_NS(rr_interval)))
97 + jdiff = jiffies - grq.last_jiffy;
98 + niffy_diff(&ndiff, jdiff);
99 + grq.last_jiffy += jdiff;
100 grq.niffies += ndiff;
101 rq->last_niffy = grq.niffies;
103 @@ -364,12 +381,14 @@ static inline int cpu_of(struct rq *rq)
104 static inline void update_clocks(struct rq *rq)
110 ndiff = rq->clock - rq->old_clock;
111 rq->old_clock = rq->clock;
112 - if (unlikely(ndiff < 1 || ndiff > MS_TO_US(rr_interval)))
114 + jdiff = jiffies - grq.last_jiffy;
115 + niffy_diff(&ndiff, jdiff);
116 + grq.last_jiffy += jdiff;
117 grq.niffies += ndiff;
120 @@ -1203,7 +1222,7 @@ void kick_process(struct task_struct *p)
121 * prio PRIO_LIMIT so it is always preempted.
124 -can_preempt(struct task_struct *p, int prio, unsigned long deadline,
125 +can_preempt(struct task_struct *p, int prio, u64 deadline,
128 /* Better static priority RT task or better policy preemption */
129 @@ -1254,7 +1273,8 @@ static inline int needs_other_cpu(struct
130 static void try_preempt(struct task_struct *p, struct rq *this_rq)
132 struct rq *highest_prio_rq = this_rq;
133 - unsigned long latest_deadline, cpu;
134 + u64 latest_deadline;
139 @@ -1276,7 +1296,7 @@ static void try_preempt(struct task_stru
142 for_each_cpu_mask_nr(cpu, tmp) {
143 - unsigned long offset_deadline;
144 + u64 offset_deadline;
148 @@ -1895,16 +1915,12 @@ static void pc_user_time(struct rq *rq,
151 /* Convert nanoseconds to percentage of one tick. */
152 -#define NS_TO_PC(NS) (NS * 100 / JIFFIES_TO_NS(1))
153 +#define NS_TO_PC(NS) (NS * 100 / JIFFY_NS)
156 * This is called on clock ticks and on context switches.
157 * Bank in p->sched_time the ns elapsed since the last tick or switch.
158 * CPU scheduler quota accounting is also performed here in microseconds.
159 - * The value returned from sched_clock() occasionally gives bogus values so
160 - * some sanity checking is required. Time is supposed to be banked all the
161 - * time so default to half a tick to make up for when sched_clock reverts
162 - * to just returning jiffies, and for hardware that can't do tsc.
165 update_cpu_clock(struct rq *rq, struct task_struct *p, int tick)
166 @@ -1939,18 +1955,9 @@ update_cpu_clock(struct rq *rq, struct t
168 /* time_slice accounting is done in usecs to avoid overflow on 32bit */
169 if (rq->rq_policy != SCHED_FIFO && p != idle) {
170 - long time_diff = rq->clock - rq->rq_last_ran;
173 - * There should be less than or equal to one jiffy worth, and not
174 - * negative/overflow. time_diff is only used for internal scheduler
175 - * time_slice accounting.
177 - if (unlikely(time_diff <= 0))
178 - time_diff = JIFFIES_TO_NS(1) / 2;
179 - else if (unlikely(time_diff > JIFFIES_TO_NS(1)))
180 - time_diff = JIFFIES_TO_NS(1);
181 + s64 time_diff = rq->clock - rq->rq_last_ran;
183 + niffy_diff(&time_diff, 1);
184 rq->rq_time_slice -= NS_TO_US(time_diff);
186 rq->rq_last_ran = rq->timekeep_clock = rq->clock;
187 @@ -2358,17 +2365,17 @@ EXPORT_SYMBOL(sub_preempt_count);
188 * proportion works out to the square of the virtual deadline difference, so
189 * this equation will give nice 19 3% CPU compared to nice 0.
191 -static inline int prio_deadline_diff(int user_prio)
192 +static inline u64 prio_deadline_diff(int user_prio)
194 return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128));
197 -static inline int task_deadline_diff(struct task_struct *p)
198 +static inline u64 task_deadline_diff(struct task_struct *p)
200 return prio_deadline_diff(TASK_USER_PRIO(p));
203 -static inline int static_deadline_diff(int static_prio)
204 +static inline u64 static_deadline_diff(int static_prio)
206 return prio_deadline_diff(USER_PRIO(static_prio));
208 @@ -2424,7 +2431,7 @@ static inline void check_deadline(struct
210 task_struct *earliest_deadline_task(struct rq *rq, struct task_struct *idle)
212 - unsigned long dl, earliest_deadline = 0; /* Initialise to silence compiler */
213 + u64 dl, earliest_deadline = 0; /* Initialise to silence compiler */
214 struct task_struct *p, *edt = idle;
215 unsigned int cpu = cpu_of(rq);
216 struct list_head *queue;
217 @@ -6100,6 +6107,7 @@ void __init sched_init(void)
218 spin_lock_init(&grq.lock);
219 grq.nr_running = grq.nr_uninterruptible = grq.nr_switches = 0;
221 + grq.last_jiffy = jiffies;
222 spin_lock_init(&grq.iso_lock);
223 grq.iso_ticks = grq.iso_refractory = 0;
225 Index: linux-2.6.28/include/linux/sched.h
226 ===================================================================
227 --- linux-2.6.28.orig/include/linux/sched.h 2010-10-04 09:34:58.028244089 +1100
228 +++ linux-2.6.28/include/linux/sched.h 2010-10-04 09:35:08.833093538 +1100
229 @@ -1426,7 +1426,7 @@ static inline void tsk_cpus_current(stru
231 static inline void print_scheduler_version(void)
233 - printk(KERN_INFO"BFS CPU scheduler v0.350 by Con Kolivas.\n");
234 + printk(KERN_INFO"BFS CPU scheduler v0.357 by Con Kolivas.\n");
237 static inline int iso_task(struct task_struct *p)