--- zzzz-none-000/linux-2.4.17/kernel/timer.c	2001-10-08 17:41:41.000000000 +0000
+++ sangam-fb-322/linux-2.4.17/kernel/timer.c	2004-11-24 13:21:27.000000000 +0000
@@ -22,8 +22,19 @@
 #include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include
+#include
+#include
+#include
 
 #include <asm/uaccess.h>
+#ifdef CONFIG_TIMER_STATS
+#include
+#else
+#define TP_START(a)
+#define TP_STOP(a)
+#define DTIMEPEG(a,b)
+#endif
 
 /*
  * Timekeeping variables
@@ -33,6 +44,16 @@
 
 /* The current time */
 struct timeval xtime __attribute__ ((aligned (16)));
+/*
+ * This spinlock protect us from races in SMP while playing with xtime. -arca
+ */
+rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
+/*
+ * This atomic prevents re-entry of the run_timer_list and has the side
+ * effect of shifting conflict runs to the "owning" cpu.
+ */
+
+static atomic_t timer_tasklet_lock = ATOMIC_INIT(-1);
 
 /* Don't completely fail for HZ > 500. */
 int tickadj = 500/HZ ? : 1;		/* microsecs */
@@ -65,7 +86,7 @@
 
 extern int do_setitimer(int, struct itimerval *, struct itimerval *);
 
-unsigned long volatile jiffies;
+volatile u64 jiffies_64 = 0;
 
 unsigned int * prof_buffer;
 unsigned long prof_len;
@@ -74,13 +95,38 @@
 /*
  * Event timer code
  */
+#ifdef CONFIG_HIGH_RES_TIMERS
+/*
+ * ifdef eliminator macro...
+ */
+#define IF_HIGH_RES(a) a
+#ifndef CONFIG_NEW_TIMERLIST
+#define CONFIG_NEW_TIMERLIST
+#endif
+#else
+#define IF_HIGH_RES(a)
+#endif
+#if ! defined(CONFIG_CASCADING_TIMERS) && ! defined(CONFIG_NEW_TIMERLIST)
+#define CONFIG_CASCADING_TIMERS
+#endif /* CONFIG_HIGH_RES_TIMERS */
 #define TVN_BITS 6
 #define TVR_BITS 8
 #define TVN_SIZE (1 << TVN_BITS)
 #define TVR_SIZE (1 << TVR_BITS)
 #define TVN_MASK (TVN_SIZE - 1)
 #define TVR_MASK (TVR_SIZE - 1)
-
+#ifdef CONFIG_NEW_TIMERLIST
+#define IF_NEW_TIMER_LIST(a) a
+#ifndef CONFIG_NEW_TIMER_LISTSIZE
+#define CONFIG_NEW_TIMER_LISTSIZE 512
+#endif /* CONFIG_NEW_TIMER_LISTSIZE */
+#define NEW_TVEC_SIZE CONFIG_NEW_TIMER_LISTSIZE
+#define NEW_TVEC_MASK (NEW_TVEC_SIZE - 1)
+static struct list_head new_tvec[NEW_TVEC_SIZE];
+#else /* CONFIG_NEW_TIMERLIST */
+#define IF_NEW_TIMER_LIST(a)
+#endif /* CONFIG_NEW_TIMERLIST */
+#ifdef CONFIG_CASCADING_TIMERS
 struct timer_vec {
         int index;
         struct list_head vec[TVN_SIZE];
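
With CONFIG_NEW_TIMERLIST, the five cascading timer vectors are replaced by the single new_tvec[] hash declared above: NEW_TVEC_SIZE list heads (512 with the default CONFIG_NEW_TIMER_LISTSIZE), indexed by the low bits of the expiry time in jiffies. Timers due more than NEW_TVEC_SIZE ticks out wrap around and share a bucket with nearer timers, which is why the insert path further down keeps each bucket sorted. A minimal stand-alone sketch of the bucket arithmetic; bucket_for() and the demo values are illustrative, not part of the patch:

    #include <stdio.h>

    #define NEW_TVEC_SIZE 512                  /* mirrors CONFIG_NEW_TIMER_LISTSIZE */
    #define NEW_TVEC_MASK (NEW_TVEC_SIZE - 1)

    /* Same indexing as internal_add_timer(): only the low bits count, so
     * two timers a full table-length apart land in the same bucket. */
    static unsigned long bucket_for(unsigned long expires)
    {
            return expires & NEW_TVEC_MASK;
    }

    int main(void)
    {
            unsigned long now = 1000;

            printf("near: %lu  far: %lu\n",
                   bucket_for(now + 8),
                   bucket_for(now + 8 + NEW_TVEC_SIZE)); /* same bucket */
            return 0;
    }
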
@@ -102,11 +148,13 @@
 };
 
 #define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
+#endif /* CONFIG_CASCADING_TIMERS */
 
 void init_timervecs (void)
 {
         int i;
 
+#ifdef CONFIG_CASCADING_TIMERS
         for (i = 0; i < TVN_SIZE; i++) {
                 INIT_LIST_HEAD(tv5.vec + i);
                 INIT_LIST_HEAD(tv4.vec + i);
@@ -115,15 +163,133 @@
         }
         for (i = 0; i < TVR_SIZE; i++)
                 INIT_LIST_HEAD(tv1.vec + i);
+#endif /* CONFIG_CASCADING_TIMERS */
+#ifdef CONFIG_NEW_TIMERLIST
+        for (i = 0; i < NEW_TVEC_SIZE; i++)
+                INIT_LIST_HEAD( new_tvec + i);
+#endif /* CONFIG_NEW_TIMERLIST */
 }
 
 static unsigned long timer_jiffies;
 
+/* Initialize both explicitly - let's try to have them in the same cache line */
+spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
+
+#if defined( CONFIG_SMP) || defined(CONFIG_HIGH_RES_TIMERS)
+volatile struct timer_list * volatile running_timer;
+#define timer_enter(t) do { running_timer = t; mb(); } while (0)
+#define timer_exit() do { running_timer = NULL; } while (0)
+#define timer_is_inactive() (running_timer == NULL)
+#else
+#define timer_enter(t) do { } while (0)
+#define timer_exit() do { } while (0)
+#define timer_is_inactive() 1
+#endif
+
+#ifdef CONFIG_SMP
+#define timer_is_running(t) (running_timer == t)
+#define timer_synchronize(t) while (timer_is_running(t)) barrier()
+#endif
+
+
 static inline void internal_add_timer(struct timer_list *timer)
 {
         /*
          * must be cli-ed when calling this
          */
+#ifdef CONFIG_NEW_TIMERLIST
+        unsigned long expires = timer->expires;
+        IF_HIGH_RES(int sub_expires = timer->sub_expires;)
+        int indx;
+        struct list_head *pos, *list_start;
+
+        if ( time_before(expires, timer_jiffies) ){
+                /*
+                 * already expired, schedule for next tick;
+                 * would like to do better here.
+                 * Actually this now works just fine with the
+                 * back up of timer_jiffies in "run_timer_list".
+                 * Note that this puts the timer on a list other
+                 * than the one it indexes to.  We don't want to
+                 * change the expires value in the timer as it is
+                 * used by the repeat code in setitimer and the
+                 * POSIX timers code.
+                 */
+                expires = timer_jiffies;
+                IF_HIGH_RES(sub_expires = 0);
+        }
+
+        indx = expires & NEW_TVEC_MASK;
+        if ((expires - timer_jiffies) <= NEW_TVEC_SIZE) {
+#ifdef CONFIG_HIGH_RES_TIMERS
+                unsigned long jiffies_f;
+                /*
+                 * The high diff bits are the same, goes to the head of
+                 * the list, sort on sub_expire.
+                 */
+                for (pos = (list_start = &new_tvec[indx])->next;
+                     pos != list_start;
+                     pos = pos->next){
+                        struct timer_list *tmr =
+                                list_entry(pos,
+                                           struct timer_list,
+                                           list);
+                        if ((tmr->sub_expires >= sub_expires) ||
+                            (tmr->expires != expires)){
+                                break;
+                        }
+                }
+                list_add_tail(&timer->list, pos);
+                /*
+                 * Notes to me.  Use jiffies here instead of
+                 * timer_jiffies to prevent adding unneeded interrupts.
+                 * Timer_is_inactive() is false if we are currently
+                 * actively dispatching timers.  Since we are under
+                 * the same spin lock, it being false means that
+                 * it has dropped the spinlock to call the timer
+                 * function, which could well be who called us.
+                 * In any case, we don't need a new interrupt as
+                 * the timer dispatch code (run_timer_list) will
+                 * pick this up when the function it is calling
+                 * returns.
+                 */
+                if ( expires == (jiffies_f = jiffies) &&
+                     list_start->next == &timer->list &&
+                     timer_is_inactive()) {
+                        schedule_next_int(jiffies_f, sub_expires, 1);
+                }
+#else
+                pos = (&new_tvec[indx])->next;
+                list_add_tail(&timer->list, pos);
+#endif /* CONFIG_HIGH_RES_TIMERS */
+        }else{
+                /*
+                 * The high diff bits differ, search from the tail.
+                 * The for loop will pick up an empty list.
+                 */
+                for (pos = (list_start = &new_tvec[indx])->prev;
+                     pos != list_start;
+                     pos = pos->prev){
+                        struct timer_list *tmr =
+                                list_entry(pos,
+                                           struct timer_list,
+                                           list);
+                        if (time_after(tmr->expires, expires)){
+                                continue;
+                        }
+                        IF_HIGH_RES(
+                                if ((tmr->expires != expires) ||
+                                    (tmr->sub_expires < sub_expires)) {
+                                        break;
+                                }
+                                )
+                }
+                list_add(&timer->list, pos);
+        }
+
+#endif /* CONFIG_NEW_TIMERLIST */
+#ifdef CONFIG_CASCADING_TIMERS
         unsigned long expires = timer->expires;
         unsigned long idx = expires - timer_jiffies;
         struct list_head * vec;
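
The two insertion loops just added keep each new_tvec[] bucket ordered so that run_timer_list() can dispatch from the head and stop at the first entry that is not yet due: ascending expiry jiffie first and, under CONFIG_HIGH_RES_TIMERS, ascending sub_expires within the same jiffie. A hypothetical stand-alone comparator expressing that ordering (hr_key and hr_key_before are not in the patch; the cast is the same wrap-safe trick time_before() uses):

    struct hr_key {
            unsigned long expires;  /* expiry time in jiffies */
            long sub_expires;       /* position within that jiffie */
    };

    /* Non-zero if a fires before b, matching the list order that
     * internal_add_timer() maintains within one bucket. */
    static int hr_key_before(const struct hr_key *a, const struct hr_key *b)
    {
            if (a->expires != b->expires)
                    return (long)(a->expires - b->expires) < 0;
            return a->sub_expires < b->sub_expires;
    }
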
@@ -157,22 +323,9 @@
          * Timers are FIFO!
          */
         list_add(&timer->list, vec->prev);
+#endif /* CONFIG_CASCADING_TIMERS */
 }
 
-/* Initialize both explicitly - let's try to have them in the same cache line */
-spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
-
-#ifdef CONFIG_SMP
-volatile struct timer_list * volatile running_timer;
-#define timer_enter(t) do { running_timer = t; mb(); } while (0)
-#define timer_exit() do { running_timer = NULL; } while (0)
-#define timer_is_running(t) (running_timer == t)
-#define timer_synchronize(t) while (timer_is_running(t)) barrier()
-#else
-#define timer_enter(t) do { } while (0)
-#define timer_exit() do { } while (0)
-#endif
-
 void add_timer(struct timer_list *timer)
 {
         unsigned long flags;
@@ -191,8 +344,9 @@
 
 static inline int detach_timer (struct timer_list *timer)
 {
-        if (!timer_pending(timer))
+        if (!timer_pending(timer)){
                 return 0;
+        }
         list_del(&timer->list);
         return 1;
 }
@@ -209,6 +363,23 @@
         spin_unlock_irqrestore(&timerlist_lock, flags);
         return ret;
 }
+#ifdef CONFIG_HIGH_RES_TIMERS
+int mod_timer_hr(struct timer_list *timer,
+                 unsigned long expires,
+                 long sub_expires)
+{
+        int ret;
+        unsigned long flags;
+
+        spin_lock_irqsave(&timerlist_lock, flags);
+        timer->expires = expires;
+        timer->sub_expires = sub_expires;
+        ret = detach_timer(timer);
+        internal_add_timer(timer);
+        spin_unlock_irqrestore(&timerlist_lock, flags);
+        return ret;
+}
+#endif /* CONFIG_HIGH_RES_TIMERS */
 
 int del_timer(struct timer_list * timer)
 {
@@ -260,6 +431,7 @@
 }
 #endif
 
+#ifdef CONFIG_CASCADING_TIMERS
 static inline void cascade_timers(struct timer_vec *tv)
 {
@@ -284,20 +456,49 @@
         INIT_LIST_HEAD(head);
         tv->index = (tv->index + 1) & TVN_MASK;
 }
+#endif /* CONFIG_CASCADING_TIMERS */
 
 static inline void run_timer_list(void)
 {
+        IF_HIGH_RES( unsigned long jiffies_f;
+                     long sub_jiff = -1;
+                     long sub_jiffie_f);
         spin_lock_irq(&timerlist_lock);
+#ifdef CONFIG_HIGH_RES_TIMERS
+        read_lock(&xtime_lock);
+        jiffies_f = jiffies;
+        sub_jiffie_f = sub_jiffie() + quick_get_cpuctr();
+        read_unlock(&xtime_lock);
+        while ( unlikely(sub_jiffie_f >= cycles_per_jiffies)){
+                sub_jiffie_f -= cycles_per_jiffies;
+                jiffies_f++;
+        }
+        while ((long)(jiffies_f - timer_jiffies) >= 0) {
+#else
         while ((long)(jiffies - timer_jiffies) >= 0) {
+#endif /* CONFIG_HIGH_RES_TIMERS */
                 struct list_head *head, *curr;
+#ifdef CONFIG_CASCADING_TIMERS
                 if (!tv1.index) {
                         int n = 1;
                         do {
                                 cascade_timers(tvecs[n]);
                         } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
                 }
-repeat:
+
                 head = tv1.vec + tv1.index;
+#endif /* CONFIG_CASCADING_TIMERS */
+                IF_NEW_TIMER_LIST(
+                        head = new_tvec +
+                                (timer_jiffies & NEW_TVEC_MASK);
+                        )
+                /*
+                 * Note that we never move "head" but continue to
+                 * pick the first entry from it.  This allows new
+                 * entries to be inserted while we unlock for the
+                 * function call below.
+                 */
+repeat:
                 curr = head->next;
                 if (curr != head) {
                         struct timer_list *timer;
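
The reworked loop above moves the repeat: label below the head computation on purpose: head is never advanced, and every pass re-reads head->next after the lock is re-taken around the timer callback, so entries that a timer function inserts at the front of the current bucket are dispatched in the same pass. A user-space model of that dispatch shape (node, insert_front and fire are illustrative only, not patch code):

    #include <stdio.h>

    struct node {
            struct node *next;
            void (*fn)(struct node *self);
            int id;
    };

    static struct node head = { &head, 0, 0 };      /* empty circular list */

    static void insert_front(struct node *n)
    {
            n->next = head.next;
            head.next = n;
    }

    static struct node extra;

    static void fire(struct node *self)
    {
            printf("fired %d\n", self->id);
            if (self->id == 1) {                    /* handler adds a new entry */
                    extra.id = 2;
                    extra.fn = fire;
                    insert_front(&extra);
            }
    }

    int main(void)
    {
            struct node first = { 0, fire, 1 };

            insert_front(&first);
            while (head.next != &head) {            /* always re-read head.next */
                    struct node *curr = head.next;

                    head.next = curr->next;         /* detach, like detach_timer() */
                    curr->fn(curr);                 /* may insert more entries */
            }
            return 0;
    }

Running it prints "fired 1" then "fired 2": the entry inserted by the first callback is still picked up, because the loop condition re-reads head.next rather than caching a snapshot of the list.
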
@@ -305,7 +506,23 @@
                         unsigned long data;
 
                         timer = list_entry(curr, struct timer_list, list);
-                        fn = timer->function;
+#ifdef CONFIG_HIGH_RES_TIMERS
+                        /*
+                         * This would be simpler if we never got behind,
+                         * i.e. if timer_jiffies == jiffies, we could
+                         * drop one of the tests.  Since we may get
+                         * behind (in fact we don't update until
+                         * we are behind, to allow sub_jiffie entries)
+                         * we need a way to negate the sub_jiffie
+                         * test in that case...
+                         */
+                        if (time_before(timer->expires, jiffies_f) ||
+                            ((timer->expires == jiffies_f) &&
+                             timer->sub_expires <= sub_jiffie_f))
+#else
+                        if (time_before_eq(timer->expires, jiffies))
+#endif /* CONFIG_HIGH_RES_TIMERS */
+                        {       fn = timer->function;
                                 data= timer->data;
 
                                 detach_timer(timer);
@@ -317,9 +534,42 @@
                                 timer_exit();
                                 goto repeat;
                         }
+#ifdef CONFIG_HIGH_RES_TIMERS
+                        else{
+                                /*
+                                 * The new timer list is not always emptied
+                                 * here as it contains:
+                                 * a.) entries (list size)^N*jiffies out and
+                                 * b.) entries that match in jiffies, but have
+                                 *     sub_expire times further out than now.
+                                 */
+                                if (timer->expires == jiffies_f ){
+                                        sub_jiff = timer->sub_expires;
+                                }
+                        }
+#endif /* CONFIG_HIGH_RES_TIMERS */
+                }
                 ++timer_jiffies;
+#ifdef CONFIG_CASCADING_TIMERS
                 tv1.index = (tv1.index + 1) & TVR_MASK;
+#endif
         }
+        /*
+         * It is faster to back out the last bump than to prevent it.
+         * This allows zero time inserts as well as sub_jiffie values in
+         * the current jiffie.  Will not work for the cascade as tv1.index
+         * also needs adjusting.
+         */
+        IF_NEW_TIMER_LIST(--timer_jiffies);
+
+        IF_HIGH_RES(if (schedule_next_int( jiffies_f, sub_jiff, 0)){
+                /*
+                 * If schedule_next_int says the time has passed,
+                 * bump the tasklet lock so we go round again.
+                 */
+                atomic_inc(&timer_tasklet_lock);
+        });
+
         spin_unlock_irq(&timerlist_lock);
 }
@@ -583,7 +833,15 @@
         update_one_process(p, user_tick, system, cpu);
         if (p->pid) {
+#ifdef CONFIG_RTSCHED
+                /* SCHED_FIFO and the idle(s) have counters set to -100,
+                 * so we won't count them; seems like a good idea for
+                 * both schedulers, but, being pure...
+                 */
+                if (p->counter >= 0 && --p->counter <= 0) {
+#else
                 if (--p->counter <= 0) {
+#endif
                         p->counter = 0;
                         p->need_resched = 1;
                 }
@@ -640,12 +898,8 @@
 
 /* jiffies at the most recent update of wall time */
 unsigned long wall_jiffies;
 
-/*
- * This spinlock protect us from races in SMP while playing with xtime. -arca
- */
-rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
-static inline void update_times(void)
+static inline unsigned long update_times(void)
 {
         unsigned long ticks;
 
@@ -662,24 +916,70 @@
                 update_wall_time(ticks);
         }
         write_unlock_irq(&xtime_lock);
-        calc_load(ticks);
-}
+        return ticks;   /* This is dumb.  Change calc_load to a timer. */
+}
+#ifdef CONFIG_HIGH_RES_TIMERS
+void update_real_wall_time(void)
+{
+        unsigned long ticks;
+        /*
+         * To get the time of day really right, we need to make sure
+         * every one is on the same jiffie. (Because of adj_time, etc.)
+         * So we provide this for the high res code.  Must be called
+         * under the write(xtime_lock).  (External locking allows the
+         * caller to include sub jiffies in the lock region.)
+         */
+        ticks = jiffies - wall_jiffies;
+        if (ticks) {
+                wall_jiffies += ticks;
+                update_wall_time(ticks);
+        }
+}
+#endif /* CONFIG_HIGH_RES_TIMERS */
 
 void timer_bh(void)
 {
-        update_times();
+        unsigned long ticks;
+
+        TRACE_EVENT(TRACE_EV_KERNEL_TIMER, NULL);
+
+        ticks = update_times();
+        calc_load(ticks);
         run_timer_list();
 }
 
+/*
+ * timer_tasklet_lock starts at -1; 0, then, means it is cool to continue.
+ * If another cpu bumps it while the first is still in timer_bh, it will
+ * be detected on exit, and we will run it again.  But multiple entries
+ * are not needed, just once for all the "hits" while we are in timer_bh.
+ */
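
The comment above documents the protocol that timer_softirq(), just below, implements with atomic_inc_and_test() and atomic_add_negative(). A user-space model of the same counter protocol using C11 atomics; it is single-threaded here just to show the state transitions, and none of these names are kernel code:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int tasklet_lock = -1;    /* mirrors ATOMIC_INIT(-1) */

    static void body(void)
    {
            puts("timer_bh()");
    }

    /* atomic_inc_and_test(v) is true when the increment lands on 0;
     * atomic_add_negative(-1, v) is true when the decrement goes below 0.
     * atomic_fetch_add() returns the old value, hence the +1/-1 below. */
    static void softirq(void)
    {
            if (atomic_fetch_add(&tasklet_lock, 1) + 1 != 0)
                    return;         /* an owner is running; it will loop */
            do {
                    atomic_store(&tasklet_lock, 0);
                    body();
            } while (atomic_fetch_add(&tasklet_lock, -1) - 1 >= 0);
    }

    int main(void)
    {
            softirq();      /* first entry: runs body and resets to -1 */
            softirq();      /* same again; a concurrent entry would return */
            return 0;
    }

The first increment lands on 0 and wins; increments from other cpus leave the counter positive, which the winner notices when its exit decrement fails to go negative, so it loops and runs the body once more for all the hits that arrived meanwhile.
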
+void timer_softirq(struct softirq_action* a)
+{
+
+        if ( ! atomic_inc_and_test(&timer_tasklet_lock)){
+                //kgdb_ts(atomic_read(&timer_tasklet_lock),0);
+                return;
+        }
+        do {
+                //kgdb_ts(atomic_read(&timer_tasklet_lock),1);
+                atomic_set(&timer_tasklet_lock,0);
+                timer_bh();
+                // mark_bh(TIMER_BH);
+        }while ( ! atomic_add_negative(-1,&timer_tasklet_lock));
+
+        //kgdb_ts(atomic_read(&timer_tasklet_lock),2);
+}
+
 void do_timer(struct pt_regs *regs)
 {
-        (*(unsigned long *)&jiffies)++;
+        update_jiffies();
 #ifndef CONFIG_SMP
         /* SMP process accounting uses the local APIC timer */
 
+        IF_HIGH_RES( if (new_jiffie()))
+                update_process_times(user_mode(regs));
-        update_process_times(user_mode(regs));
 #endif
-        mark_bh(TIMER_BH);
+        /*mark_bh(TIMER_BH);*/
+        raise_softirq( RUN_TIMER_LIST );
         if (TQ_ACTIVE(tq_timer))
                 mark_bh(TQUEUE_BH);
 }
@@ -800,11 +1100,25 @@
         return current->pid;
 }
 
-asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
+#if 0
+// This #if 0 is to keep the pretty printer/formatter happy so the indents
+// below will be correct.
+// The NANOSLEEP_ENTRY macro is defined in asm/signal.h and
+// is structured to allow code as well as entry definitions, so that when
+// we get control back here the entry parameters will be available as expected.
+// Some systems may find these parameters in other ways than as entry parms;
+// for example, struct pt_regs *regs is defined in i386 as the address of the
+// first parameter, whereas other archs pass it as one of the parameters.
+asmlinkage long sys_nanosleep(void)
 {
-        struct timespec t;
-        unsigned long expire;
+#endif
+        NANOSLEEP_ENTRY( struct timespec t;
+                unsigned long expire;)
+
+        // The following code expects rqtp, rmtp to be available as a result of
+        // the above macro.  Also any regs needed for the _do_signal() macro
+        // should be set up here.
 
         if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
                 return -EFAULT;
@@ -826,9 +1140,9 @@
         }
 
         expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
-
+        do {
         current->state = TASK_INTERRUPTIBLE;
-        expire = schedule_timeout(expire);
+        } while((expire = schedule_timeout(expire)) && !_do_signal());
 
         if (expire) {
                 if (rmtp) {
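
The do/while added above keeps the task asleep until either the whole interval has elapsed or a signal actually needs delivering (_do_signal()), instead of dropping back to user space on every wakeup; schedule_timeout() returns the unexpired remainder, which the loop reuses. The same restart idiom exists in user space around nanosleep(2), which reports the unslept remainder in just that way; sleep_full() is an illustrative analogue, not part of the patch:

    #include <errno.h>
    #include <stdio.h>
    #include <time.h>

    /* Sleep the full interval, resuming with the remainder after EINTR. */
    static int sleep_full(struct timespec req)
    {
            struct timespec rem;

            while (nanosleep(&req, &rem) == -1 && errno == EINTR)
                    req = rem;
            return 0;
    }

    int main(void)
    {
            struct timespec t = { 1, 0 };   /* one second */

            sleep_full(t);
            puts("slept");
            return 0;
    }
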