小男孩‘自慰网亚洲一区二区,亚洲一级在线播放毛片,亚洲中文字幕av每天更新,黄aⅴ永久免费无码,91成人午夜在线精品,色网站免费在线观看,亚洲欧洲wwwww在线观看

分享

LINUX高精度定時器實現分析 | 碼農故事

 老匹夫 2016-01-04

hrtimer,是High-resolution kernel timers的縮寫,從字面意思就知道,這是一個高精度內核timer。

HRTIMER用法示例

先調用hrtimer_init初始化,然后設置function回調,例如,此處設置回調為coalesced_timer_fn。

1
2
3
4
5
6
7
8
9
10
/*
 * Example hrtimer callback: recovers the owning kvm_rtc from the embedded
 * timer and queues rtc->ws_coalesced on rtc->wq.
 */
static enum hrtimer_restart coalesced_timer_fn(struct hrtimer *timer)
{
struct kvm_rtc *rtc = container_of(timer, struct kvm_rtc, coalesced_timer);
CHECK_PAUSE_RET(rtc->pause, HRTIMER_NORESTART);
queue_work(rtc->wq, &rtc->ws_coalesced);
return HRTIMER_NORESTART;// The return value matters: to make this a periodic timer, return HRTIMER_RESTART (the expiry must be set first)
}
/* Initialise the timer on CLOCK_MONOTONIC in absolute mode, then install the expiry callback. */
hrtimer_init(&rtc->coalesced_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
rtc->coalesced_timer.function = coalesced_timer_fn;

設置超時時間,設置一個超時的ABS時間,超時時間設置了,并不代表timer已經運行,還必須將其加入active隊列,start系列函數就做此事。

1
2
/*
 * Set the absolute expiry. Setting the expiry does not by itself start the
 * timer; it still has to be added to the active queue afterwards.
 */
hrtimer_forward(&rtc->coalesced_timer, ns_to_ktime(now_time), ns_to_ktime(next_time-now_time));
hrtimer_add_expires_ns(&rtc->coalesced_timer, 1000000000);// 1 second


加入active隊列

1
2
/* Add the timer to the active queue, using the expiry already stored in it. */
hrtimer_start_expires(&rtc->coalesced_timer, HRTIMER_MODE_ABS);
hrtimer_restart(&rtc->coalesced_timer);

從active隊列刪除(如正在回調中,會等待回調運行完成)。

1
/* Remove the timer from the active queue; if its callback is running, this waits for the callback to finish. */
hrtimer_cancel(&rtc->coalesced_timer);

HRTIMER數據結構


Figure 1 數據結構示意圖

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/* Index of each clock base kept per CPU. */
enum  hrtimer_base_type {
HRTIMER_BASE_MONOTONIC,
HRTIMER_BASE_REALTIME,
HRTIMER_BASE_BOOTTIME,
HRTIMER_MAX_CLOCK_BASES, // upper bound; used below as the size of the clock_base array
};
/*
* struct hrtimer_cpu_base - the per cpu clock bases
*
* Holds one lock, bookkeeping fields, and an array of
* HRTIMER_MAX_CLOCK_BASES clock bases. The high-resolution
* statistics fields exist only with CONFIG_HIGH_RES_TIMERS.
*/
struct hrtimer_cpu_base {
raw_spinlock_t lock; //lock protecting the base and associated clock bases and timers
unsigned int active_bases; //Bitfield to mark bases with active timers
unsigned int clock_was_set;  //Indicates that clock was set from irq context.
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires_next; //absolute time of the next event which was scheduled, via clock_set_next_event()
int hres_active; //State of high resolution mode
int hang_detected; //The last hrtimer interrupt detected a hang
unsigned long nr_events; //Total number of hrtimer interrupt events
unsigned long nr_retries; //Total number of hrtimer interrupt retries
unsigned long nr_hangs;   //Total number of hrtimer interrupt hangs
ktime_t max_hang_time; //Maximum time spent in hrtimer_interrupt
#endif
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; //array of clock bases for this cpu
};
/*
 * Per-CPU hrtimer_cpu_base instance. Each of the three clock bases is
 * statically set up with its index, clock id, time getter and a low
 * resolution.
 */
DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
{
.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
.clock_base =
{
{
.index = HRTIMER_BASE_MONOTONIC,
.clockid = CLOCK_MONOTONIC,
.get_time = &ktime_get, //monotonic time
.resolution = KTIME_LOW_RES, // right after initialisation every base is low resolution
},
{
.index = HRTIMER_BASE_REALTIME,
.clockid = CLOCK_REALTIME,
.get_time = &ktime_get_real, //get the real (wall-) time, TOD
.resolution = KTIME_LOW_RES,
},
{
.index = HRTIMER_BASE_BOOTTIME,
.clockid = CLOCK_BOOTTIME,
.get_time = &ktime_get_boottime,//monotonic time since boot
.resolution = KTIME_LOW_RES,
},
}
};
/**
* struct hrtimer_clock_base - the timer base for a specific clock
*
* One of these exists per clock type inside each hrtimer_cpu_base.
*/
struct hrtimer_clock_base {
struct hrtimer_cpu_base * cpu_base; //per cpu clock base
int index; //clock type index for per_cpu support when moving a timer to a base on another cpu.
clockid_t clockid; //clock id for per_cpu support
struct timerqueue_head active; //red black tree root node for the active timers; the active queue is what holds the hrtimers
ktime_t resolution; //the resolution of the clock, in nanoseconds
ktime_t (*get_time)(void); //function to retrieve the current time of the clock
ktime_t softirq_time; //the time when running the hrtimer queue in the softirq
ktime_t offset; //offset of this clock to the monotonic base
};
/* Node of a timer queue: an rb-tree node plus its expiry time. */
struct timerqueue_node {
struct rb_node node;
ktime_t expires; // this expiry is the one actually used when comparing times; reportedly a power-saving optimisation
};
/* A single high-resolution timer. */
struct hrtimer {
struct timerqueue_node node;
ktime_t _softexpires; // the "soft expires" time; the matching timerqueue_node->expires is called the "hard expires"
enum hrtimer_restart (*function)(struct hrtimer *); // the expiry callback
struct hrtimer_clock_base *base;
unsigned long state; // state field: indicates which state the hrtimer is in (see the state-transition section of the article)
#ifdef CONFIG_TIMER_STATS
int start_pid;
void *start_site;
char start_comm[16];
#endif
};

HRTIMER初始化流程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/* Excerpt of start_kernel() showing where the timer subsystems are initialised ("..." marks elided code). */
asmlinkage void __init start_kernel(void)
{
...
init_IRQ();
init_timers(); // initialise the low-resolution timers
hrtimers_init(); // initialise the high-resolution timers
softirq_init();
timekeeping_init();
time_init();
...
}
/*
 * CPU event notifier for hrtimers: initialises the per-CPU bases when a
 * CPU is being prepared and, with CONFIG_HOTPLUG_CPU, handles the dying
 * and dead events. Always returns NOTIFY_OK.
 */
static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
int scpu = (long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
init_hrtimers_cpu(scpu);// for each CPU, initialise the hrtimer_clock_base entries in its struct hrtimer_cpu_base
break;
#ifdef CONFIG_HOTPLUG_CPU // hotplug support
case CPU_DYING:
case CPU_DYING_FROZEN:
clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
{
clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
migrate_hrtimers(scpu);// when a CPU is dead, migrate the timers on the dead CPU to this CPU
break;
}
#endif
default:
break;
}
return NOTIFY_OK;
}
/* Notifier block that routes CPU events to hrtimer_cpu_notify(). */
static struct notifier_block __cpuinitdata hrtimers_nb = {
.notifier_call = hrtimer_cpu_notify,
};
/*
 * Boot-time hrtimer setup: prepares the current CPU, registers the CPU
 * notifier and, with CONFIG_HIGH_RES_TIMERS, opens HRTIMER_SOFTIRQ.
 */
void __init hrtimers_init(void)
{
hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());// explicitly run the PREPARE initialisation for the boot CPU here, because it is already up; other CPUs go through the callback registered below
register_cpu_notifier(&hrtimers_nb);// CPU event notification; will call hrtimer_cpu_notify
#ifdef CONFIG_HIGH_RES_TIMERS
open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);// register the timer softirq used in high-resolution mode
#endif
}
/*
* Functions related to boot-time initialization:
*/
/* For the given CPU, initialise the hrtimer_cpu_base structure associated with it. */
static void __cpuinit init_hrtimers_cpu(int cpu)
{
struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
int i;
// This also initialises the hrtimer_clock_base array inside hrtimer_cpu_base:
// each clock base gets its hrtimer queue initialised and its back pointer set.
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
cpu_base->clock_base[i].cpu_base = cpu_base;
timerqueue_init_head(&cpu_base->clock_base[i].active);
}
hrtimer_init_hres(cpu_base);// e.g. initialises high-resolution mode as inactive
}

HRTIMER CPU熱插拔支持

熱插拔支持,其核心功能,就是當一個CPU死掉的時候,將其上面的hrtimer遷移到本CPU上來,需要預編譯宏CONFIG_HOTPLUG_CPU。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Move every hrtimer queued on the offline CPU @scpu to the current CPU.
 * Runs with local interrupts disabled and both CPU bases locked.
 */
static void migrate_hrtimers(int scpu)
{
struct hrtimer_cpu_base *old_base, *new_base;
int i;
BUG_ON(cpu_online(scpu));
tick_cancel_sched_timer(scpu);
local_irq_disable();// disable interrupts while migrating timers
old_base = &per_cpu(hrtimer_bases, scpu);
new_base = &__get_cpu_var(hrtimer_bases);
/*
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
*/
raw_spin_lock(&new_base->lock);
raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
// Both the new and the old hrtimer_bases are locked; now migrate the timer nodes.
// One hrtimer_bases contains several clock bases.
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
migrate_hrtimer_list(&old_base->clock_base[i],
&new_base->clock_base[i]);
}
raw_spin_unlock(&old_base->lock);
raw_spin_unlock(&new_base->lock);
/* Check, if we got expired work to do */
__hrtimer_peek_ahead_timers();// for timeliness: once migration is done, check whether any timer now on this CPU's hrtimer_bases has expired
local_irq_enable();// re-enable interrupts after the migration
}
/*
 * Drain the active queue of @old_base, re-enqueueing each timer on
 * @new_base.
 */
static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
struct hrtimer_clock_base *new_base)
{
struct hrtimer *timer;
struct timerqueue_node *node;
// Simple job: re-link the timers on the old clock_base's active queue onto the new one
while ((node = timerqueue_getnext(&old_base->active))) {
timer = container_of(node, struct hrtimer, node);
BUG_ON(hrtimer_callback_running(timer));
debug_deactivate(timer);
/*
* Mark it as STATE_MIGRATE not INACTIVE otherwise the
* timer could be seen as !active and just vanish away
* under us on another CPU
*/
__remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
timer->base = new_base;
/*
* Enqueue the timers on the new cpu. This does not
* reprogram the event device in case the timer
* expires before the earliest on this CPU, but we run
* hrtimer_interrupt after we migrated everything to
* sort out already expired timers and reprogram the
* event device.
*/
enqueue_hrtimer(timer, new_base);
/* Clear the migration state bit */
timer->state &= ~HRTIMER_STATE_MIGRATE;
}
}
#endif /* CONFIG_HOTPLUG_CPU */

時鐘設備

如下斜體部分文字摘自陳功的《Linux 時鐘管理》

tick device
Tick device 用來處理周期性的 tick event。Tick device 其實是時鐘事件設備的一個 wrapper,因此 tick device 也有 one-shot 和周期性這兩種中斷觸發模式。
每注冊一個時鐘事件設備,這個設備會自動被注冊為一個 tick device。全局的 tick device 用來更新諸如 jiffies 這樣的全局信息,per-CPU 的 tick device 則用來更新每個 CPU 相關的特定信息。

broadcast
Broadcast 的出現是為了應對這樣一種情況:假定 CPU 使用 Local APIC Timer 作為 per-CPU 的 tick device,但是某些特定的 CPU(如 Intel 的 Westmere 之前的 CPU)在進入 C3+ 的狀態時 Local APIC Timer 也會同時停止工作,進入睡眠狀態。在這種情形下 broadcast 可以替代 Local APIC Timer 繼續完成統計進程的執行時間等有關操作。本質上 broadcast 是發送一個 IPI(Inter-processor interrupt)中斷給其他所有的 CPU,當目標 CPU 收到這個 IPI 中斷后就會調用原先 Local APIC Timer 正常工作時的中斷處理函數,從而實現了同樣的功能。目前主要在 x86 以及 MIPS 下會用到 broadcast 功能。

Timekeeping & GTOD (Generic Time-of-Day)
Timekeeping(可以理解為時間測量或者計時)是內核時間管理的一個核心組成部分。沒有 Timekeeping,就無法更新系統時間,維持系統“心跳”。GTOD 是一個通用的框架,用來實現諸如設置系統時間 gettimeofday 或者修改系統時間 settimeofday 等工作。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*
* The hpet clock event device
*
* Advertises both periodic and one-shot capability and uses the
* hpet_legacy_* callbacks for mode changes and event programming.
*/
static struct clock_event_device hpet_clockevent = {
.name = "hpet",
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
.set_mode = hpet_legacy_set_mode,
.set_next_event = hpet_legacy_next_event,
.irq = 0,
.rating = 50,
};
/*
 * Excerpt of start_kernel() ("..." marks elided code): after the timer
 * subsystems are initialised, late_time_init is invoked if it has been set.
 */
asmlinkage void __init start_kernel(void)
{
...
init_IRQ();
init_timers();
hrtimers_init();
softirq_init();
timekeeping_init();
time_init();
...
if (late_time_init)
late_time_init();
...
}
/* Defers the x86 timer setup by installing x86_late_time_init as the late_time_init hook. */
void __init time_init(void)
{
late_time_init = x86_late_time_init;
}
/* Late timer setup: runs the platform timer_init hook, then tsc_init(). */
static __init void x86_late_time_init(void)
{
x86_init.timers.timer_init();
tsc_init();
}
/* Excerpt of the x86 init ops table ("..." marks elided members); timer_init is set to hpet_time_init. */
struct x86_init_ops x86_init __initdata = {
...
.timers = {
.setup_percpu_clockev = setup_boot_APIC_clock,
.tsc_pre_init = x86_init_noop,
.timer_init = hpet_time_init,
.wallclock_init = x86_init_noop,
},
...
};
/* Default timer init function */
void __init hpet_time_init(void)
{
if (!hpet_enable())// use the HPET by default; if the HPET is not available, fall back to the PIT as the clock source
setup_pit_timer();
setup_default_timer_irq();// install the handler for IRQ0
}
hpet_enable ->
/*
 * Enable HPET legacy interrupts, register hpet_clockevent as a clock event
 * device bound to the current CPU, and make it the global clock event.
 */
static void hpet_legacy_clockevent_register(void)
{
/* Start HPET legacy interrupts */
hpet_enable_legacy_int();
/*
* Start hpet with the boot cpu mask and make it
* global after the IO_APIC has been initialized.
*/
hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
clockevents_config_and_register(&hpet_clockevent, hpet_freq,
HPET_MIN_PROG_DELTA, 0x7FFFFFFF);// crucial: this registers the clock event device
global_clock_event = &hpet_clockevent;// make the IRQ 0 interrupt handler use the hpet handler
/* The newline escape was lost in the quoted source ("registeredn"); restored. */
printk(KERN_DEBUG "hpet clockevent registered\n");
}
clockevents_config_and_register ->
/*
 * Register a clock event device: default its cpumask to the current CPU
 * if unset, add it to the clockevent_devices list and send the ADD
 * notification, all under clockevents_lock.
 */
void clockevents_register_device(struct clock_event_device *dev)
{
unsigned long flags;
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
if (!dev->cpumask) {
WARN_ON(num_possible_cpus() > 1);
dev->cpumask = cpumask_of(smp_processor_id());
}
raw_spin_lock_irqsave(&clockevents_lock, flags);
// Add the clock event device to the clockevent_devices list; the list can be used for callbacks on suspend, resume or other notifications
list_add(&dev->list, &clockevent_devices);
clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);// notify that a clock event device was added
clockevents_notify_released();
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}
/*
* Notify about a clock event change. Called with clockevents_lock
* held.
*
* Forwards @reason and @dev to the clockevents_chain notifier chain.
*/
static void clockevents_do_notify(unsigned long reason, void *dev)
{
raw_notifier_call_chain(&clockevents_chain, reason, dev);
}

CLOCK_EVT_NOTIFY_ADD通知會在tick_notify里收到,然后回調
tick_check_new_device -> tick_setup_device(td, newdev, cpu, cpumask_of(cpu));

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/*
* Setup the tick device
*
* Installs @newdev as the event device of @td. On first setup the tick
* device starts in periodic mode; otherwise the previous handler and
* next event are carried over for one-shot setup.
*/
static void tick_setup_device(struct tick_device *td,
struct clock_event_device *newdev, int cpu,
const struct cpumask *cpumask)
{
ktime_t next_event;
void (*handler)(struct clock_event_device *) = NULL;
/*
* First device setup ?
*/
if (!td->evtdev) {
/*
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
tick_do_timer_cpu = cpu;
tick_next_period = ktime_get();
tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
}
/*
* Startup in periodic mode first.
*/
td->mode = TICKDEV_MODE_PERIODIC;// initially always PERIODIC mode; ONESHOT is used only once high-resolution timers are active
} else {
handler = td->evtdev->event_handler;
next_event = td->evtdev->next_event;
td->evtdev->event_handler = clockevents_handle_noop;
}
td->evtdev = newdev;
/*
* When the device is not per cpu, pin the interrupt to the
* current cpu:
*/
if (!cpumask_equal(newdev->cpumask, cpumask))
irq_set_affinity(newdev->irq, cpumask);
/*
* When global broadcasting is active, check if the current
* device is registered as a placeholder for broadcast mode.
* This allows us to handle this x86 misfeature in a generic
* way.
*/
if (tick_device_uses_broadcast(newdev, cpu))
return;
// Initialise the device for the selected mode
if (td->mode == TICKDEV_MODE_PERIODIC)
tick_setup_periodic(newdev, 0);// this sets the handler to tick_handle_periodic or tick_handle_periodic_broadcast
else
tick_setup_oneshot(newdev, handler, next_event); // in high-resolution mode the handler is actually hrtimer_interrupt
}

低精度模式

所以,周期時鐘(低精度)時,回調函數為tick_handle_periodic或tick_handle_periodic_broadcast

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
/*
* Event handler for periodic ticks
*
* Runs the periodic tick work; when the device is in one-shot mode it
* additionally reprograms the next event one tick_period ahead.
*/
void tick_handle_periodic(struct clock_event_device *dev)
{
int cpu = smp_processor_id();
ktime_t next;
tick_periodic(cpu);
if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
return;
/*
* Setup the next period for devices, which do not have
* periodic mode:
*/
next = ktime_add(dev->next_event, tick_period);
for (;;) {
if (!clockevents_program_event(dev, next, false))
return;
/*
* Have to be careful here. If we're in oneshot mode,
* before we call tick_periodic() in a loop, we need
* to be sure we're using a real hardware clocksource.
* Otherwise we could get trapped in an infinite
* loop, as the tick_periodic() increments jiffies,
* when then will increment time, posibly causing
* the loop to trigger again and again.
*/
if (timekeeping_valid_for_hres())
tick_periodic(cpu);
next = ktime_add(next, tick_period);
}
}
/*
* Periodic tick
*
* The CPU that owns do_timer advances tick_next_period and calls
* do_timer(1) under jiffies_lock; every CPU then updates process
* times and profiling.
*/
static void tick_periodic(int cpu)
{
if (tick_do_timer_cpu == cpu) {
write_seqlock(&jiffies_lock);
/* Keep track of the next tick event */
tick_next_period = ktime_add(tick_next_period, tick_period);
do_timer(1);
write_sequnlock(&jiffies_lock);
}
update_process_times(user_mode(get_irq_regs()));// in low-resolution mode this updates process times and calls run_local_timers
profile_tick(CPU_PROFILING);
}
/*
 * Per-tick bookkeeping for the current task: accounts the tick, runs the
 * local timers, and calls the RCU, irq_work, scheduler and POSIX CPU
 * timer hooks.
 */
void update_process_times(int user_tick)
{
struct task_struct *p = current;
int cpu = smp_processor_id();
/* Note: this timer irq context must be accounted for as well. */
account_process_tick(p, user_tick);
run_local_timers(); // run the local timers
rcu_check_callbacks(cpu, user_tick);
#ifdef CONFIG_IRQ_WORK
if (in_irq())
irq_work_run();
#endif
scheduler_tick();
run_posix_cpu_timers(p);
}
/*
* Called by the local, per-CPU timer interrupt on SMP.
*/
void run_local_timers(void)
{
hrtimer_run_queues();// runs all expired hrtimers in hardirq context
raise_softirq(TIMER_SOFTIRQ); // the TIMER softirq, not the HRTIMER one; expired timers run in softirq context. The callback is registered via start_kernel->init_timers->open_softirq(TIMER_SOFTIRQ, run_timer_softirq)
}

這里的TIMER SOFTIRQ,是在start_kernel里面調用init_timers初始化的,會調用到run_timer_softirq

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*
 * Boot-time setup of the low-resolution timers: prepares the current CPU,
 * registers the CPU notifier and opens TIMER_SOFTIRQ.
 */
void __init init_timers(void)
{
int err;
/* ensure there are enough low bits for flags in timer->base pointer */
BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
// Explicitly invoke the PREPARE step for the current CPU
err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
init_timer_stats();
BUG_ON(err != NOTIFY_OK);
register_cpu_notifier(&timers_nb);// this is where the notifier callback is actually registered
open_softirq(TIMER_SOFTIRQ, run_timer_softirq);// register the TIMER_SOFTIRQ softirq handler
}
/* TIMER_SOFTIRQ handler: checks for a pending switch to high resolution, then runs due low-resolution timers. */
static void run_timer_softirq(struct softirq_action *h)
{
struct tvec_base *base = __this_cpu_read(tvec_bases);
hrtimer_run_pending();// check whether a switch to high-resolution mode is needed
if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);// in low-resolution mode, call the low-resolution timer callbacks from softirq context
}

上面的run_local_timers里,調用hrtimer_run_queues,目的是在低精度模式下,實現hrtimer功能

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/*
* Called from hardirq context every jiffy
*
* Runs expired hrtimers on every clock base of the current CPU when
* high-resolution mode is not active.
*/
void hrtimer_run_queues(void)
{
struct timerqueue_node *node;
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
struct hrtimer_clock_base *base;
int index, gettime = 1;
// If high-resolution mode is active we return immediately.
// In other words, this function is what provides hrtimer support in low-resolution mode.
// Remember: this is called from the clock device's hard interrupt.
if (hrtimer_hres_active())
return;
for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
base = &cpu_base->clock_base[index];// walk every clock type on the current CPU and check all of its timers
if (!timerqueue_getnext(&base->active))
continue;
if (gettime) {
hrtimer_get_softirq_time(cpu_base);// refreshes softirq_time of all bases to the current time
gettime = 0;
}
raw_spin_lock(&cpu_base->lock);
while ((node = timerqueue_getnext(&base->active))) {
struct hrtimer *timer;
// The time compared here is the node's time, i.e. the hard expires
timer = container_of(node, struct hrtimer, node);
if (base->softirq_time.tv64 <=
hrtimer_get_expires_tv64(timer))// i.e. timer->node.expires.tv64; the comparison uses timer->node.expires, not timer->_softexpires
break;
__run_hrtimer(timer, &base->softirq_time);// run the expired timer
}
raw_spin_unlock(&cpu_base->lock);
}
}

低精度切換到高精度

上面低精度模式下,運行TIMER SOFTIRQ的時候(run_timer_softirq),會檢查是否可以切換到高精度模式

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/* Called from the timer softirq: switches to high-resolution mode when the one-shot check allows it. */
void hrtimer_run_pending(void)
{
if (hrtimer_hres_active())// already in high-resolution mode
return;
/*
* This _is_ ugly: We have to check in the softirq context,
* whether we can switch to highres and / or nohz mode. The
* clocksource switch happens in the timer interrupt with
* xtime_lock held. Notification from there only sets the
* check bit in the tick_oneshot code, otherwise we might
* deadlock vs. xtime_lock.
*/
if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
hrtimer_switch_to_hres();// if high-resolution mode is enabled, switch to it
}
/*
 * Decide whether the tick can leave periodic mode. Returns 1 when the
 * caller should switch to high resolution; returns 0 otherwise, after
 * switching to nohz itself when @allow_nohz is set and the checks pass.
 */
int tick_check_oneshot_change(int allow_nohz)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
// Bit 0 records whether the clock has changed
if (!test_and_clear_bit(0, &ts->check_clocks))
return 0;
if (ts->nohz_mode != NOHZ_MODE_INACTIVE)// NOHZ mode is already enabled
return 0;
// If timekeeping is not valid for high resolution, or the clock event device has no one-shot support, we cannot switch
if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
return 0;
// When high resolution is to be used, return 1; the caller will then install hrtimer_interrupt.
// Otherwise switch to nohz, which installs tick_nohz_handler.
if (!allow_nohz)
return 1;
#if 0
low resolution mode High resolution mode
------------------+-----------------------+-----------------------
periodic tick   | tick_handle_periodic  | hrtimer_interrupt
dynamic tick   | tick_nohz_handler   | hrtimer_interrupt
------------------+-----------------------+-----------------------
#endif
tick_nohz_switch_to_nohz();
return 0;
}
/*
* Switch to high resolution mode
*
* Returns 1 when the current CPU is (now) in high-resolution mode and
* 0 when tick_init_highres() failed. Runs with local interrupts saved
* and disabled around the switch.
*/
static int hrtimer_switch_to_hres(void)
{
int i, cpu = smp_processor_id();
struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
unsigned long flags;
if (base->hres_active)// already in high-resolution mode
return 1;
local_irq_save(flags);
if (tick_init_highres()) { // => tick_switch_to_oneshot(hrtimer_interrupt)
// Initialisation failed
local_irq_restore(flags);
/* The newline escape was lost in the quoted source ("%dn"); restored. */
printk(KERN_WARNING "Could not switch to high resolution "
"mode on CPU %d\n", cpu);
return 0;
}
base->hres_active = 1;// this flag marks high-resolution mode as active
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
base->clock_base[i].resolution = KTIME_HIGH_RES;// update each clock base's resolution
tick_setup_sched_timer();// the work done by the tick in low resolution is done by an hrtimer in high resolution. Why? The article author's guess: the high-resolution rate is higher than the low-resolution one and these tasks do not need to run that often
/* "Retrigger" the interrupt to get things going */
retrigger_next_event(NULL);
local_irq_restore(flags);
return 1;
}
/**
* tick_switch_to_oneshot - switch to oneshot mode
* @handler: event handler to install on the current CPU's tick device
*
* Returns 0 on success, -EINVAL when there is no tick device, the
* device lacks one-shot support, or it is not functional.
*/
int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
{// High-resolution support needs one-shot mode, which makes it easy to change the rate or to stop/start promptly
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
struct clock_event_device *dev = td->evtdev;
if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) ||
!tick_device_is_functional(dev)) {
// The device cannot be used in ONESHOT mode
/* The newline escapes below were lost in the quoted source; restored. */
printk(KERN_INFO "Clockevents: "
"could not switch to one-shot mode:");
if (!dev) {
printk(" no tick device\n");
} else {
if (!tick_device_is_functional(dev))
printk(" %s is not functional.\n", dev->name);
else
printk(" %s does not support one-shot mode.\n",
dev->name);
}
return -EINVAL;
}
td->mode = TICKDEV_MODE_ONESHOT;// switch the tick device to ONESHOT mode
dev->event_handler = handler;// the handler changes too: the low-resolution callback is set in tick_setup_periodic, the high-resolution one is hrtimer_interrupt
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
tick_broadcast_switch_to_oneshot();// make the broadcast device switch to one-shot mode as well
return 0;
}
/**
* tick_setup_sched_timer - setup the tick emulation timer
*
* Initialises the per-CPU sched_timer hrtimer, optionally skews its
* expiry per CPU, and starts it, retrying until it is active.
*/
void tick_setup_sched_timer(void)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
ktime_t now = ktime_get();
/*
* Emulate tick processing via per-CPU hrtimers:
*/
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
ts->sched_timer.function = tick_sched_timer;// mainly updates jiffies, process run times, etc. - the work that also has to be done in low resolution
/* Get the next period (per cpu) */
hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
/* Offset the tick to avert jiffies_lock contention. */
if (sched_skew_tick) {// to avoid jiffies_lock contention, each CPU's expiry is offset by an amount derived from its CPU id
u64 offset = ktime_to_ns(tick_period) >> 1;
do_div(offset, num_possible_cpus());
offset *= smp_processor_id();
hrtimer_add_expires_ns(&ts->sched_timer, offset);
}
for (;;) {// loop until the sched timer is actually running
hrtimer_forward(&ts->sched_timer, now, tick_period);
hrtimer_start_expires(&ts->sched_timer,
HRTIMER_MODE_ABS_PINNED);
/* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer))
break;
now = ktime_get();
}
#ifdef CONFIG_NO_HZ
if (tick_nohz_enabled)
ts->nohz_mode = NOHZ_MODE_HIGHRES;
#endif
}

高精度模式

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
/*
* High resolution timer interrupt
* Called with interrupts disabled
*/
void hrtimer_interrupt(struct clock_event_device *dev)
{
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
ktime_t expires_next, now, entry_time, delta;
int i, retries = 0;
BUG_ON(!cpu_base->hres_active);
cpu_base->nr_events++;//統(tǒng)計(jì)總的interrupt次數(shù)
dev->next_event.tv64 = KTIME_MAX;
raw_spin_lock(&cpu_base->lock);
entry_time = now = hrtimer_update_base(cpu_base);//更新clock_base的時(shí)間
retry:
expires_next.tv64 = KTIME_MAX;
/*
* We set expires_next to KTIME_MAX here with cpu_base->lock
* held to prevent that a timer is enqueued in our queue via
* the migration code. This does not affect enqueueing of
* timers which run their callback and need to be requeued on
* this CPU.
*/
cpu_base->expires_next.tv64 = KTIME_MAX;
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
struct hrtimer_clock_base *base;
struct timerqueue_node *node;
ktime_t basenow;
if (!(cpu_base->active_bases & (1 << i)))//clock不是激活狀態(tài),比如,clock base里面沒有timer,何必調(diào)用一次?
continue;
base = cpu_base->clock_base + i;//每一個(gè)CLOCK BASE
basenow = ktime_add(now, base->offset);//每一個(gè)CLOCK BASE的當(dāng)前時(shí)間
//取每一個(gè)CLOCK BASE的active紅黑樹中最頂端hrtimer,最可能超時(shí)
while ((node = timerqueue_getnext(&base->active))) {
struct hrtimer *timer;
timer = container_of(node, struct hrtimer, node);
/*
* The immediate goal for using the softexpires is
* minimizing wakeups, not running timers at the
* earliest interrupt after their soft expiration.
* This allows us to avoid using a Priority Search
* Tree, which can answer a stabbing querry for
* overlapping intervals and instead use the simple
* BST we already have.
* We don't add extra wakeups by delaying timers that
* are right-of a not yet expired timer, because that
* timer will have to trigger a wakeup anyway.
*/
//這里比較的是soft expires,如果soft expires超過了當(dāng)前CLOCK BASE的時(shí)間,表示還沒到期,當(dāng)前的CLOCK BASE可以中斷檢查
if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
ktime_t expires;
expires = ktime_sub(hrtimer_get_expires(timer),
base->offset);//用未超時(shí)的timer的hard expires - base->offset,其實(shí)就是base 下次觸發(fā)的時(shí)間
if (expires.tv64 < 0)
expires.tv64 = KTIME_MAX;//溢出了?這不科學(xué),設(shè)置為最大值
if (expires.tv64 < expires_next.tv64)
expires_next = expires;//expires其實(shí)就是next expires
break;
}
__run_hrtimer(timer, &basenow);//調(diào)用run timer
}
}
/*
* Store the new expiry value so the migration code can verify
* against it.
*/
cpu_base->expires_next = expires_next;
raw_spin_unlock(&cpu_base->lock);
/* Reprogramming necessary ? */
if (expires_next.tv64 == KTIME_MAX ||//不需要next expires 或設(shè)置硬件next正確
!tick_program_event(expires_next, 0)) {//設(shè)置對(duì)應(yīng)硬件的下一次超時(shí),為表示正確
cpu_base->hang_detected = 0;
return;
}
/*
* The next timer was already expired due to:
* - tracing
* - long lasting callbacks
* - being scheduled away when running in a VM
*
* We need to prevent that we loop forever in the hrtimer
* interrupt routine. We give it 3 attempts to avoid
* overreacting on some spurious event.
*
* Acquire base lock for updating the offsets and retrieving
* the current time.
*/
raw_spin_lock(&cpu_base->lock);
//當(dāng)前時(shí)間已經(jīng)超過next time,嘗試修復(fù),執(zhí)行次
now = hrtimer_update_base(cpu_base);
cpu_base->nr_retries++;
if (++retries < 3)
goto retry;
//還是不行?標(biāo)志hang了
/*
* Give the system a chance to do something else than looping
* here. We stored the entry time, so we know exactly how long
* we spent here. We schedule the next event this amount of
* time away.
*/
cpu_base->nr_hangs++;
cpu_base->hang_detected = 1;
raw_spin_unlock(&cpu_base->lock);
delta = ktime_sub(now, entry_time);//從剛進(jìn)來到現(xiàn)在,耗時(shí)多長?delta
if (delta.tv64 > cpu_base->max_hang_time.tv64)
cpu_base->max_hang_time = delta;//保存最大的hang time就可以了
/*
* Limit it to a sensible value as we enforce a longer
* delay. Give the CPU at least 100ms to catch up.
*/
if (delta.tv64 > 100 * NSEC_PER_MSEC)
expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
else
expires_next = ktime_add(now, delta);
tick_program_event(expires_next, 1);//設(shè)置長一些的超時(shí)最大ms
printk_once(KERN_WARNING "hrtimer: interrupt took %llu nsn",
ktime_to_ns(delta));
}
tick_program_event ->
/**
* clockevents_program_event - Reprogram the clock event device.
* @dev: device to program
* @expires: absolute expiry time (monotonic clock)
* @force: program minimum delay if expires can not be set
*
* Returns 0 on success, -ETIME when the event is in the past.
*/
int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
bool force)
{
unsigned long long clc;
int64_t delta;
int rc;
if (unlikely(expires.tv64 < 0)) {
WARN_ON_ONCE(1);
return -ETIME;
}
dev->next_event = expires;
if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
return 0;
/* Shortcut for clockevent devices that can deal with ktime. */
if (dev->features & CLOCK_EVT_FEAT_KTIME)
return dev->set_next_ktime(expires, dev);
delta = ktime_to_ns(ktime_sub(expires, ktime_get()));
if (delta <= 0)// the current time is already past the requested expiry; @force decides whether to program the minimum delta instead of failing
return force ? clockevents_program_min_delta(dev) : -ETIME;
delta = min(delta, (int64_t) dev->max_delta_ns);
delta = max(delta, (int64_t) dev->min_delta_ns);
clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
rc = dev->set_next_event((unsigned long) clc, dev); // device callback; for the hpet the article names hpet_next_event (NOTE(review): the hpet_clockevent shown earlier installs hpet_legacy_next_event - confirm)
// A non-zero return means failure; with @force set, fall back to programming the minimum delta
return (rc && force) ? clockevents_program_min_delta(dev) : rc;
}
/*
 * Run one expired timer: dequeue it, call its callback with the CPU base
 * lock dropped, and re-enqueue it unless the callback returned
 * HRTIMER_NORESTART.
 */
static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
{
struct hrtimer_clock_base *base = timer->base;
struct hrtimer_cpu_base *cpu_base = base->cpu_base;
enum hrtimer_restart (*fn)(struct hrtimer *);
int restart;
WARN_ON(!irqs_disabled());
debug_deactivate(timer);
__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);// first remove the timer from its base and set its state to CALLBACK
timer_stats_account_hrtimer(timer);
/*
The function pointer used here is the one set after calling hrtimer_init, e.g.
hrtimer_init(&rtc->coalesced_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
rtc->coalesced_timer.function = coalesced_timer_fn;
So the hrtimer callback we install runs in hardirq context.
*/
fn = timer->function;
/*
* Because we run timers from hardirq context, there is no chance
* they get migrated to another cpu, therefore its safe to unlock
* the timer base.
*/
raw_spin_unlock(&cpu_base->lock);// the comment above states explicitly that timer callbacks run in hardirq context
trace_hrtimer_expire_entry(timer, now);
restart = fn(timer);// invoke the user's callback
trace_hrtimer_expire_exit(timer);
raw_spin_lock(&cpu_base->lock);
/*
* Note: We clear the CALLBACK bit after enqueue_hrtimer and
* we do not reprogramm the event hardware. Happens either in
* hrtimer_start_range_ns() or in hrtimer_interrupt()
*/
if (restart != HRTIMER_NORESTART) {
BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
enqueue_hrtimer(timer, base);
}
WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
timer->state &= ~HRTIMER_STATE_CALLBACK;
}

HRTIMER函数详解

初始化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// hrtimer_init is very simple: it zeroes the hrtimer, binds it to the matching
// clock base and initialises its timerqueue node.
// NOTE(review): the original note said it adds the hrtimer to the RB tree, but
// nothing in this function enqueues the timer.
static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode)
{
struct hrtimer_cpu_base *cpu_base;
int base;
memset(timer, 0, sizeof(struct hrtimer));
cpu_base = &__raw_get_cpu_var(hrtimer_bases);
/* CLOCK_REALTIME is only kept for HRTIMER_MODE_ABS; any other mode is switched to CLOCK_MONOTONIC. */
if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
clock_id = CLOCK_MONOTONIC;
base = hrtimer_clockid_to_base(clock_id);
timer->base = &cpu_base->clock_base[base];
timerqueue_init(&timer->node);
#ifdef CONFIG_TIMER_STATS
timer->start_site = NULL;
timer->start_pid = -1;
memset(timer->start_comm, 0, TASK_COMM_LEN);
#endif
}

设置超时

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
/**
* hrtimer_forward - forward the timer expiry
* @timer: hrtimer to forward
* @now: forward past this time
* @interval: the interval to forward
*
* Forward the timer expiry so it will expire in the future.
* Returns the number of overruns.
*/
u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
{
u64 orun = 1;
ktime_t delta;
delta = ktime_sub(now, hrtimer_get_expires(timer));
if (delta.tv64 < 0)// The timer's current expiry is already after `now`: leave it unchanged, since the purpose of this function is to move the expiry past `now`
return 0;
if (interval.tv64 < timer->base->resolution.tv64)// If interval is smaller than the resolution of the clock base, use the clock base resolution instead
interval.tv64 = timer->base->resolution.tv64;
// The gap between the requested reference time and the old expiry is at least one interval.
// The condition is marked unlikely, i.e. this case is expected to be uncommon.
// For example, when you add again from inside a periodic callback, the interval is certainly the larger value.
if (unlikely(delta.tv64 >= interval.tv64)) {
s64 incr = ktime_to_ns(interval);
orun = ktime_divns(delta, incr);// How many whole intervals the gap contains
hrtimer_add_expires_ns(timer, incr * orun);
if (hrtimer_get_expires_tv64(timer) > now.tv64)// This is not a plain "add interval": it is enough that the expiry ends up > now, which is what a periodic timer needs
return orun;
/*
* This (and the ktime_add() below) is the
* correction for exact:
*/
orun++;
}
hrtimer_add_expires(timer, interval);// Add interval on top of the previous expiry
return orun;
}
/* Advance both node.expires and _softexpires of @timer by @time, using ktime_add_safe(). */
static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time)
{// According to the article "Linux clock management": once the single point at which a timer had to run (the hard expiry) is turned into a range, hrtimer
// interrupts can be handled together as far as possible, so the CPU is woken up repeatedly less often, which saves power, while the hrtimer can still guarantee its precision.
timer->node.expires = ktime_add_safe(timer->node.expires, time);
timer->_softexpires = ktime_add_safe(timer->_softexpires, time);
}
/* Advance both node.expires and _softexpires of @timer by @ns nanoseconds, using ktime_add_ns(). */
static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns)
{
timer->node.expires = ktime_add_ns(timer->node.expires, ns);
timer->_softexpires = ktime_add_ns(timer->_softexpires, ns);
}

启动

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/* Start @timer at @tim: forwards to __hrtimer_start_range_ns() with delta_ns 0 and wakeup 1. */
int
hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
{// mode determines how tim is interpreted: with HRTIMER_REL, tim is a time relative to now;
// as the name suggests, with HRTIMER_ABS, tim is an absolute time
return __hrtimer_start_range_ns(timer, tim, 0, mode, 1);
}
/*
 * Arm @timer: dequeue it if it was queued, possibly switch its base, compute
 * the expiry (adding the base's current time for HRTIMER_MODE_REL), enqueue it
 * and, if it became the leftmost timer on this CPU's base, try to reprogram.
 * Returns the value of remove_hrtimer() for the initial dequeue.
 */
int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
unsigned long delta_ns, const enum hrtimer_mode mode,
int wakeup)
{
struct hrtimer_clock_base *base, *new_base;
unsigned long flags;
int ret, leftmost;
base = lock_hrtimer_base(timer, &flags);
/* Remove an active timer from the queue: */
ret = remove_hrtimer(timer, base);
/* Switch the timer base, if necessary: */
// On start, check whether the timer has to be switched to the current CPU's clock base
new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
if (mode & HRTIMER_MODE_REL) {
tim = ktime_add_safe(tim, new_base->get_time());
/*
* CONFIG_TIME_LOW_RES is a temporary way for architectures
* to signal that they simply return xtime in
* do_gettimeoffset(). In this case we want to round up by
* resolution when starting a relative timer, to avoid short
* timeouts. This will go away with the GTOD framework.
*/
#ifdef CONFIG_TIME_LOW_RES
tim = ktime_add_safe(tim, base->resolution);
#endif
}
hrtimer_set_expires_range_ns(timer, tim, delta_ns);
timer_stats_hrtimer_set_start_info(timer);
// Insert into the red-black tree
leftmost = enqueue_hrtimer(timer, new_base);
/*
* Only allow reprogramming if the new base is on this CPU.
* (it might still be on another CPU if the timer was pending)
*
* XXX send_remote_softirq() ?
*/
// After enqueueing, leftmost is non-zero if this hrtimer is now the earliest one to expire.
// If it sits on this CPU's clock base, reprogram the expiry, because the previously programmed one is later than needed
if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)
&& hrtimer_enqueue_reprogram(timer, new_base)) {
// hrtimer_enqueue_reprogram() returned non-zero: raise HRTIMER_SOFTIRQ so that the timer expiry can be checked from softirq
if (wakeup) {
/*
* We need to drop cpu_base->lock to avoid a
* lock ordering issue vs. rq->lock.
*/
raw_spin_unlock(&new_base->cpu_base->lock);
raise_softirq_irqoff(HRTIMER_SOFTIRQ);
local_irq_restore(flags);
return ret;
} else {
__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
}
}
unlock_hrtimer_base(timer, &flags);
return ret;
}

删除

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
/*
 * Report whether @timer currently has HRTIMER_STATE_CALLBACK set.
 * Returns the masked bit itself (non-zero when set), not a normalised 0/1.
 */
static inline int hrtimer_callback_running(struct hrtimer *timer)
{
return timer->state & HRTIMER_STATE_CALLBACK;
}
/**
 * hrtimer_try_to_cancel - try to deactivate a timer
 * @timer: hrtimer to stop
 *
 * Returns:
 *  0 when the timer was not active
 *  1 when the timer was active
 * -1 when the timer is currently executing the callback function and
 *    cannot be stopped
 */
int hrtimer_try_to_cancel(struct hrtimer *timer)
{
	unsigned long irq_flags;
	struct hrtimer_clock_base *locked_base;
	int result;

	locked_base = lock_hrtimer_base(timer, &irq_flags);

	/* A timer whose callback is running is not cancelled; report -1 instead. */
	if (hrtimer_callback_running(timer))
		result = -1;
	else
		result = remove_hrtimer(timer, locked_base);

	unlock_hrtimer_base(timer, &irq_flags);
	return result;
}
/**
 * hrtimer_cancel - cancel a timer and wait for the handler to finish.
 * @timer: the timer to be cancelled
 *
 * Returns:
 *  0 when the timer was not active
 *  1 when the timer was active
 */
int hrtimer_cancel(struct hrtimer *timer)
{
	int outcome;

	/* Keep retrying while the attempt reports a negative result (callback running). */
	while ((outcome = hrtimer_try_to_cancel(timer)) < 0)
		cpu_relax();

	return outcome;
}

状态转换

state为hrtimer的四个状态:

1
2
3
4
/* Bits stored in hrtimer->state. */
#define HRTIMER_STATE_INACTIVE 0x00 /* no bit set; the value left by the memset() in __hrtimer_init */
#define HRTIMER_STATE_ENQUEUED 0x01 /* per the surrounding text: OR-ed in by enqueue_hrtimer while the timer is queued */
#define HRTIMER_STATE_CALLBACK 0x02 /* set by __run_hrtimer around the callback and cleared afterwards */
#define HRTIMER_STATE_MIGRATE 0x04 /* set while migrate_hrtimer_list moves the timer to another base */

可以想象,HRTIMER_STATE_INACTIVE这个是初始值,调用hrtimer_init后,这个变量设置为HRTIMER_STATE_INACTIVE

调用enqueue_hrtimer将hrtimer*加入到RB TREE后,状态会 OR 上 HRTIMER_STATE_ENQUEUED
什么时候清除?当调用__remove_hrtimer从RB TREE里删除后,会“设置”为新状态,此新状态中一定不包含HRTIMER_STATE_ENQUEUED

  1. 在remove_hrtimer调用的时候,只保留了CALLBACK状态

    1
    2
    state = timer->state & HRTIMER_STATE_CALLBACK;
    __remove_hrtimer(timer, base, state, reprogram);
  2. 在__run_hrtimer的时候,会先从RB TREE里将hrtimer*删除,设置状态为CALLBACK,然后调用回调函数
    先将timer从base中删除,并设置timer的状态为CALLBACK

    1
    2
    3
    4
    5
    6
    __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
    restart = fn(timer);
    if (restart != HRTIMER_NORESTART) {
    enqueue_hrtimer(timer, base);
    }
    timer->state &= ~HRTIMER_STATE_CALLBACK;


    调用完fn后,又将CALLBACK状态去除

  3. 在将其他DEAD的CPU上的hrtimer对象迁移到本CPU的时候,migrate_hrtimer_list

    1
    2
    3
    4
    __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
    timer->base = new_base; // replace the base with the current CPU's base
    enqueue_hrtimer(timer, new_base);// enqueue on the current CPU's base
    timer->state &= ~HRTIMER_STATE_MIGRATE; // clear the HRTIMER_STATE_MIGRATE state

参考资料

  1. 浅析 Linux 中的时间编程和实现原理
    http://www./embeded/201308211.asp

  2. Linux 时钟管理
    http://www.ibm.com/developerworks/cn/linux/l-cn-timerm/index.html

PDF下載

    本站是提供個(gè)人知識(shí)管理的網(wǎng)絡(luò)存儲(chǔ)空間,所有內(nèi)容均由用戶發(fā)布,不代表本站觀點(diǎn)。請(qǐng)注意甄別內(nèi)容中的聯(lián)系方式、誘導(dǎo)購買等信息,謹(jǐn)防詐騙。如發(fā)現(xiàn)有害或侵權(quán)內(nèi)容,請(qǐng)點(diǎn)擊一鍵舉報(bào)。
    轉(zhuǎn)藏 分享 獻(xiàn)花(0

    0條評(píng)論

    發(fā)表

    請(qǐng)遵守用戶 評(píng)論公約

    類似文章 更多