diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 794cf8953b7fd64b33bf2e57d769016957d04123..05a9cef2edacdf2ccdb3a45be8b32d5aab15e7fa 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -920,6 +920,12 @@ static inline bool cgroup_ifs_enabled(void)
 	return static_branch_unlikely(&cgrp_ifs_enabled);
 }
 
+DECLARE_STATIC_KEY_TRUE(cgrp_ifs_tsc_available);
+static inline bool cgroup_ifs_tsc_available(void)
+{
+	return static_branch_likely(&cgrp_ifs_tsc_available);
+}
+
 static inline struct cgroup_ifs *cgroup_ifs(struct cgroup *cgrp)
 {
 	return cgroup_ino(cgrp) == 1 ? &cgroup_root_ifs : cgrp->ifs;
@@ -955,19 +961,22 @@ static inline void cgroup_ifs_account_delta(struct cgroup_ifs_cpu *ifsc,
 
 static inline u64 cgroup_ifs_time_counter(void)
 {
+	if (cgroup_ifs_tsc_available()) {
 #if defined(__aarch64__)
-	u64 counter;
+		u64 counter;
 
-	asm volatile("mrs %0, cntvct_el0" : "=r" (counter) :: "memory");
-	return counter;
+		asm volatile("mrs %0, cntvct_el0" : "=r" (counter) :: "memory");
+		return counter;
 #elif defined(__x86_64__)
-	unsigned int lo, hi;
+		unsigned int lo, hi;
 
-	asm volatile("rdtsc" : "=a"(lo), "=d"(hi) :: "memory");
-	return ((u64)hi << 32) | lo;
-#else
-	return sched_clock();
+		asm volatile("rdtsc" : "=a"(lo), "=d"(hi) :: "memory");
+		return ((u64)hi << 32) | lo;
 #endif
+	}
+
+	/* fallback */
+	return sched_clock();
 }
 
 static inline void cgroup_ifs_enter_lock(u64 *clock)
diff --git a/init/Kconfig b/init/Kconfig
index 6c1f5079467f9b3910691098d42788110be55e52..f94a0c0894fc8a984cec6ee970790d1e43f50b1e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1390,7 +1390,9 @@ config CGROUP_FILES
 config CGROUP_IFS
 	bool "Cgroup-based Interference Statistics"
 	default n
+	depends on X86 || ARM64
 	select KERNFS
+	select SCHED_INFO
 	select IRQ_TIME_ACCOUTING
 	help
 	  This option will provide online low-overhead interference
diff --git a/kernel/cgroup/ifs.c b/kernel/cgroup/ifs.c
index e6419899d725b60480c9076c300fa0b7541237dc..d7a3e4214143283c9b2b02a88ccbff45e171b04f 100644
--- a/kernel/cgroup/ifs.c
+++ b/kernel/cgroup/ifs.c
@@ -49,6 +49,7 @@ struct cgroup_ifs cgroup_root_ifs = {
 };
 
 DEFINE_STATIC_KEY_FALSE(cgrp_ifs_enabled);
+DEFINE_STATIC_KEY_TRUE(cgrp_ifs_tsc_available);
 
 #ifdef CONFIG_CGROUP_IFS_DEFAULT_ENABLED
 static bool ifs_enable = true;
@@ -315,7 +316,8 @@ static void tdesc_init(struct ifs_tdesc *desc, u64 freq)
 static void cgroup_ifs_tdesc_init(void)
 {
 	tdesc_init(&ifs_tdesc[IFS_TIMER_CLK], NSEC_PER_SEC);
-	tdesc_init(&ifs_tdesc[IFS_TIMER_TSC], this_cpu_read(ifs_tsc_freq));
+	if (cgroup_ifs_tsc_available())
+		tdesc_init(&ifs_tdesc[IFS_TIMER_TSC], this_cpu_read(ifs_tsc_freq));
 }
 
 static u64 tsc_cycles_to_nsec(u64 tsc_cycles)
@@ -327,7 +329,7 @@ static u64 tsc_cycles_to_nsec(u64 tsc_cycles)
 #endif
 }
 
-static int cgroup_ifs_tsc_init(void)
+static void cgroup_ifs_tsc_init(void)
 {
 	u64 freq = 0;
 	int cpu;
@@ -339,14 +341,13 @@ static int cgroup_ifs_tsc_init(void)
 	freq = tsc_khz * 1000;
 #endif
 	if (!freq) {
-		pr_warn("Disable CGROUP IFS: no constant tsc\n");
-		return -1;
+		pr_warn("IFS: no constant tsc, use default clocksource as time source\n");
+		static_branch_disable(&cgrp_ifs_tsc_available);
+		return;
 	}
 
 	for_each_possible_cpu(cpu)
 		per_cpu(ifs_tsc_freq, cpu) = freq;
-
-	return 0;
 }
 
 static bool should_print(int type)
@@ -362,6 +363,11 @@ static bool should_print(int type)
 	return true;
 }
 
+static bool use_tsc(enum ifs_types t)
+{
+	return cgroup_ifs_tsc_available() && (t == IFS_SPINLOCK || t == IFS_MUTEX);
+}
+
 static int print_sum_time(struct cgroup_ifs *ifs, struct seq_file *seq)
 {
 	u64 time[NR_IFS_TYPES] = { 0 };
@@ -381,7 +387,7 @@ static int print_sum_time(struct cgroup_ifs *ifs, struct seq_file *seq)
 	for (i = 0; i < NR_IFS_TYPES; i++) {
 		if (!should_print(i))
 			continue;
-		if (i == IFS_SPINLOCK || i == IFS_MUTEX)
+		if (use_tsc(i))
 			time[i] = tsc_cycles_to_nsec(time[i]);
 		seq_printf(seq, "%-18s%llu\n", ifs_type_name(i), time[i]);
 	}
@@ -425,7 +431,7 @@ static int print_hist_count(struct cgroup_ifs *ifs, struct seq_file *seq)
 		if (!should_print(i))
 			continue;
 
-		if (i == IFS_SPINLOCK || i == IFS_MUTEX)
+		if (use_tsc(i))
 			desc = &ifs_tdesc[IFS_TIMER_TSC];
 		else
 			desc = &ifs_tdesc[IFS_TIMER_CLK];
@@ -471,6 +477,16 @@ static int cgroup_ifs_show(struct seq_file *seq, void *v)
 		return -EINVAL;
 	}
 
+#ifdef CONFIG_CGROUP_CPUACCT
+	if ((!cgroup_subsys_on_dfl(cpuacct_cgrp_subsys) && cgroup_on_dfl(cgrp)) ||
+	    (cgroup_subsys_on_dfl(cpuacct_cgrp_subsys) && !cgroup_on_dfl(cgrp))) {
+		pr_info("cgroup version mismatch: subsystem %s, cgroup %s\n",
+			cgroup_subsys_on_dfl(cpuacct_cgrp_subsys) ? "v2" : "v1",
+			cgroup_on_dfl(cgrp) ? "v2" : "v1");
+		return -EOPNOTSUPP;
+	}
+#endif
+
 	ret = print_sum_time(ifs, seq);
 	if (ret)
 		return ret;
@@ -532,9 +548,7 @@ void cgroup_ifs_init(void)
 	if (!ifs_enable)
 		return;
 
-	if (cgroup_ifs_tsc_init() < 0)
-		return;
-
+	cgroup_ifs_tsc_init();
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_ifs_files));
 	cgroup_ifs_tdesc_init();
 }
@@ -580,10 +594,17 @@ static __init int cgroup_ifs_enable(void)
 	if (!ifs_enable)
 		return 0;
 
-	if (!this_cpu_read(ifs_tsc_freq))
-		return 0;
-
 	static_branch_enable(&cgrp_ifs_enabled);
 	return 0;
 }
-late_initcall_sync(cgroup_ifs_enable);
+
+/*
+ * Execution Timing Constraints:
+ * 1. Must be late enough (e.g., after SUBSYS_INITCALL) to avoid the
+ *    intermediate state of the core cgroup subsystem initialization, ensuring
+ *    all internal structures are stable.
+ * 2. Must execute strictly before cgroup_v1_ifs_init(), which runs at
+ *    LATE_INITCALL_SYNC, as cgroup_v1_ifs_init() relies on 'cgrp_ifs_enabled'
+ *    being set before its execution begins.
+ */
+device_initcall(cgroup_ifs_enable);
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 1b736b93124707824521e9dfadcc4523e2d0c4a8..ca031b9af93a4b2234151a5220e98661d1c08760 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -136,7 +136,7 @@ __schedstats_from_se(struct sched_entity *se)
 #define QOS_THROTTLED 2
 #endif
 
-#ifdef CONFIG_CGROUP_IFS
+#if defined(CONFIG_CGROUP_IFS) && defined(CONFIG_SCHED_INFO)
 static inline void ifs_account_rundelay(struct task_struct *task, u64 delta)
 {
 	/*
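For reference, below is a minimal user-space sketch of the counter-read-and-convert scheme that cgroup_ifs_time_counter() and tsc_cycles_to_nsec() implement in the kernel. The inline assembly mirrors the patched cgroup_ifs_time_counter(); everything else is an illustrative assumption, not part of the patch: the cycles * NSEC_PER_SEC / freq conversion stands in for tsc_cycles_to_nsec(), whose body is outside this hunk, and the frequency is read from CNTFRQ_EL0 on arm64 or passed on the command line on x86_64, since user space has no tsc_khz.

/*
 * Stand-alone sketch (assumption, not kernel code): read the raw cycle
 * counter the same way cgroup_ifs_time_counter() does, then convert the
 * elapsed cycles to nanoseconds with a plain freq-based formula.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000ULL

static inline uint64_t time_counter(void)
{
#if defined(__aarch64__)
	uint64_t counter;

	/* Same read as the patch: virtual counter, readable from EL0. */
	asm volatile("mrs %0, cntvct_el0" : "=r" (counter) :: "memory");
	return counter;
#elif defined(__x86_64__)
	unsigned int lo, hi;

	/* Same read as the patch: raw TSC. */
	asm volatile("rdtsc" : "=a"(lo), "=d"(hi) :: "memory");
	return ((uint64_t)hi << 32) | lo;
#else
#error "unsupported architecture"
#endif
}

int main(int argc, char **argv)
{
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 10 * 1000 * 1000 };
	uint64_t freq, start, end, cycles;

#if defined(__aarch64__)
	/* The generic timer frequency is readable from EL0. */
	asm volatile("mrs %0, cntfrq_el0" : "=r" (freq));
	(void)argc;
	(void)argv;
#else
	/* No portable user-space tsc_khz; take the TSC frequency in Hz. */
	freq = argc > 1 ? strtoull(argv[1], NULL, 0) : 0;
#endif
	if (!freq) {
		fprintf(stderr, "usage: %s <counter-frequency-hz>\n", argv[0]);
		return 1;
	}

	start = time_counter();
	nanosleep(&ts, NULL);		/* ~10 ms of wall time */
	end = time_counter();

	cycles = end - start;
	/* cycles -> ns; assumed to match what tsc_cycles_to_nsec() computes */
	printf("%llu cycles ~= %llu ns\n",
	       (unsigned long long)cycles,
	       (unsigned long long)(cycles * NSEC_PER_SEC / freq));
	return 0;
}

On a machine with a 2.5 GHz invariant TSC this hypothetical tool would be run as "./counter 2500000000" and should report roughly 10 ms; when the static key cgrp_ifs_tsc_available is disabled at boot, the kernel path instead falls back to sched_clock() and no conversion is needed.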