aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-10 10:03:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-10 10:03:24 -0700
commit7f6dcffb44ad246e3211c6aeaba8a625e2766836 (patch)
treeeb433bcc3b419c32d7fb82111c006887bae77f3c /include/linux
parentMerge tag 'objtool-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
parentu64_stats: Streamline the implementation (diff)
downloadlinux-dev-7f6dcffb44ad246e3211c6aeaba8a625e2766836.tar.xz
linux-dev-7f6dcffb44ad246e3211c6aeaba8a625e2766836.zip
Merge tag 'sched-rt-2022-10-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull preempt RT updates from Thomas Gleixner: "Introduce preempt_[dis|enable_nested() and use it to clean up various places which have open coded PREEMPT_RT conditionals. On PREEMPT_RT enabled kernels, spinlocks and rwlocks are neither disabling preemption nor interrupts. Though there are a few places which depend on the implicit preemption/interrupt disable of those locks, e.g. seqcount write sections, per CPU statistics updates etc. PREEMPT_RT added open coded CONFIG_PREEMPT_RT conditionals to disable/enable preemption in the related code parts all over the place. That's hard to read and does not really explain why this is necessary. Linus suggested to use helper functions (preempt_disable_nested() and preempt_enable_nested()) and use those in the affected places. On !RT enabled kernels these functions are NOPs, but contain a lockdep assert to validate that preemption is actually disabled to catch call sites which do not have preemption disabled. Clean up the affected code paths in mm, dentry and lib" * tag 'sched-rt-2022-10-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: u64_stats: Streamline the implementation flex_proportions: Disable preemption entering the write section. mm/compaction: Get rid of RT ifdeffery mm/memcontrol: Replace the PREEMPT_RT conditionals mm/debug: Provide VM_WARN_ON_IRQS_ENABLED() mm/vmstat: Use preempt_[dis|en]able_nested() dentry: Use preempt_[dis|en]able_nested() preempt: Provide preempt_[dis|en]able_nested()
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/mmdebug.h6
-rw-r--r--include/linux/preempt.h42
-rw-r--r--include/linux/u64_stats_sync.h145
3 files changed, 112 insertions, 81 deletions
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 15ae78cd2853..b8728d11c949 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -94,6 +94,12 @@ void dump_mm(const struct mm_struct *mm);
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif
+#ifdef CONFIG_DEBUG_VM_IRQSOFF
+#define VM_WARN_ON_IRQS_ENABLED() WARN_ON_ONCE(!irqs_disabled())
+#else
+#define VM_WARN_ON_IRQS_ENABLED() do { } while (0)
+#endif
+
#ifdef CONFIG_DEBUG_VIRTUAL
#define VIRTUAL_BUG_ON(cond) BUG_ON(cond)
#else
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index b4381f255a5c..0df425bf9bd7 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -421,4 +421,46 @@ static inline void migrate_enable(void) { }
#endif /* CONFIG_SMP */
+/**
+ * preempt_disable_nested - Disable preemption inside a normally preempt disabled section
+ *
+ * Use for code which requires preemption protection inside a critical
+ * section which has preemption disabled implicitly on non-PREEMPT_RT
+ * enabled kernels, by e.g.:
+ * - holding a spinlock/rwlock
+ * - soft interrupt context
+ * - regular interrupt handlers
+ *
+ * On PREEMPT_RT enabled kernels spinlock/rwlock held sections, soft
+ * interrupt context and regular interrupt handlers are preemptible and
+ * only prevent migration. preempt_disable_nested() ensures that preemption
+ * is disabled for cases which require CPU local serialization even on
+ * PREEMPT_RT. For non-PREEMPT_RT kernels this is a NOP.
+ *
+ * The use cases are code sequences which are not serialized by a
+ * particular lock instance, e.g.:
+ * - seqcount write side critical sections where the seqcount is not
+ * associated to a particular lock and therefore the automatic
+ * protection mechanism does not work. This prevents a live lock
+ * against a preempting high priority reader.
+ * - RMW per CPU variable updates like vmstat.
+ */
+/* Macro to avoid header recursion hell vs. lockdep */
+#define preempt_disable_nested() \
+do { \
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) \
+ preempt_disable(); \
+ else \
+ lockdep_assert_preemption_disabled(); \
+} while (0)
+
+/**
+ * preempt_enable_nested - Undo the effect of preempt_disable_nested()
+ */
+static __always_inline void preempt_enable_nested(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
+}
+
#endif /* __LINUX_PREEMPT_H */
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index 6ad4e9032d53..46040d66334a 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -8,7 +8,7 @@
*
* Key points :
*
- * - Use a seqcount on 32-bit SMP, only disable preemption for 32-bit UP.
+ * - Use a seqcount on 32-bit
* - The whole thing is a no-op on 64-bit architectures.
*
* Usage constraints:
@@ -20,7 +20,8 @@
* writer and also spin forever.
*
* 3) Write side must use the _irqsave() variant if other writers, or a reader,
- * can be invoked from an IRQ context.
+ * can be invoked from an IRQ context. On 64bit systems this variant does not
+ * disable interrupts.
*
* 4) If reader fetches several counters, there is no guarantee the whole values
* are consistent w.r.t. each other (remember point #2: seqcounts are not
@@ -29,11 +30,6 @@
* 5) Readers are allowed to sleep or be preempted/interrupted: they perform
* pure reads.
*
- * 6) Readers must use both u64_stats_fetch_{begin,retry}_irq() if the stats
- * might be updated from a hardirq or softirq context (remember point #1:
- * seqcounts are not used for UP kernels). 32-bit UP stat readers could read
- * corrupted 64-bit values otherwise.
- *
* Usage :
*
* Stats producer (writer) should use following template granted it already got
@@ -66,7 +62,7 @@
#include <linux/seqlock.h>
struct u64_stats_sync {
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+#if BITS_PER_LONG == 32
seqcount_t seq;
#endif
};
@@ -98,7 +94,22 @@ static inline void u64_stats_inc(u64_stats_t *p)
local64_inc(&p->v);
}
-#else
+static inline void u64_stats_init(struct u64_stats_sync *syncp) { }
+static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { }
+static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) { }
+static inline unsigned long __u64_stats_irqsave(void) { return 0; }
+static inline void __u64_stats_irqrestore(unsigned long flags) { }
+static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+ return 0;
+}
+static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+ unsigned int start)
+{
+ return false;
+}
+
+#else /* 64 bit */
typedef struct {
u64 v;
@@ -123,123 +134,95 @@ static inline void u64_stats_inc(u64_stats_t *p)
{
p->v++;
}
-#endif
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
-#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq)
-#else
static inline void u64_stats_init(struct u64_stats_sync *syncp)
{
+ seqcount_init(&syncp->seq);
}
-#endif
-static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
+static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- preempt_disable();
+ preempt_disable_nested();
write_seqcount_begin(&syncp->seq);
-#endif
}
-static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
+static inline void __u64_stats_update_end(struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
write_seqcount_end(&syncp->seq);
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- preempt_enable();
-#endif
+ preempt_enable_nested();
}
-static inline unsigned long
-u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
+static inline unsigned long __u64_stats_irqsave(void)
{
- unsigned long flags = 0;
+ unsigned long flags;
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- preempt_disable();
- else
- local_irq_save(flags);
- write_seqcount_begin(&syncp->seq);
-#endif
+ local_irq_save(flags);
return flags;
}
-static inline void
-u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
- unsigned long flags)
+static inline void __u64_stats_irqrestore(unsigned long flags)
{
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
- write_seqcount_end(&syncp->seq);
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- preempt_enable();
- else
- local_irq_restore(flags);
-#endif
+ local_irq_restore(flags);
}
static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
return read_seqcount_begin(&syncp->seq);
-#else
- return 0;
-#endif
}
-static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+ unsigned int start)
{
-#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
- preempt_disable();
-#endif
- return __u64_stats_fetch_begin(syncp);
+ return read_seqcount_retry(&syncp->seq, start);
}
+#endif /* !64 bit */
-static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
- unsigned int start)
+static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
- return read_seqcount_retry(&syncp->seq, start);
-#else
- return false;
-#endif
+ __u64_stats_update_begin(syncp);
+}
+
+static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
+{
+ __u64_stats_update_end(syncp);
+}
+
+static inline unsigned long u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
+{
+ unsigned long flags = __u64_stats_irqsave();
+
+ __u64_stats_update_begin(syncp);
+ return flags;
+}
+
+static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
+ unsigned long flags)
+{
+ __u64_stats_update_end(syncp);
+ __u64_stats_irqrestore(flags);
+}
+
+static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+ return __u64_stats_fetch_begin(syncp);
}
static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
unsigned int start)
{
-#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
- preempt_enable();
-#endif
return __u64_stats_fetch_retry(syncp, start);
}
-/*
- * In case irq handlers can update u64 counters, readers can use following helpers
- * - SMP 32bit arches use seqcount protection, irq safe.
- * - UP 32bit must disable irqs.
- * - 64bit have no problem atomically reading u64 values, irq safe.
- */
+/* Obsolete interfaces */
static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
- preempt_disable();
-#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
- local_irq_disable();
-#endif
- return __u64_stats_fetch_begin(syncp);
+ return u64_stats_fetch_begin(syncp);
}
static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
unsigned int start)
{
-#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
- preempt_enable();
-#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
- local_irq_enable();
-#endif
- return __u64_stats_fetch_retry(syncp, start);
+ return u64_stats_fetch_retry(syncp, start);
}
#endif /* _LINUX_U64_STATS_SYNC_H */