aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-06-02 16:00:26 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-06-02 16:00:26 -0700
commitfd1f8473503e5bf897bd3e8efe3545c0352954e6 (patch)
treebd9f699a23c0093dd55be8cac76d4329837654d0
parentMerge tag 'gfs2-for-6.16-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2 (diff)
parentmm/khugepaged: clean up refcount check using folio_expected_ref_count() (diff)
downloadlinux-rng-fd1f8473503e5bf897bd3e8efe3545c0352954e6.tar.xz
linux-rng-fd1f8473503e5bf897bd3e8efe3545c0352954e6.zip
Merge tag 'mm-stable-2025-06-01-14-06' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull more MM updates from Andrew Morton: - "zram: support algorithm-specific parameters" from Sergey Senozhatsky adds infrastructure for passing algorithm-specific parameters into zram. A single parameter `winbits' is implemented at this time. - "memcg: nmi-safe kmem charging" from Shakeel Butt makes memcg charging nmi-safe, which is required by BFP, which can operate in NMI context. - "Some random fixes and cleanup to shmem" from Kemeng Shi implements small fixes and cleanups in the shmem code. - "Skip mm selftests instead when kernel features are not present" from Zi Yan fixes some issues in the MM selftest code. - "mm/damon: build-enable essential DAMON components by default" from SeongJae Park reworks DAMON Kconfig to make it easier to enable CONFIG_DAMON. - "sched/numa: add statistics of numa balance task migration" from Libo Chen adds more info into sysfs and procfs files to improve visibility into the NUMA balancer's task migration activity. - "selftests/mm: cow and gup_longterm cleanups" from Mark Brown provides various updates to some of the MM selftests to make them play better with the overall containing framework. * tag 'mm-stable-2025-06-01-14-06' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (43 commits) mm/khugepaged: clean up refcount check using folio_expected_ref_count() selftests/mm: fix test result reporting in gup_longterm selftests/mm: report unique test names for each cow test selftests/mm: add helper for logging test start and results selftests/mm: use standard ksft_finished() in cow and gup_longterm selftests/damon/_damon_sysfs: skip testcases if CONFIG_DAMON_SYSFS is disabled sched/numa: add statistics of numa balance task sched/numa: fix task swap by skipping kernel threads tools/testing: check correct variable in open_procmap() tools/testing/vma: add missing function stub mm/gup: update comment explaining why gup_fast() disables IRQs selftests/mm: two fixes for the pfnmap test mm/khugepaged: fix race with folio split/free using temporary reference mm: add CONFIG_PAGE_BLOCK_ORDER to select page block order mmu_notifiers: remove leftover stub macros selftests/mm: deduplicate test names in madv_populate kcov: rust: add flags for KCOV with Rust mm: rust: make CONFIG_MMU ifdefs more narrow mmu_gather: move tlb flush for VM_PFNMAP/VM_MIXEDMAP vmas into free_pgtables() mm/damon/Kconfig: enable CONFIG_DAMON by default ...
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst6
-rw-r--r--arch/arm/mm/flush.c4
-rw-r--r--arch/m68k/mm/motorola.c3
-rw-r--r--drivers/block/zram/backend_deflate.c12
-rw-r--r--drivers/block/zram/backend_lz4.c2
-rw-r--r--drivers/block/zram/backend_lz4hc.c2
-rw-r--r--drivers/block/zram/backend_zstd.c2
-rw-r--r--drivers/block/zram/zcomp.h9
-rw-r--r--drivers/block/zram/zram_drv.c21
-rw-r--r--fs/ntfs3/file.c31
-rw-r--r--include/asm-generic/tlb.h46
-rw-r--r--include/linux/memcontrol.h10
-rw-r--r--include/linux/mm.h6
-rw-r--r--include/linux/mm_types.h6
-rw-r--r--include/linux/mmu_notifier.h3
-rw-r--r--include/linux/mmzone.h16
-rw-r--r--include/linux/pageblock-flags.h8
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/uio.h10
-rw-r--r--include/linux/vm_event_item.h2
-rw-r--r--init/Kconfig14
-rw-r--r--kernel/futex/core.c2
-rw-r--r--kernel/sched/core.c9
-rw-r--r--kernel/sched/debug.c4
-rw-r--r--kernel/sched/fair.c3
-rw-r--r--lib/iov_iter.c29
-rw-r--r--mm/Kconfig34
-rw-r--r--mm/damon/Kconfig4
-rw-r--r--mm/damon/core.c8
-rw-r--r--mm/filemap.c4
-rw-r--r--mm/gup.c2
-rw-r--r--mm/hugetlb.c2
-rw-r--r--mm/khugepaged.c35
-rw-r--r--mm/memcontrol.c127
-rw-r--r--mm/memory.c6
-rw-r--r--mm/mm_init.c2
-rw-r--r--mm/mmu_gather.c1
-rw-r--r--mm/page-writeback.c6
-rw-r--r--mm/shmem.c23
-rw-r--r--mm/truncate.c2
-rw-r--r--mm/vmstat.c2
-rw-r--r--mm/zpdesc.h4
-rw-r--r--rust/Makefile1
-rw-r--r--rust/kernel/mm.rs56
-rw-r--r--rust/kernel/mm/mmput_async.rs68
-rw-r--r--scripts/Makefile.kcov6
-rw-r--r--scripts/Makefile.lib3
-rw-r--r--tools/testing/selftests/damon/_damon_sysfs.py4
-rw-r--r--tools/testing/selftests/mm/cow.c340
-rw-r--r--tools/testing/selftests/mm/guard-regions.c17
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c158
-rw-r--r--tools/testing/selftests/mm/madv_populate.c18
-rw-r--r--tools/testing/selftests/mm/mlock2-tests.c2
-rw-r--r--tools/testing/selftests/mm/pfnmap.c61
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c4
-rwxr-xr-xtools/testing/selftests/mm/va_high_addr_switch.sh26
-rw-r--r--tools/testing/selftests/mm/vm_util.c2
-rw-r--r--tools/testing/selftests/mm/vm_util.h20
-rw-r--r--tools/testing/vma/vma_internal.h5
59 files changed, 909 insertions, 408 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index bd98ea3175ec..0cc35a14afbe 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1732,6 +1732,12 @@ The following nested keys are defined.
numa_hint_faults (npn)
Number of NUMA hinting faults.
+ numa_task_migrated (npn)
+ Number of task migration by NUMA balancing.
+
+ numa_task_swapped (npn)
+ Number of task swap by NUMA balancing.
+
pgdemote_kswapd
Number of pages demoted by kswapd.
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 0749cf8a6637..5219158d54cf 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -227,9 +227,9 @@ void __flush_dcache_folio(struct address_space *mapping, struct folio *folio)
}
/*
- * If this is a page cache page, and we have an aliasing VIPT cache,
+ * If this is a page cache folio, and we have an aliasing VIPT cache,
* we only need to do one flush - which would be at the relevant
- * userspace colour, which is congruent with page->index.
+ * userspace colour, which is congruent with folio->index.
*/
if (mapping && cache_is_vipt_aliasing())
flush_pfn_alias(folio_pfn(folio), folio_pos(folio));
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 6ab3ef39ba7a..745bd575dcfa 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -105,7 +105,8 @@ static struct list_head ptable_list[3] = {
#define PD_PTABLE(page) ((ptable_desc *)&(virt_to_page((void *)(page))->lru))
#define PD_PAGE(ptable) (list_entry(ptable, struct page, lru))
-#define PD_MARKBITS(dp) (*(unsigned int *)&PD_PAGE(dp)->index)
+#define PD_PTDESC(ptable) (list_entry(ptable, struct ptdesc, pt_list))
+#define PD_MARKBITS(dp) (*(unsigned int *)&PD_PTDESC(dp)->pt_index)
static const int ptable_shift[3] = {
7+2, /* PGD */
diff --git a/drivers/block/zram/backend_deflate.c b/drivers/block/zram/backend_deflate.c
index 0f7f252c12f4..b75016e0e654 100644
--- a/drivers/block/zram/backend_deflate.c
+++ b/drivers/block/zram/backend_deflate.c
@@ -8,7 +8,7 @@
#include "backend_deflate.h"
/* Use the same value as crypto API */
-#define DEFLATE_DEF_WINBITS 11
+#define DEFLATE_DEF_WINBITS (-11)
#define DEFLATE_DEF_MEMLEVEL MAX_MEM_LEVEL
struct deflate_ctx {
@@ -22,8 +22,10 @@ static void deflate_release_params(struct zcomp_params *params)
static int deflate_setup_params(struct zcomp_params *params)
{
- if (params->level == ZCOMP_PARAM_NO_LEVEL)
+ if (params->level == ZCOMP_PARAM_NOT_SET)
params->level = Z_DEFAULT_COMPRESSION;
+ if (params->deflate.winbits == ZCOMP_PARAM_NOT_SET)
+ params->deflate.winbits = DEFLATE_DEF_WINBITS;
return 0;
}
@@ -57,13 +59,13 @@ static int deflate_create(struct zcomp_params *params, struct zcomp_ctx *ctx)
return -ENOMEM;
ctx->context = zctx;
- sz = zlib_deflate_workspacesize(-DEFLATE_DEF_WINBITS, MAX_MEM_LEVEL);
+ sz = zlib_deflate_workspacesize(params->deflate.winbits, MAX_MEM_LEVEL);
zctx->cctx.workspace = vzalloc(sz);
if (!zctx->cctx.workspace)
goto error;
ret = zlib_deflateInit2(&zctx->cctx, params->level, Z_DEFLATED,
- -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL,
+ params->deflate.winbits, DEFLATE_DEF_MEMLEVEL,
Z_DEFAULT_STRATEGY);
if (ret != Z_OK)
goto error;
@@ -73,7 +75,7 @@ static int deflate_create(struct zcomp_params *params, struct zcomp_ctx *ctx)
if (!zctx->dctx.workspace)
goto error;
- ret = zlib_inflateInit2(&zctx->dctx, -DEFLATE_DEF_WINBITS);
+ ret = zlib_inflateInit2(&zctx->dctx, params->deflate.winbits);
if (ret != Z_OK)
goto error;
diff --git a/drivers/block/zram/backend_lz4.c b/drivers/block/zram/backend_lz4.c
index 847f3334eb38..daccd60857eb 100644
--- a/drivers/block/zram/backend_lz4.c
+++ b/drivers/block/zram/backend_lz4.c
@@ -18,7 +18,7 @@ static void lz4_release_params(struct zcomp_params *params)
static int lz4_setup_params(struct zcomp_params *params)
{
- if (params->level == ZCOMP_PARAM_NO_LEVEL)
+ if (params->level == ZCOMP_PARAM_NOT_SET)
params->level = LZ4_ACCELERATION_DEFAULT;
return 0;
diff --git a/drivers/block/zram/backend_lz4hc.c b/drivers/block/zram/backend_lz4hc.c
index 5f37d5abcaeb..9e8a35dfa56d 100644
--- a/drivers/block/zram/backend_lz4hc.c
+++ b/drivers/block/zram/backend_lz4hc.c
@@ -18,7 +18,7 @@ static void lz4hc_release_params(struct zcomp_params *params)
static int lz4hc_setup_params(struct zcomp_params *params)
{
- if (params->level == ZCOMP_PARAM_NO_LEVEL)
+ if (params->level == ZCOMP_PARAM_NOT_SET)
params->level = LZ4HC_DEFAULT_CLEVEL;
return 0;
diff --git a/drivers/block/zram/backend_zstd.c b/drivers/block/zram/backend_zstd.c
index 22c8067536f3..81defb98ed09 100644
--- a/drivers/block/zram/backend_zstd.c
+++ b/drivers/block/zram/backend_zstd.c
@@ -58,7 +58,7 @@ static int zstd_setup_params(struct zcomp_params *params)
return -ENOMEM;
params->drv_data = zp;
- if (params->level == ZCOMP_PARAM_NO_LEVEL)
+ if (params->level == ZCOMP_PARAM_NOT_SET)
params->level = zstd_default_clevel();
zp->cprm = zstd_get_params(params->level, PAGE_SIZE);
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index 25339ed1e07e..4acffe671a5e 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -5,7 +5,11 @@
#include <linux/mutex.h>
-#define ZCOMP_PARAM_NO_LEVEL INT_MIN
+#define ZCOMP_PARAM_NOT_SET INT_MIN
+
+struct deflate_params {
+ s32 winbits;
+};
/*
* Immutable driver (backend) parameters. The driver may attach private
@@ -17,6 +21,9 @@ struct zcomp_params {
void *dict;
size_t dict_sz;
s32 level;
+ union {
+ struct deflate_params deflate;
+ };
void *drv_data;
};
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 94e6e9b80bf0..54c57103715f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1276,13 +1276,15 @@ static void comp_params_reset(struct zram *zram, u32 prio)
struct zcomp_params *params = &zram->params[prio];
vfree(params->dict);
- params->level = ZCOMP_PARAM_NO_LEVEL;
+ params->level = ZCOMP_PARAM_NOT_SET;
+ params->deflate.winbits = ZCOMP_PARAM_NOT_SET;
params->dict_sz = 0;
params->dict = NULL;
}
static int comp_params_store(struct zram *zram, u32 prio, s32 level,
- const char *dict_path)
+ const char *dict_path,
+ struct deflate_params *deflate_params)
{
ssize_t sz = 0;
@@ -1300,6 +1302,7 @@ static int comp_params_store(struct zram *zram, u32 prio, s32 level,
zram->params[prio].dict_sz = sz;
zram->params[prio].level = level;
+ zram->params[prio].deflate.winbits = deflate_params->winbits;
return 0;
}
@@ -1308,11 +1311,14 @@ static ssize_t algorithm_params_store(struct device *dev,
const char *buf,
size_t len)
{
- s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NO_LEVEL;
+ s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET;
char *args, *param, *val, *algo = NULL, *dict_path = NULL;
+ struct deflate_params deflate_params;
struct zram *zram = dev_to_zram(dev);
int ret;
+ deflate_params.winbits = ZCOMP_PARAM_NOT_SET;
+
args = skip_spaces(buf);
while (*args) {
args = next_arg(args, &param, &val);
@@ -1343,6 +1349,13 @@ static ssize_t algorithm_params_store(struct device *dev,
dict_path = val;
continue;
}
+
+ if (!strcmp(param, "deflate.winbits")) {
+ ret = kstrtoint(val, 10, &deflate_params.winbits);
+ if (ret)
+ return ret;
+ continue;
+ }
}
/* Lookup priority by algorithm name */
@@ -1364,7 +1377,7 @@ static ssize_t algorithm_params_store(struct device *dev,
if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
return -EINVAL;
- ret = comp_params_store(zram, prio, level, dict_path);
+ ret = comp_params_store(zram, prio, level, dict_path, &deflate_params);
return ret ? ret : len;
}
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 34ed242e1063..1e99a35691cd 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -913,7 +913,8 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
struct ntfs_inode *ni = ntfs_i(inode);
u64 valid = ni->i_valid;
struct ntfs_sb_info *sbi = ni->mi.sbi;
- struct page *page, **pages = NULL;
+ struct page **pages = NULL;
+ struct folio *folio;
size_t written = 0;
u8 frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits;
u32 frame_size = 1u << frame_bits;
@@ -923,7 +924,6 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
u64 frame_vbo;
pgoff_t index;
bool frame_uptodate;
- struct folio *folio;
if (frame_size < PAGE_SIZE) {
/*
@@ -977,8 +977,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
pages_per_frame);
if (err) {
for (ip = 0; ip < pages_per_frame; ip++) {
- page = pages[ip];
- folio = page_folio(page);
+ folio = page_folio(pages[ip]);
folio_unlock(folio);
folio_put(folio);
}
@@ -989,10 +988,9 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
ip = off >> PAGE_SHIFT;
off = offset_in_page(valid);
for (; ip < pages_per_frame; ip++, off = 0) {
- page = pages[ip];
- folio = page_folio(page);
- zero_user_segment(page, off, PAGE_SIZE);
- flush_dcache_page(page);
+ folio = page_folio(pages[ip]);
+ folio_zero_segment(folio, off, PAGE_SIZE);
+ flush_dcache_folio(folio);
folio_mark_uptodate(folio);
}
@@ -1001,8 +999,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
ni_unlock(ni);
for (ip = 0; ip < pages_per_frame; ip++) {
- page = pages[ip];
- folio = page_folio(page);
+ folio = page_folio(pages[ip]);
folio_mark_uptodate(folio);
folio_unlock(folio);
folio_put(folio);
@@ -1046,8 +1043,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
if (err) {
for (ip = 0; ip < pages_per_frame;
ip++) {
- page = pages[ip];
- folio = page_folio(page);
+ folio = page_folio(pages[ip]);
folio_unlock(folio);
folio_put(folio);
}
@@ -1065,10 +1061,10 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
for (;;) {
size_t cp, tail = PAGE_SIZE - off;
- page = pages[ip];
- cp = copy_page_from_iter_atomic(page, off,
+ folio = page_folio(pages[ip]);
+ cp = copy_folio_from_iter_atomic(folio, off,
min(tail, bytes), from);
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
copied += cp;
bytes -= cp;
@@ -1088,9 +1084,8 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
ni_unlock(ni);
for (ip = 0; ip < pages_per_frame; ip++) {
- page = pages[ip];
- ClearPageDirty(page);
- folio = page_folio(page);
+ folio = page_folio(pages[ip]);
+ folio_clear_dirty(folio);
folio_mark_uptodate(folio);
folio_unlock(folio);
folio_put(folio);
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 88a42973fa47..1fff717cae51 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -58,6 +58,11 @@
* Defaults to flushing at tlb_end_vma() to reset the range; helps when
* there's large holes between the VMAs.
*
+ * - tlb_free_vmas()
+ *
+ * tlb_free_vmas() marks the start of unlinking of one or more vmas
+ * and freeing page-tables.
+ *
* - tlb_remove_table()
*
* tlb_remove_table() is the basic primitive to free page-table directories
@@ -464,7 +469,12 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
*/
tlb->vma_huge = is_vm_hugetlb_page(vma);
tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
- tlb->vma_pfn = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
+
+ /*
+ * Track if there's at least one VM_PFNMAP/VM_MIXEDMAP vma
+ * in the tracked range, see tlb_free_vmas().
+ */
+ tlb->vma_pfn |= !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
}
static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
@@ -548,22 +558,38 @@ static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *
static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
+ if (tlb->fullmm || IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS))
+ return;
+
+ /*
+ * Do a TLB flush and reset the range at VMA boundaries; this avoids
+ * the ranges growing with the unused space between consecutive VMAs,
+ * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
+ * this.
+ */
+ tlb_flush_mmu_tlbonly(tlb);
+}
+
+static inline void tlb_free_vmas(struct mmu_gather *tlb)
+{
if (tlb->fullmm)
return;
/*
* VM_PFNMAP is more fragile because the core mm will not track the
- * page mapcount -- there might not be page-frames for these PFNs after
- * all. Force flush TLBs for such ranges to avoid munmap() vs
- * unmap_mapping_range() races.
+ * page mapcount -- there might not be page-frames for these PFNs
+ * after all.
+ *
+ * Specifically() there is a race between munmap() and
+ * unmap_mapping_range(), where munmap() will unlink the VMA, such
+ * that unmap_mapping_range() will no longer observe the VMA and
+ * no-op, without observing the TLBI, returning prematurely.
+ *
+ * So if we're about to unlink such a VMA, and we have pending
+ * TLBI for such a vma, flush things now.
*/
- if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) {
- /*
- * Do a TLB flush and reset the range at VMA boundaries; this avoids
- * the ranges growing with the unused space between consecutive VMAs.
- */
+ if (tlb->vma_pfn)
tlb_flush_mmu_tlbonly(tlb);
- }
}
/*
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f7848f73f41c..87b6688f124a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -113,6 +113,12 @@ struct mem_cgroup_per_node {
CACHELINE_PADDING(_pad2_);
unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
struct mem_cgroup_reclaim_iter iter;
+
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+ /* slab stats for nmi context */
+ atomic_t slab_reclaimable;
+ atomic_t slab_unreclaimable;
+#endif
};
struct mem_cgroup_threshold {
@@ -236,6 +242,10 @@ struct mem_cgroup {
atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS];
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+ /* MEMCG_KMEM for nmi context */
+ atomic_t kmem_stat;
+#endif
/*
* Hint of reclaim pressure for socket memroy management. Note
* that this indicator should NOT be used in legacy cgroup mode
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9e221ffcb868..0ef2ba0c667a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1276,9 +1276,9 @@ vm_fault_t finish_fault(struct vm_fault *vmf);
* the page's disk buffers. PG_private must be set to tell the VM to call
* into the filesystem to release these pages.
*
- * A page may belong to an inode's memory mapping. In this case, page->mapping
- * is the pointer to the inode, and page->index is the file offset of the page,
- * in units of PAGE_SIZE.
+ * A folio may belong to an inode's memory mapping. In this case,
+ * folio->mapping points to the inode, and folio->index is the file
+ * offset of the folio, in units of PAGE_SIZE.
*
* If pagecache pages are not associated with an inode, they are said to be
* anonymous pages. These may become associated with the swapcache, and in that
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d3cffd8828c9..d6b91e8a66d6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -108,7 +108,7 @@ struct page {
/* See page-flags.h for PAGE_MAPPING_FLAGS */
struct address_space *mapping;
union {
- pgoff_t index; /* Our offset within mapping. */
+ pgoff_t __folio_index; /* Our offset within mapping. */
unsigned long share; /* share count for fsdax */
};
/**
@@ -489,7 +489,7 @@ FOLIO_MATCH(flags, flags);
FOLIO_MATCH(lru, lru);
FOLIO_MATCH(mapping, mapping);
FOLIO_MATCH(compound_head, lru);
-FOLIO_MATCH(index, index);
+FOLIO_MATCH(__folio_index, index);
FOLIO_MATCH(private, private);
FOLIO_MATCH(_mapcount, _mapcount);
FOLIO_MATCH(_refcount, _refcount);
@@ -590,7 +590,7 @@ TABLE_MATCH(flags, __page_flags);
TABLE_MATCH(compound_head, pt_list);
TABLE_MATCH(compound_head, _pt_pad_1);
TABLE_MATCH(mapping, __page_mapping);
-TABLE_MATCH(index, pt_index);
+TABLE_MATCH(__folio_index, pt_index);
TABLE_MATCH(rcu_head, pt_rcu_head);
TABLE_MATCH(page_type, __page_type);
TABLE_MATCH(_refcount, __page_refcount);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index bc2402a45741..d1094c2d5fb6 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -654,9 +654,6 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define ptep_clear_young_notify ptep_test_and_clear_young
#define pmdp_clear_young_notify pmdp_test_and_clear_young
-#define ptep_clear_flush_notify ptep_clear_flush
-#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
-#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
static inline void mmu_notifier_synchronize(void)
{
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 28066b4ced81..283913d42d7b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -37,6 +37,22 @@
#define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1)
+/* Defines the order for the number of pages that have a migrate type. */
+#ifndef CONFIG_PAGE_BLOCK_ORDER
+#define PAGE_BLOCK_ORDER MAX_PAGE_ORDER
+#else
+#define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER
+#endif /* CONFIG_PAGE_BLOCK_ORDER */
+
+/*
+ * The MAX_PAGE_ORDER, which defines the max order of pages to be allocated
+ * by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_ORDER,
+ * which defines the order for the number of pages that can have a migrate type
+ */
+#if (PAGE_BLOCK_ORDER > MAX_PAGE_ORDER)
+#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_ORDER
+#endif
+
/*
* PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
* costly to service. That is between allocation orders which should
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index fc6b9c87cb0a..e73a4292ef02 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -41,18 +41,18 @@ extern unsigned int pageblock_order;
* Huge pages are a constant size, but don't exceed the maximum allocation
* granularity.
*/
-#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER)
+#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_ORDER)
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
#elif defined(CONFIG_TRANSPARENT_HUGEPAGE)
-#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER)
+#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_ORDER)
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
-/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
-#define pageblock_order MAX_PAGE_ORDER
+/* If huge pages are not used, group by PAGE_BLOCK_ORDER */
+#define pageblock_order PAGE_BLOCK_ORDER
#endif /* CONFIG_HUGETLB_PAGE */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index aa9c5be7a632..4f78a64beb52 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -548,6 +548,10 @@ struct sched_statistics {
u64 nr_failed_migrations_running;
u64 nr_failed_migrations_hot;
u64 nr_forced_migrations;
+#ifdef CONFIG_NUMA_BALANCING
+ u64 numa_task_migrated;
+ u64 numa_task_swapped;
+#endif
u64 nr_wakeups;
u64 nr_wakeups_sync;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 393d0622cc28..2e86c653186c 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -182,8 +182,6 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
return ret;
}
-size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
- size_t bytes, struct iov_iter *i);
void iov_iter_advance(struct iov_iter *i, size_t bytes);
void iov_iter_revert(struct iov_iter *i, size_t bytes);
size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
@@ -193,6 +191,8 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i);
size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i);
+size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset,
+ size_t bytes, struct iov_iter *i);
size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
@@ -210,12 +210,6 @@ static inline size_t copy_folio_from_iter(struct folio *folio, size_t offset,
return copy_page_from_iter(&folio->page, offset, bytes, i);
}
-static inline size_t copy_folio_from_iter_atomic(struct folio *folio,
- size_t offset, size_t bytes, struct iov_iter *i)
-{
- return copy_page_from_iter_atomic(&folio->page, offset, bytes, i);
-}
-
size_t copy_page_to_iter_nofault(struct page *page, unsigned offset,
size_t bytes, struct iov_iter *i);
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 9e15a088ba38..91a3ce9a2687 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -66,6 +66,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
NUMA_HINT_FAULTS,
NUMA_HINT_FAULTS_LOCAL,
NUMA_PAGE_MIGRATE,
+ NUMA_TASK_MIGRATE,
+ NUMA_TASK_SWAP,
#endif
#ifdef CONFIG_MIGRATION
PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
diff --git a/init/Kconfig b/init/Kconfig
index 20716189fe68..ab83abe0fd9d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -992,6 +992,20 @@ config MEMCG
help
Provides control over the memory footprint of tasks in a cgroup.
+config MEMCG_NMI_UNSAFE
+ bool
+ depends on MEMCG
+ depends on HAVE_NMI
+ depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !ARCH_HAVE_NMI_SAFE_CMPXCHG
+ default y
+
+config MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+ bool
+ depends on MEMCG
+ depends on HAVE_NMI
+ depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && ARCH_HAVE_NMI_SAFE_CMPXCHG
+ default y
+
config MEMCG_V1
bool "Legacy cgroup v1 memory controller"
depends on MEMCG
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 19a2c65f3d37..565f9717c6ca 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -531,7 +531,7 @@ static u64 get_inode_sequence_number(struct inode *inode)
*
* For shared mappings (when @fshared), the key is:
*
- * ( inode->i_sequence, page->index, offset_within_page )
+ * ( inode->i_sequence, page offset within mapping, offset_within_page )
*
* [ also see get_inode_sequence_number() ]
*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 62b3416f5e43..dce50fa57471 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3362,6 +3362,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
#ifdef CONFIG_NUMA_BALANCING
static void __migrate_swap_task(struct task_struct *p, int cpu)
{
+ __schedstat_inc(p->stats.numa_task_swapped);
+ count_vm_numa_event(NUMA_TASK_SWAP);
+ count_memcg_event_mm(p->mm, NUMA_TASK_SWAP);
+
if (task_on_rq_queued(p)) {
struct rq *src_rq, *dst_rq;
struct rq_flags srf, drf;
@@ -7930,8 +7934,9 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
return -EINVAL;
- /* TODO: This is not properly updating schedstats */
-
+ __schedstat_inc(p->stats.numa_task_migrated);
+ count_vm_numa_event(NUMA_TASK_MIGRATE);
+ count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE);
trace_sched_move_numa(p, curr_cpu, target_cpu);
return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg);
}
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 557246880a7e..9d71baf08075 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1210,6 +1210,10 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P_SCHEDSTAT(nr_failed_migrations_running);
P_SCHEDSTAT(nr_failed_migrations_hot);
P_SCHEDSTAT(nr_forced_migrations);
+#ifdef CONFIG_NUMA_BALANCING
+ P_SCHEDSTAT(numa_task_migrated);
+ P_SCHEDSTAT(numa_task_swapped);
+#endif
P_SCHEDSTAT(nr_wakeups);
P_SCHEDSTAT(nr_wakeups_sync);
P_SCHEDSTAT(nr_wakeups_migrate);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b4326827e326..7a14da5396fb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2273,7 +2273,8 @@ static bool task_numa_compare(struct task_numa_env *env,
rcu_read_lock();
cur = rcu_dereference(dst_rq->curr);
- if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
+ if (cur && ((cur->flags & (PF_EXITING | PF_KTHREAD)) ||
+ !cur->mm))
cur = NULL;
/*
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index d9e19fb2dcf3..969d4ad510df 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -457,38 +457,35 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
}
EXPORT_SYMBOL(iov_iter_zero);
-size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
+size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset,
size_t bytes, struct iov_iter *i)
{
size_t n, copied = 0;
- bool uses_kmap = IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) ||
- PageHighMem(page);
- if (!page_copy_sane(page, offset, bytes))
+ if (!page_copy_sane(&folio->page, offset, bytes))
return 0;
if (WARN_ON_ONCE(!i->data_source))
return 0;
do {
- char *p;
+ char *to = kmap_local_folio(folio, offset);
n = bytes - copied;
- if (uses_kmap) {
- page += offset / PAGE_SIZE;
- offset %= PAGE_SIZE;
- n = min_t(size_t, n, PAGE_SIZE - offset);
- }
-
- p = kmap_atomic(page) + offset;
- n = __copy_from_iter(p, n, i);
- kunmap_atomic(p);
+ if (folio_test_partial_kmap(folio) &&
+ n > PAGE_SIZE - offset_in_page(offset))
+ n = PAGE_SIZE - offset_in_page(offset);
+
+ pagefault_disable();
+ n = __copy_from_iter(to, n, i);
+ pagefault_enable();
+ kunmap_local(to);
copied += n;
offset += n;
- } while (uses_kmap && copied != bytes && n > 0);
+ } while (copied != bytes && n > 0);
return copied;
}
-EXPORT_SYMBOL(copy_page_from_iter_atomic);
+EXPORT_SYMBOL(copy_folio_from_iter_atomic);
static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
{
diff --git a/mm/Kconfig b/mm/Kconfig
index bd08e151fa1b..f8bb8f070d0d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -993,6 +993,40 @@ config CMA_AREAS
If unsure, leave the default value "8" in UMA and "20" in NUMA.
+#
+# Select this config option from the architecture Kconfig, if available, to set
+# the max page order for physically contiguous allocations.
+#
+config ARCH_FORCE_MAX_ORDER
+ int
+
+#
+# When ARCH_FORCE_MAX_ORDER is not defined,
+# the default page block order is MAX_PAGE_ORDER (10) as per
+# include/linux/mmzone.h.
+#
+config PAGE_BLOCK_ORDER
+ int "Page Block Order"
+ range 1 10 if ARCH_FORCE_MAX_ORDER = 0
+ default 10 if ARCH_FORCE_MAX_ORDER = 0
+ range 1 ARCH_FORCE_MAX_ORDER if ARCH_FORCE_MAX_ORDER != 0
+ default ARCH_FORCE_MAX_ORDER if ARCH_FORCE_MAX_ORDER != 0
+ help
+ The page block order refers to the power of two number of pages that
+ are physically contiguous and can have a migrate type associated to
+ them. The maximum size of the page block order is limited by
+ ARCH_FORCE_MAX_ORDER.
+
+ This config allows overriding the default page block order when the
+ page block order is required to be smaller than ARCH_FORCE_MAX_ORDER
+ or MAX_PAGE_ORDER.
+
+ Reducing pageblock order can negatively impact THP generation
+ success rate. If your workloads uses THP heavily, please use this
+ option with caution.
+
+ Don't change if unsure.
+
config MEM_SOFT_DIRTY
bool "Track memory changes"
depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
index c213cf8b5638..551745df011b 100644
--- a/mm/damon/Kconfig
+++ b/mm/damon/Kconfig
@@ -4,6 +4,7 @@ menu "Data Access Monitoring"
config DAMON
bool "DAMON: Data Access Monitoring Framework"
+ default y
help
This builds a framework that allows kernel subsystems to monitor
access frequency of each memory region. The information can be useful
@@ -28,6 +29,7 @@ config DAMON_VADDR
bool "Data access monitoring operations for virtual address spaces"
depends on DAMON && MMU
select PAGE_IDLE_FLAG
+ default DAMON
help
This builds the default data access monitoring operations for DAMON
that work for virtual address spaces.
@@ -36,6 +38,7 @@ config DAMON_PADDR
bool "Data access monitoring operations for the physical address space"
depends on DAMON && MMU
select PAGE_IDLE_FLAG
+ default DAMON
help
This builds the default data access monitoring operations for DAMON
that works for the physical address space.
@@ -55,6 +58,7 @@ config DAMON_VADDR_KUNIT_TEST
config DAMON_SYSFS
bool "DAMON sysfs interface"
depends on DAMON && SYSFS
+ default DAMON
help
This builds the sysfs interface for DAMON. The user space can use
the interface for arbitrary data access monitoring.
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 0bb71e2ab713..b217e0120e09 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1093,9 +1093,17 @@ static int damon_commit_targets(
if (err)
return err;
} else {
+ struct damos *s;
+
if (damon_target_has_pid(dst))
put_pid(dst_target->pid);
damon_destroy_target(dst_target);
+ damon_for_each_scheme(s, dst) {
+ if (s->quota.charge_target_from == dst_target) {
+ s->quota.charge_target_from = NULL;
+ s->quota.charge_addr_from = 0;
+ }
+ }
}
}
diff --git a/mm/filemap.c b/mm/filemap.c
index 48c944e2c163..bada249b9fb7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -142,7 +142,7 @@ static void page_cache_delete(struct address_space *mapping,
xas_init_marks(&xas);
folio->mapping = NULL;
- /* Leave page->index set: truncation lookup relies upon it */
+ /* Leave folio->index set: truncation lookup relies upon it */
mapping->nrpages -= nr;
}
@@ -949,7 +949,7 @@ unlock:
return 0;
error:
folio->mapping = NULL;
- /* Leave page->index set: truncation relies upon it */
+ /* Leave folio->index set: truncation relies upon it */
folio_put_refs(folio, nr);
return xas_error(&xas);
}
diff --git a/mm/gup.c b/mm/gup.c
index 329c5f7acc7a..e065a49842a8 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -3299,7 +3299,7 @@ static unsigned long gup_fast(unsigned long start, unsigned long end,
* include/asm-generic/tlb.h for more details.
*
* We do not adopt an rcu_read_lock() here as we also want to block IPIs
- * that come from THPs splitting.
+ * that come from callers of tlb_remove_table_sync_one().
*/
local_irq_save(flags);
gup_fast_pgd_range(start, end, gup_flags, pages, &nr_pinned);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 32ab14aa4074..f0b1d53079f9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3741,7 +3741,7 @@ static void __init report_hugepages(void)
string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n",
- buf, h->free_huge_pages);
+ buf, h->nr_huge_pages);
if (nrinvalid)
pr_info("HugeTLB: %s page size: %lu invalid page%s discarded\n",
buf, nrinvalid, nrinvalid > 1 ? "s" : "");
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index cdf5a581368b..15203ea7d007 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -548,19 +548,6 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte,
}
}
-static bool is_refcount_suitable(struct folio *folio)
-{
- int expected_refcount = folio_mapcount(folio);
-
- if (!folio_test_anon(folio) || folio_test_swapcache(folio))
- expected_refcount += folio_nr_pages(folio);
-
- if (folio_test_private(folio))
- expected_refcount++;
-
- return folio_ref_count(folio) == expected_refcount;
-}
-
static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
unsigned long address,
pte_t *pte,
@@ -652,7 +639,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
* but not from this process. The other process cannot write to
* the page, only trigger CoW.
*/
- if (!is_refcount_suitable(folio)) {
+ if (folio_expected_ref_count(folio) != folio_ref_count(folio)) {
folio_unlock(folio);
result = SCAN_PAGE_COUNT;
goto out;
@@ -1402,7 +1389,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
* has excessive GUP pins (i.e. 512). Anyway the same check
* will be done again later the risk seems low.
*/
- if (!is_refcount_suitable(folio)) {
+ if (folio_expected_ref_count(folio) != folio_ref_count(folio)) {
result = SCAN_PAGE_COUNT;
goto out_unmap;
}
@@ -2293,6 +2280,17 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
continue;
}
+ if (!folio_try_get(folio)) {
+ xas_reset(&xas);
+ continue;
+ }
+
+ if (unlikely(folio != xas_reload(&xas))) {
+ folio_put(folio);
+ xas_reset(&xas);
+ continue;
+ }
+
if (folio_order(folio) == HPAGE_PMD_ORDER &&
folio->index == start) {
/* Maybe PMD-mapped */
@@ -2303,23 +2301,27 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
* it's safe to skip LRU and refcount checks before
* returning.
*/
+ folio_put(folio);
break;
}
node = folio_nid(folio);
if (hpage_collapse_scan_abort(node, cc)) {
result = SCAN_SCAN_ABORT;
+ folio_put(folio);
break;
}
cc->node_load[node]++;
if (!folio_test_lru(folio)) {
result = SCAN_PAGE_LRU;
+ folio_put(folio);
break;
}
- if (!is_refcount_suitable(folio)) {
+ if (folio_expected_ref_count(folio) + 1 != folio_ref_count(folio)) {
result = SCAN_PAGE_COUNT;
+ folio_put(folio);
break;
}
@@ -2331,6 +2333,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
*/
present += folio_nr_pages(folio);
+ folio_put(folio);
if (need_resched()) {
xas_pause(&xas);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b90aa3075950..902da8a9c643 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -474,6 +474,8 @@ static const unsigned int memcg_vm_event_stat[] = {
NUMA_PAGE_MIGRATE,
NUMA_PTE_UPDATES,
NUMA_HINT_FAULTS,
+ NUMA_TASK_MIGRATE,
+ NUMA_TASK_SWAP,
#endif
};
@@ -531,7 +533,7 @@ struct memcg_vmstats {
unsigned long events_pending[NR_MEMCG_EVENTS];
/* Stats updates since the last flush */
- atomic64_t stats_updates;
+ atomic_t stats_updates;
};
/*
@@ -557,7 +559,7 @@ static u64 flush_last_time;
static bool memcg_vmstats_needs_flush(struct memcg_vmstats *vmstats)
{
- return atomic64_read(&vmstats->stats_updates) >
+ return atomic_read(&vmstats->stats_updates) >
MEMCG_CHARGE_BATCH * num_online_cpus();
}
@@ -571,7 +573,9 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val,
if (!val)
return;
- css_rstat_updated(&memcg->css, cpu);
+ /* TODO: add to cgroup update tree once it is nmi-safe. */
+ if (!in_nmi())
+ css_rstat_updated(&memcg->css, cpu);
statc_pcpu = memcg->vmstats_percpu;
for (; statc_pcpu; statc_pcpu = statc->parent_pcpu) {
statc = this_cpu_ptr(statc_pcpu);
@@ -589,7 +593,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val,
continue;
stats_updates = this_cpu_xchg(statc_pcpu->stats_updates, 0);
- atomic64_add(stats_updates, &statc->vmstats->stats_updates);
+ atomic_add(stats_updates, &statc->vmstats->stats_updates);
}
}
@@ -597,7 +601,7 @@ static void __mem_cgroup_flush_stats(struct mem_cgroup *memcg, bool force)
{
bool needs_flush = memcg_vmstats_needs_flush(memcg->vmstats);
- trace_memcg_flush_stats(memcg, atomic64_read(&memcg->vmstats->stats_updates),
+ trace_memcg_flush_stats(memcg, atomic_read(&memcg->vmstats->stats_updates),
force, needs_flush);
if (!force && !needs_flush)
@@ -2513,17 +2517,47 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
folio->memcg_data = (unsigned long)memcg;
}
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+static inline void account_slab_nmi_safe(struct mem_cgroup *memcg,
+ struct pglist_data *pgdat,
+ enum node_stat_item idx, int nr)
+{
+ struct lruvec *lruvec;
+
+ if (likely(!in_nmi())) {
+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
+ mod_memcg_lruvec_state(lruvec, idx, nr);
+ } else {
+ struct mem_cgroup_per_node *pn = memcg->nodeinfo[pgdat->node_id];
+
+ /* TODO: add to cgroup update tree once it is nmi-safe. */
+ if (idx == NR_SLAB_RECLAIMABLE_B)
+ atomic_add(nr, &pn->slab_reclaimable);
+ else
+ atomic_add(nr, &pn->slab_unreclaimable);
+ }
+}
+#else
+static inline void account_slab_nmi_safe(struct mem_cgroup *memcg,
+ struct pglist_data *pgdat,
+ enum node_stat_item idx, int nr)
+{
+ struct lruvec *lruvec;
+
+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
+ mod_memcg_lruvec_state(lruvec, idx, nr);
+}
+#endif
+
static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
struct pglist_data *pgdat,
enum node_stat_item idx, int nr)
{
struct mem_cgroup *memcg;
- struct lruvec *lruvec;
rcu_read_lock();
memcg = obj_cgroup_memcg(objcg);
- lruvec = mem_cgroup_lruvec(memcg, pgdat);
- mod_memcg_lruvec_state(lruvec, idx, nr);
+ account_slab_nmi_safe(memcg, pgdat, idx, nr);
rcu_read_unlock();
}
@@ -2648,6 +2682,9 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
struct mem_cgroup *memcg;
struct obj_cgroup *objcg;
+ if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi())
+ return NULL;
+
if (in_task()) {
memcg = current->active_memcg;
if (unlikely(memcg))
@@ -2710,6 +2747,23 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
return objcg;
}
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
+{
+ if (likely(!in_nmi())) {
+ mod_memcg_state(memcg, MEMCG_KMEM, val);
+ } else {
+ /* TODO: add to cgroup update tree once it is nmi-safe. */
+ atomic_add(val, &memcg->kmem_stat);
+ }
+}
+#else
+static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
+{
+ mod_memcg_state(memcg, MEMCG_KMEM, val);
+}
+#endif
+
/*
* obj_cgroup_uncharge_pages: uncharge a number of kernel pages from a objcg
* @objcg: object cgroup to uncharge
@@ -2722,7 +2776,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
memcg = get_mem_cgroup_from_objcg(objcg);
- mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages);
+ account_kmem_nmi_safe(memcg, -nr_pages);
memcg1_account_kmem(memcg, -nr_pages);
if (!mem_cgroup_is_root(memcg))
refill_stock(memcg, nr_pages);
@@ -2750,7 +2804,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
if (ret)
goto out;
- mod_memcg_state(memcg, MEMCG_KMEM, nr_pages);
+ account_kmem_nmi_safe(memcg, nr_pages);
memcg1_account_kmem(memcg, nr_pages);
out:
css_put(&memcg->css);
@@ -3961,6 +4015,53 @@ static void mem_cgroup_stat_aggregate(struct aggregate_control *ac)
}
}
+#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
+static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
+ int cpu)
+{
+ int nid;
+
+ if (atomic_read(&memcg->kmem_stat)) {
+ int kmem = atomic_xchg(&memcg->kmem_stat, 0);
+ int index = memcg_stats_index(MEMCG_KMEM);
+
+ memcg->vmstats->state[index] += kmem;
+ if (parent)
+ parent->vmstats->state_pending[index] += kmem;
+ }
+
+ for_each_node_state(nid, N_MEMORY) {
+ struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+ struct lruvec_stats *lstats = pn->lruvec_stats;
+ struct lruvec_stats *plstats = NULL;
+
+ if (parent)
+ plstats = parent->nodeinfo[nid]->lruvec_stats;
+
+ if (atomic_read(&pn->slab_reclaimable)) {
+ int slab = atomic_xchg(&pn->slab_reclaimable, 0);
+ int index = memcg_stats_index(NR_SLAB_RECLAIMABLE_B);
+
+ lstats->state[index] += slab;
+ if (plstats)
+ plstats->state_pending[index] += slab;
+ }
+ if (atomic_read(&pn->slab_unreclaimable)) {
+ int slab = atomic_xchg(&pn->slab_unreclaimable, 0);
+ int index = memcg_stats_index(NR_SLAB_UNRECLAIMABLE_B);
+
+ lstats->state[index] += slab;
+ if (plstats)
+ plstats->state_pending[index] += slab;
+ }
+ }
+}
+#else
+static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
+ int cpu)
+{}
+#endif
+
static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
@@ -3969,6 +4070,8 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
struct aggregate_control ac;
int nid;
+ flush_nmi_stats(memcg, parent, cpu);
+
statc = per_cpu_ptr(memcg->vmstats_percpu, cpu);
ac = (struct aggregate_control) {
@@ -4018,8 +4121,8 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
}
WRITE_ONCE(statc->stats_updates, 0);
/* We are in a per-cpu loop here, only do the atomic write once */
- if (atomic64_read(&memcg->vmstats->stats_updates))
- atomic64_set(&memcg->vmstats->stats_updates, 0);
+ if (atomic_read(&memcg->vmstats->stats_updates))
+ atomic_set(&memcg->vmstats->stats_updates, 0);
}
static void mem_cgroup_fork(struct task_struct *task)
diff --git a/mm/memory.c b/mm/memory.c
index 5cb48f262ab0..8eba595056fe 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -358,6 +358,8 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
{
struct unlink_vma_file_batch vb;
+ tlb_free_vmas(tlb);
+
do {
unsigned long addr = vma->vm_start;
struct vm_area_struct *next;
@@ -4668,8 +4670,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
/*
* KSM sometimes has to copy on read faults, for example, if
- * page->index of !PageKSM() pages would be nonlinear inside the
- * anon VMA -- PageKSM() is lost on actual swapout.
+ * folio->index of non-ksm folios would be nonlinear inside the
+ * anon VMA -- the ksm flag is lost on actual swapout.
*/
folio = ksm_might_need_to_copy(folio, vma, vmf->address);
if (unlikely(!folio)) {
diff --git a/mm/mm_init.c b/mm/mm_init.c
index f0bd0830daad..f2944748f526 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1509,7 +1509,7 @@ static inline void setup_usemap(struct zone *zone) {}
/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
void __init set_pageblock_order(void)
{
- unsigned int order = MAX_PAGE_ORDER;
+ unsigned int order = PAGE_BLOCK_ORDER;
/* Check that pageblock_nr_pages has not already been setup */
if (pageblock_order)
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index db7ba4a725d6..b49cc6385f1f 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -424,6 +424,7 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
tlb->page_size = 0;
#endif
+ tlb->vma_pfn = 0;
__tlb_reset_range(tlb);
inc_tlb_flush_pending(tlb->mm);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b603a59cf8f7..b8eea5b3c064 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2565,11 +2565,11 @@ struct folio *writeback_iter(struct address_space *mapping,
if (!folio) {
/*
* To avoid deadlocks between range_cyclic writeback and callers
- * that hold pages in PageWriteback to aggregate I/O until
+ * that hold folios in writeback to aggregate I/O until
* the writeback iteration finishes, we do not loop back to the
- * start of the file. Doing so causes a page lock/page
+ * start of the file. Doing so causes a folio lock/folio
* writeback access order inversion - we should only ever lock
- * multiple pages in ascending page->index order, and looping
+ * multiple folios in ascending folio->index order, and looping
* back to the start of the file violates that rule and causes
* deadlocks.
*/
diff --git a/mm/shmem.c b/mm/shmem.c
index 858cee02ca49..0c5fb4ffa03a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1446,8 +1446,6 @@ static int shmem_unuse_swap_entries(struct inode *inode,
for (i = 0; i < folio_batch_count(fbatch); i++) {
struct folio *folio = fbatch->folios[i];
- if (!xa_is_value(folio))
- continue;
error = shmem_swapin_folio(inode, indices[i], &folio, SGP_CACHE,
mapping_gfp_mask(mapping), NULL, NULL);
if (error == 0) {
@@ -1505,6 +1503,7 @@ int shmem_unuse(unsigned int type)
return 0;
mutex_lock(&shmem_swaplist_mutex);
+start_over:
list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) {
if (!info->swapped) {
list_del_init(&info->swaplist);
@@ -1523,13 +1522,15 @@ int shmem_unuse(unsigned int type)
cond_resched();
mutex_lock(&shmem_swaplist_mutex);
- next = list_next_entry(info, swaplist);
- if (!info->swapped)
- list_del_init(&info->swaplist);
if (atomic_dec_and_test(&info->stop_eviction))
wake_up_var(&info->stop_eviction);
if (error)
break;
+ if (list_empty(&info->swaplist))
+ goto start_over;
+ next = list_next_entry(info, swaplist);
+ if (!info->swapped)
+ list_del_init(&info->swaplist);
}
mutex_unlock(&shmem_swaplist_mutex);
@@ -1643,8 +1644,8 @@ try_split:
BUG_ON(folio_mapped(folio));
return swap_writeout(folio, wbc);
}
-
- list_del_init(&info->swaplist);
+ if (!info->swapped)
+ list_del_init(&info->swaplist);
mutex_unlock(&shmem_swaplist_mutex);
if (nr_pages > 1)
goto try_split;
@@ -2331,6 +2332,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
*/
split_order = shmem_split_large_entry(inode, index, swap, gfp);
if (split_order < 0) {
+ folio_put(folio);
+ folio = NULL;
error = split_order;
goto failed;
}
@@ -5805,12 +5808,12 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
if (size < 0 || size > MAX_LFS_FILESIZE)
return ERR_PTR(-EINVAL);
- if (shmem_acct_size(flags, size))
- return ERR_PTR(-ENOMEM);
-
if (is_idmapped_mnt(mnt))
return ERR_PTR(-EINVAL);
+ if (shmem_acct_size(flags, size))
+ return ERR_PTR(-ENOMEM);
+
inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
S_IFREG | S_IRWXUGO, 0, flags);
if (IS_ERR(inode)) {
diff --git a/mm/truncate.c b/mm/truncate.c
index f2aaf99f2990..91eb92a5ce4f 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -425,7 +425,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
for (i = 0; i < folio_batch_count(&fbatch); i++) {
struct folio *folio = fbatch.folios[i];
- /* We rely upon deletion not changing page->index */
+ /* We rely upon deletion not changing folio->index */
if (xa_is_value(folio))
continue;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d888c248d99f..6f740f070b3d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1347,6 +1347,8 @@ const char * const vmstat_text[] = {
"numa_hint_faults",
"numa_hint_faults_local",
"numa_pages_migrated",
+ "numa_task_migrated",
+ "numa_task_swapped",
#endif
#ifdef CONFIG_MIGRATION
"pgmigrate_success",
diff --git a/mm/zpdesc.h b/mm/zpdesc.h
index 57e7a4d6c6ca..d3df316e5bb7 100644
--- a/mm/zpdesc.h
+++ b/mm/zpdesc.h
@@ -54,8 +54,8 @@ struct zpdesc {
ZPDESC_MATCH(flags, flags);
ZPDESC_MATCH(lru, lru);
ZPDESC_MATCH(mapping, movable_ops);
-ZPDESC_MATCH(index, next);
-ZPDESC_MATCH(index, handle);
+ZPDESC_MATCH(__folio_index, next);
+ZPDESC_MATCH(__folio_index, handle);
ZPDESC_MATCH(private, zspage);
ZPDESC_MATCH(page_type, first_obj_offset);
ZPDESC_MATCH(_refcount, _refcount);
diff --git a/rust/Makefile b/rust/Makefile
index 3aca903a7d08..80c84749d734 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -492,6 +492,7 @@ $(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs \
ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),)
$(obj)/core.o: scripts/target.json
endif
+KCOV_INSTRUMENT_core.o := n
$(obj)/compiler_builtins.o: private skip_gendwarfksyms = 1
$(obj)/compiler_builtins.o: private rustc_objcopy = -w -W '__*'
diff --git a/rust/kernel/mm.rs b/rust/kernel/mm.rs
index 615907a0f3b4..43f525c0d16c 100644
--- a/rust/kernel/mm.rs
+++ b/rust/kernel/mm.rs
@@ -10,7 +10,6 @@
//! control what happens when userspace reads or writes to that region of memory.
//!
//! C header: [`include/linux/mm.h`](srctree/include/linux/mm.h)
-#![cfg(CONFIG_MMU)]
use crate::{
bindings,
@@ -21,6 +20,10 @@ use core::{ops::Deref, ptr::NonNull};
pub mod virt;
use virt::VmaRef;
+#[cfg(CONFIG_MMU)]
+pub use mmput_async::MmWithUserAsync;
+mod mmput_async;
+
/// A wrapper for the kernel's `struct mm_struct`.
///
/// This represents the address space of a userspace process, so each process has one `Mm`
@@ -111,50 +114,6 @@ impl Deref for MmWithUser {
}
}
-/// A wrapper for the kernel's `struct mm_struct`.
-///
-/// This type is identical to `MmWithUser` except that it uses `mmput_async` when dropping a
-/// refcount. This means that the destructor of `ARef<MmWithUserAsync>` is safe to call in atomic
-/// context.
-///
-/// # Invariants
-///
-/// Values of this type are always refcounted using `mmget`. The value of `mm_users` is non-zero.
-#[repr(transparent)]
-pub struct MmWithUserAsync {
- mm: MmWithUser,
-}
-
-// SAFETY: It is safe to call `mmput_async` on another thread than where `mmget` was called.
-unsafe impl Send for MmWithUserAsync {}
-// SAFETY: All methods on `MmWithUserAsync` can be called in parallel from several threads.
-unsafe impl Sync for MmWithUserAsync {}
-
-// SAFETY: By the type invariants, this type is always refcounted.
-unsafe impl AlwaysRefCounted for MmWithUserAsync {
- #[inline]
- fn inc_ref(&self) {
- // SAFETY: The pointer is valid since self is a reference.
- unsafe { bindings::mmget(self.as_raw()) };
- }
-
- #[inline]
- unsafe fn dec_ref(obj: NonNull<Self>) {
- // SAFETY: The caller is giving up their refcount.
- unsafe { bindings::mmput_async(obj.cast().as_ptr()) };
- }
-}
-
-// Make all `MmWithUser` methods available on `MmWithUserAsync`.
-impl Deref for MmWithUserAsync {
- type Target = MmWithUser;
-
- #[inline]
- fn deref(&self) -> &MmWithUser {
- &self.mm
- }
-}
-
// These methods are safe to call even if `mm_users` is zero.
impl Mm {
/// Returns a raw pointer to the inner `mm_struct`.
@@ -206,13 +165,6 @@ impl MmWithUser {
unsafe { &*ptr.cast() }
}
- /// Use `mmput_async` when dropping this refcount.
- #[inline]
- pub fn into_mmput_async(me: ARef<MmWithUser>) -> ARef<MmWithUserAsync> {
- // SAFETY: The layouts and invariants are compatible.
- unsafe { ARef::from_raw(ARef::into_raw(me).cast()) }
- }
-
/// Attempt to access a vma using the vma read lock.
///
/// This is an optimistic trylock operation, so it may fail if there is contention. In that
diff --git a/rust/kernel/mm/mmput_async.rs b/rust/kernel/mm/mmput_async.rs
new file mode 100644
index 000000000000..9289e05f7a67
--- /dev/null
+++ b/rust/kernel/mm/mmput_async.rs
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Version of `MmWithUser` using `mmput_async`.
+//!
+//! This is a separate file from `mm.rs` due to the dependency on `CONFIG_MMU=y`.
+#![cfg(CONFIG_MMU)]
+
+use crate::{
+ bindings,
+ mm::MmWithUser,
+ types::{ARef, AlwaysRefCounted},
+};
+use core::{ops::Deref, ptr::NonNull};
+
+/// A wrapper for the kernel's `struct mm_struct`.
+///
+/// This type is identical to `MmWithUser` except that it uses `mmput_async` when dropping a
+/// refcount. This means that the destructor of `ARef<MmWithUserAsync>` is safe to call in atomic
+/// context.
+///
+/// # Invariants
+///
+/// Values of this type are always refcounted using `mmget`. The value of `mm_users` is non-zero.
+#[repr(transparent)]
+pub struct MmWithUserAsync {
+ mm: MmWithUser,
+}
+
+// SAFETY: It is safe to call `mmput_async` on another thread than where `mmget` was called.
+unsafe impl Send for MmWithUserAsync {}
+// SAFETY: All methods on `MmWithUserAsync` can be called in parallel from several threads.
+unsafe impl Sync for MmWithUserAsync {}
+
+// SAFETY: By the type invariants, this type is always refcounted.
+unsafe impl AlwaysRefCounted for MmWithUserAsync {
+ #[inline]
+ fn inc_ref(&self) {
+ // SAFETY: The pointer is valid since self is a reference.
+ unsafe { bindings::mmget(self.as_raw()) };
+ }
+
+ #[inline]
+ unsafe fn dec_ref(obj: NonNull<Self>) {
+ // SAFETY: The caller is giving up their refcount.
+ unsafe { bindings::mmput_async(obj.cast().as_ptr()) };
+ }
+}
+
+// Make all `MmWithUser` methods available on `MmWithUserAsync`.
+impl Deref for MmWithUserAsync {
+ type Target = MmWithUser;
+
+ #[inline]
+ fn deref(&self) -> &MmWithUser {
+ &self.mm
+ }
+}
+
+impl MmWithUser {
+ /// Use `mmput_async` when dropping this refcount.
+ #[inline]
+ pub fn into_mmput_async(me: ARef<MmWithUser>) -> ARef<MmWithUserAsync> {
+ // SAFETY: The layouts and invariants are compatible.
+ unsafe { ARef::from_raw(ARef::into_raw(me).cast()) }
+ }
+}
diff --git a/scripts/Makefile.kcov b/scripts/Makefile.kcov
index 01616472f43e..78305a84ba9d 100644
--- a/scripts/Makefile.kcov
+++ b/scripts/Makefile.kcov
@@ -2,4 +2,10 @@
kcov-flags-y += -fsanitize-coverage=trace-pc
kcov-flags-$(CONFIG_KCOV_ENABLE_COMPARISONS) += -fsanitize-coverage=trace-cmp
+kcov-rflags-y += -Cpasses=sancov-module
+kcov-rflags-y += -Cllvm-args=-sanitizer-coverage-level=3
+kcov-rflags-y += -Cllvm-args=-sanitizer-coverage-trace-pc
+kcov-rflags-$(CONFIG_KCOV_ENABLE_COMPARISONS) += -Cllvm-args=-sanitizer-coverage-trace-compares
+
export CFLAGS_KCOV := $(kcov-flags-y)
+export RUSTFLAGS_KCOV := $(kcov-rflags-y)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 6fc2a82ee3bb..2b332645e0c2 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -169,6 +169,9 @@ ifeq ($(CONFIG_KCOV),y)
_c_flags += $(if $(patsubst n%,, \
$(KCOV_INSTRUMENT_$(target-stem).o)$(KCOV_INSTRUMENT)$(if $(is-kernel-object),$(CONFIG_KCOV_INSTRUMENT_ALL))), \
$(CFLAGS_KCOV))
+_rust_flags += $(if $(patsubst n%,, \
+ $(KCOV_INSTRUMENT_$(target-stem).o)$(KCOV_INSTRUMENT)$(if $(is-kernel-object),$(CONFIG_KCOV_INSTRUMENT_ALL))), \
+ $(RUSTFLAGS_KCOV))
endif
#
diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py
index 1e587e0b1a39..5b1cb6b3ce4e 100644
--- a/tools/testing/selftests/damon/_damon_sysfs.py
+++ b/tools/testing/selftests/damon/_damon_sysfs.py
@@ -15,6 +15,10 @@ if sysfs_root is None:
print('Seems sysfs not mounted?')
exit(ksft_skip)
+if not os.path.exists(sysfs_root):
+ print('Seems DAMON disabled?')
+ exit(ksft_skip)
+
def write_file(path, string):
"Returns error string if failed, or None otherwise"
string = '%s' % string
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index b6cfe0a4b7df..dbbcc5eb3dce 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -112,9 +112,12 @@ struct comm_pipes {
static int setup_comm_pipes(struct comm_pipes *comm_pipes)
{
- if (pipe(comm_pipes->child_ready) < 0)
+ if (pipe(comm_pipes->child_ready) < 0) {
+ ksft_perror("pipe()");
return -errno;
+ }
if (pipe(comm_pipes->parent_ready) < 0) {
+ ksft_perror("pipe()");
close(comm_pipes->child_ready[0]);
close(comm_pipes->child_ready[1]);
return -errno;
@@ -207,13 +210,14 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
return;
}
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
} else if (!ret) {
exit(fn(mem, size, &comm_pipes));
@@ -228,9 +232,18 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
* write-faults by directly mapping pages writable.
*/
ret = mprotect(mem, size, PROT_READ);
- ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ goto close_comm_pipes;
+ }
+
+ ret = mprotect(mem, size, PROT_READ|PROT_WRITE);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
write(comm_pipes.parent_ready[1], "0", 1);
wait(&ret);
goto close_comm_pipes;
@@ -248,16 +261,16 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
ret = -EINVAL;
if (!ret) {
- ksft_test_result_pass("No leak from parent into child\n");
+ log_test_result(KSFT_PASS);
} else if (xfail) {
/*
* With hugetlb, some vmsplice() tests are currently expected to
* fail because (a) harder to fix and (b) nobody really cares.
* Flag them as expected failure for now.
*/
- ksft_test_result_xfail("Leak from parent into child\n");
+ log_test_result(KSFT_XFAIL);
} else {
- ksft_test_result_fail("Leak from parent into child\n");
+ log_test_result(KSFT_FAIL);
}
close_comm_pipes:
close_comm_pipes(&comm_pipes);
@@ -306,26 +319,29 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
goto free;
}
if (pipe(fds) < 0) {
- ksft_test_result_fail("pipe() failed\n");
+ ksft_perror("pipe() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
if (before_fork) {
transferred = vmsplice(fds[1], &iov, 1, 0);
if (transferred <= 0) {
- ksft_test_result_fail("vmsplice() failed\n");
+ ksft_print_msg("vmsplice() failed\n");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
}
}
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed\n");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
} else if (!ret) {
write(comm_pipes.child_ready[1], "0", 1);
@@ -339,7 +355,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
if (!before_fork) {
transferred = vmsplice(fds[1], &iov, 1, 0);
if (transferred <= 0) {
- ksft_test_result_fail("vmsplice() failed\n");
+ ksft_perror("vmsplice() failed");
+ log_test_result(KSFT_FAIL);
wait(&ret);
goto close_pipe;
}
@@ -348,7 +365,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
;
if (munmap(mem, size) < 0) {
- ksft_test_result_fail("munmap() failed\n");
+ ksft_perror("munmap() failed");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
}
write(comm_pipes.parent_ready[1], "0", 1);
@@ -356,7 +374,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
/* Wait until the child is done writing. */
wait(&ret);
if (!WIFEXITED(ret)) {
- ksft_test_result_fail("wait() failed\n");
+ ksft_perror("wait() failed");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
}
@@ -364,22 +383,23 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
for (total = 0; total < transferred; total += cur) {
cur = read(fds[0], new + total, transferred - total);
if (cur < 0) {
- ksft_test_result_fail("read() failed\n");
+ ksft_perror("read() failed");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
}
}
if (!memcmp(old, new, transferred)) {
- ksft_test_result_pass("No leak from child into parent\n");
+ log_test_result(KSFT_PASS);
} else if (xfail) {
/*
* With hugetlb, some vmsplice() tests are currently expected to
* fail because (a) harder to fix and (b) nobody really cares.
* Flag them as expected failure for now.
*/
- ksft_test_result_xfail("Leak from child into parent\n");
+ log_test_result(KSFT_XFAIL);
} else {
- ksft_test_result_fail("Leak from child into parent\n");
+ log_test_result(KSFT_FAIL);
}
close_pipe:
close(fds[0]);
@@ -416,13 +436,14 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
return;
}
file = tmpfile();
if (!file) {
- ksft_test_result_fail("tmpfile() failed\n");
+ ksft_perror("tmpfile() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
fd = fileno(file);
@@ -430,14 +451,16 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
tmp = malloc(size);
if (!tmp) {
- ksft_test_result_fail("malloc() failed\n");
+ ksft_print_msg("malloc() failed\n");
+ log_test_result(KSFT_FAIL);
goto close_file;
}
/* Skip on errors, as we might just lack kernel support. */
ret = io_uring_queue_init(1, &ring, 0);
if (ret < 0) {
- ksft_test_result_skip("io_uring_queue_init() failed\n");
+ ksft_print_msg("io_uring_queue_init() failed\n");
+ log_test_result(KSFT_SKIP);
goto free_tmp;
}
@@ -452,7 +475,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
iov.iov_len = size;
ret = io_uring_register_buffers(&ring, &iov, 1);
if (ret) {
- ksft_test_result_skip("io_uring_register_buffers() failed\n");
+ ksft_print_msg("io_uring_register_buffers() failed\n");
+ log_test_result(KSFT_SKIP);
goto queue_exit;
}
@@ -463,7 +487,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
*/
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto unregister_buffers;
} else if (!ret) {
write(comm_pipes.child_ready[1], "0", 1);
@@ -483,10 +508,17 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
* if the page is mapped R/O vs. R/W).
*/
ret = mprotect(mem, size, PROT_READ);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ goto unregister_buffers;
+ }
+
clear_softdirty();
- ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
+ ret = mprotect(mem, size, PROT_READ | PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto unregister_buffers;
}
}
@@ -498,25 +530,29 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
memset(mem, 0xff, size);
sqe = io_uring_get_sqe(&ring);
if (!sqe) {
- ksft_test_result_fail("io_uring_get_sqe() failed\n");
+ ksft_print_msg("io_uring_get_sqe() failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);
ret = io_uring_submit(&ring);
if (ret < 0) {
- ksft_test_result_fail("io_uring_submit() failed\n");
+ ksft_print_msg("io_uring_submit() failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
ret = io_uring_wait_cqe(&ring, &cqe);
if (ret < 0) {
- ksft_test_result_fail("io_uring_wait_cqe() failed\n");
+ ksft_print_msg("io_uring_wait_cqe() failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
if (cqe->res != size) {
- ksft_test_result_fail("write_fixed failed\n");
+ ksft_print_msg("write_fixed failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
io_uring_cqe_seen(&ring, cqe);
@@ -526,15 +562,18 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
while (total < size) {
cur = pread(fd, tmp + total, size - total, total);
if (cur < 0) {
- ksft_test_result_fail("pread() failed\n");
+ ksft_print_msg("pread() failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
total += cur;
}
/* Finally, check if we read what we expected. */
- ksft_test_result(!memcmp(mem, tmp, size),
- "Longterm R/W pin is reliable\n");
+ if (!memcmp(mem, tmp, size))
+ log_test_result(KSFT_PASS);
+ else
+ log_test_result(KSFT_FAIL);
quit_child:
if (use_fork) {
@@ -582,19 +621,21 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
int ret;
if (gup_fd < 0) {
- ksft_test_result_skip("gup_test not available\n");
+ ksft_print_msg("gup_test not available\n");
+ log_test_result(KSFT_SKIP);
return;
}
tmp = malloc(size);
if (!tmp) {
- ksft_test_result_fail("malloc() failed\n");
+ ksft_print_msg("malloc() failed\n");
+ log_test_result(KSFT_FAIL);
return;
}
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
goto free_tmp;
}
@@ -609,7 +650,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
*/
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
} else if (!ret) {
write(comm_pipes.child_ready[1], "0", 1);
@@ -646,7 +688,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
clear_softdirty();
ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
break;
@@ -661,9 +704,11 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
if (ret) {
if (errno == EINVAL)
- ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
+ ret = KSFT_SKIP;
else
- ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
+ ret = KSFT_FAIL;
+ ksft_perror("PIN_LONGTERM_TEST_START failed");
+ log_test_result(ret);
goto wait;
}
@@ -676,22 +721,26 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
*/
tmp_val = (__u64)(uintptr_t)tmp;
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
- if (ret)
- ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
- else
- ksft_test_result(!memcmp(mem, tmp, size),
- "Longterm R/O pin is reliable\n");
+ if (ret) {
+ ksft_perror("PIN_LONGTERM_TEST_READ failed");
+ log_test_result(KSFT_FAIL);
+ } else {
+ if (!memcmp(mem, tmp, size))
+ log_test_result(KSFT_PASS);
+ else
+ log_test_result(KSFT_FAIL);
+ }
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
if (ret)
- ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
+ ksft_perror("PIN_LONGTERM_TEST_STOP failed");
wait:
switch (test) {
case RO_PIN_TEST_SHARED:
write(comm_pipes.parent_ready[1], "0", 1);
wait(&ret);
if (!WIFEXITED(ret))
- ksft_print_msg("[INFO] wait() failed\n");
+ ksft_perror("wait() failed");
break;
default:
break;
@@ -746,14 +795,16 @@ static void do_run_with_base_page(test_fn fn, bool swapout)
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
return;
}
ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
/* Ignore if not around on a kernel. */
if (ret && errno != EINVAL) {
- ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
+ ksft_perror("MADV_NOHUGEPAGE failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -763,7 +814,8 @@ static void do_run_with_base_page(test_fn fn, bool swapout)
if (swapout) {
madvise(mem, pagesize, MADV_PAGEOUT);
if (!pagemap_is_swapped(pagemap_fd, mem)) {
- ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
+ ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
+ log_test_result(KSFT_SKIP);
goto munmap;
}
}
@@ -775,13 +827,13 @@ munmap:
static void run_with_base_page(test_fn fn, const char *desc)
{
- ksft_print_msg("[RUN] %s ... with base page\n", desc);
+ log_test_start("%s ... with base page", desc);
do_run_with_base_page(fn, false);
}
static void run_with_base_page_swap(test_fn fn, const char *desc)
{
- ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
+ log_test_start("%s ... with swapped out base page", desc);
do_run_with_base_page(fn, true);
}
@@ -807,7 +859,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mmap_mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
return;
}
@@ -816,7 +869,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
ret = madvise(mem, thpsize, MADV_HUGEPAGE);
if (ret) {
- ksft_test_result_fail("MADV_HUGEPAGE failed\n");
+ ksft_perror("MADV_HUGEPAGE failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -826,7 +880,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
*/
mem[0] = 1;
if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
- ksft_test_result_skip("Did not get a THP populated\n");
+ ksft_print_msg("Did not get a THP populated\n");
+ log_test_result(KSFT_SKIP);
goto munmap;
}
memset(mem, 1, thpsize);
@@ -846,12 +901,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
*/
ret = mprotect(mem + pagesize, pagesize, PROT_READ);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
break;
@@ -863,7 +920,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
*/
ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
if (ret) {
- ksft_test_result_fail("MADV_DONTNEED failed\n");
+ ksft_perror("MADV_DONTNEED failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
size = pagesize;
@@ -877,13 +935,15 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mremap_mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
if (tmp != mremap_mem) {
- ksft_test_result_fail("mremap() failed\n");
+ ksft_perror("mremap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
size = mremap_size;
@@ -896,12 +956,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
*/
ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
if (ret) {
- ksft_test_result_fail("MADV_DONTFORK failed\n");
+ ksft_perror("MADV_DONTFORK failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
} else if (!ret) {
exit(0);
@@ -910,7 +972,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
/* Allow for sharing all pages again. */
ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
if (ret) {
- ksft_test_result_fail("MADV_DOFORK failed\n");
+ ksft_perror("MADV_DOFORK failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
break;
@@ -924,7 +987,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
case THP_RUN_SINGLE_PTE_SWAPOUT:
madvise(mem, size, MADV_PAGEOUT);
if (!range_is_swapped(mem, size)) {
- ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
+ ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
+ log_test_result(KSFT_SKIP);
goto munmap;
}
break;
@@ -941,56 +1005,56 @@ munmap:
static void run_with_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n",
+ log_test_start("%s ... with THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PMD, size);
}
static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n",
+ log_test_start("%s ... with swapped-out THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
}
static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n",
+ log_test_start("%s ... with PTE-mapped THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PTE, size);
}
static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n",
+ log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
}
static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n",
+ log_test_start("%s ... with single PTE of THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
}
static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n",
+ log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
}
static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n",
+ log_test_start("%s ... with partially mremap()'ed THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
}
static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n",
+ log_test_start("%s ... with partially shared THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
}
@@ -1000,14 +1064,15 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
char *mem, *dummy;
- ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
+ log_test_start("%s ... with hugetlb (%zu kB)", desc,
hugetlbsize / 1024);
flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;
mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_skip("need more free huge pages\n");
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
return;
}
@@ -1020,7 +1085,8 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
*/
dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
if (dummy == MAP_FAILED) {
- ksft_test_result_skip("need more free huge pages\n");
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
goto munmap;
}
munmap(dummy, hugetlbsize);
@@ -1226,7 +1292,7 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
return;
}
@@ -1236,12 +1302,14 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
*/
ret = mprotect(mem + pagesize, pagesize, PROT_READ);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
@@ -1250,8 +1318,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
/* Collapse before actually COW-sharing the page. */
ret = madvise(mem, size, MADV_COLLAPSE);
if (ret) {
- ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
- strerror(errno));
+ ksft_perror("MADV_COLLAPSE failed");
+ log_test_result(KSFT_SKIP);
goto close_comm_pipes;
}
break;
@@ -1262,7 +1330,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
/* Don't COW-share the upper part of the THP. */
ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
if (ret) {
- ksft_test_result_fail("MADV_DONTFORK failed\n");
+ ksft_perror("MADV_DONTFORK failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
break;
@@ -1270,7 +1339,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
/* Don't COW-share the lower part of the THP. */
ret = madvise(mem, size / 2, MADV_DONTFORK);
if (ret) {
- ksft_test_result_fail("MADV_DONTFORK failed\n");
+ ksft_perror("MADV_DONTFORK failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
break;
@@ -1280,7 +1350,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
} else if (!ret) {
switch (test) {
@@ -1314,7 +1385,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
*/
ret = madvise(mem, size, MADV_DOFORK);
if (ret) {
- ksft_test_result_fail("MADV_DOFORK failed\n");
+ ksft_perror("MADV_DOFORK failed");
+ log_test_result(KSFT_FAIL);
write(comm_pipes.parent_ready[1], "0", 1);
wait(&ret);
goto close_comm_pipes;
@@ -1324,8 +1396,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
/* Collapse before anyone modified the COW-shared page. */
ret = madvise(mem, size, MADV_COLLAPSE);
if (ret) {
- ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
- strerror(errno));
+ ksft_perror("MADV_COLLAPSE failed");
+ log_test_result(KSFT_SKIP);
write(comm_pipes.parent_ready[1], "0", 1);
wait(&ret);
goto close_comm_pipes;
@@ -1345,7 +1417,10 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
else
ret = -EINVAL;
- ksft_test_result(!ret, "No leak from parent into child\n");
+ if (!ret)
+ log_test_result(KSFT_PASS);
+ else
+ log_test_result(KSFT_FAIL);
close_comm_pipes:
close_comm_pipes(&comm_pipes);
}
@@ -1430,7 +1505,7 @@ static void run_anon_thp_test_cases(void)
for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
struct test_case const *test_case = &anon_thp_test_cases[i];
- ksft_print_msg("[RUN] %s\n", test_case->desc);
+ log_test_start("%s", test_case->desc);
do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize);
}
}
@@ -1453,8 +1528,10 @@ static void test_cow(char *mem, const char *smem, size_t size)
memset(mem, 0xff, size);
/* See if we still read the old values via the other mapping. */
- ksft_test_result(!memcmp(smem, old, size),
- "Other mapping not modified\n");
+ if (!memcmp(smem, old, size))
+ log_test_result(KSFT_PASS);
+ else
+ log_test_result(KSFT_FAIL);
free(old);
}
@@ -1472,18 +1549,20 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
{
char *mem, *smem, tmp;
- ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);
+ log_test_start("%s ... with shared zeropage", desc);
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANON, -1, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
return;
}
smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
if (smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1504,10 +1583,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
size_t mmap_size;
int ret;
- ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);
+ log_test_start("%s ... with huge zeropage", desc);
if (!has_huge_zeropage) {
- ksft_test_result_skip("Huge zeropage not enabled\n");
+ ksft_print_msg("Huge zeropage not enabled\n");
+ log_test_result(KSFT_SKIP);
return;
}
@@ -1516,13 +1596,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mmap_mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
return;
}
mmap_smem = mmap(NULL, mmap_size, PROT_READ,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mmap_smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1531,9 +1613,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1));
ret = madvise(mem, pmdsize, MADV_HUGEPAGE);
+ if (ret != 0) {
+ ksft_perror("madvise()");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
ret |= madvise(smem, pmdsize, MADV_HUGEPAGE);
- if (ret) {
- ksft_test_result_fail("MADV_HUGEPAGE failed\n");
+ if (ret != 0) {
+ ksft_perror("madvise()");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1562,29 +1650,33 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc)
char *mem, *smem, tmp;
int fd;
- ksft_print_msg("[RUN] %s ... with memfd\n", desc);
+ log_test_start("%s ... with memfd", desc);
fd = memfd_create("test", 0);
if (fd < 0) {
- ksft_test_result_fail("memfd_create() failed\n");
+ ksft_perror("memfd_create() failed");
+ log_test_result(KSFT_FAIL);
return;
}
/* File consists of a single page filled with zeroes. */
if (fallocate(fd, 0, 0, pagesize)) {
- ksft_test_result_fail("fallocate() failed\n");
+ ksft_perror("fallocate() failed");
+ log_test_result(KSFT_FAIL);
goto close;
}
/* Create a private mapping of the memfd. */
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto close;
}
smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
if (smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1607,35 +1699,40 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
FILE *file;
int fd;
- ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
+ log_test_start("%s ... with tmpfile", desc);
file = tmpfile();
if (!file) {
- ksft_test_result_fail("tmpfile() failed\n");
+ ksft_perror("tmpfile() failed");
+ log_test_result(KSFT_FAIL);
return;
}
fd = fileno(file);
if (fd < 0) {
- ksft_test_result_skip("fileno() failed\n");
+ ksft_perror("fileno() failed");
+ log_test_result(KSFT_SKIP);
return;
}
/* File consists of a single page filled with zeroes. */
if (fallocate(fd, 0, 0, pagesize)) {
- ksft_test_result_fail("fallocate() failed\n");
+ ksft_perror("fallocate() failed");
+ log_test_result(KSFT_FAIL);
goto close;
}
/* Create a private mapping of the memfd. */
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto close;
}
smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
if (smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1659,20 +1756,22 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
char *mem, *smem, tmp;
int fd;
- ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
+ log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
hugetlbsize / 1024);
flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
fd = memfd_create("test", flags);
if (fd < 0) {
- ksft_test_result_skip("memfd_create() failed\n");
+ ksft_perror("memfd_create() failed");
+ log_test_result(KSFT_SKIP);
return;
}
/* File consists of a single page filled with zeroes. */
if (fallocate(fd, 0, 0, hugetlbsize)) {
- ksft_test_result_skip("need more free huge pages\n");
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
goto close;
}
@@ -1680,12 +1779,14 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
0);
if (mem == MAP_FAILED) {
- ksft_test_result_skip("need more free huge pages\n");
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
goto close;
}
smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
if (smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1771,7 +1872,6 @@ static int tests_per_non_anon_test_case(void)
int main(int argc, char **argv)
{
- int err;
struct thp_settings default_settings;
ksft_print_header();
@@ -1811,9 +1911,5 @@ int main(int argc, char **argv)
thp_restore_settings();
}
- err = ksft_get_fail_cnt();
- if (err)
- ksft_exit_fail_msg("%d out of %d tests failed\n",
- err, ksft_test_num());
- ksft_exit_pass();
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c
index 0cd9d236649d..93af3d3760f9 100644
--- a/tools/testing/selftests/mm/guard-regions.c
+++ b/tools/testing/selftests/mm/guard-regions.c
@@ -1453,8 +1453,21 @@ TEST_F(guard_regions, uffd)
/* Set up uffd. */
uffd = userfaultfd(0);
- if (uffd == -1 && errno == EPERM)
- ksft_exit_skip("No userfaultfd permissions, try running as root.\n");
+ if (uffd == -1) {
+ switch (errno) {
+ case EPERM:
+ SKIP(return, "No userfaultfd permissions, try running as root.");
+ break;
+ case ENOSYS:
+ SKIP(return, "userfaultfd is not supported/not enabled.");
+ break;
+ default:
+ ksft_exit_fail_msg("userfaultfd failed with %s\n",
+ strerror(errno));
+ break;
+ }
+ }
+
ASSERT_NE(uffd, -1);
ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0);
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index d50ada0c7dbf..8a97ac5176a4 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -93,33 +93,48 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
__fsword_t fs_type = get_fs_type(fd);
bool should_work;
char *mem;
+ int result = KSFT_PASS;
int ret;
+ if (fd < 0) {
+ result = KSFT_FAIL;
+ goto report;
+ }
+
if (ftruncate(fd, size)) {
if (errno == ENOENT) {
skip_test_dodgy_fs("ftruncate()");
} else {
- ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno));
+ ksft_print_msg("ftruncate() failed (%s)\n",
+ strerror(errno));
+ result = KSFT_FAIL;
+ goto report;
}
return;
}
if (fallocate(fd, 0, 0, size)) {
- if (size == pagesize)
- ksft_test_result_fail("fallocate() failed (%s)\n", strerror(errno));
- else
- ksft_test_result_skip("need more free huge pages\n");
- return;
+ if (size == pagesize) {
+ ksft_print_msg("fallocate() failed (%s)\n", strerror(errno));
+ result = KSFT_FAIL;
+ } else {
+ ksft_print_msg("need more free huge pages\n");
+ result = KSFT_SKIP;
+ }
+ goto report;
}
mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
if (mem == MAP_FAILED) {
- if (size == pagesize || shared)
- ksft_test_result_fail("mmap() failed (%s)\n", strerror(errno));
- else
- ksft_test_result_skip("need more free huge pages\n");
- return;
+ if (size == pagesize || shared) {
+ ksft_print_msg("mmap() failed (%s)\n", strerror(errno));
+ result = KSFT_FAIL;
+ } else {
+ ksft_print_msg("need more free huge pages\n");
+ result = KSFT_SKIP;
+ }
+ goto report;
}
/* Fault in the page such that GUP-fast can pin it directly. */
@@ -134,7 +149,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
*/
ret = mprotect(mem, size, PROT_READ);
if (ret) {
- ksft_test_result_fail("mprotect() failed (%s)\n", strerror(errno));
+ ksft_print_msg("mprotect() failed (%s)\n", strerror(errno));
+ result = KSFT_FAIL;
goto munmap;
}
/* FALLTHROUGH */
@@ -147,12 +163,14 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
type == TEST_TYPE_RW_FAST;
if (gup_fd < 0) {
- ksft_test_result_skip("gup_test not available\n");
+ ksft_print_msg("gup_test not available\n");
+ result = KSFT_SKIP;
break;
}
if (rw && shared && fs_is_unknown(fs_type)) {
- ksft_test_result_skip("Unknown filesystem\n");
+ ksft_print_msg("Unknown filesystem\n");
+ result = KSFT_SKIP;
return;
}
/*
@@ -169,14 +187,19 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
if (ret && errno == EINVAL) {
- ksft_test_result_skip("PIN_LONGTERM_TEST_START failed (EINVAL)n");
+ ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)n");
+ result = KSFT_SKIP;
break;
} else if (ret && errno == EFAULT) {
- ksft_test_result(!should_work, "Should have failed\n");
+ if (should_work)
+ result = KSFT_FAIL;
+ else
+ result = KSFT_PASS;
break;
} else if (ret) {
- ksft_test_result_fail("PIN_LONGTERM_TEST_START failed (%s)\n",
- strerror(errno));
+ ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n",
+ strerror(errno));
+ result = KSFT_FAIL;
break;
}
@@ -189,7 +212,10 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
* some previously unsupported filesystems, we might want to
* perform some additional tests for possible data corruptions.
*/
- ksft_test_result(should_work, "Should have worked\n");
+ if (should_work)
+ result = KSFT_PASS;
+ else
+ result = KSFT_FAIL;
break;
}
#ifdef LOCAL_CONFIG_HAVE_LIBURING
@@ -199,8 +225,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
/* io_uring always pins pages writable. */
if (shared && fs_is_unknown(fs_type)) {
- ksft_test_result_skip("Unknown filesystem\n");
- return;
+ ksft_print_msg("Unknown filesystem\n");
+ result = KSFT_SKIP;
+ goto report;
}
should_work = !shared ||
fs_supports_writable_longterm_pinning(fs_type);
@@ -208,8 +235,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
/* Skip on errors, as we might just lack kernel support. */
ret = io_uring_queue_init(1, &ring, 0);
if (ret < 0) {
- ksft_test_result_skip("io_uring_queue_init() failed (%s)\n",
- strerror(-ret));
+ ksft_print_msg("io_uring_queue_init() failed (%s)\n",
+ strerror(-ret));
+ result = KSFT_SKIP;
break;
}
/*
@@ -222,17 +250,28 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
/* Only new kernels return EFAULT. */
if (ret && (errno == ENOSPC || errno == EOPNOTSUPP ||
errno == EFAULT)) {
- ksft_test_result(!should_work, "Should have failed (%s)\n",
- strerror(errno));
+ if (should_work) {
+ ksft_print_msg("Should have failed (%s)\n",
+ strerror(errno));
+ result = KSFT_FAIL;
+ } else {
+ result = KSFT_PASS;
+ }
} else if (ret) {
/*
* We might just lack support or have insufficient
* MEMLOCK limits.
*/
- ksft_test_result_skip("io_uring_register_buffers() failed (%s)\n",
- strerror(-ret));
+ ksft_print_msg("io_uring_register_buffers() failed (%s)\n",
+ strerror(-ret));
+ result = KSFT_SKIP;
} else {
- ksft_test_result(should_work, "Should have worked\n");
+ if (should_work) {
+ result = KSFT_PASS;
+ } else {
+ ksft_print_msg("Should have worked\n");
+ result = KSFT_FAIL;
+ }
io_uring_unregister_buffers(&ring);
}
@@ -246,6 +285,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
munmap:
munmap(mem, size);
+report:
+ log_test_result(result);
}
typedef void (*test_fn)(int fd, size_t size);
@@ -254,13 +295,11 @@ static void run_with_memfd(test_fn fn, const char *desc)
{
int fd;
- ksft_print_msg("[RUN] %s ... with memfd\n", desc);
+ log_test_start("%s ... with memfd", desc);
fd = memfd_create("test", 0);
- if (fd < 0) {
- ksft_test_result_fail("memfd_create() failed (%s)\n", strerror(errno));
- return;
- }
+ if (fd < 0)
+ ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
fn(fd, pagesize);
close(fd);
@@ -271,23 +310,23 @@ static void run_with_tmpfile(test_fn fn, const char *desc)
FILE *file;
int fd;
- ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
+ log_test_start("%s ... with tmpfile", desc);
file = tmpfile();
if (!file) {
- ksft_test_result_fail("tmpfile() failed (%s)\n", strerror(errno));
- return;
- }
-
- fd = fileno(file);
- if (fd < 0) {
- ksft_test_result_fail("fileno() failed (%s)\n", strerror(errno));
- goto close;
+ ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno));
+ fd = -1;
+ } else {
+ fd = fileno(file);
+ if (fd < 0) {
+ ksft_print_msg("fileno() failed (%s)\n", strerror(errno));
+ }
}
fn(fd, pagesize);
-close:
- fclose(file);
+
+ if (file)
+ fclose(file);
}
static void run_with_local_tmpfile(test_fn fn, const char *desc)
@@ -295,22 +334,22 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc)
char filename[] = __FILE__"_tmpfile_XXXXXX";
int fd;
- ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc);
+ log_test_start("%s ... with local tmpfile", desc);
fd = mkstemp(filename);
- if (fd < 0) {
- ksft_test_result_fail("mkstemp() failed (%s)\n", strerror(errno));
- return;
- }
+ if (fd < 0)
+ ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno));
if (unlink(filename)) {
- ksft_test_result_fail("unlink() failed (%s)\n", strerror(errno));
- goto close;
+ ksft_print_msg("unlink() failed (%s)\n", strerror(errno));
+ close(fd);
+ fd = -1;
}
fn(fd, pagesize);
-close:
- close(fd);
+
+ if (fd >= 0)
+ close(fd);
}
static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
@@ -319,15 +358,14 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
int flags = MFD_HUGETLB;
int fd;
- ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
+ log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
hugetlbsize / 1024);
flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
fd = memfd_create("test", flags);
if (fd < 0) {
- ksft_test_result_skip("memfd_create() failed (%s)\n", strerror(errno));
- return;
+ ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
}
fn(fd, hugetlbsize);
@@ -455,7 +493,7 @@ static int tests_per_test_case(void)
int main(int argc, char **argv)
{
- int i, err;
+ int i;
pagesize = getpagesize();
nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
@@ -469,9 +507,5 @@ int main(int argc, char **argv)
for (i = 0; i < ARRAY_SIZE(test_cases); i++)
run_test_case(&test_cases[i]);
- err = ksft_get_fail_cnt();
- if (err)
- ksft_exit_fail_msg("%d out of %d tests failed\n",
- err, ksft_test_num());
- ksft_exit_pass();
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c
index ef7d911da13e..b6fabd5c27ed 100644
--- a/tools/testing/selftests/mm/madv_populate.c
+++ b/tools/testing/selftests/mm/madv_populate.c
@@ -172,12 +172,12 @@ static void test_populate_read(void)
if (addr == MAP_FAILED)
ksft_exit_fail_msg("mmap failed\n");
ksft_test_result(range_is_not_populated(addr, SIZE),
- "range initially not populated\n");
+ "read range initially not populated\n");
ret = madvise(addr, SIZE, MADV_POPULATE_READ);
ksft_test_result(!ret, "MADV_POPULATE_READ\n");
ksft_test_result(range_is_populated(addr, SIZE),
- "range is populated\n");
+ "read range is populated\n");
munmap(addr, SIZE);
}
@@ -194,12 +194,12 @@ static void test_populate_write(void)
if (addr == MAP_FAILED)
ksft_exit_fail_msg("mmap failed\n");
ksft_test_result(range_is_not_populated(addr, SIZE),
- "range initially not populated\n");
+ "write range initially not populated\n");
ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
ksft_test_result(range_is_populated(addr, SIZE),
- "range is populated\n");
+ "write range is populated\n");
munmap(addr, SIZE);
}
@@ -247,19 +247,19 @@ static void test_softdirty(void)
/* Clear any softdirty bits. */
clear_softdirty();
ksft_test_result(range_is_not_softdirty(addr, SIZE),
- "range is not softdirty\n");
+ "cleared range is not softdirty\n");
/* Populating READ should set softdirty. */
ret = madvise(addr, SIZE, MADV_POPULATE_READ);
- ksft_test_result(!ret, "MADV_POPULATE_READ\n");
+ ksft_test_result(!ret, "softdirty MADV_POPULATE_READ\n");
ksft_test_result(range_is_not_softdirty(addr, SIZE),
- "range is not softdirty\n");
+ "range is not softdirty after MADV_POPULATE_READ\n");
/* Populating WRITE should set softdirty. */
ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
- ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
+ ksft_test_result(!ret, "softdirty MADV_POPULATE_WRITE\n");
ksft_test_result(range_is_softdirty(addr, SIZE),
- "range is softdirty\n");
+ "range is softdirty after MADV_POPULATE_WRITE \n");
munmap(addr, SIZE);
}
diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c
index 7f0d50fa361d..3e90ff37e336 100644
--- a/tools/testing/selftests/mm/mlock2-tests.c
+++ b/tools/testing/selftests/mm/mlock2-tests.c
@@ -196,7 +196,7 @@ static void test_mlock_lock(void)
ksft_exit_fail_msg("munlock(): %s\n", strerror(errno));
}
- ksft_test_result(!unlock_lock_check(map), "%s: Locked\n", __func__);
+ ksft_test_result(!unlock_lock_check(map), "%s: Unlocked\n", __func__);
munmap(map, 2 * page_size);
}
diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c
index 8a9d19b6020c..866ac023baf5 100644
--- a/tools/testing/selftests/mm/pfnmap.c
+++ b/tools/testing/selftests/mm/pfnmap.c
@@ -12,6 +12,8 @@
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
+#include <stdio.h>
+#include <ctype.h>
#include <fcntl.h>
#include <signal.h>
#include <setjmp.h>
@@ -43,14 +45,62 @@ static int test_read_access(char *addr, size_t size, size_t pagesize)
/* Force a read that the compiler cannot optimize out. */
*((volatile char *)(addr + offs));
}
- if (signal(SIGSEGV, signal_handler) == SIG_ERR)
+ if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
return -EINVAL;
return ret;
}
+static int find_ram_target(off_t *phys_addr,
+ unsigned long long pagesize)
+{
+ unsigned long long start, end;
+ char line[80], *end_ptr;
+ FILE *file;
+
+ /* Search /proc/iomem for the first suitable "System RAM" range. */
+ file = fopen("/proc/iomem", "r");
+ if (!file)
+ return -errno;
+
+ while (fgets(line, sizeof(line), file)) {
+ /* Ignore any child nodes. */
+ if (!isalnum(line[0]))
+ continue;
+
+ if (!strstr(line, "System RAM\n"))
+ continue;
+
+ start = strtoull(line, &end_ptr, 16);
+ /* Skip over the "-" */
+ end_ptr++;
+ /* Make end "exclusive". */
+ end = strtoull(end_ptr, NULL, 16) + 1;
+
+ /* Actual addresses are not exported */
+ if (!start && !end)
+ break;
+
+ /* We need full pages. */
+ start = (start + pagesize - 1) & ~(pagesize - 1);
+ end &= ~(pagesize - 1);
+
+ if (start != (off_t)start)
+ break;
+
+ /* We need two pages. */
+ if (end > start + 2 * pagesize) {
+ fclose(file);
+ *phys_addr = start;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
FIXTURE(pfnmap)
{
+ off_t phys_addr;
size_t pagesize;
int dev_mem_fd;
char *addr1;
@@ -63,14 +113,17 @@ FIXTURE_SETUP(pfnmap)
{
self->pagesize = getpagesize();
+ /* We'll require two physical pages throughout our tests ... */
+ if (find_ram_target(&self->phys_addr, self->pagesize))
+ SKIP(return, "Cannot find ram target in '/proc/iomem'\n");
+
self->dev_mem_fd = open("/dev/mem", O_RDONLY);
if (self->dev_mem_fd < 0)
SKIP(return, "Cannot open '/dev/mem'\n");
- /* We'll require the first two pages throughout our tests ... */
self->size1 = self->pagesize * 2;
self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED,
- self->dev_mem_fd, 0);
+ self->dev_mem_fd, self->phys_addr);
if (self->addr1 == MAP_FAILED)
SKIP(return, "Cannot mmap '/dev/mem'\n");
@@ -129,7 +182,7 @@ TEST_F(pfnmap, munmap_split)
*/
self->size2 = self->pagesize;
self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED,
- self->dev_mem_fd, 0);
+ self->dev_mem_fd, self->phys_addr);
ASSERT_NE(self->addr2, MAP_FAILED);
}
diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c
index cd5174d735be..a41bc1234b37 100644
--- a/tools/testing/selftests/mm/thuge-gen.c
+++ b/tools/testing/selftests/mm/thuge-gen.c
@@ -127,7 +127,7 @@ void test_mmap(unsigned long size, unsigned flags)
show(size);
ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
- "%s mmap %lu\n", __func__, size);
+ "%s mmap %lu %x\n", __func__, size, flags);
if (munmap(map, size * NUM_PAGES))
ksft_exit_fail_msg("%s: unmap %s\n", __func__, strerror(errno));
@@ -165,7 +165,7 @@ void test_shmget(unsigned long size, unsigned flags)
show(size);
ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
- "%s: mmap %lu\n", __func__, size);
+ "%s: mmap %lu %x\n", __func__, size, flags);
if (shmdt(map))
ksft_exit_fail_msg("%s: shmdt: %s\n", __func__, strerror(errno));
}
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh
index 1f92e8caceac..325de53966b6 100755
--- a/tools/testing/selftests/mm/va_high_addr_switch.sh
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
@@ -7,23 +7,20 @@
# real test to check that the kernel is configured to support at least 5
# pagetable levels.
-# 1 means the test failed
-exitcode=1
-
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
-fail()
+skip()
{
echo "$1"
- exit $exitcode
+ exit $ksft_skip
}
check_supported_x86_64()
{
local config="/proc/config.gz"
[[ -f "${config}" ]] || config="/boot/config-$(uname -r)"
- [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot"
+ [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot"
# gzip -dcfq automatically handles both compressed and plaintext input.
# See man 1 gzip under '-f'.
@@ -33,11 +30,9 @@ check_supported_x86_64()
else {print 1}; exit}' /proc/cpuinfo 2>/dev/null)
if [[ "${pg_table_levels}" -lt 5 ]]; then
- echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
- exit $ksft_skip
+ skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then
- echo "$0: CPU does not have the necessary la57 flag to support page table level 5"
- exit $ksft_skip
+ skip "$0: CPU does not have the necessary la57 flag to support page table level 5"
fi
}
@@ -45,24 +40,21 @@ check_supported_ppc64()
{
local config="/proc/config.gz"
[[ -f "${config}" ]] || config="/boot/config-$(uname -r)"
- [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot"
+ [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot"
local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2)
if [[ "${pg_table_levels}" -lt 5 ]]; then
- echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
- exit $ksft_skip
+ skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
fi
local mmu_support=$(grep -m1 "mmu" /proc/cpuinfo | awk '{print $3}')
if [[ "$mmu_support" != "radix" ]]; then
- echo "$0: System does not use Radix MMU, required for 5-level paging"
- exit $ksft_skip
+ skip "$0: System does not use Radix MMU, required for 5-level paging"
fi
local hugepages_total=$(awk '/HugePages_Total/ {print $2}' /proc/meminfo)
if [[ "${hugepages_total}" -eq 0 ]]; then
- echo "$0: HugePages are not enabled, required for some tests"
- exit $ksft_skip
+ skip "$0: HugePages are not enabled, required for some tests"
fi
}
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index 1357e2d6a7b6..61d7bf1f8c62 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -439,7 +439,7 @@ int open_procmap(pid_t pid, struct procmap_fd *procmap_out)
sprintf(path, "/proc/%d/maps", pid);
procmap_out->query.size = sizeof(procmap_out->query);
procmap_out->fd = open(path, O_RDONLY);
- if (procmap_out < 0)
+ if (procmap_out->fd < 0)
ret = -errno;
return ret;
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index 9211ba640d9c..adb5d294a220 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -3,6 +3,7 @@
#include <stdbool.h>
#include <sys/mman.h>
#include <err.h>
+#include <stdarg.h>
#include <strings.h> /* ffsl() */
#include <unistd.h> /* _SC_PAGESIZE */
#include "../kselftest.h"
@@ -95,6 +96,25 @@ static inline int open_self_procmap(struct procmap_fd *procmap_out)
return open_procmap(pid, procmap_out);
}
+/* These helpers need to be inline to match the kselftest.h idiom. */
+static char test_name[1024];
+
+static inline void log_test_start(const char *name, ...)
+{
+ va_list args;
+ va_start(args, name);
+
+ vsnprintf(test_name, sizeof(test_name), name, args);
+ ksft_print_msg("[RUN] %s\n", test_name);
+
+ va_end(args);
+}
+
+static inline void log_test_result(int result)
+{
+ ksft_test_result_report(result, "%s\n", test_name);
+}
+
/*
* On ppc64 this will only work with radix 2M hugepage size
*/
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index f6e45e62da3a..441feb21aa5a 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -1461,4 +1461,9 @@ static inline int __call_mmap_prepare(struct file *file,
return file->f_op->mmap_prepare(desc);
}
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+ (void)vma;
+}
+
#endif /* __MM_VMA_INTERNAL_H */