From 63703f37aa09e2c12c0ff25afbf5c460b21bfe4c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 30 Jun 2021 18:52:20 -0700 Subject: mm: generalize ZONE_[DMA|DMA32] ZONE_[DMA|DMA32] configs have duplicate definitions on platforms that subscribe to them. Instead, just make them generic options which can be selected on applicable platforms. Also only x86/arm64 architectures could enable both ZONE_DMA and ZONE_DMA32 if EXPERT, add ARCH_HAS_ZONE_DMA_SET to make dma zone configurable and visible on the two architectures. Link: https://lkml.kernel.org/r/20210528074557.17768-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: Catalin Marinas [arm64] Acked-by: Geert Uytterhoeven [m68k] Acked-by: Mike Rapoport Acked-by: Palmer Dabbelt [RISC-V] Acked-by: Michal Simek [microblaze] Acked-by: Michael Ellerman [powerpc] Cc: Catalin Marinas Cc: Will Deacon Cc: Geert Uytterhoeven Cc: Thomas Bogendoerfer Cc: "David S. Miller" Cc: Ingo Molnar Cc: Borislav Petkov Cc: Richard Henderson Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/Kconfig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/alpha') diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 8954216b9956..77d3280dc678 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -40,6 +40,7 @@ config ALPHA select MMU_GATHER_NO_RANGE select SET_FS select SPARSEMEM_EXTREME if SPARSEMEM + select ZONE_DMA help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, @@ -65,10 +66,6 @@ config GENERIC_CALIBRATE_DELAY bool default y -config ZONE_DMA - bool - default y - config GENERIC_ISA_DMA bool default y -- cgit v1.2.3-59-g8ed1b From 4ca9b3859dac14bbef0c27d00667bb5b10917adb Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 30 Jun 2021 18:52:28 -0700 Subject: mm/madvise: introduce MADV_POPULATE_(READ|WRITE) to prefault page tables I. Background: Sparse Memory Mappings When we manage sparse memory mappings dynamically in user space - also sometimes involving MAP_NORESERVE - we want to dynamically populate/ discard memory inside such a sparse memory region. Example users are hypervisors (especially implementing memory ballooning or similar technologies like virtio-mem) and memory allocators. In addition, we want to fail in a nice way (instead of generating SIGBUS) if populating does not succeed because we are out of backend memory (which can happen easily with file-based mappings, especially tmpfs and hugetlbfs). While MADV_DONTNEED, MADV_REMOVE and FALLOC_FL_PUNCH_HOLE allow for reliably discarding memory for most mapping types, there is no generic approach to populate page tables and preallocate memory. Although mmap() supports MAP_POPULATE, it is not applicable to the concept of sparse memory mappings, where we want to populate/discard dynamically and avoid expensive/problematic remappings. In addition, we never actually report errors during the final populate phase - it is best-effort only. fallocate() can be used to preallocate file-based memory and fail in a safe way. However, it cannot really be used for any private mappings on anonymous files via memfd due to COW semantics. In addition, fallocate() does not actually populate page tables, so we still always get pagefaults on first access - which is sometimes undesired (i.e., real-time workloads) and requires real prefaulting of page tables, not just a preallocation of backend storage. There might be interesting use cases for sparse memory regions along with mlockall(MCL_ONFAULT) which fallocate() cannot satisfy as it does not prefault page tables. II. On preallcoation/prefaulting from user space Because we don't have a proper interface, what applications (like QEMU and databases) end up doing is touching (i.e., reading+writing one byte to not overwrite existing data) all individual pages. However, that approach 1) Can result in wear on storage backing, because we end up reading/writing each page; this is especially a problem for dax/pmem. 2) Can result in mmap_sem contention when prefaulting via multiple threads. 3) Requires expensive signal handling, especially to catch SIGBUS in case of hugetlbfs/shmem/file-backed memory. For example, this is problematic in hypervisors like QEMU where SIGBUS handlers might already be used by other subsystems concurrently to e.g, handle hardware errors. "Simply" doing preallocation concurrently from other thread is not that easy. III. On MADV_WILLNEED Extending MADV_WILLNEED is not an option because 1. It would change the semantics: "Expect access in the near future." and "might be a good idea to read some pages" vs. "Definitely populate/ preallocate all memory and definitely fail on errors.". 2. Existing users (like virtio-balloon in QEMU when deflating the balloon) don't want populate/prealloc semantics. They treat this rather as a hint to give a little performance boost without too much overhead - and don't expect that a lot of memory might get consumed or a lot of time might be spent. IV. MADV_POPULATE_READ and MADV_POPULATE_WRITE Let's introduce MADV_POPULATE_READ and MADV_POPULATE_WRITE, inspired by MAP_POPULATE, with the following semantics: 1. MADV_POPULATE_READ can be used to prefault page tables just like manually reading each individual page. This will not break any COW mappings. The shared zero page might get mapped and no backend storage might get preallocated -- allocation might be deferred to write-fault time. Especially shared file mappings require an explicit fallocate() upfront to actually preallocate backend memory (blocks in the file system) in case the file might have holes. 2. If MADV_POPULATE_READ succeeds, all page tables have been populated (prefaulted) readable once. 3. MADV_POPULATE_WRITE can be used to preallocate backend memory and prefault page tables just like manually writing (or reading+writing) each individual page. This will break any COW mappings -- e.g., the shared zeropage is never populated. 4. If MADV_POPULATE_WRITE succeeds, all page tables have been populated (prefaulted) writable once. 5. MADV_POPULATE_READ and MADV_POPULATE_WRITE cannot be applied to special mappings marked with VM_PFNMAP and VM_IO. Also, proper access permissions (e.g., PROT_READ, PROT_WRITE) are required. If any such mapping is encountered, madvise() fails with -EINVAL. 6. If MADV_POPULATE_READ or MADV_POPULATE_WRITE fails, some page tables might have been populated. 7. MADV_POPULATE_READ and MADV_POPULATE_WRITE will return -EHWPOISON when encountering a HW poisoned page in the range. 8. Similar to MAP_POPULATE, MADV_POPULATE_READ and MADV_POPULATE_WRITE cannot protect from the OOM (Out Of Memory) handler killing the process. While the use case for MADV_POPULATE_WRITE is fairly obvious (i.e., preallocate memory and prefault page tables for VMs), one issue is that whenever we prefault pages writable, the pages have to be marked dirty, because the CPU could dirty them any time. while not a real problem for hugetlbfs or dax/pmem, it can be a problem for shared file mappings: each page will be marked dirty and has to be written back later when evicting. MADV_POPULATE_READ allows for optimizing this scenario: Pre-read a whole mapping from backend storage without marking it dirty, such that eviction won't have to write it back. As discussed above, shared file mappings might require an explciit fallocate() upfront to achieve preallcoation+prepopulation. Although sparse memory mappings are the primary use case, this will also be useful for other preallocate/prefault use cases where MAP_POPULATE is not desired or the semantics of MAP_POPULATE are not sufficient: as one example, QEMU users can trigger preallocation/prefaulting of guest RAM after the mapping was created -- and don't want errors to be silently suppressed. Looking at the history, MADV_POPULATE was already proposed in 2013 [1], however, the main motivation back than was performance improvements -- which should also still be the case. V. Single-threaded performance comparison I did a short experiment, prefaulting page tables on completely *empty mappings/files* and repeated the experiment 10 times. The results correspond to the shortest execution time. In general, the performance benefit for huge pages is negligible with small mappings. V.1: Private mappings POPULATE_READ and POPULATE_WRITE is fastest. Note that Reading/POPULATE_READ will populate the shared zeropage where applicable -- which result in short population times. The fastest way to allocate backend storage (here: swap or huge pages) and prefault page tables is POPULATE_WRITE. V.2: Shared mappings fallocate() is fastest, however, doesn't prefault page tables. POPULATE_WRITE is faster than simple writes and read/writes. POPULATE_READ is faster than simple reads. Without a fd, the fastest way to allocate backend storage and prefault page tables is POPULATE_WRITE. With an fd, the fastest way is usually FALLOCATE+POPULATE_READ or FALLOCATE+POPULATE_WRITE respectively; one exception are actual files: FALLOCATE+Read is slightly faster than FALLOCATE+POPULATE_READ. The fastest way to allocate backend storage prefault page tables is FALLOCATE+POPULATE_WRITE -- except when dealing with actual files; then, FALLOCATE+POPULATE_READ is fastest and won't directly mark all pages as dirty. v.3: Detailed results ================================================== 2 MiB MAP_PRIVATE: ************************************************** Anon 4 KiB : Read : 0.119 ms Anon 4 KiB : Write : 0.222 ms Anon 4 KiB : Read/Write : 0.380 ms Anon 4 KiB : POPULATE_READ : 0.060 ms Anon 4 KiB : POPULATE_WRITE : 0.158 ms Memfd 4 KiB : Read : 0.034 ms Memfd 4 KiB : Write : 0.310 ms Memfd 4 KiB : Read/Write : 0.362 ms Memfd 4 KiB : POPULATE_READ : 0.039 ms Memfd 4 KiB : POPULATE_WRITE : 0.229 ms Memfd 2 MiB : Read : 0.030 ms Memfd 2 MiB : Write : 0.030 ms Memfd 2 MiB : Read/Write : 0.030 ms Memfd 2 MiB : POPULATE_READ : 0.030 ms Memfd 2 MiB : POPULATE_WRITE : 0.030 ms tmpfs : Read : 0.033 ms tmpfs : Write : 0.313 ms tmpfs : Read/Write : 0.406 ms tmpfs : POPULATE_READ : 0.039 ms tmpfs : POPULATE_WRITE : 0.285 ms file : Read : 0.033 ms file : Write : 0.351 ms file : Read/Write : 0.408 ms file : POPULATE_READ : 0.039 ms file : POPULATE_WRITE : 0.290 ms hugetlbfs : Read : 0.030 ms hugetlbfs : Write : 0.030 ms hugetlbfs : Read/Write : 0.030 ms hugetlbfs : POPULATE_READ : 0.030 ms hugetlbfs : POPULATE_WRITE : 0.030 ms ************************************************** 4096 MiB MAP_PRIVATE: ************************************************** Anon 4 KiB : Read : 237.940 ms Anon 4 KiB : Write : 708.409 ms Anon 4 KiB : Read/Write : 1054.041 ms Anon 4 KiB : POPULATE_READ : 124.310 ms Anon 4 KiB : POPULATE_WRITE : 572.582 ms Memfd 4 KiB : Read : 136.928 ms Memfd 4 KiB : Write : 963.898 ms Memfd 4 KiB : Read/Write : 1106.561 ms Memfd 4 KiB : POPULATE_READ : 78.450 ms Memfd 4 KiB : POPULATE_WRITE : 805.881 ms Memfd 2 MiB : Read : 357.116 ms Memfd 2 MiB : Write : 357.210 ms Memfd 2 MiB : Read/Write : 357.606 ms Memfd 2 MiB : POPULATE_READ : 356.094 ms Memfd 2 MiB : POPULATE_WRITE : 356.937 ms tmpfs : Read : 137.536 ms tmpfs : Write : 954.362 ms tmpfs : Read/Write : 1105.954 ms tmpfs : POPULATE_READ : 80.289 ms tmpfs : POPULATE_WRITE : 822.826 ms file : Read : 137.874 ms file : Write : 987.025 ms file : Read/Write : 1107.439 ms file : POPULATE_READ : 80.413 ms file : POPULATE_WRITE : 857.622 ms hugetlbfs : Read : 355.607 ms hugetlbfs : Write : 355.729 ms hugetlbfs : Read/Write : 356.127 ms hugetlbfs : POPULATE_READ : 354.585 ms hugetlbfs : POPULATE_WRITE : 355.138 ms ************************************************** 2 MiB MAP_SHARED: ************************************************** Anon 4 KiB : Read : 0.394 ms Anon 4 KiB : Write : 0.348 ms Anon 4 KiB : Read/Write : 0.400 ms Anon 4 KiB : POPULATE_READ : 0.326 ms Anon 4 KiB : POPULATE_WRITE : 0.273 ms Anon 2 MiB : Read : 0.030 ms Anon 2 MiB : Write : 0.030 ms Anon 2 MiB : Read/Write : 0.030 ms Anon 2 MiB : POPULATE_READ : 0.030 ms Anon 2 MiB : POPULATE_WRITE : 0.030 ms Memfd 4 KiB : Read : 0.412 ms Memfd 4 KiB : Write : 0.372 ms Memfd 4 KiB : Read/Write : 0.419 ms Memfd 4 KiB : POPULATE_READ : 0.343 ms Memfd 4 KiB : POPULATE_WRITE : 0.288 ms Memfd 4 KiB : FALLOCATE : 0.137 ms Memfd 4 KiB : FALLOCATE+Read : 0.446 ms Memfd 4 KiB : FALLOCATE+Write : 0.330 ms Memfd 4 KiB : FALLOCATE+Read/Write : 0.454 ms Memfd 4 KiB : FALLOCATE+POPULATE_READ : 0.379 ms Memfd 4 KiB : FALLOCATE+POPULATE_WRITE : 0.268 ms Memfd 2 MiB : Read : 0.030 ms Memfd 2 MiB : Write : 0.030 ms Memfd 2 MiB : Read/Write : 0.030 ms Memfd 2 MiB : POPULATE_READ : 0.030 ms Memfd 2 MiB : POPULATE_WRITE : 0.030 ms Memfd 2 MiB : FALLOCATE : 0.030 ms Memfd 2 MiB : FALLOCATE+Read : 0.031 ms Memfd 2 MiB : FALLOCATE+Write : 0.031 ms Memfd 2 MiB : FALLOCATE+Read/Write : 0.031 ms Memfd 2 MiB : FALLOCATE+POPULATE_READ : 0.030 ms Memfd 2 MiB : FALLOCATE+POPULATE_WRITE : 0.030 ms tmpfs : Read : 0.416 ms tmpfs : Write : 0.369 ms tmpfs : Read/Write : 0.425 ms tmpfs : POPULATE_READ : 0.346 ms tmpfs : POPULATE_WRITE : 0.295 ms tmpfs : FALLOCATE : 0.139 ms tmpfs : FALLOCATE+Read : 0.447 ms tmpfs : FALLOCATE+Write : 0.333 ms tmpfs : FALLOCATE+Read/Write : 0.454 ms tmpfs : FALLOCATE+POPULATE_READ : 0.380 ms tmpfs : FALLOCATE+POPULATE_WRITE : 0.272 ms file : Read : 0.191 ms file : Write : 0.511 ms file : Read/Write : 0.524 ms file : POPULATE_READ : 0.196 ms file : POPULATE_WRITE : 0.434 ms file : FALLOCATE : 0.004 ms file : FALLOCATE+Read : 0.197 ms file : FALLOCATE+Write : 0.554 ms file : FALLOCATE+Read/Write : 0.480 ms file : FALLOCATE+POPULATE_READ : 0.201 ms file : FALLOCATE+POPULATE_WRITE : 0.381 ms hugetlbfs : Read : 0.030 ms hugetlbfs : Write : 0.030 ms hugetlbfs : Read/Write : 0.030 ms hugetlbfs : POPULATE_READ : 0.030 ms hugetlbfs : POPULATE_WRITE : 0.030 ms hugetlbfs : FALLOCATE : 0.030 ms hugetlbfs : FALLOCATE+Read : 0.031 ms hugetlbfs : FALLOCATE+Write : 0.031 ms hugetlbfs : FALLOCATE+Read/Write : 0.030 ms hugetlbfs : FALLOCATE+POPULATE_READ : 0.030 ms hugetlbfs : FALLOCATE+POPULATE_WRITE : 0.030 ms ************************************************** 4096 MiB MAP_SHARED: ************************************************** Anon 4 KiB : Read : 1053.090 ms Anon 4 KiB : Write : 913.642 ms Anon 4 KiB : Read/Write : 1060.350 ms Anon 4 KiB : POPULATE_READ : 893.691 ms Anon 4 KiB : POPULATE_WRITE : 782.885 ms Anon 2 MiB : Read : 358.553 ms Anon 2 MiB : Write : 358.419 ms Anon 2 MiB : Read/Write : 357.992 ms Anon 2 MiB : POPULATE_READ : 357.533 ms Anon 2 MiB : POPULATE_WRITE : 357.808 ms Memfd 4 KiB : Read : 1078.144 ms Memfd 4 KiB : Write : 942.036 ms Memfd 4 KiB : Read/Write : 1100.391 ms Memfd 4 KiB : POPULATE_READ : 925.829 ms Memfd 4 KiB : POPULATE_WRITE : 804.394 ms Memfd 4 KiB : FALLOCATE : 304.632 ms Memfd 4 KiB : FALLOCATE+Read : 1163.359 ms Memfd 4 KiB : FALLOCATE+Write : 933.186 ms Memfd 4 KiB : FALLOCATE+Read/Write : 1187.304 ms Memfd 4 KiB : FALLOCATE+POPULATE_READ : 1013.660 ms Memfd 4 KiB : FALLOCATE+POPULATE_WRITE : 794.560 ms Memfd 2 MiB : Read : 358.131 ms Memfd 2 MiB : Write : 358.099 ms Memfd 2 MiB : Read/Write : 358.250 ms Memfd 2 MiB : POPULATE_READ : 357.563 ms Memfd 2 MiB : POPULATE_WRITE : 357.334 ms Memfd 2 MiB : FALLOCATE : 356.735 ms Memfd 2 MiB : FALLOCATE+Read : 358.152 ms Memfd 2 MiB : FALLOCATE+Write : 358.331 ms Memfd 2 MiB : FALLOCATE+Read/Write : 358.018 ms Memfd 2 MiB : FALLOCATE+POPULATE_READ : 357.286 ms Memfd 2 MiB : FALLOCATE+POPULATE_WRITE : 357.523 ms tmpfs : Read : 1087.265 ms tmpfs : Write : 950.840 ms tmpfs : Read/Write : 1107.567 ms tmpfs : POPULATE_READ : 922.605 ms tmpfs : POPULATE_WRITE : 810.094 ms tmpfs : FALLOCATE : 306.320 ms tmpfs : FALLOCATE+Read : 1169.796 ms tmpfs : FALLOCATE+Write : 933.730 ms tmpfs : FALLOCATE+Read/Write : 1191.610 ms tmpfs : FALLOCATE+POPULATE_READ : 1020.474 ms tmpfs : FALLOCATE+POPULATE_WRITE : 798.945 ms file : Read : 654.101 ms file : Write : 1259.142 ms file : Read/Write : 1289.509 ms file : POPULATE_READ : 661.642 ms file : POPULATE_WRITE : 1106.816 ms file : FALLOCATE : 1.864 ms file : FALLOCATE+Read : 656.328 ms file : FALLOCATE+Write : 1153.300 ms file : FALLOCATE+Read/Write : 1180.613 ms file : FALLOCATE+POPULATE_READ : 668.347 ms file : FALLOCATE+POPULATE_WRITE : 996.143 ms hugetlbfs : Read : 357.245 ms hugetlbfs : Write : 357.413 ms hugetlbfs : Read/Write : 357.120 ms hugetlbfs : POPULATE_READ : 356.321 ms hugetlbfs : POPULATE_WRITE : 356.693 ms hugetlbfs : FALLOCATE : 355.927 ms hugetlbfs : FALLOCATE+Read : 357.074 ms hugetlbfs : FALLOCATE+Write : 357.120 ms hugetlbfs : FALLOCATE+Read/Write : 356.983 ms hugetlbfs : FALLOCATE+POPULATE_READ : 356.413 ms hugetlbfs : FALLOCATE+POPULATE_WRITE : 356.266 ms ************************************************** [1] https://lkml.org/lkml/2013/6/27/698 [akpm@linux-foundation.org: coding style fixes] Link: https://lkml.kernel.org/r/20210419135443.12822-3-david@redhat.com Signed-off-by: David Hildenbrand Cc: Arnd Bergmann Cc: Michal Hocko Cc: Oscar Salvador Cc: Matthew Wilcox (Oracle) Cc: Andrea Arcangeli Cc: Minchan Kim Cc: Jann Horn Cc: Jason Gunthorpe Cc: Dave Hansen Cc: Hugh Dickins Cc: Rik van Riel Cc: Michael S. Tsirkin Cc: Kirill A. Shutemov Cc: Vlastimil Babka Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Thomas Bogendoerfer Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Chris Zankel Cc: Max Filippov Cc: Mike Kravetz Cc: Peter Xu Cc: Rolf Eike Beer Cc: Ram Pai Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/uapi/asm/mman.h | 3 ++ arch/mips/include/uapi/asm/mman.h | 3 ++ arch/parisc/include/uapi/asm/mman.h | 3 ++ arch/xtensa/include/uapi/asm/mman.h | 3 ++ include/uapi/asm-generic/mman-common.h | 3 ++ mm/gup.c | 58 ++++++++++++++++++++++++++++++ mm/internal.h | 3 ++ mm/madvise.c | 66 ++++++++++++++++++++++++++++++++++ 8 files changed, 142 insertions(+) (limited to 'arch/alpha') diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h index a18ec7f63888..56b4ee5a6c9e 100644 --- a/arch/alpha/include/uapi/asm/mman.h +++ b/arch/alpha/include/uapi/asm/mman.h @@ -71,6 +71,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index 57dc2ac4f8bd..40b210c65a5a 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -98,6 +98,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index ab78cba446ed..9e3c010c0f61 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -52,6 +52,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + #define MADV_MERGEABLE 65 /* KSM may merge identical pages */ #define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */ diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index e5e643752947..b3a22095371b 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -106,6 +106,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index f94f65d429be..1567a3294c3d 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -72,6 +72,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/mm/gup.c b/mm/gup.c index 8651309f8ec3..728d996767cb 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1500,6 +1500,64 @@ long populate_vma_page_range(struct vm_area_struct *vma, NULL, NULL, locked); } +/* + * faultin_vma_page_range() - populate (prefault) page tables inside the + * given VMA range readable/writable + * + * This takes care of mlocking the pages, too, if VM_LOCKED is set. + * + * @vma: target vma + * @start: start address + * @end: end address + * @write: whether to prefault readable or writable + * @locked: whether the mmap_lock is still held + * + * Returns either number of processed pages in the vma, or a negative error + * code on error (see __get_user_pages()). + * + * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and + * covered by the VMA. + * + * If @locked is NULL, it may be held for read or write and will be unperturbed. + * + * If @locked is non-NULL, it must held for read only and may be released. If + * it's released, *@locked will be set to 0. + */ +long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end, bool write, int *locked) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long nr_pages = (end - start) / PAGE_SIZE; + int gup_flags; + + VM_BUG_ON(!PAGE_ALIGNED(start)); + VM_BUG_ON(!PAGE_ALIGNED(end)); + VM_BUG_ON_VMA(start < vma->vm_start, vma); + VM_BUG_ON_VMA(end > vma->vm_end, vma); + mmap_assert_locked(mm); + + /* + * FOLL_TOUCH: Mark page accessed and thereby young; will also mark + * the page dirty with FOLL_WRITE -- which doesn't make a + * difference with !FOLL_FORCE, because the page is writable + * in the page table. + * FOLL_HWPOISON: Return -EHWPOISON instead of -EFAULT when we hit + * a poisoned page. + * FOLL_POPULATE: Always populate memory with VM_LOCKONFAULT. + * !FOLL_FORCE: Require proper access permissions. + */ + gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK | FOLL_HWPOISON; + if (write) + gup_flags |= FOLL_WRITE; + + /* + * See check_vma_flags(): Will return -EFAULT on incompatible mappings + * or with insufficient permissions. + */ + return __get_user_pages(mm, start, nr_pages, gup_flags, + NULL, NULL, locked); +} + /* * __mm_populate - populate and/or mlock pages within a range of address space. * diff --git a/mm/internal.h b/mm/internal.h index d9c2b33cd2ab..f38bc5d75ab7 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -345,6 +345,9 @@ void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma); #ifdef CONFIG_MMU extern long populate_vma_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *locked); +extern long faultin_vma_page_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + bool write, int *locked); extern void munlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); static inline void munlock_vma_pages_all(struct vm_area_struct *vma) diff --git a/mm/madvise.c b/mm/madvise.c index 63e489e5bfdb..6d3d348b17f4 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -53,6 +53,8 @@ static int madvise_need_mmap_write(int behavior) case MADV_COLD: case MADV_PAGEOUT: case MADV_FREE: + case MADV_POPULATE_READ: + case MADV_POPULATE_WRITE: return 0; default: /* be safe, default to 1. list exceptions explicitly */ @@ -822,6 +824,61 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, return -EINVAL; } +static long madvise_populate(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end, + int behavior) +{ + const bool write = behavior == MADV_POPULATE_WRITE; + struct mm_struct *mm = vma->vm_mm; + unsigned long tmp_end; + int locked = 1; + long pages; + + *prev = vma; + + while (start < end) { + /* + * We might have temporarily dropped the lock. For example, + * our VMA might have been split. + */ + if (!vma || start >= vma->vm_end) { + vma = find_vma(mm, start); + if (!vma || start < vma->vm_start) + return -ENOMEM; + } + + tmp_end = min_t(unsigned long, end, vma->vm_end); + /* Populate (prefault) page tables readable/writable. */ + pages = faultin_vma_page_range(vma, start, tmp_end, write, + &locked); + if (!locked) { + mmap_read_lock(mm); + locked = 1; + *prev = NULL; + vma = NULL; + } + if (pages < 0) { + switch (pages) { + case -EINTR: + return -EINTR; + case -EFAULT: /* Incompatible mappings / permissions. */ + return -EINVAL; + case -EHWPOISON: + return -EHWPOISON; + default: + pr_warn_once("%s: unhandled return value: %ld\n", + __func__, pages); + fallthrough; + case -ENOMEM: + return -ENOMEM; + } + } + start += pages * PAGE_SIZE; + } + return 0; +} + /* * Application wants to free up the pages and associated backing store. * This is effectively punching a hole into the middle of a file. @@ -935,6 +992,9 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, case MADV_FREE: case MADV_DONTNEED: return madvise_dontneed_free(vma, prev, start, end, behavior); + case MADV_POPULATE_READ: + case MADV_POPULATE_WRITE: + return madvise_populate(vma, prev, start, end, behavior); default: return madvise_behavior(vma, prev, start, end, behavior); } @@ -955,6 +1015,8 @@ madvise_behavior_valid(int behavior) case MADV_FREE: case MADV_COLD: case MADV_PAGEOUT: + case MADV_POPULATE_READ: + case MADV_POPULATE_WRITE: #ifdef CONFIG_KSM case MADV_MERGEABLE: case MADV_UNMERGEABLE: @@ -1042,6 +1104,10 @@ process_madvise_behavior_valid(int behavior) * easily if memory pressure happens. * MADV_PAGEOUT - the application is not expected to use this memory soon, * page out the pages in this range immediately. + * MADV_POPULATE_READ - populate (prefault) page tables readable by + * triggering read faults if required + * MADV_POPULATE_WRITE - populate (prefault) page tables writable by + * triggering write faults if required * * return values: * zero - success -- cgit v1.2.3-59-g8ed1b From fac7757e1fb05b75c8e22d4f8fe2f6c9c4d7edca Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 30 Jun 2021 18:53:13 -0700 Subject: mm: define default value for FIRST_USER_ADDRESS Currently most platforms define FIRST_USER_ADDRESS as 0UL duplication the same code all over. Instead just define a generic default value (i.e 0UL) for FIRST_USER_ADDRESS and let the platforms override when required. This makes it much cleaner with reduced code. The default FIRST_USER_ADDRESS here would be skipped in when the given platform overrides its value via . Link: https://lkml.kernel.org/r/1620615725-24623-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Geert Uytterhoeven [m68k] Acked-by: Guo Ren [csky] Acked-by: Stafford Horne [openrisc] Acked-by: Catalin Marinas [arm64] Acked-by: Mike Rapoport Acked-by: Palmer Dabbelt [RISC-V] Cc: Richard Henderson Cc: Vineet Gupta Cc: Catalin Marinas Cc: Will Deacon Cc: Guo Ren Cc: Brian Cain Cc: Geert Uytterhoeven Cc: Michal Simek Cc: Thomas Bogendoerfer Cc: Ley Foon Tan Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Christophe Leroy Cc: Paul Walmsley Cc: Heiko Carstens Cc: Yoshinori Sato Cc: "David S. Miller" Cc: Jeff Dike Cc: Thomas Gleixner Cc: Chris Zankel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgtable.h | 1 - arch/arc/include/asm/pgtable.h | 6 ------ arch/arm64/include/asm/pgtable.h | 2 -- arch/csky/include/asm/pgtable.h | 1 - arch/hexagon/include/asm/pgtable.h | 3 --- arch/ia64/include/asm/pgtable.h | 1 - arch/m68k/include/asm/pgtable_mm.h | 1 - arch/microblaze/include/asm/pgtable.h | 2 -- arch/mips/include/asm/pgtable-32.h | 1 - arch/mips/include/asm/pgtable-64.h | 1 - arch/nios2/include/asm/pgtable.h | 2 -- arch/openrisc/include/asm/pgtable.h | 1 - arch/parisc/include/asm/pgtable.h | 2 -- arch/powerpc/include/asm/book3s/pgtable.h | 1 - arch/powerpc/include/asm/nohash/32/pgtable.h | 1 - arch/powerpc/include/asm/nohash/64/pgtable.h | 2 -- arch/riscv/include/asm/pgtable.h | 2 -- arch/s390/include/asm/pgtable.h | 2 -- arch/sh/include/asm/pgtable.h | 2 -- arch/sparc/include/asm/pgtable_32.h | 1 - arch/sparc/include/asm/pgtable_64.h | 3 --- arch/um/include/asm/pgtable-2level.h | 1 - arch/um/include/asm/pgtable-3level.h | 1 - arch/x86/include/asm/pgtable_types.h | 2 -- arch/xtensa/include/asm/pgtable.h | 1 - include/linux/pgtable.h | 9 +++++++++ 26 files changed, 9 insertions(+), 43 deletions(-) (limited to 'arch/alpha') diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index e1757b7cfe3d..ff690846465e 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -46,7 +46,6 @@ struct vm_area_struct; #define PTRS_PER_PMD (1UL << (PAGE_SHIFT-3)) #define PTRS_PER_PGD (1UL << (PAGE_SHIFT-3)) #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0UL /* Number of pointers that fit on a page: this will go away. */ #define PTRS_PER_PAGE (1UL << (PAGE_SHIFT-3)) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 5878846f00cf..577682e8cc7f 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -222,12 +222,6 @@ */ #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -/* - * No special requirements for lowest virtual address we permit any user space - * mapping to be mapped at. - */ -#define FIRST_USER_ADDRESS 0UL - /**************************************************************** * Bucket load of VM Helpers diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0b10204e72fc..25f5c04b43ce 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -26,8 +26,6 @@ #define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) -#define FIRST_USER_ADDRESS 0UL - #ifndef __ASSEMBLY__ #include diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index 0d60367b6bfa..151607ed5158 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -14,7 +14,6 @@ #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define USER_PTRS_PER_PGD (PAGE_OFFSET/PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0UL /* * C-SKY is two-level paging structure: diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index dbb22b80b8c4..e4979508cddf 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -155,9 +155,6 @@ extern unsigned long _dflt_cache_att; extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; /* located in head.S */ -/* Seems to be zero even in architectures where the zero page is firewalled? */ -#define FIRST_USER_ADDRESS 0UL - /* HUGETLB not working currently */ #ifdef CONFIG_HUGETLB_PAGE #define pte_mkhuge(pte) __pte((pte_val(pte) & ~0x3) | HVM_HUGEPAGE_SIZE) diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index d765fd948fae..3f5dbbd8b9d8 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -128,7 +128,6 @@ #define PTRS_PER_PGD_SHIFT PTRS_PER_PTD_SHIFT #define PTRS_PER_PGD (1UL << PTRS_PER_PGD_SHIFT) #define USER_PTRS_PER_PGD (5*PTRS_PER_PGD/8) /* regions 0-4 are user regions */ -#define FIRST_USER_ADDRESS 0UL /* * All the normal masks have the "page accessed" bits on, as any time diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h index aca22c2c1ee2..143ba7de9bda 100644 --- a/arch/m68k/include/asm/pgtable_mm.h +++ b/arch/m68k/include/asm/pgtable_mm.h @@ -72,7 +72,6 @@ #define PTRS_PER_PGD 128 #endif #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0UL /* Virtual address region for use by kernel_map() */ #ifdef CONFIG_SUN3 diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 9ae8d2c17dd5..71cd547655d9 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -25,8 +25,6 @@ extern int mem_init_done; #include #include -#define FIRST_USER_ADDRESS 0UL - extern unsigned long va_to_phys(unsigned long address); extern pte_t *va_to_pte(unsigned long address); diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index 6c0532d7b211..95df9c293d8d 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -93,7 +93,6 @@ extern int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, #endif #define USER_PTRS_PER_PGD (0x80000000UL/PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0UL #define VMALLOC_START MAP_BASE diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 1e7d6ce9d8d6..046465906c82 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -137,7 +137,6 @@ #define PTRS_PER_PTE ((PAGE_SIZE << PTE_ORDER) / sizeof(pte_t)) #define USER_PTRS_PER_PGD ((TASK_SIZE64 / PGDIR_SIZE)?(TASK_SIZE64 / PGDIR_SIZE):1) -#define FIRST_USER_ADDRESS 0UL /* * TLB refill handlers also map the vmalloc area into xuseg. Avoid diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 2600d76c310c..4a995fa628ee 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -24,8 +24,6 @@ #include #include -#define FIRST_USER_ADDRESS 0UL - #define VMALLOC_START CONFIG_NIOS2_KERNEL_MMU_REGION_BASE #define VMALLOC_END (CONFIG_NIOS2_KERNEL_REGION_BASE - 1) diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 9425bedab4fc..4ac591c9ca33 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -73,7 +73,6 @@ extern void paging_init(void); */ #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0UL /* * Kernels own virtual memory area. diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 39017210dbf0..7f33c29764cc 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -171,8 +171,6 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) * pgd entries used up by user/kernel: */ -#define FIRST_USER_ADDRESS 0UL - /* NB: The tlb miss handlers make certain assumptions about the order */ /* of the following bits, so be careful (One example, bits 25-31 */ /* are moved together in one instruction). */ diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h index 0e1263455d73..ad130e15a126 100644 --- a/arch/powerpc/include/asm/book3s/pgtable.h +++ b/arch/powerpc/include/asm/book3s/pgtable.h @@ -8,7 +8,6 @@ #include #endif -#define FIRST_USER_ADDRESS 0UL #ifndef __ASSEMBLY__ /* Insert a PTE, top-level function is out of line. It uses an inline * low level function in the respective pgtable-* files diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 96522f7f0618..f06ae00f2a65 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -54,7 +54,6 @@ extern int icache_44x_need_flush; #define PGD_MASKED_BITS 0 #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0UL #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 57cd3892bfe0..53fbfdfac93d 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -12,8 +12,6 @@ #include #include -#define FIRST_USER_ADDRESS 0UL - /* * Size of EA range mapped by our pagetables. */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 380cd3a7e548..62f3fe7368f3 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -536,8 +536,6 @@ void setup_bootmem(void); void paging_init(void); void misc_mem_init(void); -#define FIRST_USER_ADDRESS 0 - /* * ZERO_PAGE is a global shared page that is always zero, * used for zero-mapped memory areas, etc. diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b38f7b781564..7c86bf03fc8f 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -65,8 +65,6 @@ extern unsigned long zero_page_mask; /* TODO: s390 cannot support io_remap_pfn_range... */ -#define FIRST_USER_ADDRESS 0UL - #define pte_ERROR(e) \ printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e)) #define pmd_ERROR(e) \ diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index 27751e9470df..d7ddb1ec86a0 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h @@ -59,8 +59,6 @@ static inline unsigned long long neff_sign_extend(unsigned long val) /* Entries per level */ #define PTRS_PER_PTE (PAGE_SIZE / (1 << PTE_MAGNITUDE)) -#define FIRST_USER_ADDRESS 0UL - #define PHYS_ADDR_MASK29 0x1fffffff #define PHYS_ADDR_MASK32 0xffffffff diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index a5cf79c149fe..0888bda245f5 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -48,7 +48,6 @@ unsigned long __init bootmem_init(unsigned long *pages_avail); #define PTRS_PER_PMD 64 #define PTRS_PER_PGD 256 #define USER_PTRS_PER_PGD PAGE_OFFSET / PGDIR_SIZE -#define FIRST_USER_ADDRESS 0UL #define PTE_SIZE (PTRS_PER_PTE*4) #define PAGE_NONE SRMMU_PAGE_NONE diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 2cd80a0a9795..a400e0f23046 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -95,9 +95,6 @@ bool kern_addr_valid(unsigned long addr); #define PTRS_PER_PUD (1UL << PUD_BITS) #define PTRS_PER_PGD (1UL << PGDIR_BITS) -/* Kernel has a separate 44bit address space. */ -#define FIRST_USER_ADDRESS 0UL - #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %p(%016lx) seen at (%pS)\n", \ __FILE__, __LINE__, &(e), pmd_val(e), __builtin_return_address(0)) diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index 32106d31e4ab..8256ecc5b919 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -23,7 +23,6 @@ #define PTRS_PER_PTE 1024 #define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE) #define PTRS_PER_PGD 1024 -#define FIRST_USER_ADDRESS 0UL #define pte_ERROR(e) \ printk("%s:%d: bad pte %p(%08lx).\n", __FILE__, __LINE__, &(e), \ diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index 7e6a4180db9d..9289a86643a9 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -41,7 +41,6 @@ #endif #define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0UL #define pte_ERROR(e) \ printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), \ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index f24d7ef8fffa..40497a9020c6 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -7,8 +7,6 @@ #include -#define FIRST_USER_ADDRESS 0UL - #define _PAGE_BIT_PRESENT 0 /* is present */ #define _PAGE_BIT_RW 1 /* writeable */ #define _PAGE_BIT_USER 2 /* userspace addressable */ diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index d7fc45c920c2..bd5aeb795567 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -59,7 +59,6 @@ #define PTRS_PER_PGD 1024 #define PGD_ORDER 0 #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0UL #define FIRST_USER_PGD_NR (FIRST_USER_ADDRESS >> PGDIR_SHIFT) #ifdef CONFIG_MMU diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2b0d02291178..69700e3e615f 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -28,6 +28,15 @@ #define USER_PGTABLES_CEILING 0UL #endif +/* + * This defines the first usable user address. Platforms + * can override its value with custom FIRST_USER_ADDRESS + * defined in their respective . + */ +#ifndef FIRST_USER_ADDRESS +#define FIRST_USER_ADDRESS 0UL +#endif + /* * A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD] * -- cgit v1.2.3-59-g8ed1b From 1c2f7d14d84f767a797558609eb034511e02f41e Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 30 Jun 2021 18:53:59 -0700 Subject: mm/thp: define default pmd_pgtable() Currently most platforms define pmd_pgtable() as pmd_page() duplicating the same code all over. Instead just define a default value i.e pmd_page() for pmd_pgtable() and let platforms override when required via . All the existing platform that override pmd_pgtable() have been moved into their respective header in order to precede before the new generic definition. This makes it much cleaner with reduced code. Link: https://lkml.kernel.org/r/1623646133-20306-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Geert Uytterhoeven Acked-by: Mike Rapoport Cc: Nick Hu Cc: Richard Henderson Cc: Vineet Gupta Cc: Catalin Marinas Cc: Will Deacon Cc: Guo Ren Cc: Brian Cain Cc: Geert Uytterhoeven Cc: Michal Simek Cc: Thomas Bogendoerfer Cc: Ley Foon Tan Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Christophe Leroy Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Heiko Carstens Cc: Yoshinori Sato Cc: "David S. Miller" Cc: Jeff Dike Cc: Thomas Gleixner Cc: Chris Zankel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgalloc.h | 1 - arch/arc/include/asm/pgalloc.h | 2 -- arch/arc/include/asm/pgtable.h | 2 ++ arch/arm/include/asm/pgalloc.h | 1 - arch/arm64/include/asm/pgalloc.h | 1 - arch/csky/include/asm/pgalloc.h | 2 -- arch/hexagon/include/asm/pgtable.h | 1 - arch/ia64/include/asm/pgalloc.h | 1 - arch/m68k/include/asm/mcf_pgalloc.h | 2 -- arch/m68k/include/asm/mcf_pgtable.h | 2 ++ arch/m68k/include/asm/motorola_pgalloc.h | 1 - arch/m68k/include/asm/motorola_pgtable.h | 2 ++ arch/m68k/include/asm/sun3_pgalloc.h | 1 - arch/microblaze/include/asm/pgalloc.h | 2 -- arch/mips/include/asm/pgalloc.h | 1 - arch/nds32/include/asm/pgalloc.h | 5 ----- arch/nios2/include/asm/pgalloc.h | 1 - arch/openrisc/include/asm/pgalloc.h | 2 -- arch/parisc/include/asm/pgalloc.h | 1 - arch/powerpc/include/asm/pgalloc.h | 5 ----- arch/powerpc/include/asm/pgtable.h | 6 ++++++ arch/riscv/include/asm/pgalloc.h | 2 -- arch/s390/include/asm/pgalloc.h | 3 --- arch/s390/include/asm/pgtable.h | 3 +++ arch/sh/include/asm/pgalloc.h | 1 - arch/sparc/include/asm/pgalloc_32.h | 1 - arch/sparc/include/asm/pgalloc_64.h | 1 - arch/sparc/include/asm/pgtable_32.h | 2 ++ arch/sparc/include/asm/pgtable_64.h | 2 ++ arch/um/include/asm/pgalloc.h | 1 - arch/x86/include/asm/pgalloc.h | 2 -- arch/xtensa/include/asm/pgalloc.h | 2 -- include/linux/pgtable.h | 9 +++++++++ 33 files changed, 28 insertions(+), 43 deletions(-) (limited to 'arch/alpha') diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index 9c6a24fe493d..68be7adbfe58 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -18,7 +18,6 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { pmd_set(pmd, (pte_t *)(page_to_pa(pte) + PAGE_OFFSET)); } -#define pmd_pgtable(pmd) pmd_page(pmd) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 6147db925248..a32ca3104ced 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -129,6 +129,4 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptep) #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) -#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) - #endif /* _ASM_ARC_PGALLOC_H */ diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 577682e8cc7f..320cc0ae8a08 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -350,6 +350,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, #define kern_addr_valid(addr) (1) +#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) + /* * remap a physical page `pfn' of size `size' with page protection `prot' * into virtual address `from' diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index fdee1f04f4f3..a17f01235c29 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -143,7 +143,6 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) __pmd_populate(pmdp, page_to_phys(ptep), prot); } -#define pmd_pgtable(pmd) pmd_page(pmd) #endif /* CONFIG_MMU */ diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 31fbab3d6f99..8433a2058eb1 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -86,6 +86,5 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) VM_BUG_ON(mm == &init_mm); __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN); } -#define pmd_pgtable(pmd) pmd_page(pmd) #endif diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h index cd211aabbefd..bbbd0698b397 100644 --- a/arch/csky/include/asm/pgalloc.h +++ b/arch/csky/include/asm/pgalloc.h @@ -22,8 +22,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, set_pmd(pmd, __pmd(__pa(page_address(pte)))); } -#define pmd_pgtable(pmd) pmd_page(pmd) - extern void pgd_init(unsigned long *p); static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index e4979508cddf..18cd6ea9ab23 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -239,7 +239,6 @@ static inline int pmd_bad(pmd_t pmd) * pmd_page - converts a PMD entry to a page pointer */ #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) -#define pmd_pgtable(pmd) pmd_page(pmd) /** * pte_none - check if pte is mapped diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h index 9601cfe83c94..0fb2b6291d58 100644 --- a/arch/ia64/include/asm/pgalloc.h +++ b/arch/ia64/include/asm/pgalloc.h @@ -52,7 +52,6 @@ pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, pgtable_t pte) { pmd_val(*pmd_entry) = page_to_phys(pte); } -#define pmd_pgtable(pmd) pmd_page(pmd) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte) diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index bc1228e00518..5c2c0a864524 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -32,8 +32,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address) #define pmd_populate_kernel pmd_populate -#define pmd_pgtable(pmd) pfn_to_virt(pmd_val(pmd) >> PAGE_SHIFT) - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pgtable, unsigned long address) { diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h index 8d4ec05996c5..6f2b87d7a50d 100644 --- a/arch/m68k/include/asm/mcf_pgtable.h +++ b/arch/m68k/include/asm/mcf_pgtable.h @@ -150,6 +150,8 @@ #ifndef __ASSEMBLY__ +#define pmd_pgtable(pmd) pfn_to_virt(pmd_val(pmd) >> PAGE_SHIFT) + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h index b4fc3b4f6bb3..74a817d9387f 100644 --- a/arch/m68k/include/asm/motorola_pgalloc.h +++ b/arch/m68k/include/asm/motorola_pgalloc.h @@ -88,7 +88,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page { pmd_set(pmd, page); } -#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 8076467eff4b..a2908164ee6f 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -105,6 +105,8 @@ extern unsigned long mm_cachebits; #define __S110 PAGE_SHARED_C #define __S111 PAGE_SHARED_C +#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 000f64869b91..198036aff519 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -32,7 +32,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page { pmd_val(*pmd) = __pa((unsigned long)page_address(page)); } -#define pmd_pgtable(pmd) pmd_page(pmd) /* * allocating and freeing a pmd is trivial: the 1-entry pmd is diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index d56b9f670ad1..6c33b05f730f 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -28,8 +28,6 @@ static inline pgd_t *get_pgd(void) #define pgd_alloc(mm) get_pgd() -#define pmd_pgtable(pmd) pmd_page(pmd) - extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, (pte)) diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 8b18424b3120..dd53d0f79cb3 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -28,7 +28,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, { set_pmd(pmd, __pmd((unsigned long)page_address(pte))); } -#define pmd_pgtable(pmd) pmd_page(pmd) /* * Initialize a new pmd table with invalid pointers. diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h index 85c117347c86..a08e1ebca70e 100644 --- a/arch/nds32/include/asm/pgalloc.h +++ b/arch/nds32/include/asm/pgalloc.h @@ -12,11 +12,6 @@ #define __HAVE_ARCH_PTE_ALLOC_ONE #include /* for pte_{alloc,free}_one */ -/* - * Since we have only two-level page tables, these are trivial - */ -#define pmd_pgtable(pmd) pmd_page(pmd) - extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t * pgd); diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index e6600d2a5ae0..3c4ae74d5798 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -25,7 +25,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, { set_pmd(pmd, __pmd((unsigned long)page_address(pte))); } -#define pmd_pgtable(pmd) pmd_page(pmd) /* * Initialize a new pmd table with invalid pointers. diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 88820299ecc4..b7b2b8d16fad 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -72,6 +72,4 @@ do { \ tlb_remove_page((tlb), (pte)); \ } while (0) -#define pmd_pgtable(pmd) pmd_page(pmd) - #endif diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index dda557085311..6a7e98e71f1d 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -69,6 +69,5 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) #define pmd_populate(mm, pmd, pte_page) \ pmd_populate_kernel(mm, pmd, page_address(pte_page)) -#define pmd_pgtable(pmd) pmd_page(pmd) #endif diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index 6dd78a2dc03a..3360cad78ace 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -70,9 +70,4 @@ extern struct kmem_cache *pgtable_cache[]; #include #endif -static inline pgtable_t pmd_pgtable(pmd_t pmd) -{ - return (pgtable_t)pmd_page_vaddr(pmd); -} - #endif /* _ASM_POWERPC_PGALLOC_H */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index c6a676714f04..5969743719bc 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -152,6 +152,12 @@ static inline bool p4d_is_leaf(p4d_t p4d) } #endif +#define pmd_pgtable pmd_pgtable +static inline pgtable_t pmd_pgtable(pmd_t pmd) +{ + return (pgtable_t)pmd_page_vaddr(pmd); +} + #ifdef CONFIG_PPC64 #define is_ioremap_addr is_ioremap_addr static inline bool is_ioremap_addr(const void *x) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 23b1544e0ca5..0af6933a7100 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -38,8 +38,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) } #endif /* __PAGETABLE_PMD_FOLDED */ -#define pmd_pgtable(pmd) pmd_page(pmd) - static inline pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 6b187cd72251..f14a555eff74 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -134,9 +134,6 @@ static inline void pmd_populate(struct mm_struct *mm, #define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) -#define pmd_pgtable(pmd) \ - ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)) - /* * page table entry allocation/free routines. */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7c86bf03fc8f..1f8f5da53262 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1709,4 +1709,7 @@ extern void s390_reset_cmma(struct mm_struct *mm); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN +#define pmd_pgtable(pmd) \ + ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)) + #endif /* _S390_PAGE_H */ diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index 0e6b0be25e33..a9e98233c4d4 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -30,7 +30,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, { set_pmd(pmd, __pmd((unsigned long)page_address(pte))); } -#define pmd_pgtable(pmd) pmd_page(pmd) #define __pte_free_tlb(tlb,pte,addr) \ do { \ diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h index 9d353e6dc5a9..4f73e87b22a3 100644 --- a/arch/sparc/include/asm/pgalloc_32.h +++ b/arch/sparc/include/asm/pgalloc_32.h @@ -51,7 +51,6 @@ static inline void free_pmd_fast(pmd_t * pmd) #define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) #define pmd_populate(mm, pmd, pte) pmd_set(pmd, pte) -#define pmd_pgtable(pmd) (pgtable_t)__pmd_page(pmd) void pmd_set(pmd_t *pmdp, pte_t *ptep); #define pmd_populate_kernel pmd_populate diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index a8dafc550985..7b5561d17ab1 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -67,7 +67,6 @@ void pte_free(struct mm_struct *mm, pgtable_t ptepage); #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(MM, PMD, PTE) #define pmd_populate(MM, PMD, PTE) pmd_set(MM, PMD, PTE) -#define pmd_pgtable(PMD) ((pte_t *)pmd_page_vaddr(PMD)) void pgtable_free(void *table, bool is_page); diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 0888bda245f5..ebaf374b55ab 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -432,4 +432,6 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, /* We provide our own get_unmapped_area to cope with VA holes for userland */ #define HAVE_ARCH_UNMAPPED_AREA +#define pmd_pgtable(pmd) ((pgtable_t)__pmd_page(pmd)) + #endif /* !(_SPARC_PGTABLE_H) */ diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index a400e0f23046..e0ee48ec3903 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -1117,6 +1117,8 @@ extern unsigned long cmdline_memory_size; asmlinkage void do_sparc64_fault(struct pt_regs *regs); +#define pmd_pgtable(PMD) ((pte_t *)pmd_page_vaddr(PMD)) + #ifdef CONFIG_HUGETLB_PAGE #define pud_leaf_size pud_leaf_size diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index 2bbf28cf3aa9..8ec7cd46dd96 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -19,7 +19,6 @@ set_pmd(pmd, __pmd(_PAGE_TABLE + \ ((unsigned long long)page_to_pfn(pte) << \ (unsigned long long) PAGE_SHIFT))) -#define pmd_pgtable(pmd) pmd_page(pmd) /* * Allocate and free page tables. diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 62ad61d6fefc..c7ec5bb88334 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -84,8 +84,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); } -#define pmd_pgtable(pmd) pmd_page(pmd) - #if CONFIG_PGTABLE_LEVELS > 2 extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index d3a22da4d2c9..eeb2de3a89e5 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -25,7 +25,6 @@ (pmd_val(*(pmdp)) = ((unsigned long)ptep)) #define pmd_populate(mm, pmdp, page) \ (pmd_val(*(pmdp)) = ((unsigned long)page_to_virt(page))) -#define pmd_pgtable(pmd) pmd_page(pmd) static inline pgd_t* pgd_alloc(struct mm_struct *mm) @@ -63,7 +62,6 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm) return page; } -#define pmd_pgtable(pmd) pmd_page(pmd) #endif /* CONFIG_MMU */ #endif /* _XTENSA_PGALLOC_H */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 69700e3e615f..e82660f7b9e4 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -37,6 +37,15 @@ #define FIRST_USER_ADDRESS 0UL #endif +/* + * This defines the generic helper for accessing PMD page + * table page. Although platforms can still override this + * via their respective . + */ +#ifndef pmd_pgtable +#define pmd_pgtable(pmd) pmd_page(pmd) +#endif + /* * A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD] * -- cgit v1.2.3-59-g8ed1b From f39650de687e35766572ac89dbcd16a5911e2f0a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 30 Jun 2021 18:54:59 -0700 Subject: kernel.h: split out panic and oops helpers kernel.h is being used as a dump for all kinds of stuff for a long time. Here is the attempt to start cleaning it up by splitting out panic and oops helpers. There are several purposes of doing this: - dropping dependency in bug.h - dropping a loop by moving out panic_notifier.h - unload kernel.h from something which has its own domain At the same time convert users tree-wide to use new headers, although for the time being include new header back to kernel.h to avoid twisted indirected includes for existing users. [akpm@linux-foundation.org: thread_info.h needs limits.h] [andriy.shevchenko@linux.intel.com: ia64 fix] Link: https://lkml.kernel.org/r/20210520130557.55277-1-andriy.shevchenko@linux.intel.com Link: https://lkml.kernel.org/r/20210511074137.33666-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Bjorn Andersson Co-developed-by: Andrew Morton Acked-by: Mike Rapoport Acked-by: Corey Minyard Acked-by: Christian Brauner Acked-by: Arnd Bergmann Acked-by: Kees Cook Acked-by: Wei Liu Acked-by: Rasmus Villemoes Signed-off-by: Andrew Morton Acked-by: Sebastian Reichel Acked-by: Luis Chamberlain Acked-by: Stephen Boyd Acked-by: Thomas Bogendoerfer Acked-by: Helge Deller # parisc Signed-off-by: Linus Torvalds --- arch/alpha/kernel/setup.c | 2 +- arch/arm64/kernel/setup.c | 1 + arch/ia64/include/asm/pal.h | 1 + arch/mips/kernel/relocate.c | 1 + arch/mips/sgi-ip22/ip22-reset.c | 1 + arch/mips/sgi-ip32/ip32-reset.c | 1 + arch/parisc/kernel/pdc_chassis.c | 1 + arch/powerpc/kernel/setup-common.c | 1 + arch/s390/kernel/ipl.c | 1 + arch/sparc/kernel/sstate.c | 1 + arch/um/drivers/mconsole_kern.c | 1 + arch/um/kernel/um_arch.c | 1 + arch/x86/include/asm/desc.h | 1 + arch/x86/kernel/cpu/mshyperv.c | 1 + arch/x86/kernel/setup.c | 1 + arch/x86/purgatory/purgatory.c | 2 + arch/x86/xen/enlighten.c | 1 + arch/xtensa/platforms/iss/setup.c | 1 + drivers/bus/brcmstb_gisb.c | 1 + drivers/char/ipmi/ipmi_msghandler.c | 1 + drivers/clk/analogbits/wrpll-cln28hpc.c | 4 + drivers/edac/altera_edac.c | 1 + drivers/firmware/google/gsmi.c | 1 + drivers/hv/vmbus_drv.c | 1 + drivers/hwtracing/coresight/coresight-cpu-debug.c | 1 + drivers/leds/trigger/ledtrig-activity.c | 1 + drivers/leds/trigger/ledtrig-heartbeat.c | 1 + drivers/leds/trigger/ledtrig-panic.c | 1 + drivers/misc/bcm-vk/bcm_vk_dev.c | 1 + drivers/misc/ibmasm/heartbeat.c | 1 + drivers/misc/pvpanic/pvpanic.c | 1 + drivers/net/ipa/ipa_smp2p.c | 1 + drivers/parisc/power.c | 1 + drivers/power/reset/ltc2952-poweroff.c | 1 + drivers/remoteproc/remoteproc_core.c | 1 + drivers/s390/char/con3215.c | 1 + drivers/s390/char/con3270.c | 1 + drivers/s390/char/sclp.c | 1 + drivers/s390/char/sclp_con.c | 1 + drivers/s390/char/sclp_vt220.c | 1 + drivers/s390/char/zcore.c | 1 + drivers/soc/bcm/brcmstb/pm/pm-arm.c | 1 + drivers/staging/olpc_dcon/olpc_dcon.c | 1 + drivers/video/fbdev/hyperv_fb.c | 1 + include/asm-generic/bug.h | 3 +- include/linux/kernel.h | 84 +------------------ include/linux/panic.h | 98 +++++++++++++++++++++++ include/linux/panic_notifier.h | 12 +++ include/linux/thread_info.h | 1 + kernel/hung_task.c | 1 + kernel/kexec_core.c | 1 + kernel/panic.c | 1 + kernel/rcu/tree.c | 2 + kernel/sysctl.c | 1 + kernel/trace/trace.c | 1 + 55 files changed, 169 insertions(+), 85 deletions(-) create mode 100644 include/linux/panic.h create mode 100644 include/linux/panic_notifier.h (limited to 'arch/alpha') diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 5f6858e9dc28..7d56c217b235 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,6 @@ #include #include -extern struct atomic_notifier_head panic_notifier_list; static int alpha_panic_event(struct notifier_block *, unsigned long, void *); static struct notifier_block alpha_panic_block = { alpha_panic_event, diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 61845c0821d9..787bc0f601b3 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/include/asm/pal.h b/arch/ia64/include/asm/pal.h index 5c51fceedaf9..e6b652f9e45e 100644 --- a/arch/ia64/include/asm/pal.h +++ b/arch/ia64/include/asm/pal.h @@ -99,6 +99,7 @@ #include #include +#include /* * Data types needed to pass information into PAL procedures and diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c index 499a5357c09f..56b51de2dc51 100644 --- a/arch/mips/kernel/relocate.c +++ b/arch/mips/kernel/relocate.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c index c374f3ceec38..9028dbbb45dd 100644 --- a/arch/mips/sgi-ip22/ip22-reset.c +++ b/arch/mips/sgi-ip22/ip22-reset.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c index 20d8637340be..18d1c115cd53 100644 --- a/arch/mips/sgi-ip32/ip32-reset.c +++ b/arch/mips/sgi-ip32/ip32-reset.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/pdc_chassis.c b/arch/parisc/kernel/pdc_chassis.c index 75ae88d13909..da154406d368 100644 --- a/arch/parisc/kernel/pdc_chassis.c +++ b/arch/parisc/kernel/pdc_chassis.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 74a98fff2c2f..046fe21b5c3b 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -9,6 +9,7 @@ #undef DEBUG #include +#include #include #include #include diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index dba04fbc37a2..36f870dc944f 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sstate.c b/arch/sparc/kernel/sstate.c index ac8677c3841e..3bcc4ddc6911 100644 --- a/arch/sparc/kernel/sstate.c +++ b/arch/sparc/kernel/sstate.c @@ -6,6 +6,7 @@ #include #include +#include #include #include diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 6d00af25ec6b..328b16f99b30 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 74e07e748a9b..9512253947d5 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 476082a83d1c..ceb12683b6d1 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -9,6 +9,7 @@ #include #include +#include #include #include diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 22f13343b5da..9e5c6f2b044d 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1e720626069a..d21ba6fa39d9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index f03b64d9cb51..7558139920f8 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -9,6 +9,8 @@ */ #include +#include +#include #include #include diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index aa9f50fccc5d..c79bd0af2e8c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c index ed519aee0ec8..d3433e1bb94e 100644 --- a/arch/xtensa/platforms/iss/setup.c +++ b/arch/xtensa/platforms/iss/setup.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/drivers/bus/brcmstb_gisb.c b/drivers/bus/brcmstb_gisb.c index 7355fa2cb439..6551286a60cc 100644 --- a/drivers/bus/brcmstb_gisb.c +++ b/drivers/bus/brcmstb_gisb.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 8a0e97b33cae..e96cb5c4f97a 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/drivers/clk/analogbits/wrpll-cln28hpc.c b/drivers/clk/analogbits/wrpll-cln28hpc.c index 776ead319ae9..7c64ea52a8d5 100644 --- a/drivers/clk/analogbits/wrpll-cln28hpc.c +++ b/drivers/clk/analogbits/wrpll-cln28hpc.c @@ -23,8 +23,12 @@ #include #include +#include #include #include +#include +#include + #include /* MIN_INPUT_FREQ: minimum input clock frequency, in Hz (Fref_min) */ diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 5f7fd79ec82f..61c21bd880a4 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c index bb6e77ee3898..adaa492c3d2d 100644 --- a/drivers/firmware/google/gsmi.c +++ b/drivers/firmware/google/gsmi.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 92cb3f7d21d9..57bbbaa4e8f7 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c index 2dcf13de751f..9731d3a96073 100644 --- a/drivers/hwtracing/coresight/coresight-cpu-debug.c +++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/leds/trigger/ledtrig-activity.c b/drivers/leds/trigger/ledtrig-activity.c index 14ba7faaed9e..30bc9df03636 100644 --- a/drivers/leds/trigger/ledtrig-activity.c +++ b/drivers/leds/trigger/ledtrig-activity.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c index 36b6709afe9f..7fe0a05574d2 100644 --- a/drivers/leds/trigger/ledtrig-heartbeat.c +++ b/drivers/leds/trigger/ledtrig-heartbeat.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/leds/trigger/ledtrig-panic.c b/drivers/leds/trigger/ledtrig-panic.c index 5751cd032f9d..64abf2e91608 100644 --- a/drivers/leds/trigger/ledtrig-panic.c +++ b/drivers/leds/trigger/ledtrig-panic.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "../leds.h" diff --git a/drivers/misc/bcm-vk/bcm_vk_dev.c b/drivers/misc/bcm-vk/bcm_vk_dev.c index 6bfea3210389..ad639ee85b2a 100644 --- a/drivers/misc/bcm-vk/bcm_vk_dev.c +++ b/drivers/misc/bcm-vk/bcm_vk_dev.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/ibmasm/heartbeat.c b/drivers/misc/ibmasm/heartbeat.c index 4f5f3bdc814d..59c9a0d95659 100644 --- a/drivers/misc/ibmasm/heartbeat.c +++ b/drivers/misc/ibmasm/heartbeat.c @@ -9,6 +9,7 @@ */ #include +#include #include "ibmasm.h" #include "dot_command.h" #include "lowlevel.h" diff --git a/drivers/misc/pvpanic/pvpanic.c b/drivers/misc/pvpanic/pvpanic.c index 65f70a4da8c0..793ea0c01193 100644 --- a/drivers/misc/pvpanic/pvpanic.c +++ b/drivers/misc/pvpanic/pvpanic.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c index a5f7a79a1923..34b68dc43886 100644 --- a/drivers/net/ipa/ipa_smp2p.c +++ b/drivers/net/ipa/ipa_smp2p.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c index ebaf6867b457..456776bd8ee6 100644 --- a/drivers/parisc/power.c +++ b/drivers/parisc/power.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/power/reset/ltc2952-poweroff.c b/drivers/power/reset/ltc2952-poweroff.c index d1495af30081..8688c8ba8894 100644 --- a/drivers/power/reset/ltc2952-poweroff.c +++ b/drivers/power/reset/ltc2952-poweroff.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 626a6b90fba2..76dd8e2b1e7e 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 1fd5bca9fa20..02523f4e29f4 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include /* ASYNC_* flags */ #include diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index e21962c0fd94..87cdbace1453 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index 986bbbc23d0a..6627820a5eb9 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/s390/char/sclp_con.c b/drivers/s390/char/sclp_con.c index 9b852a47ccc1..cc01a7b8595d 100644 --- a/drivers/s390/char/sclp_con.c +++ b/drivers/s390/char/sclp_con.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c index 7f4445b0f819..5b8a7b090a97 100644 --- a/drivers/s390/char/sclp_vt220.c +++ b/drivers/s390/char/sclp_vt220.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index bd3c724bf695..b5b0848da93b 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/drivers/soc/bcm/brcmstb/pm/pm-arm.c b/drivers/soc/bcm/brcmstb/pm/pm-arm.c index a673fdffe216..3cbb165d6e30 100644 --- a/drivers/soc/bcm/brcmstb/pm/pm-arm.c +++ b/drivers/soc/bcm/brcmstb/pm/pm-arm.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/olpc_dcon/olpc_dcon.c b/drivers/staging/olpc_dcon/olpc_dcon.c index 6d8e9a481786..7284cb4ac395 100644 --- a/drivers/staging/olpc_dcon/olpc_dcon.c +++ b/drivers/staging/olpc_dcon/olpc_dcon.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index a7e6eea2c4a1..23999df52739 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index b402494883b6..f152b9bb916f 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -17,7 +17,8 @@ #endif #ifndef __ASSEMBLY__ -#include +#include +#include #ifdef CONFIG_BUG diff --git a/include/linux/kernel.h b/include/linux/kernel.h index bf950621febf..baea2eb763d0 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -72,7 +73,6 @@ #define lower_32_bits(n) ((u32)((n) & 0xffffffff)) struct completion; -struct pt_regs; struct user; #ifdef CONFIG_PREEMPT_VOLUNTARY @@ -177,14 +177,6 @@ void __might_fault(const char *file, int line); static inline void might_fault(void) { } #endif -extern struct atomic_notifier_head panic_notifier_list; -extern long (*panic_blink)(int state); -__printf(1, 2) -void panic(const char *fmt, ...) __noreturn __cold; -void nmi_panic(struct pt_regs *regs, const char *msg); -extern void oops_enter(void); -extern void oops_exit(void); -extern bool oops_may_print(void); void do_exit(long error_code) __noreturn; void complete_and_exit(struct completion *, long) __noreturn; @@ -372,52 +364,8 @@ extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int func_ptr_is_kernel_text(void *ptr); -#ifdef CONFIG_SMP -extern unsigned int sysctl_oops_all_cpu_backtrace; -#else -#define sysctl_oops_all_cpu_backtrace 0 -#endif /* CONFIG_SMP */ - extern void bust_spinlocks(int yes); -extern int panic_timeout; -extern unsigned long panic_print; -extern int panic_on_oops; -extern int panic_on_unrecovered_nmi; -extern int panic_on_io_nmi; -extern int panic_on_warn; -extern unsigned long panic_on_taint; -extern bool panic_on_taint_nousertaint; -extern int sysctl_panic_on_rcu_stall; -extern int sysctl_max_rcu_stall_to_panic; -extern int sysctl_panic_on_stackoverflow; - -extern bool crash_kexec_post_notifiers; -/* - * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It - * holds a CPU number which is executing panic() currently. A value of - * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec(). - */ -extern atomic_t panic_cpu; -#define PANIC_CPU_INVALID -1 - -/* - * Only to be used by arch init code. If the user over-wrote the default - * CONFIG_PANIC_TIMEOUT, honor it. - */ -static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) -{ - if (panic_timeout == arch_default_timeout) - panic_timeout = timeout; -} -extern const char *print_tainted(void); -enum lockdep_ok { - LOCKDEP_STILL_OK, - LOCKDEP_NOW_UNRELIABLE -}; -extern void add_taint(unsigned flag, enum lockdep_ok); -extern int test_taint(unsigned flag); -extern unsigned long get_taint(void); extern int root_mountflags; extern bool early_boot_irqs_disabled; @@ -436,36 +384,6 @@ extern enum system_states { SYSTEM_SUSPEND, } system_state; -/* This cannot be an enum because some may be used in assembly source. */ -#define TAINT_PROPRIETARY_MODULE 0 -#define TAINT_FORCED_MODULE 1 -#define TAINT_CPU_OUT_OF_SPEC 2 -#define TAINT_FORCED_RMMOD 3 -#define TAINT_MACHINE_CHECK 4 -#define TAINT_BAD_PAGE 5 -#define TAINT_USER 6 -#define TAINT_DIE 7 -#define TAINT_OVERRIDDEN_ACPI_TABLE 8 -#define TAINT_WARN 9 -#define TAINT_CRAP 10 -#define TAINT_FIRMWARE_WORKAROUND 11 -#define TAINT_OOT_MODULE 12 -#define TAINT_UNSIGNED_MODULE 13 -#define TAINT_SOFTLOCKUP 14 -#define TAINT_LIVEPATCH 15 -#define TAINT_AUX 16 -#define TAINT_RANDSTRUCT 17 -#define TAINT_FLAGS_COUNT 18 -#define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1) - -struct taint_flag { - char c_true; /* character printed when tainted */ - char c_false; /* character printed when not tainted */ - bool module; /* also show as a per-module taint flag */ -}; - -extern const struct taint_flag taint_flags[TAINT_FLAGS_COUNT]; - extern const char hex_asc[]; #define hex_asc_lo(x) hex_asc[((x) & 0x0f)] #define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4] diff --git a/include/linux/panic.h b/include/linux/panic.h new file mode 100644 index 000000000000..f5844908a089 --- /dev/null +++ b/include/linux/panic.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PANIC_H +#define _LINUX_PANIC_H + +#include +#include + +struct pt_regs; + +extern long (*panic_blink)(int state); +__printf(1, 2) +void panic(const char *fmt, ...) __noreturn __cold; +void nmi_panic(struct pt_regs *regs, const char *msg); +extern void oops_enter(void); +extern void oops_exit(void); +extern bool oops_may_print(void); + +#ifdef CONFIG_SMP +extern unsigned int sysctl_oops_all_cpu_backtrace; +#else +#define sysctl_oops_all_cpu_backtrace 0 +#endif /* CONFIG_SMP */ + +extern int panic_timeout; +extern unsigned long panic_print; +extern int panic_on_oops; +extern int panic_on_unrecovered_nmi; +extern int panic_on_io_nmi; +extern int panic_on_warn; + +extern unsigned long panic_on_taint; +extern bool panic_on_taint_nousertaint; + +extern int sysctl_panic_on_rcu_stall; +extern int sysctl_max_rcu_stall_to_panic; +extern int sysctl_panic_on_stackoverflow; + +extern bool crash_kexec_post_notifiers; + +/* + * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It + * holds a CPU number which is executing panic() currently. A value of + * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec(). + */ +extern atomic_t panic_cpu; +#define PANIC_CPU_INVALID -1 + +/* + * Only to be used by arch init code. If the user over-wrote the default + * CONFIG_PANIC_TIMEOUT, honor it. + */ +static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) +{ + if (panic_timeout == arch_default_timeout) + panic_timeout = timeout; +} + +/* This cannot be an enum because some may be used in assembly source. */ +#define TAINT_PROPRIETARY_MODULE 0 +#define TAINT_FORCED_MODULE 1 +#define TAINT_CPU_OUT_OF_SPEC 2 +#define TAINT_FORCED_RMMOD 3 +#define TAINT_MACHINE_CHECK 4 +#define TAINT_BAD_PAGE 5 +#define TAINT_USER 6 +#define TAINT_DIE 7 +#define TAINT_OVERRIDDEN_ACPI_TABLE 8 +#define TAINT_WARN 9 +#define TAINT_CRAP 10 +#define TAINT_FIRMWARE_WORKAROUND 11 +#define TAINT_OOT_MODULE 12 +#define TAINT_UNSIGNED_MODULE 13 +#define TAINT_SOFTLOCKUP 14 +#define TAINT_LIVEPATCH 15 +#define TAINT_AUX 16 +#define TAINT_RANDSTRUCT 17 +#define TAINT_FLAGS_COUNT 18 +#define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1) + +struct taint_flag { + char c_true; /* character printed when tainted */ + char c_false; /* character printed when not tainted */ + bool module; /* also show as a per-module taint flag */ +}; + +extern const struct taint_flag taint_flags[TAINT_FLAGS_COUNT]; + +enum lockdep_ok { + LOCKDEP_STILL_OK, + LOCKDEP_NOW_UNRELIABLE, +}; + +extern const char *print_tainted(void); +extern void add_taint(unsigned flag, enum lockdep_ok); +extern int test_taint(unsigned flag); +extern unsigned long get_taint(void); + +#endif /* _LINUX_PANIC_H */ diff --git a/include/linux/panic_notifier.h b/include/linux/panic_notifier.h new file mode 100644 index 000000000000..41e32483d7a7 --- /dev/null +++ b/include/linux/panic_notifier.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PANIC_NOTIFIERS_H +#define _LINUX_PANIC_NOTIFIERS_H + +#include +#include + +extern struct atomic_notifier_head panic_notifier_list; + +extern bool crash_kexec_post_notifiers; + +#endif /* _LINUX_PANIC_NOTIFIERS_H */ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 157762db9d4b..0999f6317978 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -9,6 +9,7 @@ #define _LINUX_THREAD_INFO_H #include +#include #include #include #include diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 396ebaebea3f..ebc641664eb0 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index f099baee3578..4b34a9aa32bc 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/panic.c b/kernel/panic.c index 332736a72a58..edad89660a2b 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8e78b2430c16..f12056beb916 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bb37739f5731..70e286c83d6b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d23a09d3eb37..3c1384bc5c5a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-59-g8ed1b