about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- mm/Kconfig 6
-rw-r--r-- mm/page_alloc.c 46
2 files changed, 43 insertions, 9 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 3af64646f343..e3490ecac839 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -747,13 +747,13 @@ config DEFERRED_STRUCT_PAGE_INIT
depends on SPARSEMEM
depends on !NEED_PER_CPU_KM
depends on 64BIT
+ select PADATA
help
Ordinarily all struct pages are initialised during early boot in a
single thread. On very large machines this can take a considerable
amount of time. If this option is set, large machines will bring up
- a subset of memmap at boot and then initialise the rest in parallel
- by starting one-off "pgdatinitX" kernel thread for each node X. This
- has a potential performance impact on processes running early in the
+ a subset of memmap at boot and then initialise the rest in parallel
+ using multiple kernel threads. This has a potential performance
+ impact on tasks running early in the
lifetime of the system until these kthreads finish the
initialisation.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 89bd57241e08..27ec5dc4db33 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -68,6 +68,7 @@
#include <linux/lockdep.h>
#include <linux/nmi.h>
#include <linux/psi.h>
+#include <linux/padata.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -1815,6 +1816,26 @@ deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn,
return nr_pages;
}
+static void __init
+deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
+ void *arg)
+{
+ unsigned long spfn, epfn;
+ struct zone *zone = arg;
+ u64 i;
+
+ deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
+
+ /*
+ * Initialize and free pages in MAX_ORDER sized increments so that we
+ * can avoid introducing any issues with the buddy allocator.
+ *
+ * This function is installed as the thread_fn of the padata job built
+ * in deferred_init_memmap(), and the zone arrives through @arg. The
+ * call above is handed @start_pfn and fills in i, spfn and epfn
+ * (presumably the first range at or after @start_pfn; confirm against
+ * the helper). The loop then runs until spfn reaches @end_pfn, calling
+ * cond_resched() after each deferred_init_maxorder() step.
+ */
+ while (spfn < end_pfn) {
+ deferred_init_maxorder(&i, zone, &spfn, &epfn);
+ cond_resched();
+ }
+}
+
/* Initialise remaining memory on a node */
static int __init deferred_init_memmap(void *data)
{
@@ -1824,7 +1845,7 @@ static int __init deferred_init_memmap(void *data)
unsigned long first_init_pfn, flags;
unsigned long start = jiffies;
struct zone *zone;
- int zid;
+ int zid, max_threads;
u64 i;
/* Bind memory initialisation thread to a local node if possible */
@@ -1864,13 +1885,26 @@ static int __init deferred_init_memmap(void *data)
goto zone_empty;
/*
- * Initialize and free pages in MAX_ORDER sized increments so
- * that we can avoid introducing any issues with the buddy
- * allocator.
+ * More CPUs always led to greater speedups on tested systems, up to
+ * all of the node's CPUs. Use all since the system is otherwise idle now.
*/
+ max_threads = max(cpumask_weight(cpumask), 1u);
+
while (spfn < epfn) {
- deferred_init_maxorder(&i, zone, &spfn, &epfn);
- cond_resched();
+ unsigned long epfn_align = ALIGN(epfn, PAGES_PER_SECTION);
+ struct padata_mt_job job = {
+ .thread_fn = deferred_init_memmap_chunk,
+ .fn_arg = zone,
+ .start = spfn,
+ .size = epfn_align - spfn,
+ .align = PAGES_PER_SECTION,
+ .min_chunk = PAGES_PER_SECTION,
+ .max_threads = max_threads,
+ };
+
+ padata_do_multithreaded(&job);
+ deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
+ epfn_align);
}
zone_empty:
/* Sanity check that the next zone really is unpopulated */