From b24413180f5600bcb3bb70fbed5cf186b60864bd Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 1 Nov 2017 15:07:57 +0100
Subject: License cleanup: add SPDX GPL-2.0 license identifier to files with no
 license

Many source files in the tree are missing licensing information, which
makes it harder for compliance tools to determine the correct license.

By default all files without license information are under the default
license of the kernel, which is GPL version 2.

Update the files which contain no license information with the 'GPL-2.0'
SPDX license identifier.  The SPDX identifier is a legally binding
shorthand, which can be used instead of the full boiler plate text.

This patch is based on work done by Thomas Gleixner and Kate Stewart and
Philippe Ombredanne.

How this work was done:

Patches were generated and checked against linux-4.14-rc6 for a subset of
the use cases:
 - file had no licensing information it it.
 - file was a */uapi/* one with no licensing information in it,
 - file was a */uapi/* one with existing licensing information,

Further patches will be generated in subsequent months to fix up cases
where non-standard license headers were used, and references to license
had to be inferred by heuristics based on keywords.

The analysis to determine which SPDX License Identifier to be applied to
a file was done in a spreadsheet of side by side results from of the
output of two independent scanners (ScanCode & Windriver) producing SPDX
tag:value files created by Philippe Ombredanne.  Philippe prepared the
base worksheet, and did an initial spot review of a few 1000 files.

The 4.13 kernel was the starting point of the analysis with 60,537 files
assessed.  Kate Stewart did a file by file comparison of the scanner
results in the spreadsheet to determine which SPDX license identifier(s)
to be applied to the file. She confirmed any determination that was not
immediately clear with lawyers working with the Linux Foundation.

Criteria used to select files for SPDX license identifier tagging was:
 - Files considered eligible had to be source code files.
 - Make and config files were included as candidates if they contained >5
   lines of source
 - File already had some variant of a license header in it (even if <5
   lines).

All documentation files were explicitly excluded.

The following heuristics were used to determine which SPDX license
identifiers to apply.

 - when both scanners couldn't find any license traces, file was
   considered to have no license information in it, and the top level
   COPYING file license applied.

   For non */uapi/* files that summary was:

   SPDX license identifier                            # files
   ---------------------------------------------------|-------
   GPL-2.0                                              11139

   and resulted in the first patch in this series.

   If that file was a */uapi/* path one, it was "GPL-2.0 WITH
   Linux-syscall-note" otherwise it was "GPL-2.0".  Results of that was:

   SPDX license identifier                            # files
   ---------------------------------------------------|-------
   GPL-2.0 WITH Linux-syscall-note                        930

   and resulted in the second patch in this series.

 - if a file had some form of licensing information in it, and was one
   of the */uapi/* ones, it was denoted with the Linux-syscall-note if
   any GPL family license was found in the file or had no licensing in
   it (per prior point).  Results summary:

   SPDX license identifier                            # files
   ---------------------------------------------------|------
   GPL-2.0 WITH Linux-syscall-note                       270
   GPL-2.0+ WITH Linux-syscall-note                      169
   ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause)    21
   ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)    17
   LGPL-2.1+ WITH Linux-syscall-note                      15
   GPL-1.0+ WITH Linux-syscall-note                       14
   ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause)    5
   LGPL-2.0+ WITH Linux-syscall-note                       4
   LGPL-2.1 WITH Linux-syscall-note                        3
   ((GPL-2.0 WITH Linux-syscall-note) OR MIT)              3
   ((GPL-2.0 WITH Linux-syscall-note) AND MIT)             1

   and that resulted in the third patch in this series.

 - when the two scanners agreed on the detected license(s), that became
   the concluded license(s).

 - when there was disagreement between the two scanners (one detected a
   license but the other didn't, or they both detected different
   licenses) a manual inspection of the file occurred.

 - In most cases a manual inspection of the information in the file
   resulted in a clear resolution of the license that should apply (and
   which scanner probably needed to revisit its heuristics).

 - When it was not immediately clear, the license identifier was
   confirmed with lawyers working with the Linux Foundation.

 - If there was any question as to the appropriate license identifier,
   the file was flagged for further research and to be revisited later
   in time.

In total, over 70 hours of logged manual review was done on the
spreadsheet to determine the SPDX license identifiers to apply to the
source files by Kate, Philippe, Thomas and, in some cases, confirmation
by lawyers working with the Linux Foundation.

Kate also obtained a third independent scan of the 4.13 code base from
FOSSology, and compared selected files where the other two scanners
disagreed against that SPDX file, to see if there was new insights.  The
Windriver scanner is based on an older version of FOSSology in part, so
they are related.

Thomas did random spot checks in about 500 files from the spreadsheets
for the uapi headers and agreed with SPDX license identifier in the
files he inspected. For the non-uapi files Thomas did random spot checks
in about 15000 files.

In initial set of patches against 4.14-rc6, 3 files were found to have
copy/paste license identifier errors, and have been fixed to reflect the
correct identifier.

Additionally Philippe spent 10 hours this week doing a detailed manual
inspection and review of the 12,461 patched files from the initial patch
version early this week with:
 - a full scancode scan run, collecting the matched texts, detected
   license ids and scores
 - reviewing anything where there was a license detected (about 500+
   files) to ensure that the applied SPDX license was correct
 - reviewing anything where there was no detection but the patch license
   was not GPL-2.0 WITH Linux-syscall-note to ensure that the applied
   SPDX license was correct

This produced a worksheet with 20 files needing minor correction.  This
worksheet was then exported into 3 different .csv files for the
different types of files to be modified.

These .csv files were then reviewed by Greg.  Thomas wrote a script to
parse the csv files and add the proper SPDX tag to the file, in the
format that the file expected.  This script was further refined by Greg
based on the output to detect more types of files automatically and to
distinguish between header and source .c files (which need different
comment types.)  Finally Greg ran the script using the .csv files to
generate the patches.

Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org>
Reviewed-by: Philippe Ombredanne <pombredanne@nexb.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/debug.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'mm/debug.c')

diff --git a/mm/debug.c b/mm/debug.c
index 5715448ab0b5..6726bec731c9 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * mm/debug.c
  *
-- 
cgit v1.3-8-gc7d7


From b4e98d9ac775907cc53fb08fcb6776deb7694e30 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 15 Nov 2017 17:35:33 -0800
Subject: mm: account pud page tables

On a machine with 5-level paging support a process can allocate
significant amount of memory and stay unnoticed by oom-killer and memory
cgroup.  The trick is to allocate a lot of PUD page tables.  We don't
account PUD page tables, only PMD and PTE.

We already addressed the same issue for PMD page tables, see commit
dc6c9a35b66b ("mm: account pmd page tables to the process").
Introduction of 5-level paging brings the same issue for PUD page
tables.

The patch expands accounting to PUD level.

[kirill.shutemov@linux.intel.com: s/pmd_t/pud_t/]
  Link: http://lkml.kernel.org/r/20171004074305.x35eh5u7ybbt5kar@black.fi.intel.com
[heiko.carstens@de.ibm.com: s390/mm: fix pud table accounting]
  Link: http://lkml.kernel.org/r/20171103090551.18231-1-heiko.carstens@de.ibm.com
Link: http://lkml.kernel.org/r/20171002080427.3320-1-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysctl/vm.txt         |  8 ++++----
 arch/powerpc/mm/hugetlbpage.c       |  1 +
 arch/s390/include/asm/mmu_context.h |  4 +++-
 arch/sparc/mm/hugetlbpage.c         |  1 +
 fs/proc/task_mmu.c                  |  5 ++++-
 include/linux/mm.h                  | 36 +++++++++++++++++++++++++++++++++---
 include/linux/mm_types.h            |  3 +++
 kernel/fork.c                       |  4 ++++
 mm/debug.c                          |  6 ++++--
 mm/memory.c                         | 15 +++++++++------
 mm/oom_kill.c                       |  8 +++++---
 11 files changed, 71 insertions(+), 20 deletions(-)

(limited to 'mm/debug.c')

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 30fd16b14196..2729e8db9492 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -629,10 +629,10 @@ oom_dump_tasks
 
 Enables a system-wide task dump (excluding kernel threads) to be produced
 when the kernel performs an OOM-killing and includes such information as
-pid, uid, tgid, vm size, rss, nr_ptes, nr_pmds, swapents, oom_score_adj
-score, and name.  This is helpful to determine why the OOM killer was
-invoked, to identify the rogue task that caused it, and to determine why
-the OOM killer chose the task it did to kill.
+pid, uid, tgid, vm size, rss, nr_ptes, nr_pmds, nr_puds, swapents,
+oom_score_adj score, and name.  This is helpful to determine why the OOM
+killer was invoked, to identify the rogue task that caused it, and to
+determine why the OOM killer chose the task it did to kill.
 
 If this is set to zero, this information is suppressed.  On very
 large systems with thousands of tasks it may not be feasible to dump
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1571a498a33f..a9b9083c5e49 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -433,6 +433,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	pud = pud_offset(pgd, start);
 	pgd_clear(pgd);
 	pud_free_tlb(tlb, pud, start);
+	mm_dec_nr_puds(tlb->mm);
 }
 
 /*
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 43607bb12cc2..cf4c1cb17dcd 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -44,6 +44,8 @@ static inline int init_new_context(struct task_struct *tsk,
 		mm->context.asce_limit = STACK_TOP_MAX;
 		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 				   _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+		/* pgd_alloc() did not account this pud */
+		mm_inc_nr_puds(mm);
 		break;
 	case -PAGE_SIZE:
 		/* forked 5-level task, set new asce with new_mm->pgd */
@@ -59,7 +61,7 @@ static inline int init_new_context(struct task_struct *tsk,
 		/* forked 2-level compat task, set new asce with new mm->pgd */
 		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 				   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-		/* pgd_alloc() did not increase mm->nr_pmds */
+		/* pgd_alloc() did not account this pmd */
 		mm_inc_nr_pmds(mm);
 	}
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 5078b7f68890..01f63b4ee2b4 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -472,6 +472,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	pud = pud_offset(pgd, start);
 	pgd_clear(pgd);
 	pud_free_tlb(tlb, pud, start);
+	mm_dec_nr_puds(tlb->mm);
 }
 
 void hugetlb_free_pgd_range(struct mmu_gather *tlb,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6744bd706ecf..a05ce6186f99 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -26,7 +26,7 @@
 
 void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
-	unsigned long text, lib, swap, ptes, pmds, anon, file, shmem;
+	unsigned long text, lib, swap, ptes, pmds, puds, anon, file, shmem;
 	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
 
 	anon = get_mm_counter(mm, MM_ANONPAGES);
@@ -52,6 +52,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	swap = get_mm_counter(mm, MM_SWAPENTS);
 	ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes);
 	pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
+	puds = PTRS_PER_PUD * sizeof(pud_t) * mm_nr_puds(mm);
 	seq_printf(m,
 		"VmPeak:\t%8lu kB\n"
 		"VmSize:\t%8lu kB\n"
@@ -68,6 +69,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		"VmLib:\t%8lu kB\n"
 		"VmPTE:\t%8lu kB\n"
 		"VmPMD:\t%8lu kB\n"
+		"VmPUD:\t%8lu kB\n"
 		"VmSwap:\t%8lu kB\n",
 		hiwater_vm << (PAGE_SHIFT-10),
 		total_vm << (PAGE_SHIFT-10),
@@ -82,6 +84,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
 		ptes >> 10,
 		pmds >> 10,
+		puds >> 10,
 		swap << (PAGE_SHIFT-10));
 	hugetlb_report_usage(m, mm);
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 91b46f99b4d2..9af86f39d928 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1599,14 +1599,44 @@ static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
 int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
 #endif
 
-#ifdef __PAGETABLE_PUD_FOLDED
+#if defined(__PAGETABLE_PUD_FOLDED) || !defined(CONFIG_MMU)
 static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d,
 						unsigned long address)
 {
 	return 0;
 }
+
+static inline unsigned long mm_nr_puds(const struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void mm_nr_puds_init(struct mm_struct *mm) {}
+static inline void mm_inc_nr_puds(struct mm_struct *mm) {}
+static inline void mm_dec_nr_puds(struct mm_struct *mm) {}
+
 #else
 int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
+
+static inline void mm_nr_puds_init(struct mm_struct *mm)
+{
+	atomic_long_set(&mm->nr_puds, 0);
+}
+
+static inline unsigned long mm_nr_puds(const struct mm_struct *mm)
+{
+	return atomic_long_read(&mm->nr_puds);
+}
+
+static inline void mm_inc_nr_puds(struct mm_struct *mm)
+{
+	atomic_long_inc(&mm->nr_puds);
+}
+
+static inline void mm_dec_nr_puds(struct mm_struct *mm)
+{
+	atomic_long_dec(&mm->nr_puds);
+}
 #endif
 
 #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU)
@@ -1618,7 +1648,7 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
 
 static inline void mm_nr_pmds_init(struct mm_struct *mm) {}
 
-static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
+static inline unsigned long mm_nr_pmds(const struct mm_struct *mm)
 {
 	return 0;
 }
@@ -1634,7 +1664,7 @@ static inline void mm_nr_pmds_init(struct mm_struct *mm)
 	atomic_long_set(&mm->nr_pmds, 0);
 }
 
-static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
+static inline unsigned long mm_nr_pmds(const struct mm_struct *mm)
 {
 	return atomic_long_read(&mm->nr_pmds);
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d1b8e8f97fc2..e9e561e02d22 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -404,6 +404,9 @@ struct mm_struct {
 	atomic_long_t nr_ptes;			/* PTE page table pages */
 #if CONFIG_PGTABLE_LEVELS > 2
 	atomic_long_t nr_pmds;			/* PMD page table pages */
+#endif
+#if CONFIG_PGTABLE_LEVELS > 3
+	atomic_long_t nr_puds;			/* PUD page table pages */
 #endif
 	int map_count;				/* number of VMAs */
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 07cc743698d3..a4eb6f289365 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -819,6 +819,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm->core_state = NULL;
 	atomic_long_set(&mm->nr_ptes, 0);
 	mm_nr_pmds_init(mm);
+	mm_nr_puds_init(mm);
 	mm->map_count = 0;
 	mm->locked_vm = 0;
 	mm->pinned_vm = 0;
@@ -878,6 +879,9 @@ static void check_mm(struct mm_struct *mm)
 	if (mm_nr_pmds(mm))
 		pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
 				mm_nr_pmds(mm));
+	if (mm_nr_puds(mm))
+		pr_alert("BUG: non-zero nr_puds on freeing mm: %ld\n",
+				mm_nr_puds(mm));
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
diff --git a/mm/debug.c b/mm/debug.c
index 6726bec731c9..a12d826bb774 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -105,7 +105,8 @@ void dump_mm(const struct mm_struct *mm)
 		"get_unmapped_area %p\n"
 #endif
 		"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
-		"pgd %p mm_users %d mm_count %d nr_ptes %lu nr_pmds %lu map_count %d\n"
+		"pgd %p mm_users %d mm_count %d\n"
+		"nr_ptes %lu nr_pmds %lu nr_puds %lu map_count %d\n"
 		"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
 		"pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
 		"start_code %lx end_code %lx start_data %lx end_data %lx\n"
@@ -136,7 +137,8 @@ void dump_mm(const struct mm_struct *mm)
 		mm->pgd, atomic_read(&mm->mm_users),
 		atomic_read(&mm->mm_count),
 		atomic_long_read((atomic_long_t *)&mm->nr_ptes),
-		mm_nr_pmds((struct mm_struct *)mm),
+		mm_nr_pmds(mm),
+		mm_nr_puds(mm),
 		mm->map_count,
 		mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
 		mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm,
diff --git a/mm/memory.c b/mm/memory.c
index 42fb30300bb5..6bbd4078ec98 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -506,6 +506,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
 	pud = pud_offset(p4d, start);
 	p4d_clear(p4d);
 	pud_free_tlb(tlb, pud, start);
+	mm_dec_nr_puds(tlb->mm);
 }
 
 static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd,
@@ -4149,15 +4150,17 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
 
 	spin_lock(&mm->page_table_lock);
 #ifndef __ARCH_HAS_5LEVEL_HACK
-	if (p4d_present(*p4d))		/* Another has populated it */
-		pud_free(mm, new);
-	else
+	if (!p4d_present(*p4d)) {
+		mm_inc_nr_puds(mm);
 		p4d_populate(mm, p4d, new);
-#else
-	if (pgd_present(*p4d))		/* Another has populated it */
+	} else	/* Another has populated it */
 		pud_free(mm, new);
-	else
+#else
+	if (!pgd_present(*p4d)) {
+		mm_inc_nr_puds(mm);
 		pgd_populate(mm, p4d, new);
+	} else	/* Another has populated it */
+		pud_free(mm, new);
 #endif /* __ARCH_HAS_5LEVEL_HACK */
 	spin_unlock(&mm->page_table_lock);
 	return 0;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3023919970f7..f642a45b7f14 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -221,7 +221,8 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 	 * task's rss, pagetable and swap space use.
 	 */
 	points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
-		atomic_long_read(&p->mm->nr_ptes) + mm_nr_pmds(p->mm);
+		atomic_long_read(&p->mm->nr_ptes) + mm_nr_pmds(p->mm) +
+		mm_nr_puds(p->mm);
 	task_unlock(p);
 
 	/*
@@ -397,7 +398,7 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 	struct task_struct *p;
 	struct task_struct *task;
 
-	pr_info("[ pid ]   uid  tgid total_vm      rss nr_ptes nr_pmds swapents oom_score_adj name\n");
+	pr_info("[ pid ]   uid  tgid total_vm      rss nr_ptes nr_pmds nr_puds swapents oom_score_adj name\n");
 	rcu_read_lock();
 	for_each_process(p) {
 		if (oom_unkillable_task(p, memcg, nodemask))
@@ -413,11 +414,12 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 			continue;
 		}
 
-		pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %8lu         %5hd %s\n",
+		pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %7ld %8lu         %5hd %s\n",
 			task->pid, from_kuid(&init_user_ns, task_uid(task)),
 			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
 			atomic_long_read(&task->mm->nr_ptes),
 			mm_nr_pmds(task->mm),
+			mm_nr_puds(task->mm),
 			get_mm_counter(task->mm, MM_SWAPENTS),
 			task->signal->oom_score_adj, task->comm);
 		task_unlock(task);
-- 
cgit v1.3-8-gc7d7


From c4812909f5d5a9b7f1c85a2d95be388a066cda52 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 15 Nov 2017 17:35:37 -0800
Subject: mm: introduce wrappers to access mm->nr_ptes

Let's add wrappers for ->nr_ptes with the same interface as for nr_pmd
and nr_pud.

The patch also makes nr_ptes accounting dependent onto CONFIG_MMU.  Page
table accounting doesn't make sense if you don't have page tables.

It's preparation for consolidation of page-table counters in mm_struct.

Link: http://lkml.kernel.org/r/20171006100651.44742-1-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/pgd.c           |  2 +-
 arch/sparc/mm/hugetlbpage.c |  2 +-
 arch/unicore32/mm/pgd.c     |  2 +-
 fs/proc/task_mmu.c          |  2 +-
 include/linux/mm.h          | 32 ++++++++++++++++++++++++++++++++
 include/linux/mm_types.h    |  2 ++
 kernel/fork.c               |  6 +++---
 mm/debug.c                  |  2 +-
 mm/huge_memory.c            | 10 +++++-----
 mm/khugepaged.c             |  2 +-
 mm/memory.c                 |  8 ++++----
 mm/oom_kill.c               |  5 ++---
 12 files changed, 54 insertions(+), 21 deletions(-)

(limited to 'mm/debug.c')

diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index c1c1a5c67da1..61e281cb29fb 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -141,7 +141,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd_base)
 	pte = pmd_pgtable(*pmd);
 	pmd_clear(pmd);
 	pte_free(mm, pte);
-	atomic_long_dec(&mm->nr_ptes);
+	mm_dec_nr_ptes(mm);
 no_pmd:
 	pud_clear(pud);
 	pmd_free(mm, pmd);
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 01f63b4ee2b4..0112d6942288 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -397,7 +397,7 @@ static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 
 	pmd_clear(pmd);
 	pte_free_tlb(tlb, token, addr);
-	atomic_long_dec(&tlb->mm->nr_ptes);
+	mm_dec_nr_ptes(tlb->mm);
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
diff --git a/arch/unicore32/mm/pgd.c b/arch/unicore32/mm/pgd.c
index c572a28c76c9..a830a300aaa1 100644
--- a/arch/unicore32/mm/pgd.c
+++ b/arch/unicore32/mm/pgd.c
@@ -97,7 +97,7 @@ void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd)
 	pte = pmd_pgtable(*pmd);
 	pmd_clear(pmd);
 	pte_free(mm, pte);
-	atomic_long_dec(&mm->nr_ptes);
+	mm_dec_nr_ptes(mm);
 	pmd_free(mm, pmd);
 	mm_dec_nr_pmds(mm);
 free:
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index a05ce6186f99..9bd2a0294ac1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -50,7 +50,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
 	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
 	swap = get_mm_counter(mm, MM_SWAPENTS);
-	ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes);
+	ptes = PTRS_PER_PTE * sizeof(pte_t) * mm_nr_ptes(mm);
 	pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
 	puds = PTRS_PER_PUD * sizeof(pud_t) * mm_nr_puds(mm);
 	seq_printf(m,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9af86f39d928..2ca799f0d762 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1680,6 +1680,38 @@ static inline void mm_dec_nr_pmds(struct mm_struct *mm)
 }
 #endif
 
+#ifdef CONFIG_MMU
+static inline void mm_nr_ptes_init(struct mm_struct *mm)
+{
+	atomic_long_set(&mm->nr_ptes, 0);
+}
+
+static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
+{
+	return atomic_long_read(&mm->nr_ptes);
+}
+
+static inline void mm_inc_nr_ptes(struct mm_struct *mm)
+{
+	atomic_long_inc(&mm->nr_ptes);
+}
+
+static inline void mm_dec_nr_ptes(struct mm_struct *mm)
+{
+	atomic_long_dec(&mm->nr_ptes);
+}
+#else
+static inline void mm_nr_ptes_init(struct mm_struct *mm) {}
+
+static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void mm_inc_nr_ptes(struct mm_struct *mm) {}
+static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
+#endif
+
 int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
 int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e9e561e02d22..e42048020664 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -401,7 +401,9 @@ struct mm_struct {
 	 */
 	atomic_t mm_count;
 
+#ifdef CONFIG_MMU
 	atomic_long_t nr_ptes;			/* PTE page table pages */
+#endif
 #if CONFIG_PGTABLE_LEVELS > 2
 	atomic_long_t nr_pmds;			/* PMD page table pages */
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index a4eb6f289365..946922a30ede 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -817,7 +817,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	init_rwsem(&mm->mmap_sem);
 	INIT_LIST_HEAD(&mm->mmlist);
 	mm->core_state = NULL;
-	atomic_long_set(&mm->nr_ptes, 0);
+	mm_nr_ptes_init(mm);
 	mm_nr_pmds_init(mm);
 	mm_nr_puds_init(mm);
 	mm->map_count = 0;
@@ -873,9 +873,9 @@ static void check_mm(struct mm_struct *mm)
 					  "mm:%p idx:%d val:%ld\n", mm, i, x);
 	}
 
-	if (atomic_long_read(&mm->nr_ptes))
+	if (mm_nr_ptes(mm))
 		pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
-				atomic_long_read(&mm->nr_ptes));
+				mm_nr_ptes(mm));
 	if (mm_nr_pmds(mm))
 		pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
 				mm_nr_pmds(mm));
diff --git a/mm/debug.c b/mm/debug.c
index a12d826bb774..c9888a6d7875 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -136,7 +136,7 @@ void dump_mm(const struct mm_struct *mm)
 		mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end,
 		mm->pgd, atomic_read(&mm->mm_users),
 		atomic_read(&mm->mm_count),
-		atomic_long_read((atomic_long_t *)&mm->nr_ptes),
+		mm_nr_ptes(mm),
 		mm_nr_pmds(mm),
 		mm_nr_puds(mm),
 		mm->map_count,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cc65fb87c9db..3610d81c062a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -606,7 +606,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
 		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
 		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
-		atomic_long_inc(&vma->vm_mm->nr_ptes);
+		mm_inc_nr_ptes(vma->vm_mm);
 		spin_unlock(vmf->ptl);
 		count_vm_event(THP_FAULT_ALLOC);
 	}
@@ -662,7 +662,7 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 	if (pgtable)
 		pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, haddr, pmd, entry);
-	atomic_long_inc(&mm->nr_ptes);
+	mm_inc_nr_ptes(mm);
 	return true;
 }
 
@@ -747,7 +747,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 
 	if (pgtable) {
 		pgtable_trans_huge_deposit(mm, pmd, pgtable);
-		atomic_long_inc(&mm->nr_ptes);
+		mm_inc_nr_ptes(mm);
 	}
 
 	set_pmd_at(mm, addr, pmd, entry);
@@ -978,7 +978,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	get_page(src_page);
 	page_dup_rmap(src_page, true);
 	add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
-	atomic_long_inc(&dst_mm->nr_ptes);
+	mm_inc_nr_ptes(dst_mm);
 	pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
 
 	pmdp_set_wrprotect(src_mm, addr, src_pmd);
@@ -1695,7 +1695,7 @@ static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
 
 	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
 	pte_free(mm, pgtable);
-	atomic_long_dec(&mm->nr_ptes);
+	mm_dec_nr_ptes(mm);
 }
 
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 43cb3043311b..ea4ff259b671 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1270,7 +1270,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 			_pmd = pmdp_collapse_flush(vma, addr, pmd);
 			spin_unlock(ptl);
 			up_write(&vma->vm_mm->mmap_sem);
-			atomic_long_dec(&vma->vm_mm->nr_ptes);
+			mm_dec_nr_ptes(vma->vm_mm);
 			pte_free(vma->vm_mm, pmd_pgtable(_pmd));
 		}
 	}
diff --git a/mm/memory.c b/mm/memory.c
index 6bbd4078ec98..6dec21b182b0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -438,7 +438,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 	pgtable_t token = pmd_pgtable(*pmd);
 	pmd_clear(pmd);
 	pte_free_tlb(tlb, token, addr);
-	atomic_long_dec(&tlb->mm->nr_ptes);
+	mm_dec_nr_ptes(tlb->mm);
 }
 
 static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -666,7 +666,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 
 	ptl = pmd_lock(mm, pmd);
 	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
-		atomic_long_inc(&mm->nr_ptes);
+		mm_inc_nr_ptes(mm);
 		pmd_populate(mm, pmd, new);
 		new = NULL;
 	}
@@ -3238,7 +3238,7 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
 			goto map_pte;
 		}
 
-		atomic_long_inc(&vma->vm_mm->nr_ptes);
+		mm_inc_nr_ptes(vma->vm_mm);
 		pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
 		spin_unlock(vmf->ptl);
 		vmf->prealloc_pte = NULL;
@@ -3297,7 +3297,7 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
 	 * We are going to consume the prealloc table,
 	 * count that as nr_ptes.
 	 */
-	atomic_long_inc(&vma->vm_mm->nr_ptes);
+	mm_inc_nr_ptes(vma->vm_mm);
 	vmf->prealloc_pte = NULL;
 }
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f642a45b7f14..f9300141480e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -221,8 +221,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 	 * task's rss, pagetable and swap space use.
 	 */
 	points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
-		atomic_long_read(&p->mm->nr_ptes) + mm_nr_pmds(p->mm) +
-		mm_nr_puds(p->mm);
+		mm_nr_ptes(p->mm) + mm_nr_pmds(p->mm) + mm_nr_puds(p->mm);
 	task_unlock(p);
 
 	/*
@@ -417,7 +416,7 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 		pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %7ld %8lu         %5hd %s\n",
 			task->pid, from_kuid(&init_user_ns, task_uid(task)),
 			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
-			atomic_long_read(&task->mm->nr_ptes),
+			mm_nr_ptes(task->mm),
 			mm_nr_pmds(task->mm),
 			mm_nr_puds(task->mm),
 			get_mm_counter(task->mm, MM_SWAPENTS),
-- 
cgit v1.3-8-gc7d7


From af5b0f6a09e42c9f4fa87735f2a366748767b686 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 15 Nov 2017 17:35:40 -0800
Subject: mm: consolidate page table accounting

Currently, we account page tables separately for each page table level,
but that's redundant -- we only make use of total memory allocated to
page tables for oom_badness calculation.  We also provide the
information to userspace, but it has dubious value there too.

This patch switches page table accounting to single counter.

mm->pgtables_bytes is now used to account all page table levels.  We use
bytes, because page table size for different levels of page table tree
may be different.

The change has user-visible effect: we don't have VmPMD and VmPUD
reported in /proc/[pid]/status.  Not sure if anybody uses them.  (As
alternative, we can always report 0 kB for them.)

OOM-killer report is also slightly changed: we now report pgtables_bytes
instead of nr_ptes, nr_pmd, nr_puds.

Apart from reducing number of counters per-mm, the benefit is that we
now calculate oom_badness() more correctly for machines which have
different size of page tables depending on level or where page tables
are less than a page in size.

The only downside can be debuggability because we do not know which page
table level could leak.  But I do not remember many bugs that would be
caught by separate counters so I wouldn't lose sleep over this.

[akpm@linux-foundation.org: fix mm/huge_memory.c]
Link: http://lkml.kernel.org/r/20171006100651.44742-2-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
[kirill.shutemov@linux.intel.com: fix build]
  Link: http://lkml.kernel.org/r/20171016150113.ikfxy3e7zzfvsr4w@black.fi.intel.com
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt |  1 -
 Documentation/sysctl/vm.txt        |  8 +++---
 fs/proc/task_mmu.c                 | 11 ++------
 include/linux/mm.h                 | 58 ++++++++------------------------------
 include/linux/mm_types.h           |  8 +-----
 kernel/fork.c                      | 16 +++--------
 mm/debug.c                         |  7 ++---
 mm/huge_memory.c                   |  2 +-
 mm/oom_kill.c                      | 14 ++++-----
 9 files changed, 32 insertions(+), 93 deletions(-)

(limited to 'mm/debug.c')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index adba21b5ada7..ec571b9bb18a 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -250,7 +250,6 @@ Table 1-2: Contents of the status files (as of 4.8)
  VmExe                       size of text segment
  VmLib                       size of shared library code
  VmPTE                       size of page table entries
- VmPMD                       size of second level page tables
  VmSwap                      amount of swap used by anonymous private data
                              (shmem swap usage is not included)
  HugetlbPages                size of hugetlb memory portions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 2729e8db9492..3e579740b49f 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -629,10 +629,10 @@ oom_dump_tasks
 
 Enables a system-wide task dump (excluding kernel threads) to be produced
 when the kernel performs an OOM-killing and includes such information as
-pid, uid, tgid, vm size, rss, nr_ptes, nr_pmds, nr_puds, swapents,
-oom_score_adj score, and name.  This is helpful to determine why the OOM
-killer was invoked, to identify the rogue task that caused it, and to
-determine why the OOM killer chose the task it did to kill.
+pid, uid, tgid, vm size, rss, pgtables_bytes, swapents, oom_score_adj
+score, and name.  This is helpful to determine why the OOM killer was
+invoked, to identify the rogue task that caused it, and to determine why
+the OOM killer chose the task it did to kill.
 
 If this is set to zero, this information is suppressed.  On very
 large systems with thousands of tasks it may not be feasible to dump
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9bd2a0294ac1..875231c36cb3 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -26,7 +26,7 @@
 
 void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
-	unsigned long text, lib, swap, ptes, pmds, puds, anon, file, shmem;
+	unsigned long text, lib, swap, anon, file, shmem;
 	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
 
 	anon = get_mm_counter(mm, MM_ANONPAGES);
@@ -50,9 +50,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
 	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
 	swap = get_mm_counter(mm, MM_SWAPENTS);
-	ptes = PTRS_PER_PTE * sizeof(pte_t) * mm_nr_ptes(mm);
-	pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
-	puds = PTRS_PER_PUD * sizeof(pud_t) * mm_nr_puds(mm);
 	seq_printf(m,
 		"VmPeak:\t%8lu kB\n"
 		"VmSize:\t%8lu kB\n"
@@ -68,8 +65,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		"VmExe:\t%8lu kB\n"
 		"VmLib:\t%8lu kB\n"
 		"VmPTE:\t%8lu kB\n"
-		"VmPMD:\t%8lu kB\n"
-		"VmPUD:\t%8lu kB\n"
 		"VmSwap:\t%8lu kB\n",
 		hiwater_vm << (PAGE_SHIFT-10),
 		total_vm << (PAGE_SHIFT-10),
@@ -82,9 +77,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		shmem << (PAGE_SHIFT-10),
 		mm->data_vm << (PAGE_SHIFT-10),
 		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
-		ptes >> 10,
-		pmds >> 10,
-		puds >> 10,
+		mm_pgtables_bytes(mm) >> 10,
 		swap << (PAGE_SHIFT-10));
 	hugetlb_report_usage(m, mm);
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2ca799f0d762..7c1e82a1aa77 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1605,37 +1605,20 @@ static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d,
 {
 	return 0;
 }
-
-static inline unsigned long mm_nr_puds(const struct mm_struct *mm)
-{
-	return 0;
-}
-
-static inline void mm_nr_puds_init(struct mm_struct *mm) {}
 static inline void mm_inc_nr_puds(struct mm_struct *mm) {}
 static inline void mm_dec_nr_puds(struct mm_struct *mm) {}
 
 #else
 int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
 
-static inline void mm_nr_puds_init(struct mm_struct *mm)
-{
-	atomic_long_set(&mm->nr_puds, 0);
-}
-
-static inline unsigned long mm_nr_puds(const struct mm_struct *mm)
-{
-	return atomic_long_read(&mm->nr_puds);
-}
-
 static inline void mm_inc_nr_puds(struct mm_struct *mm)
 {
-	atomic_long_inc(&mm->nr_puds);
+	atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_puds(struct mm_struct *mm)
 {
-	atomic_long_dec(&mm->nr_puds);
+	atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 #endif
 
@@ -1646,64 +1629,47 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
 	return 0;
 }
 
-static inline void mm_nr_pmds_init(struct mm_struct *mm) {}
-
-static inline unsigned long mm_nr_pmds(const struct mm_struct *mm)
-{
-	return 0;
-}
-
 static inline void mm_inc_nr_pmds(struct mm_struct *mm) {}
 static inline void mm_dec_nr_pmds(struct mm_struct *mm) {}
 
 #else
 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
 
-static inline void mm_nr_pmds_init(struct mm_struct *mm)
-{
-	atomic_long_set(&mm->nr_pmds, 0);
-}
-
-static inline unsigned long mm_nr_pmds(const struct mm_struct *mm)
-{
-	return atomic_long_read(&mm->nr_pmds);
-}
-
 static inline void mm_inc_nr_pmds(struct mm_struct *mm)
 {
-	atomic_long_inc(&mm->nr_pmds);
+	atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_pmds(struct mm_struct *mm)
 {
-	atomic_long_dec(&mm->nr_pmds);
+	atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 #endif
 
 #ifdef CONFIG_MMU
-static inline void mm_nr_ptes_init(struct mm_struct *mm)
+static inline void mm_pgtables_bytes_init(struct mm_struct *mm)
 {
-	atomic_long_set(&mm->nr_ptes, 0);
+	atomic_long_set(&mm->pgtables_bytes, 0);
 }
 
-static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
+static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm)
 {
-	return atomic_long_read(&mm->nr_ptes);
+	return atomic_long_read(&mm->pgtables_bytes);
 }
 
 static inline void mm_inc_nr_ptes(struct mm_struct *mm)
 {
-	atomic_long_inc(&mm->nr_ptes);
+	atomic_long_add(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_ptes(struct mm_struct *mm)
 {
-	atomic_long_dec(&mm->nr_ptes);
+	atomic_long_sub(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes);
 }
 #else
-static inline void mm_nr_ptes_init(struct mm_struct *mm) {}
 
-static inline unsigned long mm_nr_ptes(const struct mm_struct *mm)
+static inline void mm_pgtables_bytes_init(struct mm_struct *mm) {}
+static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm)
 {
 	return 0;
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e42048020664..09643e0472fc 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -402,13 +402,7 @@ struct mm_struct {
 	atomic_t mm_count;
 
 #ifdef CONFIG_MMU
-	atomic_long_t nr_ptes;			/* PTE page table pages */
-#endif
-#if CONFIG_PGTABLE_LEVELS > 2
-	atomic_long_t nr_pmds;			/* PMD page table pages */
-#endif
-#if CONFIG_PGTABLE_LEVELS > 3
-	atomic_long_t nr_puds;			/* PUD page table pages */
+	atomic_long_t pgtables_bytes;		/* PTE page table pages */
 #endif
 	int map_count;				/* number of VMAs */
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 946922a30ede..006dc5899a1a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -817,9 +817,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	init_rwsem(&mm->mmap_sem);
 	INIT_LIST_HEAD(&mm->mmlist);
 	mm->core_state = NULL;
-	mm_nr_ptes_init(mm);
-	mm_nr_pmds_init(mm);
-	mm_nr_puds_init(mm);
+	mm_pgtables_bytes_init(mm);
 	mm->map_count = 0;
 	mm->locked_vm = 0;
 	mm->pinned_vm = 0;
@@ -873,15 +871,9 @@ static void check_mm(struct mm_struct *mm)
 					  "mm:%p idx:%d val:%ld\n", mm, i, x);
 	}
 
-	if (mm_nr_ptes(mm))
-		pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
-				mm_nr_ptes(mm));
-	if (mm_nr_pmds(mm))
-		pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
-				mm_nr_pmds(mm));
-	if (mm_nr_puds(mm))
-		pr_alert("BUG: non-zero nr_puds on freeing mm: %ld\n",
-				mm_nr_puds(mm));
+	if (mm_pgtables_bytes(mm))
+		pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n",
+				mm_pgtables_bytes(mm));
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
diff --git a/mm/debug.c b/mm/debug.c
index c9888a6d7875..d947f3e03b0d 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -105,8 +105,7 @@ void dump_mm(const struct mm_struct *mm)
 		"get_unmapped_area %p\n"
 #endif
 		"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
-		"pgd %p mm_users %d mm_count %d\n"
-		"nr_ptes %lu nr_pmds %lu nr_puds %lu map_count %d\n"
+		"pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
 		"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
 		"pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
 		"start_code %lx end_code %lx start_data %lx end_data %lx\n"
@@ -136,9 +135,7 @@ void dump_mm(const struct mm_struct *mm)
 		mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end,
 		mm->pgd, atomic_read(&mm->mm_users),
 		atomic_read(&mm->mm_count),
-		mm_nr_ptes(mm),
-		mm_nr_pmds(mm),
-		mm_nr_puds(mm),
+		mm_pgtables_bytes(mm),
 		mm->map_count,
 		mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
 		mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3610d81c062a..86fe697e8bfb 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -942,7 +942,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			set_pmd_at(src_mm, addr, src_pmd, pmd);
 		}
 		add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
-		atomic_long_inc(&dst_mm->nr_ptes);
+		mm_inc_nr_ptes(dst_mm);
 		pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
 		set_pmd_at(dst_mm, addr, dst_pmd, pmd);
 		ret = 0;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f9300141480e..26add8a0d1f7 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -221,7 +221,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 	 * task's rss, pagetable and swap space use.
 	 */
 	points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
-		mm_nr_ptes(p->mm) + mm_nr_pmds(p->mm) + mm_nr_puds(p->mm);
+		mm_pgtables_bytes(p->mm) / PAGE_SIZE;
 	task_unlock(p);
 
 	/*
@@ -389,15 +389,15 @@ static void select_bad_process(struct oom_control *oc)
  * Dumps the current memory state of all eligible tasks.  Tasks not in the same
  * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
  * are not shown.
- * State information includes task's pid, uid, tgid, vm size, rss, nr_ptes,
- * swapents, oom_score_adj value, and name.
+ * State information includes task's pid, uid, tgid, vm size, rss,
+ * pgtables_bytes, swapents, oom_score_adj value, and name.
  */
 static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 {
 	struct task_struct *p;
 	struct task_struct *task;
 
-	pr_info("[ pid ]   uid  tgid total_vm      rss nr_ptes nr_pmds nr_puds swapents oom_score_adj name\n");
+	pr_info("[ pid ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
 	rcu_read_lock();
 	for_each_process(p) {
 		if (oom_unkillable_task(p, memcg, nodemask))
@@ -413,12 +413,10 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 			continue;
 		}
 
-		pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %7ld %8lu         %5hd %s\n",
+		pr_info("[%5d] %5d %5d %8lu %8lu %8ld %8lu         %5hd %s\n",
 			task->pid, from_kuid(&init_user_ns, task_uid(task)),
 			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
-			mm_nr_ptes(task->mm),
-			mm_nr_pmds(task->mm),
-			mm_nr_puds(task->mm),
+			mm_pgtables_bytes(task->mm),
 			get_mm_counter(task->mm, MM_SWAPENTS),
 			task->signal->oom_score_adj, task->comm);
 		task_unlock(task);
-- 
cgit v1.3-8-gc7d7