From 2e7d31704c7fe9141b0b27ec1ece254d9993fe2d Mon Sep 17 00:00:00 2001
From: zhong jiang <zhongjiang@huawei.com>
Date: Wed, 4 Dec 2019 16:49:43 -0800
Subject: mm/kasan/common.c: fix compile error

I hit the following compile error in arch/x86/

   mm/kasan/common.c: In function kasan_populate_vmalloc:
   mm/kasan/common.c:797:2: error: implicit declaration of function flush_cache_vmap; did you mean flush_rcu_work? [-Werror=implicit-function-declaration]
     flush_cache_vmap(shadow_start, shadow_end);
     ^~~~~~~~~~~~~~~~
     flush_rcu_work
   cc1: some warnings being treated as errors

Link: http://lkml.kernel.org/r/1575363013-43761-1-git-send-email-zhongjiang@huawei.com
Fixes: 3c5c3cfb9ef4 ("kasan: support backing vmalloc space with real shadow memory")
Signed-off-by: zhong jiang <zhongjiang@huawei.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Daniel Axtens <dja@axtens.net>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/common.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index df3371d5c572..2fa710bb6358 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -36,6 +36,7 @@
 #include <linux/bug.h>
 #include <linux/uaccess.h>
 
+#include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
 #include "kasan.h"
-- 
cgit v1.2.3-59-g8ed1b


From a264df74df38855096393447f1b8f386069a94b9 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Wed, 4 Dec 2019 16:49:46 -0800
Subject: mm: memcg/slab: wait for !root kmem_cache refcnt killing on root
 kmem_cache destruction

Christian reported a warning like the following obtained during running
some KVM-related tests on s390:

    WARNING: CPU: 8 PID: 208 at lib/percpu-refcount.c:108 percpu_ref_exit+0x50/0x58
    Modules linked in: kvm(-) xt_CHECKSUM xt_MASQUERADE bonding xt_tcpudp ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 xt_conntrack ip6table_na>
    CPU: 8 PID: 208 Comm: kworker/8:1 Not tainted 5.2.0+ #66
    Hardware name: IBM 2964 NC9 712 (LPAR)
    Workqueue: events sysfs_slab_remove_workfn
    Krnl PSW : 0704e00180000000 0000001529746850 (percpu_ref_exit+0x50/0x58)
               R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3
    Krnl GPRS: 00000000ffff8808 0000001529746740 000003f4e30e8e18 0036008100000000
               0000001f00000000 0035008100000000 0000001fb3573ab8 0000000000000000
               0000001fbdb6de00 0000000000000000 0000001529f01328 0000001fb3573b00
               0000001fbb27e000 0000001fbdb69300 000003e009263d00 000003e009263cd0
    Krnl Code: 0000001529746842: f0a0000407fe        srp        4(11,%r0),2046,0
               0000001529746848: 47000700            bc         0,1792
              #000000152974684c: a7f40001            brc        15,152974684e
              >0000001529746850: a7f4fff2            brc        15,1529746834
               0000001529746854: 0707                bcr        0,%r7
               0000001529746856: 0707                bcr        0,%r7
               0000001529746858: eb8ff0580024        stmg       %r8,%r15,88(%r15)
               000000152974685e: a738ffff            lhi        %r3,-1
    Call Trace:
    ([<000003e009263d00>] 0x3e009263d00)
     [<00000015293252ea>] slab_kmem_cache_release+0x3a/0x70
     [<0000001529b04882>] kobject_put+0xaa/0xe8
     [<000000152918cf28>] process_one_work+0x1e8/0x428
     [<000000152918d1b0>] worker_thread+0x48/0x460
     [<00000015291942c6>] kthread+0x126/0x160
     [<0000001529b22344>] ret_from_fork+0x28/0x30
     [<0000001529b2234c>] kernel_thread_starter+0x0/0x10
    Last Breaking-Event-Address:
     [<000000152974684c>] percpu_ref_exit+0x4c/0x58
    ---[ end trace b035e7da5788eb09 ]---

The problem occurs because kmem_cache_destroy() is called immediately
after deleting of a memcg, so it races with the memcg kmem_cache
deactivation.

flush_memcg_workqueue() at the beginning of kmem_cache_destroy() is
supposed to guarantee that all deactivation processes are finished, but
failed to do so.  It waits for an rcu grace period, after which all
children kmem_caches should be deactivated.  During the deactivation
percpu_ref_kill() is called for non root kmem_cache refcounters, but it
requires yet another rcu grace period to finish the transition to the
atomic (dead) state.

So in a rare case when not all children kmem_caches are destroyed at the
moment when the root kmem_cache is about to be gone, we need to wait
another rcu grace period before destroying the root kmem_cache.

This issue can be triggered only with dynamically created kmem_caches
which are used with memcg accounting.  In this case per-memcg child
kmem_caches are created.  They are deactivated from the cgroup removing
path.  If the destruction of the root kmem_cache is racing with the
removal of the cgroup (both are quite complicated multi-stage
processes), the described issue can occur.  The only known way to
trigger it in the real life, is to unload some kernel module which
creates a dedicated kmem_cache, used from different memory cgroups with
GFP_ACCOUNT flag.  If the unloading happens immediately after calling
rmdir on the corresponding cgroup, there is some chance to trigger the
issue.

Link: http://lkml.kernel.org/r/20191129025011.3076017-1-guro@fb.com
Fixes: f0a3a24b532d ("mm: memcg/slab: rework non-root kmem_cache lifecycle management")
Signed-off-by: Roman Gushchin <guro@fb.com>
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slab_common.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 8afa188f6e20..f0ab6d4ceb4c 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -904,6 +904,18 @@ static void flush_memcg_workqueue(struct kmem_cache *s)
 	 * previous workitems on workqueue are processed.
 	 */
 	flush_workqueue(memcg_kmem_cache_wq);
+
+	/*
+	 * If we're racing with children kmem_cache deactivation, it might
+	 * take another rcu grace period to complete their destruction.
+	 * At this moment the corresponding percpu_ref_kill() call should be
+	 * done, but it might take another rcu grace period to complete
+	 * switching to the atomic mode.
+	 * Please, note that we check without grabbing the slab_mutex. It's safe
+	 * because at this moment the children list can't grow.
+	 */
+	if (!list_empty(&s->memcg_params.children))
+		rcu_barrier();
 }
 #else
 static inline int shutdown_memcg_caches(struct kmem_cache *s)
-- 
cgit v1.2.3-59-g8ed1b


From 9d7ea9a297e6445d567056f15b469dde13ca4134 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Wed, 4 Dec 2019 16:49:50 -0800
Subject: mm/vmstat: add helpers to get vmstat item names for each enum type

Statistics in vmstat is combined from counters with different structure,
but names for them are merged into one array.

This patch adds trivial helpers to get name for each item:

  const char *zone_stat_name(enum zone_stat_item item);
  const char *numa_stat_name(enum numa_stat_item item);
  const char *node_stat_name(enum node_stat_item item);
  const char *writeback_stat_name(enum writeback_stat_item item);
  const char *vm_event_name(enum vm_event_item item);

Names for enum writeback_stat_item are folded in the middle of
vmstat_text so this patch moves declaration into header to calculate
offset of following items.

Also this patch reuses piece of node stat names for lru list names:

  const char *lru_list_name(enum lru_list lru);

This returns common lru list names: "inactive_anon", "active_anon",
"inactive_file", "active_file", "unevictable".

[khlebnikov@yandex-team.ru: do not use size of vmstat_text as count of /proc/vmstat items]
  Link: http://lkml.kernel.org/r/157152151769.4139.15423465513138349343.stgit@buzz
  Link: https://lore.kernel.org/linux-mm/cd1c42ae-281f-c8a8-70ac-1d01d417b2e1@infradead.org/T/#u
Link: http://lkml.kernel.org/r/157113012325.453.562783073839432766.stgit@buzz
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: YueHaibing <yuehaibing@huawei.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c    |  9 +++------
 include/linux/vmstat.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++++
 mm/vmstat.c            | 51 ++++++++++++++++++++------------------------------
 3 files changed, 73 insertions(+), 37 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 296546ffed6c..98a31bafc8a2 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -496,20 +496,17 @@ static ssize_t node_read_vmstat(struct device *dev,
 	int n = 0;
 
 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-		n += sprintf(buf+n, "%s %lu\n", vmstat_text[i],
+		n += sprintf(buf+n, "%s %lu\n", zone_stat_name(i),
 			     sum_zone_node_page_state(nid, i));
 
 #ifdef CONFIG_NUMA
 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-		n += sprintf(buf+n, "%s %lu\n",
-			     vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
+		n += sprintf(buf+n, "%s %lu\n", numa_stat_name(i),
 			     sum_zone_numa_state(nid, i));
 #endif
 
 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
-		n += sprintf(buf+n, "%s %lu\n",
-			     vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
-			     NR_VM_NUMA_STAT_ITEMS],
+		n += sprintf(buf+n, "%s %lu\n", node_stat_name(i),
 			     node_page_state(pgdat, i));
 
 	return n;
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index bdeda4b079fe..b995d8b680c2 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -31,6 +31,12 @@ struct reclaim_stat {
 	unsigned nr_unmap_fail;
 };
 
+enum writeback_stat_item {
+	NR_DIRTY_THRESHOLD,
+	NR_DIRTY_BG_THRESHOLD,
+	NR_VM_WRITEBACK_STAT_ITEMS,
+};
+
 #ifdef CONFIG_VM_EVENT_COUNTERS
 /*
  * Light weight per cpu counter implementation.
@@ -381,4 +387,48 @@ static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
 
 extern const char * const vmstat_text[];
 
+static inline const char *zone_stat_name(enum zone_stat_item item)
+{
+	return vmstat_text[item];
+}
+
+#ifdef CONFIG_NUMA
+static inline const char *numa_stat_name(enum numa_stat_item item)
+{
+	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+			   item];
+}
+#endif /* CONFIG_NUMA */
+
+static inline const char *node_stat_name(enum node_stat_item item)
+{
+	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+			   NR_VM_NUMA_STAT_ITEMS +
+			   item];
+}
+
+static inline const char *lru_list_name(enum lru_list lru)
+{
+	return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
+}
+
+static inline const char *writeback_stat_name(enum writeback_stat_item item)
+{
+	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+			   NR_VM_NUMA_STAT_ITEMS +
+			   NR_VM_NODE_STAT_ITEMS +
+			   item];
+}
+
+#ifdef CONFIG_VM_EVENT_COUNTERS
+static inline const char *vm_event_name(enum vm_event_item item)
+{
+	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+			   NR_VM_NUMA_STAT_ITEMS +
+			   NR_VM_NODE_STAT_ITEMS +
+			   NR_VM_WRITEBACK_STAT_ITEMS +
+			   item];
+}
+#endif /* CONFIG_VM_EVENT_COUNTERS */
+
 #endif /* _LINUX_VMSTAT_H */
diff --git a/mm/vmstat.c b/mm/vmstat.c
index a8222041bd44..fa627329428b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1134,7 +1134,7 @@ const char * const vmstat_text[] = {
 	"numa_other",
 #endif
 
-	/* Node-based counters */
+	/* enum node_stat_item counters */
 	"nr_inactive_anon",
 	"nr_active_anon",
 	"nr_inactive_file",
@@ -1564,10 +1564,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 	if (is_zone_first_populated(pgdat, zone)) {
 		seq_printf(m, "\n  per-node stats");
 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
-			seq_printf(m, "\n      %-12s %lu",
-				vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
-				NR_VM_NUMA_STAT_ITEMS],
-				node_page_state(pgdat, i));
+			seq_printf(m, "\n      %-12s %lu", node_stat_name(i),
+				   node_page_state(pgdat, i));
 		}
 	}
 	seq_printf(m,
@@ -1600,14 +1598,13 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 	}
 
 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-		seq_printf(m, "\n      %-12s %lu", vmstat_text[i],
-				zone_page_state(zone, i));
+		seq_printf(m, "\n      %-12s %lu", zone_stat_name(i),
+			   zone_page_state(zone, i));
 
 #ifdef CONFIG_NUMA
 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-		seq_printf(m, "\n      %-12s %lu",
-				vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
-				zone_numa_state_snapshot(zone, i));
+		seq_printf(m, "\n      %-12s %lu", numa_stat_name(i),
+			   zone_numa_state_snapshot(zone, i));
 #endif
 
 	seq_printf(m, "\n  pagesets");
@@ -1658,31 +1655,23 @@ static const struct seq_operations zoneinfo_op = {
 	.show	= zoneinfo_show,
 };
 
-enum writeback_stat_item {
-	NR_DIRTY_THRESHOLD,
-	NR_DIRTY_BG_THRESHOLD,
-	NR_VM_WRITEBACK_STAT_ITEMS,
-};
+#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
+			 NR_VM_NUMA_STAT_ITEMS + \
+			 NR_VM_NODE_STAT_ITEMS + \
+			 NR_VM_WRITEBACK_STAT_ITEMS + \
+			 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
+			  NR_VM_EVENT_ITEMS : 0))
 
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
 {
 	unsigned long *v;
-	int i, stat_items_size;
+	int i;
 
-	if (*pos >= ARRAY_SIZE(vmstat_text))
+	if (*pos >= NR_VMSTAT_ITEMS)
 		return NULL;
-	stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
-			  NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
-			  NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
-			  NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
-
-#ifdef CONFIG_VM_EVENT_COUNTERS
-	stat_items_size += sizeof(struct vm_event_state);
-#endif
 
-	BUILD_BUG_ON(stat_items_size !=
-		     ARRAY_SIZE(vmstat_text) * sizeof(unsigned long));
-	v = kmalloc(stat_items_size, GFP_KERNEL);
+	BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
+	v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
 	m->private = v;
 	if (!v)
 		return ERR_PTR(-ENOMEM);
@@ -1715,7 +1704,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
 static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
 {
 	(*pos)++;
-	if (*pos >= ARRAY_SIZE(vmstat_text))
+	if (*pos >= NR_VMSTAT_ITEMS)
 		return NULL;
 	return (unsigned long *)m->private + *pos;
 }
@@ -1781,7 +1770,7 @@ int vmstat_refresh(struct ctl_table *table, int write,
 		val = atomic_long_read(&vm_zone_stat[i]);
 		if (val < 0) {
 			pr_warn("%s: %s %ld\n",
-				__func__, vmstat_text[i], val);
+				__func__, zone_stat_name(i), val);
 			err = -EINVAL;
 		}
 	}
@@ -1790,7 +1779,7 @@ int vmstat_refresh(struct ctl_table *table, int write,
 		val = atomic_long_read(&vm_numa_stat[i]);
 		if (val < 0) {
 			pr_warn("%s: %s %ld\n",
-				__func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
+				__func__, numa_stat_name(i), val);
 			err = -EINVAL;
 		}
 	}
-- 
cgit v1.2.3-59-g8ed1b


From ebc5d83d04438116c24dcc556b0ab6c8ef64b77e Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Wed, 4 Dec 2019 16:49:53 -0800
Subject: mm/memcontrol: use vmstat names for printing statistics

Use common names from vmstat array when possible.  This gives not much
difference in code size for now, but should help in keeping interfaces
consistent.

  add/remove: 0/2 grow/shrink: 2/0 up/down: 70/-72 (-2)
  Function                                     old     new   delta
  memory_stat_format                           984    1050     +66
  memcg_stat_show                              957     961      +4
  memcg1_event_names                            32       -     -32
  mem_cgroup_lru_names                          40       -     -40
  Total: Before=14485337, After=14485335, chg -0.00%

Link: http://lkml.kernel.org/r/157113012508.453.80391533767219371.stgit@buzz
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h |  4 ++--
 mm/memcontrol.c        | 52 +++++++++++++++++++++-----------------------------
 mm/vmstat.c            |  9 +++++----
 3 files changed, 29 insertions(+), 36 deletions(-)

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index b995d8b680c2..292485f3d24d 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -420,7 +420,7 @@ static inline const char *writeback_stat_name(enum writeback_stat_item item)
 			   item];
 }
 
-#ifdef CONFIG_VM_EVENT_COUNTERS
+#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
 static inline const char *vm_event_name(enum vm_event_item item)
 {
 	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
@@ -429,6 +429,6 @@ static inline const char *vm_event_name(enum vm_event_item item)
 			   NR_VM_WRITEBACK_STAT_ITEMS +
 			   item];
 }
-#endif /* CONFIG_VM_EVENT_COUNTERS */
+#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
 
 #endif /* _LINUX_VMSTAT_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bc01423277c5..c5b5f74cfd4d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -98,14 +98,6 @@ static bool do_memsw_account(void)
 	return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && do_swap_account;
 }
 
-static const char *const mem_cgroup_lru_names[] = {
-	"inactive_anon",
-	"active_anon",
-	"inactive_file",
-	"active_file",
-	"unevictable",
-};
-
 #define THRESHOLDS_EVENTS_TARGET 128
 #define SOFTLIMIT_EVENTS_TARGET 1024
 
@@ -1421,7 +1413,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
 		       PAGE_SIZE);
 
 	for (i = 0; i < NR_LRU_LISTS; i++)
-		seq_buf_printf(&s, "%s %llu\n", mem_cgroup_lru_names[i],
+		seq_buf_printf(&s, "%s %llu\n", lru_list_name(i),
 			       (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
 			       PAGE_SIZE);
 
@@ -1434,8 +1426,10 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
 
 	/* Accumulated memory events */
 
-	seq_buf_printf(&s, "pgfault %lu\n", memcg_events(memcg, PGFAULT));
-	seq_buf_printf(&s, "pgmajfault %lu\n", memcg_events(memcg, PGMAJFAULT));
+	seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGFAULT),
+		       memcg_events(memcg, PGFAULT));
+	seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGMAJFAULT),
+		       memcg_events(memcg, PGMAJFAULT));
 
 	seq_buf_printf(&s, "workingset_refault %lu\n",
 		       memcg_page_state(memcg, WORKINGSET_REFAULT));
@@ -1444,22 +1438,27 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
 	seq_buf_printf(&s, "workingset_nodereclaim %lu\n",
 		       memcg_page_state(memcg, WORKINGSET_NODERECLAIM));
 
-	seq_buf_printf(&s, "pgrefill %lu\n", memcg_events(memcg, PGREFILL));
+	seq_buf_printf(&s, "%s %lu\n",  vm_event_name(PGREFILL),
+		       memcg_events(memcg, PGREFILL));
 	seq_buf_printf(&s, "pgscan %lu\n",
 		       memcg_events(memcg, PGSCAN_KSWAPD) +
 		       memcg_events(memcg, PGSCAN_DIRECT));
 	seq_buf_printf(&s, "pgsteal %lu\n",
 		       memcg_events(memcg, PGSTEAL_KSWAPD) +
 		       memcg_events(memcg, PGSTEAL_DIRECT));
-	seq_buf_printf(&s, "pgactivate %lu\n", memcg_events(memcg, PGACTIVATE));
-	seq_buf_printf(&s, "pgdeactivate %lu\n", memcg_events(memcg, PGDEACTIVATE));
-	seq_buf_printf(&s, "pglazyfree %lu\n", memcg_events(memcg, PGLAZYFREE));
-	seq_buf_printf(&s, "pglazyfreed %lu\n", memcg_events(memcg, PGLAZYFREED));
+	seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGACTIVATE),
+		       memcg_events(memcg, PGACTIVATE));
+	seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGDEACTIVATE),
+		       memcg_events(memcg, PGDEACTIVATE));
+	seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREE),
+		       memcg_events(memcg, PGLAZYFREE));
+	seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREED),
+		       memcg_events(memcg, PGLAZYFREED));
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	seq_buf_printf(&s, "thp_fault_alloc %lu\n",
+	seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_FAULT_ALLOC),
 		       memcg_events(memcg, THP_FAULT_ALLOC));
-	seq_buf_printf(&s, "thp_collapse_alloc %lu\n",
+	seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_COLLAPSE_ALLOC),
 		       memcg_events(memcg, THP_COLLAPSE_ALLOC));
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
@@ -3742,13 +3741,6 @@ static const unsigned int memcg1_events[] = {
 	PGMAJFAULT,
 };
 
-static const char *const memcg1_event_names[] = {
-	"pgpgin",
-	"pgpgout",
-	"pgfault",
-	"pgmajfault",
-};
-
 static int memcg_stat_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
@@ -3757,7 +3749,6 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 	unsigned int i;
 
 	BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
-	BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
 
 	for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
 		if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
@@ -3768,11 +3759,11 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
-		seq_printf(m, "%s %lu\n", memcg1_event_names[i],
+		seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]),
 			   memcg_events_local(memcg, memcg1_events[i]));
 
 	for (i = 0; i < NR_LRU_LISTS; i++)
-		seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i],
+		seq_printf(m, "%s %lu\n", lru_list_name(i),
 			   memcg_page_state_local(memcg, NR_LRU_BASE + i) *
 			   PAGE_SIZE);
 
@@ -3797,11 +3788,12 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
-		seq_printf(m, "total_%s %llu\n", memcg1_event_names[i],
+		seq_printf(m, "total_%s %llu\n",
+			   vm_event_name(memcg1_events[i]),
 			   (u64)memcg_events(memcg, memcg1_events[i]));
 
 	for (i = 0; i < NR_LRU_LISTS; i++)
-		seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i],
+		seq_printf(m, "total_%s %llu\n", lru_list_name(i),
 			   (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
 			   PAGE_SIZE);
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index fa627329428b..78d53378db99 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1084,7 +1084,8 @@ int fragmentation_index(struct zone *zone, unsigned int order)
 }
 #endif
 
-#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
+    defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
 #ifdef CONFIG_ZONE_DMA
 #define TEXT_FOR_DMA(xx) xx "_dma",
 #else
@@ -1172,7 +1173,7 @@ const char * const vmstat_text[] = {
 	"nr_dirty_threshold",
 	"nr_dirty_background_threshold",
 
-#ifdef CONFIG_VM_EVENT_COUNTERS
+#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
 	/* enum vm_event_item counters */
 	"pgpgin",
 	"pgpgout",
@@ -1291,9 +1292,9 @@ const char * const vmstat_text[] = {
 	"swap_ra",
 	"swap_ra_hit",
 #endif
-#endif /* CONFIG_VM_EVENTS_COUNTERS */
+#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
 };
-#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
+#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
 
 #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
      defined(CONFIG_PROC_FS)
-- 
cgit v1.2.3-59-g8ed1b


From 3cde287bb4769fe9dfc9c532ddc88d90e81bc4c5 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Wed, 4 Dec 2019 16:49:56 -0800
Subject: mm/memory.c: replace is_zero_pfn with is_huge_zero_pmd for thp

For hugely mapped thp, we use is_huge_zero_pmd() to check if it's zero
page or not.

We do fill ptes with my_zero_pfn() when we split zero thp pmd, but this
is not what we have in vm_normal_page_pmd() -- pmd_trans_huge_lock()
makes sure of it.

This is a trivial fix for /proc/pid/numa_maps, and AFAIK nobody
complains about it.

Gerald Schaefer asked:
: Maybe the description could also mention the symptom of this bug?
: I would assume that it affects anon/dirty accounting in gather_pte_stats(),
: for huge mappings, if zero page mappings are not correctly recognized.

I came across this while I was looking at the code, so I'm not aware of
any symptom.

Link: http://lkml.kernel.org/r/20191108192629.201556-1-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Will Deacon <will@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memory.c b/mm/memory.c
index 513c3ecc76ee..e455160e0f75 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -672,7 +672,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
 
 	if (pmd_devmap(pmd))
 		return NULL;
-	if (is_zero_pfn(pfn))
+	if (is_huge_zero_pmd(pmd))
 		return NULL;
 	if (unlikely(pfn > highest_memmap_pfn))
 		return NULL;
-- 
cgit v1.2.3-59-g8ed1b


From e06689bf57017ac022ccf0f2a5071f760821ce0f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 4 Dec 2019 16:49:59 -0800
Subject: proc: change ->nlink under proc_subdir_lock

Currently gluing PDE into global /proc tree is done under lock, but
changing ->nlink is not.  Additionally struct proc_dir_entry::nlink is
not atomic so updates can be lost.

Link: http://lkml.kernel.org/r/20190925202436.GA17388@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/generic.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 64e9ee1b129e..d4f353187d67 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -138,8 +138,12 @@ static int proc_getattr(const struct path *path, struct kstat *stat,
 {
 	struct inode *inode = d_inode(path->dentry);
 	struct proc_dir_entry *de = PDE(inode);
-	if (de && de->nlink)
-		set_nlink(inode, de->nlink);
+	if (de) {
+		nlink_t nlink = READ_ONCE(de->nlink);
+		if (nlink > 0) {
+			set_nlink(inode, nlink);
+		}
+	}
 
 	generic_fillattr(inode, stat);
 	return 0;
@@ -362,6 +366,7 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
 		write_unlock(&proc_subdir_lock);
 		goto out_free_inum;
 	}
+	dir->nlink++;
 	write_unlock(&proc_subdir_lock);
 
 	return dp;
@@ -472,10 +477,7 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
 		ent->data = data;
 		ent->proc_fops = &proc_dir_operations;
 		ent->proc_iops = &proc_dir_inode_operations;
-		parent->nlink++;
 		ent = proc_register(parent, ent);
-		if (!ent)
-			parent->nlink--;
 	}
 	return ent;
 }
@@ -505,10 +507,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
 		ent->data = NULL;
 		ent->proc_fops = NULL;
 		ent->proc_iops = NULL;
-		parent->nlink++;
 		ent = proc_register(parent, ent);
-		if (!ent)
-			parent->nlink--;
 	}
 	return ent;
 }
@@ -666,8 +665,12 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 	len = strlen(fn);
 
 	de = pde_subdir_find(parent, fn, len);
-	if (de)
+	if (de) {
 		rb_erase(&de->subdir_node, &parent->subdir);
+		if (S_ISDIR(de->mode)) {
+			parent->nlink--;
+		}
+	}
 	write_unlock(&proc_subdir_lock);
 	if (!de) {
 		WARN(1, "name '%s'\n", name);
@@ -676,9 +679,6 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 
 	proc_entry_rundown(de);
 
-	if (S_ISDIR(de->mode))
-		parent->nlink--;
-	de->nlink = 0;
 	WARN(pde_subdir_first(de),
 	     "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n",
 	     __func__, de->parent->name, de->name, pde_subdir_first(de)->name);
@@ -714,13 +714,12 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
 			de = next;
 			continue;
 		}
-		write_unlock(&proc_subdir_lock);
-
-		proc_entry_rundown(de);
 		next = de->parent;
 		if (S_ISDIR(de->mode))
 			next->nlink--;
-		de->nlink = 0;
+		write_unlock(&proc_subdir_lock);
+
+		proc_entry_rundown(de);
 		if (de == root)
 			break;
 		pde_put(de);
-- 
cgit v1.2.3-59-g8ed1b


From 5f6354eaa517ce5c1e7b7feb224a0324601f796e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 4 Dec 2019 16:50:02 -0800
Subject: fs/proc/generic.c: delete useless "len" variable

Pointer to next '/' encodes length of path element and next start
position.  Subtraction and increment are redundant.

Link: http://lkml.kernel.org/r/20191004234521.GA30246@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/generic.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index d4f353187d67..074e9585c699 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -163,7 +163,6 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret,
 {
 	const char     		*cp = name, *next;
 	struct proc_dir_entry	*de;
-	unsigned int		len;
 
 	de = *ret;
 	if (!de)
@@ -174,13 +173,12 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret,
 		if (!next)
 			break;
 
-		len = next - cp;
-		de = pde_subdir_find(de, cp, len);
+		de = pde_subdir_find(de, cp, next - cp);
 		if (!de) {
 			WARN(1, "name '%s'\n", name);
 			return -ENOENT;
 		}
-		cp += len + 1;
+		cp = next + 1;
 	}
 	*residual = cp;
 	*ret = de;
-- 
cgit v1.2.3-59-g8ed1b


From 70a731c0e3c603e84607b770c15e53da4c3a6ecd Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 4 Dec 2019 16:50:05 -0800
Subject: fs/proc/internal.h: shuffle "struct pde_opener"

List iteration takes more code than anything else which means embedded
list_head should be the first element of the structure.

Space savings:

	add/remove: 0/0 grow/shrink: 0/4 up/down: 0/-18 (-18)
	Function                                     old     new   delta
	close_pdeo                                   228     227      -1
	proc_reg_release                              86      82      -4
	proc_entry_rundown                           143     139      -4
	proc_reg_open                                298     289      -9

Link: http://lkml.kernel.org/r/20191004234753.GB30246@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index cd0c8d5ce9a1..0f3b557c9b77 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -197,8 +197,8 @@ extern ssize_t proc_simple_write(struct file *, const char __user *, size_t, lof
  * inode.c
  */
 struct pde_opener {
-	struct file *file;
 	struct list_head lh;
+	struct file *file;
 	bool closing;
 	struct completion *c;
 } __randomize_layout;
-- 
cgit v1.2.3-59-g8ed1b


From 9573e8f70a82bcbac95b1ea222ac9d5e50266f9f Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Wed, 4 Dec 2019 16:50:08 -0800
Subject: include/linux/proc_fs.h: fix confusing macro arg name

state_size and ops are in the wrong position.

Link: http://lkml.kernel.org/r/20190910021747.11216-1-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Aleksa Sarai <cyphar@cyphar.com>
Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/proc_fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index a705aa2d03f9..0640be56dcbd 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -58,8 +58,8 @@ extern int remove_proc_subtree(const char *, struct proc_dir_entry *);
 struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
 		struct proc_dir_entry *parent, const struct seq_operations *ops,
 		unsigned int state_size, void *data);
-#define proc_create_net(name, mode, parent, state_size, ops) \
-	proc_create_net_data(name, mode, parent, state_size, ops, NULL)
+#define proc_create_net(name, mode, parent, ops, state_size) \
+	proc_create_net_data(name, mode, parent, ops, state_size, NULL)
 struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
 		struct proc_dir_entry *parent,
 		int (*show)(struct seq_file *, void *), void *data);
-- 
cgit v1.2.3-59-g8ed1b


From 3d82191c22e20836812afdaef22c5965ae2c55b8 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Wed, 4 Dec 2019 16:50:11 -0800
Subject: fs/proc/Kconfig: fix indentation

Adjust indentation from spaces to tab (+optional two spaces) as in
coding style with command like:
        $ sed -e 's/^        /	/' -i */Kconfig

[adobriyan@gmail.com: add two spaces where necessary]
Link: http://lkml.kernel.org/r/20191124133936.GA5655@avx2
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/Kconfig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index cb5629bd5fff..733881a6387b 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -42,8 +42,8 @@ config PROC_VMCORE
 	bool "/proc/vmcore support"
 	depends on PROC_FS && CRASH_DUMP
 	default y
-        help
-        Exports the dump image of crashed kernel in ELF format.
+	help
+	  Exports the dump image of crashed kernel in ELF format.
 
 config PROC_VMCORE_DEVICE_DUMP
 	bool "Device Hardware/Firmware Log Collection"
@@ -72,7 +72,7 @@ config PROC_SYSCTL
 	  a recompile of the kernel or reboot of the system.  The primary
 	  interface is through /proc/sys.  If you say Y here a tree of
 	  modifiable sysctl entries will be generated beneath the
-          /proc/sys directory. They are explained in the files
+	  /proc/sys directory. They are explained in the files
 	  in <file:Documentation/admin-guide/sysctl/>.  Note that enabling this
 	  option will enlarge the kernel by at least 8 KB.
 
@@ -88,7 +88,7 @@ config PROC_PAGE_MONITOR
 	  Various /proc files exist to monitor process memory utilization:
 	  /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap,
 	  /proc/kpagecount, and /proc/kpageflags. Disabling these
-          interfaces will reduce the size of the kernel by approximately 4kb.
+	  interfaces will reduce the size of the kernel by approximately 4kb.
 
 config PROC_CHILDREN
 	bool "Include /proc/<pid>/task/<tid>/children file"
-- 
cgit v1.2.3-59-g8ed1b


From d5ffb71b633cd5c4b8cce633c9d6448dced4eb74 Mon Sep 17 00:00:00 2001
From: Alessio Balsini <balsini@android.com>
Date: Wed, 4 Dec 2019 16:50:14 -0800
Subject: include/linux/sysctl.h: inline braces for ctl_table and
 ctl_table_header

Fix coding style of "struct ctl_table" and "struct ctl_table_header" to
have inline braces.

Besides the wide use of this proposed cose style, this change helps to
find at a glance the struct definition when navigating the code.

Link: http://lkml.kernel.org/r/20190903154906.188651-1-balsini@android.com
Signed-off-by: Alessio Balsini <balsini@android.com>
Acked-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sysctl.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 6df477329b76..02fa84493f23 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -120,8 +120,7 @@ static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
 	struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name)
 
 /* A sysctl table is an array of struct ctl_table: */
-struct ctl_table 
-{
+struct ctl_table {
 	const char *procname;		/* Text ID for /proc/sys, or zero */
 	void *data;
 	int maxlen;
@@ -140,8 +139,7 @@ struct ctl_node {
 
 /* struct ctl_table_header is used to maintain dynamic lists of
    struct ctl_table trees. */
-struct ctl_table_header
-{
+struct ctl_table_header {
 	union {
 		struct {
 			struct ctl_table *ctl_table;
-- 
cgit v1.2.3-59-g8ed1b


From a512ae54cee18296384f45201b3118345ce0dc80 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Wed, 4 Dec 2019 16:50:17 -0800
Subject: .gitattributes: use 'dts' diff driver for dts files

Git is gaining support to display the closest node to the diff in the
hunk header via the 'dts' diff driver.  Use that driver for all dts and
dtsi files so we can gain some more context on where the diff is.
Taking a recent commit in the kernel dts files you can see the
difference.

With this patch and an updated git

: diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
:  index 62e07e1197cc..4c38426a6969 100644
:  --- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
:  +++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
:  @@ -289,5 +289,29 @@ vdd_hdmi: regulator@1 {
:                          gpio = <&gpio TEGRA194_MAIN_GPIO(A, 3) GPIO_ACTIVE_HIGH>;
:                          enable-active-high;
:                  };
:  +
:  +               vdd_3v3_pcie: regulator@2 {
:  +                       compatible = "regulator-fixed";

vs. without this patch

:  diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
:  index 62e07e1197cc..4c38426a6969 100644
:  --- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
:  +++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
:  @@ -289,5 +289,29 @@
:                          gpio = <&gpio TEGRA194_MAIN_GPIO(A, 3) GPIO_ACTIVE_HIGH>;
:                          enable-active-high;
:                  };
:  +
:  +               vdd_3v3_pcie: regulator@2 {
:  +                       compatible = "regulator-fixed";

You can see that we don't know what the context node is because it isn't
shown after the '@@'.

dts is not released yet but it is staged to be in the next release[1].
One can probably build git from source and try it out.

[1] https://git.kernel.org/pub/scm/git/git.git/commit/?id=d49c2c3466d2c8cb0b3d0a43e6b406b07078fdb1

Link: http://lkml.kernel.org/r/20191004212311.141538-1-swboyd@chromium.org
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Frank Rowand <frowand.list@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .gitattributes | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitattributes b/.gitattributes
index 89c411b5ce6b..4b32eaa9571e 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1,4 @@
 *.c   diff=cpp
 *.h   diff=cpp
+*.dtsi diff=dts
+*.dts  diff=dts
-- 
cgit v1.2.3-59-g8ed1b


From 8788994376d84d627450fd0d67deb6a66ddf07d7 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Wed, 4 Dec 2019 16:50:20 -0800
Subject: linux/build_bug.h: change type to int

Having BUILD_BUG_ON_ZERO produce a value of type size_t leads to awkward
casts in cases where the result needs to be signed, or of smaller type
than size_t.  To avoid this, cast the value to int instead and rely on
implicit type conversions when a larger or unsigned type is needed.

Link: http://lkml.kernel.org/r/20190811184938.1796-3-rikard.falkeborn@gmail.com
Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Suggested-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Joe Perches <joe@perches.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/build_bug.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
index 0fe5426f2bdc..e3a0be2c90ad 100644
--- a/include/linux/build_bug.h
+++ b/include/linux/build_bug.h
@@ -9,11 +9,11 @@
 #else /* __CHECKER__ */
 /*
  * Force a compilation error if condition is true, but also produce a
- * result (of value 0 and type size_t), so the expression can be used
+ * result (of value 0 and type int), so the expression can be used
  * e.g. in a structure initializer (or where-ever else comma expressions
  * aren't permitted).
  */
-#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); }))
+#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
 #endif /* __CHECKER__ */
 
 /* Force a compilation error if a constant expression is not a power of 2 */
-- 
cgit v1.2.3-59-g8ed1b


From 1a18374fc370da5a335b061fe94ebf677b07bd1d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 4 Dec 2019 16:50:23 -0800
Subject: linux/scc.h: make uapi linux/scc.h self-contained

Userspace cannot compile <linux/scc.h>

    CC      usr/include/linux/scc.h.s
  In file included from <command-line>:32:0:
  usr/include/linux/scc.h:20:20: error: `SIOCDEVPRIVATE' undeclared here (not in a function)
    SIOCSCCRESERVED = SIOCDEVPRIVATE,
                      ^~~~~~~~~~~~~~

Include <linux/sockios.h> to make it self-contained, and add it to the
compile-test coverage.

Link: http://lkml.kernel.org/r/20191108055809.26969-1-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/scc.h | 1 +
 usr/include/Makefile     | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/scc.h b/include/uapi/linux/scc.h
index c5bc7f747755..947edb17ce9d 100644
--- a/include/uapi/linux/scc.h
+++ b/include/uapi/linux/scc.h
@@ -4,6 +4,7 @@
 #ifndef _UAPI_SCC_H
 #define _UAPI_SCC_H
 
+#include <linux/sockios.h>
 
 /* selection of hardware types */
 
diff --git a/usr/include/Makefile b/usr/include/Makefile
index 24543a30b9f0..5acd9bb6d714 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -40,7 +40,6 @@ header-test- += linux/omapfb.h
 header-test- += linux/patchkey.h
 header-test- += linux/phonet.h
 header-test- += linux/reiserfs_xattr.h
-header-test- += linux/scc.h
 header-test- += linux/sctp.h
 header-test- += linux/signal.h
 header-test- += linux/sysctl.h
-- 
cgit v1.2.3-59-g8ed1b


From 24b54fee106d8f9ea41f71e366e03f6a3f083a15 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Wed, 4 Dec 2019 16:50:26 -0800
Subject: arch/Kconfig: fix indentation

Adjust indentation from spaces to tab (+optional two spaces) as in
coding style with command like:
	$ sed -e 's/^        /	/' -i */Kconfig

Link: http://lkml.kernel.org/r/1574306573-10886-1-git-send-email-krzk@kernel.org
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Kosina <trivial@kernel.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 7b861fe3f900..48b5e103bdb0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -72,11 +72,11 @@ config KPROBES
 	  If in doubt, say "N".
 
 config JUMP_LABEL
-       bool "Optimize very unlikely/likely branches"
-       depends on HAVE_ARCH_JUMP_LABEL
-       depends on CC_HAS_ASM_GOTO
-       help
-         This option enables a transparent branch optimization that
+	bool "Optimize very unlikely/likely branches"
+	depends on HAVE_ARCH_JUMP_LABEL
+	depends on CC_HAS_ASM_GOTO
+	help
+	 This option enables a transparent branch optimization that
 	 makes certain almost-always-true or almost-always-false branch
 	 conditions even cheaper to execute within the kernel.
 
@@ -84,7 +84,7 @@ config JUMP_LABEL
 	 scheduler functionality, networking code and KVM have such
 	 branches and include support for this optimization technique.
 
-         If it is detected that the compiler has support for "asm goto",
+	 If it is detected that the compiler has support for "asm goto",
 	 the kernel will compile such branches with just a nop
 	 instruction. When the condition flag is toggled to true, the
 	 nop will be converted to a jump instruction to execute the
@@ -151,8 +151,8 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS
 	  information on the topic of unaligned memory accesses.
 
 config ARCH_USE_BUILTIN_BSWAP
-       bool
-       help
+	bool
+	help
 	 Modern versions of GCC (since 4.4) have builtin functions
 	 for handling byte-swapping. Using these, instead of the old
 	 inline assembler that the architecture code provides in the
@@ -221,10 +221,10 @@ config HAVE_DMA_CONTIGUOUS
 	bool
 
 config GENERIC_SMP_IDLE_THREAD
-       bool
+	bool
 
 config GENERIC_IDLE_POLL_SETUP
-       bool
+	bool
 
 config ARCH_HAS_FORTIFY_SOURCE
 	bool
@@ -257,7 +257,7 @@ config ARCH_HAS_UNCACHED_SEGMENT
 
 # Select if arch init_task must go in the __init_task_data section
 config ARCH_TASK_STRUCT_ON_STACK
-       bool
+	bool
 
 # Select if arch has its private alloc_task_struct() function
 config ARCH_TASK_STRUCT_ALLOCATOR
-- 
cgit v1.2.3-59-g8ed1b


From 2f5bd343694ed53b3abc4a616ce975505271afe7 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 4 Dec 2019 16:50:29 -0800
Subject: scripts/get_maintainer.pl: add signatures from Fixes: <badcommit>
 lines in commit message

A Fixes: lines in a commit message generally indicate that a previous
commit was inadequate for whatever reason.

The signers of the previous inadequate commit should also be cc'd on
this new commit so update get_maintainer to find the old commit and add
the original signers.

Link: http://lkml.kernel.org/r/33605b9fc0e0f711236951ae84185a6218acff4f.camel@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/get_maintainer.pl | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 5ef59214c555..34085d146fa2 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -26,6 +26,7 @@ my $email = 1;
 my $email_usename = 1;
 my $email_maintainer = 1;
 my $email_reviewer = 1;
+my $email_fixes = 1;
 my $email_list = 1;
 my $email_moderated_list = 1;
 my $email_subscriber_list = 0;
@@ -249,6 +250,7 @@ if (!GetOptions(
 		'r!' => \$email_reviewer,
 		'n!' => \$email_usename,
 		'l!' => \$email_list,
+		'fixes!' => \$email_fixes,
 		'moderated!' => \$email_moderated_list,
 		's!' => \$email_subscriber_list,
 		'multiline!' => \$output_multiline,
@@ -503,6 +505,7 @@ sub read_mailmap {
 ## use the filenames on the command line or find the filenames in the patchfiles
 
 my @files = ();
+my @fixes = ();			# If a patch description includes Fixes: lines
 my @range = ();
 my @keyword_tvi = ();
 my @file_emails = ();
@@ -568,6 +571,8 @@ foreach my $file (@ARGV) {
 		my $filename2 = $2;
 		push(@files, $filename1);
 		push(@files, $filename2);
+	    } elsif (m/^Fixes:\s+([0-9a-fA-F]{6,40})/) {
+		push(@fixes, $1) if ($email_fixes);
 	    } elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
 		my $filename = $1;
 		$filename =~ s@^[^/]*/@@;
@@ -598,6 +603,7 @@ foreach my $file (@ARGV) {
 }
 
 @file_emails = uniq(@file_emails);
+@fixes = uniq(@fixes);
 
 my %email_hash_name;
 my %email_hash_address;
@@ -612,7 +618,6 @@ my %deduplicate_name_hash = ();
 my %deduplicate_address_hash = ();
 
 my @maintainers = get_maintainers();
-
 if (@maintainers) {
     @maintainers = merge_email(@maintainers);
     output(@maintainers);
@@ -927,6 +932,10 @@ sub get_maintainers {
 	}
     }
 
+    foreach my $fix (@fixes) {
+	vcs_add_commit_signers($fix, "blamed_fixes");
+    }
+
     foreach my $email (@email_to, @list_to) {
 	$email->[0] = deduplicate_email($email->[0]);
     }
@@ -1031,6 +1040,7 @@ MAINTAINER field selection options:
     --roles => show roles (status:subsystem, git-signer, list, etc...)
     --rolestats => show roles and statistics (commits/total_commits, %)
     --file-emails => add email addresses found in -f file (default: 0 (off))
+    --fixes => for patches, add signatures of commits with 'Fixes: <commit>' (default: 1 (on))
   --scm => print SCM tree(s) if any
   --status => print status if any
   --subsystem => print subsystem name if any
@@ -1730,6 +1740,32 @@ sub vcs_is_hg {
     return $vcs_used == 2;
 }
 
+sub vcs_add_commit_signers {
+    return if (!vcs_exists());
+
+    my ($commit, $desc) = @_;
+    my $commit_count = 0;
+    my $commit_authors_ref;
+    my $commit_signers_ref;
+    my $stats_ref;
+    my @commit_authors = ();
+    my @commit_signers = ();
+    my $cmd;
+
+    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
+    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
+
+    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, "");
+    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
+    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
+
+    foreach my $signer (@commit_signers) {
+	$signer = deduplicate_email($signer);
+    }
+
+    vcs_assign($desc, 1, @commit_signers);
+}
+
 sub interactive_get_maintainers {
     my ($list_ref) = @_;
     my @list = @$list_ref;
-- 
cgit v1.2.3-59-g8ed1b


From 885e68e8b7b1328aa1e28b27e13fbfb5f020d269 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Dec 2019 16:50:32 -0800
Subject: kernel.h: update comment about simple_strto<foo>() functions

There were discussions in the past about use cases for
simple_strto<foo>() functions and, in some rare cases, they have a
benefit over kstrto<foo>() ones.

Update a comment to reduce confusion about special use cases.

Link: http://lkml.kernel.org/r/20190801192904.41087-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Suggested-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Mans Rullgard <mans@mansr.com>
Cc: Petr Mladek <pmladek@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 09f759228e3f..3adcb39fa6f5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -348,8 +348,7 @@ int __must_check kstrtoll(const char *s, unsigned int base, long long *res);
  * @res: Where to write the result of the conversion on success.
  *
  * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
- * Used as a replacement for the obsolete simple_strtoull. Return code must
- * be checked.
+ * Used as a replacement for the simple_strtoull. Return code must be checked.
 */
 static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res)
 {
@@ -377,8 +376,7 @@ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsign
  * @res: Where to write the result of the conversion on success.
  *
  * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
- * Used as a replacement for the obsolete simple_strtoull. Return code must
- * be checked.
+ * Used as a replacement for the simple_strtoull. Return code must be checked.
  */
 static inline int __must_check kstrtol(const char *s, unsigned int base, long *res)
 {
@@ -454,7 +452,18 @@ static inline int __must_check kstrtos32_from_user(const char __user *s, size_t
 	return kstrtoint_from_user(s, count, base, res);
 }
 
-/* Obsolete, do not use.  Use kstrto<foo> instead */
+/*
+ * Use kstrto<foo> instead.
+ *
+ * NOTE: simple_strto<foo> does not check for the range overflow and,
+ *	 depending on the input, may give interesting results.
+ *
+ * Use these functions if and only if you cannot use kstrto<foo>, because
+ * the conversion ends on the first non-digit character, which may be far
+ * beyond the supported range. It might be useful to parse the strings like
+ * 10x50 or 12:21 without altering original string or temporary buffer in use.
+ * Keep in mind above caveat.
+ */
 
 extern unsigned long simple_strtoul(const char *,char **,unsigned int);
 extern long simple_strtol(const char *,char **,unsigned int);
-- 
cgit v1.2.3-59-g8ed1b


From d717e7da45b382c09cee4f1e03ce30124e672e07 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Dec 2019 16:50:36 -0800
Subject: auxdisplay: charlcd: deduplicate simple_strtoul()

Like in commit 8b2303de399f ("serial: core: Fix handling of options
after MMIO address") we may use simple_strtoul() which in comparison to
kstrtoul() can do conversion in-place without additional and unnecessary
code to be written.

Link: http://lkml.kernel.org/r/20190801192904.41087-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Mans Rullgard <mans@mansr.com>
Cc: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/auxdisplay/charlcd.c | 34 +++++++---------------------------
 1 file changed, 7 insertions(+), 27 deletions(-)

diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c
index bef6b85778b6..874c259a8829 100644
--- a/drivers/auxdisplay/charlcd.c
+++ b/drivers/auxdisplay/charlcd.c
@@ -287,31 +287,6 @@ static int charlcd_init_display(struct charlcd *lcd)
 	return 0;
 }
 
-/*
- * Parses an unsigned integer from a string, until a non-digit character
- * is found. The empty string is not accepted. No overflow checks are done.
- *
- * Returns whether the parsing was successful. Only in that case
- * the output parameters are written to.
- *
- * TODO: If the kernel adds an inplace version of kstrtoul(), this function
- * could be easily replaced by that.
- */
-static bool parse_n(const char *s, unsigned long *res, const char **next_s)
-{
-	if (!isdigit(*s))
-		return false;
-
-	*res = 0;
-	while (isdigit(*s)) {
-		*res = *res * 10 + (*s - '0');
-		++s;
-	}
-
-	*next_s = s;
-	return true;
-}
-
 /*
  * Parses a movement command of the form "(.*);", where the group can be
  * any number of subcommands of the form "(x|y)[0-9]+".
@@ -336,6 +311,7 @@ static bool parse_xy(const char *s, unsigned long *x, unsigned long *y)
 {
 	unsigned long new_x = *x;
 	unsigned long new_y = *y;
+	char *p;
 
 	for (;;) {
 		if (!*s)
@@ -345,11 +321,15 @@ static bool parse_xy(const char *s, unsigned long *x, unsigned long *y)
 			break;
 
 		if (*s == 'x') {
-			if (!parse_n(s + 1, &new_x, &s))
+			new_x = simple_strtoul(s + 1, &p, 10);
+			if (p == s + 1)
 				return false;
+			s = p;
 		} else if (*s == 'y') {
-			if (!parse_n(s + 1, &new_y, &s))
+			new_y = simple_strtoul(s + 1, &p, 10);
+			if (p == s + 1)
 				return false;
+			s = p;
 		} else {
 			return false;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 1a50cb80f219c44adb6265f5071b81fc3c1deced Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Wed, 4 Dec 2019 16:50:39 -0800
Subject: kernel/notifier.c: intercept duplicate registrations to avoid
 infinite loops

Registering the same notifier to a hook repeatedly can cause the hook
list to form a ring or lose other members of the list.

  case1: An infinite loop in notifier_chain_register() can cause soft lockup
          atomic_notifier_chain_register(&test_notifier_list, &test1);
          atomic_notifier_chain_register(&test_notifier_list, &test1);
          atomic_notifier_chain_register(&test_notifier_list, &test2);

  case2: An infinite loop in notifier_chain_register() can cause soft lockup
          atomic_notifier_chain_register(&test_notifier_list, &test1);
          atomic_notifier_chain_register(&test_notifier_list, &test1);
          atomic_notifier_call_chain(&test_notifier_list, 0, NULL);

  case3: lose other hook test2
          atomic_notifier_chain_register(&test_notifier_list, &test1);
          atomic_notifier_chain_register(&test_notifier_list, &test2);
          atomic_notifier_chain_register(&test_notifier_list, &test1);

  case4: Unregister returns 0, but the hook is still in the linked list,
         and it is not really registered. If you call
         notifier_call_chain after ko is unloaded, it will trigger oops.

If the system is configured with softlockup_panic and the same hook is
repeatedly registered on the panic_notifier_list, it will cause a loop
panic.

Add a check in notifier_chain_register(), intercepting duplicate
registrations to avoid infinite loops

Link: http://lkml.kernel.org/r/1568861888-34045-2-git-send-email-nixiaoming@huawei.com
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Reviewed-by: Vasily Averin <vvs@virtuozzo.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Anna Schumaker <anna.schumaker@netapp.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: J. Bruce Fields <bfields@fieldses.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Sam Protsenko <semen.protsenko@linaro.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Xiaoming Ni <nixiaoming@huawei.com>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/notifier.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/notifier.c b/kernel/notifier.c
index d9f5081d578d..30bedb8be6dd 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -23,7 +23,10 @@ static int notifier_chain_register(struct notifier_block **nl,
 		struct notifier_block *n)
 {
 	while ((*nl) != NULL) {
-		WARN_ONCE(((*nl) == n), "double register detected");
+		if (unlikely((*nl) == n)) {
+			WARN(1, "double register detected");
+			return 0;
+		}
 		if (n->priority > (*nl)->priority)
 			break;
 		nl = &((*nl)->next);
-- 
cgit v1.2.3-59-g8ed1b


From 5adaabb65a267d890b29193af2dbc38a3b85bbf2 Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Wed, 4 Dec 2019 16:50:43 -0800
Subject: kernel/notifier.c: remove notifier_chain_cond_register()

The only difference between notifier_chain_cond_register() and
notifier_chain_register() is the lack of warning hints for duplicate
registrations.  Use notifier_chain_register() instead of
notifier_chain_cond_register() to avoid duplicate code

Link: http://lkml.kernel.org/r/1568861888-34045-3-git-send-email-nixiaoming@huawei.com
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anna Schumaker <anna.schumaker@netapp.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: J. Bruce Fields <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Sam Protsenko <semen.protsenko@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Vasily Averin <vvs@virtuozzo.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/notifier.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/kernel/notifier.c b/kernel/notifier.c
index 30bedb8be6dd..e3d221f092fe 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -36,21 +36,6 @@ static int notifier_chain_register(struct notifier_block **nl,
 	return 0;
 }
 
-static int notifier_chain_cond_register(struct notifier_block **nl,
-		struct notifier_block *n)
-{
-	while ((*nl) != NULL) {
-		if ((*nl) == n)
-			return 0;
-		if (n->priority > (*nl)->priority)
-			break;
-		nl = &((*nl)->next);
-	}
-	n->next = *nl;
-	rcu_assign_pointer(*nl, n);
-	return 0;
-}
-
 static int notifier_chain_unregister(struct notifier_block **nl,
 		struct notifier_block *n)
 {
@@ -252,7 +237,7 @@ int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh,
 	int ret;
 
 	down_write(&nh->rwsem);
-	ret = notifier_chain_cond_register(&nh->head, n);
+	ret = notifier_chain_register(&nh->head, n);
 	up_write(&nh->rwsem);
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 260a2679e5cbfb3d8a4cf6cd1cb6f57e89c7e543 Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Wed, 4 Dec 2019 16:50:47 -0800
Subject: kernel/notifier.c: remove blocking_notifier_chain_cond_register()

blocking_notifier_chain_cond_register() does not consider system_booting
state, which is the only difference between this function and
blocking_notifier_cain_register().  This can be a bug and is a piece of
duplicate code.

Delete blocking_notifier_chain_cond_register()

Link: http://lkml.kernel.org/r/1568861888-34045-4-git-send-email-nixiaoming@huawei.com
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anna Schumaker <anna.schumaker@netapp.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: J. Bruce Fields <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Sam Protsenko <semen.protsenko@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Vasily Averin <vvs@virtuozzo.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/notifier.h |  4 ----
 kernel/notifier.c        | 23 -----------------------
 net/sunrpc/rpc_pipe.c    |  2 +-
 3 files changed, 1 insertion(+), 28 deletions(-)

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 0096a05395e3..018947611483 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -150,10 +150,6 @@ extern int raw_notifier_chain_register(struct raw_notifier_head *nh,
 extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
 		struct notifier_block *nb);
 
-extern int blocking_notifier_chain_cond_register(
-		struct blocking_notifier_head *nh,
-		struct notifier_block *nb);
-
 extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
 		struct notifier_block *nb);
 extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
diff --git a/kernel/notifier.c b/kernel/notifier.c
index e3d221f092fe..63d7501ac638 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -220,29 +220,6 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
 }
 EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
 
-/**
- *	blocking_notifier_chain_cond_register - Cond add notifier to a blocking notifier chain
- *	@nh: Pointer to head of the blocking notifier chain
- *	@n: New entry in notifier chain
- *
- *	Adds a notifier to a blocking notifier chain, only if not already
- *	present in the chain.
- *	Must be called in process context.
- *
- *	Currently always returns zero.
- */
-int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh,
-		struct notifier_block *n)
-{
-	int ret;
-
-	down_write(&nh->rwsem);
-	ret = notifier_chain_register(&nh->head, n);
-	up_write(&nh->rwsem);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(blocking_notifier_chain_cond_register);
-
 /**
  *	blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
  *	@nh: Pointer to head of the blocking notifier chain
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index b71a39ded930..39e14d5edaf1 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -51,7 +51,7 @@ static BLOCKING_NOTIFIER_HEAD(rpc_pipefs_notifier_list);
 
 int rpc_pipefs_notifier_register(struct notifier_block *nb)
 {
-	return blocking_notifier_chain_cond_register(&rpc_pipefs_notifier_list, nb);
+	return blocking_notifier_chain_register(&rpc_pipefs_notifier_list, nb);
 }
 EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_register);
 
-- 
cgit v1.2.3-59-g8ed1b


From ef70eff9dea66f38f8c2c2dcc7fe4b7a2bbb4921 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Wed, 4 Dec 2019 16:50:50 -0800
Subject: kernel/profile.c: use cpumask_available to check for NULL cpumask

When building with clang + -Wtautological-pointer-compare, these
instances pop up:

  kernel/profile.c:339:6: warning: comparison of array 'prof_cpu_mask' not equal to a null pointer is always true [-Wtautological-pointer-compare]
          if (prof_cpu_mask != NULL)
              ^~~~~~~~~~~~~    ~~~~
  kernel/profile.c:376:6: warning: comparison of array 'prof_cpu_mask' not equal to a null pointer is always true [-Wtautological-pointer-compare]
          if (prof_cpu_mask != NULL)
              ^~~~~~~~~~~~~    ~~~~
  kernel/profile.c:406:26: warning: comparison of array 'prof_cpu_mask' not equal to a null pointer is always true [-Wtautological-pointer-compare]
          if (!user_mode(regs) && prof_cpu_mask != NULL &&
                                ^~~~~~~~~~~~~    ~~~~
  3 warnings generated.

This can be addressed with the cpumask_available helper, introduced in
commit f7e30f01a9e2 ("cpumask: Add helper cpumask_available()") to fix
warnings like this while keeping the code the same.

Link: https://github.com/ClangBuiltLinux/linux/issues/747
Link: http://lkml.kernel.org/r/20191022191957.9554-1-natechancellor@gmail.com
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/profile.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/profile.c b/kernel/profile.c
index af7c94bf5fa1..4b144b02ca5d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -336,7 +336,7 @@ static int profile_dead_cpu(unsigned int cpu)
 	struct page *page;
 	int i;
 
-	if (prof_cpu_mask != NULL)
+	if (cpumask_available(prof_cpu_mask))
 		cpumask_clear_cpu(cpu, prof_cpu_mask);
 
 	for (i = 0; i < 2; i++) {
@@ -373,7 +373,7 @@ static int profile_prepare_cpu(unsigned int cpu)
 
 static int profile_online_cpu(unsigned int cpu)
 {
-	if (prof_cpu_mask != NULL)
+	if (cpumask_available(prof_cpu_mask))
 		cpumask_set_cpu(cpu, prof_cpu_mask);
 
 	return 0;
@@ -403,7 +403,7 @@ void profile_tick(int type)
 {
 	struct pt_regs *regs = get_irq_regs();
 
-	if (!user_mode(regs) && prof_cpu_mask != NULL &&
+	if (!user_mode(regs) && cpumask_available(prof_cpu_mask) &&
 	    cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
 		profile_hit(type, (void *)profile_pc(regs));
 }
-- 
cgit v1.2.3-59-g8ed1b


From 5e1aada08cd19ea652b2d32a250501d09b02ff2e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 4 Dec 2019 16:50:53 -0800
Subject: kernel/sys.c: avoid copying possible padding bytes in copy_to_user

Initialization is not guaranteed to zero padding bytes so use an
explicit memset instead to avoid leaking any kernel content in any
possible padding bytes.

Link: http://lkml.kernel.org/r/dfa331c00881d61c8ee51577a082d8bebd61805c.camel@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Dan Carpenter <error27@gmail.com>
Cc: Julia Lawall <julia.lawall@lip6.fr>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sys.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index d3aef31e24dc..a9331f101883 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1279,11 +1279,13 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
 
 SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
 {
-	struct oldold_utsname tmp = {};
+	struct oldold_utsname tmp;
 
 	if (!name)
 		return -EFAULT;
 
+	memset(&tmp, 0, sizeof(tmp));
+
 	down_read(&uts_sem);
 	memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN);
 	memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN);
-- 
cgit v1.2.3-59-g8ed1b


From 169c474fb22d8a5e909e172f177b957546d0519d Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:50:57 -0800
Subject: bitops: introduce the for_each_set_clump8 macro

Pach series "Introduce the for_each_set_clump8 macro", v18.

While adding GPIO get_multiple/set_multiple callback support for various
drivers, I noticed a pattern of looping manifesting that would be useful
standardized as a macro.

This patchset introduces the for_each_set_clump8 macro and utilizes it
in several GPIO drivers.  The for_each_set_clump macro8 facilitates a
for-loop syntax that iterates over a memory region entire groups of set
bits at a time.

For example, suppose you would like to iterate over a 32-bit integer 8
bits at a time, skipping over 8-bit groups with no set bit, where
XXXXXXXX represents the current 8-bit group:

    Example:        10111110 00000000 11111111 00110011
    First loop:     10111110 00000000 11111111 XXXXXXXX
    Second loop:    10111110 00000000 XXXXXXXX 00110011
    Third loop:     XXXXXXXX 00000000 11111111 00110011

Each iteration of the loop returns the next 8-bit group that has at
least one set bit.

The for_each_set_clump8 macro has four parameters:

    * start: set to the bit offset of the current clump
    * clump: set to the current clump value
    * bits: bitmap to search within
    * size: bitmap size in number of bits

In this version of the patchset, the for_each_set_clump macro has been
reimplemented and simplified based on the suggestions provided by Rasmus
Villemoes and Andy Shevchenko in the version 4 submission.

In particular, the function of the for_each_set_clump macro has been
restricted to handle only 8-bit clumps; the drivers that use the
for_each_set_clump macro only handle 8-bit ports so a generic
for_each_set_clump implementation is not necessary.  Thus, a solution
for large clumps (i.e.  those larger than the width of a bitmap word)
can be postponed until a driver appears that actually requires such a
generic for_each_set_clump implementation.

For what it's worth, a semi-generic for_each_set_clump (i.e.  for clumps
smaller than the width of a bitmap word) can be implemented by simply
replacing the hardcoded '8' and '0xFF' instances with respective
variables.  I have not yet had a need for such an implementation, and
since it falls short of a true generic for_each_set_clump function, I
have decided to forgo such an implementation for now.

In addition, the bitmap_get_value8 and bitmap_set_value8 functions are
introduced to get and set 8-bit values respectively.  Their use is based
on the behavior suggested in the patchset version 4 review.

This patch (of 14):

This macro iterates for each 8-bit group of bits (clump) with set bits,
within a bitmap memory region.  For each iteration, "start" is set to
the bit offset of the found clump, while the respective clump value is
stored to the location pointed by "clump".  Additionally, the
bitmap_get_value8 and bitmap_set_value8 functions are introduced to
respectively get and set an 8-bit value in a bitmap memory region.

[gustavo@embeddedor.com: fix potential sign-extension overflow]
  Link: http://lkml.kernel.org/r/20191015184657.GA26541@embeddedor
[akpm@linux-foundation.org: s/ULL/UL/, per Joe]
[vilhelm.gray@gmail.com: add for_each_set_clump8 documentation]
  Link: http://lkml.kernel.org/r/20191016161825.301082-1-vilhelm.gray@gmail.com
Link: http://lkml.kernel.org/r/893c3b4f03266c9496137cc98ac2b1bd27f92c73.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Suggested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Suggested-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Suggested-by: Lukas Wunner <lukas@wunner.de>
Tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bitops/find.h | 17 +++++++++++++++++
 include/linux/bitmap.h            | 35 +++++++++++++++++++++++++++++++++++
 include/linux/bitops.h            | 12 ++++++++++++
 lib/find_bit.c                    | 14 ++++++++++++++
 4 files changed, 78 insertions(+)

diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h
index 8a1ee10014de..9fdf21302fdf 100644
--- a/include/asm-generic/bitops/find.h
+++ b/include/asm-generic/bitops/find.h
@@ -80,4 +80,21 @@ extern unsigned long find_first_zero_bit(const unsigned long *addr,
 
 #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
 
+/**
+ * find_next_clump8 - find next 8-bit clump with set bits in a memory region
+ * @clump: location to store copy of found clump
+ * @addr: address to base the search on
+ * @size: bitmap size in number of bits
+ * @offset: bit offset at which to start searching
+ *
+ * Returns the bit offset for the next set clump; the found clump value is
+ * copied to the location pointed by @clump. If no bits are set, returns @size.
+ */
+extern unsigned long find_next_clump8(unsigned long *clump,
+				      const unsigned long *addr,
+				      unsigned long size, unsigned long offset);
+
+#define find_first_clump8(clump, bits, size) \
+	find_next_clump8((clump), (bits), (size), 0)
+
 #endif /*_ASM_GENERIC_BITOPS_FIND_H_ */
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 29fc933df3bf..9f046609e809 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -66,6 +66,8 @@
  *  bitmap_allocate_region(bitmap, pos, order)  Allocate specified bit region
  *  bitmap_from_arr32(dst, buf, nbits)          Copy nbits from u32[] buf to dst
  *  bitmap_to_arr32(buf, src, nbits)            Copy nbits from buf to u32[] dst
+ *  bitmap_get_value8(map, start)               Get 8bit value from map at start
+ *  bitmap_set_value8(map, value, start)        Set 8bit value to map at start
  *
  * Note, bitmap_zero() and bitmap_fill() operate over the region of
  * unsigned longs, that is, bits behind bitmap till the unsigned long
@@ -489,6 +491,39 @@ static inline void bitmap_from_u64(unsigned long *dst, u64 mask)
 		dst[1] = mask >> 32;
 }
 
+/**
+ * bitmap_get_value8 - get an 8-bit value within a memory region
+ * @map: address to the bitmap memory region
+ * @start: bit offset of the 8-bit value; must be a multiple of 8
+ *
+ * Returns the 8-bit value located at the @start bit offset within the @src
+ * memory region.
+ */
+static inline unsigned long bitmap_get_value8(const unsigned long *map,
+					      unsigned long start)
+{
+	const size_t index = BIT_WORD(start);
+	const unsigned long offset = start % BITS_PER_LONG;
+
+	return (map[index] >> offset) & 0xFF;
+}
+
+/**
+ * bitmap_set_value8 - set an 8-bit value within a memory region
+ * @map: address to the bitmap memory region
+ * @value: the 8-bit value; values wider than 8 bits may clobber bitmap
+ * @start: bit offset of the 8-bit value; must be a multiple of 8
+ */
+static inline void bitmap_set_value8(unsigned long *map, unsigned long value,
+				     unsigned long start)
+{
+	const size_t index = BIT_WORD(start);
+	const unsigned long offset = start % BITS_PER_LONG;
+
+	map[index] &= ~(0xFFUL << offset);
+	map[index] |= value << offset;
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __LINUX_BITMAP_H */
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index c94a9ff9f082..e479067c202c 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -47,6 +47,18 @@ extern unsigned long __sw_hweight64(__u64 w);
 	     (bit) < (size);					\
 	     (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
 
+/**
+ * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
+ * @start: bit offset to start search and to store the current iteration offset
+ * @clump: location to store copy of current 8-bit clump
+ * @bits: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_set_clump8(start, clump, bits, size) \
+	for ((start) = find_first_clump8(&(clump), (bits), (size)); \
+	     (start) < (size); \
+	     (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8))
+
 static inline int get_bitmask_order(unsigned int count)
 {
 	int order;
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 5c51eb45178a..e35a76b291e6 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -214,3 +214,17 @@ EXPORT_SYMBOL(find_next_bit_le);
 #endif
 
 #endif /* __BIG_ENDIAN */
+
+unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr,
+			       unsigned long size, unsigned long offset)
+{
+	offset = find_next_bit(addr, size, offset);
+	if (offset == size)
+		return size;
+
+	offset = round_down(offset, 8);
+	*clump = bitmap_get_value8(addr, offset);
+
+	return offset;
+}
+EXPORT_SYMBOL(find_next_clump8);
-- 
cgit v1.2.3-59-g8ed1b


From e4aa168de88da92f05dd5f13ceb63711011a9b04 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:51:01 -0800
Subject: lib/test_bitmap.c: add for_each_set_clump8 test cases

The introduction of the for_each_set_clump8 macro warrants test cases to
verify the implementation.  This patch adds test case checks for whether
an out-of-bounds clump index is returned, a zero clump is returned, or
the returned clump value differs from the expected clump value.

Link: http://lkml.kernel.org/r/febc0fb8151e3e3fdd61c34da9193d1c4d7e6c12.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_bitmap.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 51a98f7ee79e..dc167c13eb39 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -92,6 +92,36 @@ __check_eq_u32_array(const char *srcfile, unsigned int line,
 	return true;
 }
 
+static bool __init __check_eq_clump8(const char *srcfile, unsigned int line,
+				    const unsigned int offset,
+				    const unsigned int size,
+				    const unsigned char *const clump_exp,
+				    const unsigned long *const clump)
+{
+	unsigned long exp;
+
+	if (offset >= size) {
+		pr_warn("[%s:%u] bit offset for clump out-of-bounds: expected less than %u, got %u\n",
+			srcfile, line, size, offset);
+		return false;
+	}
+
+	exp = clump_exp[offset / 8];
+	if (!exp) {
+		pr_warn("[%s:%u] bit offset for zero clump: expected nonzero clump, got bit offset %u with clump value 0",
+			srcfile, line, offset);
+		return false;
+	}
+
+	if (*clump != exp) {
+		pr_warn("[%s:%u] expected clump value of 0x%lX, got clump value of 0x%lX",
+			srcfile, line, exp, *clump);
+		return false;
+	}
+
+	return true;
+}
+
 #define __expect_eq(suffix, ...)					\
 	({								\
 		int result = 0;						\
@@ -108,6 +138,7 @@ __check_eq_u32_array(const char *srcfile, unsigned int line,
 #define expect_eq_bitmap(...)		__expect_eq(bitmap, ##__VA_ARGS__)
 #define expect_eq_pbl(...)		__expect_eq(pbl, ##__VA_ARGS__)
 #define expect_eq_u32_array(...)	__expect_eq(u32_array, ##__VA_ARGS__)
+#define expect_eq_clump8(...)		__expect_eq(clump8, ##__VA_ARGS__)
 
 static void __init test_zero_clear(void)
 {
@@ -404,6 +435,39 @@ static void noinline __init test_mem_optimisations(void)
 	}
 }
 
+static const unsigned char clump_exp[] __initconst = {
+	0x01,	/* 1 bit set */
+	0x02,	/* non-edge 1 bit set */
+	0x00,	/* zero bits set */
+	0x38,	/* 3 bits set across 4-bit boundary */
+	0x38,	/* Repeated clump */
+	0x0F,	/* 4 bits set */
+	0xFF,	/* all bits set */
+	0x05,	/* non-adjacent 2 bits set */
+};
+
+static void __init test_for_each_set_clump8(void)
+{
+#define CLUMP_EXP_NUMBITS 64
+	DECLARE_BITMAP(bits, CLUMP_EXP_NUMBITS);
+	unsigned int start;
+	unsigned long clump;
+
+	/* set bitmap to test case */
+	bitmap_zero(bits, CLUMP_EXP_NUMBITS);
+	bitmap_set(bits, 0, 1);		/* 0x01 */
+	bitmap_set(bits, 9, 1);		/* 0x02 */
+	bitmap_set(bits, 27, 3);	/* 0x28 */
+	bitmap_set(bits, 35, 3);	/* 0x28 */
+	bitmap_set(bits, 40, 4);	/* 0x0F */
+	bitmap_set(bits, 48, 8);	/* 0xFF */
+	bitmap_set(bits, 56, 1);	/* 0x05 - part 1 */
+	bitmap_set(bits, 58, 1);	/* 0x05 - part 2 */
+
+	for_each_set_clump8(start, clump, bits, CLUMP_EXP_NUMBITS)
+		expect_eq_clump8(start, CLUMP_EXP_NUMBITS, clump_exp, &clump);
+}
+
 static void __init selftest(void)
 {
 	test_zero_clear();
@@ -413,6 +477,7 @@ static void __init selftest(void)
 	test_bitmap_parselist();
 	test_bitmap_parselist_user();
 	test_mem_optimisations();
+	test_for_each_set_clump8();
 }
 
 KSTM_MODULE_LOADERS(test_bitmap);
-- 
cgit v1.2.3-59-g8ed1b


From f70dad5d4c0fd4e184483b70939e935e060d9208 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:51:04 -0800
Subject: gpio: 104-dio-48e: utilize for_each_set_clump8 macro

Replace verbose implementation in get_multiple/set_multiple callbacks
with for_each_set_clump8 macro to simplify code and improve clarity.

Link: http://lkml.kernel.org/r/08b9c9a3e75ef1ab0d172223d10a1661f2b43fe2.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-104-dio-48e.c | 73 ++++++++++++-----------------------------
 1 file changed, 21 insertions(+), 52 deletions(-)

diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c
index 400c09b905f8..1f7d9bbec0fc 100644
--- a/drivers/gpio/gpio-104-dio-48e.c
+++ b/drivers/gpio/gpio-104-dio-48e.c
@@ -178,46 +178,25 @@ static int dio48e_gpio_get(struct gpio_chip *chip, unsigned offset)
 	return !!(port_state & mask);
 }
 
+static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
+
 static int dio48e_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
 	unsigned long *bits)
 {
 	struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
-	size_t i;
-	static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
-	const unsigned int gpio_reg_size = 8;
-	unsigned int bits_offset;
-	size_t word_index;
-	unsigned int word_offset;
-	unsigned long word_mask;
-	const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+	unsigned long offset;
+	unsigned long gpio_mask;
+	unsigned int port_addr;
 	unsigned long port_state;
 
 	/* clear bits array to a clean slate */
 	bitmap_zero(bits, chip->ngpio);
 
-	/* get bits are evaluated a gpio port register at a time */
-	for (i = 0; i < ARRAY_SIZE(ports); i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
-
-		/* word index for bits array */
-		word_index = BIT_WORD(bits_offset);
-
-		/* gpio offset within current word of bits array */
-		word_offset = bits_offset % BITS_PER_LONG;
-
-		/* mask of get bits for current gpio within current word */
-		word_mask = mask[word_index] & (port_mask << word_offset);
-		if (!word_mask) {
-			/* no get bits in this port so skip to next one */
-			continue;
-		}
-
-		/* read bits from current gpio port */
-		port_state = inb(dio48egpio->base + ports[i]);
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		port_addr = dio48egpio->base + ports[offset / 8];
+		port_state = inb(port_addr) & gpio_mask;
 
-		/* store acquired bits at respective bits array offset */
-		bits[word_index] |= (port_state << word_offset) & word_mask;
+		bitmap_set_value8(bits, port_state, offset);
 	}
 
 	return 0;
@@ -247,37 +226,27 @@ static void dio48e_gpio_set_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
-	unsigned int i;
-	const unsigned int gpio_reg_size = 8;
-	unsigned int port;
-	unsigned int out_port;
-	unsigned int bitmask;
+	unsigned long offset;
+	unsigned long gpio_mask;
+	size_t index;
+	unsigned int port_addr;
+	unsigned long bitmask;
 	unsigned long flags;
 
-	/* set bits are evaluated a gpio register size at a time */
-	for (i = 0; i < chip->ngpio; i += gpio_reg_size) {
-		/* no more set bits in this mask word; skip to the next word */
-		if (!mask[BIT_WORD(i)]) {
-			i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size;
-			continue;
-		}
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		index = offset / 8;
+		port_addr = dio48egpio->base + ports[index];
 
-		port = i / gpio_reg_size;
-		out_port = (port > 2) ? port + 1 : port;
-		bitmask = mask[BIT_WORD(i)] & bits[BIT_WORD(i)];
+		bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
 
 		raw_spin_lock_irqsave(&dio48egpio->lock, flags);
 
 		/* update output state data and set device gpio register */
-		dio48egpio->out_state[port] &= ~mask[BIT_WORD(i)];
-		dio48egpio->out_state[port] |= bitmask;
-		outb(dio48egpio->out_state[port], dio48egpio->base + out_port);
+		dio48egpio->out_state[index] &= ~gpio_mask;
+		dio48egpio->out_state[index] |= bitmask;
+		outb(dio48egpio->out_state[index], port_addr);
 
 		raw_spin_unlock_irqrestore(&dio48egpio->lock, flags);
-
-		/* prepare for next gpio register set */
-		mask[BIT_WORD(i)] >>= gpio_reg_size;
-		bits[BIT_WORD(i)] >>= gpio_reg_size;
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 9bfcce0db3cff35901fbf5d332f7e1f1fe097cfb Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:51:08 -0800
Subject: gpio: 104-idi-48: utilize for_each_set_clump8 macro

Replace verbose implementation in get_multiple/set_multiple callbacks
with for_each_set_clump8 macro to simplify code and improve clarity.

Link: http://lkml.kernel.org/r/b0631b6d489f85008480399df283ccd33ecfe310.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-104-idi-48.c | 36 +++++++-----------------------------
 1 file changed, 7 insertions(+), 29 deletions(-)

diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c
index c50329ab493a..d350ac0de06b 100644
--- a/drivers/gpio/gpio-104-idi-48.c
+++ b/drivers/gpio/gpio-104-idi-48.c
@@ -85,42 +85,20 @@ static int idi_48_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
 	unsigned long *bits)
 {
 	struct idi_48_gpio *const idi48gpio = gpiochip_get_data(chip);
-	size_t i;
+	unsigned long offset;
+	unsigned long gpio_mask;
 	static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
-	const unsigned int gpio_reg_size = 8;
-	unsigned int bits_offset;
-	size_t word_index;
-	unsigned int word_offset;
-	unsigned long word_mask;
-	const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+	unsigned int port_addr;
 	unsigned long port_state;
 
 	/* clear bits array to a clean slate */
 	bitmap_zero(bits, chip->ngpio);
 
-	/* get bits are evaluated a gpio port register at a time */
-	for (i = 0; i < ARRAY_SIZE(ports); i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		port_addr = idi48gpio->base + ports[offset / 8];
+		port_state = inb(port_addr) & gpio_mask;
 
-		/* word index for bits array */
-		word_index = BIT_WORD(bits_offset);
-
-		/* gpio offset within current word of bits array */
-		word_offset = bits_offset % BITS_PER_LONG;
-
-		/* mask of get bits for current gpio within current word */
-		word_mask = mask[word_index] & (port_mask << word_offset);
-		if (!word_mask) {
-			/* no get bits in this port so skip to next one */
-			continue;
-		}
-
-		/* read bits from current gpio port */
-		port_state = inb(idi48gpio->base + ports[i]);
-
-		/* store acquired bits at respective bits array offset */
-		bits[word_index] |= (port_state << word_offset) & word_mask;
+		bitmap_set_value8(bits, port_state, offset);
 	}
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From b0f49e9b9e2cf0b4c09998bd806dd6ede6c4ad61 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:51:11 -0800
Subject: gpio: gpio-mm: utilize for_each_set_clump8 macro

Replace verbose implementation in get_multiple/set_multiple callbacks
with for_each_set_clump8 macro to simplify code and improve clarity.

Link: http://lkml.kernel.org/r/0de53d7021b2d6db10294473cd8a1b6102bcec94.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-gpio-mm.c | 73 +++++++++++++--------------------------------
 1 file changed, 21 insertions(+), 52 deletions(-)

diff --git a/drivers/gpio/gpio-gpio-mm.c b/drivers/gpio/gpio-gpio-mm.c
index c22d6f94129c..b89b8c5ff1f5 100644
--- a/drivers/gpio/gpio-gpio-mm.c
+++ b/drivers/gpio/gpio-gpio-mm.c
@@ -167,46 +167,25 @@ static int gpiomm_gpio_get(struct gpio_chip *chip, unsigned int offset)
 	return !!(port_state & mask);
 }
 
+static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
+
 static int gpiomm_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
 	unsigned long *bits)
 {
 	struct gpiomm_gpio *const gpiommgpio = gpiochip_get_data(chip);
-	size_t i;
-	static const size_t ports[] = { 0, 1, 2, 4, 5, 6 };
-	const unsigned int gpio_reg_size = 8;
-	unsigned int bits_offset;
-	size_t word_index;
-	unsigned int word_offset;
-	unsigned long word_mask;
-	const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+	unsigned long offset;
+	unsigned long gpio_mask;
+	unsigned int port_addr;
 	unsigned long port_state;
 
 	/* clear bits array to a clean slate */
 	bitmap_zero(bits, chip->ngpio);
 
-	/* get bits are evaluated a gpio port register at a time */
-	for (i = 0; i < ARRAY_SIZE(ports); i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
-
-		/* word index for bits array */
-		word_index = BIT_WORD(bits_offset);
-
-		/* gpio offset within current word of bits array */
-		word_offset = bits_offset % BITS_PER_LONG;
-
-		/* mask of get bits for current gpio within current word */
-		word_mask = mask[word_index] & (port_mask << word_offset);
-		if (!word_mask) {
-			/* no get bits in this port so skip to next one */
-			continue;
-		}
-
-		/* read bits from current gpio port */
-		port_state = inb(gpiommgpio->base + ports[i]);
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		port_addr = gpiommgpio->base + ports[offset / 8];
+		port_state = inb(port_addr) & gpio_mask;
 
-		/* store acquired bits at respective bits array offset */
-		bits[word_index] |= (port_state << word_offset) & word_mask;
+		bitmap_set_value8(bits, port_state, offset);
 	}
 
 	return 0;
@@ -237,37 +216,27 @@ static void gpiomm_gpio_set_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct gpiomm_gpio *const gpiommgpio = gpiochip_get_data(chip);
-	unsigned int i;
-	const unsigned int gpio_reg_size = 8;
-	unsigned int port;
-	unsigned int out_port;
-	unsigned int bitmask;
+	unsigned long offset;
+	unsigned long gpio_mask;
+	size_t index;
+	unsigned int port_addr;
+	unsigned long bitmask;
 	unsigned long flags;
 
-	/* set bits are evaluated a gpio register size at a time */
-	for (i = 0; i < chip->ngpio; i += gpio_reg_size) {
-		/* no more set bits in this mask word; skip to the next word */
-		if (!mask[BIT_WORD(i)]) {
-			i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size;
-			continue;
-		}
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		index = offset / 8;
+		port_addr = gpiommgpio->base + ports[index];
 
-		port = i / gpio_reg_size;
-		out_port = (port > 2) ? port + 1 : port;
-		bitmask = mask[BIT_WORD(i)] & bits[BIT_WORD(i)];
+		bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
 
 		spin_lock_irqsave(&gpiommgpio->lock, flags);
 
 		/* update output state data and set device gpio register */
-		gpiommgpio->out_state[port] &= ~mask[BIT_WORD(i)];
-		gpiommgpio->out_state[port] |= bitmask;
-		outb(gpiommgpio->out_state[port], gpiommgpio->base + out_port);
+		gpiommgpio->out_state[index] &= ~gpio_mask;
+		gpiommgpio->out_state[index] |= bitmask;
+		outb(gpiommgpio->out_state[index], port_addr);
 
 		spin_unlock_irqrestore(&gpiommgpio->lock, flags);
-
-		/* prepare for next gpio register set */
-		mask[BIT_WORD(i)] >>= gpio_reg_size;
-		bits[BIT_WORD(i)] >>= gpio_reg_size;
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From acebb82fe9cd4d3dc09e7e28ffa5a5ecc76ae7f8 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:51:15 -0800
Subject: gpio: ws16c48: utilize for_each_set_clump8 macro

Replace verbose implementation in get_multiple/set_multiple callbacks
with for_each_set_clump8 macro to simplify code and improve clarity.

Link: http://lkml.kernel.org/r/7a0d2c964e7f2d289b16c63ff6b06fc1f4c50d4d.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-ws16c48.c | 73 +++++++++++++--------------------------------
 1 file changed, 20 insertions(+), 53 deletions(-)

diff --git a/drivers/gpio/gpio-ws16c48.c b/drivers/gpio/gpio-ws16c48.c
index fe456bea81f6..cb510df2b014 100644
--- a/drivers/gpio/gpio-ws16c48.c
+++ b/drivers/gpio/gpio-ws16c48.c
@@ -129,42 +129,19 @@ static int ws16c48_gpio_get_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	const unsigned int gpio_reg_size = 8;
-	size_t i;
-	const size_t num_ports = chip->ngpio / gpio_reg_size;
-	unsigned int bits_offset;
-	size_t word_index;
-	unsigned int word_offset;
-	unsigned long word_mask;
-	const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
+	unsigned long offset;
+	unsigned long gpio_mask;
+	unsigned int port_addr;
 	unsigned long port_state;
 
 	/* clear bits array to a clean slate */
 	bitmap_zero(bits, chip->ngpio);
 
-	/* get bits are evaluated a gpio port register at a time */
-	for (i = 0; i < num_ports; i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
+	for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
+		port_addr = ws16c48gpio->base + offset / 8;
+		port_state = inb(port_addr) & gpio_mask;
 
-		/* word index for bits array */
-		word_index = BIT_WORD(bits_offset);
-
-		/* gpio offset within current word of bits array */
-		word_offset = bits_offset % BITS_PER_LONG;
-
-		/* mask of get bits for current gpio within current word */
-		word_mask = mask[word_index] & (port_mask << word_offset);
-		if (!word_mask) {
-			/* no get bits in this port so skip to next one */
-			continue;
-		}
-
-		/* read bits from current gpio port */
-		port_state = inb(ws16c48gpio->base + i);
-
-		/* store acquired bits at respective bits array offset */
-		bits[word_index] |= (port_state << word_offset) & word_mask;
+		bitmap_set_value8(bits, port_state, offset);
 	}
 
 	return 0;
@@ -198,39 +175,29 @@ static void ws16c48_gpio_set_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	unsigned int i;
-	const unsigned int gpio_reg_size = 8;
-	unsigned int port;
-	unsigned int iomask;
-	unsigned int bitmask;
+	unsigned long offset;
+	unsigned long gpio_mask;
+	size_t index;
+	unsigned int port_addr;
+	unsigned long bitmask;
 	unsigned long flags;
 
-	/* set bits are evaluated a gpio register size at a time */
-	for (i = 0; i < chip->ngpio; i += gpio_reg_size) {
-		/* no more set bits in this mask word; skip to the next word */
-		if (!mask[BIT_WORD(i)]) {
-			i = (BIT_WORD(i) + 1) * BITS_PER_LONG - gpio_reg_size;
-			continue;
-		}
-
-		port = i / gpio_reg_size;
+	for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
+		index = offset / 8;
+		port_addr = ws16c48gpio->base + index;
 
 		/* mask out GPIO configured for input */
-		iomask = mask[BIT_WORD(i)] & ~ws16c48gpio->io_state[port];
-		bitmask = iomask & bits[BIT_WORD(i)];
+		gpio_mask &= ~ws16c48gpio->io_state[index];
+		bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
 
 		raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
 
 		/* update output state data and set device gpio register */
-		ws16c48gpio->out_state[port] &= ~iomask;
-		ws16c48gpio->out_state[port] |= bitmask;
-		outb(ws16c48gpio->out_state[port], ws16c48gpio->base + port);
+		ws16c48gpio->out_state[index] &= ~gpio_mask;
+		ws16c48gpio->out_state[index] |= bitmask;
+		outb(ws16c48gpio->out_state[index], port_addr);
 
 		raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
-
-		/* prepare for next gpio register set */
-		mask[BIT_WORD(i)] >>= gpio_reg_size;
-		bits[BIT_WORD(i)] >>= gpio_reg_size;
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 2dc7c3c16daac7317ef7458bad6b528aa3330951 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:51:18 -0800
Subject: gpio: pci-idio-16: utilize for_each_set_clump8 macro

Replace verbose implementation in get_multiple/set_multiple callbacks
with for_each_set_clump8 macro to simplify code and improve clarity.

Link: http://lkml.kernel.org/r/b30f131b4634caf5a70f12e01496f71631a17305.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-pci-idio-16.c | 75 +++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 48 deletions(-)

diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c
index df51dd08bdfe..638d6656ce73 100644
--- a/drivers/gpio/gpio-pci-idio-16.c
+++ b/drivers/gpio/gpio-pci-idio-16.c
@@ -100,45 +100,23 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-	size_t i;
-	const unsigned int gpio_reg_size = 8;
-	unsigned int bits_offset;
-	size_t word_index;
-	unsigned int word_offset;
-	unsigned long word_mask;
-	const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
-	unsigned long port_state;
+	unsigned long offset;
+	unsigned long gpio_mask;
 	void __iomem *ports[] = {
 		&idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15,
 		&idio16gpio->reg->in0_7, &idio16gpio->reg->in8_15,
 	};
+	void __iomem *port_addr;
+	unsigned long port_state;
 
 	/* clear bits array to a clean slate */
 	bitmap_zero(bits, chip->ngpio);
 
-	/* get bits are evaluated a gpio port register at a time */
-	for (i = 0; i < ARRAY_SIZE(ports); i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
-
-		/* word index for bits array */
-		word_index = BIT_WORD(bits_offset);
-
-		/* gpio offset within current word of bits array */
-		word_offset = bits_offset % BITS_PER_LONG;
-
-		/* mask of get bits for current gpio within current word */
-		word_mask = mask[word_index] & (port_mask << word_offset);
-		if (!word_mask) {
-			/* no get bits in this port so skip to next one */
-			continue;
-		}
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		port_addr = ports[offset / 8];
+		port_state = ioread8(port_addr) & gpio_mask;
 
-		/* read bits from current gpio port */
-		port_state = ioread8(ports[i]);
-
-		/* store acquired bits at respective bits array offset */
-		bits[word_index] |= (port_state << word_offset) & word_mask;
+		bitmap_set_value8(bits, port_state, offset);
 	}
 
 	return 0;
@@ -178,30 +156,31 @@ static void idio_16_gpio_set_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
+	unsigned long offset;
+	unsigned long gpio_mask;
+	void __iomem *ports[] = {
+		&idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15,
+	};
+	size_t index;
+	void __iomem *port_addr;
+	unsigned long bitmask;
 	unsigned long flags;
-	unsigned int out_state;
+	unsigned long out_state;
 
-	raw_spin_lock_irqsave(&idio16gpio->lock, flags);
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		index = offset / 8;
+		port_addr = ports[index];
 
-	/* process output lines 0-7 */
-	if (*mask & 0xFF) {
-		out_state = ioread8(&idio16gpio->reg->out0_7) & ~*mask;
-		out_state |= *mask & *bits;
-		iowrite8(out_state, &idio16gpio->reg->out0_7);
-	}
+		bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
+
+		raw_spin_lock_irqsave(&idio16gpio->lock, flags);
 
-	/* shift to next output line word */
-	*mask >>= 8;
+		out_state = ioread8(port_addr) & ~gpio_mask;
+		out_state |= bitmask;
+		iowrite8(out_state, port_addr);
 
-	/* process output lines 8-15 */
-	if (*mask & 0xFF) {
-		*bits >>= 8;
-		out_state = ioread8(&idio16gpio->reg->out8_15) & ~*mask;
-		out_state |= *mask & *bits;
-		iowrite8(out_state, &idio16gpio->reg->out8_15);
+		raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
 	}
-
-	raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
 }
 
 static void idio_16_irq_ack(struct irq_data *data)
-- 
cgit v1.2.3-59-g8ed1b


From c586aa8f480598f17e1e7d99d3ca7a7a0a6ffbd3 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:51:22 -0800
Subject: gpio: pcie-idio-24: utilize for_each_set_clump8 macro

Replace verbose implementation in get_multiple/set_multiple callbacks
with for_each_set_clump8 macro to simplify code and improve clarity.

Link: http://lkml.kernel.org/r/d5d22fa9809dcf8330f4381dbe7e7ca37990e79f.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-pcie-idio-24.c | 109 ++++++++++++++-------------------------
 1 file changed, 40 insertions(+), 69 deletions(-)

diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c
index 44c1e4fc489f..1d475794a50f 100644
--- a/drivers/gpio/gpio-pcie-idio-24.c
+++ b/drivers/gpio/gpio-pcie-idio-24.c
@@ -201,52 +201,34 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
-	size_t i;
-	const unsigned int gpio_reg_size = 8;
-	unsigned int bits_offset;
-	size_t word_index;
-	unsigned int word_offset;
-	unsigned long word_mask;
-	const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0);
-	unsigned long port_state;
+	unsigned long offset;
+	unsigned long gpio_mask;
 	void __iomem *ports[] = {
 		&idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
 		&idio24gpio->reg->out16_23, &idio24gpio->reg->in0_7,
 		&idio24gpio->reg->in8_15, &idio24gpio->reg->in16_23,
 	};
+	size_t index;
+	unsigned long port_state;
 	const unsigned long out_mode_mask = BIT(1);
 
 	/* clear bits array to a clean slate */
 	bitmap_zero(bits, chip->ngpio);
 
-	/* get bits are evaluated a gpio port register at a time */
-	for (i = 0; i < ARRAY_SIZE(ports) + 1; i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
-
-		/* word index for bits array */
-		word_index = BIT_WORD(bits_offset);
-
-		/* gpio offset within current word of bits array */
-		word_offset = bits_offset % BITS_PER_LONG;
-
-		/* mask of get bits for current gpio within current word */
-		word_mask = mask[word_index] & (port_mask << word_offset);
-		if (!word_mask) {
-			/* no get bits in this port so skip to next one */
-			continue;
-		}
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		index = offset / 8;
 
 		/* read bits from current gpio port (port 6 is TTL GPIO) */
-		if (i < 6)
-			port_state = ioread8(ports[i]);
+		if (index < 6)
+			port_state = ioread8(ports[index]);
 		else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask)
 			port_state = ioread8(&idio24gpio->reg->ttl_out0_7);
 		else
 			port_state = ioread8(&idio24gpio->reg->ttl_in0_7);
 
-		/* store acquired bits at respective bits array offset */
-		bits[word_index] |= (port_state << word_offset) & word_mask;
+		port_state &= gpio_mask;
+
+		bitmap_set_value8(bits, port_state, offset);
 	}
 
 	return 0;
@@ -297,59 +279,48 @@ static void idio_24_gpio_set_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
-	size_t i;
-	unsigned long bits_offset;
+	unsigned long offset;
 	unsigned long gpio_mask;
-	const unsigned int gpio_reg_size = 8;
-	const unsigned long port_mask = GENMASK(gpio_reg_size, 0);
-	unsigned long flags;
-	unsigned int out_state;
 	void __iomem *ports[] = {
 		&idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
 		&idio24gpio->reg->out16_23
 	};
+	size_t index;
+	unsigned long bitmask;
+	unsigned long flags;
+	unsigned long out_state;
 	const unsigned long out_mode_mask = BIT(1);
-	const unsigned int ttl_offset = 48;
-	const size_t ttl_i = BIT_WORD(ttl_offset);
-	const unsigned int word_offset = ttl_offset % BITS_PER_LONG;
-	const unsigned long ttl_mask = (mask[ttl_i] >> word_offset) & port_mask;
-	const unsigned long ttl_bits = (bits[ttl_i] >> word_offset) & ttl_mask;
-
-	/* set bits are processed a gpio port register at a time */
-	for (i = 0; i < ARRAY_SIZE(ports); i++) {
-		/* gpio offset in bits array */
-		bits_offset = i * gpio_reg_size;
-
-		/* check if any set bits for current port */
-		gpio_mask = (*mask >> bits_offset) & port_mask;
-		if (!gpio_mask) {
-			/* no set bits for this port so move on to next port */
-			continue;
-		}
 
-		raw_spin_lock_irqsave(&idio24gpio->lock, flags);
+	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
+		index = offset / 8;
 
-		/* process output lines */
-		out_state = ioread8(ports[i]) & ~gpio_mask;
-		out_state |= (*bits >> bits_offset) & gpio_mask;
-		iowrite8(out_state, ports[i]);
+		bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
 
-		raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
-	}
+		raw_spin_lock_irqsave(&idio24gpio->lock, flags);
 
-	/* check if setting TTL lines and if they are in output mode */
-	if (!ttl_mask || !(ioread8(&idio24gpio->reg->ctl) & out_mode_mask))
-		return;
+		/* read bits from current gpio port (port 6 is TTL GPIO) */
+		if (index < 6) {
+			out_state = ioread8(ports[index]);
+		} else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask) {
+			out_state = ioread8(&idio24gpio->reg->ttl_out0_7);
+		} else {
+			/* skip TTL GPIO if set for input */
+			raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+			continue;
+		}
 
-	/* handle TTL output */
-	raw_spin_lock_irqsave(&idio24gpio->lock, flags);
+		/* set requested bit states */
+		out_state &= ~gpio_mask;
+		out_state |= bitmask;
 
-	/* process output lines */
-	out_state = ioread8(&idio24gpio->reg->ttl_out0_7) & ~ttl_mask;
-	out_state |= ttl_bits;
-	iowrite8(out_state, &idio24gpio->reg->ttl_out0_7);
+		/* write bits for current gpio port (port 6 is TTL GPIO) */
+		if (index < 6)
+			iowrite8(out_state, ports[index]);
+		else
+			iowrite8(out_state, &idio24gpio->reg->ttl_out0_7);
 
-	raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+		raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+	}
 }
 
 static void idio_24_irq_ack(struct irq_data *data)
-- 
cgit v1.2.3-59-g8ed1b


From 17b6038942e304c522d0688c2b29b1c630f1cfa6 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:51:25 -0800
Subject: gpio: uniphier: utilize for_each_set_clump8 macro

Replace verbose implementation in set_multiple callback with
for_each_set_clump8 macro to simplify code and improve clarity.  An
improvement in this case is that banks that are not masked will now be
skipped.

Link: http://lkml.kernel.org/r/5b24887e97f3093e4832d7c50a1093f537e91ab4.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-uniphier.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c
index bd203e8fa58e..7ec97499b7f7 100644
--- a/drivers/gpio/gpio-uniphier.c
+++ b/drivers/gpio/gpio-uniphier.c
@@ -15,9 +15,6 @@
 #include <linux/spinlock.h>
 #include <dt-bindings/gpio/uniphier-gpio.h>
 
-#define UNIPHIER_GPIO_BANK_MASK		\
-				GENMASK((UNIPHIER_GPIO_LINES_PER_BANK) - 1, 0)
-
 #define UNIPHIER_GPIO_IRQ_MAX_NUM	24
 
 #define UNIPHIER_GPIO_PORT_DATA		0x0	/* data */
@@ -150,15 +147,11 @@ static void uniphier_gpio_set(struct gpio_chip *chip,
 static void uniphier_gpio_set_multiple(struct gpio_chip *chip,
 				       unsigned long *mask, unsigned long *bits)
 {
-	unsigned int bank, shift, bank_mask, bank_bits;
-	int i;
+	unsigned long i, bank, bank_mask, bank_bits;
 
-	for (i = 0; i < chip->ngpio; i += UNIPHIER_GPIO_LINES_PER_BANK) {
+	for_each_set_clump8(i, bank_mask, mask, chip->ngpio) {
 		bank = i / UNIPHIER_GPIO_LINES_PER_BANK;
-		shift = i % BITS_PER_LONG;
-		bank_mask = (mask[BIT_WORD(i)] >> shift) &
-						UNIPHIER_GPIO_BANK_MASK;
-		bank_bits = bits[BIT_WORD(i)] >> shift;
+		bank_bits = bitmap_get_value8(bits, i);
 
 		uniphier_gpio_bank_write(chip, bank, UNIPHIER_GPIO_PORT_DATA,
 					 bank_mask, bank_bits);
-- 
cgit v1.2.3-59-g8ed1b


From b2ca9ebfa68f47950e46b12f2d640f84ee5e1c98 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:51:29 -0800
Subject: gpio: 74x164: utilize the for_each_set_clump8 macro

Replace verbose implementation in set_multiple callback with
for_each_set_clump8 macro to simplify code and improve clarity.

Link: http://lkml.kernel.org/r/7ea2df7182a50a1136ca36edc46dffcb2446fd27.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-74x164.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c
index e81307f9754e..05637d585152 100644
--- a/drivers/gpio/gpio-74x164.c
+++ b/drivers/gpio/gpio-74x164.c
@@ -6,6 +6,7 @@
  *  Copyright (C) 2010 Miguel Gaio <miguel.gaio@efixo.com>
  */
 
+#include <linux/bitops.h>
 #include <linux/gpio/consumer.h>
 #include <linux/gpio/driver.h>
 #include <linux/module.h>
@@ -72,20 +73,18 @@ static void gen_74x164_set_multiple(struct gpio_chip *gc, unsigned long *mask,
 				    unsigned long *bits)
 {
 	struct gen_74x164_chip *chip = gpiochip_get_data(gc);
-	unsigned int i, idx, shift;
-	u8 bank, bankmask;
+	unsigned long offset;
+	unsigned long bankmask;
+	size_t bank;
+	unsigned long bitmask;
 
 	mutex_lock(&chip->lock);
-	for (i = 0, bank = chip->registers - 1; i < chip->registers;
-	     i++, bank--) {
-		idx = i / sizeof(*mask);
-		shift = i % sizeof(*mask) * BITS_PER_BYTE;
-		bankmask = mask[idx] >> shift;
-		if (!bankmask)
-			continue;
+	for_each_set_clump8(offset, bankmask, mask, chip->registers * 8) {
+		bank = chip->registers - 1 - offset / 8;
+		bitmask = bitmap_get_value8(bits, offset) & bankmask;
 
 		chip->buffer[bank] &= ~bankmask;
-		chip->buffer[bank] |= bankmask & (bits[idx] >> shift);
+		chip->buffer[bank] |= bitmask;
 	}
 	__gen_74x164_write_config(chip);
 	mutex_unlock(&chip->lock);
-- 
cgit v1.2.3-59-g8ed1b


From 9f00ebf518b80e4d58ab32966772b68511d015f2 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:51:32 -0800
Subject: thermal: intel: intel_soc_dts_iosf: Utilize for_each_set_clump8 macro

Utilize for_each_set_clump8 macro, and the bitmap_set_value8 and
bitmap_get_value8 functions, where appropriate.  In addition, remove the
now unnecessary temp_mask and temp_shift members of the
intel_soc_dts_sensor_entry structure.

Link: http://lkml.kernel.org/r/2d3c74e9a00a52954f31d19e04623a7f4bc85520.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/thermal/intel/intel_soc_dts_iosf.c | 31 +++++++++++++++++-------------
 drivers/thermal/intel/intel_soc_dts_iosf.h |  2 --
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index 5716b62e0f73..f75271b669c6 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -6,6 +6,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
@@ -103,6 +104,7 @@ static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts,
 	int status;
 	u32 temp_out;
 	u32 out;
+	unsigned long update_ptps;
 	u32 store_ptps;
 	u32 store_ptmc;
 	u32 store_te_out;
@@ -120,8 +122,10 @@ static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts,
 	if (status)
 		return status;
 
-	out = (store_ptps & ~(0xFF << (thres_index * 8)));
-	out |= (temp_out & 0xFF) << (thres_index * 8);
+	update_ptps = store_ptps;
+	bitmap_set_value8(&update_ptps, temp_out & 0xFF, thres_index * 8);
+	out = update_ptps;
+
 	status = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
 				SOC_DTS_OFFSET_PTPS, out);
 	if (status)
@@ -223,6 +227,7 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd,
 	u32 out;
 	struct intel_soc_dts_sensor_entry *dts;
 	struct intel_soc_dts_sensors *sensors;
+	unsigned long raw;
 
 	dts = tzd->devdata;
 	sensors = dts->sensors;
@@ -231,8 +236,8 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd,
 	if (status)
 		return status;
 
-	out = (out & dts->temp_mask) >> dts->temp_shift;
-	out -= SOC_DTS_TJMAX_ENCODING;
+	raw = out;
+	out = bitmap_get_value8(&raw, dts->id * 8) - SOC_DTS_TJMAX_ENCODING;
 	*temp = sensors->tj_max - out * 1000;
 
 	return 0;
@@ -280,11 +285,14 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
 				int read_only_trip_cnt)
 {
 	char name[10];
+	unsigned long trip;
 	int trip_count = 0;
 	int trip_mask = 0;
+	int writable_trip_cnt = 0;
+	unsigned long ptps;
 	u32 store_ptps;
+	unsigned long i;
 	int ret;
-	int i;
 
 	/* Store status to restor on exit */
 	ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
@@ -293,11 +301,10 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
 		goto err_ret;
 
 	dts->id = id;
-	dts->temp_mask = 0x00FF << (id * 8);
-	dts->temp_shift = id * 8;
 	if (notification_support) {
 		trip_count = min(SOC_MAX_DTS_TRIPS, trip_cnt);
-		trip_mask = BIT(trip_count - read_only_trip_cnt) - 1;
+		writable_trip_cnt = trip_count - read_only_trip_cnt;
+		trip_mask = GENMASK(writable_trip_cnt - 1, 0);
 	}
 
 	/* Check if the writable trip we provide is not used by BIOS */
@@ -306,11 +313,9 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
 	if (ret)
 		trip_mask = 0;
 	else {
-		for (i = 0; i < trip_count; ++i) {
-			if (trip_mask & BIT(i))
-				if (store_ptps & (0xff << (i * 8)))
-					trip_mask &= ~BIT(i);
-		}
+		ptps = store_ptps;
+		for_each_set_clump8(i, trip, &ptps, writable_trip_cnt * 8)
+			trip_mask &= ~BIT(i / 8);
 	}
 	dts->trip_mask = trip_mask;
 	dts->trip_count = trip_count;
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.h b/drivers/thermal/intel/intel_soc_dts_iosf.h
index adfb09af33fc..c54945748200 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.h
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.h
@@ -24,8 +24,6 @@ struct intel_soc_dts_sensors;
 
 struct intel_soc_dts_sensor_entry {
 	int id;
-	u32 temp_mask;
-	u32 temp_shift;
 	u32 store_status;
 	u32 trip_mask;
 	u32 trip_count;
-- 
cgit v1.2.3-59-g8ed1b


From 608bd5fda60d9e2083c8bb92f17adfae73afc37a Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:51:36 -0800
Subject: gpio: pisosr: utilize the for_each_set_clump8 macro

Replace verbose implementation in get_multiple callback with
for_each_set_clump8 macro to simplify code and improve clarity.

Link: http://lkml.kernel.org/r/8a39ee772247d4b7d752b32dbacc06c1cdcb60b5.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-pisosr.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpio/gpio-pisosr.c b/drivers/gpio/gpio-pisosr.c
index 1331b2a94679..6698feabaced 100644
--- a/drivers/gpio/gpio-pisosr.c
+++ b/drivers/gpio/gpio-pisosr.c
@@ -96,16 +96,16 @@ static int pisosr_gpio_get_multiple(struct gpio_chip *chip,
 				    unsigned long *mask, unsigned long *bits)
 {
 	struct pisosr_gpio *gpio = gpiochip_get_data(chip);
-	unsigned int nbytes = DIV_ROUND_UP(chip->ngpio, 8);
-	unsigned int i, j;
+	unsigned long offset;
+	unsigned long gpio_mask;
+	unsigned long buffer_state;
 
 	pisosr_gpio_refresh(gpio);
 
 	bitmap_zero(bits, chip->ngpio);
-	for (i = 0; i < nbytes; i++) {
-		j = i / sizeof(unsigned long);
-		bits[j] |= ((unsigned long) gpio->buffer[i])
-			   << (8 * (i % sizeof(unsigned long)));
+	for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
+		buffer_state = gpio->buffer[offset / 8] & gpio_mask;
+		bitmap_set_value8(bits, buffer_state, offset);
 	}
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From d077c78b45168bc869b5453bb1250325c5ed10ff Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:51:40 -0800
Subject: gpio: max3191x: utilize the for_each_set_clump8 macro

Replace verbose implementation in get_multiple callback with
for_each_set_clump8 macro to simplify code and improve clarity.

Link: http://lkml.kernel.org/r/c2b1ed62caf6fce6e5681809a50c05ce6acdf2a6.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-max3191x.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/gpio/gpio-max3191x.c b/drivers/gpio/gpio-max3191x.c
index 0696d5a21431..310d1a248cae 100644
--- a/drivers/gpio/gpio-max3191x.c
+++ b/drivers/gpio/gpio-max3191x.c
@@ -31,6 +31,7 @@
  */
 
 #include <linux/bitmap.h>
+#include <linux/bitops.h>
 #include <linux/crc8.h>
 #include <linux/gpio/consumer.h>
 #include <linux/gpio/driver.h>
@@ -232,16 +233,20 @@ static int max3191x_get_multiple(struct gpio_chip *gpio, unsigned long *mask,
 				 unsigned long *bits)
 {
 	struct max3191x_chip *max3191x = gpiochip_get_data(gpio);
-	int ret, bit = 0, wordlen = max3191x_wordlen(max3191x);
+	const unsigned int wordlen = max3191x_wordlen(max3191x);
+	int ret;
+	unsigned long bit;
+	unsigned long gpio_mask;
+	unsigned long in;
 
 	mutex_lock(&max3191x->lock);
 	ret = max3191x_readout_locked(max3191x);
 	if (ret)
 		goto out_unlock;
 
-	while ((bit = find_next_bit(mask, gpio->ngpio, bit)) != gpio->ngpio) {
+	bitmap_zero(bits, gpio->ngpio);
+	for_each_set_clump8(bit, gpio_mask, mask, gpio->ngpio) {
 		unsigned int chipnum = bit / MAX3191X_NGPIO;
-		unsigned long in, shift, index;
 
 		if (max3191x_chip_is_faulting(max3191x, chipnum)) {
 			ret = -EIO;
@@ -249,12 +254,8 @@ static int max3191x_get_multiple(struct gpio_chip *gpio, unsigned long *mask,
 		}
 
 		in = ((u8 *)max3191x->xfer.rx_buf)[chipnum * wordlen];
-		shift = round_down(bit % BITS_PER_LONG, MAX3191X_NGPIO);
-		index = bit / BITS_PER_LONG;
-		bits[index] &= ~(mask[index] & (0xff << shift));
-		bits[index] |= mask[index] & (in << shift); /* copy bits */
-
-		bit = (chipnum + 1) * MAX3191X_NGPIO; /* go to next chip */
+		in &= gpio_mask;
+		bitmap_set_value8(bits, in, bit);
 	}
 
 out_unlock:
-- 
cgit v1.2.3-59-g8ed1b


From ae81217edc18135d13e5b6c17609ee2c07af4188 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 4 Dec 2019 16:51:43 -0800
Subject: gpio: pca953x: utilize the for_each_set_clump8 macro

Replace verbose implementation in set_multiple callback with
for_each_set_clump8 macro to simplify code and improve clarity.

Link: http://lkml.kernel.org/r/3543ffc3668ad4ed4c00e8ebaf14a5559fd6ddf2.1570641097.git.vilhelm.gray@gmail.com
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Phil Reid <preid@electromag.com.au>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Morten Hein Tiljeset <morten.tiljeset@prevas.dk>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-pca953x.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 82122c3c688a..232e3f987511 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -10,6 +10,7 @@
 
 #include <linux/acpi.h>
 #include <linux/bits.h>
+#include <linux/bitops.h>
 #include <linux/gpio/driver.h>
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
@@ -459,7 +460,8 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc,
 				      unsigned long *mask, unsigned long *bits)
 {
 	struct pca953x_chip *chip = gpiochip_get_data(gc);
-	unsigned int bank_mask, bank_val;
+	unsigned long offset;
+	unsigned long bank_mask;
 	int bank;
 	u8 reg_val[MAX_BANK];
 	int ret;
@@ -469,15 +471,10 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc,
 	if (ret)
 		goto exit;
 
-	for (bank = 0; bank < NBANK(chip); bank++) {
-		bank_mask = mask[bank / sizeof(*mask)] >>
-			   ((bank % sizeof(*mask)) * 8);
-		if (bank_mask) {
-			bank_val = bits[bank / sizeof(*bits)] >>
-				  ((bank % sizeof(*bits)) * 8);
-			bank_val &= bank_mask;
-			reg_val[bank] = (reg_val[bank] & ~bank_mask) | bank_val;
-		}
+	for_each_set_clump8(offset, bank_mask, mask, gc->ngpio) {
+		bank = offset / 8;
+		reg_val[bank] &= ~bank_mask;
+		reg_val[bank] |= bitmap_get_value8(bits, offset) & bank_mask;
 	}
 
 	pca953x_write_regs(chip, chip->regs->output, reg_val);
-- 
cgit v1.2.3-59-g8ed1b


From 11d43e62f693c66c8c76c2ea2349e0f3c5764964 Mon Sep 17 00:00:00 2001
From: Wei Yang <richardw.yang@linux.intel.com>
Date: Wed, 4 Dec 2019 16:51:47 -0800
Subject: lib/rbtree: set successor's parent unconditionally

Both in Case 2 and 3, we exchange n and s.  This mean no matter whether
child2 is NULL or not, successor's parent should be assigned to node's.

This patch takes this step out to make it explicit and reduce the
ambiguity.

Besides, this step reduces some symbol size like rb_erase().

   KERN_CONFIG       upstream       patched
   OPT_FOR_PERF      877            870
   OPT_FOR_SIZE      635            621

Link: http://lkml.kernel.org/r/20191028021442.5450-1-richardw.yang@linux.intel.com
Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Michel Lespinasse <walken@google.com>
Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree_augmented.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index fdd421b8d9ae..99c42e1a74b8 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -283,14 +283,13 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
 		__rb_change_child(node, successor, tmp, root);
 
 		if (child2) {
-			successor->__rb_parent_color = pc;
 			rb_set_parent_color(child2, parent, RB_BLACK);
 			rebalance = NULL;
 		} else {
 			unsigned long pc2 = successor->__rb_parent_color;
-			successor->__rb_parent_color = pc;
 			rebalance = __rb_is_black(pc2) ? parent : NULL;
 		}
+		successor->__rb_parent_color = pc;
 		tmp = successor;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 8b7569a224a18953b9aee29c375e439b8a6eeb05 Mon Sep 17 00:00:00 2001
From: Wei Yang <richardw.yang@linux.intel.com>
Date: Wed, 4 Dec 2019 16:51:50 -0800
Subject: lib/rbtree: get successor's color directly

After move parent assignment out, we can check the color directly.

Link: http://lkml.kernel.org/r/20191028021442.5450-2-richardw.yang@linux.intel.com
Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Michel Lespinasse <walken@google.com>
Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree_augmented.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 99c42e1a74b8..724b0d036b57 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -286,8 +286,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
 			rb_set_parent_color(child2, parent, RB_BLACK);
 			rebalance = NULL;
 		} else {
-			unsigned long pc2 = successor->__rb_parent_color;
-			rebalance = __rb_is_black(pc2) ? parent : NULL;
+			rebalance = rb_is_black(successor) ? parent : NULL;
 		}
 		successor->__rb_parent_color = pc;
 		tmp = successor;
-- 
cgit v1.2.3-59-g8ed1b


From dc5c5ad79f0cc2d8756d161dbdee7b370f35f5bb Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Wed, 4 Dec 2019 16:51:53 -0800
Subject: lib/test_meminit.c: add bulk alloc/free tests

kmem_cache_alloc_bulk/kmem_cache_free_bulk are used to make multiple
allocations of the same size to avoid the overhead of multiple
kmalloc/kfree calls.  Extend the kmem_cache tests to make some calls to
these APIs.

Link: http://lkml.kernel.org/r/20191107191447.23058-1-labbott@redhat.com
Signed-off-by: Laura Abbott <labbott@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Tested-by: Alexander Potapenko <glider@google.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Kostya Serebryany <kcc@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Sandeep Patil <sspatil@android.com>
Cc: Jann Horn <jannh@google.com>
Cc: Marco Elver <elver@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_meminit.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/lib/test_meminit.c b/lib/test_meminit.c
index 9742e5cb853a..e4f706a404b3 100644
--- a/lib/test_meminit.c
+++ b/lib/test_meminit.c
@@ -183,6 +183,9 @@ static bool __init check_buf(void *buf, int size, bool want_ctor,
 	return fail;
 }
 
+#define BULK_SIZE 100
+static void *bulk_array[BULK_SIZE];
+
 /*
  * Test kmem_cache with given parameters:
  *  want_ctor - use a constructor;
@@ -203,9 +206,24 @@ static int __init do_kmem_cache_size(size_t size, bool want_ctor,
 			      want_rcu ? SLAB_TYPESAFE_BY_RCU : 0,
 			      want_ctor ? test_ctor : NULL);
 	for (iter = 0; iter < 10; iter++) {
+		/* Do a test of bulk allocations */
+		if (!want_rcu && !want_ctor) {
+			int ret;
+
+			ret = kmem_cache_alloc_bulk(c, alloc_mask, BULK_SIZE, bulk_array);
+			if (!ret) {
+				fail = true;
+			} else {
+				int i;
+				for (i = 0; i < ret; i++)
+					fail |= check_buf(bulk_array[i], size, want_ctor, want_rcu, want_zero);
+				kmem_cache_free_bulk(c, ret, bulk_array);
+			}
+		}
+
 		buf = kmem_cache_alloc(c, alloc_mask);
 		/* Check that buf is zeroed, if it must be. */
-		fail = check_buf(buf, size, want_ctor, want_rcu, want_zero);
+		fail |= check_buf(buf, size, want_ctor, want_rcu, want_zero);
 		fill_with_garbage_skip(buf, size, want_ctor ? CTOR_BYTES : 0);
 
 		if (!want_rcu) {
-- 
cgit v1.2.3-59-g8ed1b


From 323dd2c3ed0641f49e89b4e420f9eef5d3d5a881 Mon Sep 17 00:00:00 2001
From: Trent Piepho <tpiepho@gmail.com>
Date: Wed, 4 Dec 2019 16:51:57 -0800
Subject: lib/math/rational.c: fix possible incorrect result from rational
 fractions helper

In some cases the previous algorithm would not return the closest
approximation.  This would happen when a semi-convergent was the
closest, as the previous algorithm would only consider convergents.

As an example, consider an initial value of 5/4, and trying to find the
closest approximation with a maximum of 4 for numerator and denominator.
The previous algorithm would return 1/1 as the closest approximation,
while this version will return the correct answer of 4/3.

To do this, the main loop performs effectively the same operations as it
did before.  It must now keep track of the last three approximations,
n2/d2 ..  n0/d0, while before it only needed the last two.

If an exact answer is not found, the algorithm will now calculate the
best semi-convergent term, t, which is a single expression with two
divisions:

    min((max_numerator - n0) / n1, (max_denominator - d0) / d1)

This will be used if it is better than previous convergent.  The test
for this is generally a simple comparison, 2*t > a.  But in an edge
case, where the convergent's final term is even and the best allowable
semi-convergent has a final term of exactly half the convergent's final
term, the more complex comparison (d0*dp > d1*d) is used.

I also wrote some comments explaining the code.  While one still needs
to look up the math elsewhere, they should help a lot to follow how the
code relates to that math.

This routine is used in two places in the video4linux code, but in those
cases it is only used to reduce a fraction to lowest terms, which the
existing code will do correctly.  This could be done more efficiently
with a different library routine but it would still be the Euclidean
alogrithm at its heart.  So no change.

The remain users are places where a fractional PLL divider is
programmed.  What would happen is something asked for a clock of X MHz
but instead gets Y MHz, where Y is close to X but not exactly due to the
hardware limitations.  After this change they might, in some cases, get
Y' MHz, where Y' is a little closer to X then Y was.

Users like this are: Three UARTs, in 8250_mid, 8250_lpss, and imx.  One
GPU in vp4_hdmi.  And three clock drivers, clk-cdce706, clk-si5351, and
clk-fractional-divider.  The last is a generic clock driver and so would
have more users referenced via device tree entries.

I think there's a bug in that one, it's limiting an N bit field that is
offset-by-1 to the range 0 ..  (1<<N)-2, when it should be (1<<N)-1 as
the upper limit.

I have an IMX system, one of the UARTs using this, so I can provide a
real example.  If I request a custom baud rate of 1499978, the driver
will program the PLL to produce a baud rate of 1500000.  After this
change, the fractional divider in the UART is programmed to a ratio of
65535/65536, which produces a baud rate of 1499977.0625.  Closer to the
requested value.

Link: http://lkml.kernel.org/r/20190330205855.19396-1-tpiepho@gmail.com
Signed-off-by: Trent Piepho <tpiepho@gmail.com>
Cc: Oskar Schirmer <oskar@scara.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/math/rational.c | 63 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 13 deletions(-)

diff --git a/lib/math/rational.c b/lib/math/rational.c
index ba7443677c90..31fb27db2deb 100644
--- a/lib/math/rational.c
+++ b/lib/math/rational.c
@@ -3,6 +3,7 @@
  * rational fractions
  *
  * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <oskar@scara.com>
+ * Copyright (C) 2019 Trent Piepho <tpiepho@gmail.com>
  *
  * helper functions when coping with rational numbers
  */
@@ -10,6 +11,7 @@
 #include <linux/rational.h>
 #include <linux/compiler.h>
 #include <linux/export.h>
+#include <linux/kernel.h>
 
 /*
  * calculate best rational approximation for a given fraction
@@ -33,30 +35,65 @@ void rational_best_approximation(
 	unsigned long max_numerator, unsigned long max_denominator,
 	unsigned long *best_numerator, unsigned long *best_denominator)
 {
-	unsigned long n, d, n0, d0, n1, d1;
+	/* n/d is the starting rational, which is continually
+	 * decreased each iteration using the Euclidean algorithm.
+	 *
+	 * dp is the value of d from the prior iteration.
+	 *
+	 * n2/d2, n1/d1, and n0/d0 are our successively more accurate
+	 * approximations of the rational.  They are, respectively,
+	 * the current, previous, and two prior iterations of it.
+	 *
+	 * a is current term of the continued fraction.
+	 */
+	unsigned long n, d, n0, d0, n1, d1, n2, d2;
 	n = given_numerator;
 	d = given_denominator;
 	n0 = d1 = 0;
 	n1 = d0 = 1;
+
 	for (;;) {
-		unsigned long t, a;
-		if ((n1 > max_numerator) || (d1 > max_denominator)) {
-			n1 = n0;
-			d1 = d0;
-			break;
-		}
+		unsigned long dp, a;
+
 		if (d == 0)
 			break;
-		t = d;
+		/* Find next term in continued fraction, 'a', via
+		 * Euclidean algorithm.
+		 */
+		dp = d;
 		a = n / d;
 		d = n % d;
-		n = t;
-		t = n0 + a * n1;
+		n = dp;
+
+		/* Calculate the current rational approximation (aka
+		 * convergent), n2/d2, using the term just found and
+		 * the two prior approximations.
+		 */
+		n2 = n0 + a * n1;
+		d2 = d0 + a * d1;
+
+		/* If the current convergent exceeds the maxes, then
+		 * return either the previous convergent or the
+		 * largest semi-convergent, the final term of which is
+		 * found below as 't'.
+		 */
+		if ((n2 > max_numerator) || (d2 > max_denominator)) {
+			unsigned long t = min((max_numerator - n0) / n1,
+					      (max_denominator - d0) / d1);
+
+			/* This tests if the semi-convergent is closer
+			 * than the previous convergent.
+			 */
+			if (2u * t > a || (2u * t == a && d0 * dp > d1 * d)) {
+				n1 = n0 + t * n1;
+				d1 = d0 + t * d1;
+			}
+			break;
+		}
 		n0 = n1;
-		n1 = t;
-		t = d0 + a * d1;
+		n1 = n2;
 		d0 = d1;
-		d1 = t;
+		d1 = d2;
 	}
 	*best_numerator = n1;
 	*best_denominator = d1;
-- 
cgit v1.2.3-59-g8ed1b


From fd7eb2513f8549cf2f6b0c323377816268424bed Mon Sep 17 00:00:00 2001
From: Huang Shijie <sjhuang@iluvatar.ai>
Date: Wed, 4 Dec 2019 16:52:00 -0800
Subject: lib/genalloc.c: export symbol addr_in_gen_pool

We use addr_in_gen_pool() in a driver module.  So export it.

Link: http://lkml.kernel.org/r/20181224070622.22197-2-sjhuang@iluvatar.ai
Signed-off-by: Huang Shijie <sjhuang@iluvatar.ai>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexey Skidanov <alexey.skidanov@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/genalloc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/genalloc.c b/lib/genalloc.c
index 24d20ca7e91b..af9a57422186 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -567,6 +567,7 @@ bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
 	rcu_read_unlock();
 	return found;
 }
+EXPORT_SYMBOL(addr_in_gen_pool);
 
 /**
  * gen_pool_avail - get available free space of the pool
-- 
cgit v1.2.3-59-g8ed1b


From 964975ac6677c97ae61ec9d6969dd5d03f18d1c3 Mon Sep 17 00:00:00 2001
From: Huang Shijie <sjhuang@iluvatar.ai>
Date: Wed, 4 Dec 2019 16:52:03 -0800
Subject: lib/genalloc.c: rename addr_in_gen_pool to gen_pool_has_addr

Follow the kernel conventions, rename addr_in_gen_pool to
gen_pool_has_addr.

[sjhuang@iluvatar.ai: fix Documentation/ too]
 Link: http://lkml.kernel.org/r/20181229015914.5573-1-sjhuang@iluvatar.ai
Link: http://lkml.kernel.org/r/20181228083950.20398-1-sjhuang@iluvatar.ai
Signed-off-by: Huang Shijie <sjhuang@iluvatar.ai>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/core-api/genalloc.rst | 2 +-
 arch/arm/mm/dma-mapping.c           | 2 +-
 drivers/misc/sram-exec.c            | 2 +-
 include/linux/genalloc.h            | 2 +-
 kernel/dma/remap.c                  | 2 +-
 lib/genalloc.c                      | 6 +++---
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/Documentation/core-api/genalloc.rst b/Documentation/core-api/genalloc.rst
index 098a46f55798..a5af2cbf58a5 100644
--- a/Documentation/core-api/genalloc.rst
+++ b/Documentation/core-api/genalloc.rst
@@ -129,7 +129,7 @@ writing of special-purpose memory allocators in the future.
    :functions: gen_pool_for_each_chunk
 
 .. kernel-doc:: lib/genalloc.c
-   :functions: addr_in_gen_pool
+   :functions: gen_pool_has_addr
 
 .. kernel-doc:: lib/genalloc.c
    :functions: gen_pool_avail
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1df6eb42f22e..e822af0d9219 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -529,7 +529,7 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page)
 
 static bool __in_atomic_pool(void *start, size_t size)
 {
-	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+	return gen_pool_has_addr(atomic_pool, (unsigned long)start, size);
 }
 
 static int __free_from_pool(void *start, size_t size)
diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c
index 426ad912b441..d054e2842a5f 100644
--- a/drivers/misc/sram-exec.c
+++ b/drivers/misc/sram-exec.c
@@ -96,7 +96,7 @@ void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
 	if (!part)
 		return NULL;
 
-	if (!addr_in_gen_pool(pool, (unsigned long)dst, size))
+	if (!gen_pool_has_addr(pool, (unsigned long)dst, size))
 		return NULL;
 
 	base = (unsigned long)part->base;
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 4bd583bd6934..5b14a0f38124 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -206,7 +206,7 @@ extern struct gen_pool *devm_gen_pool_create(struct device *dev,
 		int min_alloc_order, int nid, const char *name);
 extern struct gen_pool *gen_pool_get(struct device *dev, const char *name);
 
-bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
+extern bool gen_pool_has_addr(struct gen_pool *pool, unsigned long start,
 			size_t size);
 
 #ifdef CONFIG_OF
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index d47bd40fc0f5..d14cbc83986a 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -178,7 +178,7 @@ bool dma_in_atomic_pool(void *start, size_t size)
 	if (unlikely(!atomic_pool))
 		return false;
 
-	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+	return gen_pool_has_addr(atomic_pool, (unsigned long)start, size);
 }
 
 void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
diff --git a/lib/genalloc.c b/lib/genalloc.c
index af9a57422186..7f1244b5294a 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -540,7 +540,7 @@ void gen_pool_for_each_chunk(struct gen_pool *pool,
 EXPORT_SYMBOL(gen_pool_for_each_chunk);
 
 /**
- * addr_in_gen_pool - checks if an address falls within the range of a pool
+ * gen_pool_has_addr - checks if an address falls within the range of a pool
  * @pool:	the generic memory pool
  * @start:	start address
  * @size:	size of the region
@@ -548,7 +548,7 @@ EXPORT_SYMBOL(gen_pool_for_each_chunk);
  * Check if the range of addresses falls within the specified pool. Returns
  * true if the entire range is contained in the pool and false otherwise.
  */
-bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
+bool gen_pool_has_addr(struct gen_pool *pool, unsigned long start,
 			size_t size)
 {
 	bool found = false;
@@ -567,7 +567,7 @@ bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
 	rcu_read_unlock();
 	return found;
 }
-EXPORT_SYMBOL(addr_in_gen_pool);
+EXPORT_SYMBOL(gen_pool_has_addr);
 
 /**
  * gen_pool_avail - get available free space of the pool
-- 
cgit v1.2.3-59-g8ed1b


From d439e6a5d78b93a85c0f93ea559548fc109ccc1a Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 4 Dec 2019 16:52:06 -0800
Subject: checkpatch: improve ignoring CamelCase SI style variants like mA

Ignore all upper-case variants before and after SI units like mA, mV and
uV so uses like RANGE_mA do not emit a CAMELCASE message.

Link: http://lkml.kernel.org/r/5ce6f9131327fd2e12d7a0e20a55f588448de090.camel@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Jules Irenge <jbi.octave@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 592911a2f06c..da330b3ce140 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -5038,8 +5038,9 @@ sub process {
 			    $var =~ /[A-Z][a-z]|[a-z][A-Z]/ &&
 #Ignore Page<foo> variants
 			    $var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ &&
-#Ignore SI style variants like nS, mV and dB (ie: max_uV, regulator_min_uA_show)
-			    $var !~ /^(?:[a-z_]*?)_?[a-z][A-Z](?:_[a-z_]+)?$/ &&
+#Ignore SI style variants like nS, mV and dB
+#(ie: max_uV, regulator_min_uA_show, RANGE_mA_VALUE)
+			    $var !~ /^(?:[a-z0-9_]*|[A-Z0-9_]*)?_?[a-z][A-Z](?:_[a-z0-9_]+|_[A-Z0-9_]+)?$/ &&
 #Ignore some three character SI units explicitly, like MiB and KHz
 			    $var !~ /^(?:[a-z_]*?)_?(?:[KMGT]iB|[KMGT]?Hz)(?:_[a-z_]+)?$/) {
 				while ($var =~ m{($Ident)}g) {
-- 
cgit v1.2.3-59-g8ed1b


From cd28b119047b9928ddae914dd747f032ab929cc6 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 4 Dec 2019 16:52:09 -0800
Subject: checkpatch: reduce is_maintained_obsolete lookup runtime

The is_maintained_obsolete function can be called twice using the same
filename.  This function spawns a process using get_maintainer.pl.
Store the status of each filename when spawned and use the stored result
to eliminate the spawning of unnecessary duplicate child processes.

Example:

old:

  $ time ./scripts/checkpatch.pl hp100-Move-to-staging.patch > /dev/null

  real	0m1.767s
  user	0m1.634s
  sys	0m0.141s

new:

  $ time ./scripts/checkpatch.pl hp100-Move-to-staging.patch > /dev/null

  real	0m1.184s
  user	0m1.085s
  sys	0m0.103s

Link: http://lkml.kernel.org/r/b982566a2b9b4825badce36fdfc3032bd0005151.camel@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index da330b3ce140..7cbe6e72e363 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -874,14 +874,18 @@ sub seed_camelcase_file {
 	}
 }
 
+our %maintained_status = ();
+
 sub is_maintained_obsolete {
 	my ($filename) = @_;
 
 	return 0 if (!$tree || !(-e "$root/scripts/get_maintainer.pl"));
 
-	my $status = `perl $root/scripts/get_maintainer.pl --status --nom --nol --nogit --nogit-fallback -f $filename 2>&1`;
+	if (!exists($maintained_status{$filename})) {
+		$maintained_status{$filename} = `perl $root/scripts/get_maintainer.pl --status --nom --nol --nogit --nogit-fallback -f $filename 2>&1`;
+	}
 
-	return $status =~ /obsolete/i;
+	return $maintained_status{$filename} =~ /obsolete/i;
 }
 
 sub is_SPDX_License_valid {
-- 
cgit v1.2.3-59-g8ed1b


From f6520c520842c532059c380d7a5b30b6e0e72ced Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Wed, 4 Dec 2019 16:52:12 -0800
Subject: epoll: simplify ep_poll_safewake() for CONFIG_DEBUG_LOCK_ALLOC

Currently, ep_poll_safewake() in the CONFIG_DEBUG_LOCK_ALLOC case uses
ep_call_nested() in order to pass the correct subclass argument to
spin_lock_irqsave_nested().  However, ep_call_nested() adds unnecessary
checks for epoll depth and loops that are already verified when doing
EPOLL_CTL_ADD.  This mirrors a conversion that was done for
!CONFIG_DEBUG_LOCK_ALLOC in: commit 37b5e5212a44 ("epoll: remove
ep_call_nested() from ep_eventpoll_poll()")

Link: http://lkml.kernel.org/r/1567628549-11501-1-git-send-email-jbaron@akamai.com
Signed-off-by: Jason Baron <jbaron@akamai.com>
Reviewed-by: Roman Penyaev <rpenyaev@suse.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric Wong <normalperson@yhbt.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c | 36 +++++++++++++-----------------------
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index c4159bcc05d9..98ef0e2f1b5c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -551,28 +551,23 @@ out_unlock:
  */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
-static struct nested_calls poll_safewake_ncalls;
-
-static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
-{
-	unsigned long flags;
-	wait_queue_head_t *wqueue = (wait_queue_head_t *)cookie;
-
-	spin_lock_irqsave_nested(&wqueue->lock, flags, call_nests + 1);
-	wake_up_locked_poll(wqueue, EPOLLIN);
-	spin_unlock_irqrestore(&wqueue->lock, flags);
-
-	return 0;
-}
+static DEFINE_PER_CPU(int, wakeup_nest);
 
 static void ep_poll_safewake(wait_queue_head_t *wq)
 {
-	int this_cpu = get_cpu();
-
-	ep_call_nested(&poll_safewake_ncalls,
-		       ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
+	unsigned long flags;
+	int subclass;
 
-	put_cpu();
+	local_irq_save(flags);
+	preempt_disable();
+	subclass = __this_cpu_read(wakeup_nest);
+	spin_lock_nested(&wq->lock, subclass + 1);
+	__this_cpu_inc(wakeup_nest);
+	wake_up_locked_poll(wq, POLLIN);
+	__this_cpu_dec(wakeup_nest);
+	spin_unlock(&wq->lock);
+	local_irq_restore(flags);
+	preempt_enable();
 }
 
 #else
@@ -2370,11 +2365,6 @@ static int __init eventpoll_init(void)
 	 */
 	ep_nested_calls_init(&poll_loop_ncalls);
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	/* Initialize the structure used to perform safe poll wait head wake ups */
-	ep_nested_calls_init(&poll_safewake_ncalls);
-#endif
-
 	/*
 	 * We can have many thousands of epitems, so prevent this from
 	 * using an extra cache line on 64-bit (and smaller) CPUs
-- 
cgit v1.2.3-59-g8ed1b


From 339ddb53d373baee6e7946aec17c739c4924d6d9 Mon Sep 17 00:00:00 2001
From: Heiher <r@hev.cc>
Date: Wed, 4 Dec 2019 16:52:15 -0800
Subject: fs/epoll: remove unnecessary wakeups of nested epoll

Take the case where we have:

        t0
         | (ew)
        e0
         | (et)
        e1
         | (lt)
        s0

t0: thread 0
e0: epoll fd 0
e1: epoll fd 1
s0: socket fd 0
ew: epoll_wait
et: edge-trigger
lt: level-trigger

We remove unnecessary wakeups to prevent the nested epoll that working in edge-
triggered mode to waking up continuously.

Test code:
 #include <unistd.h>
 #include <sys/epoll.h>
 #include <sys/socket.h>

 int main(int argc, char *argv[])
 {
 	int sfd[2];
 	int efd[2];
 	struct epoll_event e;

 	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sfd) < 0)
 		goto out;

 	efd[0] = epoll_create(1);
 	if (efd[0] < 0)
 		goto out;

 	efd[1] = epoll_create(1);
 	if (efd[1] < 0)
 		goto out;

 	e.events = EPOLLIN;
 	if (epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e) < 0)
 		goto out;

 	e.events = EPOLLIN | EPOLLET;
 	if (epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e) < 0)
 		goto out;

 	if (write(sfd[1], "w", 1) != 1)
 		goto out;

 	if (epoll_wait(efd[0], &e, 1, 0) != 1)
 		goto out;

 	if (epoll_wait(efd[0], &e, 1, 0) != 0)
 		goto out;

 	close(efd[0]);
 	close(efd[1]);
 	close(sfd[0]);
 	close(sfd[1]);

 	return 0;

 out:
 	return -1;
 }

More tests:
 https://github.com/heiher/epoll-wakeup

Link: http://lkml.kernel.org/r/20191009060516.3577-1-r@hev.cc
Signed-off-by: hev <r@hev.cc>
Reviewed-by: Roman Penyaev <rpenyaev@suse.de>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Eric Wong <e@80x24.org>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Sridhar Samudrala <sridhar.samudrala@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 98ef0e2f1b5c..67a395039268 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -666,7 +666,6 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
 			      void *priv, int depth, bool ep_locked)
 {
 	__poll_t res;
-	int pwake = 0;
 	struct epitem *epi, *nepi;
 	LIST_HEAD(txlist);
 
@@ -733,26 +732,11 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
 	 */
 	list_splice(&txlist, &ep->rdllist);
 	__pm_relax(ep->ws);
-
-	if (!list_empty(&ep->rdllist)) {
-		/*
-		 * Wake up (if active) both the eventpoll wait list and
-		 * the ->poll() wait list (delayed after we release the lock).
-		 */
-		if (waitqueue_active(&ep->wq))
-			wake_up(&ep->wq);
-		if (waitqueue_active(&ep->poll_wait))
-			pwake++;
-	}
 	write_unlock_irq(&ep->lock);
 
 	if (!ep_locked)
 		mutex_unlock(&ep->mtx);
 
-	/* We have to call this outside the lock */
-	if (pwake)
-		ep_poll_safewake(&ep->poll_wait);
-
 	return res;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From f2728fe80cefb40a8f486e64a4bef17ce2c64f5b Mon Sep 17 00:00:00 2001
From: Heiher <r@hev.cc>
Date: Wed, 4 Dec 2019 16:52:19 -0800
Subject: selftests: add epoll selftests

This adds the promised selftest for epoll.  It will verify the wakeups
of epoll.  Including leaf and nested mode, epoll_wait() and poll() and
multi-threads.

Link: http://lkml.kernel.org/r/20191009121518.4027-1-r@hev.cc
Signed-off-by: hev <r@hev.cc>
Reviewed-by: Roman Penyaev <rpenyaev@suse.de>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/Makefile                   |    1 +
 .../testing/selftests/filesystems/epoll/.gitignore |    1 +
 tools/testing/selftests/filesystems/epoll/Makefile |    7 +
 .../filesystems/epoll/epoll_wakeup_test.c          | 3074 ++++++++++++++++++++
 4 files changed, 3083 insertions(+)
 create mode 100644 tools/testing/selftests/filesystems/epoll/.gitignore
 create mode 100644 tools/testing/selftests/filesystems/epoll/Makefile
 create mode 100644 tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index d67f968eac21..b001c602414b 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -13,6 +13,7 @@ TARGETS += efivarfs
 TARGETS += exec
 TARGETS += filesystems
 TARGETS += filesystems/binderfs
+TARGETS += filesystems/epoll
 TARGETS += firmware
 TARGETS += ftrace
 TARGETS += futex
diff --git a/tools/testing/selftests/filesystems/epoll/.gitignore b/tools/testing/selftests/filesystems/epoll/.gitignore
new file mode 100644
index 000000000000..9ae8db44ec14
--- /dev/null
+++ b/tools/testing/selftests/filesystems/epoll/.gitignore
@@ -0,0 +1 @@
+epoll_wakeup_test
diff --git a/tools/testing/selftests/filesystems/epoll/Makefile b/tools/testing/selftests/filesystems/epoll/Makefile
new file mode 100644
index 000000000000..e62f3d4f68da
--- /dev/null
+++ b/tools/testing/selftests/filesystems/epoll/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../../usr/include/
+LDFLAGS += -lpthread
+TEST_GEN_PROGS := epoll_wakeup_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
new file mode 100644
index 000000000000..37a04dab56f0
--- /dev/null
+++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
@@ -0,0 +1,3074 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <poll.h>
+#include <unistd.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include "../../kselftest_harness.h"
+
+struct epoll_mtcontext
+{
+	int efd[3];
+	int sfd[4];
+	int count;
+
+	pthread_t main;
+	pthread_t waiter;
+};
+
+static void signal_handler(int signum)
+{
+}
+
+static void kill_timeout(struct epoll_mtcontext *ctx)
+{
+	usleep(1000000);
+	pthread_kill(ctx->main, SIGUSR1);
+	pthread_kill(ctx->waiter, SIGUSR1);
+}
+
+static void *waiter_entry1a(void *data)
+{
+	struct epoll_event e;
+	struct epoll_mtcontext *ctx = data;
+
+	if (epoll_wait(ctx->efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx->count, 1);
+
+	return NULL;
+}
+
+static void *waiter_entry1ap(void *data)
+{
+	struct pollfd pfd;
+	struct epoll_event e;
+	struct epoll_mtcontext *ctx = data;
+
+	pfd.fd = ctx->efd[0];
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, -1) > 0) {
+		if (epoll_wait(ctx->efd[0], &e, 1, 0) > 0)
+			__sync_fetch_and_add(&ctx->count, 1);
+	}
+
+	return NULL;
+}
+
+static void *waiter_entry1o(void *data)
+{
+	struct epoll_event e;
+	struct epoll_mtcontext *ctx = data;
+
+	if (epoll_wait(ctx->efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_or(&ctx->count, 1);
+
+	return NULL;
+}
+
+static void *waiter_entry1op(void *data)
+{
+	struct pollfd pfd;
+	struct epoll_event e;
+	struct epoll_mtcontext *ctx = data;
+
+	pfd.fd = ctx->efd[0];
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, -1) > 0) {
+		if (epoll_wait(ctx->efd[0], &e, 1, 0) > 0)
+			__sync_fetch_and_or(&ctx->count, 1);
+	}
+
+	return NULL;
+}
+
+static void *waiter_entry2a(void *data)
+{
+	struct epoll_event events[2];
+	struct epoll_mtcontext *ctx = data;
+
+	if (epoll_wait(ctx->efd[0], events, 2, -1) > 0)
+		__sync_fetch_and_add(&ctx->count, 1);
+
+	return NULL;
+}
+
+static void *waiter_entry2ap(void *data)
+{
+	struct pollfd pfd;
+	struct epoll_event events[2];
+	struct epoll_mtcontext *ctx = data;
+
+	pfd.fd = ctx->efd[0];
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, -1) > 0) {
+		if (epoll_wait(ctx->efd[0], events, 2, 0) > 0)
+			__sync_fetch_and_add(&ctx->count, 1);
+	}
+
+	return NULL;
+}
+
+static void *emitter_entry1(void *data)
+{
+	struct epoll_mtcontext *ctx = data;
+
+	usleep(100000);
+	write(ctx->sfd[1], "w", 1);
+
+	kill_timeout(ctx);
+
+	return NULL;
+}
+
+static void *emitter_entry2(void *data)
+{
+	struct epoll_mtcontext *ctx = data;
+
+	usleep(100000);
+	write(ctx->sfd[1], "w", 1);
+	write(ctx->sfd[3], "w", 1);
+
+	kill_timeout(ctx);
+
+	return NULL;
+}
+
+/*
+ *          t0
+ *           | (ew)
+ *          e0
+ *           | (lt)
+ *          s0
+ */
+TEST(epoll1)
+{
+	int efd;
+	int sfd[2];
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+	efd = epoll_create(1);
+	ASSERT_GE(efd, 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+	EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+
+	close(efd);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
+/*
+ *          t0
+ *           | (ew)
+ *          e0
+ *           | (et)
+ *          s0
+ */
+TEST(epoll2)
+{
+	int efd;
+	int sfd[2];
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+	efd = epoll_create(1);
+	ASSERT_GE(efd, 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+	EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 0);
+
+	close(efd);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
+/*
+ *           t0
+ *            | (ew)
+ *           e0
+ *     (lt) /  \ (lt)
+ *        s0    s2
+ */
+TEST(epoll3)
+{
+	int efd;
+	int sfd[4];
+	struct epoll_event events[2];
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+	efd = epoll_create(1);
+	ASSERT_GE(efd, 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+	ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+	EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+	EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+
+	close(efd);
+	close(sfd[0]);
+	close(sfd[1]);
+	close(sfd[2]);
+	close(sfd[3]);
+}
+
+/*
+ *           t0
+ *            | (ew)
+ *           e0
+ *     (et) /  \ (et)
+ *        s0    s2
+ */
+TEST(epoll4)
+{
+	int efd;
+	int sfd[4];
+	struct epoll_event events[2];
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+	efd = epoll_create(1);
+	ASSERT_GE(efd, 0);
+
+	events[0].events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0);
+
+	events[0].events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+	ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+	EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+	EXPECT_EQ(epoll_wait(efd, events, 2, 0), 0);
+
+	close(efd);
+	close(sfd[0]);
+	close(sfd[1]);
+	close(sfd[2]);
+	close(sfd[3]);
+}
+
+/*
+ *          t0
+ *           | (p)
+ *          e0
+ *           | (lt)
+ *          s0
+ */
+TEST(epoll5)
+{
+	int efd;
+	int sfd[2];
+	struct pollfd pfd;
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+
+	efd = epoll_create(1);
+	ASSERT_GE(efd, 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+	pfd.fd = efd;
+	pfd.events = POLLIN;
+	ASSERT_EQ(poll(&pfd, 1, 0), 1);
+	ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+
+	pfd.fd = efd;
+	pfd.events = POLLIN;
+	ASSERT_EQ(poll(&pfd, 1, 0), 1);
+	ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+
+	close(efd);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
+/*
+ *          t0
+ *           | (p)
+ *          e0
+ *           | (et)
+ *          s0
+ */
+TEST(epoll6)
+{
+	int efd;
+	int sfd[2];
+	struct pollfd pfd;
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+
+	efd = epoll_create(1);
+	ASSERT_GE(efd, 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+	pfd.fd = efd;
+	pfd.events = POLLIN;
+	ASSERT_EQ(poll(&pfd, 1, 0), 1);
+	ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1);
+
+	pfd.fd = efd;
+	pfd.events = POLLIN;
+	ASSERT_EQ(poll(&pfd, 1, 0), 0);
+	ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 0);
+
+	close(efd);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
+/*
+ *           t0
+ *            | (p)
+ *           e0
+ *     (lt) /  \ (lt)
+ *        s0    s2
+ */
+
+TEST(epoll7)
+{
+	int efd;
+	int sfd[4];
+	struct pollfd pfd;
+	struct epoll_event events[2];
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+	efd = epoll_create(1);
+	ASSERT_GE(efd, 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+	ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+	pfd.fd = efd;
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+
+	pfd.fd = efd;
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+
+	close(efd);
+	close(sfd[0]);
+	close(sfd[1]);
+	close(sfd[2]);
+	close(sfd[3]);
+}
+
+/*
+ *           t0
+ *            | (p)
+ *           e0
+ *     (et) /  \ (et)
+ *        s0    s2
+ */
+TEST(epoll8)
+{
+	int efd;
+	int sfd[4];
+	struct pollfd pfd;
+	struct epoll_event events[2];
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+	efd = epoll_create(1);
+	ASSERT_GE(efd, 0);
+
+	events[0].events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0);
+
+	events[0].events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+	ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+	pfd.fd = efd;
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2);
+
+	pfd.fd = efd;
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 0);
+	EXPECT_EQ(epoll_wait(efd, events, 2, 0), 0);
+
+	close(efd);
+	close(sfd[0]);
+	close(sfd[1]);
+	close(sfd[2]);
+	close(sfd[3]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (ew)
+ *           e0
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll9)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (ew)
+ *           e0
+ *            | (et)
+ *           s0
+ */
+TEST(epoll10)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 1);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (ew)
+ *           e0
+ *     (lt) /  \ (lt)
+ *        s0    s2
+ */
+TEST(epoll11)
+{
+	pthread_t emitter;
+	struct epoll_event events[2];
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry2a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], events, 2, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+	close(ctx.sfd[2]);
+	close(ctx.sfd[3]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (ew)
+ *           e0
+ *     (et) /  \ (et)
+ *        s0    s2
+ */
+TEST(epoll12)
+{
+	pthread_t emitter;
+	struct epoll_event events[2];
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	events[0].events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0);
+
+	events[0].events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], events, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+	close(ctx.sfd[2]);
+	close(ctx.sfd[3]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (p)
+ *           e0
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll13)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (p)
+ *           e0
+ *            | (et)
+ *           s0
+ */
+TEST(epoll14)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 1);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (p)
+ *           e0
+ *     (lt) /  \ (lt)
+ *        s0    s2
+ */
+TEST(epoll15)
+{
+	pthread_t emitter;
+	struct epoll_event events[2];
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry2ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], events, 2, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+	close(ctx.sfd[2]);
+	close(ctx.sfd[3]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (p)
+ *           e0
+ *     (et) /  \ (et)
+ *        s0    s2
+ */
+TEST(epoll16)
+{
+	pthread_t emitter;
+	struct epoll_event events[2];
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	events[0].events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0);
+
+	events[0].events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], events, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+	close(ctx.sfd[2]);
+	close(ctx.sfd[3]);
+}
+
+/*
+ *          t0
+ *           | (ew)
+ *          e0
+ *           | (lt)
+ *          e1
+ *           | (lt)
+ *          s0
+ */
+TEST(epoll17)
+{
+	int efd[2];
+	int sfd[2];
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+	efd[0] = epoll_create(1);
+	ASSERT_GE(efd[0], 0);
+
+	efd[1] = epoll_create(1);
+	ASSERT_GE(efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+	close(efd[0]);
+	close(efd[1]);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
+/*
+ *          t0
+ *           | (ew)
+ *          e0
+ *           | (lt)
+ *          e1
+ *           | (et)
+ *          s0
+ */
+TEST(epoll18)
+{
+	int efd[2];
+	int sfd[2];
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+	efd[0] = epoll_create(1);
+	ASSERT_GE(efd[0], 0);
+
+	efd[1] = epoll_create(1);
+	ASSERT_GE(efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+	close(efd[0]);
+	close(efd[1]);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
+/*
+ *           t0
+ *            | (ew)
+ *           e0
+ *            | (et)
+ *           e1
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll19)
+{
+	int efd[2];
+	int sfd[2];
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+	efd[0] = epoll_create(1);
+	ASSERT_GE(efd[0], 0);
+
+	efd[1] = epoll_create(1);
+	ASSERT_GE(efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0);
+
+	close(efd[0]);
+	close(efd[1]);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
+/*
+ *           t0
+ *            | (ew)
+ *           e0
+ *            | (et)
+ *           e1
+ *            | (et)
+ *           s0
+ */
+TEST(epoll20)
+{
+	int efd[2];
+	int sfd[2];
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+	efd[0] = epoll_create(1);
+	ASSERT_GE(efd[0], 0);
+
+	efd[1] = epoll_create(1);
+	ASSERT_GE(efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0);
+
+	close(efd[0]);
+	close(efd[1]);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
+/*
+ *          t0
+ *           | (p)
+ *          e0
+ *           | (lt)
+ *          e1
+ *           | (lt)
+ *          s0
+ */
+TEST(epoll21)
+{
+	int efd[2];
+	int sfd[2];
+	struct pollfd pfd;
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+	efd[0] = epoll_create(1);
+	ASSERT_GE(efd[0], 0);
+
+	efd[1] = epoll_create(1);
+	ASSERT_GE(efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+	pfd.fd = efd[0];
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+	pfd.fd = efd[0];
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+	close(efd[0]);
+	close(efd[1]);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
+/*
+ *          t0
+ *           | (p)
+ *          e0
+ *           | (lt)
+ *          e1
+ *           | (et)
+ *          s0
+ */
+TEST(epoll22)
+{
+	int efd[2];
+	int sfd[2];
+	struct pollfd pfd;
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+	efd[0] = epoll_create(1);
+	ASSERT_GE(efd[0], 0);
+
+	efd[1] = epoll_create(1);
+	ASSERT_GE(efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+	pfd.fd = efd[0];
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+	pfd.fd = efd[0];
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+	close(efd[0]);
+	close(efd[1]);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
+/*
+ *          t0
+ *           | (p)
+ *          e0
+ *           | (et)
+ *          e1
+ *           | (lt)
+ *          s0
+ */
+TEST(epoll23)
+{
+	int efd[2];
+	int sfd[2];
+	struct pollfd pfd;
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+	efd[0] = epoll_create(1);
+	ASSERT_GE(efd[0], 0);
+
+	efd[1] = epoll_create(1);
+	ASSERT_GE(efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+	pfd.fd = efd[0];
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+	pfd.fd = efd[0];
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 0);
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0);
+
+	close(efd[0]);
+	close(efd[1]);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
+/*
+ *          t0
+ *           | (p)
+ *          e0
+ *           | (et)
+ *          e1
+ *           | (et)
+ *          s0
+ */
+TEST(epoll24)
+{
+	int efd[2];
+	int sfd[2];
+	struct pollfd pfd;
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+	efd[0] = epoll_create(1);
+	ASSERT_GE(efd[0], 0);
+
+	efd[1] = epoll_create(1);
+	ASSERT_GE(efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+	pfd.fd = efd[0];
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1);
+
+	pfd.fd = efd[0];
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 0);
+	EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0);
+
+	close(efd[0]);
+	close(efd[1]);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (ew)
+ *           e0
+ *            | (lt)
+ *           e1
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll25)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (ew)
+ *           e0
+ *            | (lt)
+ *           e1
+ *            | (et)
+ *           s0
+ */
+TEST(epoll26)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (ew)
+ *           e0
+ *            | (et)
+ *           e1
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll27)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 1);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (ew)
+ *           e0
+ *            | (et)
+ *           e1
+ *            | (et)
+ *           s0
+ */
+TEST(epoll28)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 1);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (p)
+ *           e0
+ *            | (lt)
+ *           e1
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll29)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (p)
+ *           e0
+ *            | (lt)
+ *           e1
+ *            | (et)
+ *           s0
+ */
+TEST(epoll30)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (p)
+ *           e0
+ *            | (et)
+ *           e1
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll31)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 1);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (p)
+ *           e0
+ *            | (et)
+ *           e1
+ *            | (et)
+ *           s0
+ */
+TEST(epoll32)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 1);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *    (ew) |    | (ew)
+ *         |   e0
+ *          \  / (lt)
+ *           e1
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll33)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *    (ew) |    | (ew)
+ *         |   e0
+ *          \  / (lt)
+ *           e1
+ *            | (et)
+ *           s0
+ */
+TEST(epoll34)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+		__sync_fetch_and_or(&ctx.count, 2);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *    (ew) |    | (ew)
+ *         |   e0
+ *          \  / (et)
+ *           e1
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll35)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *    (ew) |    | (ew)
+ *         |   e0
+ *          \  / (et)
+ *           e1
+ *            | (et)
+ *           s0
+ */
+TEST(epoll36)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+		__sync_fetch_and_or(&ctx.count, 2);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *     (p) |    | (ew)
+ *         |   e0
+ *          \  / (lt)
+ *           e1
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll37)
+{
+	pthread_t emitter;
+	struct pollfd pfd;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	pfd.fd = ctx.efd[1];
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, -1) > 0) {
+		if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+			__sync_fetch_and_add(&ctx.count, 1);
+	}
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *     (p) |    | (ew)
+ *         |   e0
+ *          \  / (lt)
+ *           e1
+ *            | (et)
+ *           s0
+ */
+TEST(epoll38)
+{
+	pthread_t emitter;
+	struct pollfd pfd;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	pfd.fd = ctx.efd[1];
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, -1) > 0) {
+		if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+			__sync_fetch_and_or(&ctx.count, 2);
+	}
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *     (p) |    | (ew)
+ *         |   e0
+ *          \  / (et)
+ *           e1
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll39)
+{
+	pthread_t emitter;
+	struct pollfd pfd;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	pfd.fd = ctx.efd[1];
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, -1) > 0) {
+		if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+			__sync_fetch_and_add(&ctx.count, 1);
+	}
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *     (p) |    | (ew)
+ *         |   e0
+ *          \  / (et)
+ *           e1
+ *            | (et)
+ *           s0
+ */
+TEST(epoll40)
+{
+	pthread_t emitter;
+	struct pollfd pfd;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	pfd.fd = ctx.efd[1];
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, -1) > 0) {
+		if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+			__sync_fetch_and_or(&ctx.count, 2);
+	}
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *    (ew) |    | (p)
+ *         |   e0
+ *          \  / (lt)
+ *           e1
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll41)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *    (ew) |    | (p)
+ *         |   e0
+ *          \  / (lt)
+ *           e1
+ *            | (et)
+ *           s0
+ */
+TEST(epoll42)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+		__sync_fetch_and_or(&ctx.count, 2);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *    (ew) |    | (p)
+ *         |   e0
+ *          \  / (et)
+ *           e1
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll43)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *    (ew) |    | (p)
+ *         |   e0
+ *          \  / (et)
+ *           e1
+ *            | (et)
+ *           s0
+ */
+TEST(epoll44)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+		__sync_fetch_and_or(&ctx.count, 2);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *     (p) |    | (p)
+ *         |   e0
+ *          \  / (lt)
+ *           e1
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll45)
+{
+	pthread_t emitter;
+	struct pollfd pfd;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	pfd.fd = ctx.efd[1];
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, -1) > 0) {
+		if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+			__sync_fetch_and_add(&ctx.count, 1);
+	}
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *     (p) |    | (p)
+ *         |   e0
+ *          \  / (lt)
+ *           e1
+ *            | (et)
+ *           s0
+ */
+TEST(epoll46)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+		__sync_fetch_and_or(&ctx.count, 2);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *     (p) |    | (p)
+ *         |   e0
+ *          \  / (et)
+ *           e1
+ *            | (lt)
+ *           s0
+ */
+TEST(epoll47)
+{
+	pthread_t emitter;
+	struct pollfd pfd;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	pfd.fd = ctx.efd[1];
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, -1) > 0) {
+		if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0)
+			__sync_fetch_and_add(&ctx.count, 1);
+	}
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *        t0   t1
+ *     (p) |    | (p)
+ *         |   e0
+ *          \  / (et)
+ *           e1
+ *            | (et)
+ *           s0
+ */
+TEST(epoll48)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0)
+		__sync_fetch_and_or(&ctx.count, 2);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3));
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+}
+
+/*
+ *           t0
+ *            | (ew)
+ *           e0
+ *     (lt) /  \ (lt)
+ *        e1    e2
+ *    (lt) |     | (lt)
+ *        s0    s2
+ */
+TEST(epoll49)
+{
+	int efd[3];
+	int sfd[4];
+	struct epoll_event events[2];
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+	efd[0] = epoll_create(1);
+	ASSERT_GE(efd[0], 0);
+
+	efd[1] = epoll_create(1);
+	ASSERT_GE(efd[1], 0);
+
+	efd[2] = epoll_create(1);
+	ASSERT_GE(efd[2], 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+	ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+	EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+	EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+
+	close(efd[0]);
+	close(efd[1]);
+	close(efd[2]);
+	close(sfd[0]);
+	close(sfd[1]);
+	close(sfd[2]);
+	close(sfd[3]);
+}
+
+/*
+ *           t0
+ *            | (ew)
+ *           e0
+ *     (et) /  \ (et)
+ *        e1    e2
+ *    (lt) |     | (lt)
+ *        s0    s2
+ */
+TEST(epoll50)
+{
+	int efd[3];
+	int sfd[4];
+	struct epoll_event events[2];
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+	efd[0] = epoll_create(1);
+	ASSERT_GE(efd[0], 0);
+
+	efd[1] = epoll_create(1);
+	ASSERT_GE(efd[1], 0);
+
+	efd[2] = epoll_create(1);
+	ASSERT_GE(efd[2], 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0);
+
+	events[0].events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0);
+
+	events[0].events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+	ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+	EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+	EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 0);
+
+	close(efd[0]);
+	close(efd[1]);
+	close(efd[2]);
+	close(sfd[0]);
+	close(sfd[1]);
+	close(sfd[2]);
+	close(sfd[3]);
+}
+
+/*
+ *           t0
+ *            | (p)
+ *           e0
+ *     (lt) /  \ (lt)
+ *        e1    e2
+ *    (lt) |     | (lt)
+ *        s0    s2
+ */
+TEST(epoll51)
+{
+	int efd[3];
+	int sfd[4];
+	struct pollfd pfd;
+	struct epoll_event events[2];
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+	efd[0] = epoll_create(1);
+	ASSERT_GE(efd[0], 0);
+
+	efd[1] = epoll_create(1);
+	ASSERT_GE(efd[1], 0);
+
+	efd[2] = epoll_create(1);
+	ASSERT_GE(efd[2], 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+	ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+	pfd.fd = efd[0];
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+
+	pfd.fd = efd[0];
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+
+	close(efd[0]);
+	close(efd[1]);
+	close(efd[2]);
+	close(sfd[0]);
+	close(sfd[1]);
+	close(sfd[2]);
+	close(sfd[3]);
+}
+
+/*
+ *           t0
+ *            | (p)
+ *           e0
+ *     (et) /  \ (et)
+ *        e1    e2
+ *    (lt) |     | (lt)
+ *        s0    s2
+ */
+TEST(epoll52)
+{
+	int efd[3];
+	int sfd[4];
+	struct pollfd pfd;
+	struct epoll_event events[2];
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0);
+
+	efd[0] = epoll_create(1);
+	ASSERT_GE(efd[0], 0);
+
+	efd[1] = epoll_create(1);
+	ASSERT_GE(efd[1], 0);
+
+	efd[2] = epoll_create(1);
+	ASSERT_GE(efd[2], 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0);
+
+	events[0].events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0);
+
+	events[0].events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0);
+
+	events[0].events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+	ASSERT_EQ(write(sfd[3], "w", 1), 1);
+
+	pfd.fd = efd[0];
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 1);
+	EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2);
+
+	pfd.fd = efd[0];
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 0);
+	EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 0);
+
+	close(efd[0]);
+	close(efd[1]);
+	close(efd[2]);
+	close(sfd[0]);
+	close(sfd[1]);
+	close(sfd[2]);
+	close(sfd[3]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (ew)
+ *           e0
+ *     (lt) /  \ (lt)
+ *        e1    e2
+ *    (lt) |     | (lt)
+ *        s0    s2
+ */
+TEST(epoll53)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	ctx.efd[2] = epoll_create(1);
+	ASSERT_GE(ctx.efd[2], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.efd[2]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+	close(ctx.sfd[2]);
+	close(ctx.sfd[3]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (ew)
+ *           e0
+ *     (et) /  \ (et)
+ *        e1    e2
+ *    (lt) |     | (lt)
+ *        s0    s2
+ */
+TEST(epoll54)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	ctx.efd[2] = epoll_create(1);
+	ASSERT_GE(ctx.efd[2], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.efd[2]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+	close(ctx.sfd[2]);
+	close(ctx.sfd[3]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (p)
+ *           e0
+ *     (lt) /  \ (lt)
+ *        e1    e2
+ *    (lt) |     | (lt)
+ *        s0    s2
+ */
+TEST(epoll55)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	ctx.efd[2] = epoll_create(1);
+	ASSERT_GE(ctx.efd[2], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.efd[2]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+	close(ctx.sfd[2]);
+	close(ctx.sfd[3]);
+}
+
+/*
+ *        t0    t1
+ *     (ew) \  / (p)
+ *           e0
+ *     (et) /  \ (et)
+ *        e1    e2
+ *    (lt) |     | (lt)
+ *        s0    s2
+ */
+TEST(epoll56)
+{
+	pthread_t emitter;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	ctx.efd[2] = epoll_create(1);
+	ASSERT_GE(ctx.efd[2], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+	if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0)
+		__sync_fetch_and_add(&ctx.count, 1);
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.efd[2]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+	close(ctx.sfd[2]);
+	close(ctx.sfd[3]);
+}
+
+/*
+ *        t0    t1
+ *      (p) \  / (p)
+ *           e0
+ *     (lt) /  \ (lt)
+ *        e1    e2
+ *    (lt) |     | (lt)
+ *        s0    s2
+ */
+TEST(epoll57)
+{
+	pthread_t emitter;
+	struct pollfd pfd;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	ctx.efd[2] = epoll_create(1);
+	ASSERT_GE(ctx.efd[2], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+	pfd.fd = ctx.efd[0];
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, -1) > 0) {
+		if (epoll_wait(ctx.efd[0], &e, 1, 0) > 0)
+			__sync_fetch_and_add(&ctx.count, 1);
+	}
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.efd[2]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+	close(ctx.sfd[2]);
+	close(ctx.sfd[3]);
+}
+
+/*
+ *        t0    t1
+ *      (p) \  / (p)
+ *           e0
+ *     (et) /  \ (et)
+ *        e1    e2
+ *    (lt) |     | (lt)
+ *        s0    s2
+ */
+TEST(epoll58)
+{
+	pthread_t emitter;
+	struct pollfd pfd;
+	struct epoll_event e;
+	struct epoll_mtcontext ctx = { 0 };
+
+	signal(SIGUSR1, signal_handler);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0);
+
+	ctx.efd[0] = epoll_create(1);
+	ASSERT_GE(ctx.efd[0], 0);
+
+	ctx.efd[1] = epoll_create(1);
+	ASSERT_GE(ctx.efd[1], 0);
+
+	ctx.efd[2] = epoll_create(1);
+	ASSERT_GE(ctx.efd[2], 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0);
+
+	e.events = EPOLLIN | EPOLLET;
+	ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0);
+
+	ctx.main = pthread_self();
+	ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0);
+	ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0);
+
+	pfd.fd = ctx.efd[0];
+	pfd.events = POLLIN;
+	if (poll(&pfd, 1, -1) > 0) {
+		if (epoll_wait(ctx.efd[0], &e, 1, 0) > 0)
+			__sync_fetch_and_add(&ctx.count, 1);
+	}
+
+	ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0);
+	EXPECT_EQ(ctx.count, 2);
+
+	if (pthread_tryjoin_np(emitter, NULL) < 0) {
+		pthread_kill(emitter, SIGUSR1);
+		pthread_join(emitter, NULL);
+	}
+
+	close(ctx.efd[0]);
+	close(ctx.efd[1]);
+	close(ctx.efd[2]);
+	close(ctx.sfd[0]);
+	close(ctx.sfd[1]);
+	close(ctx.sfd[2]);
+	close(ctx.sfd[3]);
+}
+
+TEST_HARNESS_MAIN
-- 
cgit v1.2.3-59-g8ed1b


From 81696d5d544e665674b4f7a09e1304986fbbcbc6 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 4 Dec 2019 16:52:22 -0800
Subject: fs/binfmt_elf.c: delete unused "interp_map_addr" argument

Link: http://lkml.kernel.org/r/20191005165049.GA26927@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 5372eabd276a..43eb8a78e84d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -544,7 +544,7 @@ static inline int make_prot(u32 p_flags)
    an ELF header */
 
 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
-		struct file *interpreter, unsigned long *interp_map_addr,
+		struct file *interpreter,
 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
 {
 	struct elf_phdr *eppnt;
@@ -590,8 +590,6 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 			map_addr = elf_map(interpreter, load_addr + vaddr,
 					eppnt, elf_prot, elf_type, total_size);
 			total_size = 0;
-			if (!*interp_map_addr)
-				*interp_map_addr = map_addr;
 			error = map_addr;
 			if (BAD_ADDR(map_addr))
 				goto out;
@@ -1054,11 +1052,8 @@ out_free_interp:
 	}
 
 	if (interpreter) {
-		unsigned long interp_map_addr = 0;
-
 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
 					    interpreter,
-					    &interp_map_addr,
 					    load_bias, interp_elf_phdata);
 		if (!IS_ERR((void *)elf_entry)) {
 			/*
-- 
cgit v1.2.3-59-g8ed1b


From 658c033565118549a07f15927aa6939f481e4712 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 4 Dec 2019 16:52:25 -0800
Subject: fs/binfmt_elf.c: extract elf_read() function

ELF reads done by the kernel have very complicated error detection code
which better live in one place.

Link: http://lkml.kernel.org/r/20191005165215.GB26927@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c | 49 ++++++++++++++++++++++++-------------------------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 43eb8a78e84d..ecd8d2698515 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -404,6 +404,17 @@ static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
 }
 
+static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
+{
+	ssize_t rv;
+
+	rv = kernel_read(file, buf, len, &pos);
+	if (unlikely(rv != len)) {
+		return (rv < 0) ? rv : -EIO;
+	}
+	return 0;
+}
+
 /**
  * load_elf_phdrs() - load ELF program headers
  * @elf_ex:   ELF header of the binary whose program headers should be loaded
@@ -418,7 +429,6 @@ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
 {
 	struct elf_phdr *elf_phdata = NULL;
 	int retval, err = -1;
-	loff_t pos = elf_ex->e_phoff;
 	unsigned int size;
 
 	/*
@@ -439,9 +449,9 @@ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
 		goto out;
 
 	/* Read in the program headers */
-	retval = kernel_read(elf_file, elf_phdata, size, &pos);
-	if (retval != size) {
-		err = (retval < 0) ? retval : -EIO;
+	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
+	if (retval < 0) {
+		err = retval;
 		goto out;
 	}
 
@@ -720,7 +730,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	elf_ppnt = elf_phdata;
 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 		char *elf_interpreter;
-		loff_t pos;
 
 		if (elf_ppnt->p_type != PT_INTERP)
 			continue;
@@ -738,14 +747,10 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		if (!elf_interpreter)
 			goto out_free_ph;
 
-		pos = elf_ppnt->p_offset;
-		retval = kernel_read(bprm->file, elf_interpreter,
-				     elf_ppnt->p_filesz, &pos);
-		if (retval != elf_ppnt->p_filesz) {
-			if (retval >= 0)
-				retval = -EIO;
+		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
+				  elf_ppnt->p_offset);
+		if (retval < 0)
 			goto out_free_interp;
-		}
 		/* make sure path is NULL terminated */
 		retval = -ENOEXEC;
 		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
@@ -764,14 +769,10 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		would_dump(bprm, interpreter);
 
 		/* Get the exec headers */
-		pos = 0;
-		retval = kernel_read(interpreter, &loc->interp_elf_ex,
-				     sizeof(loc->interp_elf_ex), &pos);
-		if (retval != sizeof(loc->interp_elf_ex)) {
-			if (retval >= 0)
-				retval = -EIO;
+		retval = elf_read(interpreter, &loc->interp_elf_ex,
+				  sizeof(loc->interp_elf_ex), 0);
+		if (retval < 0)
 			goto out_free_dentry;
-		}
 
 		break;
 
@@ -1174,11 +1175,10 @@ static int load_elf_library(struct file *file)
 	unsigned long elf_bss, bss, len;
 	int retval, error, i, j;
 	struct elfhdr elf_ex;
-	loff_t pos = 0;
 
 	error = -ENOEXEC;
-	retval = kernel_read(file, &elf_ex, sizeof(elf_ex), &pos);
-	if (retval != sizeof(elf_ex))
+	retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
+	if (retval < 0)
 		goto out;
 
 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
@@ -1203,9 +1203,8 @@ static int load_elf_library(struct file *file)
 
 	eppnt = elf_phdata;
 	error = -ENOEXEC;
-	pos =  elf_ex.e_phoff;
-	retval = kernel_read(file, eppnt, j, &pos);
-	if (retval != j)
+	retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
+	if (retval < 0)
 		goto out_free_ph;
 
 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
-- 
cgit v1.2.3-59-g8ed1b


From e8cf4e9ca056cb5dcec3fdf3125c6645fc9b594d Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Wed, 4 Dec 2019 16:52:28 -0800
Subject: init/Kconfig: fix indentation

Adjust indentation from spaces to tab (+optional two spaces) as in
coding style with command like:
	$ sed -e 's/^        /	/' -i */Kconfig

Link: http://lkml.kernel.org/r/1574306670-30234-1-git-send-email-krzk@kernel.org
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Jiri Kosina <trivial@kernel.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 init/Kconfig | 76 ++++++++++++++++++++++++++++++------------------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index d7163fcf233b..a34064a031a5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -146,13 +146,13 @@ config LOCALVERSION_AUTO
 	  which is done within the script "scripts/setlocalversion".)
 
 config BUILD_SALT
-       string "Build ID Salt"
-       default ""
-       help
-          The build ID is used to link binaries and their debug info. Setting
-          this option will use the value in the calculation of the build id.
-          This is mostly useful for distributions which want to ensure the
-          build is unique between builds. It's safe to leave the default.
+	string "Build ID Salt"
+	default ""
+	help
+	  The build ID is used to link binaries and their debug info. Setting
+	  this option will use the value in the calculation of the build id.
+	  This is mostly useful for distributions which want to ensure the
+	  build is unique between builds. It's safe to leave the default.
 
 config HAVE_KERNEL_GZIP
 	bool
@@ -818,7 +818,7 @@ menuconfig CGROUPS
 if CGROUPS
 
 config PAGE_COUNTER
-       bool
+	bool
 
 config MEMCG
 	bool "Memory controller"
@@ -1311,9 +1311,9 @@ menuconfig EXPERT
 	select DEBUG_KERNEL
 	help
 	  This option allows certain base kernel options and settings
-          to be disabled or tweaked. This is for specialized
-          environments which can tolerate a "non-standard" kernel.
-          Only use this if you really know what you are doing.
+	  to be disabled or tweaked. This is for specialized
+	  environments which can tolerate a "non-standard" kernel.
+	  Only use this if you really know what you are doing.
 
 config UID16
 	bool "Enable 16-bit UID system calls" if EXPERT
@@ -1406,11 +1406,11 @@ config BUG
 	bool "BUG() support" if EXPERT
 	default y
 	help
-          Disabling this option eliminates support for BUG and WARN, reducing
-          the size of your kernel image and potentially quietly ignoring
-          numerous fatal conditions. You should only consider disabling this
-          option for embedded systems with no facilities for reporting errors.
-          Just say Y.
+	  Disabling this option eliminates support for BUG and WARN, reducing
+	  the size of your kernel image and potentially quietly ignoring
+	  numerous fatal conditions. You should only consider disabling this
+	  option for embedded systems with no facilities for reporting errors.
+	  Just say Y.
 
 config ELF_CORE
 	depends on COREDUMP
@@ -1426,8 +1426,8 @@ config PCSPKR_PLATFORM
 	select I8253_LOCK
 	default y
 	help
-          This option allows to disable the internal PC-Speaker
-          support, saving some memory.
+	  This option allows to disable the internal PC-Speaker
+	  support, saving some memory.
 
 config BASE_FULL
 	default y
@@ -1545,29 +1545,29 @@ config MEMBARRIER
 	  If unsure, say Y.
 
 config KALLSYMS
-	 bool "Load all symbols for debugging/ksymoops" if EXPERT
-	 default y
-	 help
-	   Say Y here to let the kernel print out symbolic crash information and
-	   symbolic stack backtraces. This increases the size of the kernel
-	   somewhat, as all symbols have to be loaded into the kernel image.
+	bool "Load all symbols for debugging/ksymoops" if EXPERT
+	default y
+	help
+	  Say Y here to let the kernel print out symbolic crash information and
+	  symbolic stack backtraces. This increases the size of the kernel
+	  somewhat, as all symbols have to be loaded into the kernel image.
 
 config KALLSYMS_ALL
 	bool "Include all symbols in kallsyms"
 	depends on DEBUG_KERNEL && KALLSYMS
 	help
-	   Normally kallsyms only contains the symbols of functions for nicer
-	   OOPS messages and backtraces (i.e., symbols from the text and inittext
-	   sections). This is sufficient for most cases. And only in very rare
-	   cases (e.g., when a debugger is used) all symbols are required (e.g.,
-	   names of variables from the data sections, etc).
+	  Normally kallsyms only contains the symbols of functions for nicer
+	  OOPS messages and backtraces (i.e., symbols from the text and inittext
+	  sections). This is sufficient for most cases. And only in very rare
+	  cases (e.g., when a debugger is used) all symbols are required (e.g.,
+	  names of variables from the data sections, etc).
 
-	   This option makes sure that all symbols are loaded into the kernel
-	   image (i.e., symbols from all sections) in cost of increased kernel
-	   size (depending on the kernel configuration, it may be 300KiB or
-	   something like this).
+	  This option makes sure that all symbols are loaded into the kernel
+	  image (i.e., symbols from all sections) in cost of increased kernel
+	  size (depending on the kernel configuration, it may be 300KiB or
+	  something like this).
 
-	   Say N unless you really need all symbols.
+	  Say N unless you really need all symbols.
 
 config KALLSYMS_ABSOLUTE_PERCPU
 	bool
@@ -1710,12 +1710,12 @@ config DEBUG_PERF_USE_VMALLOC
 	depends on PERF_EVENTS && DEBUG_KERNEL && !PPC
 	select PERF_USE_VMALLOC
 	help
-	 Use vmalloc memory to back perf mmap() buffers.
+	  Use vmalloc memory to back perf mmap() buffers.
 
-	 Mostly useful for debugging the vmalloc code on platforms
-	 that don't require it.
+	  Mostly useful for debugging the vmalloc code on platforms
+	  that don't require it.
 
-	 Say N if unsure.
+	  Say N if unsure.
 
 endmenu
 
-- 
cgit v1.2.3-59-g8ed1b


From 48d6b4dd362c4f3a6032946397385b28ff8d2b9c Mon Sep 17 00:00:00 2001
From: "Ben Dooks (Codethink)" <ben.dooks@codethink.co.uk>
Date: Wed, 4 Dec 2019 16:52:31 -0800
Subject: drivers/rapidio/rio-driver.c: fix missing include of
 <linux/rio_drv.h>

Include <linux/rio_drv.h> for the missing declarations of functions
exported from this file.  Fixes the following sparse warnings:

  drivers/rapidio/rio-driver.c:53:16: warning: symbol 'rio_dev_get' was not declared. Should it be static?
  drivers/rapidio/rio-driver.c:70:6: warning: symbol 'rio_dev_put' was not declared. Should it be static?
  drivers/rapidio/rio-driver.c:150:5: warning: symbol 'rio_register_driver' was not declared. Should it be static?
  drivers/rapidio/rio-driver.c:169:6: warning: symbol 'rio_unregister_driver' was not declared. Should it be static?

Link: http://lkml.kernel.org/r/20191017114923.10888-1-ben.dooks@codethink.co.uk
Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Alexandre Bounine <alex.bou9@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rapidio/rio-driver.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c
index 2d99a3712b72..72874153972e 100644
--- a/drivers/rapidio/rio-driver.c
+++ b/drivers/rapidio/rio-driver.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/rio.h>
 #include <linux/rio_ids.h>
+#include <linux/rio_drv.h>
 
 #include "rio.h"
 
-- 
cgit v1.2.3-59-g8ed1b


From bd7bca4335a55561b627149322e93de1b25404c1 Mon Sep 17 00:00:00 2001
From: "Ben Dooks (Codethink)" <ben.dooks@codethink.co.uk>
Date: Wed, 4 Dec 2019 16:52:34 -0800
Subject: drivers/rapidio/rio-access.c: fix missing include of
 <linux/rio_drv.h>

Include <linux/rio_drv.h> for the missing declarations of functions
exported from this file.  Fixes the following sparse warnings:

  drivers/rapidio/rio-access.c:59:1: warning: symbol '__rio_local_read_config_8' was not declared. Should it be static?
  drivers/rapidio/rio-access.c:60:1: warning: symbol '__rio_local_read_config_16' was not declared. Should it be static?
  drivers/rapidio/rio-access.c:61:1: warning: symbol '__rio_local_read_config_32' was not declared. Should it be static?
  drivers/rapidio/rio-access.c:62:1: warning: symbol '__rio_local_write_config_8' was not declared. Should it be static?
  drivers/rapidio/rio-access.c:63:1: warning: symbol '__rio_local_write_config_16' was not declared. Should it be static?
  drivers/rapidio/rio-access.c:64:1: warning: symbol '__rio_local_write_config_32' was not declared. Should it be static?
  drivers/rapidio/rio-access.c:112:1: warning: symbol 'rio_mport_read_config_8' was not declared. Should it be static?
  drivers/rapidio/rio-access.c:113:1: warning: symbol 'rio_mport_read_config_16' was not declared. Should it be static?
  drivers/rapidio/rio-access.c:114:1: warning: symbol 'rio_mport_read_config_32' was not declared. Should it be static?
  drivers/rapidio/rio-access.c:115:1: warning: symbol 'rio_mport_write_config_8' was not declared. Should it be static?
  drivers/rapidio/rio-access.c:116:1: warning: symbol 'rio_mport_write_config_16' was not declared. Should it be static?
  drivers/rapidio/rio-access.c:117:1: warning: symbol 'rio_mport_write_config_32' was not declared. Should it be static?
  drivers/rapidio/rio-access.c:136:5: warning: symbol 'rio_mport_send_doorbell' was not declared. Should it be static?

Link: http://lkml.kernel.org/r/20191017115103.684-1-ben.dooks@codethink.co.uk
Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Alexandre Bounine <alex.bou9@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rapidio/rio-access.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/rapidio/rio-access.c b/drivers/rapidio/rio-access.c
index 33c8d1ecc988..f9e10647f94e 100644
--- a/drivers/rapidio/rio-access.c
+++ b/drivers/rapidio/rio-access.c
@@ -9,6 +9,8 @@
 #include <linux/rio.h>
 #include <linux/module.h>
 
+#include <linux/rio_drv.h>
+
 /*
  *  Wrappers for all RIO configuration access functions.  They just check
  *  alignment and call the low-level functions pointed to by rio_mport->ops.
-- 
cgit v1.2.3-59-g8ed1b


From 5bf8bec3f4ce044a223c40cbce92590d938f0e9c Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 4 Dec 2019 16:52:37 -0800
Subject: drm: limit to INT_MAX in create_blob ioctl

The hardened usercpy code is too paranoid ever since commit 6a30afa8c1fb
("uaccess: disallow > INT_MAX copy sizes")

Code itself should have been fine as-is.

Link: http://lkml.kernel.org/r/20191106164755.31478-1-daniel.vetter@ffwll.ch
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Reported-by: syzbot+fb77e97ebf0612ee6914@syzkaller.appspotmail.com
Fixes: 6a30afa8c1fb ("uaccess: disallow > INT_MAX copy sizes")
Cc: Kees Cook <keescook@chromium.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/drm_property.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
index 892ce636ef72..6ee04803c362 100644
--- a/drivers/gpu/drm/drm_property.c
+++ b/drivers/gpu/drm/drm_property.c
@@ -561,7 +561,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length,
 	struct drm_property_blob *blob;
 	int ret;
 
-	if (!length || length > ULONG_MAX - sizeof(struct drm_property_blob))
+	if (!length || length > INT_MAX - sizeof(struct drm_property_blob))
 		return ERR_PTR(-EINVAL);
 
 	blob = kvzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL);
-- 
cgit v1.2.3-59-g8ed1b


From 6d13de1489b6bf539695f96d945de3860e6d5e17 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 4 Dec 2019 16:52:40 -0800
Subject: uaccess: disallow > INT_MAX copy sizes

As we've done with VFS, string operations, etc, reject usercopy sizes
larger than INT_MAX, which would be nice to have for catching bugs
related to size calculation overflows[1].

This adds 10 bytes to x86_64 defconfig text and 1980 bytes to the data
section:

     text    data     bss     dec     hex filename
  19691167        5134320 1646664 26472151        193eed7 vmlinux.before
  19691177        5136300 1646664 26474141        193f69d vmlinux.after

[1] https://marc.info/?l=linux-s390&m=156631939010493&w=2

Link: http://lkml.kernel.org/r/201908251612.F9902D7A@keescook
Signed-off-by: Kees Cook <keescook@chromium.org>
Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/thread_info.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 659a4400517b..e93e249a4e9b 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -147,6 +147,8 @@ check_copy_size(const void *addr, size_t bytes, bool is_source)
 			__bad_copy_to();
 		return false;
 	}
+	if (WARN_ON_ONCE(bytes > INT_MAX))
+		return false;
 	check_object_size(addr, bytes, is_source);
 	return true;
 }
-- 
cgit v1.2.3-59-g8ed1b


From eec028c9386ed1a692aa01a85b55952202b41619 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Wed, 4 Dec 2019 16:52:43 -0800
Subject: kcov: remote coverage support

Patch series " kcov: collect coverage from usb and vhost", v3.

This patchset extends kcov to allow collecting coverage from backgound
kernel threads.  This extension requires custom annotations for each of
the places where coverage collection is desired.  This patchset
implements this for hub events in the USB subsystem and for vhost
workers.  See the first patch description for details about the kcov
extension.  The other two patches apply this kcov extension to USB and
vhost.

Examples of other subsystems that might potentially benefit from this
when custom annotations are added (the list is based on
process_one_work() callers for bugs recently reported by syzbot):

1. fs: writeback wb_workfn() worker,
2. net: addrconf_dad_work()/addrconf_verify_work() workers,
3. net: neigh_periodic_work() worker,
4. net/p9: p9_write_work()/p9_read_work() workers,
5. block: blk_mq_run_work_fn() worker.

These patches have been used to enable coverage-guided USB fuzzing with
syzkaller for the last few years, see the details here:

  https://github.com/google/syzkaller/blob/master/docs/linux/external_fuzzing_usb.md

This patchset has been pushed to the public Linux kernel Gerrit
instance:

  https://linux-review.googlesource.com/c/linux/kernel/git/torvalds/linux/+/1524

This patch (of 3):

Add background thread coverage collection ability to kcov.

With KCOV_ENABLE coverage is collected only for syscalls that are issued
from the current process.  With KCOV_REMOTE_ENABLE it's possible to
collect coverage for arbitrary parts of the kernel code, provided that
those parts are annotated with kcov_remote_start()/kcov_remote_stop().

This allows to collect coverage from two types of kernel background
threads: the global ones, that are spawned during kernel boot in a
limited number of instances (e.g.  one USB hub_event() worker thread is
spawned per USB HCD); and the local ones, that are spawned when a user
interacts with some kernel interface (e.g.  vhost workers).

To enable collecting coverage from a global background thread, a unique
global handle must be assigned and passed to the corresponding
kcov_remote_start() call.  Then a userspace process can pass a list of
such handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field
of the kcov_remote_arg struct.  This will attach the used kcov device to
the code sections, that are referenced by those handles.

Since there might be many local background threads spawned from
different userspace processes, we can't use a single global handle per
annotation.  Instead, the userspace process passes a non-zero handle
through the common_handle field of the kcov_remote_arg struct.  This
common handle gets saved to the kcov_handle field in the current
task_struct and needs to be passed to the newly spawned threads via
custom annotations.  Those threads should in turn be annotated with
kcov_remote_start()/kcov_remote_stop().

Internally kcov stores handles as u64 integers.  The top byte of a
handle is used to denote the id of a subsystem that this handle belongs
to, and the lower 4 bytes are used to denote the id of a thread instance
within that subsystem.  A reserved value 0 is used as a subsystem id for
common handles as they don't belong to a particular subsystem.  The
bytes 4-7 are currently reserved and must be zero.  In the future the
number of bytes used for the subsystem or handle ids might be increased.

When a particular userspace process collects coverage by via a common
handle, kcov will collect coverage for each code section that is
annotated to use the common handle obtained as kcov_handle from the
current task_struct.  However non common handles allow to collect
coverage selectively from different subsystems.

Link: http://lkml.kernel.org/r/e90e315426a384207edbec1d6aa89e43008e4caf.1572366574.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: David Windsor <dwindsor@gmail.com>
Cc: Elena Reshetova <elena.reshetova@intel.com>
Cc: Anders Roxell <anders.roxell@linaro.org>
Cc: Alexander Potapenko <glider@google.com>
Cc: Marco Elver <elver@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/dev-tools/kcov.rst | 129 +++++++++
 include/linux/kcov.h             |  23 ++
 include/linux/sched.h            |   8 +
 include/uapi/linux/kcov.h        |  28 ++
 kernel/kcov.c                    | 547 ++++++++++++++++++++++++++++++++++++---
 5 files changed, 700 insertions(+), 35 deletions(-)

diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst
index 42b612677799..36890b026e77 100644
--- a/Documentation/dev-tools/kcov.rst
+++ b/Documentation/dev-tools/kcov.rst
@@ -34,6 +34,7 @@ Profiling data will only become accessible once debugfs has been mounted::
 
 Coverage collection
 -------------------
+
 The following program demonstrates coverage collection from within a test
 program using kcov:
 
@@ -128,6 +129,7 @@ only need to enable coverage (disable happens automatically on thread end).
 
 Comparison operands collection
 ------------------------------
+
 Comparison operands collection is similar to coverage collection:
 
 .. code-block:: c
@@ -202,3 +204,130 @@ Comparison operands collection is similar to coverage collection:
 
 Note that the kcov modes (coverage collection or comparison operands) are
 mutually exclusive.
+
+Remote coverage collection
+--------------------------
+
+With KCOV_ENABLE coverage is collected only for syscalls that are issued
+from the current process. With KCOV_REMOTE_ENABLE it's possible to collect
+coverage for arbitrary parts of the kernel code, provided that those parts
+are annotated with kcov_remote_start()/kcov_remote_stop().
+
+This allows to collect coverage from two types of kernel background
+threads: the global ones, that are spawned during kernel boot in a limited
+number of instances (e.g. one USB hub_event() worker thread is spawned per
+USB HCD); and the local ones, that are spawned when a user interacts with
+some kernel interface (e.g. vhost workers).
+
+To enable collecting coverage from a global background thread, a unique
+global handle must be assigned and passed to the corresponding
+kcov_remote_start() call. Then a userspace process can pass a list of such
+handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field of the
+kcov_remote_arg struct. This will attach the used kcov device to the code
+sections, that are referenced by those handles.
+
+Since there might be many local background threads spawned from different
+userspace processes, we can't use a single global handle per annotation.
+Instead, the userspace process passes a non-zero handle through the
+common_handle field of the kcov_remote_arg struct. This common handle gets
+saved to the kcov_handle field in the current task_struct and needs to be
+passed to the newly spawned threads via custom annotations. Those threads
+should in turn be annotated with kcov_remote_start()/kcov_remote_stop().
+
+Internally kcov stores handles as u64 integers. The top byte of a handle
+is used to denote the id of a subsystem that this handle belongs to, and
+the lower 4 bytes are used to denote the id of a thread instance within
+that subsystem. A reserved value 0 is used as a subsystem id for common
+handles as they don't belong to a particular subsystem. The bytes 4-7 are
+currently reserved and must be zero. In the future the number of bytes
+used for the subsystem or handle ids might be increased.
+
+When a particular userspace proccess collects coverage by via a common
+handle, kcov will collect coverage for each code section that is annotated
+to use the common handle obtained as kcov_handle from the current
+task_struct. However non common handles allow to collect coverage
+selectively from different subsystems.
+
+.. code-block:: c
+
+    struct kcov_remote_arg {
+	unsigned	trace_mode;
+	unsigned	area_size;
+	unsigned	num_handles;
+	uint64_t	common_handle;
+	uint64_t	handles[0];
+    };
+
+    #define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
+    #define KCOV_DISABLE			_IO('c', 101)
+    #define KCOV_REMOTE_ENABLE		_IOW('c', 102, struct kcov_remote_arg)
+
+    #define COVER_SIZE	(64 << 10)
+
+    #define KCOV_TRACE_PC	0
+
+    #define KCOV_SUBSYSTEM_COMMON	(0x00ull << 56)
+    #define KCOV_SUBSYSTEM_USB	(0x01ull << 56)
+
+    #define KCOV_SUBSYSTEM_MASK	(0xffull << 56)
+    #define KCOV_INSTANCE_MASK	(0xffffffffull)
+
+    static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst)
+    {
+	if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK)
+		return 0;
+	return subsys | inst;
+    }
+
+    #define KCOV_COMMON_ID	0x42
+    #define KCOV_USB_BUS_NUM	1
+
+    int main(int argc, char **argv)
+    {
+	int fd;
+	unsigned long *cover, n, i;
+	struct kcov_remote_arg *arg;
+
+	fd = open("/sys/kernel/debug/kcov", O_RDWR);
+	if (fd == -1)
+		perror("open"), exit(1);
+	if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE))
+		perror("ioctl"), exit(1);
+	cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long),
+				     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if ((void*)cover == MAP_FAILED)
+		perror("mmap"), exit(1);
+
+	/* Enable coverage collection via common handle and from USB bus #1. */
+	arg = calloc(1, sizeof(*arg) + sizeof(uint64_t));
+	if (!arg)
+		perror("calloc"), exit(1);
+	arg->trace_mode = KCOV_TRACE_PC;
+	arg->area_size = COVER_SIZE;
+	arg->num_handles = 1;
+	arg->common_handle = kcov_remote_handle(KCOV_SUBSYSTEM_COMMON,
+							KCOV_COMMON_ID);
+	arg->handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB,
+						KCOV_USB_BUS_NUM);
+	if (ioctl(fd, KCOV_REMOTE_ENABLE, arg))
+		perror("ioctl"), free(arg), exit(1);
+	free(arg);
+
+	/*
+	 * Here the user needs to trigger execution of a kernel code section
+	 * that is either annotated with the common handle, or to trigger some
+	 * activity on USB bus #1.
+	 */
+	sleep(2);
+
+	n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
+	for (i = 0; i < n; i++)
+		printf("0x%lx\n", cover[i + 1]);
+	if (ioctl(fd, KCOV_DISABLE, 0))
+		perror("ioctl"), exit(1);
+	if (munmap(cover, COVER_SIZE * sizeof(unsigned long)))
+		perror("munmap"), exit(1);
+	if (close(fd))
+		perror("close"), exit(1);
+	return 0;
+    }
diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index b76a1807028d..a10e84707d82 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -37,12 +37,35 @@ do {						\
 	(t)->kcov_mode &= ~KCOV_IN_CTXSW;	\
 } while (0)
 
+/* See Documentation/dev-tools/kcov.rst for usage details. */
+void kcov_remote_start(u64 handle);
+void kcov_remote_stop(void);
+u64 kcov_common_handle(void);
+
+static inline void kcov_remote_start_common(u64 id)
+{
+	kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, id));
+}
+
+static inline void kcov_remote_start_usb(u64 id)
+{
+	kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id));
+}
+
 #else
 
 static inline void kcov_task_init(struct task_struct *t) {}
 static inline void kcov_task_exit(struct task_struct *t) {}
 static inline void kcov_prepare_switch(struct task_struct *t) {}
 static inline void kcov_finish_switch(struct task_struct *t) {}
+static inline void kcov_remote_start(u64 handle) {}
+static inline void kcov_remote_stop(void) {}
+static inline u64 kcov_common_handle(void)
+{
+	return 0;
+}
+static inline void kcov_remote_start_common(u64 id) {}
+static inline void kcov_remote_start_usb(u64 id) {}
 
 #endif /* CONFIG_KCOV */
 #endif /* _LINUX_KCOV_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0cd97d9dd021..467d26046416 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1210,6 +1210,8 @@ struct task_struct {
 #endif /* CONFIG_TRACING */
 
 #ifdef CONFIG_KCOV
+	/* See kernel/kcov.c for more details. */
+
 	/* Coverage collection mode enabled for this task (0 if disabled): */
 	unsigned int			kcov_mode;
 
@@ -1221,6 +1223,12 @@ struct task_struct {
 
 	/* KCOV descriptor wired with this task or NULL: */
 	struct kcov			*kcov;
+
+	/* KCOV common handle for remote coverage collection: */
+	u64				kcov_handle;
+
+	/* KCOV sequence number: */
+	int				kcov_sequence;
 #endif
 
 #ifdef CONFIG_MEMCG
diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h
index 9529867717a8..409d3ad1e6e2 100644
--- a/include/uapi/linux/kcov.h
+++ b/include/uapi/linux/kcov.h
@@ -4,9 +4,24 @@
 
 #include <linux/types.h>
 
+/*
+ * Argument for KCOV_REMOTE_ENABLE ioctl, see Documentation/dev-tools/kcov.rst
+ * and the comment before kcov_remote_start() for usage details.
+ */
+struct kcov_remote_arg {
+	unsigned int	trace_mode;	/* KCOV_TRACE_PC or KCOV_TRACE_CMP */
+	unsigned int	area_size;	/* Length of coverage buffer in words */
+	unsigned int	num_handles;	/* Size of handles array */
+	__u64		common_handle;
+	__u64		handles[0];
+};
+
+#define KCOV_REMOTE_MAX_HANDLES		0x100
+
 #define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
 #define KCOV_ENABLE			_IO('c', 100)
 #define KCOV_DISABLE			_IO('c', 101)
+#define KCOV_REMOTE_ENABLE		_IOW('c', 102, struct kcov_remote_arg)
 
 enum {
 	/*
@@ -32,4 +47,17 @@ enum {
 #define KCOV_CMP_SIZE(n)        ((n) << 1)
 #define KCOV_CMP_MASK           KCOV_CMP_SIZE(3)
 
+#define KCOV_SUBSYSTEM_COMMON	(0x00ull << 56)
+#define KCOV_SUBSYSTEM_USB	(0x01ull << 56)
+
+#define KCOV_SUBSYSTEM_MASK	(0xffull << 56)
+#define KCOV_INSTANCE_MASK	(0xffffffffull)
+
+static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst)
+{
+	if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK)
+		return 0;
+	return subsys | inst;
+}
+
 #endif /* _LINUX_KCOV_IOCTLS_H */
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 2ee38727844a..f50354202dbe 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -9,6 +9,7 @@
 #include <linux/types.h>
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/hashtable.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/preempt.h>
@@ -21,8 +22,11 @@
 #include <linux/uaccess.h>
 #include <linux/kcov.h>
 #include <linux/refcount.h>
+#include <linux/log2.h>
 #include <asm/setup.h>
 
+#define kcov_debug(fmt, ...) pr_debug("%s: " fmt, __func__, ##__VA_ARGS__)
+
 /* Number of 64-bit words written per one comparison: */
 #define KCOV_WORDS_PER_CMP 4
 
@@ -44,19 +48,100 @@ struct kcov {
 	 * Reference counter. We keep one for:
 	 *  - opened file descriptor
 	 *  - task with enabled coverage (we can't unwire it from another task)
+	 *  - each code section for remote coverage collection
 	 */
 	refcount_t		refcount;
 	/* The lock protects mode, size, area and t. */
 	spinlock_t		lock;
 	enum kcov_mode		mode;
-	/* Size of arena (in long's for KCOV_MODE_TRACE). */
-	unsigned		size;
+	/* Size of arena (in long's). */
+	unsigned int		size;
 	/* Coverage buffer shared with user space. */
 	void			*area;
 	/* Task for which we collect coverage, or NULL. */
 	struct task_struct	*t;
+	/* Collecting coverage from remote (background) threads. */
+	bool			remote;
+	/* Size of remote area (in long's). */
+	unsigned int		remote_size;
+	/*
+	 * Sequence is incremented each time kcov is reenabled, used by
+	 * kcov_remote_stop(), see the comment there.
+	 */
+	int			sequence;
 };
 
+struct kcov_remote_area {
+	struct list_head	list;
+	unsigned int		size;
+};
+
+struct kcov_remote {
+	u64			handle;
+	struct kcov		*kcov;
+	struct hlist_node	hnode;
+};
+
+static DEFINE_SPINLOCK(kcov_remote_lock);
+static DEFINE_HASHTABLE(kcov_remote_map, 4);
+static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas);
+
+/* Must be called with kcov_remote_lock locked. */
+static struct kcov_remote *kcov_remote_find(u64 handle)
+{
+	struct kcov_remote *remote;
+
+	hash_for_each_possible(kcov_remote_map, remote, hnode, handle) {
+		if (remote->handle == handle)
+			return remote;
+	}
+	return NULL;
+}
+
+static struct kcov_remote *kcov_remote_add(struct kcov *kcov, u64 handle)
+{
+	struct kcov_remote *remote;
+
+	if (kcov_remote_find(handle))
+		return ERR_PTR(-EEXIST);
+	remote = kmalloc(sizeof(*remote), GFP_ATOMIC);
+	if (!remote)
+		return ERR_PTR(-ENOMEM);
+	remote->handle = handle;
+	remote->kcov = kcov;
+	hash_add(kcov_remote_map, &remote->hnode, handle);
+	return remote;
+}
+
+/* Must be called with kcov_remote_lock locked. */
+static struct kcov_remote_area *kcov_remote_area_get(unsigned int size)
+{
+	struct kcov_remote_area *area;
+	struct list_head *pos;
+
+	kcov_debug("size = %u\n", size);
+	list_for_each(pos, &kcov_remote_areas) {
+		area = list_entry(pos, struct kcov_remote_area, list);
+		if (area->size == size) {
+			list_del(&area->list);
+			kcov_debug("rv = %px\n", area);
+			return area;
+		}
+	}
+	kcov_debug("rv = NULL\n");
+	return NULL;
+}
+
+/* Must be called with kcov_remote_lock locked. */
+static void kcov_remote_area_put(struct kcov_remote_area *area,
+					unsigned int size)
+{
+	kcov_debug("area = %px, size = %u\n", area, size);
+	INIT_LIST_HEAD(&area->list);
+	area->size = size;
+	list_add(&area->list, &kcov_remote_areas);
+}
+
 static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
 {
 	unsigned int mode;
@@ -73,7 +158,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru
 	 * in_interrupt() returns false (e.g. preempt_schedule_irq()).
 	 * READ_ONCE()/barrier() effectively provides load-acquire wrt
 	 * interrupts, there are paired barrier()/WRITE_ONCE() in
-	 * kcov_ioctl_locked().
+	 * kcov_start().
 	 */
 	barrier();
 	return mode == needed_mode;
@@ -227,6 +312,78 @@ void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases)
 EXPORT_SYMBOL(__sanitizer_cov_trace_switch);
 #endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */
 
+static void kcov_start(struct task_struct *t, unsigned int size,
+			void *area, enum kcov_mode mode, int sequence)
+{
+	kcov_debug("t = %px, size = %u, area = %px\n", t, size, area);
+	/* Cache in task struct for performance. */
+	t->kcov_size = size;
+	t->kcov_area = area;
+	/* See comment in check_kcov_mode(). */
+	barrier();
+	WRITE_ONCE(t->kcov_mode, mode);
+	t->kcov_sequence = sequence;
+}
+
+static void kcov_stop(struct task_struct *t)
+{
+	WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED);
+	barrier();
+	t->kcov_size = 0;
+	t->kcov_area = NULL;
+}
+
+static void kcov_task_reset(struct task_struct *t)
+{
+	kcov_stop(t);
+	t->kcov = NULL;
+	t->kcov_sequence = 0;
+	t->kcov_handle = 0;
+}
+
+void kcov_task_init(struct task_struct *t)
+{
+	kcov_task_reset(t);
+	t->kcov_handle = current->kcov_handle;
+}
+
+static void kcov_reset(struct kcov *kcov)
+{
+	kcov->t = NULL;
+	kcov->mode = KCOV_MODE_INIT;
+	kcov->remote = false;
+	kcov->remote_size = 0;
+	kcov->sequence++;
+}
+
+static void kcov_remote_reset(struct kcov *kcov)
+{
+	int bkt;
+	struct kcov_remote *remote;
+	struct hlist_node *tmp;
+
+	spin_lock(&kcov_remote_lock);
+	hash_for_each_safe(kcov_remote_map, bkt, tmp, remote, hnode) {
+		if (remote->kcov != kcov)
+			continue;
+		kcov_debug("removing handle %llx\n", remote->handle);
+		hash_del(&remote->hnode);
+		kfree(remote);
+	}
+	/* Do reset before unlock to prevent races with kcov_remote_start(). */
+	kcov_reset(kcov);
+	spin_unlock(&kcov_remote_lock);
+}
+
+static void kcov_disable(struct task_struct *t, struct kcov *kcov)
+{
+	kcov_task_reset(t);
+	if (kcov->remote)
+		kcov_remote_reset(kcov);
+	else
+		kcov_reset(kcov);
+}
+
 static void kcov_get(struct kcov *kcov)
 {
 	refcount_inc(&kcov->refcount);
@@ -235,20 +392,12 @@ static void kcov_get(struct kcov *kcov)
 static void kcov_put(struct kcov *kcov)
 {
 	if (refcount_dec_and_test(&kcov->refcount)) {
+		kcov_remote_reset(kcov);
 		vfree(kcov->area);
 		kfree(kcov);
 	}
 }
 
-void kcov_task_init(struct task_struct *t)
-{
-	WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED);
-	barrier();
-	t->kcov_size = 0;
-	t->kcov_area = NULL;
-	t->kcov = NULL;
-}
-
 void kcov_task_exit(struct task_struct *t)
 {
 	struct kcov *kcov;
@@ -256,15 +405,36 @@ void kcov_task_exit(struct task_struct *t)
 	kcov = t->kcov;
 	if (kcov == NULL)
 		return;
+
 	spin_lock(&kcov->lock);
+	kcov_debug("t = %px, kcov->t = %px\n", t, kcov->t);
+	/*
+	 * For KCOV_ENABLE devices we want to make sure that t->kcov->t == t,
+	 * which comes down to:
+	 *        WARN_ON(!kcov->remote && kcov->t != t);
+	 *
+	 * For KCOV_REMOTE_ENABLE devices, the exiting task is either:
+	 * 2. A remote task between kcov_remote_start() and kcov_remote_stop().
+	 *    In this case we should print a warning right away, since a task
+	 *    shouldn't be exiting when it's in a kcov coverage collection
+	 *    section. Here t points to the task that is collecting remote
+	 *    coverage, and t->kcov->t points to the thread that created the
+	 *    kcov device. Which means that to detect this case we need to
+	 *    check that t != t->kcov->t, and this gives us the following:
+	 *        WARN_ON(kcov->remote && kcov->t != t);
+	 *
+	 * 2. The task that created kcov exiting without calling KCOV_DISABLE,
+	 *    and then again we can make sure that t->kcov->t == t:
+	 *        WARN_ON(kcov->remote && kcov->t != t);
+	 *
+	 * By combining all three checks into one we get:
+	 */
 	if (WARN_ON(kcov->t != t)) {
 		spin_unlock(&kcov->lock);
 		return;
 	}
 	/* Just to not leave dangling references behind. */
-	kcov_task_init(t);
-	kcov->t = NULL;
-	kcov->mode = KCOV_MODE_INIT;
+	kcov_disable(t, kcov);
 	spin_unlock(&kcov->lock);
 	kcov_put(kcov);
 }
@@ -313,6 +483,7 @@ static int kcov_open(struct inode *inode, struct file *filep)
 	if (!kcov)
 		return -ENOMEM;
 	kcov->mode = KCOV_MODE_DISABLED;
+	kcov->sequence = 1;
 	refcount_set(&kcov->refcount, 1);
 	spin_lock_init(&kcov->lock);
 	filep->private_data = kcov;
@@ -325,6 +496,20 @@ static int kcov_close(struct inode *inode, struct file *filep)
 	return 0;
 }
 
+static int kcov_get_mode(unsigned long arg)
+{
+	if (arg == KCOV_TRACE_PC)
+		return KCOV_MODE_TRACE_PC;
+	else if (arg == KCOV_TRACE_CMP)
+#ifdef CONFIG_KCOV_ENABLE_COMPARISONS
+		return KCOV_MODE_TRACE_CMP;
+#else
+		return -ENOTSUPP;
+#endif
+	else
+		return -EINVAL;
+}
+
 /*
  * Fault in a lazily-faulted vmalloc area before it can be used by
  * __santizer_cov_trace_pc(), to avoid recursion issues if any code on the
@@ -340,14 +525,35 @@ static void kcov_fault_in_area(struct kcov *kcov)
 		READ_ONCE(area[offset]);
 }
 
+static inline bool kcov_check_handle(u64 handle, bool common_valid,
+				bool uncommon_valid, bool zero_valid)
+{
+	if (handle & ~(KCOV_SUBSYSTEM_MASK | KCOV_INSTANCE_MASK))
+		return false;
+	switch (handle & KCOV_SUBSYSTEM_MASK) {
+	case KCOV_SUBSYSTEM_COMMON:
+		return (handle & KCOV_INSTANCE_MASK) ?
+			common_valid : zero_valid;
+	case KCOV_SUBSYSTEM_USB:
+		return uncommon_valid;
+	default:
+		return false;
+	}
+	return false;
+}
+
 static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
 			     unsigned long arg)
 {
 	struct task_struct *t;
 	unsigned long size, unused;
+	int mode, i;
+	struct kcov_remote_arg *remote_arg;
+	struct kcov_remote *remote;
 
 	switch (cmd) {
 	case KCOV_INIT_TRACE:
+		kcov_debug("KCOV_INIT_TRACE\n");
 		/*
 		 * Enable kcov in trace mode and setup buffer size.
 		 * Must happen before anything else.
@@ -366,6 +572,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
 		kcov->mode = KCOV_MODE_INIT;
 		return 0;
 	case KCOV_ENABLE:
+		kcov_debug("KCOV_ENABLE\n");
 		/*
 		 * Enable coverage for the current task.
 		 * At this point user must have been enabled trace mode,
@@ -378,29 +585,20 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
 		t = current;
 		if (kcov->t != NULL || t->kcov != NULL)
 			return -EBUSY;
-		if (arg == KCOV_TRACE_PC)
-			kcov->mode = KCOV_MODE_TRACE_PC;
-		else if (arg == KCOV_TRACE_CMP)
-#ifdef CONFIG_KCOV_ENABLE_COMPARISONS
-			kcov->mode = KCOV_MODE_TRACE_CMP;
-#else
-		return -ENOTSUPP;
-#endif
-		else
-			return -EINVAL;
+		mode = kcov_get_mode(arg);
+		if (mode < 0)
+			return mode;
 		kcov_fault_in_area(kcov);
-		/* Cache in task struct for performance. */
-		t->kcov_size = kcov->size;
-		t->kcov_area = kcov->area;
-		/* See comment in check_kcov_mode(). */
-		barrier();
-		WRITE_ONCE(t->kcov_mode, kcov->mode);
+		kcov->mode = mode;
+		kcov_start(t, kcov->size, kcov->area, kcov->mode,
+				kcov->sequence);
 		t->kcov = kcov;
 		kcov->t = t;
-		/* This is put either in kcov_task_exit() or in KCOV_DISABLE. */
+		/* Put either in kcov_task_exit() or in KCOV_DISABLE. */
 		kcov_get(kcov);
 		return 0;
 	case KCOV_DISABLE:
+		kcov_debug("KCOV_DISABLE\n");
 		/* Disable coverage for the current task. */
 		unused = arg;
 		if (unused != 0 || current->kcov != kcov)
@@ -408,11 +606,65 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
 		t = current;
 		if (WARN_ON(kcov->t != t))
 			return -EINVAL;
-		kcov_task_init(t);
-		kcov->t = NULL;
-		kcov->mode = KCOV_MODE_INIT;
+		kcov_disable(t, kcov);
 		kcov_put(kcov);
 		return 0;
+	case KCOV_REMOTE_ENABLE:
+		kcov_debug("KCOV_REMOTE_ENABLE\n");
+		if (kcov->mode != KCOV_MODE_INIT || !kcov->area)
+			return -EINVAL;
+		t = current;
+		if (kcov->t != NULL || t->kcov != NULL)
+			return -EBUSY;
+		remote_arg = (struct kcov_remote_arg *)arg;
+		mode = kcov_get_mode(remote_arg->trace_mode);
+		if (mode < 0)
+			return mode;
+		if (remote_arg->area_size > LONG_MAX / sizeof(unsigned long))
+			return -EINVAL;
+		kcov->mode = mode;
+		t->kcov = kcov;
+		kcov->t = t;
+		kcov->remote = true;
+		kcov->remote_size = remote_arg->area_size;
+		spin_lock(&kcov_remote_lock);
+		for (i = 0; i < remote_arg->num_handles; i++) {
+			kcov_debug("handle %llx\n", remote_arg->handles[i]);
+			if (!kcov_check_handle(remote_arg->handles[i],
+						false, true, false)) {
+				spin_unlock(&kcov_remote_lock);
+				kcov_disable(t, kcov);
+				return -EINVAL;
+			}
+			remote = kcov_remote_add(kcov, remote_arg->handles[i]);
+			if (IS_ERR(remote)) {
+				spin_unlock(&kcov_remote_lock);
+				kcov_disable(t, kcov);
+				return PTR_ERR(remote);
+			}
+		}
+		if (remote_arg->common_handle) {
+			kcov_debug("common handle %llx\n",
+					remote_arg->common_handle);
+			if (!kcov_check_handle(remote_arg->common_handle,
+						true, false, false)) {
+				spin_unlock(&kcov_remote_lock);
+				kcov_disable(t, kcov);
+				return -EINVAL;
+			}
+			remote = kcov_remote_add(kcov,
+					remote_arg->common_handle);
+			if (IS_ERR(remote)) {
+				spin_unlock(&kcov_remote_lock);
+				kcov_disable(t, kcov);
+				return PTR_ERR(remote);
+			}
+			t->kcov_handle = remote_arg->common_handle;
+		}
+		spin_unlock(&kcov_remote_lock);
+		/* Put either in kcov_task_exit() or in KCOV_DISABLE. */
+		kcov_get(kcov);
+		return 0;
 	default:
 		return -ENOTTY;
 	}
@@ -422,11 +674,35 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 {
 	struct kcov *kcov;
 	int res;
+	struct kcov_remote_arg *remote_arg = NULL;
+	unsigned int remote_num_handles;
+	unsigned long remote_arg_size;
+
+	if (cmd == KCOV_REMOTE_ENABLE) {
+		if (get_user(remote_num_handles, (unsigned __user *)(arg +
+				offsetof(struct kcov_remote_arg, num_handles))))
+			return -EFAULT;
+		if (remote_num_handles > KCOV_REMOTE_MAX_HANDLES)
+			return -EINVAL;
+		remote_arg_size = struct_size(remote_arg, handles,
+					remote_num_handles);
+		remote_arg = memdup_user((void __user *)arg, remote_arg_size);
+		if (IS_ERR(remote_arg))
+			return PTR_ERR(remote_arg);
+		if (remote_arg->num_handles != remote_num_handles) {
+			kfree(remote_arg);
+			return -EINVAL;
+		}
+		arg = (unsigned long)remote_arg;
+	}
 
 	kcov = filep->private_data;
 	spin_lock(&kcov->lock);
 	res = kcov_ioctl_locked(kcov, cmd, arg);
 	spin_unlock(&kcov->lock);
+
+	kfree(remote_arg);
+
 	return res;
 }
 
@@ -438,6 +714,207 @@ static const struct file_operations kcov_fops = {
 	.release        = kcov_close,
 };
 
+/*
+ * kcov_remote_start() and kcov_remote_stop() can be used to annotate a section
+ * of code in a kernel background thread to allow kcov to be used to collect
+ * coverage from that part of code.
+ *
+ * The handle argument of kcov_remote_start() identifies a code section that is
+ * used for coverage collection. A userspace process passes this handle to
+ * KCOV_REMOTE_ENABLE ioctl to make the used kcov device start collecting
+ * coverage for the code section identified by this handle.
+ *
+ * The usage of these annotations in the kernel code is different depending on
+ * the type of the kernel thread whose code is being annotated.
+ *
+ * For global kernel threads that are spawned in a limited number of instances
+ * (e.g. one USB hub_event() worker thread is spawned per USB HCD), each
+ * instance must be assigned a unique 4-byte instance id. The instance id is
+ * then combined with a 1-byte subsystem id to get a handle via
+ * kcov_remote_handle(subsystem_id, instance_id).
+ *
+ * For local kernel threads that are spawned from system calls handler when a
+ * user interacts with some kernel interface (e.g. vhost workers), a handle is
+ * passed from a userspace process as the common_handle field of the
+ * kcov_remote_arg struct (note, that the user must generate a handle by using
+ * kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an
+ * arbitrary 4-byte non-zero number as the instance id). This common handle
+ * then gets saved into the task_struct of the process that issued the
+ * KCOV_REMOTE_ENABLE ioctl. When this proccess issues system calls that spawn
+ * kernel threads, the common handle must be retrived via kcov_common_handle()
+ * and passed to the spawned threads via custom annotations. Those kernel
+ * threads must in turn be annotated with kcov_remote_start(common_handle) and
+ * kcov_remote_stop(). All of the threads that are spawned by the same process
+ * obtain the same handle, hence the name "common".
+ *
+ * See Documentation/dev-tools/kcov.rst for more details.
+ *
+ * Internally, this function looks up the kcov device associated with the
+ * provided handle, allocates an area for coverage collection, and saves the
+ * pointers to kcov and area into the current task_struct to allow coverage to
+ * be collected via __sanitizer_cov_trace_pc()
+ * In turns kcov_remote_stop() clears those pointers from task_struct to stop
+ * collecting coverage and copies all collected coverage into the kcov area.
+ */
+void kcov_remote_start(u64 handle)
+{
+	struct kcov_remote *remote;
+	void *area;
+	struct task_struct *t;
+	unsigned int size;
+	enum kcov_mode mode;
+	int sequence;
+
+	if (WARN_ON(!kcov_check_handle(handle, true, true, true)))
+		return;
+	if (WARN_ON(!in_task()))
+		return;
+	t = current;
+	/*
+	 * Check that kcov_remote_start is not called twice
+	 * nor called by user tasks (with enabled kcov).
+	 */
+	if (WARN_ON(t->kcov))
+		return;
+
+	kcov_debug("handle = %llx\n", handle);
+
+	spin_lock(&kcov_remote_lock);
+	remote = kcov_remote_find(handle);
+	if (!remote) {
+		kcov_debug("no remote found");
+		spin_unlock(&kcov_remote_lock);
+		return;
+	}
+	/* Put in kcov_remote_stop(). */
+	kcov_get(remote->kcov);
+	t->kcov = remote->kcov;
+	/*
+	 * Read kcov fields before unlock to prevent races with
+	 * KCOV_DISABLE / kcov_remote_reset().
+	 */
+	size = remote->kcov->remote_size;
+	mode = remote->kcov->mode;
+	sequence = remote->kcov->sequence;
+	area = kcov_remote_area_get(size);
+	spin_unlock(&kcov_remote_lock);
+
+	if (!area) {
+		area = vmalloc(size * sizeof(unsigned long));
+		if (!area) {
+			t->kcov = NULL;
+			kcov_put(remote->kcov);
+			return;
+		}
+	}
+	/* Reset coverage size. */
+	*(u64 *)area = 0;
+
+	kcov_debug("area = %px, size = %u", area, size);
+
+	kcov_start(t, size, area, mode, sequence);
+
+}
+EXPORT_SYMBOL(kcov_remote_start);
+
+static void kcov_move_area(enum kcov_mode mode, void *dst_area,
+				unsigned int dst_area_size, void *src_area)
+{
+	u64 word_size = sizeof(unsigned long);
+	u64 count_size, entry_size_log;
+	u64 dst_len, src_len;
+	void *dst_entries, *src_entries;
+	u64 dst_occupied, dst_free, bytes_to_move, entries_moved;
+
+	kcov_debug("%px %u <= %px %lu\n",
+		dst_area, dst_area_size, src_area, *(unsigned long *)src_area);
+
+	switch (mode) {
+	case KCOV_MODE_TRACE_PC:
+		dst_len = READ_ONCE(*(unsigned long *)dst_area);
+		src_len = *(unsigned long *)src_area;
+		count_size = sizeof(unsigned long);
+		entry_size_log = __ilog2_u64(sizeof(unsigned long));
+		break;
+	case KCOV_MODE_TRACE_CMP:
+		dst_len = READ_ONCE(*(u64 *)dst_area);
+		src_len = *(u64 *)src_area;
+		count_size = sizeof(u64);
+		BUILD_BUG_ON(!is_power_of_2(KCOV_WORDS_PER_CMP));
+		entry_size_log = __ilog2_u64(sizeof(u64) * KCOV_WORDS_PER_CMP);
+		break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
+	/* As arm can't divide u64 integers use log of entry size. */
+	if (dst_len > ((dst_area_size * word_size - count_size) >>
+				entry_size_log))
+		return;
+	dst_occupied = count_size + (dst_len << entry_size_log);
+	dst_free = dst_area_size * word_size - dst_occupied;
+	bytes_to_move = min(dst_free, src_len << entry_size_log);
+	dst_entries = dst_area + dst_occupied;
+	src_entries = src_area + count_size;
+	memcpy(dst_entries, src_entries, bytes_to_move);
+	entries_moved = bytes_to_move >> entry_size_log;
+
+	switch (mode) {
+	case KCOV_MODE_TRACE_PC:
+		WRITE_ONCE(*(unsigned long *)dst_area, dst_len + entries_moved);
+		break;
+	case KCOV_MODE_TRACE_CMP:
+		WRITE_ONCE(*(u64 *)dst_area, dst_len + entries_moved);
+		break;
+	default:
+		break;
+	}
+}
+
+/* See the comment before kcov_remote_start() for usage details. */
+void kcov_remote_stop(void)
+{
+	struct task_struct *t = current;
+	struct kcov *kcov = t->kcov;
+	void *area = t->kcov_area;
+	unsigned int size = t->kcov_size;
+	int sequence = t->kcov_sequence;
+
+	if (!kcov) {
+		kcov_debug("no kcov found\n");
+		return;
+	}
+
+	kcov_stop(t);
+	t->kcov = NULL;
+
+	spin_lock(&kcov->lock);
+	/*
+	 * KCOV_DISABLE could have been called between kcov_remote_start()
+	 * and kcov_remote_stop(), hence the check.
+	 */
+	kcov_debug("move if: %d == %d && %d\n",
+		sequence, kcov->sequence, (int)kcov->remote);
+	if (sequence == kcov->sequence && kcov->remote)
+		kcov_move_area(kcov->mode, kcov->area, kcov->size, area);
+	spin_unlock(&kcov->lock);
+
+	spin_lock(&kcov_remote_lock);
+	kcov_remote_area_put(area, size);
+	spin_unlock(&kcov_remote_lock);
+
+	kcov_put(kcov);
+}
+EXPORT_SYMBOL(kcov_remote_stop);
+
+/* See the comment before kcov_remote_start() for usage details. */
+u64 kcov_common_handle(void)
+{
+	return current->kcov_handle;
+}
+EXPORT_SYMBOL(kcov_common_handle);
+
 static int __init kcov_init(void)
 {
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From 95d23dc27bde0ab4b25f7ade5e2fddc08dd97d9b Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Wed, 4 Dec 2019 16:52:47 -0800
Subject: usb, kcov: collect coverage from hub_event

Add kcov_remote_start()/kcov_remote_stop() annotations to the
hub_event() function, which is responsible for processing events on USB
buses, in particular events that happen during USB device enumeration.

Since hub_event() is run in a global background kernel thread (see
Documentation/dev-tools/kcov.rst for details), each USB bus gets a
unique global handle from the USB subsystem kcov handle range.  As the
result kcov can now be used to collect coverage from events that happen
on a particular USB bus.

[akpm@linux-foundation.org: avoid patch conflicts to make life easier for Andrew]
Link: http://lkml.kernel.org/r/de4fe1c219db2d002d905dc1736e2a3bfa1db997.1572366574.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Alexander Potapenko <glider@google.com>
Cc: Anders Roxell <anders.roxell@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Windsor <dwindsor@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Elena Reshetova <elena.reshetova@intel.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Marco Elver <elver@google.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/usb/core/hub.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 1709895387b9..f229ad6952c0 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -18,6 +18,7 @@
 #include <linux/sched/mm.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <linux/kcov.h>
 #include <linux/ioctl.h>
 #include <linux/usb.h>
 #include <linux/usbdevice_fs.h>
@@ -5484,6 +5485,8 @@ static void hub_event(struct work_struct *work)
 	hub_dev = hub->intfdev;
 	intf = to_usb_interface(hub_dev);
 
+	kcov_remote_start_usb((u64)hdev->bus->busnum);
+
 	dev_dbg(hub_dev, "state %d ports %d chg %04x evt %04x\n",
 			hdev->state, hdev->maxchild,
 			/* NOTE: expects max 15 ports... */
@@ -5590,6 +5593,8 @@ out_hdev_lock:
 	/* Balance the stuff in kick_hub_wq() and allow autosuspend */
 	usb_autopm_put_interface(intf);
 	kref_put(&hub->kref, hub_release);
+
+	kcov_remote_stop();
 }
 
 static const struct usb_device_id hub_id_table[] = {
-- 
cgit v1.2.3-59-g8ed1b


From 8f6a7f96dc29cefe16ab60f06f9c3a43510b96fd Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Wed, 4 Dec 2019 16:52:50 -0800
Subject: vhost, kcov: collect coverage from vhost_worker

Add kcov_remote_start()/kcov_remote_stop() annotations to the
vhost_worker() function, which is responsible for processing vhost
works.

Since vhost_worker() threads are spawned per vhost device instance the
common kcov handle is used for kcov_remote_start()/stop() annotations
(see Documentation/dev-tools/kcov.rst for details).  As the result kcov
can now be used to collect coverage from vhost worker threads.

Link: http://lkml.kernel.org/r/e49d5d154e5da6c9ada521d2b7ce10a49ce9f98b.1572366574.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Alexander Potapenko <glider@google.com>
Cc: Anders Roxell <anders.roxell@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Windsor <dwindsor@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Elena Reshetova <elena.reshetova@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Marco Elver <elver@google.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/vhost/vhost.c | 6 ++++++
 drivers/vhost/vhost.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 36ca2cf419bf..f44340b41494 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -30,6 +30,7 @@
 #include <linux/sched/signal.h>
 #include <linux/interval_tree_generic.h>
 #include <linux/nospec.h>
+#include <linux/kcov.h>
 
 #include "vhost.h"
 
@@ -357,7 +358,9 @@ static int vhost_worker(void *data)
 		llist_for_each_entry_safe(work, work_next, node, node) {
 			clear_bit(VHOST_WORK_QUEUED, &work->flags);
 			__set_current_state(TASK_RUNNING);
+			kcov_remote_start_common(dev->kcov_handle);
 			work->fn(work);
+			kcov_remote_stop();
 			if (need_resched())
 				schedule();
 		}
@@ -546,6 +549,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
 
 	/* No owner, become one */
 	dev->mm = get_task_mm(current);
+	dev->kcov_handle = kcov_common_handle();
 	worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
 	if (IS_ERR(worker)) {
 		err = PTR_ERR(worker);
@@ -571,6 +575,7 @@ err_worker:
 	if (dev->mm)
 		mmput(dev->mm);
 	dev->mm = NULL;
+	dev->kcov_handle = 0;
 err_mm:
 	return err;
 }
@@ -682,6 +687,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 	if (dev->worker) {
 		kthread_stop(dev->worker);
 		dev->worker = NULL;
+		dev->kcov_handle = 0;
 	}
 	if (dev->mm)
 		mmput(dev->mm);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index e9ed2722b633..a123fd70847e 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -173,6 +173,7 @@ struct vhost_dev {
 	int iov_limit;
 	int weight;
 	int byte_weight;
+	u64 kcov_handle;
 };
 
 bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
-- 
cgit v1.2.3-59-g8ed1b


From ce5c31db3645b649a31044a4d8b6057f6c723702 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@arm.com>
Date: Wed, 4 Dec 2019 16:52:53 -0800
Subject: lib/ubsan: don't serialize UBSAN report

At the moment, UBSAN report will be serialized using a spin_lock().  On
RT-systems, spinlocks are turned to rt_spin_lock and may sleep.  This
will result to the following splat if the undefined behavior is in a
context that can sleep:

  BUG: sleeping function called from invalid context at /src/linux/kernel/locking/rtmutex.c:968
  in_atomic(): 1, irqs_disabled(): 128, pid: 3447, name: make
  1 lock held by make/3447:
   #0: 000000009a966332 (&mm->mmap_sem){++++}, at: do_page_fault+0x140/0x4f8
  irq event stamp: 6284
  hardirqs last  enabled at (6283): [<ffff000011326520>] _raw_spin_unlock_irqrestore+0x90/0xa0
  hardirqs last disabled at (6284): [<ffff0000113262b0>] _raw_spin_lock_irqsave+0x30/0x78
  softirqs last  enabled at (2430): [<ffff000010088ef8>] fpsimd_restore_current_state+0x60/0xe8
  softirqs last disabled at (2427): [<ffff000010088ec0>] fpsimd_restore_current_state+0x28/0xe8
  Preemption disabled at:
  [<ffff000011324a4c>] rt_mutex_futex_unlock+0x4c/0xb0
  CPU: 3 PID: 3447 Comm: make Tainted: G        W         5.2.14-rt7-01890-ge6e057589653 #911
  Call trace:
    dump_backtrace+0x0/0x148
    show_stack+0x14/0x20
    dump_stack+0xbc/0x104
    ___might_sleep+0x154/0x210
    rt_spin_lock+0x68/0xa0
    ubsan_prologue+0x30/0x68
    handle_overflow+0x64/0xe0
    __ubsan_handle_add_overflow+0x10/0x18
    __lock_acquire+0x1c28/0x2a28
    lock_acquire+0xf0/0x370
    _raw_spin_lock_irqsave+0x58/0x78
    rt_mutex_futex_unlock+0x4c/0xb0
    rt_spin_unlock+0x28/0x70
    get_page_from_freelist+0x428/0x2b60
    __alloc_pages_nodemask+0x174/0x1708
    alloc_pages_vma+0x1ac/0x238
    __handle_mm_fault+0x4ac/0x10b0
    handle_mm_fault+0x1d8/0x3b0
    do_page_fault+0x1c8/0x4f8
    do_translation_fault+0xb8/0xe0
    do_mem_abort+0x3c/0x98
    el0_da+0x20/0x24

The spin_lock() will protect against multiple CPUs to output a report
together, I guess to prevent them from being interleaved.  However, they
can still interleave with other messages (and even splat from
__might_sleep).

So the lock usefulness seems pretty limited.  Rather than trying to
accomodate RT-system by switching to a raw_spin_lock(), the lock is now
completely dropped.

Link: http://lkml.kernel.org/r/20190920100835.14999-1-julien.grall@arm.com
Signed-off-by: Julien Grall <julien.grall@arm.com>
Reported-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/ubsan.c | 64 ++++++++++++++++++++++---------------------------------------
 1 file changed, 23 insertions(+), 41 deletions(-)

diff --git a/lib/ubsan.c b/lib/ubsan.c
index fc552d524ef7..7b9b58aee72c 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -140,25 +140,21 @@ static void val_to_string(char *str, size_t size, struct type_descriptor *type,
 	}
 }
 
-static DEFINE_SPINLOCK(report_lock);
-
-static void ubsan_prologue(struct source_location *location,
-			unsigned long *flags)
+static void ubsan_prologue(struct source_location *location)
 {
 	current->in_ubsan++;
-	spin_lock_irqsave(&report_lock, *flags);
 
 	pr_err("========================================"
 		"========================================\n");
 	print_source_location("UBSAN: Undefined behaviour in", location);
 }
 
-static void ubsan_epilogue(unsigned long *flags)
+static void ubsan_epilogue(void)
 {
 	dump_stack();
 	pr_err("========================================"
 		"========================================\n");
-	spin_unlock_irqrestore(&report_lock, *flags);
+
 	current->in_ubsan--;
 }
 
@@ -167,14 +163,13 @@ static void handle_overflow(struct overflow_data *data, void *lhs,
 {
 
 	struct type_descriptor *type = data->type;
-	unsigned long flags;
 	char lhs_val_str[VALUE_LENGTH];
 	char rhs_val_str[VALUE_LENGTH];
 
 	if (suppress_report(&data->location))
 		return;
 
-	ubsan_prologue(&data->location, &flags);
+	ubsan_prologue(&data->location);
 
 	val_to_string(lhs_val_str, sizeof(lhs_val_str), type, lhs);
 	val_to_string(rhs_val_str, sizeof(rhs_val_str), type, rhs);
@@ -186,7 +181,7 @@ static void handle_overflow(struct overflow_data *data, void *lhs,
 		rhs_val_str,
 		type->type_name);
 
-	ubsan_epilogue(&flags);
+	ubsan_epilogue();
 }
 
 void __ubsan_handle_add_overflow(struct overflow_data *data,
@@ -214,20 +209,19 @@ EXPORT_SYMBOL(__ubsan_handle_mul_overflow);
 void __ubsan_handle_negate_overflow(struct overflow_data *data,
 				void *old_val)
 {
-	unsigned long flags;
 	char old_val_str[VALUE_LENGTH];
 
 	if (suppress_report(&data->location))
 		return;
 
-	ubsan_prologue(&data->location, &flags);
+	ubsan_prologue(&data->location);
 
 	val_to_string(old_val_str, sizeof(old_val_str), data->type, old_val);
 
 	pr_err("negation of %s cannot be represented in type %s:\n",
 		old_val_str, data->type->type_name);
 
-	ubsan_epilogue(&flags);
+	ubsan_epilogue();
 }
 EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
 
@@ -235,13 +229,12 @@ EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
 void __ubsan_handle_divrem_overflow(struct overflow_data *data,
 				void *lhs, void *rhs)
 {
-	unsigned long flags;
 	char rhs_val_str[VALUE_LENGTH];
 
 	if (suppress_report(&data->location))
 		return;
 
-	ubsan_prologue(&data->location, &flags);
+	ubsan_prologue(&data->location);
 
 	val_to_string(rhs_val_str, sizeof(rhs_val_str), data->type, rhs);
 
@@ -251,58 +244,52 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data,
 	else
 		pr_err("division by zero\n");
 
-	ubsan_epilogue(&flags);
+	ubsan_epilogue();
 }
 EXPORT_SYMBOL(__ubsan_handle_divrem_overflow);
 
 static void handle_null_ptr_deref(struct type_mismatch_data_common *data)
 {
-	unsigned long flags;
-
 	if (suppress_report(data->location))
 		return;
 
-	ubsan_prologue(data->location, &flags);
+	ubsan_prologue(data->location);
 
 	pr_err("%s null pointer of type %s\n",
 		type_check_kinds[data->type_check_kind],
 		data->type->type_name);
 
-	ubsan_epilogue(&flags);
+	ubsan_epilogue();
 }
 
 static void handle_misaligned_access(struct type_mismatch_data_common *data,
 				unsigned long ptr)
 {
-	unsigned long flags;
-
 	if (suppress_report(data->location))
 		return;
 
-	ubsan_prologue(data->location, &flags);
+	ubsan_prologue(data->location);
 
 	pr_err("%s misaligned address %p for type %s\n",
 		type_check_kinds[data->type_check_kind],
 		(void *)ptr, data->type->type_name);
 	pr_err("which requires %ld byte alignment\n", data->alignment);
 
-	ubsan_epilogue(&flags);
+	ubsan_epilogue();
 }
 
 static void handle_object_size_mismatch(struct type_mismatch_data_common *data,
 					unsigned long ptr)
 {
-	unsigned long flags;
-
 	if (suppress_report(data->location))
 		return;
 
-	ubsan_prologue(data->location, &flags);
+	ubsan_prologue(data->location);
 	pr_err("%s address %p with insufficient space\n",
 		type_check_kinds[data->type_check_kind],
 		(void *) ptr);
 	pr_err("for an object of type %s\n", data->type->type_name);
-	ubsan_epilogue(&flags);
+	ubsan_epilogue();
 }
 
 static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data,
@@ -351,25 +338,23 @@ EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1);
 
 void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, void *index)
 {
-	unsigned long flags;
 	char index_str[VALUE_LENGTH];
 
 	if (suppress_report(&data->location))
 		return;
 
-	ubsan_prologue(&data->location, &flags);
+	ubsan_prologue(&data->location);
 
 	val_to_string(index_str, sizeof(index_str), data->index_type, index);
 	pr_err("index %s is out of range for type %s\n", index_str,
 		data->array_type->type_name);
-	ubsan_epilogue(&flags);
+	ubsan_epilogue();
 }
 EXPORT_SYMBOL(__ubsan_handle_out_of_bounds);
 
 void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
 					void *lhs, void *rhs)
 {
-	unsigned long flags;
 	struct type_descriptor *rhs_type = data->rhs_type;
 	struct type_descriptor *lhs_type = data->lhs_type;
 	char rhs_str[VALUE_LENGTH];
@@ -379,7 +364,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
 	if (suppress_report(&data->location))
 		goto out;
 
-	ubsan_prologue(&data->location, &flags);
+	ubsan_prologue(&data->location);
 
 	val_to_string(rhs_str, sizeof(rhs_str), rhs_type, rhs);
 	val_to_string(lhs_str, sizeof(lhs_str), lhs_type, lhs);
@@ -402,7 +387,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
 			lhs_str, rhs_str,
 			lhs_type->type_name);
 
-	ubsan_epilogue(&flags);
+	ubsan_epilogue();
 out:
 	user_access_restore(ua_flags);
 }
@@ -411,11 +396,9 @@ EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds);
 
 void __ubsan_handle_builtin_unreachable(struct unreachable_data *data)
 {
-	unsigned long flags;
-
-	ubsan_prologue(&data->location, &flags);
+	ubsan_prologue(&data->location);
 	pr_err("calling __builtin_unreachable()\n");
-	ubsan_epilogue(&flags);
+	ubsan_epilogue();
 	panic("can't return from __builtin_unreachable()");
 }
 EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable);
@@ -423,19 +406,18 @@ EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable);
 void __ubsan_handle_load_invalid_value(struct invalid_value_data *data,
 				void *val)
 {
-	unsigned long flags;
 	char val_str[VALUE_LENGTH];
 
 	if (suppress_report(&data->location))
 		return;
 
-	ubsan_prologue(&data->location, &flags);
+	ubsan_prologue(&data->location);
 
 	val_to_string(val_str, sizeof(val_str), data->type, val);
 
 	pr_err("load of value %s is not a valid value for type %s\n",
 		val_str, data->type->type_name);
 
-	ubsan_epilogue(&flags);
+	ubsan_epilogue();
 }
 EXPORT_SYMBOL(__ubsan_handle_load_invalid_value);
-- 
cgit v1.2.3-59-g8ed1b


From 5b009673594d569674a9e0e60109f6a1723075b0 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 4 Dec 2019 16:52:57 -0800
Subject: arch: ipcbuf.h: make uapi asm/ipcbuf.h self-contained

Userspace cannot compile <asm/ipcbuf.h> due to some missing type
definitions.  For example, building it for x86 fails as follows:

    CC      usr/include/asm/ipcbuf.h.s
  In file included from usr/include/asm/ipcbuf.h:1:0,
                   from <command-line>:32:
  usr/include/asm-generic/ipcbuf.h:21:2: error: unknown type name `__kernel_key_t'
    __kernel_key_t  key;
    ^~~~~~~~~~~~~~
  usr/include/asm-generic/ipcbuf.h:22:2: error: unknown type name `__kernel_uid32_t'
    __kernel_uid32_t uid;
    ^~~~~~~~~~~~~~~~
  usr/include/asm-generic/ipcbuf.h:23:2: error: unknown type name `__kernel_gid32_t'
    __kernel_gid32_t gid;
    ^~~~~~~~~~~~~~~~
  usr/include/asm-generic/ipcbuf.h:24:2: error: unknown type name `__kernel_uid32_t'
    __kernel_uid32_t cuid;
    ^~~~~~~~~~~~~~~~
  usr/include/asm-generic/ipcbuf.h:25:2: error: unknown type name `__kernel_gid32_t'
    __kernel_gid32_t cgid;
    ^~~~~~~~~~~~~~~~
  usr/include/asm-generic/ipcbuf.h:26:2: error: unknown type name `__kernel_mode_t'
    __kernel_mode_t  mode;
    ^~~~~~~~~~~~~~~
  usr/include/asm-generic/ipcbuf.h:28:35: error: `__kernel_mode_t' undeclared here (not in a function)
    unsigned char  __pad1[4 - sizeof(__kernel_mode_t)];
                                     ^~~~~~~~~~~~~~~
  usr/include/asm-generic/ipcbuf.h:31:2: error: unknown type name `__kernel_ulong_t'
    __kernel_ulong_t __unused1;
    ^~~~~~~~~~~~~~~~
  usr/include/asm-generic/ipcbuf.h:32:2: error: unknown type name `__kernel_ulong_t'
    __kernel_ulong_t __unused2;
    ^~~~~~~~~~~~~~~~

It is just a matter of missing include directive.

Include <linux/posix_types.h> to make it self-contained, and add it to
the compile-test coverage.

Link: http://lkml.kernel.org/r/20191030063855.9989-1-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/uapi/asm/ipcbuf.h   | 2 ++
 arch/sparc/include/uapi/asm/ipcbuf.h  | 2 ++
 arch/xtensa/include/uapi/asm/ipcbuf.h | 2 ++
 include/uapi/asm-generic/ipcbuf.h     | 2 ++
 usr/include/Makefile                  | 1 -
 5 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h
index 5b1c4f47c656..1030cd186899 100644
--- a/arch/s390/include/uapi/asm/ipcbuf.h
+++ b/arch/s390/include/uapi/asm/ipcbuf.h
@@ -2,6 +2,8 @@
 #ifndef __S390_IPCBUF_H__
 #define __S390_IPCBUF_H__
 
+#include <linux/posix_types.h>
+
 /*
  * The user_ipc_perm structure for S/390 architecture.
  * Note extra padding because this structure is passed back and forth
diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h
index 9d0d125500e2..5b933a598a33 100644
--- a/arch/sparc/include/uapi/asm/ipcbuf.h
+++ b/arch/sparc/include/uapi/asm/ipcbuf.h
@@ -2,6 +2,8 @@
 #ifndef __SPARC_IPCBUF_H
 #define __SPARC_IPCBUF_H
 
+#include <linux/posix_types.h>
+
 /*
  * The ipc64_perm structure for sparc/sparc64 architecture.
  * Note extra padding because this structure is passed back and forth
diff --git a/arch/xtensa/include/uapi/asm/ipcbuf.h b/arch/xtensa/include/uapi/asm/ipcbuf.h
index a57afa0b606f..3bd0642f6660 100644
--- a/arch/xtensa/include/uapi/asm/ipcbuf.h
+++ b/arch/xtensa/include/uapi/asm/ipcbuf.h
@@ -12,6 +12,8 @@
 #ifndef _XTENSA_IPCBUF_H
 #define _XTENSA_IPCBUF_H
 
+#include <linux/posix_types.h>
+
 /*
  * Pad space is left for:
  * - 32-bit mode_t and seq
diff --git a/include/uapi/asm-generic/ipcbuf.h b/include/uapi/asm-generic/ipcbuf.h
index 7d80dbd336fb..41a01b494fc7 100644
--- a/include/uapi/asm-generic/ipcbuf.h
+++ b/include/uapi/asm-generic/ipcbuf.h
@@ -2,6 +2,8 @@
 #ifndef __ASM_GENERIC_IPCBUF_H
 #define __ASM_GENERIC_IPCBUF_H
 
+#include <linux/posix_types.h>
+
 /*
  * The generic ipc64_perm structure:
  * Note extra padding because this structure is passed back and forth
diff --git a/usr/include/Makefile b/usr/include/Makefile
index 5acd9bb6d714..00cc763ec488 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -16,7 +16,6 @@ override c_flags = $(UAPI_CFLAGS) -Wp,-MD,$(depfile) -I$(objtree)/usr/include
 # Please consider to fix the header first.
 #
 # Sorted alphabetically.
-header-test- += asm/ipcbuf.h
 header-test- += asm/msgbuf.h
 header-test- += asm/sembuf.h
 header-test- += asm/shmbuf.h
-- 
cgit v1.2.3-59-g8ed1b


From 9ef0e004181956e158fb7ceb9b43810a193f80cd Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 4 Dec 2019 16:53:00 -0800
Subject: arch: msgbuf.h: make uapi asm/msgbuf.h self-contained

Userspace cannot compile <asm/msgbuf.h> due to some missing type
definitions.  For example, building it for x86 fails as follows:

    CC      usr/include/asm/msgbuf.h.s
  In file included from usr/include/asm/msgbuf.h:6:0,
                   from <command-line>:32:
  usr/include/asm-generic/msgbuf.h:25:20: error: field `msg_perm' has incomplete type
    struct ipc64_perm msg_perm;
                      ^~~~~~~~
  usr/include/asm-generic/msgbuf.h:27:2: error: unknown type name `__kernel_time_t'
    __kernel_time_t msg_stime; /* last msgsnd time */
    ^~~~~~~~~~~~~~~
  usr/include/asm-generic/msgbuf.h:28:2: error: unknown type name `__kernel_time_t'
    __kernel_time_t msg_rtime; /* last msgrcv time */
    ^~~~~~~~~~~~~~~
  usr/include/asm-generic/msgbuf.h:29:2: error: unknown type name `__kernel_time_t'
    __kernel_time_t msg_ctime; /* last change time */
    ^~~~~~~~~~~~~~~
  usr/include/asm-generic/msgbuf.h:41:2: error: unknown type name `__kernel_pid_t'
    __kernel_pid_t msg_lspid; /* pid of last msgsnd */
    ^~~~~~~~~~~~~~
  usr/include/asm-generic/msgbuf.h:42:2: error: unknown type name `__kernel_pid_t'
    __kernel_pid_t msg_lrpid; /* last receive pid */
    ^~~~~~~~~~~~~~

It is just a matter of missing include directive.

Include <asm/ipcbuf.h> to make it self-contained, and add it to
the compile-test coverage.

Link: http://lkml.kernel.org/r/20191030063855.9989-2-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/include/uapi/asm/msgbuf.h    | 1 +
 arch/parisc/include/uapi/asm/msgbuf.h  | 1 +
 arch/powerpc/include/uapi/asm/msgbuf.h | 2 ++
 arch/sparc/include/uapi/asm/msgbuf.h   | 2 ++
 arch/x86/include/uapi/asm/msgbuf.h     | 3 +++
 arch/xtensa/include/uapi/asm/msgbuf.h  | 2 ++
 include/uapi/asm-generic/msgbuf.h      | 2 ++
 usr/include/Makefile                   | 1 -
 8 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/mips/include/uapi/asm/msgbuf.h b/arch/mips/include/uapi/asm/msgbuf.h
index 9e0c2e230274..128af72f2dfe 100644
--- a/arch/mips/include/uapi/asm/msgbuf.h
+++ b/arch/mips/include/uapi/asm/msgbuf.h
@@ -2,6 +2,7 @@
 #ifndef _ASM_MSGBUF_H
 #define _ASM_MSGBUF_H
 
+#include <asm/ipcbuf.h>
 
 /*
  * The msqid64_ds structure for the MIPS architecture.
diff --git a/arch/parisc/include/uapi/asm/msgbuf.h b/arch/parisc/include/uapi/asm/msgbuf.h
index 3b877335da38..3b4de5b668c3 100644
--- a/arch/parisc/include/uapi/asm/msgbuf.h
+++ b/arch/parisc/include/uapi/asm/msgbuf.h
@@ -3,6 +3,7 @@
 #define _PARISC_MSGBUF_H
 
 #include <asm/bitsperlong.h>
+#include <asm/ipcbuf.h>
 
 /* 
  * The msqid64_ds structure for parisc architecture, copied from sparc.
diff --git a/arch/powerpc/include/uapi/asm/msgbuf.h b/arch/powerpc/include/uapi/asm/msgbuf.h
index 969bd83e4d3d..7919b2ba41b5 100644
--- a/arch/powerpc/include/uapi/asm/msgbuf.h
+++ b/arch/powerpc/include/uapi/asm/msgbuf.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_MSGBUF_H
 #define _ASM_POWERPC_MSGBUF_H
 
+#include <asm/ipcbuf.h>
+
 /*
  * The msqid64_ds structure for the PowerPC architecture.
  * Note extra padding because this structure is passed back and forth
diff --git a/arch/sparc/include/uapi/asm/msgbuf.h b/arch/sparc/include/uapi/asm/msgbuf.h
index eeeb91933280..0954552da188 100644
--- a/arch/sparc/include/uapi/asm/msgbuf.h
+++ b/arch/sparc/include/uapi/asm/msgbuf.h
@@ -2,6 +2,8 @@
 #ifndef _SPARC_MSGBUF_H
 #define _SPARC_MSGBUF_H
 
+#include <asm/ipcbuf.h>
+
 /*
  * The msqid64_ds structure for sparc64 architecture.
  * Note extra padding because this structure is passed back and forth
diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h
index 7c5bb43ed8af..b3d0664fadc9 100644
--- a/arch/x86/include/uapi/asm/msgbuf.h
+++ b/arch/x86/include/uapi/asm/msgbuf.h
@@ -5,6 +5,9 @@
 #if !defined(__x86_64__) || !defined(__ILP32__)
 #include <asm-generic/msgbuf.h>
 #else
+
+#include <asm/ipcbuf.h>
+
 /*
  * The msqid64_ds structure for x86 architecture with x32 ABI.
  *
diff --git a/arch/xtensa/include/uapi/asm/msgbuf.h b/arch/xtensa/include/uapi/asm/msgbuf.h
index d6915e9f071c..1ed2c85b693a 100644
--- a/arch/xtensa/include/uapi/asm/msgbuf.h
+++ b/arch/xtensa/include/uapi/asm/msgbuf.h
@@ -17,6 +17,8 @@
 #ifndef _XTENSA_MSGBUF_H
 #define _XTENSA_MSGBUF_H
 
+#include <asm/ipcbuf.h>
+
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
 #ifdef __XTENSA_EB__
diff --git a/include/uapi/asm-generic/msgbuf.h b/include/uapi/asm-generic/msgbuf.h
index af95aa89012e..6504d7b741ce 100644
--- a/include/uapi/asm-generic/msgbuf.h
+++ b/include/uapi/asm-generic/msgbuf.h
@@ -3,6 +3,8 @@
 #define __ASM_GENERIC_MSGBUF_H
 
 #include <asm/bitsperlong.h>
+#include <asm/ipcbuf.h>
+
 /*
  * generic msqid64_ds structure.
  *
diff --git a/usr/include/Makefile b/usr/include/Makefile
index 00cc763ec488..6bd22f83b0df 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -16,7 +16,6 @@ override c_flags = $(UAPI_CFLAGS) -Wp,-MD,$(depfile) -I$(objtree)/usr/include
 # Please consider to fix the header first.
 #
 # Sorted alphabetically.
-header-test- += asm/msgbuf.h
 header-test- += asm/sembuf.h
 header-test- += asm/shmbuf.h
 header-test- += asm/signal.h
-- 
cgit v1.2.3-59-g8ed1b


From 0fb9dc28679a627f84974165c8011e0630529ece Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 4 Dec 2019 16:53:03 -0800
Subject: arch: sembuf.h: make uapi asm/sembuf.h self-contained

Userspace cannot compile <asm/sembuf.h> due to some missing type
definitions.  For example, building it for x86 fails as follows:

    CC      usr/include/asm/sembuf.h.s
  In file included from <command-line>:32:0:
  usr/include/asm/sembuf.h:17:20: error: field `sem_perm' has incomplete type
    struct ipc64_perm sem_perm; /* permissions .. see ipc.h */
                      ^~~~~~~~
  usr/include/asm/sembuf.h:24:2: error: unknown type name `__kernel_time_t'
    __kernel_time_t sem_otime; /* last semop time */
    ^~~~~~~~~~~~~~~
  usr/include/asm/sembuf.h:25:2: error: unknown type name `__kernel_ulong_t'
    __kernel_ulong_t __unused1;
    ^~~~~~~~~~~~~~~~
  usr/include/asm/sembuf.h:26:2: error: unknown type name `__kernel_time_t'
    __kernel_time_t sem_ctime; /* last change time */
    ^~~~~~~~~~~~~~~
  usr/include/asm/sembuf.h:27:2: error: unknown type name `__kernel_ulong_t'
    __kernel_ulong_t __unused2;
    ^~~~~~~~~~~~~~~~
  usr/include/asm/sembuf.h:29:2: error: unknown type name `__kernel_ulong_t'
    __kernel_ulong_t sem_nsems; /* no. of semaphores in array */
    ^~~~~~~~~~~~~~~~
  usr/include/asm/sembuf.h:30:2: error: unknown type name `__kernel_ulong_t'
    __kernel_ulong_t __unused3;
    ^~~~~~~~~~~~~~~~
  usr/include/asm/sembuf.h:31:2: error: unknown type name `__kernel_ulong_t'
    __kernel_ulong_t __unused4;
    ^~~~~~~~~~~~~~~~

It is just a matter of missing include directive.

Include <asm/ipcbuf.h> to make it self-contained, and add it to
the compile-test coverage.

Link: http://lkml.kernel.org/r/20191030063855.9989-3-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/include/uapi/asm/sembuf.h    | 2 ++
 arch/parisc/include/uapi/asm/sembuf.h  | 1 +
 arch/powerpc/include/uapi/asm/sembuf.h | 2 ++
 arch/sparc/include/uapi/asm/sembuf.h   | 2 ++
 arch/x86/include/uapi/asm/sembuf.h     | 2 ++
 arch/xtensa/include/uapi/asm/sembuf.h  | 1 +
 include/uapi/asm-generic/sembuf.h      | 1 +
 usr/include/Makefile                   | 1 -
 8 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/mips/include/uapi/asm/sembuf.h b/arch/mips/include/uapi/asm/sembuf.h
index 43e1b4a2f68a..ba7fe0c89e7d 100644
--- a/arch/mips/include/uapi/asm/sembuf.h
+++ b/arch/mips/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_SEMBUF_H
 #define _ASM_SEMBUF_H
 
+#include <asm/ipcbuf.h>
+
 /*
  * The semid64_ds structure for the MIPS architecture.
  * Note extra padding because this structure is passed back and forth
diff --git a/arch/parisc/include/uapi/asm/sembuf.h b/arch/parisc/include/uapi/asm/sembuf.h
index 8241cf126018..e2ca4301c7fb 100644
--- a/arch/parisc/include/uapi/asm/sembuf.h
+++ b/arch/parisc/include/uapi/asm/sembuf.h
@@ -3,6 +3,7 @@
 #define _PARISC_SEMBUF_H
 
 #include <asm/bitsperlong.h>
+#include <asm/ipcbuf.h>
 
 /* 
  * The semid64_ds structure for parisc architecture.
diff --git a/arch/powerpc/include/uapi/asm/sembuf.h b/arch/powerpc/include/uapi/asm/sembuf.h
index 008ae77c6746..85e96ccb5f0f 100644
--- a/arch/powerpc/include/uapi/asm/sembuf.h
+++ b/arch/powerpc/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_SEMBUF_H
 #define _ASM_POWERPC_SEMBUF_H
 
+#include <asm/ipcbuf.h>
+
 /*
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/arch/sparc/include/uapi/asm/sembuf.h b/arch/sparc/include/uapi/asm/sembuf.h
index cbcbaa4e7128..10621d066d79 100644
--- a/arch/sparc/include/uapi/asm/sembuf.h
+++ b/arch/sparc/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
 #ifndef _SPARC_SEMBUF_H
 #define _SPARC_SEMBUF_H
 
+#include <asm/ipcbuf.h>
+
 /*
  * The semid64_ds structure for sparc architecture.
  * Note extra padding because this structure is passed back and forth
diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h
index 93030e97269a..71205b02a191 100644
--- a/arch/x86/include/uapi/asm/sembuf.h
+++ b/arch/x86/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_SEMBUF_H
 #define _ASM_X86_SEMBUF_H
 
+#include <asm/ipcbuf.h>
+
 /*
  * The semid64_ds structure for x86 architecture.
  * Note extra padding because this structure is passed back and forth
diff --git a/arch/xtensa/include/uapi/asm/sembuf.h b/arch/xtensa/include/uapi/asm/sembuf.h
index 09f348d643f1..3b9cdd406dfe 100644
--- a/arch/xtensa/include/uapi/asm/sembuf.h
+++ b/arch/xtensa/include/uapi/asm/sembuf.h
@@ -22,6 +22,7 @@
 #define _XTENSA_SEMBUF_H
 
 #include <asm/byteorder.h>
+#include <asm/ipcbuf.h>
 
 struct semid64_ds {
 	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
diff --git a/include/uapi/asm-generic/sembuf.h b/include/uapi/asm-generic/sembuf.h
index 137606018c6a..0e709bd3d730 100644
--- a/include/uapi/asm-generic/sembuf.h
+++ b/include/uapi/asm-generic/sembuf.h
@@ -3,6 +3,7 @@
 #define __ASM_GENERIC_SEMBUF_H
 
 #include <asm/bitsperlong.h>
+#include <asm/ipcbuf.h>
 
 /*
  * The semid64_ds structure for x86 architecture.
diff --git a/usr/include/Makefile b/usr/include/Makefile
index 6bd22f83b0df..4a753a48767b 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -16,7 +16,6 @@ override c_flags = $(UAPI_CFLAGS) -Wp,-MD,$(depfile) -I$(objtree)/usr/include
 # Please consider to fix the header first.
 #
 # Sorted alphabetically.
-header-test- += asm/sembuf.h
 header-test- += asm/shmbuf.h
 header-test- += asm/signal.h
 header-test- += asm/ucontext.h
-- 
cgit v1.2.3-59-g8ed1b


From 17b6753ff08bc47f50da09f5185849172c598315 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Dec 2019 16:53:06 -0800
Subject: lib/test_bitmap: force argument of bitmap_parselist_user() to proper
 address space

Patch series "gpio: pca953x: Convert to bitmap (extended) API", v2.

While converting gpio-pca953x driver to bitmap API, I noticed that we
have no function to replace bits.

So, that's how patch 7 appears.

First 6 patches are preparatory of the test suite (including some
warning fixes, etc).

Patches 8-9 are preparatory for the GPIO driver to be easier converted
to bitmap API, conversion to which happens in patch 10.

Patch 11 contains simple indentation fixes.

This patch (of 11):

Sparse complains:

  lib/test_bitmap.c:345:58: warning: incorrect type in argument 1 (different address spaces)
  lib/test_bitmap.c:345:58:    expected char const [noderef] <asn:1> *ubuf
  lib/test_bitmap.c:345:58:    got char const *const in

Force argument of bitmap_parselist_user() to proper address space.

Link: http://lkml.kernel.org/r/20191022172922.61232-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Yury Norov <yury.norov@gmail.com>
Cc: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: Marek Vasut <marek.vasut+renesas@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_bitmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index dc167c13eb39..09aa29a6b562 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -330,7 +330,7 @@ static void __init __test_bitmap_parselist(int is_user)
 
 			set_fs(KERNEL_DS);
 			time = ktime_get();
-			err = bitmap_parselist_user(ptest.in, len,
+			err = bitmap_parselist_user((__force const char __user *)ptest.in, len,
 						    bmap, ptest.nbits);
 			time = ktime_get() - time;
 			set_fs(orig_fs);
-- 
cgit v1.2.3-59-g8ed1b


From 54224044096e02d6df361333f5528940e3cd3f89 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Dec 2019 16:53:09 -0800
Subject: lib/test_bitmap: undefine macros after use

There is no need to keep step and ptest macros defined in entire file.

Link: http://lkml.kernel.org/r/20191022172922.61232-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Marek Vasut <marek.vasut+renesas@gmail.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_bitmap.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 09aa29a6b562..d2fa94e45a46 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -311,6 +311,8 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = {
 	{-EINVAL, "a-31:10/1", NULL, 8, 0},
 	{-EINVAL, "0-31:a/1", NULL, 8, 0},
 	{-EINVAL, "0-\n", NULL, 8, 0},
+
+#undef step
 };
 
 static void __init __test_bitmap_parselist(int is_user)
@@ -357,6 +359,8 @@ static void __init __test_bitmap_parselist(int is_user)
 		if (ptest.flags & PARSE_TIME)
 			pr_err("parselist%s: %d: input is '%s' OK, Time: %llu\n",
 					mode, i, ptest.in, time);
+
+#undef ptest
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From a4881d1cbc6c36424bf11dec9d484b9d19b927ab Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Dec 2019 16:53:13 -0800
Subject: lib/test_bitmap: name EXP_BYTES properly

EXP_BYTES has been wrongly named.  It's a size of the exp array in bits.

While here, go ahead and rename to EXP1_IN_BITS to avoid double renaming
when exp will be renamed to exp1 in the next patch

Link: http://lkml.kernel.org/r/20191022172922.61232-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Marek Vasut <marek.vasut+renesas@gmail.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_bitmap.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index d2fa94e45a46..38f923411ada 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -374,17 +374,17 @@ static void __init test_bitmap_parselist_user(void)
 	__test_bitmap_parselist(1);
 }
 
-#define EXP_BYTES	(sizeof(exp) * 8)
+#define EXP1_IN_BITS	(sizeof(exp) * 8)
 
 static void __init test_bitmap_arr32(void)
 {
 	unsigned int nbits, next_bit;
-	u32 arr[sizeof(exp) / 4];
-	DECLARE_BITMAP(bmap2, EXP_BYTES);
+	u32 arr[EXP1_IN_BITS / 32];
+	DECLARE_BITMAP(bmap2, EXP1_IN_BITS);
 
 	memset(arr, 0xa5, sizeof(arr));
 
-	for (nbits = 0; nbits < EXP_BYTES; ++nbits) {
+	for (nbits = 0; nbits < EXP1_IN_BITS; ++nbits) {
 		bitmap_to_arr32(arr, exp, nbits);
 		bitmap_from_arr32(bmap2, arr, nbits);
 		expect_eq_bitmap(bmap2, exp, nbits);
@@ -396,7 +396,7 @@ static void __init test_bitmap_arr32(void)
 				" tail is not safely cleared: %d\n",
 				nbits, next_bit);
 
-		if (nbits < EXP_BYTES - 32)
+		if (nbits < EXP1_IN_BITS - 32)
 			expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)],
 								0xa5a5a5a5);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 0ee312e38042c868be9eb973d6e639e6d67b84e2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Dec 2019 16:53:17 -0800
Subject: lib/test_bitmap: rename exp to exp1 to avoid ambiguous name

One function is using exp as local variable.  Avoid ambiguous naming by
rename global one to exp1.

Link: http://lkml.kernel.org/r/20191022172922.61232-5-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Marek Vasut <marek.vasut+renesas@gmail.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_bitmap.c | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 38f923411ada..6b70166ac960 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -247,7 +247,7 @@ struct test_bitmap_parselist{
 	const int flags;
 };
 
-static const unsigned long exp[] __initconst = {
+static const unsigned long exp1[] __initconst = {
 	BITMAP_FROM_U64(1),
 	BITMAP_FROM_U64(2),
 	BITMAP_FROM_U64(0x0000ffff),
@@ -271,29 +271,29 @@ static const unsigned long exp2[] __initconst = {
 static const struct test_bitmap_parselist parselist_tests[] __initconst = {
 #define step (sizeof(u64) / sizeof(unsigned long))
 
-	{0, "0",			&exp[0], 8, 0},
-	{0, "1",			&exp[1 * step], 8, 0},
-	{0, "0-15",			&exp[2 * step], 32, 0},
-	{0, "16-31",			&exp[3 * step], 32, 0},
-	{0, "0-31:1/2",			&exp[4 * step], 32, 0},
-	{0, "1-31:1/2",			&exp[5 * step], 32, 0},
-	{0, "0-31:1/4",			&exp[6 * step], 32, 0},
-	{0, "1-31:1/4",			&exp[7 * step], 32, 0},
-	{0, "0-31:4/4",			&exp[8 * step], 32, 0},
-	{0, "1-31:4/4",			&exp[9 * step], 32, 0},
-	{0, "0-31:1/4,32-63:2/4",	&exp[10 * step], 64, 0},
-	{0, "0-31:3/4,32-63:4/4",	&exp[11 * step], 64, 0},
-	{0, "  ,,  0-31:3/4  ,, 32-63:4/4  ,,  ",	&exp[11 * step], 64, 0},
+	{0, "0",			&exp1[0], 8, 0},
+	{0, "1",			&exp1[1 * step], 8, 0},
+	{0, "0-15",			&exp1[2 * step], 32, 0},
+	{0, "16-31",			&exp1[3 * step], 32, 0},
+	{0, "0-31:1/2",			&exp1[4 * step], 32, 0},
+	{0, "1-31:1/2",			&exp1[5 * step], 32, 0},
+	{0, "0-31:1/4",			&exp1[6 * step], 32, 0},
+	{0, "1-31:1/4",			&exp1[7 * step], 32, 0},
+	{0, "0-31:4/4",			&exp1[8 * step], 32, 0},
+	{0, "1-31:4/4",			&exp1[9 * step], 32, 0},
+	{0, "0-31:1/4,32-63:2/4",	&exp1[10 * step], 64, 0},
+	{0, "0-31:3/4,32-63:4/4",	&exp1[11 * step], 64, 0},
+	{0, "  ,,  0-31:3/4  ,, 32-63:4/4  ,,  ",	&exp1[11 * step], 64, 0},
 
 	{0, "0-31:1/4,32-63:2/4,64-95:3/4,96-127:4/4",	exp2, 128, 0},
 
 	{0, "0-2047:128/256", NULL, 2048, PARSE_TIME},
 
-	{0, "",				&exp[12 * step], 8, 0},
-	{0, "\n",			&exp[12 * step], 8, 0},
-	{0, ",,  ,,  , ,  ,",		&exp[12 * step], 8, 0},
-	{0, " ,  ,,  , ,   ",		&exp[12 * step], 8, 0},
-	{0, " ,  ,,  , ,   \n",		&exp[12 * step], 8, 0},
+	{0, "",				&exp1[12 * step], 8, 0},
+	{0, "\n",			&exp1[12 * step], 8, 0},
+	{0, ",,  ,,  , ,  ,",		&exp1[12 * step], 8, 0},
+	{0, " ,  ,,  , ,   ",		&exp1[12 * step], 8, 0},
+	{0, " ,  ,,  , ,   \n",		&exp1[12 * step], 8, 0},
 
 	{-EINVAL, "-1",	NULL, 8, 0},
 	{-EINVAL, "-0",	NULL, 8, 0},
@@ -374,7 +374,7 @@ static void __init test_bitmap_parselist_user(void)
 	__test_bitmap_parselist(1);
 }
 
-#define EXP1_IN_BITS	(sizeof(exp) * 8)
+#define EXP1_IN_BITS	(sizeof(exp1) * 8)
 
 static void __init test_bitmap_arr32(void)
 {
@@ -385,9 +385,9 @@ static void __init test_bitmap_arr32(void)
 	memset(arr, 0xa5, sizeof(arr));
 
 	for (nbits = 0; nbits < EXP1_IN_BITS; ++nbits) {
-		bitmap_to_arr32(arr, exp, nbits);
+		bitmap_to_arr32(arr, exp1, nbits);
 		bitmap_from_arr32(bmap2, arr, nbits);
-		expect_eq_bitmap(bmap2, exp, nbits);
+		expect_eq_bitmap(bmap2, exp1, nbits);
 
 		next_bit = find_next_bit(bmap2,
 				round_up(nbits, BITS_PER_LONG), nbits);
-- 
cgit v1.2.3-59-g8ed1b


From c21dd8a7bb4b8b9cab5dc335a5fa3afa2df1a831 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Dec 2019 16:53:20 -0800
Subject: lib/test_bitmap: move exp1 and exp2 upper for others to use

Some test cases may re-use predefined exp1 and exp2 bitmaps.  Move them
upper in the file.

Link: http://lkml.kernel.org/r/20191022172922.61232-6-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Marek Vasut <marek.vasut+renesas@gmail.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_bitmap.c | 41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 6b70166ac960..449d0882b488 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -21,6 +21,26 @@ static unsigned failed_tests __initdata;
 
 static char pbl_buffer[PAGE_SIZE] __initdata;
 
+static const unsigned long exp1[] __initconst = {
+	BITMAP_FROM_U64(1),
+	BITMAP_FROM_U64(2),
+	BITMAP_FROM_U64(0x0000ffff),
+	BITMAP_FROM_U64(0xffff0000),
+	BITMAP_FROM_U64(0x55555555),
+	BITMAP_FROM_U64(0xaaaaaaaa),
+	BITMAP_FROM_U64(0x11111111),
+	BITMAP_FROM_U64(0x22222222),
+	BITMAP_FROM_U64(0xffffffff),
+	BITMAP_FROM_U64(0xfffffffe),
+	BITMAP_FROM_U64(0x3333333311111111ULL),
+	BITMAP_FROM_U64(0xffffffff77777777ULL),
+	BITMAP_FROM_U64(0),
+};
+
+static const unsigned long exp2[] __initconst = {
+	BITMAP_FROM_U64(0x3333333311111111ULL),
+	BITMAP_FROM_U64(0xffffffff77777777ULL),
+};
 
 static bool __init
 __check_eq_uint(const char *srcfile, unsigned int line,
@@ -247,27 +267,6 @@ struct test_bitmap_parselist{
 	const int flags;
 };
 
-static const unsigned long exp1[] __initconst = {
-	BITMAP_FROM_U64(1),
-	BITMAP_FROM_U64(2),
-	BITMAP_FROM_U64(0x0000ffff),
-	BITMAP_FROM_U64(0xffff0000),
-	BITMAP_FROM_U64(0x55555555),
-	BITMAP_FROM_U64(0xaaaaaaaa),
-	BITMAP_FROM_U64(0x11111111),
-	BITMAP_FROM_U64(0x22222222),
-	BITMAP_FROM_U64(0xffffffff),
-	BITMAP_FROM_U64(0xfffffffe),
-	BITMAP_FROM_U64(0x3333333311111111ULL),
-	BITMAP_FROM_U64(0xffffffff77777777ULL),
-	BITMAP_FROM_U64(0),
-};
-
-static const unsigned long exp2[] __initconst = {
-	BITMAP_FROM_U64(0x3333333311111111ULL),
-	BITMAP_FROM_U64(0xffffffff77777777ULL)
-};
-
 static const struct test_bitmap_parselist parselist_tests[] __initconst = {
 #define step (sizeof(u64) / sizeof(unsigned long))
 
-- 
cgit v1.2.3-59-g8ed1b


From 780ff33b8bfac4f44fcc399a870d50ff2e74b33d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Dec 2019 16:53:23 -0800
Subject: lib/test_bitmap: fix comment about this file

This test case file is about bitmap API, and not printf() facility.

Link: http://lkml.kernel.org/r/20191022172922.61232-7-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Marek Vasut <marek.vasut+renesas@gmail.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_bitmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 449d0882b488..4544847cf81e 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Test cases for printf facility.
+ * Test cases for bitmap API.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-- 
cgit v1.2.3-59-g8ed1b


From 30544ed5de431fe25d3793e4dd5a058d877c4d77 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Dec 2019 16:53:26 -0800
Subject: lib/bitmap: introduce bitmap_replace() helper

In some drivers we want to have a single operation over bitmap which is
an equivalent to:

	*dst = (*old & ~(*mask)) | (*new & *mask)

Introduce bitmap_replace() helper for this.

Link: http://lkml.kernel.org/r/20191022172922.61232-8-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Marek Vasut <marek.vasut+renesas@gmail.com>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 16 ++++++++++++++++
 lib/bitmap.c           | 12 ++++++++++++
 lib/test_bitmap.c      | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+)

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 9f046609e809..ff335b22f23c 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -53,6 +53,7 @@
  *  bitmap_find_next_zero_area_off(buf, len, pos, n, mask)  as above
  *  bitmap_shift_right(dst, src, n, nbits)      *dst = *src >> n
  *  bitmap_shift_left(dst, src, n, nbits)       *dst = *src << n
+ *  bitmap_replace(dst, old, new, mask, nbits)  *dst = (*old & ~(*mask)) | (*new & *mask)
  *  bitmap_remap(dst, src, old, new, nbits)     *dst = map(old, new)(src)
  *  bitmap_bitremap(oldbit, old, new, nbits)    newbit = map(old, new)(oldbit)
  *  bitmap_onto(dst, orig, relmap, nbits)       *dst = orig relative to relmap
@@ -140,6 +141,9 @@ extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
 			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
 			const unsigned long *bitmap2, unsigned int nbits);
+extern void __bitmap_replace(unsigned long *dst,
+			const unsigned long *old, const unsigned long *new,
+			const unsigned long *mask, unsigned int nbits);
 extern int __bitmap_intersects(const unsigned long *bitmap1,
 			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_subset(const unsigned long *bitmap1,
@@ -434,6 +438,18 @@ static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *sr
 		__bitmap_shift_left(dst, src, shift, nbits);
 }
 
+static inline void bitmap_replace(unsigned long *dst,
+				  const unsigned long *old,
+				  const unsigned long *new,
+				  const unsigned long *mask,
+				  unsigned int nbits)
+{
+	if (small_const_nbits(nbits))
+		*dst = (*old & ~(*mask)) | (*new & *mask);
+	else
+		__bitmap_replace(dst, old, new, mask, nbits);
+}
+
 static inline int bitmap_parse(const char *buf, unsigned int buflen,
 			unsigned long *maskp, int nmaskbits)
 {
diff --git a/lib/bitmap.c b/lib/bitmap.c
index f9e834841e94..4250519d7d1c 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -222,6 +222,18 @@ int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_andnot);
 
+void __bitmap_replace(unsigned long *dst,
+		      const unsigned long *old, const unsigned long *new,
+		      const unsigned long *mask, unsigned int nbits)
+{
+	unsigned int k;
+	unsigned int nr = BITS_TO_LONGS(nbits);
+
+	for (k = 0; k < nr; k++)
+		dst[k] = (old[k] & ~mask[k]) | (new[k] & mask[k]);
+}
+EXPORT_SYMBOL(__bitmap_replace);
+
 int __bitmap_intersects(const unsigned long *bitmap1,
 			const unsigned long *bitmap2, unsigned int bits)
 {
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 4544847cf81e..e14a15ac250b 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -42,6 +42,19 @@ static const unsigned long exp2[] __initconst = {
 	BITMAP_FROM_U64(0xffffffff77777777ULL),
 };
 
+/* Fibonacci sequence */
+static const unsigned long exp2_to_exp3_mask[] __initconst = {
+	BITMAP_FROM_U64(0x008000020020212eULL),
+};
+/* exp3_0_1 = (exp2[0] & ~exp2_to_exp3_mask) | (exp2[1] & exp2_to_exp3_mask) */
+static const unsigned long exp3_0_1[] __initconst = {
+	BITMAP_FROM_U64(0x33b3333311313137ULL),
+};
+/* exp3_1_0 = (exp2[1] & ~exp2_to_exp3_mask) | (exp2[0] & exp2_to_exp3_mask) */
+static const unsigned long exp3_1_0[] __initconst = {
+	BITMAP_FROM_U64(0xff7fffff77575751ULL),
+};
+
 static bool __init
 __check_eq_uint(const char *srcfile, unsigned int line,
 		const unsigned int exp_uint, unsigned int x)
@@ -257,6 +270,30 @@ static void __init test_copy(void)
 	expect_eq_pbl("0-108,128-1023", bmap2, 1024);
 }
 
+#define EXP2_IN_BITS	(sizeof(exp2) * 8)
+
+static void __init test_replace(void)
+{
+	unsigned int nbits = 64;
+	DECLARE_BITMAP(bmap, 1024);
+
+	bitmap_zero(bmap, 1024);
+	bitmap_replace(bmap, &exp2[0], &exp2[1], exp2_to_exp3_mask, nbits);
+	expect_eq_bitmap(bmap, exp3_0_1, nbits);
+
+	bitmap_zero(bmap, 1024);
+	bitmap_replace(bmap, &exp2[1], &exp2[0], exp2_to_exp3_mask, nbits);
+	expect_eq_bitmap(bmap, exp3_1_0, nbits);
+
+	bitmap_fill(bmap, 1024);
+	bitmap_replace(bmap, &exp2[0], &exp2[1], exp2_to_exp3_mask, nbits);
+	expect_eq_bitmap(bmap, exp3_0_1, nbits);
+
+	bitmap_fill(bmap, 1024);
+	bitmap_replace(bmap, &exp2[1], &exp2[0], exp2_to_exp3_mask, nbits);
+	expect_eq_bitmap(bmap, exp3_1_0, nbits);
+}
+
 #define PARSE_TIME 0x1
 
 struct test_bitmap_parselist{
@@ -476,6 +513,7 @@ static void __init selftest(void)
 	test_zero_clear();
 	test_fill_set();
 	test_copy();
+	test_replace();
 	test_bitmap_arr32();
 	test_bitmap_parselist();
 	test_bitmap_parselist_user();
-- 
cgit v1.2.3-59-g8ed1b


From a97832f224893d6155aa785773df70263cfe94ef Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Dec 2019 16:53:30 -0800
Subject: gpio: pca953x: remove redundant variable and check in IRQ handler

We always will have at least one iteration of the loop due to pending
being guaranteed to be non-zero.  That is, we may remove extra variable
and check in the IRQ handler.

Link: http://lkml.kernel.org/r/20191022172922.61232-9-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Marek Vasut <marek.vasut+renesas@gmail.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-pca953x.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 232e3f987511..9f3301130efb 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -743,7 +743,6 @@ static irqreturn_t pca953x_irq_handler(int irq, void *devid)
 	struct pca953x_chip *chip = devid;
 	u8 pending[MAX_BANK];
 	u8 level;
-	unsigned nhandled = 0;
 	int i;
 
 	if (!pca953x_irq_pending(chip, pending))
@@ -755,11 +754,10 @@ static irqreturn_t pca953x_irq_handler(int irq, void *devid)
 			handle_nested_irq(irq_find_mapping(chip->gpio_chip.irq.domain,
 							level + (BANK_SZ * i)));
 			pending[i] &= ~(1 << level);
-			nhandled++;
 		}
 	}
 
-	return (nhandled > 0) ? IRQ_HANDLED : IRQ_NONE;
+	return IRQ_HANDLED;
 }
 
 static int pca953x_irq_setup(struct pca953x_chip *chip,
-- 
cgit v1.2.3-59-g8ed1b


From 0a0a0219d6c85e6dd3dd5487caa3fb7b540f45f1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Dec 2019 16:53:33 -0800
Subject: gpio: pca953x: use input from regs structure in pca953x_irq_pending()

While PCA_PCAL is defined for PCA953X type only, we still may use an
offset of the input from regs structure for sake of consistency.

Link: http://lkml.kernel.org/r/20191022172922.61232-10-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Marek Vasut <marek.vasut+renesas@gmail.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-pca953x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 9f3301130efb..31375138ee46 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -689,7 +689,7 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending)
 			return false;
 
 		/* Check latched inputs and clear interrupt status */
-		ret = pca953x_read_regs(chip, PCA953X_INPUT, cur_stat);
+		ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
 		if (ret)
 			return false;
 
-- 
cgit v1.2.3-59-g8ed1b


From 35d13d94893fbdb6bbcbd01aa703296e91c7f851 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Dec 2019 16:53:37 -0800
Subject: gpio: pca953x: convert to use bitmap API

Instead of customized approach convert the driver to use bitmap API.

[andriy.shevchenko@linux.intel.com: reduce stack usage in couple of functions]
  Link: http://lkml.kernel.org/r/20191023153056.64262-1-andriy.shevchenko@linux.intel.com
Link: http://lkml.kernel.org/r/20191022172922.61232-11-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: Marek Vasut <marek.vasut+renesas@gmail.com>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-pca953x.c | 164 +++++++++++++++++++-------------------------
 1 file changed, 70 insertions(+), 94 deletions(-)

diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 31375138ee46..b23a38cee613 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -9,8 +9,7 @@
  */
 
 #include <linux/acpi.h>
-#include <linux/bits.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
 #include <linux/gpio/driver.h>
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
@@ -116,6 +115,7 @@ MODULE_DEVICE_TABLE(acpi, pca953x_acpi_ids);
 
 #define MAX_BANK 5
 #define BANK_SZ 8
+#define MAX_LINE	(MAX_BANK * BANK_SZ)
 
 #define NBANK(chip) DIV_ROUND_UP(chip->gpio_chip.ngpio, BANK_SZ)
 
@@ -147,10 +147,10 @@ struct pca953x_chip {
 
 #ifdef CONFIG_GPIO_PCA953X_IRQ
 	struct mutex irq_lock;
-	u8 irq_mask[MAX_BANK];
-	u8 irq_stat[MAX_BANK];
-	u8 irq_trig_raise[MAX_BANK];
-	u8 irq_trig_fall[MAX_BANK];
+	DECLARE_BITMAP(irq_mask, MAX_LINE);
+	DECLARE_BITMAP(irq_stat, MAX_LINE);
+	DECLARE_BITMAP(irq_trig_raise, MAX_LINE);
+	DECLARE_BITMAP(irq_trig_fall, MAX_LINE);
 	struct irq_chip irq_chip;
 #endif
 	atomic_t wakeup_path;
@@ -334,12 +334,16 @@ static u8 pca953x_recalc_addr(struct pca953x_chip *chip, int reg, int off,
 	return regaddr;
 }
 
-static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val)
+static int pca953x_write_regs(struct pca953x_chip *chip, int reg, unsigned long *val)
 {
 	u8 regaddr = pca953x_recalc_addr(chip, reg, 0, true, true);
-	int ret;
+	u8 value[MAX_BANK];
+	int i, ret;
+
+	for (i = 0; i < NBANK(chip); i++)
+		value[i] = bitmap_get_value8(val, i * BANK_SZ);
 
-	ret = regmap_bulk_write(chip->regmap, regaddr, val, NBANK(chip));
+	ret = regmap_bulk_write(chip->regmap, regaddr, value, NBANK(chip));
 	if (ret < 0) {
 		dev_err(&chip->client->dev, "failed writing register\n");
 		return ret;
@@ -348,17 +352,21 @@ static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val)
 	return 0;
 }
 
-static int pca953x_read_regs(struct pca953x_chip *chip, int reg, u8 *val)
+static int pca953x_read_regs(struct pca953x_chip *chip, int reg, unsigned long *val)
 {
 	u8 regaddr = pca953x_recalc_addr(chip, reg, 0, false, true);
-	int ret;
+	u8 value[MAX_BANK];
+	int i, ret;
 
-	ret = regmap_bulk_read(chip->regmap, regaddr, val, NBANK(chip));
+	ret = regmap_bulk_read(chip->regmap, regaddr, value, NBANK(chip));
 	if (ret < 0) {
 		dev_err(&chip->client->dev, "failed reading register\n");
 		return ret;
 	}
 
+	for (i = 0; i < NBANK(chip); i++)
+		bitmap_set_value8(val, value[i], i * BANK_SZ);
+
 	return 0;
 }
 
@@ -460,10 +468,7 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc,
 				      unsigned long *mask, unsigned long *bits)
 {
 	struct pca953x_chip *chip = gpiochip_get_data(gc);
-	unsigned long offset;
-	unsigned long bank_mask;
-	int bank;
-	u8 reg_val[MAX_BANK];
+	DECLARE_BITMAP(reg_val, MAX_LINE);
 	int ret;
 
 	mutex_lock(&chip->i2c_lock);
@@ -471,11 +476,7 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc,
 	if (ret)
 		goto exit;
 
-	for_each_set_clump8(offset, bank_mask, mask, gc->ngpio) {
-		bank = offset / 8;
-		reg_val[bank] &= ~bank_mask;
-		reg_val[bank] |= bitmap_get_value8(bits, offset) & bank_mask;
-	}
+	bitmap_replace(reg_val, reg_val, bits, mask, gc->ngpio);
 
 	pca953x_write_regs(chip, chip->regs->output, reg_val);
 exit:
@@ -602,10 +603,9 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d)
 {
 	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
 	struct pca953x_chip *chip = gpiochip_get_data(gc);
-	u8 new_irqs;
-	int level, i;
-	u8 invert_irq_mask[MAX_BANK];
-	u8 reg_direction[MAX_BANK];
+	DECLARE_BITMAP(irq_mask, MAX_LINE);
+	DECLARE_BITMAP(reg_direction, MAX_LINE);
+	int level;
 
 	pca953x_read_regs(chip, chip->regs->direction, reg_direction);
 
@@ -613,25 +613,18 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d)
 		/* Enable latch on interrupt-enabled inputs */
 		pca953x_write_regs(chip, PCAL953X_IN_LATCH, chip->irq_mask);
 
-		for (i = 0; i < NBANK(chip); i++)
-			invert_irq_mask[i] = ~chip->irq_mask[i];
+		bitmap_complement(irq_mask, chip->irq_mask, gc->ngpio);
 
 		/* Unmask enabled interrupts */
-		pca953x_write_regs(chip, PCAL953X_INT_MASK, invert_irq_mask);
+		pca953x_write_regs(chip, PCAL953X_INT_MASK, irq_mask);
 	}
 
+	bitmap_or(irq_mask, chip->irq_trig_fall, chip->irq_trig_raise, gc->ngpio);
+	bitmap_and(irq_mask, irq_mask, reg_direction, gc->ngpio);
+
 	/* Look for any newly setup interrupt */
-	for (i = 0; i < NBANK(chip); i++) {
-		new_irqs = chip->irq_trig_fall[i] | chip->irq_trig_raise[i];
-		new_irqs &= reg_direction[i];
-
-		while (new_irqs) {
-			level = __ffs(new_irqs);
-			pca953x_gpio_direction_input(&chip->gpio_chip,
-							level + (BANK_SZ * i));
-			new_irqs &= ~(1 << level);
-		}
-	}
+	for_each_set_bit(level, irq_mask, gc->ngpio)
+		pca953x_gpio_direction_input(&chip->gpio_chip, level);
 
 	mutex_unlock(&chip->irq_lock);
 }
@@ -672,15 +665,15 @@ static void pca953x_irq_shutdown(struct irq_data *d)
 	chip->irq_trig_fall[d->hwirq / BANK_SZ] &= ~mask;
 }
 
-static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending)
+static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pending)
 {
-	u8 cur_stat[MAX_BANK];
-	u8 old_stat[MAX_BANK];
-	bool pending_seen = false;
-	bool trigger_seen = false;
-	u8 trigger[MAX_BANK];
-	u8 reg_direction[MAX_BANK];
-	int ret, i;
+	struct gpio_chip *gc = &chip->gpio_chip;
+	DECLARE_BITMAP(reg_direction, MAX_LINE);
+	DECLARE_BITMAP(old_stat, MAX_LINE);
+	DECLARE_BITMAP(cur_stat, MAX_LINE);
+	DECLARE_BITMAP(new_stat, MAX_LINE);
+	DECLARE_BITMAP(trigger, MAX_LINE);
+	int ret;
 
 	if (chip->driver_data & PCA_PCAL) {
 		/* Read the current interrupt status from the device */
@@ -693,16 +686,12 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending)
 		if (ret)
 			return false;
 
-		for (i = 0; i < NBANK(chip); i++) {
-			/* Apply filter for rising/falling edge selection */
-			pending[i] = (~cur_stat[i] & chip->irq_trig_fall[i]) |
-				(cur_stat[i] & chip->irq_trig_raise[i]);
-			pending[i] &= trigger[i];
-			if (pending[i])
-				pending_seen = true;
-		}
+		/* Apply filter for rising/falling edge selection */
+		bitmap_replace(new_stat, chip->irq_trig_fall, chip->irq_trig_raise, cur_stat, gc->ngpio);
+
+		bitmap_and(pending, new_stat, trigger, gc->ngpio);
 
-		return pending_seen;
+		return !bitmap_empty(pending, gc->ngpio);
 	}
 
 	ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
@@ -711,51 +700,38 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending)
 
 	/* Remove output pins from the equation */
 	pca953x_read_regs(chip, chip->regs->direction, reg_direction);
-	for (i = 0; i < NBANK(chip); i++)
-		cur_stat[i] &= reg_direction[i];
 
-	memcpy(old_stat, chip->irq_stat, NBANK(chip));
+	bitmap_copy(old_stat, chip->irq_stat, gc->ngpio);
 
-	for (i = 0; i < NBANK(chip); i++) {
-		trigger[i] = (cur_stat[i] ^ old_stat[i]) & chip->irq_mask[i];
-		if (trigger[i])
-			trigger_seen = true;
-	}
+	bitmap_and(new_stat, cur_stat, reg_direction, gc->ngpio);
+	bitmap_xor(cur_stat, new_stat, old_stat, gc->ngpio);
+	bitmap_and(trigger, cur_stat, chip->irq_mask, gc->ngpio);
 
-	if (!trigger_seen)
+	if (bitmap_empty(trigger, gc->ngpio))
 		return false;
 
-	memcpy(chip->irq_stat, cur_stat, NBANK(chip));
+	bitmap_copy(chip->irq_stat, new_stat, gc->ngpio);
 
-	for (i = 0; i < NBANK(chip); i++) {
-		pending[i] = (old_stat[i] & chip->irq_trig_fall[i]) |
-			(cur_stat[i] & chip->irq_trig_raise[i]);
-		pending[i] &= trigger[i];
-		if (pending[i])
-			pending_seen = true;
-	}
+	bitmap_and(cur_stat, chip->irq_trig_fall, old_stat, gc->ngpio);
+	bitmap_and(old_stat, chip->irq_trig_raise, new_stat, gc->ngpio);
+	bitmap_or(new_stat, old_stat, cur_stat, gc->ngpio);
+	bitmap_and(pending, new_stat, trigger, gc->ngpio);
 
-	return pending_seen;
+	return !bitmap_empty(pending, gc->ngpio);
 }
 
 static irqreturn_t pca953x_irq_handler(int irq, void *devid)
 {
 	struct pca953x_chip *chip = devid;
-	u8 pending[MAX_BANK];
-	u8 level;
-	int i;
+	struct gpio_chip *gc = &chip->gpio_chip;
+	DECLARE_BITMAP(pending, MAX_LINE);
+	int level;
 
 	if (!pca953x_irq_pending(chip, pending))
 		return IRQ_NONE;
 
-	for (i = 0; i < NBANK(chip); i++) {
-		while (pending[i]) {
-			level = __ffs(pending[i]);
-			handle_nested_irq(irq_find_mapping(chip->gpio_chip.irq.domain,
-							level + (BANK_SZ * i)));
-			pending[i] &= ~(1 << level);
-		}
-	}
+	for_each_set_bit(level, pending, gc->ngpio)
+		handle_nested_irq(irq_find_mapping(gc->irq.domain, level));
 
 	return IRQ_HANDLED;
 }
@@ -765,8 +741,9 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
 {
 	struct i2c_client *client = chip->client;
 	struct irq_chip *irq_chip = &chip->irq_chip;
-	u8 reg_direction[MAX_BANK];
-	int ret, i;
+	DECLARE_BITMAP(reg_direction, MAX_LINE);
+	DECLARE_BITMAP(irq_stat, MAX_LINE);
+	int ret;
 
 	if (!client->irq)
 		return 0;
@@ -777,7 +754,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
 	if (!(chip->driver_data & PCA_INT))
 		return 0;
 
-	ret = pca953x_read_regs(chip, chip->regs->input, chip->irq_stat);
+	ret = pca953x_read_regs(chip, chip->regs->input, irq_stat);
 	if (ret)
 		return ret;
 
@@ -787,8 +764,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
 	 * this purpose.
 	 */
 	pca953x_read_regs(chip, chip->regs->direction, reg_direction);
-	for (i = 0; i < NBANK(chip); i++)
-		chip->irq_stat[i] &= reg_direction[i];
+	bitmap_and(chip->irq_stat, irq_stat, reg_direction, chip->gpio_chip.ngpio);
 	mutex_init(&chip->irq_lock);
 
 	ret = devm_request_threaded_irq(&client->dev, client->irq,
@@ -840,8 +816,8 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
 
 static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert)
 {
+	DECLARE_BITMAP(val, MAX_LINE);
 	int ret;
-	u8 val[MAX_BANK];
 
 	ret = regcache_sync_region(chip->regmap, chip->regs->output,
 				   chip->regs->output + NBANK(chip));
@@ -855,9 +831,9 @@ static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert)
 
 	/* set platform specific polarity inversion */
 	if (invert)
-		memset(val, 0xFF, NBANK(chip));
+		bitmap_fill(val, MAX_LINE);
 	else
-		memset(val, 0, NBANK(chip));
+		bitmap_zero(val, MAX_LINE);
 
 	ret = pca953x_write_regs(chip, chip->regs->invert, val);
 out:
@@ -866,8 +842,8 @@ out:
 
 static int device_pca957x_init(struct pca953x_chip *chip, u32 invert)
 {
+	DECLARE_BITMAP(val, MAX_LINE);
 	int ret;
-	u8 val[MAX_BANK];
 
 	ret = device_pca95xx_init(chip, invert);
 	if (ret)
-- 
cgit v1.2.3-59-g8ed1b


From b27d8517365eeac907de1cc1e91053ccf18179c3 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Dec 2019 16:53:40 -0800
Subject: gpio: pca953x: tighten up indentation

There is no need to split some of the lines.  However, improve the style
of multi-line comment.  On top of this there is no need to have double
space.

Correct above indentation issues without altering the functionality.

Link: http://lkml.kernel.org/r/20191022172922.61232-12-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Marek Vasut <marek.vasut+renesas@gmail.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: William Breathitt Gray <vilhelm.gray@gmail.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpio-pca953x.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index b23a38cee613..6652bee01966 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -421,7 +421,9 @@ static int pca953x_gpio_get_value(struct gpio_chip *gc, unsigned off)
 	ret = regmap_read(chip->regmap, inreg, &reg_val);
 	mutex_unlock(&chip->i2c_lock);
 	if (ret < 0) {
-		/* NOTE:  diagnostic already emitted; that's all we should
+		/*
+		 * NOTE:
+		 * diagnostic already emitted; that's all we should
 		 * do unless gpio_*_value_cansleep() calls become different
 		 * from their nonsleeping siblings (and report faults).
 		 */
@@ -736,8 +738,7 @@ static irqreturn_t pca953x_irq_handler(int irq, void *devid)
 	return IRQ_HANDLED;
 }
 
-static int pca953x_irq_setup(struct pca953x_chip *chip,
-			     int irq_base)
+static int pca953x_irq_setup(struct pca953x_chip *chip, int irq_base)
 {
 	struct i2c_client *client = chip->client;
 	struct irq_chip *irq_chip = &chip->irq_chip;
@@ -787,9 +788,9 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
 	irq_chip->irq_set_type = pca953x_irq_set_type;
 	irq_chip->irq_shutdown = pca953x_irq_shutdown;
 
-	ret =  gpiochip_irqchip_add_nested(&chip->gpio_chip, irq_chip,
-					   irq_base, handle_simple_irq,
-					   IRQ_TYPE_NONE);
+	ret = gpiochip_irqchip_add_nested(&chip->gpio_chip, irq_chip,
+					  irq_base, handle_simple_irq,
+					  IRQ_TYPE_NONE);
 	if (ret) {
 		dev_err(&client->dev,
 			"could not connect irqchip to gpiochip\n");
@@ -863,7 +864,7 @@ out:
 static const struct of_device_id pca953x_dt_ids[];
 
 static int pca953x_probe(struct i2c_client *client,
-				   const struct i2c_device_id *i2c_id)
+			 const struct i2c_device_id *i2c_id)
 {
 	struct pca953x_platform_data *pdata;
 	struct pca953x_chip *chip;
@@ -872,8 +873,7 @@ static int pca953x_probe(struct i2c_client *client,
 	u32 invert = 0;
 	struct regulator *reg;
 
-	chip = devm_kzalloc(&client->dev,
-			sizeof(struct pca953x_chip), GFP_KERNEL);
+	chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL);
 	if (chip == NULL)
 		return -ENOMEM;
 
@@ -987,7 +987,7 @@ static int pca953x_probe(struct i2c_client *client,
 
 	if (pdata && pdata->setup) {
 		ret = pdata->setup(client, chip->gpio_chip.base,
-				chip->gpio_chip.ngpio, pdata->context);
+				   chip->gpio_chip.ngpio, pdata->context);
 		if (ret < 0)
 			dev_warn(&client->dev, "setup failed, %d\n", ret);
 	}
@@ -1007,7 +1007,7 @@ static int pca953x_remove(struct i2c_client *client)
 
 	if (pdata && pdata->teardown) {
 		ret = pdata->teardown(client, chip->gpio_chip.base,
-				chip->gpio_chip.ngpio, pdata->context);
+				      chip->gpio_chip.ngpio, pdata->context);
 		if (ret < 0)
 			dev_err(&client->dev, "teardown failed, %d\n", ret);
 	} else {
-- 
cgit v1.2.3-59-g8ed1b


From a73c948952ccd663b47f6cf59ac0fcae5bf0512c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 4 Dec 2019 16:53:44 -0800
Subject: alpha: use pgtable-nopud instead of 4level-fixup

Patch series "mm: remove __ARCH_HAS_4LEVEL_HACK", v13.

These patches convert several architectures to use page table folding
and remove __ARCH_HAS_4LEVEL_HACK along with
include/asm-generic/4level-fixup.h.

For the nommu configurations the folding is already implemented by the
generic code so the only change was to use the appropriate header file.

As for the rest, the changes are mostly about mechanical replacement of
pgd accessors with pud/pmd ones and the addition of higher levels to
page table traversals.

With Vineet's patches from "elide extraneous generated code for folded
p4d/pud/pmd" series [1] there is a small shrink of the kernel size of
about -0.01% for the defconfig builds.

This patch (of 13):

It is not likely alpha will have 5-level page tables.

Replace usage of include/asm-generic/4level-fixup.h and implied
__ARCH_HAS_4LEVEL_HACK with include/asm-generic/pgtable-nopud.h and
adjust page table manipulation macros and functions accordingly.

Link: http://lkml.kernel.org/r/1572938135-31886-2-git-send-email-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Helge Deller <deller@gmx.de>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Sam Creasey <sammy@sammy.net>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Cc: Anatoly Pugachev <matorola@gmail.com>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/mmzone.h  |  1 -
 arch/alpha/include/asm/pgalloc.h |  4 ++--
 arch/alpha/include/asm/pgtable.h | 24 ++++++++++++------------
 arch/alpha/mm/init.c             | 12 ++++++++----
 4 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h
index 889b5d3ad825..7ee144f484f1 100644
--- a/arch/alpha/include/asm/mmzone.h
+++ b/arch/alpha/include/asm/mmzone.h
@@ -73,7 +73,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 
 #define pmd_page(pmd)		(pfn_to_page(pmd_val(pmd) >> 32))
-#define pgd_page(pgd)		(pfn_to_page(pgd_val(pgd) >> 32))
 #define pte_pfn(pte)		(pte_val(pte) >> 32)
 
 #define mk_pte(page, pgprot)						     \
diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index eb91f1e85629..a1a29f60934c 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -27,9 +27,9 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 }
 
 static inline void
-pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-	pgd_set(pgd, pmd);
+	pud_set(pud, pmd);
 }
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 065b57f408c3..299791ce14b6 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -2,7 +2,7 @@
 #ifndef _ALPHA_PGTABLE_H
 #define _ALPHA_PGTABLE_H
 
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
 
 /*
  * This file contains the functions and defines necessary to modify and use
@@ -226,8 +226,8 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 extern inline void pmd_set(pmd_t * pmdp, pte_t * ptep)
 { pmd_val(*pmdp) = _PAGE_TABLE | ((((unsigned long) ptep) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
 
-extern inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp)
-{ pgd_val(*pgdp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
+extern inline void pud_set(pud_t * pudp, pmd_t * pmdp)
+{ pud_val(*pudp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
 
 
 extern inline unsigned long
@@ -238,11 +238,11 @@ pmd_page_vaddr(pmd_t pmd)
 
 #ifndef CONFIG_DISCONTIGMEM
 #define pmd_page(pmd)	(mem_map + ((pmd_val(pmd) & _PFN_MASK) >> 32))
-#define pgd_page(pgd)	(mem_map + ((pgd_val(pgd) & _PFN_MASK) >> 32))
+#define pud_page(pud)	(mem_map + ((pud_val(pud) & _PFN_MASK) >> 32))
 #endif
 
-extern inline unsigned long pgd_page_vaddr(pgd_t pgd)
-{ return PAGE_OFFSET + ((pgd_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); }
+extern inline unsigned long pud_page_vaddr(pud_t pgd)
+{ return PAGE_OFFSET + ((pud_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); }
 
 extern inline int pte_none(pte_t pte)		{ return !pte_val(pte); }
 extern inline int pte_present(pte_t pte)	{ return pte_val(pte) & _PAGE_VALID; }
@@ -256,10 +256,10 @@ extern inline int pmd_bad(pmd_t pmd)		{ return (pmd_val(pmd) & ~_PFN_MASK) != _P
 extern inline int pmd_present(pmd_t pmd)	{ return pmd_val(pmd) & _PAGE_VALID; }
 extern inline void pmd_clear(pmd_t * pmdp)	{ pmd_val(*pmdp) = 0; }
 
-extern inline int pgd_none(pgd_t pgd)		{ return !pgd_val(pgd); }
-extern inline int pgd_bad(pgd_t pgd)		{ return (pgd_val(pgd) & ~_PFN_MASK) != _PAGE_TABLE; }
-extern inline int pgd_present(pgd_t pgd)	{ return pgd_val(pgd) & _PAGE_VALID; }
-extern inline void pgd_clear(pgd_t * pgdp)	{ pgd_val(*pgdp) = 0; }
+extern inline int pud_none(pud_t pud)		{ return !pud_val(pud); }
+extern inline int pud_bad(pud_t pud)		{ return (pud_val(pud) & ~_PFN_MASK) != _PAGE_TABLE; }
+extern inline int pud_present(pud_t pud)	{ return pud_val(pud) & _PAGE_VALID; }
+extern inline void pud_clear(pud_t * pudp)	{ pud_val(*pudp) = 0; }
 
 /*
  * The following only work if pte_present() is true.
@@ -301,9 +301,9 @@ extern inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
  */
 
 /* Find an entry in the second-level page table.. */
-extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address)
 {
-	pmd_t *ret = (pmd_t *) pgd_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
+	pmd_t *ret = (pmd_t *) pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
 	smp_read_barrier_depends(); /* see above */
 	return ret;
 }
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index e2cbec3789e8..12e218d3792a 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -146,6 +146,8 @@ callback_init(void * kernel_end)
 {
 	struct crb_struct * crb;
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	void *two_pages;
 
@@ -184,8 +186,10 @@ callback_init(void * kernel_end)
 	memset(two_pages, 0, 2*PAGE_SIZE);
 
 	pgd = pgd_offset_k(VMALLOC_START);
-	pgd_set(pgd, (pmd_t *)two_pages);
-	pmd = pmd_offset(pgd, VMALLOC_START);
+	p4d = p4d_offset(pgd, VMALLOC_START);
+	pud = pud_offset(p4d, VMALLOC_START);
+	pud_set(pud, (pmd_t *)two_pages);
+	pmd = pmd_offset(pud, VMALLOC_START);
 	pmd_set(pmd, (pte_t *)(two_pages + PAGE_SIZE));
 
 	if (alpha_using_srm) {
@@ -214,9 +218,9 @@ callback_init(void * kernel_end)
 				/* Newer consoles (especially on larger
 				   systems) may require more pages of
 				   PTEs. Grab additional pages as needed. */
-				if (pmd != pmd_offset(pgd, vaddr)) {
+				if (pmd != pmd_offset(pud, vaddr)) {
 					memset(kernel_end, 0, PAGE_SIZE);
-					pmd = pmd_offset(pgd, vaddr);
+					pmd = pmd_offset(pud, vaddr);
 					pmd_set(pmd, (pte_t *)kernel_end);
 					kernel_end += PAGE_SIZE;
 				}
-- 
cgit v1.2.3-59-g8ed1b


From aa6628230deb3a0d871dc51923531b41dfbef352 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 4 Dec 2019 16:53:48 -0800
Subject: arm: nommu: use pgtable-nopud instead of 4level-fixup

The generic nommu implementation of page table manipulation takes care
of folding of the upper levels and does not require fixups.

Simply replace of include/asm-generic/4level-fixup.h with
include/asm-generic/pgtable-nopud.h.

Link: http://lkml.kernel.org/r/1572938135-31886-3-git-send-email-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Anatoly Pugachev <matorola@gmail.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Helge Deller <deller@gmx.de>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Sam Creasey <sammy@sammy.net>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 3ae120cd1715..eabcb48a7840 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -12,7 +12,7 @@
 
 #ifndef CONFIG_MMU
 
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
 #include <asm/pgtable-nommu.h>
 
 #else
-- 
cgit v1.2.3-59-g8ed1b


From d13252ea800e1f2a39e40c2b8a4397b21a80555d Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 4 Dec 2019 16:53:52 -0800
Subject: c6x: use pgtable-nopud instead of 4level-fixup

c6x is a nommu architecture and does not require fixup for upper layers
of the page tables because it is already handled by the generic nommu
implementation.

Replace usage of include/asm-generic/4level-fixup.h with
include/asm-generic/pgtable-nopud.h

Link: http://lkml.kernel.org/r/1572938135-31886-4-git-send-email-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Anatoly Pugachev <matorola@gmail.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Helge Deller <deller@gmx.de>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Sam Creasey <sammy@sammy.net>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/c6x/include/asm/pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h
index 0b6919c00413..197c473b796a 100644
--- a/arch/c6x/include/asm/pgtable.h
+++ b/arch/c6x/include/asm/pgtable.h
@@ -8,7 +8,7 @@
 #ifndef _ASM_C6X_PGTABLE_H
 #define _ASM_C6X_PGTABLE_H
 
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
 
 #include <asm/setup.h>
 #include <asm/page.h>
-- 
cgit v1.2.3-59-g8ed1b


From f6f7caeb58532db2e46c40eee2d7e8969d5c707e Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 4 Dec 2019 16:53:55 -0800
Subject: m68k: nommu: use pgtable-nopud instead of 4level-fixup

The generic nommu implementation of page table manipulation takes care
of folding of the upper levels and does not require fixups.

Simply replace of include/asm-generic/4level-fixup.h with
include/asm-generic/pgtable-nopud.h.

Link: http://lkml.kernel.org/r/1572938135-31886-5-git-send-email-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Greg Ungerer <gerg@linux-m68k.org>
Cc: Anatoly Pugachev <matorola@gmail.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Helge Deller <deller@gmx.de>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Sam Creasey <sammy@sammy.net>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/include/asm/pgtable_no.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index c18165b0d904..ccc4568299e5 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -2,7 +2,7 @@
 #ifndef _M68KNOMMU_PGTABLE_H
 #define _M68KNOMMU_PGTABLE_H
 
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
 
 /*
  * (C) Copyright 2000-2002, Greg Ungerer <gerg@snapgear.com>
-- 
cgit v1.2.3-59-g8ed1b


From 60e50f34b13e9e40763be12aa55f2144d8da514c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 4 Dec 2019 16:53:59 -0800
Subject: m68k: mm: use pgtable-nopXd instead of 4level-fixup

m68k has two or three levels of page tables and can use appropriate
pgtable-nopXd and folding of the upper layers.

Replace usage of include/asm-generic/4level-fixup.h and explicit
definitions of __PAGETABLE_PxD_FOLDED in m68k with
include/asm-generic/pgtable-nopmd.h for two-level configurations and
with include/asm-generic/pgtable-nopud.h for three-lelve configurations
and adjust page table manipulation macros and functions accordingly.

[akpm@linux-foundation.org: fix merge glitch]
[geert@linux-m68k.org: more merge glitch fixes]
[akpm@linux-foundation.org: s/bad_pgd/bad_pud/, per Mike]
Link: http://lkml.kernel.org/r/1572938135-31886-6-git-send-email-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Greg Ungerer <gerg@linux-m68k.org>
Cc: Anatoly Pugachev <matorola@gmail.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Helge Deller <deller@gmx.de>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Sam Creasey <sammy@sammy.net>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/include/asm/mcf_pgalloc.h      |  7 ------
 arch/m68k/include/asm/mcf_pgtable.h      | 28 ++++++++---------------
 arch/m68k/include/asm/mmu_context.h      | 12 +++++++++-
 arch/m68k/include/asm/motorola_pgalloc.h |  4 ++--
 arch/m68k/include/asm/motorola_pgtable.h | 32 ++++++++++++++++----------
 arch/m68k/include/asm/page.h             |  9 +++++---
 arch/m68k/include/asm/pgtable_mm.h       | 11 +++++----
 arch/m68k/include/asm/sun3_pgalloc.h     |  5 ----
 arch/m68k/include/asm/sun3_pgtable.h     | 18 ---------------
 arch/m68k/kernel/sys_m68k.c              | 10 +++++++-
 arch/m68k/mm/init.c                      |  6 +++--
 arch/m68k/mm/kmap.c                      | 39 ++++++++++++++++++++++++--------
 arch/m68k/mm/mcfmmu.c                    | 16 ++++++++++++-
 arch/m68k/mm/motorola.c                  | 17 +++++++++-----
 arch/m68k/sun3x/dvma.c                   |  7 ++++--
 15 files changed, 129 insertions(+), 92 deletions(-)

diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index b34d44d666a4..82ec54c2eaa4 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -28,9 +28,6 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 	return (pmd_t *) pgd;
 }
 
-#define pmd_alloc_one_fast(mm, address) ({ BUG(); ((pmd_t *)1); })
-#define pmd_alloc_one(mm, address)      ({ BUG(); ((pmd_t *)2); })
-
 #define pmd_populate(mm, pmd, page) (pmd_val(*pmd) = \
 	(unsigned long)(page_address(page)))
 
@@ -45,8 +42,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 	__free_page(page);
 }
 
-#define __pmd_free_tlb(tlb, pmd, address) do { } while (0)
-
 static inline struct page *pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page = alloc_pages(GFP_DMA, 0);
@@ -100,6 +95,4 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 	return new_pgd;
 }
 
-#define pgd_populate(mm, pmd, pte) BUG()
-
 #endif /* M68K_MCF_PGALLOC_H */
diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h
index 5d5502cb2b2d..b9f45aeded25 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -198,17 +198,9 @@ static inline int pmd_bad2(pmd_t *pmd) { return 0; }
 #define pmd_present(pmd) (!pmd_none2(&(pmd)))
 static inline void pmd_clear(pmd_t *pmdp) { pmd_val(*pmdp) = 0; }
 
-static inline int pgd_none(pgd_t pgd) { return 0; }
-static inline int pgd_bad(pgd_t pgd) { return 0; }
-static inline int pgd_present(pgd_t pgd) { return 1; }
-static inline void pgd_clear(pgd_t *pgdp) {}
-
 #define pte_ERROR(e) \
 	printk(KERN_ERR "%s:%d: bad pte %08lx.\n",	\
 	__FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
-	printk(KERN_ERR "%s:%d: bad pmd %08lx.\n",	\
-	__FILE__, __LINE__, pmd_val(e))
 #define pgd_ERROR(e) \
 	printk(KERN_ERR "%s:%d: bad pgd %08lx.\n",	\
 	__FILE__, __LINE__, pgd_val(e))
@@ -339,14 +331,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
  */
 #define pgd_offset_k(address)	pgd_offset(&init_mm, address)
 
-/*
- * Find an entry in the second-level pagetable.
- */
-static inline pmd_t *pmd_offset(pgd_t *pgd, unsigned long address)
-{
-	return (pmd_t *) pgd;
-}
-
 /*
  * Find an entry in the third-level pagetable.
  */
@@ -360,12 +344,16 @@ static inline pmd_t *pmd_offset(pgd_t *pgd, unsigned long address)
 static inline void nocache_page(void *vaddr)
 {
 	pgd_t *dir;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 	unsigned long addr = (unsigned long) vaddr;
 
 	dir = pgd_offset_k(addr);
-	pmdp = pmd_offset(dir, addr);
+	p4dp = p4d_offset(dir, addr);
+	pudp = pud_offset(p4dp, addr);
+	pmdp = pmd_offset(pudp, addr);
 	ptep = pte_offset_kernel(pmdp, addr);
 	*ptep = pte_mknocache(*ptep);
 }
@@ -376,12 +364,16 @@ static inline void nocache_page(void *vaddr)
 static inline void cache_page(void *vaddr)
 {
 	pgd_t *dir;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 	unsigned long addr = (unsigned long) vaddr;
 
 	dir = pgd_offset_k(addr);
-	pmdp = pmd_offset(dir, addr);
+	p4dp = p4d_offset(dir, addr);
+	pudp = pud_offset(p4dp, addr);
+	pmdp = pmd_offset(pudp, addr);
 	ptep = pte_offset_kernel(pmdp, addr);
 	*ptep = pte_mkcache(*ptep);
 }
diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h
index f5b1852b4663..cac9f289d1f6 100644
--- a/arch/m68k/include/asm/mmu_context.h
+++ b/arch/m68k/include/asm/mmu_context.h
@@ -100,6 +100,8 @@ static inline void load_ksp_mmu(struct task_struct *task)
 	struct mm_struct *mm;
 	int asid;
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned long mmuar;
@@ -127,7 +129,15 @@ static inline void load_ksp_mmu(struct task_struct *task)
 	if (pgd_none(*pgd))
 		goto bug;
 
-	pmd = pmd_offset(pgd, mmuar);
+	p4d = p4d_offset(pgd, mmuar);
+	if (p4d_none(*p4d))
+		goto bug;
+
+	pud = pud_offset(p4d, mmuar);
+	if (pud_none(*pud))
+		goto bug;
+
+	pmd = pmd_offset(pud, mmuar);
 	if (pmd_none(*pmd))
 		goto bug;
 
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index acab315c851f..ff9cc401ffd1 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -106,9 +106,9 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page
 }
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-	pgd_set(pgd, pmd);
+	pud_set(pud, pmd);
 }
 
 #endif /* _MOTOROLA_PGALLOC_H */
diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h
index 7f66a7bad7a5..62bedc61f110 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -117,14 +117,14 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 	}
 }
 
-static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
+static inline void pud_set(pud_t *pudp, pmd_t *pmdp)
 {
-	pgd_val(*pgdp) = _PAGE_TABLE | _PAGE_ACCESSED | __pa(pmdp);
+	pud_val(*pudp) = _PAGE_TABLE | _PAGE_ACCESSED | __pa(pmdp);
 }
 
 #define __pte_page(pte) ((unsigned long)__va(pte_val(pte) & PAGE_MASK))
 #define __pmd_page(pmd) ((unsigned long)__va(pmd_val(pmd) & _TABLE_MASK))
-#define __pgd_page(pgd) ((unsigned long)__va(pgd_val(pgd) & _TABLE_MASK))
+#define pud_page_vaddr(pud) ((unsigned long)__va(pud_val(pud) & _TABLE_MASK))
 
 
 #define pte_none(pte)		(!pte_val(pte))
@@ -147,11 +147,11 @@ static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
 #define pmd_page(pmd)		virt_to_page(__va(pmd_val(pmd)))
 
 
-#define pgd_none(pgd)		(!pgd_val(pgd))
-#define pgd_bad(pgd)		((pgd_val(pgd) & _DESCTYPE_MASK) != _PAGE_TABLE)
-#define pgd_present(pgd)	(pgd_val(pgd) & _PAGE_TABLE)
-#define pgd_clear(pgdp)		({ pgd_val(*pgdp) = 0; })
-#define pgd_page(pgd)		(mem_map + ((unsigned long)(__va(pgd_val(pgd)) - PAGE_OFFSET) >> PAGE_SHIFT))
+#define pud_none(pud)		(!pud_val(pud))
+#define pud_bad(pud)		((pud_val(pud) & _DESCTYPE_MASK) != _PAGE_TABLE)
+#define pud_present(pud)	(pud_val(pud) & _PAGE_TABLE)
+#define pud_clear(pudp)		({ pud_val(*pudp) = 0; })
+#define pud_page(pud)		(mem_map + ((unsigned long)(__va(pud_val(pud)) - PAGE_OFFSET) >> PAGE_SHIFT))
 
 #define pte_ERROR(e) \
 	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
@@ -209,9 +209,9 @@ static inline pgd_t *pgd_offset_k(unsigned long address)
 
 
 /* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address)
+static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address)
 {
-	return (pmd_t *)__pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD-1));
+	return (pmd_t *)pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD-1));
 }
 
 /* Find an entry in the third-level page table.. */
@@ -239,11 +239,15 @@ static inline void nocache_page(void *vaddr)
 
 	if (CPU_IS_040_OR_060) {
 		pgd_t *dir;
+		p4d_t *p4dp;
+		pud_t *pudp;
 		pmd_t *pmdp;
 		pte_t *ptep;
 
 		dir = pgd_offset_k(addr);
-		pmdp = pmd_offset(dir, addr);
+		p4dp = p4d_offset(dir, addr);
+		pudp = pud_offset(p4dp, addr);
+		pmdp = pmd_offset(pudp, addr);
 		ptep = pte_offset_kernel(pmdp, addr);
 		*ptep = pte_mknocache(*ptep);
 	}
@@ -255,11 +259,15 @@ static inline void cache_page(void *vaddr)
 
 	if (CPU_IS_040_OR_060) {
 		pgd_t *dir;
+		p4d_t *p4dp;
+		pud_t *pudp;
 		pmd_t *pmdp;
 		pte_t *ptep;
 
 		dir = pgd_offset_k(addr);
-		pmdp = pmd_offset(dir, addr);
+		p4dp = p4d_offset(dir, addr);
+		pudp = pud_offset(p4dp, addr);
+		pmdp = pmd_offset(pudp, addr);
 		ptep = pte_offset_kernel(pmdp, addr);
 		*ptep = pte_mkcache(*ptep);
 	}
diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h
index 700d8195880c..05e1e1e77a9a 100644
--- a/arch/m68k/include/asm/page.h
+++ b/arch/m68k/include/asm/page.h
@@ -21,19 +21,22 @@
 /*
  * These are used to make use of C type-checking..
  */
-typedef struct { unsigned long pte; } pte_t;
+#if !defined(CONFIG_MMU) || CONFIG_PGTABLE_LEVELS == 3
 typedef struct { unsigned long pmd[16]; } pmd_t;
+#define pmd_val(x)	((&x)->pmd[0])
+#define __pmd(x)	((pmd_t) { { (x) }, })
+#endif
+
+typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pgd; } pgd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;
 typedef struct page *pgtable_t;
 
 #define pte_val(x)	((x).pte)
-#define pmd_val(x)	((&x)->pmd[0])
 #define pgd_val(x)	((x).pgd)
 #define pgprot_val(x)	((x).pgprot)
 
 #define __pte(x)	((pte_t) { (x) } )
-#define __pmd(x)	((pmd_t) { { (x) }, })
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index 646c174fff99..2bf5c3501e78 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -2,7 +2,12 @@
 #ifndef _M68K_PGTABLE_H
 #define _M68K_PGTABLE_H
 
-#include <asm-generic/4level-fixup.h>
+
+#if defined(CONFIG_SUN3) || defined(CONFIG_COLDFIRE)
+#include <asm-generic/pgtable-nopmd.h>
+#else
+#include <asm-generic/pgtable-nopud.h>
+#endif
 
 #include <asm/setup.h>
 
@@ -30,9 +35,7 @@
 
 
 /* PMD_SHIFT determines the size of the area a second-level page table can map */
-#ifdef CONFIG_SUN3
-#define PMD_SHIFT       17
-#else
+#if CONFIG_PGTABLE_LEVELS == 3
 #define PMD_SHIFT	22
 #endif
 #define PMD_SIZE	(1UL << PMD_SHIFT)
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 856121122b91..11b95dadf7c0 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -17,8 +17,6 @@
 
 extern const char bad_pmd_string[];
 
-#define pmd_alloc_one(mm,address)       ({ BUG(); ((pmd_t *)2); })
-
 #define __pte_free_tlb(tlb,pte,addr)			\
 do {							\
 	pgtable_pte_page_dtor(pte);			\
@@ -41,7 +39,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page
  * inside the pgd, so has no extra memory associated with it.
  */
 #define pmd_free(mm, x)			do { } while (0)
-#define __pmd_free_tlb(tlb, x, addr)	do { } while (0)
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
@@ -58,6 +55,4 @@ static inline pgd_t * pgd_alloc(struct mm_struct *mm)
      return new_pgd;
 }
 
-#define pgd_populate(mm, pmd, pte) BUG()
-
 #endif /* SUN3_PGALLOC_H */
diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h
index c987d50866b4..bc4155264810 100644
--- a/arch/m68k/include/asm/sun3_pgtable.h
+++ b/arch/m68k/include/asm/sun3_pgtable.h
@@ -110,11 +110,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 #define pmd_set(pmdp,ptep) do {} while (0)
 
-static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
-{
-	pgd_val(*pgdp) = virt_to_phys(pmdp);
-}
-
 #define __pte_page(pte) \
 ((unsigned long) __va ((pte_val (pte) & SUN3_PAGE_PGNUM_MASK) << PAGE_SHIFT))
 #define __pmd_page(pmd) \
@@ -145,16 +140,9 @@ static inline int pmd_present2 (pmd_t *pmd) { return pmd_val (*pmd) & SUN3_PMD_V
 #define pmd_present(pmd) (!pmd_none2(&(pmd)))
 static inline void pmd_clear (pmd_t *pmdp) { pmd_val (*pmdp) = 0; }
 
-static inline int pgd_none (pgd_t pgd) { return 0; }
-static inline int pgd_bad (pgd_t pgd) { return 0; }
-static inline int pgd_present (pgd_t pgd) { return 1; }
-static inline void pgd_clear (pgd_t *pgdp) {}
-
 
 #define pte_ERROR(e) \
 	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
-	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
 #define pgd_ERROR(e) \
 	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
@@ -194,12 +182,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
 /* Find an entry in a kernel pagetable directory. */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
-/* Find an entry in the second-level pagetable. */
-static inline pmd_t *pmd_offset (pgd_t *pgd, unsigned long address)
-{
-	return (pmd_t *) pgd;
-}
-
 /* Find an entry in the third-level pagetable. */
 #define pte_index(address) ((address >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
 #define pte_offset_kernel(pmd, address) ((pte_t *) __pmd_page(*pmd) + pte_index(address))
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 6363ec83a290..18a4de7d5934 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -465,6 +465,8 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5,
 	for (;;) {
 		struct mm_struct *mm = current->mm;
 		pgd_t *pgd;
+		p4d_t *p4d;
+		pud_t *pud;
 		pmd_t *pmd;
 		pte_t *pte;
 		spinlock_t *ptl;
@@ -474,7 +476,13 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5,
 		pgd = pgd_offset(mm, (unsigned long)mem);
 		if (!pgd_present(*pgd))
 			goto bad_access;
-		pmd = pmd_offset(pgd, (unsigned long)mem);
+		p4d = p4d_offset(pgd, (unsigned long)mem);
+		if (!p4d_present(*p4d))
+			goto bad_access;
+		pud = pud_offset(p4d, (unsigned long)mem);
+		if (!pud_present(*pud))
+			goto bad_access;
+		pmd = pmd_offset(pud, (unsigned long)mem);
 		if (!pmd_present(*pmd))
 			goto bad_access;
 		pte = pte_offset_map_lock(mm, pmd, (unsigned long)mem, &ptl);
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 778cacb7d57b..27c453f4fffe 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -130,8 +130,10 @@ static inline void init_pointer_tables(void)
 	/* insert pointer tables allocated so far into the tablelist */
 	init_pointer_table((unsigned long)kernel_pg_dir);
 	for (i = 0; i < PTRS_PER_PGD; i++) {
-		if (pgd_present(kernel_pg_dir[i]))
-			init_pointer_table(__pgd_page(kernel_pg_dir[i]));
+		pud_t *pud = (pud_t *)(&kernel_pg_dir[i]);
+
+		if (pud_present(*pud))
+			init_pointer_table(pgd_page_vaddr(kernel_pg_dir[i]));
 	}
 
 	/* insert also pointer table that we used to unmap the zero page */
diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 23f9466aabb5..120030ad8dc4 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -63,18 +63,23 @@ static void __free_io_area(void *addr, unsigned long size)
 {
 	unsigned long virtaddr = (unsigned long)addr;
 	pgd_t *pgd_dir;
+	p4d_t *p4d_dir;
+	pud_t *pud_dir;
 	pmd_t *pmd_dir;
 	pte_t *pte_dir;
 
 	while ((long)size > 0) {
 		pgd_dir = pgd_offset_k(virtaddr);
-		if (pgd_bad(*pgd_dir)) {
-			printk("iounmap: bad pgd(%08lx)\n", pgd_val(*pgd_dir));
-			pgd_clear(pgd_dir);
+		p4d_dir = p4d_offset(pgd_dir, virtaddr);
+		pud_dir = pud_offset(p4d_dir, virtaddr);
+		if (pud_bad(*pud_dir)) {
+			printk("iounmap: bad pud(%08lx)\n", pud_val(*pud_dir));
+			pud_clear(pud_dir);
 			return;
 		}
-		pmd_dir = pmd_offset(pgd_dir, virtaddr);
+		pmd_dir = pmd_offset(pud_dir, virtaddr);
 
+#if CONFIG_PGTABLE_LEVELS == 3
 		if (CPU_IS_020_OR_030) {
 			int pmd_off = (virtaddr/PTRTREESIZE) & 15;
 			int pmd_type = pmd_dir->pmd[pmd_off] & _DESCTYPE_MASK;
@@ -87,6 +92,7 @@ static void __free_io_area(void *addr, unsigned long size)
 			} else if (pmd_type == 0)
 				continue;
 		}
+#endif
 
 		if (pmd_bad(*pmd_dir)) {
 			printk("iounmap: bad pmd (%08lx)\n", pmd_val(*pmd_dir));
@@ -159,6 +165,8 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla
 	unsigned long virtaddr, retaddr;
 	long offset;
 	pgd_t *pgd_dir;
+	p4d_t *p4d_dir;
+	pud_t *pud_dir;
 	pmd_t *pmd_dir;
 	pte_t *pte_dir;
 
@@ -245,18 +253,23 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla
 			printk ("\npa=%#lx va=%#lx ", physaddr, virtaddr);
 #endif
 		pgd_dir = pgd_offset_k(virtaddr);
-		pmd_dir = pmd_alloc(&init_mm, pgd_dir, virtaddr);
+		p4d_dir = p4d_offset(pgd_dir, virtaddr);
+		pud_dir = pud_offset(p4d_dir, virtaddr);
+		pmd_dir = pmd_alloc(&init_mm, pud_dir, virtaddr);
 		if (!pmd_dir) {
 			printk("ioremap: no mem for pmd_dir\n");
 			return NULL;
 		}
 
+#if CONFIG_PGTABLE_LEVELS == 3
 		if (CPU_IS_020_OR_030) {
 			pmd_dir->pmd[(virtaddr/PTRTREESIZE) & 15] = physaddr;
 			physaddr += PTRTREESIZE;
 			virtaddr += PTRTREESIZE;
 			size -= PTRTREESIZE;
-		} else {
+		} else
+#endif
+		{
 			pte_dir = pte_alloc_kernel(pmd_dir, virtaddr);
 			if (!pte_dir) {
 				printk("ioremap: no mem for pte_dir\n");
@@ -307,6 +320,8 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
 {
 	unsigned long virtaddr = (unsigned long)addr;
 	pgd_t *pgd_dir;
+	p4d_t *p4d_dir;
+	pud_t *pud_dir;
 	pmd_t *pmd_dir;
 	pte_t *pte_dir;
 
@@ -341,13 +356,16 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
 
 	while ((long)size > 0) {
 		pgd_dir = pgd_offset_k(virtaddr);
-		if (pgd_bad(*pgd_dir)) {
-			printk("iocachemode: bad pgd(%08lx)\n", pgd_val(*pgd_dir));
-			pgd_clear(pgd_dir);
+		p4d_dir = p4d_offset(pgd_dir, virtaddr);
+		pud_dir = pud_offset(p4d_dir, virtaddr);
+		if (pud_bad(*pud_dir)) {
+			printk("iocachemode: bad pud(%08lx)\n", pud_val(*pud_dir));
+			pud_clear(pud_dir);
 			return;
 		}
-		pmd_dir = pmd_offset(pgd_dir, virtaddr);
+		pmd_dir = pmd_offset(pud_dir, virtaddr);
 
+#if CONFIG_PGTABLE_LEVELS == 3
 		if (CPU_IS_020_OR_030) {
 			int pmd_off = (virtaddr/PTRTREESIZE) & 15;
 
@@ -359,6 +377,7 @@ void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
 				continue;
 			}
 		}
+#endif
 
 		if (pmd_bad(*pmd_dir)) {
 			printk("iocachemode: bad pmd (%08lx)\n", pmd_val(*pmd_dir));
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 6cb1e41d58d0..0ea375607767 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -92,6 +92,8 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
 	unsigned long flags, mmuar, mmutr;
 	struct mm_struct *mm;
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	int asid;
@@ -113,7 +115,19 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
 		return -1;
 	}
 
-	pmd = pmd_offset(pgd, mmuar);
+	p4d = p4d_offset(pgd, mmuar);
+	if (p4d_none(*p4d)) {
+		local_irq_restore(flags);
+		return -1;
+	}
+
+	pud = pud_offset(p4d, mmuar);
+	if (pud_none(*pud)) {
+		local_irq_restore(flags);
+		return -1;
+	}
+
+	pmd = pmd_offset(pud, mmuar);
 	if (pmd_none(*pmd)) {
 		local_irq_restore(flags);
 		return -1;
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 356601bf96d9..4857985b8080 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -82,9 +82,11 @@ static pmd_t * __init kernel_ptr_table(void)
 		 */
 		last = (unsigned long)kernel_pg_dir;
 		for (i = 0; i < PTRS_PER_PGD; i++) {
-			if (!pgd_present(kernel_pg_dir[i]))
+			pud_t *pud = (pud_t *)(&kernel_pg_dir[i]);
+
+			if (!pud_present(*pud))
 				continue;
-			pmd = __pgd_page(kernel_pg_dir[i]);
+			pmd = pgd_page_vaddr(kernel_pg_dir[i]);
 			if (pmd > last)
 				last = pmd;
 		}
@@ -118,6 +120,8 @@ static void __init map_node(int node)
 #define ROOTTREESIZE (32*1024*1024)
 	unsigned long physaddr, virtaddr, size;
 	pgd_t *pgd_dir;
+	p4d_t *p4d_dir;
+	pud_t *pud_dir;
 	pmd_t *pmd_dir;
 	pte_t *pte_dir;
 
@@ -149,14 +153,16 @@ static void __init map_node(int node)
 				continue;
 			}
 		}
-		if (!pgd_present(*pgd_dir)) {
+		p4d_dir = p4d_offset(pgd_dir, virtaddr);
+		pud_dir = pud_offset(p4d_dir, virtaddr);
+		if (!pud_present(*pud_dir)) {
 			pmd_dir = kernel_ptr_table();
 #ifdef DEBUG
 			printk ("[new pointer %p]", pmd_dir);
 #endif
-			pgd_set(pgd_dir, pmd_dir);
+			pud_set(pud_dir, pmd_dir);
 		} else
-			pmd_dir = pmd_offset(pgd_dir, virtaddr);
+			pmd_dir = pmd_offset(pud_dir, virtaddr);
 
 		if (CPU_IS_020_OR_030) {
 			if (virtaddr) {
@@ -304,4 +310,3 @@ void __init paging_init(void)
 			node_set_state(i, N_NORMAL_MEMORY);
 	}
 }
-
diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c
index 89e630e66555..c4b8aa1d80f4 100644
--- a/arch/m68k/sun3x/dvma.c
+++ b/arch/m68k/sun3x/dvma.c
@@ -80,6 +80,8 @@ inline int dvma_map_cpu(unsigned long kaddr,
 			       unsigned long vaddr, int len)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	unsigned long end;
 	int ret = 0;
 
@@ -90,12 +92,14 @@ inline int dvma_map_cpu(unsigned long kaddr,
 
 	pr_debug("dvma: mapping kern %08lx to virt %08lx\n", kaddr, vaddr);
 	pgd = pgd_offset_k(vaddr);
+	p4d = p4d_offset(pgd, vaddr);
+	pud = pud_offset(p4d, vaddr);
 
 	do {
 		pmd_t *pmd;
 		unsigned long end2;
 
-		if((pmd = pmd_alloc(&init_mm, pgd, vaddr)) == NULL) {
+		if((pmd = pmd_alloc(&init_mm, pud, vaddr)) == NULL) {
 			ret = -ENOMEM;
 			goto out;
 		}
@@ -196,4 +200,3 @@ void dvma_unmap_iommu(unsigned long baddr, int len)
 	}
 
 }
-
-- 
cgit v1.2.3-59-g8ed1b


From ed48e1f812b585e2af5dee6e08712c64d75978e2 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 4 Dec 2019 16:54:03 -0800
Subject: microblaze: use pgtable-nopmd instead of 4level-fixup

microblaze has only two-level page tables and can use pgtable-nopmd and
folding of the upper layers.

Replace usage of include/asm-generic/4level-fixup.h and explicit
definition of __PAGETABLE_PMD_FOLDED in microblaze with
include/asm-generic/pgtable-nopmd.h and adjust page table manipulation
macros and functions accordingly.

Link: http://lkml.kernel.org/r/1572938135-31886-7-git-send-email-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Anatoly Pugachev <matorola@gmail.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Helge Deller <deller@gmx.de>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Sam Creasey <sammy@sammy.net>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/microblaze/include/asm/page.h    |  3 ---
 arch/microblaze/include/asm/pgalloc.h | 16 ----------------
 arch/microblaze/include/asm/pgtable.h | 32 ++------------------------------
 arch/microblaze/kernel/signal.c       | 10 +++++++---
 arch/microblaze/mm/init.c             |  7 +++++--
 arch/microblaze/mm/pgtable.c          | 13 +++++++++++--
 6 files changed, 25 insertions(+), 56 deletions(-)

diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index d506bb0893f9..f4b44b24b02e 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -90,7 +90,6 @@ typedef struct { unsigned long	pte; }		pte_t;
 typedef struct { unsigned long	pgprot; }	pgprot_t;
 /* FIXME this can depend on linux kernel version */
 #   ifdef CONFIG_MMU
-typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pgd; } pgd_t;
 #   else /* CONFIG_MMU */
 typedef struct { unsigned long	ste[64]; }	pmd_t;
@@ -103,7 +102,6 @@ typedef struct { p4d_t		pge[1]; }	pgd_t;
 # define pgprot_val(x)	((x).pgprot)
 
 #   ifdef CONFIG_MMU
-#   define pmd_val(x)      ((x).pmd)
 #   define pgd_val(x)      ((x).pgd)
 #   else  /* CONFIG_MMU */
 #   define pmd_val(x)	((x).ste[0])
@@ -112,7 +110,6 @@ typedef struct { p4d_t		pge[1]; }	pgd_t;
 #   endif  /* CONFIG_MMU */
 
 # define __pte(x)	((pte_t) { (x) })
-# define __pmd(x)	((pmd_t) { (x) })
 # define __pgd(x)	((pgd_t) { (x) })
 # define __pgprot(x)	((pgprot_t) { (x) })
 
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 7ecb05baa601..fcf1e23f2e0a 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -41,13 +41,6 @@ static inline void free_pgd(pgd_t *pgd)
 
 #define pmd_pgtable(pmd)	pmd_page(pmd)
 
-/*
- * We don't have any real pmd's, and this code never triggers because
- * the pgd will always be present..
- */
-#define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
-#define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
-
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
 #define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, (pte))
@@ -58,15 +51,6 @@ extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 #define pmd_populate_kernel(mm, pmd, pte) \
 		(pmd_val(*(pmd)) = (unsigned long) (pte))
 
-/*
- * We don't have any real pmd's, and this code never triggers because
- * the pgd will always be present..
- */
-#define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, x)			do { } while (0)
-#define __pmd_free_tlb(tlb, x, addr)	pmd_free((tlb)->mm, x)
-#define pgd_populate(mm, pmd, pte)	BUG()
-
 #endif /* CONFIG_MMU */
 
 #endif /* _ASM_MICROBLAZE_PGALLOC_H */
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 954b69af451f..2def331f9e2c 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -59,9 +59,7 @@ extern int mem_init_done;
 
 #else /* CONFIG_MMU */
 
-#include <asm-generic/4level-fixup.h>
-
-#define __PAGETABLE_PMD_FOLDED 1
+#include <asm-generic/pgtable-nopmd.h>
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
@@ -138,13 +136,8 @@ static inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
  *
  */
 
-/* PMD_SHIFT determines the size of the area mapped by the PTE pages */
-#define PMD_SHIFT	(PAGE_SHIFT + PTE_SHIFT)
-#define PMD_SIZE	(1UL << PMD_SHIFT)
-#define PMD_MASK	(~(PMD_SIZE-1))
-
 /* PGDIR_SHIFT determines what a top-level page table entry can map */
-#define PGDIR_SHIFT	PMD_SHIFT
+#define PGDIR_SHIFT	(PAGE_SHIFT + PTE_SHIFT)
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
@@ -165,9 +158,6 @@ static inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
 #define pte_ERROR(e) \
 	printk(KERN_ERR "%s:%d: bad pte "PTE_FMT".\n", \
 		__FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
-	printk(KERN_ERR "%s:%d: bad pmd %08lx.\n", \
-		__FILE__, __LINE__, pmd_val(e))
 #define pgd_ERROR(e) \
 	printk(KERN_ERR "%s:%d: bad pgd %08lx.\n", \
 		__FILE__, __LINE__, pgd_val(e))
@@ -313,18 +303,6 @@ extern unsigned long empty_zero_page[1024];
 	__pte(((pte_basic_t)(pfn) << PFN_SHIFT_OFFSET) | pgprot_val(prot))
 
 #ifndef __ASSEMBLY__
-/*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- */
-static inline int pgd_none(pgd_t pgd)		{ return 0; }
-static inline int pgd_bad(pgd_t pgd)		{ return 0; }
-static inline int pgd_present(pgd_t pgd)	{ return 1; }
-#define pgd_clear(xp)				do { } while (0)
-#define pgd_page(pgd) \
-	((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
-
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
@@ -479,12 +457,6 @@ static inline void ptep_mkdirty(struct mm_struct *mm,
 #define pgd_index(address)	 ((address) >> PGDIR_SHIFT)
 #define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
 
-/* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address)
-{
-	return (pmd_t *) dir;
-}
-
 /* Find an entry in the third-level page table.. */
 #define pte_index(address)		\
 	(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index cdd4feb279c5..c9125c328949 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -160,6 +160,9 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	int err = 0, sig = ksig->sig;
 	unsigned long address = 0;
 #ifdef CONFIG_MMU
+	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 #endif
@@ -195,9 +198,10 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 
 	address = ((unsigned long)frame->tramp);
 #ifdef CONFIG_MMU
-	pmdp = pmd_offset(pud_offset(
-			pgd_offset(current->mm, address),
-					address), address);
+	pgdp = pgd_offset(current->mm, address);
+	p4dp = p4d_offset(pgdp, address);
+	pudp = pud_offset(p4dp, address);
+	pmdp = pmd_offset(pudp, address);
 
 	preempt_disable();
 	ptep = pte_offset_map(pmdp, address);
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index a015a951c8b7..050fc621c920 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -53,8 +53,11 @@ EXPORT_SYMBOL(kmap_prot);
 
 static inline pte_t *virt_to_kpte(unsigned long vaddr)
 {
-	return pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr),
-			vaddr), vaddr);
+	pgd_t *pgd = pgd_offset_k(vaddr);
+	p4d_t *p4d = p4d_offset(pgd, vaddr);
+	pud_t *pud = pud_offset(p4d, vaddr);
+
+	return pte_offset_kernel(pmd_offset(pud, vaddr), vaddr);
 }
 
 static void __init highmem_init(void)
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 010bb9cee2e4..68c26cacd930 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -134,11 +134,16 @@ EXPORT_SYMBOL(iounmap);
 
 int map_page(unsigned long va, phys_addr_t pa, int flags)
 {
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pd;
 	pte_t *pg;
 	int err = -ENOMEM;
+
 	/* Use upper 10 bits of VA to index the first level map */
-	pd = pmd_offset(pgd_offset_k(va), va);
+	p4d = p4d_offset(pgd_offset_k(va), va);
+	pud = pud_offset(p4d, va);
+	pd = pmd_offset(pud, va);
 	/* Use middle 10 bits of VA to index the second-level map */
 	pg = pte_alloc_kernel(pd, va); /* from powerpc - pgtable.c */
 	/* pg = pte_alloc_kernel(&init_mm, pd, va); */
@@ -188,13 +193,17 @@ void __init mapin_ram(void)
 static int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep)
 {
 	pgd_t	*pgd;
+	p4d_t	*p4d;
+	pud_t	*pud;
 	pmd_t	*pmd;
 	pte_t	*pte;
 	int     retval = 0;
 
 	pgd = pgd_offset(mm, addr & PAGE_MASK);
 	if (pgd) {
-		pmd = pmd_offset(pgd, addr & PAGE_MASK);
+		p4d = p4d_offset(pgd, addr & PAGE_MASK);
+		pud = pud_offset(p4d, addr & PAGE_MASK);
+		pmd = pmd_offset(pud, addr & PAGE_MASK);
 		if (pmd_present(*pmd)) {
 			pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
 			if (pte) {
-- 
cgit v1.2.3-59-g8ed1b


From 7c2763c42326a071220077513a9cae90db46b818 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 4 Dec 2019 16:54:08 -0800
Subject: nds32: use pgtable-nopmd instead of 4level-fixup

nds32 has only two-level page tables and can use pgtable-nopmd and
folding of the upper layers.

Replace usage of include/asm-generic/4level-fixup.h and explicit
definition of __PAGETABLE_PMD_FOLDED in nds32 with
include/asm-generic/pgtable-nopmd.h and adjust page table manipulation
macros and functions accordingly.

Link: http://lkml.kernel.org/r/1572938135-31886-8-git-send-email-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Anatoly Pugachev <matorola@gmail.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Helge Deller <deller@gmx.de>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Sam Creasey <sammy@sammy.net>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/nds32/include/asm/page.h    |  3 ---
 arch/nds32/include/asm/pgalloc.h |  3 ---
 arch/nds32/include/asm/pgtable.h | 12 +-----------
 arch/nds32/include/asm/tlb.h     |  1 -
 arch/nds32/kernel/pm.c           |  4 +++-
 arch/nds32/mm/fault.c            | 16 +++++++++++++---
 arch/nds32/mm/init.c             | 11 ++++++++---
 arch/nds32/mm/mm-nds32.c         |  6 +++++-
 arch/nds32/mm/proc.c             | 26 +++++++++++++++++---------
 9 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h
index 8feb1fa12f01..86b32014c5f9 100644
--- a/arch/nds32/include/asm/page.h
+++ b/arch/nds32/include/asm/page.h
@@ -41,17 +41,14 @@ void clear_page(void *page);
 void copy_page(void *to, void *from);
 
 typedef unsigned long pte_t;
-typedef unsigned long pmd_t;
 typedef unsigned long pgd_t;
 typedef unsigned long pgprot_t;
 
 #define pte_val(x)      (x)
-#define pmd_val(x)      (x)
 #define pgd_val(x)	(x)
 #define pgprot_val(x)   (x)
 
 #define __pte(x)        (x)
-#define __pmd(x)        (x)
 #define __pgd(x)        (x)
 #define __pgprot(x)     (x)
 
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 37125e6884d7..85c117347c86 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -15,9 +15,6 @@
 /*
  * Since we have only two-level page tables, these are trivial
  */
-#define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, pmd)			do { } while (0)
-#define pgd_populate(mm, pmd, pte)	BUG()
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index 6fbf251cfc26..0214e4150539 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -4,8 +4,7 @@
 #ifndef _ASMNDS32_PGTABLE_H
 #define _ASMNDS32_PGTABLE_H
 
-#define __PAGETABLE_PMD_FOLDED 1
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopmd.h>
 #include <linux/sizes.h>
 
 #include <asm/memory.h>
@@ -18,26 +17,20 @@
 #ifdef CONFIG_ANDES_PAGE_SIZE_4KB
 #define PGDIR_SHIFT      22
 #define PTRS_PER_PGD     1024
-#define PMD_SHIFT        22
-#define PTRS_PER_PMD     1
 #define PTRS_PER_PTE     1024
 #endif
 
 #ifdef CONFIG_ANDES_PAGE_SIZE_8KB
 #define PGDIR_SHIFT      24
 #define PTRS_PER_PGD     256
-#define PMD_SHIFT        24
-#define PTRS_PER_PMD     1
 #define PTRS_PER_PTE     2048
 #endif
 
 #ifndef __ASSEMBLY__
 extern void __pte_error(const char *file, int line, unsigned long val);
-extern void __pmd_error(const char *file, int line, unsigned long val);
 extern void __pgd_error(const char *file, int line, unsigned long val);
 
 #define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte_val(pte))
-#define pmd_ERROR(pmd)		__pmd_error(__FILE__, __LINE__, pmd_val(pmd))
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
 #endif /* !__ASSEMBLY__ */
 
@@ -368,9 +361,6 @@ static inline pmd_t __mk_pmd(pte_t * ptep, unsigned long prot)
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(addr)      pgd_offset(&init_mm, addr)
 
-/* Find an entry in the second-level page table.. */
-#define pmd_offset(dir, addr)	((pmd_t *)(dir))
-
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	const unsigned long mask = 0xfff;
diff --git a/arch/nds32/include/asm/tlb.h b/arch/nds32/include/asm/tlb.h
index a8aff1c8b4f4..672603804a3b 100644
--- a/arch/nds32/include/asm/tlb.h
+++ b/arch/nds32/include/asm/tlb.h
@@ -7,6 +7,5 @@
 #include <asm-generic/tlb.h>
 
 #define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, pte)
-#define __pmd_free_tlb(tlb, pmd, addr)	pmd_free((tln)->mm, pmd)
 
 #endif
diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c
index ffa8040d8be7..e25700e125d8 100644
--- a/arch/nds32/kernel/pm.c
+++ b/arch/nds32/kernel/pm.c
@@ -14,6 +14,7 @@ unsigned int *phy_addr_sp_tmp;
 static void nds32_suspend2ram(void)
 {
 	pgd_t *pgdv;
+	p4d_t *p4dv;
 	pud_t *pudv;
 	pmd_t *pmdv;
 	pte_t *ptev;
@@ -21,7 +22,8 @@ static void nds32_suspend2ram(void)
 	pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) &
 		L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume);
 
-	pudv = pud_offset(pgdv, (unsigned int)cpu_resume);
+	p4dv = p4d_offset(pgdv, (unsigned int)cpu_resume);
+	pudv = pud_offset(p4dv, (unsigned int)cpu_resume);
 	pmdv = pmd_offset(pudv, (unsigned int)cpu_resume);
 	ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume);
 
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index 064ae5d2159d..906dfb25353c 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -31,6 +31,8 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
 	pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));
 
 	do {
+		p4d_t *p4d;
+		pud_t *pud;
 		pmd_t *pmd;
 
 		if (pgd_none(*pgd))
@@ -41,7 +43,9 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
 			break;
 		}
 
-		pmd = pmd_offset(pgd, addr);
+		p4d = p4d_offset(pgd, addr);
+		pud = pud_offset(p4d, addr);
+		pmd = pmd_offset(pud, addr);
 #if PTRS_PER_PMD != 1
 		pr_alert(", *pmd=%08lx", pmd_val(*pmd));
 #endif
@@ -359,6 +363,7 @@ vmalloc_fault:
 
 		unsigned int index = pgd_index(addr);
 		pgd_t *pgd, *pgd_k;
+		p4d_t *p4d, *p4d_k;
 		pud_t *pud, *pud_k;
 		pmd_t *pmd, *pmd_k;
 		pte_t *pte_k;
@@ -369,8 +374,13 @@ vmalloc_fault:
 		if (!pgd_present(*pgd_k))
 			goto no_context;
 
-		pud = pud_offset(pgd, addr);
-		pud_k = pud_offset(pgd_k, addr);
+		p4d = p4d_offset(pgd, addr);
+		p4d_k = p4d_offset(pgd_k, addr);
+		if (!p4d_present(*p4d_k))
+			goto no_context;
+
+		pud = pud_offset(p4d, addr);
+		pud_k = pud_offset(p4d_k, addr);
 		if (!pud_present(*pud_k))
 			goto no_context;
 
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index 55703b03d172..0be3833f6814 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -54,6 +54,7 @@ static void __init map_ram(void)
 {
 	unsigned long v, p, e;
 	pgd_t *pge;
+	p4d_t *p4e;
 	pud_t *pue;
 	pmd_t *pme;
 	pte_t *pte;
@@ -69,7 +70,8 @@ static void __init map_ram(void)
 
 	while (p < e) {
 		int j;
-		pue = pud_offset(pge, v);
+		p4e = p4d_offset(pge, v);
+		pue = pud_offset(p4e, v);
 		pme = pmd_offset(pue, v);
 
 		if ((u32) pue != (u32) pge || (u32) pme != (u32) pge) {
@@ -100,6 +102,7 @@ static void __init fixedrange_init(void)
 {
 	unsigned long vaddr;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 #ifdef CONFIG_HIGHMEM
@@ -111,7 +114,8 @@ static void __init fixedrange_init(void)
 	 */
 	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1);
 	pgd = swapper_pg_dir + pgd_index(vaddr);
-	pud = pud_offset(pgd, vaddr);
+	p4d = p4d_offset(pgd, vaddr);
+	pud = pud_offset(p4d, vaddr);
 	pmd = pmd_offset(pud, vaddr);
 	fixmap_pmd_p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 	if (!fixmap_pmd_p)
@@ -126,7 +130,8 @@ static void __init fixedrange_init(void)
 	vaddr = PKMAP_BASE;
 
 	pgd = swapper_pg_dir + pgd_index(vaddr);
-	pud = pud_offset(pgd, vaddr);
+	p4d = p4d_offset(pgd, vaddr);
+	pud = pud_offset(p4d, vaddr);
 	pmd = pmd_offset(pud, vaddr);
 	pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 	if (!pte)
diff --git a/arch/nds32/mm/mm-nds32.c b/arch/nds32/mm/mm-nds32.c
index 3b43798d754f..8503bee882d1 100644
--- a/arch/nds32/mm/mm-nds32.c
+++ b/arch/nds32/mm/mm-nds32.c
@@ -74,6 +74,8 @@ void setup_mm_for_reboot(char mode)
 {
 	unsigned long pmdval;
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	int i;
 
@@ -84,7 +86,9 @@ void setup_mm_for_reboot(char mode)
 
 	for (i = 0; i < USER_PTRS_PER_PGD; i++) {
 		pmdval = (i << PGDIR_SHIFT);
-		pmd = pmd_offset(pgd + i, i << PGDIR_SHIFT);
+		p4d = p4d_offset(pgd, i << PGDIR_SHIFT);
+		pud = pud_offset(p4d, i << PGDIR_SHIFT);
+		pmd = pmd_offset(pud + i, i << PGDIR_SHIFT);
 		set_pmd(pmd, __pmd(pmdval));
 	}
 }
diff --git a/arch/nds32/mm/proc.c b/arch/nds32/mm/proc.c
index ba80992d13a2..837ae7728830 100644
--- a/arch/nds32/mm/proc.c
+++ b/arch/nds32/mm/proc.c
@@ -16,10 +16,14 @@ extern struct cache_info L1_cache_info[2];
 
 int va_kernel_present(unsigned long addr)
 {
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 
-	pmd = pmd_offset(pgd_offset_k(addr), addr);
+	p4d = p4d_offset(pgd_offset_k(addr), addr);
+	pud = pud_offset(p4d, addr);
+	pmd = pmd_offset(pud, addr);
 	if (!pmd_none(*pmd)) {
 		ptep = pte_offset_map(pmd, addr);
 		pte = *ptep;
@@ -32,20 +36,24 @@ int va_kernel_present(unsigned long addr)
 pte_t va_present(struct mm_struct * mm, unsigned long addr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 
 	pgd = pgd_offset(mm, addr);
 	if (!pgd_none(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (!pud_none(*pud)) {
-			pmd = pmd_offset(pud, addr);
-			if (!pmd_none(*pmd)) {
-				ptep = pte_offset_map(pmd, addr);
-				pte = *ptep;
-				if (pte_present(pte))
-					return pte;
+		p4d = p4d_offset(pgd, addr);
+		if (!p4d_none(*p4d)) {
+			pud = pud_offset(p4d, addr);
+			if (!pud_none(*pud)) {
+				pmd = pmd_offset(pud, addr);
+				if (!pmd_none(*pmd)) {
+					ptep = pte_offset_map(pmd, addr);
+					pte = *ptep;
+					if (pte_present(pte))
+						return pte;
+				}
 			}
 		}
 	}
-- 
cgit v1.2.3-59-g8ed1b


From d96885e277b5edcd1e474e8b1579005163f23dbe Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 4 Dec 2019 16:54:12 -0800
Subject: parisc: use pgtable-nopXd instead of 4level-fixup

parisc has two or three levels of page tables and can use appropriate
pgtable-nopXd and folding of the upper layers.

Replace usage of include/asm-generic/4level-fixup.h and explicit
definitions of __PAGETABLE_PxD_FOLDED in parisc with
include/asm-generic/pgtable-nopmd.h for two-level configurations and
with include/asm-generic/pgtable-nopud.h for three-lelve configurations
and adjust page table manipulation macros and functions accordingly.

Link: http://lkml.kernel.org/r/1572938135-31886-9-git-send-email-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Helge Deller <deller@gmx.de>
Cc: Anatoly Pugachev <matorola@gmail.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Sam Creasey <sammy@sammy.net>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/parisc/include/asm/page.h    | 30 +++++++++++++---------
 arch/parisc/include/asm/pgalloc.h | 41 +++++++++++-------------------
 arch/parisc/include/asm/pgtable.h | 52 +++++++++++++++++++--------------------
 arch/parisc/include/asm/tlb.h     |  2 ++
 arch/parisc/kernel/cache.c        | 13 ++++++----
 arch/parisc/kernel/pci-dma.c      |  9 +++++--
 arch/parisc/mm/fixmap.c           | 10 +++++---
 7 files changed, 81 insertions(+), 76 deletions(-)

diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 93caf17ac5e2..796ae29e9b9a 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -42,48 +42,54 @@ typedef struct { unsigned long pte; } pte_t; /* either 32 or 64bit */
 
 /* NOTE: even on 64 bits, these entries are __u32 because we allocate
  * the pmd and pgd in ZONE_DMA (i.e. under 4GB) */
-typedef struct { __u32 pmd; } pmd_t;
 typedef struct { __u32 pgd; } pgd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;
 
-#define pte_val(x)	((x).pte)
-/* These do not work lvalues, so make sure we don't use them as such. */
+#if CONFIG_PGTABLE_LEVELS == 3
+typedef struct { __u32 pmd; } pmd_t;
+#define __pmd(x)	((pmd_t) { (x) } )
+/* pXd_val() do not work as lvalues, so make sure we don't use them as such. */
 #define pmd_val(x)	((x).pmd + 0)
+#endif
+
+#define pte_val(x)	((x).pte)
 #define pgd_val(x)	((x).pgd + 0)
 #define pgprot_val(x)	((x).pgprot)
 
 #define __pte(x)	((pte_t) { (x) } )
-#define __pmd(x)	((pmd_t) { (x) } )
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-#define __pmd_val_set(x,n) (x).pmd = (n)
-#define __pgd_val_set(x,n) (x).pgd = (n)
-
 #else
 /*
  * .. while these make it easier on the compiler
  */
 typedef unsigned long pte_t;
+
+#if CONFIG_PGTABLE_LEVELS == 3
 typedef         __u32 pmd_t;
+#define pmd_val(x)      (x)
+#define __pmd(x)	(x)
+#endif
+
 typedef         __u32 pgd_t;
 typedef unsigned long pgprot_t;
 
 #define pte_val(x)      (x)
-#define pmd_val(x)      (x)
 #define pgd_val(x)      (x)
 #define pgprot_val(x)   (x)
 
 #define __pte(x)        (x)
-#define __pmd(x)	(x)
 #define __pgd(x)        (x)
 #define __pgprot(x)     (x)
 
-#define __pmd_val_set(x,n) (x) = (n)
-#define __pgd_val_set(x,n) (x) = (n)
-
 #endif /* STRICT_MM_TYPECHECKS */
 
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+#if CONFIG_PGTABLE_LEVELS == 3
+#define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
+#endif
+
 typedef struct page *pgtable_t;
 
 typedef struct __physmem_range {
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index d98647c29b74..9ac74da256b8 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -34,13 +34,13 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 		/* Populate first pmd with allocated memory.  We mark it
 		 * with PxD_FLAG_ATTACHED as a signal to the system that this
 		 * pmd entry may not be cleared. */
-		__pgd_val_set(*actual_pgd, (PxD_FLAG_PRESENT | 
-				        PxD_FLAG_VALID | 
-					PxD_FLAG_ATTACHED) 
-			+ (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT));
+		set_pgd(actual_pgd, __pgd((PxD_FLAG_PRESENT |
+				        PxD_FLAG_VALID |
+					PxD_FLAG_ATTACHED)
+			+ (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT)));
 		/* The first pmd entry also is marked with PxD_FLAG_ATTACHED as
 		 * a signal that this pmd may not be freed */
-		__pgd_val_set(*pgd, PxD_FLAG_ATTACHED);
+		set_pgd(pgd, __pgd(PxD_FLAG_ATTACHED));
 #endif
 	}
 	spin_lock_init(pgd_spinlock(actual_pgd));
@@ -59,10 +59,10 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 /* Three Level Page Table Support for pmd's */
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-	__pgd_val_set(*pgd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) +
-		        (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT));
+	set_pud(pud, __pud((PxD_FLAG_PRESENT | PxD_FLAG_VALID) +
+			(__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT)));
 }
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
@@ -88,19 +88,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	free_pages((unsigned long)pmd, PMD_ORDER);
 }
 
-#else
-
-/* Two Level Page Table Support for pmd's */
-
-/*
- * allocating and freeing a pmd is trivial: the 1-entry pmd is
- * inside the pgd, so has no extra memory associated with it.
- */
-
-#define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *)2); })
-#define pmd_free(mm, x)			do { } while (0)
-#define pgd_populate(mm, pmd, pte)	BUG()
-
 #endif
 
 static inline void
@@ -110,14 +97,14 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 	/* preserve the gateway marker if this is the beginning of
 	 * the permanent pmd */
 	if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
-		__pmd_val_set(*pmd, (PxD_FLAG_PRESENT |
-				 PxD_FLAG_VALID |
-				 PxD_FLAG_ATTACHED) 
-			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT));
+		set_pmd(pmd, __pmd((PxD_FLAG_PRESENT |
+				PxD_FLAG_VALID |
+				PxD_FLAG_ATTACHED)
+			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
 	else
 #endif
-		__pmd_val_set(*pmd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) 
-			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT));
+		set_pmd(pmd, __pmd((PxD_FLAG_PRESENT | PxD_FLAG_VALID)
+			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
 }
 
 #define pmd_populate(mm, pmd, pte_page) \
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 4ac374b3a99f..f0a365950536 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -3,7 +3,12 @@
 #define _PARISC_PGTABLE_H
 
 #include <asm/page.h>
-#include <asm-generic/4level-fixup.h>
+
+#if CONFIG_PGTABLE_LEVELS == 3
+#include <asm-generic/pgtable-nopud.h>
+#elif CONFIG_PGTABLE_LEVELS == 2
+#include <asm-generic/pgtable-nopmd.h>
+#endif
 
 #include <asm/fixmap.h>
 
@@ -101,8 +106,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 
 #define pte_ERROR(e) \
 	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#if CONFIG_PGTABLE_LEVELS == 3
 #define pmd_ERROR(e) \
 	printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, (unsigned long)pmd_val(e))
+#endif
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e))
 
@@ -132,19 +139,18 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #define PTRS_PER_PTE    (1UL << BITS_PER_PTE)
 
 /* Definitions for 2nd level */
+#if CONFIG_PGTABLE_LEVELS == 3
 #define PMD_SHIFT       (PLD_SHIFT + BITS_PER_PTE)
 #define PMD_SIZE	(1UL << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
-#if CONFIG_PGTABLE_LEVELS == 3
 #define BITS_PER_PMD	(PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY)
+#define PTRS_PER_PMD    (1UL << BITS_PER_PMD)
 #else
-#define __PAGETABLE_PMD_FOLDED 1
 #define BITS_PER_PMD	0
 #endif
-#define PTRS_PER_PMD    (1UL << BITS_PER_PMD)
 
 /* Definitions for 1st level */
-#define PGDIR_SHIFT	(PMD_SHIFT + BITS_PER_PMD)
+#define PGDIR_SHIFT	(PLD_SHIFT + BITS_PER_PTE + BITS_PER_PMD)
 #if (PGDIR_SHIFT + PAGE_SHIFT + PGD_ORDER - BITS_PER_PGD_ENTRY) > BITS_PER_LONG
 #define BITS_PER_PGD	(BITS_PER_LONG - PGDIR_SHIFT)
 #else
@@ -317,6 +323,8 @@ extern unsigned long *empty_zero_page;
 
 #define pmd_flag(x)	(pmd_val(x) & PxD_FLAG_MASK)
 #define pmd_address(x)	((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
+#define pud_flag(x)	(pud_val(x) & PxD_FLAG_MASK)
+#define pud_address(x)	((unsigned long)(pud_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
 #define pgd_flag(x)	(pgd_val(x) & PxD_FLAG_MASK)
 #define pgd_address(x)	((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
 
@@ -334,42 +342,32 @@ static inline void pmd_clear(pmd_t *pmd) {
 	if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
 		/* This is the entry pointing to the permanent pmd
 		 * attached to the pgd; cannot clear it */
-		__pmd_val_set(*pmd, PxD_FLAG_ATTACHED);
+		set_pmd(pmd, __pmd(PxD_FLAG_ATTACHED));
 	else
 #endif
-		__pmd_val_set(*pmd,  0);
+		set_pmd(pmd,  __pmd(0));
 }
 
 
 #if CONFIG_PGTABLE_LEVELS == 3
-#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd)))
-#define pgd_page(pgd)	virt_to_page((void *)pgd_page_vaddr(pgd))
+#define pud_page_vaddr(pud) ((unsigned long) __va(pud_address(pud)))
+#define pud_page(pud)	virt_to_page((void *)pud_page_vaddr(pud))
 
 /* For 64 bit we have three level tables */
 
-#define pgd_none(x)     (!pgd_val(x))
-#define pgd_bad(x)      (!(pgd_flag(x) & PxD_FLAG_VALID))
-#define pgd_present(x)  (pgd_flag(x) & PxD_FLAG_PRESENT)
-static inline void pgd_clear(pgd_t *pgd) {
+#define pud_none(x)     (!pud_val(x))
+#define pud_bad(x)      (!(pud_flag(x) & PxD_FLAG_VALID))
+#define pud_present(x)  (pud_flag(x) & PxD_FLAG_PRESENT)
+static inline void pud_clear(pud_t *pud) {
 #if CONFIG_PGTABLE_LEVELS == 3
-	if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED)
-		/* This is the permanent pmd attached to the pgd; cannot
+	if(pud_flag(*pud) & PxD_FLAG_ATTACHED)
+		/* This is the permanent pmd attached to the pud; cannot
 		 * free it */
 		return;
 #endif
-	__pgd_val_set(*pgd, 0);
+	set_pud(pud, __pud(0));
 }
-#else
-/*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- */
-static inline int pgd_none(pgd_t pgd)		{ return 0; }
-static inline int pgd_bad(pgd_t pgd)		{ return 0; }
-static inline int pgd_present(pgd_t pgd)	{ return 1; }
-static inline void pgd_clear(pgd_t * pgdp)	{ }
 #endif
 
 /*
@@ -452,7 +450,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #if CONFIG_PGTABLE_LEVELS == 3
 #define pmd_index(addr)         (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
 #define pmd_offset(dir,address) \
-((pmd_t *) pgd_page_vaddr(*(dir)) + pmd_index(address))
+((pmd_t *) pud_page_vaddr(*(dir)) + pmd_index(address))
 #else
 #define pmd_offset(dir,addr) ((pmd_t *) dir)
 #endif
diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h
index 8c0446b04c9e..44235f367674 100644
--- a/arch/parisc/include/asm/tlb.h
+++ b/arch/parisc/include/asm/tlb.h
@@ -4,7 +4,9 @@
 
 #include <asm-generic/tlb.h>
 
+#if CONFIG_PGTABLE_LEVELS == 3
 #define __pmd_free_tlb(tlb, pmd, addr)	pmd_free((tlb)->mm, pmd)
+#endif
 #define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, pte)
 
 #endif
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 2407b0b789d3..1eedfecc5137 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -534,11 +534,14 @@ static inline pte_t *get_ptep(pgd_t *pgd, unsigned long addr)
 	pte_t *ptep = NULL;
 
 	if (!pgd_none(*pgd)) {
-		pud_t *pud = pud_offset(pgd, addr);
-		if (!pud_none(*pud)) {
-			pmd_t *pmd = pmd_offset(pud, addr);
-			if (!pmd_none(*pmd))
-				ptep = pte_offset_map(pmd, addr);
+		p4d_t *p4d = p4d_offset(pgd, addr);
+		if (!p4d_none(*p4d)) {
+			pud_t *pud = pud_offset(p4d, addr);
+			if (!pud_none(*pud)) {
+				pmd_t *pmd = pmd_offset(pud, addr);
+				if (!pmd_none(*pmd))
+					ptep = pte_offset_map(pmd, addr);
+			}
 		}
 	}
 	return ptep;
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index a60d47fd4d55..0f1b460ee715 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -133,9 +133,14 @@ static inline int map_uncached_pages(unsigned long vaddr, unsigned long size,
 
 	dir = pgd_offset_k(vaddr);
 	do {
+		p4d_t *p4d;
+		pud_t *pud;
 		pmd_t *pmd;
-		
-		pmd = pmd_alloc(NULL, dir, vaddr);
+
+		p4d = p4d_offset(dir, vaddr);
+		pud = pud_offset(p4d, vaddr);
+		pmd = pmd_alloc(NULL, pud, vaddr);
+
 		if (!pmd)
 			return -ENOMEM;
 		if (map_pmd_uncached(pmd, vaddr, end - vaddr, &paddr))
diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c
index 474cd241c150..e2d8b0a857ee 100644
--- a/arch/parisc/mm/fixmap.c
+++ b/arch/parisc/mm/fixmap.c
@@ -14,11 +14,13 @@ void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
 {
 	unsigned long vaddr = __fix_to_virt(idx);
 	pgd_t *pgd = pgd_offset_k(vaddr);
-	pmd_t *pmd = pmd_offset(pgd, vaddr);
+	p4d_t *p4d = p4d_offset(pgd, vaddr);
+	pud_t *pud = pud_offset(p4d, vaddr);
+	pmd_t *pmd = pmd_offset(pud, vaddr);
 	pte_t *pte;
 
 	if (pmd_none(*pmd))
-		pmd = pmd_alloc(NULL, pgd, vaddr);
+		pmd = pmd_alloc(NULL, pud, vaddr);
 
 	pte = pte_offset_kernel(pmd, vaddr);
 	if (pte_none(*pte))
@@ -32,7 +34,9 @@ void notrace clear_fixmap(enum fixed_addresses idx)
 {
 	unsigned long vaddr = __fix_to_virt(idx);
 	pgd_t *pgd = pgd_offset_k(vaddr);
-	pmd_t *pmd = pmd_offset(pgd, vaddr);
+	p4d_t *p4d = p4d_offset(pgd, vaddr);
+	pud_t *pud = pud_offset(p4d, vaddr);
+	pmd_t *pmd = pmd_offset(pud, vaddr);
 	pte_t *pte = pte_offset_kernel(pmd, vaddr);
 
 	if (WARN_ON(pte_none(*pte)))
-- 
cgit v1.2.3-59-g8ed1b


From 2fa245c1f8c9f68e26174fade45ccae5b060751d Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 4 Dec 2019 16:54:16 -0800
Subject: parisc/hugetlb: use pgtable-nopXd instead of 4level-fixup

Link: http://lkml.kernel.org/r/1572938135-31886-10-git-send-email-rppt@kernel.org
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Anatoly Pugachev <matorola@gmail.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Sam Creasey <sammy@sammy.net>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/parisc/mm/hugetlbpage.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
index d578809e55cf..0e1e212f1c96 100644
--- a/arch/parisc/mm/hugetlbpage.c
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -49,6 +49,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte = NULL;
@@ -61,7 +62,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	addr &= HPAGE_MASK;
 
 	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	pud = pud_alloc(mm, p4d, addr);
 	if (pud) {
 		pmd = pmd_alloc(mm, pud, addr);
 		if (pmd)
@@ -74,6 +76,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 		       unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte = NULL;
@@ -82,11 +85,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 
 	pgd = pgd_offset(mm, addr);
 	if (!pgd_none(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (!pud_none(*pud)) {
-			pmd = pmd_offset(pud, addr);
-			if (!pmd_none(*pmd))
-				pte = pte_offset_map(pmd, addr);
+		p4d = p4d_offset(pgd, addr);
+		if (!p4d_none(*p4d)) {
+			pud = pud_offset(p4d, addr);
+			if (!pud_none(*pud)) {
+				pmd = pmd_offset(pud, addr);
+				if (!pmd_none(*pmd))
+					pte = pte_offset_map(pmd, addr);
+			}
 		}
 	}
 	return pte;
-- 
cgit v1.2.3-59-g8ed1b


From 7235db268a2777bc380b99b7db49ff7b19c8fb76 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 4 Dec 2019 16:54:20 -0800
Subject: sparc32: use pgtable-nopud instead of 4level-fixup

32-bit version of sparc has three-level page tables and can use
pgtable-nopud and folding of the upper layers.

Replace usage of include/asm-generic/4level-fixup.h with
include/asm-generic/pgtable-nopud.h and adjust page table manipulation
macros and functions accordingly.

Link: http://lkml.kernel.org/r/1572938135-31886-11-git-send-email-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: David S. Miller <davem@davemloft.net>
Tested-by: Anatoly Pugachev <matorola@gmail.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Helge Deller <deller@gmx.de>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Sam Creasey <sammy@sammy.net>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sparc/include/asm/pgalloc_32.h |  6 ++---
 arch/sparc/include/asm/pgtable_32.h | 28 ++++++++++----------
 arch/sparc/mm/fault_32.c            | 11 ++++++--
 arch/sparc/mm/highmem.c             |  6 ++++-
 arch/sparc/mm/io-unit.c             |  6 ++++-
 arch/sparc/mm/iommu.c               |  6 ++++-
 arch/sparc/mm/srmmu.c               | 51 +++++++++++++++++++++++++++++--------
 7 files changed, 81 insertions(+), 33 deletions(-)

diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 10538a4d1a1e..eae0c92ec422 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -26,14 +26,14 @@ static inline void free_pgd_fast(pgd_t *pgd)
 #define pgd_free(mm, pgd)	free_pgd_fast(pgd)
 #define pgd_alloc(mm)	get_pgd_fast()
 
-static inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp)
+static inline void pud_set(pud_t * pudp, pmd_t * pmdp)
 {
 	unsigned long pa = __nocache_pa(pmdp);
 
-	set_pte((pte_t *)pgdp, __pte((SRMMU_ET_PTD | (pa >> 4))));
+	set_pte((pte_t *)pudp, __pte((SRMMU_ET_PTD | (pa >> 4))));
 }
 
-#define pgd_populate(MM, PGD, PMD)      pgd_set(PGD, PMD)
+#define pud_populate(MM, PGD, PMD)      pud_set(PGD, PMD)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm,
 				   unsigned long address)
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 31da44826645..6d6f44c0cad9 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -12,7 +12,7 @@
 #include <linux/const.h>
 
 #ifndef __ASSEMBLY__
-#include <asm-generic/4level-fixup.h>
+#include <asm-generic/pgtable-nopud.h>
 
 #include <linux/spinlock.h>
 #include <linux/mm_types.h>
@@ -132,12 +132,12 @@ static inline struct page *pmd_page(pmd_t pmd)
 	return pfn_to_page((pmd_val(pmd) & SRMMU_PTD_PMASK) >> (PAGE_SHIFT-4));
 }
 
-static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+static inline unsigned long pud_page_vaddr(pud_t pud)
 {
-	if (srmmu_device_memory(pgd_val(pgd))) {
+	if (srmmu_device_memory(pud_val(pud))) {
 		return ~0;
 	} else {
-		unsigned long v = pgd_val(pgd) & SRMMU_PTD_PMASK;
+		unsigned long v = pud_val(pud) & SRMMU_PTD_PMASK;
 		return (unsigned long)__nocache_va(v << 4);
 	}
 }
@@ -184,24 +184,24 @@ static inline void pmd_clear(pmd_t *pmdp)
 		set_pte((pte_t *)&pmdp->pmdv[i], __pte(0));
 }
 
-static inline int pgd_none(pgd_t pgd)          
+static inline int pud_none(pud_t pud)
 {
-	return !(pgd_val(pgd) & 0xFFFFFFF);
+	return !(pud_val(pud) & 0xFFFFFFF);
 }
 
-static inline int pgd_bad(pgd_t pgd)
+static inline int pud_bad(pud_t pud)
 {
-	return (pgd_val(pgd) & SRMMU_ET_MASK) != SRMMU_ET_PTD;
+	return (pud_val(pud) & SRMMU_ET_MASK) != SRMMU_ET_PTD;
 }
 
-static inline int pgd_present(pgd_t pgd)
+static inline int pud_present(pud_t pud)
 {
-	return ((pgd_val(pgd) & SRMMU_ET_MASK) == SRMMU_ET_PTD);
+	return ((pud_val(pud) & SRMMU_ET_MASK) == SRMMU_ET_PTD);
 }
 
-static inline void pgd_clear(pgd_t *pgdp)
+static inline void pud_clear(pud_t *pudp)
 {
-	set_pte((pte_t *)pgdp, __pte(0));
+	set_pte((pte_t *)pudp, __pte(0));
 }
 
 /*
@@ -319,9 +319,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
 /* Find an entry in the second-level page table.. */
-static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address)
+static inline pmd_t *pmd_offset(pud_t * dir, unsigned long address)
 {
-	return (pmd_t *) pgd_page_vaddr(*dir) +
+	return (pmd_t *) pud_page_vaddr(*dir) +
 		((address >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
 }
 
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 8d69de111470..89976c9b936c 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -351,6 +351,8 @@ vmalloc_fault:
 		 */
 		int offset = pgd_index(address);
 		pgd_t *pgd, *pgd_k;
+		p4d_t *p4d, *p4d_k;
+		pud_t *pud, *pud_k;
 		pmd_t *pmd, *pmd_k;
 
 		pgd = tsk->active_mm->pgd + offset;
@@ -363,8 +365,13 @@ vmalloc_fault:
 			return;
 		}
 
-		pmd = pmd_offset(pgd, address);
-		pmd_k = pmd_offset(pgd_k, address);
+		p4d = p4d_offset(pgd, address);
+		pud = pud_offset(p4d, address);
+		pmd = pmd_offset(pud, address);
+
+		p4d_k = p4d_offset(pgd_k, address);
+		pud_k = pud_offset(p4d_k, address);
+		pmd_k = pmd_offset(pud_k, address);
 
 		if (pmd_present(*pmd) || !pmd_present(*pmd_k))
 			goto bad_area_nosemaphore;
diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c
index 86bc2a58d26c..d4a80adea7e5 100644
--- a/arch/sparc/mm/highmem.c
+++ b/arch/sparc/mm/highmem.c
@@ -39,10 +39,14 @@ static pte_t *kmap_pte;
 void __init kmap_init(void)
 {
 	unsigned long address;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *dir;
 
 	address = __fix_to_virt(FIX_KMAP_BEGIN);
-	dir = pmd_offset(pgd_offset_k(address), address);
+	p4d = p4d_offset(pgd_offset_k(address), address);
+	pud = pud_offset(p4d, address);
+	dir = pmd_offset(pud, address);
 
         /* cache the first kmap pte */
         kmap_pte = pte_offset_kernel(dir, address);
diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c
index f770ee7229d8..33a0facd9eb5 100644
--- a/arch/sparc/mm/io-unit.c
+++ b/arch/sparc/mm/io-unit.c
@@ -239,12 +239,16 @@ static void *iounit_alloc(struct device *dev, size_t len,
 		page = va;
 		{
 			pgd_t *pgdp;
+			p4d_t *p4dp;
+			pud_t *pudp;
 			pmd_t *pmdp;
 			pte_t *ptep;
 			long i;
 
 			pgdp = pgd_offset(&init_mm, addr);
-			pmdp = pmd_offset(pgdp, addr);
+			p4dp = p4d_offset(pgdp, addr);
+			pudp = pud_offset(p4dp, addr);
+			pmdp = pmd_offset(pudp, addr);
 			ptep = pte_offset_map(pmdp, addr);
 
 			set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index 71ac353032b6..4d3c6991f0ae 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -343,6 +343,8 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len,
 		page = va;
 		{
 			pgd_t *pgdp;
+			p4d_t *p4dp;
+			pud_t *pudp;
 			pmd_t *pmdp;
 			pte_t *ptep;
 
@@ -354,7 +356,9 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len,
 				__flush_page_to_ram(page);
 
 			pgdp = pgd_offset(&init_mm, addr);
-			pmdp = pmd_offset(pgdp, addr);
+			p4dp = p4d_offset(pgdp, addr);
+			pudp = pud_offset(p4dp, addr);
+			pmdp = pmd_offset(pudp, addr);
 			ptep = pte_offset_map(pmdp, addr);
 
 			set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index cc3ad64479ac..f56c3c9a9793 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -296,6 +296,8 @@ static void __init srmmu_nocache_init(void)
 	void *srmmu_nocache_bitmap;
 	unsigned int bitmap_bits;
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned long paddr, vaddr;
@@ -329,6 +331,8 @@ static void __init srmmu_nocache_init(void)
 
 	while (vaddr < srmmu_nocache_end) {
 		pgd = pgd_offset_k(vaddr);
+		p4d = p4d_offset(__nocache_fix(pgd), vaddr);
+		pud = pud_offset(__nocache_fix(p4d), vaddr);
 		pmd = pmd_offset(__nocache_fix(pgd), vaddr);
 		pte = pte_offset_kernel(__nocache_fix(pmd), vaddr);
 
@@ -516,13 +520,17 @@ static inline void srmmu_mapioaddr(unsigned long physaddr,
 				   unsigned long virt_addr, int bus_type)
 {
 	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 	unsigned long tmp;
 
 	physaddr &= PAGE_MASK;
 	pgdp = pgd_offset_k(virt_addr);
-	pmdp = pmd_offset(pgdp, virt_addr);
+	p4dp = p4d_offset(pgdp, virt_addr);
+	pudp = pud_offset(p4dp, virt_addr);
+	pmdp = pmd_offset(pudp, virt_addr);
 	ptep = pte_offset_kernel(pmdp, virt_addr);
 	tmp = (physaddr >> 4) | SRMMU_ET_PTE;
 
@@ -551,11 +559,16 @@ void srmmu_mapiorange(unsigned int bus, unsigned long xpa,
 static inline void srmmu_unmapioaddr(unsigned long virt_addr)
 {
 	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 
+
 	pgdp = pgd_offset_k(virt_addr);
-	pmdp = pmd_offset(pgdp, virt_addr);
+	p4dp = p4d_offset(pgdp, virt_addr);
+	pudp = pud_offset(p4dp, virt_addr);
+	pmdp = pmd_offset(pudp, virt_addr);
 	ptep = pte_offset_kernel(pmdp, virt_addr);
 
 	/* No need to flush uncacheable page. */
@@ -693,20 +706,24 @@ static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start,
 							unsigned long end)
 {
 	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 
 	while (start < end) {
 		pgdp = pgd_offset_k(start);
-		if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
+		p4dp = p4d_offset(pgdp, start);
+		pudp = pud_offset(p4dp, start);
+		if (pud_none(*(pud_t *)__nocache_fix(pudp))) {
 			pmdp = __srmmu_get_nocache(
 			    SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
 			if (pmdp == NULL)
 				early_pgtable_allocfail("pmd");
 			memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
-			pgd_set(__nocache_fix(pgdp), pmdp);
+			pud_set(__nocache_fix(pudp), pmdp);
 		}
-		pmdp = pmd_offset(__nocache_fix(pgdp), start);
+		pmdp = pmd_offset(__nocache_fix(pudp), start);
 		if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
 			ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
 			if (ptep == NULL)
@@ -724,19 +741,23 @@ static void __init srmmu_allocate_ptable_skeleton(unsigned long start,
 						  unsigned long end)
 {
 	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 
 	while (start < end) {
 		pgdp = pgd_offset_k(start);
-		if (pgd_none(*pgdp)) {
+		p4dp = p4d_offset(pgdp, start);
+		pudp = pud_offset(p4dp, start);
+		if (pud_none(*pudp)) {
 			pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
 			if (pmdp == NULL)
 				early_pgtable_allocfail("pmd");
 			memset(pmdp, 0, SRMMU_PMD_TABLE_SIZE);
-			pgd_set(pgdp, pmdp);
+			pud_set((pud_t *)pgdp, pmdp);
 		}
-		pmdp = pmd_offset(pgdp, start);
+		pmdp = pmd_offset(pudp, start);
 		if (srmmu_pmd_none(*pmdp)) {
 			ptep = __srmmu_get_nocache(PTE_SIZE,
 							     PTE_SIZE);
@@ -779,6 +800,8 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
 	unsigned long probed;
 	unsigned long addr;
 	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep;
 	int what; /* 0 = normal-pte, 1 = pmd-level pte, 2 = pgd-level pte */
@@ -810,18 +833,20 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
 		}
 
 		pgdp = pgd_offset_k(start);
+		p4dp = p4d_offset(pgdp, start);
+		pudp = pud_offset(p4dp, start);
 		if (what == 2) {
 			*(pgd_t *)__nocache_fix(pgdp) = __pgd(probed);
 			start += SRMMU_PGDIR_SIZE;
 			continue;
 		}
-		if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
+		if (pud_none(*(pud_t *)__nocache_fix(pudp))) {
 			pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE,
 						   SRMMU_PMD_TABLE_SIZE);
 			if (pmdp == NULL)
 				early_pgtable_allocfail("pmd");
 			memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
-			pgd_set(__nocache_fix(pgdp), pmdp);
+			pud_set(__nocache_fix(pudp), pmdp);
 		}
 		pmdp = pmd_offset(__nocache_fix(pgdp), start);
 		if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
@@ -906,6 +931,8 @@ void __init srmmu_paging_init(void)
 	phandle cpunode;
 	char node_str[128];
 	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned long pages_avail;
@@ -967,7 +994,9 @@ void __init srmmu_paging_init(void)
 	srmmu_allocate_ptable_skeleton(PKMAP_BASE, PKMAP_END);
 
 	pgd = pgd_offset_k(PKMAP_BASE);
-	pmd = pmd_offset(pgd, PKMAP_BASE);
+	p4d = p4d_offset(pgd, PKMAP_BASE);
+	pud = pud_offset(p4d, PKMAP_BASE);
+	pmd = pmd_offset(pud, PKMAP_BASE);
 	pte = pte_offset_kernel(pmd, PKMAP_BASE);
 	pkmap_page_table = pte;
 
-- 
cgit v1.2.3-59-g8ed1b


From 4e65e76f1e588e6f5d263652179d51b31a2be855 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 4 Dec 2019 16:54:24 -0800
Subject: um: remove unused pxx_offset_proc() and addr_pte() functions

The pxx_offset_proc() and addr_pte() functions are never used.  Remove
them.

Link: http://lkml.kernel.org/r/1572938135-31886-12-git-send-email-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Richard Weinberger <richard@nod.at>
Cc: Anatoly Pugachev <matorola@gmail.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Helge Deller <deller@gmx.de>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Sam Creasey <sammy@sammy.net>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/um/kernel/tlb.c | 29 -----------------------------
 1 file changed, 29 deletions(-)

diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index b7eaf655635c..8425a22142b7 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -490,35 +490,6 @@ kill:
 	force_sig(SIGKILL);
 }
 
-pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
-{
-	return pgd_offset(mm, address);
-}
-
-pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
-{
-	return pud_offset(pgd, address);
-}
-
-pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
-{
-	return pmd_offset(pud, address);
-}
-
-pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
-{
-	return pte_offset_kernel(pmd, address);
-}
-
-pte_t *addr_pte(struct task_struct *task, unsigned long addr)
-{
-	pgd_t *pgd = pgd_offset(task->mm, addr);
-	pud_t *pud = pud_offset(pgd, addr);
-	pmd_t *pmd = pmd_offset(pud, addr);
-
-	return pte_offset_map(pmd, addr);
-}
-
 void flush_tlb_all(void)
 {
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From e19f97ed67d8f9b60e4ce14a7551d3dd45825570 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 4 Dec 2019 16:54:28 -0800
Subject: um: add support for folded p4d page tables

The UML port uses 4 and 5 level fixups to support higher level page
table directories in the generic VM code.

Implement primitives necessary for the 4th level folding, add walks of
p4d level where appropriate and drop usage of __ARCH_USE_5LEVEL_HACK.

Link: http://lkml.kernel.org/r/1572938135-31886-13-git-send-email-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Anatoly Pugachev <matorola@gmail.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Helge Deller <deller@gmx.de>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Sam Creasey <sammy@sammy.net>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/um/include/asm/pgtable-2level.h |  1 -
 arch/um/include/asm/pgtable-3level.h |  1 -
 arch/um/include/asm/pgtable.h        |  3 ++
 arch/um/kernel/mem.c                 |  8 ++++--
 arch/um/kernel/skas/mmu.c            | 12 ++++++--
 arch/um/kernel/skas/uaccess.c        |  7 ++++-
 arch/um/kernel/tlb.c                 | 56 ++++++++++++++++++++++++++++++++----
 arch/um/kernel/trap.c                |  4 ++-
 8 files changed, 78 insertions(+), 14 deletions(-)

diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h
index 32b3d26a7109..32106d31e4ab 100644
--- a/arch/um/include/asm/pgtable-2level.h
+++ b/arch/um/include/asm/pgtable-2level.h
@@ -8,7 +8,6 @@
 #ifndef __UM_PGTABLE_2LEVEL_H
 #define __UM_PGTABLE_2LEVEL_H
 
-#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 /* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h
index 9812269fefc9..8a3b689e0f86 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -7,7 +7,6 @@
 #ifndef __UM_PGTABLE_3LEVEL_H
 #define __UM_PGTABLE_3LEVEL_H
 
-#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 /* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 36a44d58f373..2daa58df2190 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -106,6 +106,9 @@ extern unsigned long end_iomem;
 #define pud_newpage(x)  (pud_val(x) & _PAGE_NEWPAGE)
 #define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE)
 
+#define p4d_newpage(x)  (p4d_val(x) & _PAGE_NEWPAGE)
+#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEWPAGE)
+
 #define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK)
 
 #define pte_page(x) pfn_to_page(pte_pfn(x))
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 417ff647fb37..30885d0b94ac 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -96,6 +96,7 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
 				 pgd_t *pgd_base)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	int i, j;
@@ -107,7 +108,8 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
 	pgd = pgd_base + i;
 
 	for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
-		pud = pud_offset(pgd, vaddr);
+		p4d = p4d_offset(pgd, vaddr);
+		pud = pud_offset(p4d, vaddr);
 		if (pud_none(*pud))
 			one_md_table_init(pud);
 		pmd = pmd_offset(pud, vaddr);
@@ -124,6 +126,7 @@ static void __init fixaddr_user_init( void)
 #ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
 	long size = FIXADDR_USER_END - FIXADDR_USER_START;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -144,7 +147,8 @@ static void __init fixaddr_user_init( void)
 	for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE,
 		      p += PAGE_SIZE) {
 		pgd = swapper_pg_dir + pgd_index(vaddr);
-		pud = pud_offset(pgd, vaddr);
+		p4d = p4d_offset(pgd, vaddr);
+		pud = pud_offset(p4d, vaddr);
 		pmd = pmd_offset(pud, vaddr);
 		pte = pte_offset_kernel(pmd, vaddr);
 		pte_set_val(*pte, p, PAGE_READONLY);
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index b5e3d91fc9c2..3f0d9a573fd6 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -19,15 +19,21 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
 			 unsigned long kernel)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 
 	pgd = pgd_offset(mm, proc);
-	pud = pud_alloc(mm, pgd, proc);
-	if (!pud)
+
+	p4d = p4d_alloc(mm, pgd, proc);
+	if (!p4d)
 		goto out;
 
+	pud = pud_alloc(mm, p4d, proc);
+	if (!pud)
+		goto out_pud;
+
 	pmd = pmd_alloc(mm, pud, proc);
 	if (!pmd)
 		goto out_pmd;
@@ -44,6 +50,8 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
 	pmd_free(mm, pmd);
  out_pmd:
 	pud_free(mm, pud);
+ out_pud:
+	p4d_free(mm, p4d);
  out:
 	return -ENOMEM;
 }
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 3236052f20e6..d617f8dc9c19 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -17,6 +17,7 @@
 pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -27,7 +28,11 @@ pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
 	if (!pgd_present(*pgd))
 		return NULL;
 
-	pud = pud_offset(pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	if (!p4d_present(*p4d))
+		return NULL;
+
+	pud = pud_offset(p4d, addr);
 	if (!pud_present(*pud))
 		return NULL;
 
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 8425a22142b7..80a358c6d652 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -277,7 +277,7 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr,
 	return ret;
 }
 
-static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
+static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
 				   unsigned long end,
 				   struct host_vm_change *hvc)
 {
@@ -285,7 +285,7 @@ static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
 	unsigned long next;
 	int ret = 0;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (!pud_present(*pud)) {
@@ -299,6 +299,28 @@ static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
 	return ret;
 }
 
+static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
+				   unsigned long end,
+				   struct host_vm_change *hvc)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	int ret = 0;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (!p4d_present(*p4d)) {
+			if (hvc->force || p4d_newpage(*p4d)) {
+				ret = add_munmap(addr, next - addr, hvc);
+				p4d_mkuptodate(*p4d);
+			}
+		} else
+			ret = update_pud_range(p4d, addr, next, hvc);
+	} while (p4d++, addr = next, ((addr < end) && !ret));
+	return ret;
+}
+
 void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
 		      unsigned long end_addr, int force)
 {
@@ -316,8 +338,8 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
 				ret = add_munmap(addr, next - addr, &hvc);
 				pgd_mkuptodate(*pgd);
 			}
-		}
-		else ret = update_pud_range(pgd, addr, next, &hvc);
+		} else
+			ret = update_p4d_range(pgd, addr, next, &hvc);
 	} while (pgd++, addr = next, ((addr < end_addr) && !ret));
 
 	if (!ret)
@@ -338,6 +360,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 {
 	struct mm_struct *mm;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -364,7 +387,23 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 			continue;
 		}
 
-		pud = pud_offset(pgd, addr);
+		p4d = p4d_offset(pgd, addr);
+		if (!p4d_present(*p4d)) {
+			last = ADD_ROUND(addr, P4D_SIZE);
+			if (last > end)
+				last = end;
+			if (p4d_newpage(*p4d)) {
+				updated = 1;
+				err = add_munmap(addr, last - addr, &hvc);
+				if (err < 0)
+					panic("munmap failed, errno = %d\n",
+					      -err);
+			}
+			addr = last;
+			continue;
+		}
+
+		pud = pud_offset(p4d, addr);
 		if (!pud_present(*pud)) {
 			last = ADD_ROUND(addr, PUD_SIZE);
 			if (last > end)
@@ -424,6 +463,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -437,7 +477,11 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
 	if (!pgd_present(*pgd))
 		goto kill;
 
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		goto kill;
+
+	pud = pud_offset(p4d, address);
 	if (!pud_present(*pud))
 		goto kill;
 
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index e62296c66c95..818553064f04 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -28,6 +28,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -104,7 +105,8 @@ good_area:
 		}
 
 		pgd = pgd_offset(mm, address);
-		pud = pud_offset(pgd, address);
+		p4d = p4d_offset(pgd, address);
+		pud = pud_offset(p4d, address);
 		pmd = pmd_offset(pud, address);
 		pte = pte_offset_kernel(pmd, address);
 	} while (!pte_present(*pte));
-- 
cgit v1.2.3-59-g8ed1b


From f949286c668aed5aa24acdb5838be9cfd9513bd3 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 4 Dec 2019 16:54:32 -0800
Subject: mm: remove __ARCH_HAS_4LEVEL_HACK and
 include/asm-generic/4level-fixup.h

There are no architectures that use include/asm-generic/4level-fixup.h
therefore it can be removed along with __ARCH_HAS_4LEVEL_HACK define.

Link: http://lkml.kernel.org/r/1572938135-31886-14-git-send-email-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Anatoly Pugachev <matorola@gmail.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Helge Deller <deller@gmx.de>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Sam Creasey <sammy@sammy.net>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/4level-fixup.h | 39 --------------------------------------
 include/linux/mm.h                 | 10 +++++-----
 mm/memory.c                        |  8 --------
 3 files changed, 5 insertions(+), 52 deletions(-)
 delete mode 100644 include/asm-generic/4level-fixup.h

diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h
deleted file mode 100644
index c86cf7cb4bba..000000000000
--- a/include/asm-generic/4level-fixup.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _4LEVEL_FIXUP_H
-#define _4LEVEL_FIXUP_H
-
-#define __ARCH_HAS_4LEVEL_HACK
-#define __PAGETABLE_PUD_FOLDED 1
-
-#define PUD_SHIFT			PGDIR_SHIFT
-#define PUD_SIZE			PGDIR_SIZE
-#define PUD_MASK			PGDIR_MASK
-#define PTRS_PER_PUD			1
-
-#define pud_t				pgd_t
-
-#define pmd_alloc(mm, pud, address) \
-	((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \
- 		NULL: pmd_offset(pud, address))
-
-#define pud_offset(pgd, start)		(pgd)
-#define pud_none(pud)			0
-#define pud_bad(pud)			0
-#define pud_present(pud)		1
-#define pud_ERROR(pud)			do { } while (0)
-#define pud_clear(pud)			pgd_clear(pud)
-#define pud_val(pud)			pgd_val(pud)
-#define pud_populate(mm, pud, pmd)	pgd_populate(mm, pud, pmd)
-#define pud_page(pud)			pgd_page(pud)
-#define pud_page_vaddr(pud)		pgd_page_vaddr(pud)
-
-#undef pud_free_tlb
-#define pud_free_tlb(tlb, x, addr)	do { } while (0)
-#define pud_free(mm, x)			do { } while (0)
-
-#undef  pud_addr_end
-#define pud_addr_end(addr, end)		(end)
-
-#include <asm-generic/5level-fixup.h>
-
-#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b0ef04b6d15..c97ea3b694e6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1838,12 +1838,12 @@ static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
 int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
 int __pte_alloc_kernel(pmd_t *pmd);
 
+#if defined(CONFIG_MMU)
+
 /*
- * The following ifdef needed to get the 4level-fixup.h header to work.
- * Remove it when 4level-fixup.h has been removed.
+ * The following ifdef needed to get the 5level-fixup.h header to work.
+ * Remove it when 5level-fixup.h has been removed.
  */
-#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
-
 #ifndef __ARCH_HAS_5LEVEL_HACK
 static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
 		unsigned long address)
@@ -1865,7 +1865,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 	return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
 		NULL: pmd_offset(pud, address);
 }
-#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
+#endif /* CONFIG_MMU */
 
 #if USE_SPLIT_PTE_PTLOCKS
 #if ALLOC_SPLIT_PTLOCKS
diff --git a/mm/memory.c b/mm/memory.c
index e455160e0f75..606da187d1de 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4197,19 +4197,11 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 	smp_wmb(); /* See comment in __pte_alloc */
 
 	ptl = pud_lock(mm, pud);
-#ifndef __ARCH_HAS_4LEVEL_HACK
 	if (!pud_present(*pud)) {
 		mm_inc_nr_pmds(mm);
 		pud_populate(mm, pud, new);
 	} else	/* Another has populated it */
 		pmd_free(mm, new);
-#else
-	if (!pgd_present(*pud)) {
-		mm_inc_nr_pmds(mm);
-		pgd_populate(mm, pud, new);
-	} else /* Another has populated it */
-		pmd_free(mm, new);
-#endif /* __ARCH_HAS_4LEVEL_HACK */
 	spin_unlock(ptl);
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b