From b9921ecdee66984b00c38c00a358ef3f611d2b50 Mon Sep 17 00:00:00 2001
From: Qiang Huang <h.huangqiang@huawei.com>
Date: Tue, 12 Nov 2013 15:07:22 -0800
Subject: mm: add a helper function to check may oom condition

Use helper function to check if we need to deal with oom condition.

Signed-off-by: Qiang Huang <h.huangqiang@huawei.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/oom.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/oom.h b/include/linux/oom.h
index da60007075b5..4cd62677feb9 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -82,6 +82,11 @@ static inline void oom_killer_enable(void)
 	oom_killer_disabled = false;
 }
 
+static inline bool oom_gfp_allowed(gfp_t gfp_mask)
+{
+	return (gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY);
+}
+
 extern struct task_struct *find_lock_task_mm(struct task_struct *p);
 
 /* sysctls */
-- 
cgit v1.2.3-59-g8ed1b


From 01b0f19707c51ef247404e6af1d4a97a11ba34f7 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hp.com>
Date: Tue, 12 Nov 2013 15:07:25 -0800
Subject: cpu/mem hotplug: add try_online_node() for cpu_up()

cpu_up() has #ifdef CONFIG_MEMORY_HOTPLUG code blocks, which call
mem_online_node() to put its node online if offlined and then call
build_all_zonelists() to initialize the zone list.

These steps are specific to memory hotplug, and should be managed in
mm/memory_hotplug.c.  lock_memory_hotplug() should also be held across
all of these steps.

For this reason, this patch replaces mem_online_node() with
try_online_node(), which performs all of these steps with
lock_memory_hotplug() held.  try_online_node() is named after
try_offline_node() as they have a similar purpose.

There is no functional change in this patch.

Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Reviewed-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h |  8 +++++++-
 kernel/cpu.c                   | 29 +++--------------------------
 mm/memory_hotplug.c            | 16 ++++++++++++++--
 3 files changed, 24 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index dd38e62b84d2..22203c293f07 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -94,6 +94,8 @@ extern void __online_page_set_limits(struct page *page);
 extern void __online_page_increment_counters(struct page *page);
 extern void __online_page_free(struct page *page);
 
+extern int try_online_node(int nid);
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 extern bool is_pageblock_removable_nolock(struct page *page);
 extern int arch_remove_memory(u64 start, u64 size);
@@ -225,6 +227,11 @@ static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
 {
 }
 
+static inline int try_online_node(int nid)
+{
+	return 0;
+}
+
 static inline void lock_memory_hotplug(void) {}
 static inline void unlock_memory_hotplug(void) {}
 
@@ -256,7 +263,6 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
 
 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
-extern int mem_online_node(int nid);
 extern int add_memory(int nid, u64 start, u64 size);
 extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 63aa50d7ce1e..973d034acf84 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -437,11 +437,6 @@ int cpu_up(unsigned int cpu)
 {
 	int err = 0;
 
-#ifdef	CONFIG_MEMORY_HOTPLUG
-	int nid;
-	pg_data_t	*pgdat;
-#endif
-
 	if (!cpu_possible(cpu)) {
 		printk(KERN_ERR "can't online cpu %d because it is not "
 			"configured as may-hotadd at boot time\n", cpu);
@@ -452,27 +447,9 @@ int cpu_up(unsigned int cpu)
 		return -EINVAL;
 	}
 
-#ifdef	CONFIG_MEMORY_HOTPLUG
-	nid = cpu_to_node(cpu);
-	if (!node_online(nid)) {
-		err = mem_online_node(nid);
-		if (err)
-			return err;
-	}
-
-	pgdat = NODE_DATA(nid);
-	if (!pgdat) {
-		printk(KERN_ERR
-			"Can't online cpu %d due to NULL pgdat\n", cpu);
-		return -ENOMEM;
-	}
-
-	if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
-		mutex_lock(&zonelists_mutex);
-		build_all_zonelists(NULL, NULL);
-		mutex_unlock(&zonelists_mutex);
-	}
-#endif
+	err = try_online_node(cpu_to_node(cpu));
+	if (err)
+		return err;
 
 	cpu_maps_update_begin();
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 5118028468eb..8285346be663 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1043,17 +1043,23 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
 }
 
 
-/*
+/**
+ * try_online_node - online a node if offlined
+ *
  * called by cpu_up() to online a node without onlined memory.
  */
-int mem_online_node(int nid)
+int try_online_node(int nid)
 {
 	pg_data_t	*pgdat;
 	int	ret;
 
+	if (node_online(nid))
+		return 0;
+
 	lock_memory_hotplug();
 	pgdat = hotadd_new_pgdat(nid, 0);
 	if (!pgdat) {
+		pr_err("Cannot online node %d due to NULL pgdat\n", nid);
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -1061,6 +1067,12 @@ int mem_online_node(int nid)
 	ret = register_one_node(nid);
 	BUG_ON(ret);
 
+	if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
+		mutex_lock(&zonelists_mutex);
+		build_all_zonelists(NULL, NULL);
+		mutex_unlock(&zonelists_mutex);
+	}
+
 out:
 	unlock_memory_hotplug();
 	return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 948927ee9e4f35f287e61a79c9f0e85ca2202c7d Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 12 Nov 2013 15:07:28 -0800
Subject: mm, mempolicy: make mpol_to_str robust and always succeed

mpol_to_str() should not fail.  Currently, it either fails because the
string buffer is too small or because a string hasn't been defined for a
mempolicy mode.

If a new mempolicy mode is introduced and no string is defined for it,
just warn and return "unknown".

If the buffer is too small, just truncate the string and return, the
same behavior as snprintf().

This also fixes a bug where there was no NUL-byte termination when doing
*p++ = '=' and *p++ = ':' and maxlen has been reached.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Chen Gang <gang.chen@asianux.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c        | 14 ++++++-------
 include/linux/mempolicy.h |  5 ++---
 mm/mempolicy.c            | 52 +++++++++++++++--------------------------------
 3 files changed, 24 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 390bdab01c3c..9f1369fe0afb 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1387,8 +1387,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	struct mm_struct *mm = vma->vm_mm;
 	struct mm_walk walk = {};
 	struct mempolicy *pol;
-	int n;
-	char buffer[50];
+	char buffer[64];
+	int nid;
 
 	if (!mm)
 		return 0;
@@ -1404,10 +1404,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	walk.mm = mm;
 
 	pol = get_vma_policy(task, vma, vma->vm_start);
-	n = mpol_to_str(buffer, sizeof(buffer), pol);
+	mpol_to_str(buffer, sizeof(buffer), pol);
 	mpol_cond_put(pol);
-	if (n < 0)
-		return n;
 
 	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
 
@@ -1460,9 +1458,9 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	if (md->writeback)
 		seq_printf(m, " writeback=%lu", md->writeback);
 
-	for_each_node_state(n, N_MEMORY)
-		if (md->node[n])
-			seq_printf(m, " N%d=%lu", n, md->node[n]);
+	for_each_node_state(nid, N_MEMORY)
+		if (md->node[nid])
+			seq_printf(m, " N%d=%lu", nid, md->node[nid]);
 out:
 	seq_putc(m, '\n');
 
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index ea4d2495c646..9fe426b30a41 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -169,7 +169,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 extern int mpol_parse_str(char *str, struct mempolicy **mpol);
 #endif
 
-extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
+extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
 
 /* Check if a vma is migratable */
 static inline int vma_migratable(struct vm_area_struct *vma)
@@ -307,9 +307,8 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
 }
 #endif
 
-static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
+static inline void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
-	return 0;
 }
 
 static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 71cb253368cb..260b8213a873 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2914,62 +2914,45 @@ out:
  * @maxlen:  length of @buffer
  * @pol:  pointer to mempolicy to be formatted
  *
- * Convert a mempolicy into a string.
- * Returns the number of characters in buffer (if positive)
- * or an error (negative)
+ * Convert @pol into a string.  If @buffer is too short, truncate the string.
+ * Recommend a @maxlen of at least 32 for the longest mode, "interleave", the
+ * longest flag, "relative", and to display at least a few node ids.
  */
-int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
+void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
 	char *p = buffer;
-	int l;
-	nodemask_t nodes;
-	unsigned short mode;
-	unsigned short flags = pol ? pol->flags : 0;
-
-	/*
-	 * Sanity check:  room for longest mode, flag and some nodes
-	 */
-	VM_BUG_ON(maxlen < strlen("interleave") + strlen("relative") + 16);
+	nodemask_t nodes = NODE_MASK_NONE;
+	unsigned short mode = MPOL_DEFAULT;
+	unsigned short flags = 0;
 
-	if (!pol || pol == &default_policy)
-		mode = MPOL_DEFAULT;
-	else
+	if (pol && pol != &default_policy) {
 		mode = pol->mode;
+		flags = pol->flags;
+	}
 
 	switch (mode) {
 	case MPOL_DEFAULT:
-		nodes_clear(nodes);
 		break;
-
 	case MPOL_PREFERRED:
-		nodes_clear(nodes);
 		if (flags & MPOL_F_LOCAL)
 			mode = MPOL_LOCAL;
 		else
 			node_set(pol->v.preferred_node, nodes);
 		break;
-
 	case MPOL_BIND:
-		/* Fall through */
 	case MPOL_INTERLEAVE:
 		nodes = pol->v.nodes;
 		break;
-
 	default:
-		return -EINVAL;
+		WARN_ON_ONCE(1);
+		snprintf(p, maxlen, "unknown");
+		return;
 	}
 
-	l = strlen(policy_modes[mode]);
-	if (buffer + maxlen < p + l + 1)
-		return -ENOSPC;
-
-	strcpy(p, policy_modes[mode]);
-	p += l;
+	p += snprintf(p, maxlen, policy_modes[mode]);
 
 	if (flags & MPOL_MODE_FLAGS) {
-		if (buffer + maxlen < p + 2)
-			return -ENOSPC;
-		*p++ = '=';
+		p += snprintf(p, buffer + maxlen - p, "=");
 
 		/*
 		 * Currently, the only defined flags are mutually exclusive
@@ -2981,10 +2964,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 	}
 
 	if (!nodes_empty(nodes)) {
-		if (buffer + maxlen < p + 2)
-			return -ENOSPC;
-		*p++ = ':';
+		p += snprintf(p, buffer + maxlen - p, ":");
 	 	p += nodelist_scnprintf(p, buffer + maxlen - p, nodes);
 	}
-	return p - buffer;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 85b35feaecd4d2284505b22708795bc1f03fc897 Mon Sep 17 00:00:00 2001
From: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Date: Tue, 12 Nov 2013 15:07:42 -0800
Subject: mm/sparsemem: use PAGES_PER_SECTION to remove redundant nr_pages
 parameter

For below functions,

- sparse_add_one_section()
- kmalloc_section_memmap()
- __kmalloc_section_memmap()
- __kfree_section_memmap()

they are always invoked to operate on one memory section, so it is
redundant to always pass a nr_pages parameter, which is the number of
pages in one section.  So we can directly use the predefined macro
PAGES_PER_SECTION instead of passing the parameter.

Signed-off-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h |  3 +--
 mm/memory_hotplug.c            |  3 +--
 mm/sparse.c                    | 33 +++++++++++++++------------------
 3 files changed, 17 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 22203c293f07..4ca3d951fe91 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -268,8 +268,7 @@ extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
 extern void remove_memory(int nid, u64 start, u64 size);
-extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
-								int nr_pages);
+extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn);
 extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 					  unsigned long pnum);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 8285346be663..1b6fe8ca71e6 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -401,13 +401,12 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 static int __meminit __add_section(int nid, struct zone *zone,
 					unsigned long phys_start_pfn)
 {
-	int nr_pages = PAGES_PER_SECTION;
 	int ret;
 
 	if (pfn_valid(phys_start_pfn))
 		return -EEXIST;
 
-	ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages);
+	ret = sparse_add_one_section(zone, phys_start_pfn);
 
 	if (ret < 0)
 		return ret;
diff --git a/mm/sparse.c b/mm/sparse.c
index 4ac1d7ef548f..fbb9dbc6aca9 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -590,16 +590,15 @@ void __init sparse_init(void)
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
-						 unsigned long nr_pages)
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
 {
 	/* This will make the necessary allocations eventually. */
 	return sparse_mem_map_populate(pnum, nid);
 }
-static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
+static void __kfree_section_memmap(struct page *memmap)
 {
 	unsigned long start = (unsigned long)memmap;
-	unsigned long end = (unsigned long)(memmap + nr_pages);
+	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
 
 	vmemmap_free(start, end);
 }
@@ -613,10 +612,10 @@ static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 #else
-static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
+static struct page *__kmalloc_section_memmap(void)
 {
 	struct page *page, *ret;
-	unsigned long memmap_size = sizeof(struct page) * nr_pages;
+	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
 
 	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
 	if (page)
@@ -634,19 +633,18 @@ got_map_ptr:
 	return ret;
 }
 
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
-						  unsigned long nr_pages)
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
 {
-	return __kmalloc_section_memmap(nr_pages);
+	return __kmalloc_section_memmap();
 }
 
-static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
+static void __kfree_section_memmap(struct page *memmap)
 {
 	if (is_vmalloc_addr(memmap))
 		vfree(memmap);
 	else
 		free_pages((unsigned long)memmap,
-			   get_order(sizeof(struct page) * nr_pages));
+			   get_order(sizeof(struct page) * PAGES_PER_SECTION));
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
@@ -684,8 +682,7 @@ static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
  * set.  If this is <=0, then that means that the passed-in
  * map was not consumed and must be freed.
  */
-int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
-			   int nr_pages)
+int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn)
 {
 	unsigned long section_nr = pfn_to_section_nr(start_pfn);
 	struct pglist_data *pgdat = zone->zone_pgdat;
@@ -702,12 +699,12 @@ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 	ret = sparse_index_init(section_nr, pgdat->node_id);
 	if (ret < 0 && ret != -EEXIST)
 		return ret;
-	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
+	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id);
 	if (!memmap)
 		return -ENOMEM;
 	usemap = __kmalloc_section_usemap();
 	if (!usemap) {
-		__kfree_section_memmap(memmap, nr_pages);
+		__kfree_section_memmap(memmap);
 		return -ENOMEM;
 	}
 
@@ -719,7 +716,7 @@ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 		goto out;
 	}
 
-	memset(memmap, 0, sizeof(struct page) * nr_pages);
+	memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION);
 
 	ms->section_mem_map |= SECTION_MARKED_PRESENT;
 
@@ -729,7 +726,7 @@ out:
 	pgdat_resize_unlock(pgdat, &flags);
 	if (ret <= 0) {
 		kfree(usemap);
-		__kfree_section_memmap(memmap, nr_pages);
+		__kfree_section_memmap(memmap);
 	}
 	return ret;
 }
@@ -771,7 +768,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
 	if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
 		kfree(usemap);
 		if (memmap)
-			__kfree_section_memmap(memmap, PAGES_PER_SECTION);
+			__kfree_section_memmap(memmap);
 		return;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 46c77e2bb07eba3b38edfec76873f12942c49dd3 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Tue, 12 Nov 2013 15:07:50 -0800
Subject: tools/vm/page-types.c: support KPF_SOFTDIRTY bit

The soft dirty bit allows us to track which pages have been written since
the last clear_ref (by "echo 4 > /proc/pid/clear_refs").  This is useful
for userspace applications to know their memory footprints.

Note that the kernel exposes this flag via bit[55] of /proc/pid/pagemap,
and the semantics is not a default one (scheduled to be the default in the
near future.) However, it shifts to the new semantics at the first
clear_ref, and the users of soft dirty bit always do it before utilizing
the bit, so that's not a big deal.  Users must avoid relying on the bit in
page-types before the first clear_ref.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel-page-flags.h |  1 +
 tools/vm/page-types.c             | 32 ++++++++++++++++++++------------
 2 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h
index 546eb6a76934..f65ce09784f1 100644
--- a/include/linux/kernel-page-flags.h
+++ b/include/linux/kernel-page-flags.h
@@ -15,5 +15,6 @@
 #define KPF_OWNER_PRIVATE	37
 #define KPF_ARCH		38
 #define KPF_UNCACHED		39
+#define KPF_SOFTDIRTY		40
 
 #endif /* LINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 71c9c2511ee7..d5e9d6d185c8 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -59,12 +59,14 @@
 #define PM_PSHIFT_BITS      6
 #define PM_PSHIFT_OFFSET    (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
 #define PM_PSHIFT_MASK      (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
-#define PM_PSHIFT(x)        (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
+#define __PM_PSHIFT(x)      (((uint64_t) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
 #define PM_PFRAME_MASK      ((1LL << PM_PSHIFT_OFFSET) - 1)
 #define PM_PFRAME(x)        ((x) & PM_PFRAME_MASK)
 
+#define __PM_SOFT_DIRTY      (1LL)
 #define PM_PRESENT          PM_STATUS(4LL)
 #define PM_SWAP             PM_STATUS(2LL)
+#define PM_SOFT_DIRTY       __PM_PSHIFT(__PM_SOFT_DIRTY)
 
 
 /*
@@ -83,6 +85,7 @@
 #define KPF_OWNER_PRIVATE	37
 #define KPF_ARCH		38
 #define KPF_UNCACHED		39
+#define KPF_SOFTDIRTY		40
 
 /* [48-] take some arbitrary free slots for expanding overloaded flags
  * not part of kernel API
@@ -132,6 +135,7 @@ static const char * const page_flag_names[] = {
 	[KPF_OWNER_PRIVATE]	= "O:owner_private",
 	[KPF_ARCH]		= "h:arch",
 	[KPF_UNCACHED]		= "c:uncached",
+	[KPF_SOFTDIRTY]		= "f:softdirty",
 
 	[KPF_READAHEAD]		= "I:readahead",
 	[KPF_SLOB_FREE]		= "P:slob_free",
@@ -417,7 +421,7 @@ static int bit_mask_ok(uint64_t flags)
 	return 1;
 }
 
-static uint64_t expand_overloaded_flags(uint64_t flags)
+static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme)
 {
 	/* SLOB/SLUB overload several page flags */
 	if (flags & BIT(SLAB)) {
@@ -433,6 +437,9 @@ static uint64_t expand_overloaded_flags(uint64_t flags)
 	if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM))
 		flags ^= BIT(RECLAIM) | BIT(READAHEAD);
 
+	if (pme & PM_SOFT_DIRTY)
+		flags |= BIT(SOFTDIRTY);
+
 	return flags;
 }
 
@@ -448,11 +455,11 @@ static uint64_t well_known_flags(uint64_t flags)
 	return flags;
 }
 
-static uint64_t kpageflags_flags(uint64_t flags)
+static uint64_t kpageflags_flags(uint64_t flags, uint64_t pme)
 {
-	flags = expand_overloaded_flags(flags);
-
-	if (!opt_raw)
+	if (opt_raw)
+		flags = expand_overloaded_flags(flags, pme);
+	else
 		flags = well_known_flags(flags);
 
 	return flags;
@@ -545,9 +552,9 @@ static size_t hash_slot(uint64_t flags)
 }
 
 static void add_page(unsigned long voffset,
-		     unsigned long offset, uint64_t flags)
+		     unsigned long offset, uint64_t flags, uint64_t pme)
 {
-	flags = kpageflags_flags(flags);
+	flags = kpageflags_flags(flags, pme);
 
 	if (!bit_mask_ok(flags))
 		return;
@@ -569,7 +576,8 @@ static void add_page(unsigned long voffset,
 #define KPAGEFLAGS_BATCH	(64 << 10)	/* 64k pages */
 static void walk_pfn(unsigned long voffset,
 		     unsigned long index,
-		     unsigned long count)
+		     unsigned long count,
+		     uint64_t pme)
 {
 	uint64_t buf[KPAGEFLAGS_BATCH];
 	unsigned long batch;
@@ -583,7 +591,7 @@ static void walk_pfn(unsigned long voffset,
 			break;
 
 		for (i = 0; i < pages; i++)
-			add_page(voffset + i, index + i, buf[i]);
+			add_page(voffset + i, index + i, buf[i], pme);
 
 		index += pages;
 		count -= pages;
@@ -608,7 +616,7 @@ static void walk_vma(unsigned long index, unsigned long count)
 		for (i = 0; i < pages; i++) {
 			pfn = pagemap_pfn(buf[i]);
 			if (pfn)
-				walk_pfn(index + i, pfn, 1);
+				walk_pfn(index + i, pfn, 1, buf[i]);
 		}
 
 		index += pages;
@@ -659,7 +667,7 @@ static void walk_addr_ranges(void)
 
 	for (i = 0; i < nr_addr_ranges; i++)
 		if (!opt_pid)
-			walk_pfn(0, opt_offset[i], opt_size[i]);
+			walk_pfn(0, opt_offset[i], opt_size[i], 0);
 		else
 			walk_task(opt_offset[i], opt_size[i]);
 
-- 
cgit v1.2.3-59-g8ed1b


From c4a391b53a72d2df4ee97f96f78c1d5971b47489 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 12 Nov 2013 15:07:51 -0800
Subject: writeback: do not sync data dirtied after sync start

When there are processes heavily creating small files while sync(2) is
running, it can easily happen that quite a few new files are created
between WB_SYNC_NONE and WB_SYNC_ALL pass of sync(2).  That can happen
especially if there are several busy filesystems (remember that sync
traverses filesystems sequentially and waits in WB_SYNC_ALL phase on one
fs before starting it on another fs).  Because WB_SYNC_ALL pass is slow
(e.g.  causes a transaction commit and cache flush for each inode in
ext3), resulting sync(2) times are rather large.

The following script reproduces the problem:

  function run_writers
  {
    for (( i = 0; i < 10; i++ )); do
      mkdir $1/dir$i
      for (( j = 0; j < 40000; j++ )); do
        dd if=/dev/zero of=$1/dir$i/$j bs=4k count=4 &>/dev/null
      done &
    done
  }

  for dir in "$@"; do
    run_writers $dir
  done

  sleep 40
  time sync

Fix the problem by disregarding inodes dirtied after sync(2) was called
in the WB_SYNC_ALL pass.  To allow for this, sync_inodes_sb() now takes
a time stamp when sync has started which is used for setting up work for
flusher threads.

To give some numbers, when the above script is run on two ext4 filesystems
on a simple SATA drive, the average sync time from 10 runs is 267.549
seconds with standard deviation 104.799426.  With the patched kernel,
the average sync time from 10 runs is 2.995 seconds with standard
deviation 0.096.

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Fengguang Wu <fengguang.wu@intel.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c                | 33 ++++++++++++++++++++++-----------
 fs/sync.c                        | 15 +++++++++------
 fs/xfs/xfs_super.c               |  2 +-
 include/linux/writeback.h        |  2 +-
 include/trace/events/writeback.h |  6 +++---
 5 files changed, 36 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 9f4935b8f208..4afdbd6d9678 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -39,13 +39,18 @@
 struct wb_writeback_work {
 	long nr_pages;
 	struct super_block *sb;
-	unsigned long *older_than_this;
+	/*
+	 * Write only inodes dirtied before this time. Don't forget to set
+	 * older_than_this_is_set when you set this.
+	 */
+	unsigned long older_than_this;
 	enum writeback_sync_modes sync_mode;
 	unsigned int tagged_writepages:1;
 	unsigned int for_kupdate:1;
 	unsigned int range_cyclic:1;
 	unsigned int for_background:1;
 	unsigned int for_sync:1;	/* sync(2) WB_SYNC_ALL writeback */
+	unsigned int older_than_this_is_set:1;
 	enum wb_reason reason;		/* why was writeback initiated? */
 
 	struct list_head list;		/* pending work list */
@@ -246,10 +251,10 @@ static int move_expired_inodes(struct list_head *delaying_queue,
 	int do_sb_sort = 0;
 	int moved = 0;
 
+	WARN_ON_ONCE(!work->older_than_this_is_set);
 	while (!list_empty(delaying_queue)) {
 		inode = wb_inode(delaying_queue->prev);
-		if (work->older_than_this &&
-		    inode_dirtied_after(inode, *work->older_than_this))
+		if (inode_dirtied_after(inode, work->older_than_this))
 			break;
 		list_move(&inode->i_wb_list, &tmp);
 		moved++;
@@ -733,6 +738,8 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
 		.sync_mode	= WB_SYNC_NONE,
 		.range_cyclic	= 1,
 		.reason		= reason,
+		.older_than_this = jiffies,
+		.older_than_this_is_set = 1,
 	};
 
 	spin_lock(&wb->list_lock);
@@ -791,12 +798,13 @@ static long wb_writeback(struct bdi_writeback *wb,
 {
 	unsigned long wb_start = jiffies;
 	long nr_pages = work->nr_pages;
-	unsigned long oldest_jif;
 	struct inode *inode;
 	long progress;
 
-	oldest_jif = jiffies;
-	work->older_than_this = &oldest_jif;
+	if (!work->older_than_this_is_set) {
+		work->older_than_this = jiffies;
+		work->older_than_this_is_set = 1;
+	}
 
 	spin_lock(&wb->list_lock);
 	for (;;) {
@@ -830,10 +838,10 @@ static long wb_writeback(struct bdi_writeback *wb,
 		 * safe.
 		 */
 		if (work->for_kupdate) {
-			oldest_jif = jiffies -
+			work->older_than_this = jiffies -
 				msecs_to_jiffies(dirty_expire_interval * 10);
 		} else if (work->for_background)
-			oldest_jif = jiffies;
+			work->older_than_this = jiffies;
 
 		trace_writeback_start(wb->bdi, work);
 		if (list_empty(&wb->b_io))
@@ -1345,18 +1353,21 @@ EXPORT_SYMBOL(try_to_writeback_inodes_sb);
 
 /**
  * sync_inodes_sb	-	sync sb inode pages
- * @sb: the superblock
+ * @sb:			the superblock
+ * @older_than_this:	timestamp
  *
  * This function writes and waits on any dirty inode belonging to this
- * super_block.
+ * superblock that has been dirtied before given timestamp.
  */
-void sync_inodes_sb(struct super_block *sb)
+void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct wb_writeback_work work = {
 		.sb		= sb,
 		.sync_mode	= WB_SYNC_ALL,
 		.nr_pages	= LONG_MAX,
+		.older_than_this = older_than_this,
+		.older_than_this_is_set = 1,
 		.range_cyclic	= 0,
 		.done		= &done,
 		.reason		= WB_REASON_SYNC,
diff --git a/fs/sync.c b/fs/sync.c
index 905f3f6b3d85..ff96f99fef64 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -27,10 +27,11 @@
  * wait == 1 case since in that case write_inode() functions do
  * sync_dirty_buffer() and thus effectively write one block at a time.
  */
-static int __sync_filesystem(struct super_block *sb, int wait)
+static int __sync_filesystem(struct super_block *sb, int wait,
+			     unsigned long start)
 {
 	if (wait)
-		sync_inodes_sb(sb);
+		sync_inodes_sb(sb, start);
 	else
 		writeback_inodes_sb(sb, WB_REASON_SYNC);
 
@@ -47,6 +48,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 int sync_filesystem(struct super_block *sb)
 {
 	int ret;
+	unsigned long start = jiffies;
 
 	/*
 	 * We need to be protected against the filesystem going from
@@ -60,17 +62,17 @@ int sync_filesystem(struct super_block *sb)
 	if (sb->s_flags & MS_RDONLY)
 		return 0;
 
-	ret = __sync_filesystem(sb, 0);
+	ret = __sync_filesystem(sb, 0, start);
 	if (ret < 0)
 		return ret;
-	return __sync_filesystem(sb, 1);
+	return __sync_filesystem(sb, 1, start);
 }
 EXPORT_SYMBOL_GPL(sync_filesystem);
 
 static void sync_inodes_one_sb(struct super_block *sb, void *arg)
 {
 	if (!(sb->s_flags & MS_RDONLY))
-		sync_inodes_sb(sb);
+		sync_inodes_sb(sb, *((unsigned long *)arg));
 }
 
 static void sync_fs_one_sb(struct super_block *sb, void *arg)
@@ -102,9 +104,10 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
 SYSCALL_DEFINE0(sync)
 {
 	int nowait = 0, wait = 1;
+	unsigned long start = jiffies;
 
 	wakeup_flusher_threads(0, WB_REASON_SYNC);
-	iterate_supers(sync_inodes_one_sb, NULL);
+	iterate_supers(sync_inodes_one_sb, &start);
 	iterate_supers(sync_fs_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &wait);
 	iterate_bdevs(fdatawrite_one_bdev, NULL);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 15188cc99449..8968f5036fa1 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -918,7 +918,7 @@ xfs_flush_inodes(
 	struct super_block	*sb = mp->m_super;
 
 	if (down_read_trylock(&sb->s_umount)) {
-		sync_inodes_sb(sb);
+		sync_inodes_sb(sb, jiffies);
 		up_read(&sb->s_umount);
 	}
 }
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 021b8a319b9e..fc0e4320aa6d 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -97,7 +97,7 @@ void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
 int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
 int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
 				  enum wb_reason reason);
-void sync_inodes_sb(struct super_block *);
+void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this);
 void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
 void inode_wait_for_writeback(struct inode *inode);
 
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 464ea82e10db..c7bbbe794e65 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -287,11 +287,11 @@ TRACE_EVENT(writeback_queue_io,
 		__field(int,		reason)
 	),
 	TP_fast_assign(
-		unsigned long *older_than_this = work->older_than_this;
+		unsigned long older_than_this = work->older_than_this;
 		strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
-		__entry->older	= older_than_this ?  *older_than_this : 0;
+		__entry->older	= older_than_this;
 		__entry->age	= older_than_this ?
-				  (jiffies - *older_than_this) * 1000 / HZ : -1;
+				  (jiffies - older_than_this) * 1000 / HZ : -1;
 		__entry->moved	= moved;
 		__entry->reason	= work->reason;
 	),
-- 
cgit v1.2.3-59-g8ed1b


From 79442ed189acb8b949662676e750eda173c06f9b Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Tue, 12 Nov 2013 15:07:59 -0800
Subject: mm/memblock.c: introduce bottom-up allocation mode

The Linux kernel cannot migrate pages used by the kernel.  As a result,
kernel pages cannot be hot-removed.  So we cannot allocate hotpluggable
memory for the kernel.

ACPI SRAT (System Resource Affinity Table) contains the memory hotplug
info.  But before SRAT is parsed, memblock has already started to allocate
memory for the kernel.  So we need to prevent memblock from doing this.

In a memory hotplug system, any NUMA node the kernel resides in should be
unhotpluggable.  And for a modern server, each node could have at least
16GB of memory.  So memory around the kernel image is highly likely to be
unhotpluggable.

So the basic idea is: allocate memory starting from the end of the kernel
image and going toward higher memory.  Since not much memory is allocated
before SRAT is parsed, it is highly likely to be in the same node as the
kernel image.

The current memblock can only allocate memory top-down.  So this patch
introduces a new bottom-up allocation mode to allocate memory bottom-up.
And later when we use this allocation direction to allocate memory, we
will limit the start address above the kernel.

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Acked-by: Toshi Kani <toshi.kani@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Thomas Renninger <trenn@suse.de>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Taku Izumi <izumi.taku@jp.fujitsu.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 24 ++++++++++++++
 include/linux/mm.h       |  4 +++
 mm/memblock.c            | 83 ++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 108 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 31e95acddb4d..77c60e52939d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -35,6 +35,7 @@ struct memblock_type {
 };
 
 struct memblock {
+	bool bottom_up;  /* is bottom up direction? */
 	phys_addr_t current_limit;
 	struct memblock_type memory;
 	struct memblock_type reserved;
@@ -148,6 +149,29 @@ phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
 
 phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
 
+#ifdef CONFIG_MOVABLE_NODE
+/*
+ * Set the allocation direction to bottom-up or top-down.
+ */
+static inline void memblock_set_bottom_up(bool enable)
+{
+	memblock.bottom_up = enable;
+}
+
+/*
+ * Check if the allocation direction is bottom-up or not.
+ * if this is true, that said, memblock will allocate memory
+ * in bottom-up direction.
+ */
+static inline bool memblock_bottom_up(void)
+{
+	return memblock.bottom_up;
+}
+#else
+static inline void memblock_set_bottom_up(bool enable) {}
+static inline bool memblock_bottom_up(void) { return false; }
+#endif
+
 /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */
 #define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)
 #define MEMBLOCK_ALLOC_ACCESSIBLE	0
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8aa4006b9636..42a35d94b82c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -50,6 +50,10 @@ extern int sysctl_legacy_va_layout;
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 
+#ifndef __pa_symbol
+#define __pa_symbol(x)  __pa(RELOC_HIDE((unsigned long)(x), 0))
+#endif
+
 extern unsigned long sysctl_user_reserve_kbytes;
 extern unsigned long sysctl_admin_reserve_kbytes;
 
diff --git a/mm/memblock.c b/mm/memblock.c
index accff1087137..53e477bb5558 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,6 +20,8 @@
 #include <linux/seq_file.h>
 #include <linux/memblock.h>
 
+#include <asm-generic/sections.h>
+
 static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 
@@ -32,6 +34,7 @@ struct memblock memblock __initdata_memblock = {
 	.reserved.cnt		= 1,	/* empty dummy entry */
 	.reserved.max		= INIT_MEMBLOCK_REGIONS,
 
+	.bottom_up		= false,
 	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
 };
 
@@ -82,6 +85,38 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
 	return (i < type->cnt) ? i : -1;
 }
 
+/*
+ * __memblock_find_range_bottom_up - find free area utility in bottom-up
+ * @start: start of candidate range
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @size: size of free area to find
+ * @align: alignment of free area to find
+ * @nid: nid of the free area to find, %MAX_NUMNODES for any node
+ *
+ * Utility called from memblock_find_in_range_node(), find free area bottom-up.
+ *
+ * RETURNS:
+ * Found address on success, 0 on failure.
+ */
+static phys_addr_t __init_memblock
+__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
+				phys_addr_t size, phys_addr_t align, int nid)
+{
+	phys_addr_t this_start, this_end, cand;
+	u64 i;
+
+	for_each_free_mem_range(i, nid, &this_start, &this_end, NULL) {
+		this_start = clamp(this_start, start, end);
+		this_end = clamp(this_end, start, end);
+
+		cand = round_up(this_start, align);
+		if (cand < this_end && this_end - cand >= size)
+			return cand;
+	}
+
+	return 0;
+}
+
 /**
  * __memblock_find_range_top_down - find free area utility, in top-down
  * @start: start of candidate range
@@ -93,7 +128,7 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
  * Utility called from memblock_find_in_range_node(), find free area top-down.
  *
  * RETURNS:
- * Found address on success, %0 on failure.
+ * Found address on success, 0 on failure.
  */
 static phys_addr_t __init_memblock
 __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
@@ -127,13 +162,24 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
  *
  * Find @size free area aligned to @align in the specified range and node.
  *
+ * When allocation direction is bottom-up, the @start should be greater
+ * than the end of the kernel image. Otherwise, it will be trimmed. The
+ * reason is that we want the bottom-up allocation just near the kernel
+ * image so it is highly likely that the allocated memory and the kernel
+ * will reside in the same node.
+ *
+ * If bottom-up allocation failed, will try to allocate memory top-down.
+ *
  * RETURNS:
- * Found address on success, %0 on failure.
+ * Found address on success, 0 on failure.
  */
 phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
 					phys_addr_t end, phys_addr_t size,
 					phys_addr_t align, int nid)
 {
+	int ret;
+	phys_addr_t kernel_end;
+
 	/* pump up @end */
 	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
 		end = memblock.current_limit;
@@ -141,6 +187,37 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
 	/* avoid allocating the first page */
 	start = max_t(phys_addr_t, start, PAGE_SIZE);
 	end = max(start, end);
+	kernel_end = __pa_symbol(_end);
+
+	/*
+	 * try bottom-up allocation only when bottom-up mode
+	 * is set and @end is above the kernel image.
+	 */
+	if (memblock_bottom_up() && end > kernel_end) {
+		phys_addr_t bottom_up_start;
+
+		/* make sure we will allocate above the kernel */
+		bottom_up_start = max(start, kernel_end);
+
+		/* ok, try bottom-up allocation first */
+		ret = __memblock_find_range_bottom_up(bottom_up_start, end,
+						      size, align, nid);
+		if (ret)
+			return ret;
+
+		/*
+		 * we always limit bottom-up allocation above the kernel,
+		 * but top-down allocation doesn't have the limit, so
+		 * retrying top-down allocation may succeed when bottom-up
+		 * allocation failed.
+		 *
+		 * bottom-up allocation is expected to be fail very rarely,
+		 * so we use WARN_ONCE() here to see the stack trace if
+		 * fail happens.
+		 */
+		WARN_ONCE(1, "memblock: bottom-up allocation failed, "
+			     "memory hotunplug may be affected\n");
+	}
 
 	return __memblock_find_range_top_down(start, end, size, align, nid);
 }
@@ -155,7 +232,7 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
  * Find @size free area aligned to @align in the specified range.
  *
  * RETURNS:
- * Found address on success, %0 on failure.
+ * Found address on success, 0 on failure.
  */
 phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
 					phys_addr_t end, phys_addr_t size,
-- 
cgit v1.2.3-59-g8ed1b


From 52c8f6a5aeb0bdd396849ecaa72d96f8175528f5 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 12 Nov 2013 15:08:19 -0800
Subject: mm: get rid of unnecessary overhead of trace_mm_page_alloc_extfrag()

In general, every tracepoint should have zero overhead when it is disabled.
However, trace_mm_page_alloc_extfrag() is an exception: it evaluates
"new_type == start_migratetype" even if the tracepoint is disabled.

However, the code can be moved into the tracepoint's TP_fast_assign(),
which exists for exactly this purpose.  This patch does so.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/kmem.h | 10 ++++------
 mm/page_alloc.c             |  5 ++---
 2 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index d0c613476620..aece1346ceb7 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -267,14 +267,12 @@ DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain,
 TRACE_EVENT(mm_page_alloc_extfrag,
 
 	TP_PROTO(struct page *page,
-			int alloc_order, int fallback_order,
-			int alloc_migratetype, int fallback_migratetype,
-			int change_ownership),
+		int alloc_order, int fallback_order,
+		int alloc_migratetype, int fallback_migratetype, int new_migratetype),
 
 	TP_ARGS(page,
 		alloc_order, fallback_order,
-		alloc_migratetype, fallback_migratetype,
-		change_ownership),
+		alloc_migratetype, fallback_migratetype, new_migratetype),
 
 	TP_STRUCT__entry(
 		__field(	struct page *,	page			)
@@ -291,7 +289,7 @@ TRACE_EVENT(mm_page_alloc_extfrag,
 		__entry->fallback_order		= fallback_order;
 		__entry->alloc_migratetype	= alloc_migratetype;
 		__entry->fallback_migratetype	= fallback_migratetype;
-		__entry->change_ownership	= change_ownership;
+		__entry->change_ownership	= (new_migratetype == alloc_migratetype);
 	),
 
 	TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d",
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5a9883614d99..442f1298f9a7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1103,9 +1103,8 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 			       is_migrate_cma(migratetype)
 			     ? migratetype : start_migratetype);
 
-			trace_mm_page_alloc_extfrag(page, order,
-				current_order, start_migratetype, migratetype,
-				new_type == start_migratetype);
+			trace_mm_page_alloc_extfrag(page, order, current_order,
+				start_migratetype, migratetype, new_type);
 
 			return page;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 00619bcc44d6b779aa366130b354153c222e4380 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Tue, 12 Nov 2013 15:08:31 -0800
Subject: mm: factor commit limit calculation

The same calculation is currently done in three different places.
Factor out that code so future changes have to be made in only one place.

[akpm@linux-foundation.org: uninline vm_commit_limit()]
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/meminfo.c    |  5 +----
 include/linux/mman.h |  2 ++
 mm/mmap.c            |  4 +---
 mm/nommu.c           |  3 +--
 mm/util.c            | 13 +++++++++++++
 5 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 59d85d608898..c805d5b69ba1 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -24,7 +24,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 {
 	struct sysinfo i;
 	unsigned long committed;
-	unsigned long allowed;
 	struct vmalloc_info vmi;
 	long cached;
 	unsigned long pages[NR_LRU_LISTS];
@@ -37,8 +36,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 	si_meminfo(&i);
 	si_swapinfo(&i);
 	committed = percpu_counter_read_positive(&vm_committed_as);
-	allowed = ((totalram_pages - hugetlb_total_pages())
-		* sysctl_overcommit_ratio / 100) + total_swap_pages;
 
 	cached = global_page_state(NR_FILE_PAGES) -
 			total_swapcache_pages() - i.bufferram;
@@ -147,7 +144,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		K(global_page_state(NR_UNSTABLE_NFS)),
 		K(global_page_state(NR_BOUNCE)),
 		K(global_page_state(NR_WRITEBACK_TEMP)),
-		K(allowed),
+		K(vm_commit_limit()),
 		K(committed),
 		(unsigned long)VMALLOC_TOTAL >> 10,
 		vmi.used >> 10,
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 92dc257251e4..7f7f8dae4b1d 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -87,4 +87,6 @@ calc_vm_flag_bits(unsigned long flags)
 	       _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
 	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    );
 }
+
+unsigned long vm_commit_limit(void);
 #endif /* _LINUX_MMAN_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index 3d3e224be771..803048e9c568 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -179,14 +179,12 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 		goto error;
 	}
 
-	allowed = (totalram_pages - hugetlb_total_pages())
-	       	* sysctl_overcommit_ratio / 100;
+	allowed = vm_commit_limit();
 	/*
 	 * Reserve some for root
 	 */
 	if (!cap_sys_admin)
 		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
-	allowed += total_swap_pages;
 
 	/*
 	 * Don't let a single process grow so big a user can't recover
diff --git a/mm/nommu.c b/mm/nommu.c
index ecd1f158548e..d8a957bb9e31 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1948,13 +1948,12 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 		goto error;
 	}
 
-	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
+	allowed = vm_commit_limit();
 	/*
 	 * Reserve some 3% for root
 	 */
 	if (!cap_sys_admin)
 		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
-	allowed += total_swap_pages;
 
 	/*
 	 * Don't let a single process grow so big a user can't recover
diff --git a/mm/util.c b/mm/util.c
index eaf63fc2c92f..f7bc2096071c 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -7,6 +7,9 @@
 #include <linux/security.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/mman.h>
+#include <linux/hugetlb.h>
+
 #include <asm/uaccess.h>
 
 #include "internal.h"
@@ -398,6 +401,16 @@ struct address_space *page_mapping(struct page *page)
 	return mapping;
 }
 
+/*
+ * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
+ */
+unsigned long vm_commit_limit(void)
+{
+	return ((totalram_pages - hugetlb_total_pages())
+		* sysctl_overcommit_ratio / 100) + total_swap_pages;
+}
+
+
 /* Tracepoints definitions. */
 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
-- 
cgit v1.2.3-59-g8ed1b


From 72403b4a0fbdf433c1fe0127e49864658f6f6468 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 12 Nov 2013 15:08:32 -0800
Subject: mm: numa: return the number of base pages altered by protection
 changes

Commit 0255d4918480 ("mm: Account for a THP NUMA hinting update as one
PTE update") was added to account for the number of PTE updates when
marking pages prot_numa.  task_numa_work was using the old return value
to track how much address space had been updated.  Altering the return
value causes the scanner to do more work than it is configured or
documented to in a single unit of work.

This patch reverts that commit and accounts for the number of THP
updates separately in vmstat.  It is up to the administrator to
interpret the pair of values correctly.  This is a straight-forward
operation and likely to only be of interest when actively debugging NUMA
balancing problems.

The impact of this patch is that the NUMA PTE scanner will scan slower
when THP is enabled and workloads may converge slower as a result.  On
the flip side, system CPU usage should be lower than recent tests
reported.  This is an illustrative example of a short single JVM specjbb
test

specjbb
                       3.12.0                3.12.0
                      vanilla      acctupdates
TPut 1      26143.00 (  0.00%)     25747.00 ( -1.51%)
TPut 7     185257.00 (  0.00%)    183202.00 ( -1.11%)
TPut 13    329760.00 (  0.00%)    346577.00 (  5.10%)
TPut 19    442502.00 (  0.00%)    460146.00 (  3.99%)
TPut 25    540634.00 (  0.00%)    549053.00 (  1.56%)
TPut 31    512098.00 (  0.00%)    519611.00 (  1.47%)
TPut 37    461276.00 (  0.00%)    474973.00 (  2.97%)
TPut 43    403089.00 (  0.00%)    414172.00 (  2.75%)

              3.12.0      3.12.0
             vanilla acctupdates
User         5169.64     5184.14
System        100.45       80.02
Elapsed       252.75      251.85

Performance is similar but note the reduction in system CPU time.  While
this showed a performance gain, it will not be universal but at least
it'll be behaving as documented.  The vmstats are obviously different but
here is an obvious interpretation of them from mmtests.

                                3.12.0      3.12.0
                               vanilla acctupdates
NUMA page range updates        1408326    11043064
NUMA huge PMD updates                0       21040
NUMA PTE updates               1408326      291624

"NUMA page range updates" == nr_pte_updates and is the value returned to
the NUMA pte scanner.  NUMA huge PMD updates were the number of THP
updates which in combination can be used to calculate how many ptes were
updated from userspace.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reported-by: Alex Thorlton <athorlton@sgi.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vm_event_item.h |  1 +
 mm/mprotect.c                 | 10 +++++++---
 mm/vmstat.c                   |  1 +
 3 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 1855f0a22add..c557c6d096de 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -39,6 +39,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
 #ifdef CONFIG_NUMA_BALANCING
 		NUMA_PTE_UPDATES,
+		NUMA_HUGE_PTE_UPDATES,
 		NUMA_HINT_FAULTS,
 		NUMA_HINT_FAULTS_LOCAL,
 		NUMA_PAGE_MIGRATE,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a597f2ffcd6f..26667971c824 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -112,6 +112,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 	pmd_t *pmd;
 	unsigned long next;
 	unsigned long pages = 0;
+	unsigned long nr_huge_updates = 0;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -126,9 +127,10 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 						newprot, prot_numa);
 
 				if (nr_ptes) {
-					if (nr_ptes == HPAGE_PMD_NR)
-						pages++;
-
+					if (nr_ptes == HPAGE_PMD_NR) {
+						pages += HPAGE_PMD_NR;
+						nr_huge_updates++;
+					}
 					continue;
 				}
 			}
@@ -141,6 +143,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		pages += this_pages;
 	} while (pmd++, addr = next, addr != end);
 
+	if (nr_huge_updates)
+		count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
 	return pages;
 }
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b6d17edf8cf3..72496140ac08 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -812,6 +812,7 @@ const char * const vmstat_text[] = {
 
 #ifdef CONFIG_NUMA_BALANCING
 	"numa_pte_updates",
+	"numa_huge_pte_updates",
 	"numa_hint_faults",
 	"numa_hint_faults_local",
 	"numa_pages_migrated",
-- 
cgit v1.2.3-59-g8ed1b


From 83460ec8dcac14142e7860a01fa59c267ac4657c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 12 Nov 2013 15:08:36 -0800
Subject: syscalls.h: use gcc alias instead of assembler aliases for syscalls

Use standard gcc __attribute__((alias(foo))) to define the syscall aliases
instead of custom assembler macros.

This is far cleaner, and also fixes my LTO kernel build.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compat.h   | 4 ++--
 include/linux/syscalls.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 345da00a86e0..ada34c92b684 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -41,14 +41,14 @@
 	COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
 
 #define COMPAT_SYSCALL_DEFINEx(x, name, ...)				\
-	asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+	asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\
+		__attribute__((alias(__stringify(compat_SyS##name))));  \
 	static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
 	asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));\
 	asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
 	{								\
 		return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));	\
 	}								\
-	SYSCALL_ALIAS(compat_sys##name, compat_SyS##name);		\
 	static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 
 #ifndef compat_user_stack_pointer
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 7fac04e7ff6e..c27f846f6b71 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -184,7 +184,8 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 
 #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
 #define __SYSCALL_DEFINEx(x, name, ...)					\
-	asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
+	asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))	\
+		__attribute__((alias(__stringify(SyS##name))));		\
 	static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
 	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
 	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
@@ -194,7 +195,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));	\
 		return ret;						\
 	}								\
-	SYSCALL_ALIAS(sys##name, SyS##name);				\
 	static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 
 asmlinkage long sys_time(time_t __user *tloc);
-- 
cgit v1.2.3-59-g8ed1b


From 0ca43435188b9f911c8efcdf10731f726142dda1 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Tue, 12 Nov 2013 15:08:40 -0800
Subject: errno.h: remove "NFS" from descriptions in comments

glibc recently changed the error string for ESTALE to remove "NFS" -

https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=96945714ec61951cc748da2b4b8a80cf02127ee9

from: [ERR_REMAP (ESTALE)] = N_("Stale NFS file handle"),
to:   [ERR_REMAP (ESTALE)] = N_("Stale file handle"),

And some have expressed concern that the kernel's errno.h
comments still refer to NFS.

So make that change... note that this is a comment-only change,
and has no functional difference.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/uapi/asm/errno.h                         | 2 +-
 arch/mips/include/uapi/asm/errno.h                          | 2 +-
 arch/parisc/include/uapi/asm/errno.h                        | 2 +-
 arch/sparc/include/uapi/asm/errno.h                         | 2 +-
 drivers/staging/lustre/lustre/include/lustre/lustre_errno.h | 2 +-
 include/uapi/asm-generic/errno.h                            | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/uapi/asm/errno.h b/arch/alpha/include/uapi/asm/errno.h
index e5f29ca28180..17f92aa76b2f 100644
--- a/arch/alpha/include/uapi/asm/errno.h
+++ b/arch/alpha/include/uapi/asm/errno.h
@@ -43,7 +43,7 @@
 
 #define	EUSERS		68	/* Too many users */
 #define	EDQUOT		69	/* Quota exceeded */
-#define	ESTALE		70	/* Stale NFS file handle */
+#define	ESTALE		70	/* Stale file handle */
 #define	EREMOTE		71	/* Object is remote */
 
 #define	ENOLCK		77	/* No record locks available */
diff --git a/arch/mips/include/uapi/asm/errno.h b/arch/mips/include/uapi/asm/errno.h
index 31575e2fd1bd..02d645d7aa9a 100644
--- a/arch/mips/include/uapi/asm/errno.h
+++ b/arch/mips/include/uapi/asm/errno.h
@@ -102,7 +102,7 @@
 #define EWOULDBLOCK	EAGAIN	/* Operation would block */
 #define EALREADY	149	/* Operation already in progress */
 #define EINPROGRESS	150	/* Operation now in progress */
-#define ESTALE		151	/* Stale NFS file handle */
+#define ESTALE		151	/* Stale file handle */
 #define ECANCELED	158	/* AIO operation canceled */
 
 /*
diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h
index 135ad6047e51..f3a8aa554841 100644
--- a/arch/parisc/include/uapi/asm/errno.h
+++ b/arch/parisc/include/uapi/asm/errno.h
@@ -37,7 +37,7 @@
 #define	EBADMSG		67	/* Not a data message */
 #define	EUSERS		68	/* Too many users */
 #define	EDQUOT		69	/* Quota exceeded */
-#define	ESTALE		70	/* Stale NFS file handle */
+#define	ESTALE		70	/* Stale file handle */
 #define	EREMOTE		71	/* Object is remote */
 #define	EOVERFLOW	72	/* Value too large for defined data type */
 
diff --git a/arch/sparc/include/uapi/asm/errno.h b/arch/sparc/include/uapi/asm/errno.h
index c351aba997b7..20423e172853 100644
--- a/arch/sparc/include/uapi/asm/errno.h
+++ b/arch/sparc/include/uapi/asm/errno.h
@@ -40,7 +40,7 @@
 #define EPROCLIM        67      /* SUNOS: Too many processes */
 #define	EUSERS		68	/* Too many users */
 #define	EDQUOT		69	/* Quota exceeded */
-#define	ESTALE		70	/* Stale NFS file handle */
+#define	ESTALE		70	/* Stale file handle */
 #define	EREMOTE		71	/* Object is remote */
 #define	ENOSTR		72	/* Device not a stream */
 #define	ETIME		73	/* Timer expired */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_errno.h b/drivers/staging/lustre/lustre/include/lustre/lustre_errno.h
index 2870487dd286..35aefa2cdad1 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_errno.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_errno.h
@@ -165,7 +165,7 @@
 #define LUSTRE_EHOSTUNREACH	113	/* No route to host */
 #define LUSTRE_EALREADY		114	/* Operation already in progress */
 #define LUSTRE_EINPROGRESS	115	/* Operation now in progress */
-#define LUSTRE_ESTALE		116	/* Stale NFS file handle */
+#define LUSTRE_ESTALE		116	/* Stale file handle */
 #define LUSTRE_EUCLEAN		117	/* Structure needs cleaning */
 #define LUSTRE_ENOTNAM		118	/* Not a XENIX named type file */
 #define LUSTRE_ENAVAIL		119	/* No XENIX semaphores available */
diff --git a/include/uapi/asm-generic/errno.h b/include/uapi/asm-generic/errno.h
index a1331ce50445..1e1ea6e6e7a5 100644
--- a/include/uapi/asm-generic/errno.h
+++ b/include/uapi/asm-generic/errno.h
@@ -86,7 +86,7 @@
 #define	EHOSTUNREACH	113	/* No route to host */
 #define	EALREADY	114	/* Operation already in progress */
 #define	EINPROGRESS	115	/* Operation now in progress */
-#define	ESTALE		116	/* Stale NFS file handle */
+#define	ESTALE		116	/* Stale file handle */
 #define	EUCLEAN		117	/* Structure needs cleaning */
 #define	ENOTNAM		118	/* Not a XENIX named type file */
 #define	ENAVAIL		119	/* No XENIX semaphores available */
-- 
cgit v1.2.3-59-g8ed1b


From 261adc9a609dbfde815337889b9e2c8728959ab8 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 12 Nov 2013 15:08:44 -0800
Subject: jump_label: unlikely(x) > 0

if (unlikely(x) > 0) doesn't seem to help branch prediction

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: "H. Peter Anvin" <hpa@linux.intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/jump_label.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index a5079072da66..cf08540d6204 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -132,14 +132,14 @@ static __always_inline void jump_label_init(void)
 
 static __always_inline bool static_key_false(struct static_key *key)
 {
-	if (unlikely(atomic_read(&key->enabled)) > 0)
+	if (unlikely(atomic_read(&key->enabled) > 0))
 		return true;
 	return false;
 }
 
 static __always_inline bool static_key_true(struct static_key *key)
 {
-	if (likely(atomic_read(&key->enabled)) > 0)
+	if (likely(atomic_read(&key->enabled) > 0))
 		return true;
 	return false;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 27f69e68a5e534412faebc53a4e04acc9ce7fd7e Mon Sep 17 00:00:00 2001
From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Date: Tue, 12 Nov 2013 15:08:47 -0800
Subject: sched: remove ARCH specific fpu_counter from task_struct

fpu_counter in task_struct was used only by sh/x86.  Both of these now
carry it in ARCH specific thread_struct, hence this can now be removed
from generic task_struct, shrinking it slightly for other arches.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul Mundt <paul.mundt@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 045b0d227846..5e226fe3e512 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1062,15 +1062,6 @@ struct task_struct {
 	struct hlist_head preempt_notifiers;
 #endif
 
-	/*
-	 * fpu_counter contains the number of consecutive context switches
-	 * that the FPU is used. If this is over a threshold, the lazy fpu
-	 * saving becomes unlazy to save the trap. This is an unsigned char
-	 * so that after 256 times the counter wraps and the behavior turns
-	 * lazy again; this to deal with bursty apps that only use FPU for
-	 * a short time
-	 */
-	unsigned char fpu_counter;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 5812c13a4e636da4bd7f7cabbbbc59d9dbf3c86c Mon Sep 17 00:00:00 2001
From: Milo Kim <milo.kim@ti.com>
Date: Tue, 12 Nov 2013 15:08:57 -0800
Subject: backlight: lp855x_bl: support new LP8555 device

LP8555 is one of the LP855x family devices.

This device needs the pre_init_device() and post_init_device() driver
structure.  This is the same as the LP8557, so the device configuration
code is shared with the LP8557.  Backlight outputs are generated from dual
DC-DC boost converters.  It has configurable EPROM settings, which are
defined in the platform data.

Driver documentation and device tree bindings are updated.

Signed-off-by: Milo Kim <milo.kim@ti.com>
Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/backlight/lp855x-driver.txt          |  5 ++--
 .../devicetree/bindings/video/backlight/lp855x.txt | 29 +++++++++++++++++++++-
 drivers/video/backlight/Kconfig                    |  4 +--
 drivers/video/backlight/lp855x_bl.c                | 17 +++++++++++--
 include/linux/platform_data/lp855x.h               | 19 ++++++++++++++
 5 files changed, 67 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt
index 1c732f0c6758..01bce243d3d7 100644
--- a/Documentation/backlight/lp855x-driver.txt
+++ b/Documentation/backlight/lp855x-driver.txt
@@ -4,7 +4,8 @@ Kernel driver lp855x
 Backlight driver for LP855x ICs
 
 Supported chips:
-	Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557
+	Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and
+	LP8557
 
 Author: Milo(Woogyom) Kim <milo.kim@ti.com>
 
@@ -24,7 +25,7 @@ Value : pwm based or register based
 
 2) chip_id
 The lp855x chip id.
-Value : lp8550/lp8551/lp8552/lp8553/lp8556/lp8557
+Value : lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557
 
 Platform data for lp855x
 ------------------------
diff --git a/Documentation/devicetree/bindings/video/backlight/lp855x.txt b/Documentation/devicetree/bindings/video/backlight/lp855x.txt
index 1482103d288f..96e83a56048e 100644
--- a/Documentation/devicetree/bindings/video/backlight/lp855x.txt
+++ b/Documentation/devicetree/bindings/video/backlight/lp855x.txt
@@ -2,7 +2,7 @@ lp855x bindings
 
 Required properties:
   - compatible: "ti,lp8550", "ti,lp8551", "ti,lp8552", "ti,lp8553",
-                "ti,lp8556", "ti,lp8557"
+                "ti,lp8555", "ti,lp8556", "ti,lp8557"
   - reg: I2C slave address (u8)
   - dev-ctrl: Value of DEVICE CONTROL register (u8). It depends on the device.
 
@@ -15,6 +15,33 @@ Optional properties:
 
 Example:
 
+	/* LP8555 */
+	backlight@2c {
+		compatible = "ti,lp8555";
+		reg = <0x2c>;
+
+		dev-ctrl = /bits/ 8 <0x00>;
+		pwm-period = <10000>;
+
+		/* 4V OV, 4 output LED0 string enabled */
+		rom_14h {
+			rom-addr = /bits/ 8 <0x14>;
+			rom-val = /bits/ 8 <0xcf>;
+		};
+
+		/* Heavy smoothing, 24ms ramp time step */
+		rom_15h {
+			rom-addr = /bits/ 8 <0x15>;
+			rom-val = /bits/ 8 <0xc7>;
+		};
+
+		/* 4 output LED1 string enabled */
+		rom_19h {
+			rom-addr = /bits/ 8 <0x19>;
+			rom-val = /bits/ 8 <0x0f>;
+		};
+	};
+
 	/* LP8556 */
 	backlight@2c {
 		compatible = "ti,lp8556";
diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index d4a7a351d67c..a65dd063ecad 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -388,8 +388,8 @@ config BACKLIGHT_LP855X
 	tristate "Backlight driver for TI LP855X"
 	depends on BACKLIGHT_CLASS_DEVICE && I2C
 	help
-	  This supports TI LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557
-	  backlight driver.
+	  This supports TI LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and
+	  LP8557 backlight driver.
 
 config BACKLIGHT_LP8788
 	tristate "Backlight driver for TI LP8788 MFD"
diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c
index c0b41f13bd4a..c952175d4113 100644
--- a/drivers/video/backlight/lp855x_bl.c
+++ b/drivers/video/backlight/lp855x_bl.c
@@ -26,13 +26,15 @@
 #define LP8556_EPROM_START		0xA0
 #define LP8556_EPROM_END		0xAF
 
-/* LP8557 Registers */
+/* LP8555/7 Registers */
 #define LP8557_BL_CMD			0x00
 #define LP8557_BL_MASK			0x01
 #define LP8557_BL_ON			0x01
 #define LP8557_BL_OFF			0x00
 #define LP8557_BRIGHTNESS_CTRL		0x04
 #define LP8557_CONFIG			0x10
+#define LP8555_EPROM_START		0x10
+#define LP8555_EPROM_END		0x7A
 #define LP8557_EPROM_START		0x10
 #define LP8557_EPROM_END		0x1E
 
@@ -111,6 +113,10 @@ static bool lp855x_is_valid_rom_area(struct lp855x *lp, u8 addr)
 		start = LP8556_EPROM_START;
 		end = LP8556_EPROM_END;
 		break;
+	case LP8555:
+		start = LP8555_EPROM_START;
+		end = LP8555_EPROM_END;
+		break;
 	case LP8557:
 		start = LP8557_EPROM_START;
 		end = LP8557_EPROM_END;
@@ -165,9 +171,14 @@ static int lp855x_configure(struct lp855x *lp)
 	struct lp855x_platform_data *pd = lp->pdata;
 
 	switch (lp->chip_id) {
-	case LP8550 ... LP8556:
+	case LP8550:
+	case LP8551:
+	case LP8552:
+	case LP8553:
+	case LP8556:
 		lp->cfg = &lp855x_dev_cfg;
 		break;
+	case LP8555:
 	case LP8557:
 		lp->cfg = &lp8557_dev_cfg;
 		break;
@@ -470,6 +481,7 @@ static const struct of_device_id lp855x_dt_ids[] = {
 	{ .compatible = "ti,lp8551", },
 	{ .compatible = "ti,lp8552", },
 	{ .compatible = "ti,lp8553", },
+	{ .compatible = "ti,lp8555", },
 	{ .compatible = "ti,lp8556", },
 	{ .compatible = "ti,lp8557", },
 	{ }
@@ -481,6 +493,7 @@ static const struct i2c_device_id lp855x_ids[] = {
 	{"lp8551", LP8551},
 	{"lp8552", LP8552},
 	{"lp8553", LP8553},
+	{"lp8555", LP8555},
 	{"lp8556", LP8556},
 	{"lp8557", LP8557},
 	{ }
diff --git a/include/linux/platform_data/lp855x.h b/include/linux/platform_data/lp855x.h
index ea3200527dd3..1b2ba24e4e03 100644
--- a/include/linux/platform_data/lp855x.h
+++ b/include/linux/platform_data/lp855x.h
@@ -40,6 +40,17 @@
 #define LP8553_PWM_CONFIG	LP8550_PWM_CONFIG
 #define LP8553_I2C_CONFIG	LP8550_I2C_CONFIG
 
+/* CONFIG register - LP8555 */
+#define LP8555_PWM_STANDBY	BIT(7)
+#define LP8555_PWM_FILTER	BIT(6)
+#define LP8555_RELOAD_EPROM	BIT(3)	/* use it if EPROMs should be reset
+					   when the backlight turns on */
+#define LP8555_OFF_OPENLEDS	BIT(2)
+#define LP8555_PWM_CONFIG	LP8555_PWM_ONLY
+#define LP8555_I2C_CONFIG	LP8555_I2C_ONLY
+#define LP8555_COMB1_CONFIG	LP8555_COMBINED1
+#define LP8555_COMB2_CONFIG	LP8555_COMBINED2
+
 /* DEVICE CONTROL register - LP8556 */
 #define LP8556_PWM_CONFIG	(LP8556_PWM_ONLY << BRT_MODE_SHFT)
 #define LP8556_COMB1_CONFIG	(LP8556_COMBINED1 << BRT_MODE_SHFT)
@@ -65,6 +76,7 @@ enum lp855x_chip_id {
 	LP8551,
 	LP8552,
 	LP8553,
+	LP8555,
 	LP8556,
 	LP8557,
 };
@@ -89,6 +101,13 @@ enum lp8553_brighntess_source {
 	LP8553_I2C_ONLY = LP8550_I2C_ONLY,
 };
 
+enum lp8555_brightness_source {
+	LP8555_PWM_ONLY,
+	LP8555_I2C_ONLY,
+	LP8555_COMBINED1,	/* Brightness register with shaped PWM */
+	LP8555_COMBINED2,	/* PWM with shaped brightness register */
+};
+
 enum lp8556_brightness_source {
 	LP8556_PWM_ONLY,
 	LP8556_COMBINED1,	/* pwm + i2c before the shaper block */
-- 
cgit v1.2.3-59-g8ed1b


From 28e64a68a2ef1c48f30e8b6803725199929069fc Mon Sep 17 00:00:00 2001
From: Daniel Jeong <gshark.jeong@gmail.com>
Date: Tue, 12 Nov 2013 15:08:58 -0800
Subject: backlight: lm3630: apply chip revision

The LM3630 chip was revised by TI and the chip name was changed to
LM3630A.  The register map, default values and initial sequences have
also changed.  The files lm3630_bl.{c,h} are replaced by lm3630a_bl.{c,h}.
You can find more information about the LM3630A (datasheet, EVM, etc.) at
http://www.ti.com/product/lm3630a

Signed-off-by: Daniel Jeong <gshark.jeong@gmail.com>
Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/Kconfig          |   6 +-
 drivers/video/backlight/Makefile         |   2 +-
 drivers/video/backlight/lm3630_bl.c      | 475 ------------------------------
 drivers/video/backlight/lm3630a_bl.c     | 483 +++++++++++++++++++++++++++++++
 include/linux/platform_data/lm3630_bl.h  |  57 ----
 include/linux/platform_data/lm3630a_bl.h |  65 +++++
 6 files changed, 552 insertions(+), 536 deletions(-)
 delete mode 100644 drivers/video/backlight/lm3630_bl.c
 create mode 100644 drivers/video/backlight/lm3630a_bl.c
 delete mode 100644 include/linux/platform_data/lm3630_bl.h
 create mode 100644 include/linux/platform_data/lm3630a_bl.h

(limited to 'include')

diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index a65dd063ecad..5a3eb2ecb525 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -368,12 +368,12 @@ config BACKLIGHT_AAT2870
 	  If you have a AnalogicTech AAT2870 say Y to enable the
 	  backlight driver.
 
-config BACKLIGHT_LM3630
-	tristate "Backlight Driver for LM3630"
+config BACKLIGHT_LM3630A
+	tristate "Backlight Driver for LM3630A"
 	depends on BACKLIGHT_CLASS_DEVICE && I2C
 	select REGMAP_I2C
 	help
-	  This supports TI LM3630 Backlight Driver
+	  This supports TI LM3630A Backlight Driver
 
 config BACKLIGHT_LM3639
 	tristate "Backlight Driver for LM3639"
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index 38e1babb1946..bb820024f346 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -37,7 +37,7 @@ obj-$(CONFIG_BACKLIGHT_GPIO)		+= gpio_backlight.o
 obj-$(CONFIG_BACKLIGHT_HP680)		+= hp680_bl.o
 obj-$(CONFIG_BACKLIGHT_HP700)		+= jornada720_bl.o
 obj-$(CONFIG_BACKLIGHT_LM3533)		+= lm3533_bl.o
-obj-$(CONFIG_BACKLIGHT_LM3630)		+= lm3630_bl.o
+obj-$(CONFIG_BACKLIGHT_LM3630A)		+= lm3630a_bl.o
 obj-$(CONFIG_BACKLIGHT_LM3639)		+= lm3639_bl.o
 obj-$(CONFIG_BACKLIGHT_LOCOMO)		+= locomolcd.o
 obj-$(CONFIG_BACKLIGHT_LP855X)		+= lp855x_bl.o
diff --git a/drivers/video/backlight/lm3630_bl.c b/drivers/video/backlight/lm3630_bl.c
deleted file mode 100644
index 76a62e978fc3..000000000000
--- a/drivers/video/backlight/lm3630_bl.c
+++ /dev/null
@@ -1,475 +0,0 @@
-/*
-* Simple driver for Texas Instruments LM3630 Backlight driver chip
-* Copyright (C) 2012 Texas Instruments
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License version 2 as
-* published by the Free Software Foundation.
-*
-*/
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/i2c.h>
-#include <linux/backlight.h>
-#include <linux/err.h>
-#include <linux/delay.h>
-#include <linux/uaccess.h>
-#include <linux/interrupt.h>
-#include <linux/regmap.h>
-#include <linux/platform_data/lm3630_bl.h>
-
-#define REG_CTRL	0x00
-#define REG_CONFIG	0x01
-#define REG_BRT_A	0x03
-#define REG_BRT_B	0x04
-#define REG_INT_STATUS	0x09
-#define REG_INT_EN	0x0A
-#define REG_FAULT	0x0B
-#define REG_PWM_OUTLOW	0x12
-#define REG_PWM_OUTHIGH	0x13
-#define REG_MAX		0x1F
-
-#define INT_DEBOUNCE_MSEC	10
-
-enum lm3630_leds {
-	BLED_ALL = 0,
-	BLED_1,
-	BLED_2
-};
-
-static const char * const bled_name[] = {
-	[BLED_ALL] = "lm3630_bled",	/*Bank1 controls all string */
-	[BLED_1] = "lm3630_bled1",	/*Bank1 controls bled1 */
-	[BLED_2] = "lm3630_bled2",	/*Bank1 or 2 controls bled2 */
-};
-
-struct lm3630_chip_data {
-	struct device *dev;
-	struct delayed_work work;
-	int irq;
-	struct workqueue_struct *irqthread;
-	struct lm3630_platform_data *pdata;
-	struct backlight_device *bled1;
-	struct backlight_device *bled2;
-	struct regmap *regmap;
-};
-
-/* initialize chip */
-static int lm3630_chip_init(struct lm3630_chip_data *pchip)
-{
-	int ret;
-	unsigned int reg_val;
-	struct lm3630_platform_data *pdata = pchip->pdata;
-
-	/*pwm control */
-	reg_val = ((pdata->pwm_active & 0x01) << 2) | (pdata->pwm_ctrl & 0x03);
-	ret = regmap_update_bits(pchip->regmap, REG_CONFIG, 0x07, reg_val);
-	if (ret < 0)
-		goto out;
-
-	/* bank control */
-	reg_val = ((pdata->bank_b_ctrl & 0x01) << 1) |
-			(pdata->bank_a_ctrl & 0x07);
-	ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x07, reg_val);
-	if (ret < 0)
-		goto out;
-
-	ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x80, 0x00);
-	if (ret < 0)
-		goto out;
-
-	/* set initial brightness */
-	if (pdata->bank_a_ctrl != BANK_A_CTRL_DISABLE) {
-		ret = regmap_write(pchip->regmap,
-				   REG_BRT_A, pdata->init_brt_led1);
-		if (ret < 0)
-			goto out;
-	}
-
-	if (pdata->bank_b_ctrl != BANK_B_CTRL_DISABLE) {
-		ret = regmap_write(pchip->regmap,
-				   REG_BRT_B, pdata->init_brt_led2);
-		if (ret < 0)
-			goto out;
-	}
-	return ret;
-
-out:
-	dev_err(pchip->dev, "i2c failed to access register\n");
-	return ret;
-}
-
-/* interrupt handling */
-static void lm3630_delayed_func(struct work_struct *work)
-{
-	int ret;
-	unsigned int reg_val;
-	struct lm3630_chip_data *pchip;
-
-	pchip = container_of(work, struct lm3630_chip_data, work.work);
-
-	ret = regmap_read(pchip->regmap, REG_INT_STATUS, &reg_val);
-	if (ret < 0) {
-		dev_err(pchip->dev,
-			"i2c failed to access REG_INT_STATUS Register\n");
-		return;
-	}
-
-	dev_info(pchip->dev, "REG_INT_STATUS Register is 0x%x\n", reg_val);
-}
-
-static irqreturn_t lm3630_isr_func(int irq, void *chip)
-{
-	int ret;
-	struct lm3630_chip_data *pchip = chip;
-	unsigned long delay = msecs_to_jiffies(INT_DEBOUNCE_MSEC);
-
-	queue_delayed_work(pchip->irqthread, &pchip->work, delay);
-
-	ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x80, 0x00);
-	if (ret < 0)
-		goto out;
-
-	return IRQ_HANDLED;
-out:
-	dev_err(pchip->dev, "i2c failed to access register\n");
-	return IRQ_HANDLED;
-}
-
-static int lm3630_intr_config(struct lm3630_chip_data *pchip)
-{
-	INIT_DELAYED_WORK(&pchip->work, lm3630_delayed_func);
-	pchip->irqthread = create_singlethread_workqueue("lm3630-irqthd");
-	if (!pchip->irqthread) {
-		dev_err(pchip->dev, "create irq thread fail...\n");
-		return -1;
-	}
-	if (request_threaded_irq
-	    (pchip->irq, NULL, lm3630_isr_func,
-	     IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "lm3630_irq", pchip)) {
-		dev_err(pchip->dev, "request threaded irq fail..\n");
-		return -1;
-	}
-	return 0;
-}
-
-static bool
-set_intensity(struct backlight_device *bl, struct lm3630_chip_data *pchip)
-{
-	if (!pchip->pdata->pwm_set_intensity)
-		return false;
-	pchip->pdata->pwm_set_intensity(bl->props.brightness - 1,
-					pchip->pdata->pwm_period);
-	return true;
-}
-
-/* update and get brightness */
-static int lm3630_bank_a_update_status(struct backlight_device *bl)
-{
-	int ret;
-	struct lm3630_chip_data *pchip = bl_get_data(bl);
-	enum lm3630_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
-
-	/* brightness 0 means disable */
-	if (!bl->props.brightness) {
-		ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x04, 0x00);
-		if (ret < 0)
-			goto out;
-		return bl->props.brightness;
-	}
-
-	/* pwm control */
-	if (pwm_ctrl == PWM_CTRL_BANK_A || pwm_ctrl == PWM_CTRL_BANK_ALL) {
-		if (!set_intensity(bl, pchip))
-			dev_err(pchip->dev, "No pwm control func. in plat-data\n");
-	} else {
-
-		/* i2c control */
-		ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x80, 0x00);
-		if (ret < 0)
-			goto out;
-		mdelay(1);
-		ret = regmap_write(pchip->regmap,
-				   REG_BRT_A, bl->props.brightness - 1);
-		if (ret < 0)
-			goto out;
-	}
-	return bl->props.brightness;
-out:
-	dev_err(pchip->dev, "i2c failed to access REG_CTRL\n");
-	return bl->props.brightness;
-}
-
-static int lm3630_bank_a_get_brightness(struct backlight_device *bl)
-{
-	unsigned int reg_val;
-	int brightness, ret;
-	struct lm3630_chip_data *pchip = bl_get_data(bl);
-	enum lm3630_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
-
-	if (pwm_ctrl == PWM_CTRL_BANK_A || pwm_ctrl == PWM_CTRL_BANK_ALL) {
-		ret = regmap_read(pchip->regmap, REG_PWM_OUTHIGH, &reg_val);
-		if (ret < 0)
-			goto out;
-		brightness = reg_val & 0x01;
-		ret = regmap_read(pchip->regmap, REG_PWM_OUTLOW, &reg_val);
-		if (ret < 0)
-			goto out;
-		brightness = ((brightness << 8) | reg_val) + 1;
-	} else {
-		ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x80, 0x00);
-		if (ret < 0)
-			goto out;
-		mdelay(1);
-		ret = regmap_read(pchip->regmap, REG_BRT_A, &reg_val);
-		if (ret < 0)
-			goto out;
-		brightness = reg_val + 1;
-	}
-	bl->props.brightness = brightness;
-	return bl->props.brightness;
-out:
-	dev_err(pchip->dev, "i2c failed to access register\n");
-	return 0;
-}
-
-static const struct backlight_ops lm3630_bank_a_ops = {
-	.options = BL_CORE_SUSPENDRESUME,
-	.update_status = lm3630_bank_a_update_status,
-	.get_brightness = lm3630_bank_a_get_brightness,
-};
-
-static int lm3630_bank_b_update_status(struct backlight_device *bl)
-{
-	int ret;
-	struct lm3630_chip_data *pchip = bl_get_data(bl);
-	enum lm3630_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
-
-	if (pwm_ctrl == PWM_CTRL_BANK_B || pwm_ctrl == PWM_CTRL_BANK_ALL) {
-		if (!set_intensity(bl, pchip))
-			dev_err(pchip->dev,
-				"no pwm control func. in plat-data\n");
-	} else {
-		ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x80, 0x00);
-		if (ret < 0)
-			goto out;
-		mdelay(1);
-		ret = regmap_write(pchip->regmap,
-				   REG_BRT_B, bl->props.brightness - 1);
-	}
-	return bl->props.brightness;
-out:
-	dev_err(pchip->dev, "i2c failed to access register\n");
-	return bl->props.brightness;
-}
-
-static int lm3630_bank_b_get_brightness(struct backlight_device *bl)
-{
-	unsigned int reg_val;
-	int brightness, ret;
-	struct lm3630_chip_data *pchip = bl_get_data(bl);
-	enum lm3630_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
-
-	if (pwm_ctrl == PWM_CTRL_BANK_B || pwm_ctrl == PWM_CTRL_BANK_ALL) {
-		ret = regmap_read(pchip->regmap, REG_PWM_OUTHIGH, &reg_val);
-		if (ret < 0)
-			goto out;
-		brightness = reg_val & 0x01;
-		ret = regmap_read(pchip->regmap, REG_PWM_OUTLOW, &reg_val);
-		if (ret < 0)
-			goto out;
-		brightness = ((brightness << 8) | reg_val) + 1;
-	} else {
-		ret = regmap_update_bits(pchip->regmap, REG_CTRL, 0x80, 0x00);
-		if (ret < 0)
-			goto out;
-		mdelay(1);
-		ret = regmap_read(pchip->regmap, REG_BRT_B, &reg_val);
-		if (ret < 0)
-			goto out;
-		brightness = reg_val + 1;
-	}
-	bl->props.brightness = brightness;
-
-	return bl->props.brightness;
-out:
-	dev_err(pchip->dev, "i2c failed to access register\n");
-	return bl->props.brightness;
-}
-
-static const struct backlight_ops lm3630_bank_b_ops = {
-	.options = BL_CORE_SUSPENDRESUME,
-	.update_status = lm3630_bank_b_update_status,
-	.get_brightness = lm3630_bank_b_get_brightness,
-};
-
-static int lm3630_backlight_register(struct lm3630_chip_data *pchip,
-				     enum lm3630_leds ledno)
-{
-	const char *name = bled_name[ledno];
-	struct backlight_properties props;
-	struct lm3630_platform_data *pdata = pchip->pdata;
-
-	props.type = BACKLIGHT_RAW;
-	switch (ledno) {
-	case BLED_1:
-	case BLED_ALL:
-		props.brightness = pdata->init_brt_led1;
-		props.max_brightness = pdata->max_brt_led1;
-		pchip->bled1 =
-		    backlight_device_register(name, pchip->dev, pchip,
-					      &lm3630_bank_a_ops, &props);
-		if (IS_ERR(pchip->bled1))
-			return PTR_ERR(pchip->bled1);
-		break;
-	case BLED_2:
-		props.brightness = pdata->init_brt_led2;
-		props.max_brightness = pdata->max_brt_led2;
-		pchip->bled2 =
-		    backlight_device_register(name, pchip->dev, pchip,
-					      &lm3630_bank_b_ops, &props);
-		if (IS_ERR(pchip->bled2))
-			return PTR_ERR(pchip->bled2);
-		break;
-	}
-	return 0;
-}
-
-static void lm3630_backlight_unregister(struct lm3630_chip_data *pchip)
-{
-	if (pchip->bled1)
-		backlight_device_unregister(pchip->bled1);
-	if (pchip->bled2)
-		backlight_device_unregister(pchip->bled2);
-}
-
-static const struct regmap_config lm3630_regmap = {
-	.reg_bits = 8,
-	.val_bits = 8,
-	.max_register = REG_MAX,
-};
-
-static int lm3630_probe(struct i2c_client *client,
-				  const struct i2c_device_id *id)
-{
-	struct lm3630_platform_data *pdata = client->dev.platform_data;
-	struct lm3630_chip_data *pchip;
-	int ret;
-
-	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
-		dev_err(&client->dev, "fail : i2c functionality check...\n");
-		return -EOPNOTSUPP;
-	}
-
-	if (pdata == NULL) {
-		dev_err(&client->dev, "fail : no platform data.\n");
-		return -ENODATA;
-	}
-
-	pchip = devm_kzalloc(&client->dev, sizeof(struct lm3630_chip_data),
-			     GFP_KERNEL);
-	if (!pchip)
-		return -ENOMEM;
-	pchip->pdata = pdata;
-	pchip->dev = &client->dev;
-
-	pchip->regmap = devm_regmap_init_i2c(client, &lm3630_regmap);
-	if (IS_ERR(pchip->regmap)) {
-		ret = PTR_ERR(pchip->regmap);
-		dev_err(&client->dev, "fail : allocate register map: %d\n",
-			ret);
-		return ret;
-	}
-	i2c_set_clientdata(client, pchip);
-
-	/* chip initialize */
-	ret = lm3630_chip_init(pchip);
-	if (ret < 0) {
-		dev_err(&client->dev, "fail : init chip\n");
-		goto err_chip_init;
-	}
-
-	switch (pdata->bank_a_ctrl) {
-	case BANK_A_CTRL_ALL:
-		ret = lm3630_backlight_register(pchip, BLED_ALL);
-		pdata->bank_b_ctrl = BANK_B_CTRL_DISABLE;
-		break;
-	case BANK_A_CTRL_LED1:
-		ret = lm3630_backlight_register(pchip, BLED_1);
-		break;
-	case BANK_A_CTRL_LED2:
-		ret = lm3630_backlight_register(pchip, BLED_2);
-		pdata->bank_b_ctrl = BANK_B_CTRL_DISABLE;
-		break;
-	default:
-		break;
-	}
-
-	if (ret < 0)
-		goto err_bl_reg;
-
-	if (pdata->bank_b_ctrl && pchip->bled2 == NULL) {
-		ret = lm3630_backlight_register(pchip, BLED_2);
-		if (ret < 0)
-			goto err_bl_reg;
-	}
-
-	/* interrupt enable  : irq 0 is not allowed for lm3630 */
-	pchip->irq = client->irq;
-	if (pchip->irq)
-		lm3630_intr_config(pchip);
-
-	dev_info(&client->dev, "LM3630 backlight register OK.\n");
-	return 0;
-
-err_bl_reg:
-	dev_err(&client->dev, "fail : backlight register.\n");
-	lm3630_backlight_unregister(pchip);
-err_chip_init:
-	return ret;
-}
-
-static int lm3630_remove(struct i2c_client *client)
-{
-	int ret;
-	struct lm3630_chip_data *pchip = i2c_get_clientdata(client);
-
-	ret = regmap_write(pchip->regmap, REG_BRT_A, 0);
-	if (ret < 0)
-		dev_err(pchip->dev, "i2c failed to access register\n");
-
-	ret = regmap_write(pchip->regmap, REG_BRT_B, 0);
-	if (ret < 0)
-		dev_err(pchip->dev, "i2c failed to access register\n");
-
-	lm3630_backlight_unregister(pchip);
-	if (pchip->irq) {
-		free_irq(pchip->irq, pchip);
-		flush_workqueue(pchip->irqthread);
-		destroy_workqueue(pchip->irqthread);
-	}
-	return 0;
-}
-
-static const struct i2c_device_id lm3630_id[] = {
-	{LM3630_NAME, 0},
-	{}
-};
-
-MODULE_DEVICE_TABLE(i2c, lm3630_id);
-
-static struct i2c_driver lm3630_i2c_driver = {
-	.driver = {
-		   .name = LM3630_NAME,
-		   },
-	.probe = lm3630_probe,
-	.remove = lm3630_remove,
-	.id_table = lm3630_id,
-};
-
-module_i2c_driver(lm3630_i2c_driver);
-
-MODULE_DESCRIPTION("Texas Instruments Backlight driver for LM3630");
-MODULE_AUTHOR("G.Shark Jeong <gshark.jeong@gmail.com>");
-MODULE_AUTHOR("Daniel Jeong <daniel.jeong@ti.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/video/backlight/lm3630a_bl.c b/drivers/video/backlight/lm3630a_bl.c
new file mode 100644
index 000000000000..cf40cc8e662b
--- /dev/null
+++ b/drivers/video/backlight/lm3630a_bl.c
@@ -0,0 +1,483 @@
+/*
+* Simple driver for Texas Instruments LM3630A Backlight driver chip
+* Copyright (C) 2012 Texas Instruments
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License version 2 as
+* published by the Free Software Foundation.
+*
+*/
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/backlight.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/regmap.h>
+#include <linux/pwm.h>
+#include <linux/platform_data/lm3630a_bl.h>
+
+#define REG_CTRL	0x00
+#define REG_BOOST	0x02
+#define REG_CONFIG	0x01
+#define REG_BRT_A	0x03
+#define REG_BRT_B	0x04
+#define REG_I_A		0x05
+#define REG_I_B		0x06
+#define REG_INT_STATUS	0x09
+#define REG_INT_EN	0x0A
+#define REG_FAULT	0x0B
+#define REG_PWM_OUTLOW	0x12
+#define REG_PWM_OUTHIGH	0x13
+#define REG_MAX		0x1F
+
+#define INT_DEBOUNCE_MSEC	10
+struct lm3630a_chip {
+	struct device *dev;
+	struct delayed_work work;
+
+	int irq;
+	struct workqueue_struct *irqthread;
+	struct lm3630a_platform_data *pdata;
+	struct backlight_device *bleda;
+	struct backlight_device *bledb;
+	struct regmap *regmap;
+	struct pwm_device *pwmd;
+};
+
+/* i2c access */
+static int lm3630a_read(struct lm3630a_chip *pchip, unsigned int reg)
+{
+	int rval;
+	unsigned int reg_val;
+
+	rval = regmap_read(pchip->regmap, reg, &reg_val);
+	if (rval < 0)
+		return rval;
+	return reg_val & 0xFF;
+}
+
+static int lm3630a_write(struct lm3630a_chip *pchip,
+			 unsigned int reg, unsigned int data)
+{
+	return regmap_write(pchip->regmap, reg, data);
+}
+
+static int lm3630a_update(struct lm3630a_chip *pchip,
+			  unsigned int reg, unsigned int mask,
+			  unsigned int data)
+{
+	return regmap_update_bits(pchip->regmap, reg, mask, data);
+}
+
+/* initialize chip; return codes are OR-ed, so rval < 0 may not be one errno */
+static int lm3630a_chip_init(struct lm3630a_chip *pchip)
+{
+	int rval;
+	struct lm3630a_platform_data *pdata = pchip->pdata;
+
+	usleep_range(1000, 2000);
+	/* set Filter Strength Register */
+	rval = lm3630a_write(pchip, 0x50, 0x03);
+	/* set Config. register */
+	rval |= lm3630a_update(pchip, REG_CONFIG, 0x07, pdata->pwm_ctrl);
+	/* set boost control */
+	rval |= lm3630a_write(pchip, REG_BOOST, 0x38);
+	/* set current A */
+	rval |= lm3630a_update(pchip, REG_I_A, 0x1F, 0x1F);
+	/* set current B */
+	rval |= lm3630a_write(pchip, REG_I_B, 0x1F);
+	/* set control */
+	rval |=
+	    lm3630a_write(pchip, REG_CTRL, pdata->leda_ctrl | pdata->ledb_ctrl);
+	usleep_range(1000, 2000);
+	/* set brightness A and B */
+	rval |= lm3630a_write(pchip, REG_BRT_A, pdata->leda_init_brt);
+	rval |= lm3630a_write(pchip, REG_BRT_B, pdata->ledb_init_brt);
+
+	if (rval < 0)
+		dev_err(pchip->dev, "i2c failed to access register\n");
+	return rval;
+}
+
+/* interrupt handling */
+static void lm3630a_delayed_func(struct work_struct *work)
+{
+	int rval;	/* signed, so the "< 0" failure check below can fire */
+	struct lm3630a_chip *pchip;
+
+	pchip = container_of(work, struct lm3630a_chip, work.work);
+
+	rval = lm3630a_read(pchip, REG_INT_STATUS);
+	if (rval < 0) {
+		dev_err(pchip->dev,
+			"i2c failed to access REG_INT_STATUS Register\n");
+		return;
+	}
+
+	dev_info(pchip->dev, "REG_INT_STATUS Register is 0x%x\n", rval);
+}
+
+static irqreturn_t lm3630a_isr_func(int irq, void *chip)
+{
+	int rval;
+	struct lm3630a_chip *pchip = chip;
+	unsigned long delay = msecs_to_jiffies(INT_DEBOUNCE_MSEC);
+
+	queue_delayed_work(pchip->irqthread, &pchip->work, delay);
+
+	rval = lm3630a_update(pchip, REG_CTRL, 0x80, 0x00);
+	if (rval < 0) {
+		dev_err(pchip->dev, "i2c failed to access register\n");
+		return IRQ_NONE;
+	}
+	return IRQ_HANDLED;
+}
+
+static int lm3630a_intr_config(struct lm3630a_chip *pchip)
+{
+	int rval;
+
+	rval = lm3630a_write(pchip, REG_INT_EN, 0x87);
+	if (rval < 0)
+		return rval;
+
+	INIT_DELAYED_WORK(&pchip->work, lm3630a_delayed_func);
+	pchip->irqthread = create_singlethread_workqueue("lm3630a-irqthd");
+	if (!pchip->irqthread) {
+		dev_err(pchip->dev, "create irq thread fail\n");
+		return -ENOMEM;
+	}
+	if (request_threaded_irq
+	    (pchip->irq, NULL, lm3630a_isr_func,
+	     IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "lm3630a_irq", pchip)) {
+		dev_err(pchip->dev, "request threaded irq fail\n");
+		return -ENOMEM;
+	}
+	return rval;
+}
+
+static void lm3630a_pwm_ctrl(struct lm3630a_chip *pchip, int br, int br_max)
+{
+	unsigned int period = pwm_get_period(pchip->pwmd);
+	unsigned int duty = br * period / br_max;
+
+	pwm_config(pchip->pwmd, duty, period);
+	if (duty)
+		pwm_enable(pchip->pwmd);
+	else
+		pwm_disable(pchip->pwmd);
+}
+
+/* update and get brightness */
+static int lm3630a_bank_a_update_status(struct backlight_device *bl)
+{
+	int ret;
+	struct lm3630a_chip *pchip = bl_get_data(bl);
+	enum lm3630a_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
+
+	/* pwm control */
+	if ((pwm_ctrl & LM3630A_PWM_BANK_A) != 0) {
+		lm3630a_pwm_ctrl(pchip, bl->props.brightness,
+				 bl->props.max_brightness);
+		return bl->props.brightness;
+	}
+
+	/* disable sleep */
+	ret = lm3630a_update(pchip, REG_CTRL, 0x80, 0x00);
+	if (ret < 0)
+		goto out_i2c_err;
+	usleep_range(1000, 2000);
+	/* minimum brightness is 0x04 */
+	ret = lm3630a_write(pchip, REG_BRT_A, bl->props.brightness);
+	if (bl->props.brightness < 0x4)
+		ret |= lm3630a_update(pchip, REG_CTRL, LM3630A_LEDA_ENABLE, 0);
+	else
+		ret |= lm3630a_update(pchip, REG_CTRL,
+				      LM3630A_LEDA_ENABLE, LM3630A_LEDA_ENABLE);
+	if (ret < 0)
+		goto out_i2c_err;
+	return bl->props.brightness;
+
+out_i2c_err:
+	dev_err(pchip->dev, "i2c failed to access\n");
+	return bl->props.brightness;
+}
+
+static int lm3630a_bank_a_get_brightness(struct backlight_device *bl)
+{
+	int brightness, rval;
+	struct lm3630a_chip *pchip = bl_get_data(bl);
+	enum lm3630a_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
+
+	if ((pwm_ctrl & LM3630A_PWM_BANK_A) != 0) {
+		rval = lm3630a_read(pchip, REG_PWM_OUTHIGH);
+		if (rval < 0)
+			goto out_i2c_err;
+		brightness = (rval & 0x01) << 8;
+		rval = lm3630a_read(pchip, REG_PWM_OUTLOW);
+		if (rval < 0)
+			goto out_i2c_err;
+		brightness |= rval;
+		goto out;
+	}
+
+	/* disable sleep */
+	rval = lm3630a_update(pchip, REG_CTRL, 0x80, 0x00);
+	if (rval < 0)
+		goto out_i2c_err;
+	usleep_range(1000, 2000);
+	rval = lm3630a_read(pchip, REG_BRT_A);
+	if (rval < 0)
+		goto out_i2c_err;
+	brightness = rval;
+
+out:
+	bl->props.brightness = brightness;
+	return bl->props.brightness;
+out_i2c_err:
+	dev_err(pchip->dev, "i2c failed to access register\n");
+	return 0;
+}
+
+static const struct backlight_ops lm3630a_bank_a_ops = {
+	.options = BL_CORE_SUSPENDRESUME,
+	.update_status = lm3630a_bank_a_update_status,
+	.get_brightness = lm3630a_bank_a_get_brightness,
+};
+
+/* update and get brightness */
+static int lm3630a_bank_b_update_status(struct backlight_device *bl)
+{
+	int ret;
+	struct lm3630a_chip *pchip = bl_get_data(bl);
+	enum lm3630a_pwm_ctrl pwm_ctrl = pchip->pdata->pwm_ctrl;
+
+	/* pwm control */
+	if ((pwm_ctrl & LM3630A_PWM_BANK_B) != 0) {
+		lm3630a_pwm_ctrl(pchip, bl->props.brightness,
+				 bl->props.max_brightness);
+		return bl->props.brightness;
+	}
+
+	/* disable sleep */
+	ret = lm3630a_update(pchip, REG_CTRL, 0x80, 0x00);
+	if (ret < 0)
+		goto out_i2c_err;
+	usleep_range(1000, 2000);
+	/* minimum brightness is 0x04 */
+	ret = lm3630a_write(pchip, REG_BRT_B, bl->props.brightness);
+	if (bl->props.brightness < 0x4)
+		ret |= lm3630a_update(pchip, REG_CTRL, LM3630A_LEDB_ENABLE, 0);
+	else
+		ret |= lm3630a_update(pchip, REG_CTRL,
+				      LM3630A_LEDB_ENABLE, LM3630A_LEDB_ENABLE);
+	if (ret < 0)
+		goto out_i2c_err;
+	return bl->props.brightness;
+
+out_i2c_err:
+	dev_err(pchip->dev, "i2c failed to access REG_CTRL\n");
+	return bl->props.brightness;
+}
+
+static int lm3630a_bank_b_get_brightness(struct backlight_device *bl)
+{
+	struct lm3630a_chip *pchip = bl_get_data(bl);
+	int level, val;
+
+	if (pchip->pdata->pwm_ctrl & LM3630A_PWM_BANK_B) {
+		/* PWM mode: bit 0 of OUTHIGH becomes bit 8, OUTLOW the rest */
+		val = lm3630a_read(pchip, REG_PWM_OUTHIGH);
+		if (val < 0)
+			goto out_i2c_err;
+		level = (val & 0x01) << 8;
+		val = lm3630a_read(pchip, REG_PWM_OUTLOW);
+		if (val < 0)
+			goto out_i2c_err;
+		level |= val;
+	} else {
+		/* I2C mode: disable sleep first, then read the bank B level */
+		val = lm3630a_update(pchip, REG_CTRL, 0x80, 0x00);
+		if (val < 0)
+			goto out_i2c_err;
+		usleep_range(1000, 2000);
+		val = lm3630a_read(pchip, REG_BRT_B);
+		if (val < 0)
+			goto out_i2c_err;
+		level = val;
+	}
+
+	/* store the value read from the chip and report it */
+	bl->props.brightness = level;
+	return level;
+
+out_i2c_err:
+	dev_err(pchip->dev, "i2c failed to access register\n");
+	return 0;
+}
+
+static const struct backlight_ops lm3630a_bank_b_ops = {
+	.options = BL_CORE_SUSPENDRESUME,
+	.update_status = lm3630a_bank_b_update_status,
+	.get_brightness = lm3630a_bank_b_get_brightness,
+};	/* ops of the "lm3630a_ledb" backlight device */
+
+/* Register a backlight device for each independently driven LED bank. */
+static int lm3630a_backlight_register(struct lm3630a_chip *pchip)
+{
+	struct lm3630a_platform_data *pdata = pchip->pdata;
+	/* start from zero: the core copies every field, not just ours */
+	struct backlight_properties props = { .type = BACKLIGHT_RAW };
+
+	if (pdata->leda_ctrl != LM3630A_LEDA_DISABLE) {
+		props.brightness = pdata->leda_init_brt;
+		props.max_brightness = pdata->leda_max_brt;
+		pchip->bleda = devm_backlight_device_register(pchip->dev,
+				"lm3630a_leda", pchip->dev, pchip,
+				&lm3630a_bank_a_ops, &props);
+		if (IS_ERR(pchip->bleda))
+			return PTR_ERR(pchip->bleda);
+	}
+
+	/* NOTE(review): LEDB_ON_A presumably ties LED B to bank A - confirm */
+	if ((pdata->ledb_ctrl != LM3630A_LEDB_DISABLE) &&
+	    (pdata->ledb_ctrl != LM3630A_LEDB_ON_A)) {
+		props.brightness = pdata->ledb_init_brt;
+		props.max_brightness = pdata->ledb_max_brt;
+		pchip->bledb = devm_backlight_device_register(pchip->dev,
+				"lm3630a_ledb", pchip->dev, pchip,
+				&lm3630a_bank_b_ops, &props);
+		if (IS_ERR(pchip->bledb))
+			return PTR_ERR(pchip->bledb);
+	}
+	return 0;
+}
+
+static const struct regmap_config lm3630a_regmap = {
+	.reg_bits = 8,			/* register address width in bits */
+	.val_bits = 8,			/* register value width in bits */
+	.max_register = REG_MAX,	/* highest valid register address */
+};
+
+/*
+ * Bind to an LM3630A: set up the regmap and platform data (built-in
+ * defaults when the board supplies none), initialize the chip, register
+ * the backlight devices, then claim the optional PWM and interrupt.
+ */
+static int lm3630a_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct lm3630a_platform_data *pdata = client->dev.platform_data;
+	struct lm3630a_chip *pchip;
+	int rval;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		dev_err(&client->dev, "fail : i2c functionality check\n");
+		return -EOPNOTSUPP;
+	}
+
+	pchip = devm_kzalloc(&client->dev, sizeof(struct lm3630a_chip),
+			     GFP_KERNEL);
+	if (!pchip)
+		return -ENOMEM;
+	pchip->dev = &client->dev;
+
+	pchip->regmap = devm_regmap_init_i2c(client, &lm3630a_regmap);
+	if (IS_ERR(pchip->regmap)) {
+		rval = PTR_ERR(pchip->regmap);
+		dev_err(&client->dev, "fail : allocate reg. map: %d\n", rval);
+		return rval;
+	}
+
+	i2c_set_clientdata(client, pchip);
+	if (pdata == NULL) {
+		pdata = devm_kzalloc(pchip->dev, sizeof(*pdata), GFP_KERNEL);
+		if (pdata == NULL)
+			return -ENOMEM;
+		/* default values; the zeroed pwm_ctrl is LM3630A_PWM_DISABLE */
+		pdata->leda_ctrl = LM3630A_LEDA_ENABLE;
+		pdata->ledb_ctrl = LM3630A_LEDB_ENABLE;
+		pdata->leda_max_brt = LM3630A_MAX_BRIGHTNESS;
+		pdata->ledb_max_brt = LM3630A_MAX_BRIGHTNESS;
+		pdata->leda_init_brt = LM3630A_MAX_BRIGHTNESS;
+		pdata->ledb_init_brt = LM3630A_MAX_BRIGHTNESS;
+	}
+	pchip->pdata = pdata;
+	/* chip initialize */
+	rval = lm3630a_chip_init(pchip);
+	if (rval < 0) {
+		dev_err(&client->dev, "fail : init chip\n");
+		return rval;
+	}
+	/* backlight register */
+	rval = lm3630a_backlight_register(pchip);
+	if (rval < 0) {
+		dev_err(&client->dev, "fail : backlight register.\n");
+		return rval;
+	}
+	/* pwm: pchip->pwmd stays NULL unless a PWM bank is configured */
+	if (pdata->pwm_ctrl != LM3630A_PWM_DISABLE) {
+		pchip->pwmd = devm_pwm_get(pchip->dev, "lm3630a-pwm");
+		if (IS_ERR(pchip->pwmd)) {
+			dev_err(&client->dev, "fail : get pwm device\n");
+			return PTR_ERR(pchip->pwmd);
+		}
+		pchip->pwmd->period = pdata->pwm_period;
+	}
+
+	/* interrupt enable  : irq 0 is not allowed */
+	pchip->irq = client->irq;
+	if (pchip->irq) {
+		rval = lm3630a_intr_config(pchip);
+		if (rval < 0)
+			return rval;
+	}
+	dev_info(&client->dev, "LM3630A backlight register OK.\n");
+	return 0;
+}
+
+/* Zero both brightness registers and tear down the interrupt, if any. */
+static int lm3630a_remove(struct i2c_client *client)
+{
+	struct lm3630a_chip *pchip = i2c_get_clientdata(client);
+
+	/* best effort: an I2C failure is logged, removal still continues */
+	if (lm3630a_write(pchip, REG_BRT_A, 0) < 0)
+		dev_err(pchip->dev, "i2c failed to access register\n");
+	if (lm3630a_write(pchip, REG_BRT_B, 0) < 0)
+		dev_err(pchip->dev, "i2c failed to access register\n");
+
+	if (!pchip->irq)
+		return 0;
+
+	/* release the irq before draining and destroying the irqthread queue */
+	free_irq(pchip->irq, pchip);
+	flush_workqueue(pchip->irqthread);
+	destroy_workqueue(pchip->irqthread);
+	return 0;
+}
+
+static const struct i2c_device_id lm3630a_id[] = {
+	{LM3630A_NAME, 0},	/* "lm3630a_bl" */
+	{}			/* empty entry terminates the table */
+};
+
+MODULE_DEVICE_TABLE(i2c, lm3630a_id);
+
+static struct i2c_driver lm3630a_i2c_driver = {
+	.driver = {
+		   .name = LM3630A_NAME,
+		   },
+	.probe = lm3630a_probe,
+	.remove = lm3630a_remove,
+	.id_table = lm3630a_id,
+};
+
+module_i2c_driver(lm3630a_i2c_driver);
+
+MODULE_DESCRIPTION("Texas Instruments Backlight driver for LM3630A");
+MODULE_AUTHOR("Daniel Jeong <gshark.jeong@gmail.com>");
+MODULE_AUTHOR("LDD MLP <ldd-mlp@list.ti.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/lm3630_bl.h b/include/linux/platform_data/lm3630_bl.h
deleted file mode 100644
index 9176dd3f2d63..000000000000
--- a/include/linux/platform_data/lm3630_bl.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
-* Simple driver for Texas Instruments LM3630 LED Flash driver chip
-* Copyright (C) 2012 Texas Instruments
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License version 2 as
-* published by the Free Software Foundation.
-*
-*/
-
-#ifndef __LINUX_LM3630_H
-#define __LINUX_LM3630_H
-
-#define LM3630_NAME "lm3630_bl"
-
-enum lm3630_pwm_ctrl {
-	PWM_CTRL_DISABLE = 0,
-	PWM_CTRL_BANK_A,
-	PWM_CTRL_BANK_B,
-	PWM_CTRL_BANK_ALL,
-};
-
-enum lm3630_pwm_active {
-	PWM_ACTIVE_HIGH = 0,
-	PWM_ACTIVE_LOW,
-};
-
-enum lm3630_bank_a_ctrl {
-	BANK_A_CTRL_DISABLE = 0x0,
-	BANK_A_CTRL_LED1 = 0x4,
-	BANK_A_CTRL_LED2 = 0x1,
-	BANK_A_CTRL_ALL = 0x5,
-};
-
-enum lm3630_bank_b_ctrl {
-	BANK_B_CTRL_DISABLE = 0,
-	BANK_B_CTRL_LED2,
-};
-
-struct lm3630_platform_data {
-
-	/* maximum brightness */
-	int max_brt_led1;
-	int max_brt_led2;
-
-	/* initial on brightness */
-	int init_brt_led1;
-	int init_brt_led2;
-	enum lm3630_pwm_ctrl pwm_ctrl;
-	enum lm3630_pwm_active pwm_active;
-	enum lm3630_bank_a_ctrl bank_a_ctrl;
-	enum lm3630_bank_b_ctrl bank_b_ctrl;
-	unsigned int pwm_period;
-	void (*pwm_set_intensity) (int brightness, int max_brightness);
-};
-
-#endif /* __LINUX_LM3630_H */
diff --git a/include/linux/platform_data/lm3630a_bl.h b/include/linux/platform_data/lm3630a_bl.h
new file mode 100644
index 000000000000..7538e38e270b
--- /dev/null
+++ b/include/linux/platform_data/lm3630a_bl.h
@@ -0,0 +1,65 @@
+/*
+* Platform data for the Texas Instruments LM3630A backlight driver chip
+* Copyright (C) 2012 Texas Instruments
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License version 2 as
+* published by the Free Software Foundation.
+*
+*/
+
+#ifndef __LINUX_LM3630A_H
+#define __LINUX_LM3630A_H
+
+#define LM3630A_NAME "lm3630a_bl"
+
+enum lm3630a_pwm_ctrl {
+	LM3630A_PWM_DISABLE = 0x00,
+	LM3630A_PWM_BANK_A,		/* 0x01 */
+	LM3630A_PWM_BANK_B,		/* 0x02 */
+	LM3630A_PWM_BANK_ALL,		/* 0x03: BANK_A | BANK_B */
+	LM3630A_PWM_BANK_A_ACT_LOW = 0x05,	/* 0x04 | BANK_A */
+	LM3630A_PWM_BANK_B_ACT_LOW,	/* 0x06: 0x04 | BANK_B */
+	LM3630A_PWM_BANK_ALL_ACT_LOW,	/* 0x07: 0x04 | BANK_ALL */
+};
+
+enum lm3630a_leda_ctrl {
+	LM3630A_LEDA_DISABLE = 0x00,
+	LM3630A_LEDA_ENABLE = 0x04,
+	LM3630A_LEDA_ENABLE_LINEAR = 0x14,	/* LEDA_ENABLE | 0x10 */
+};
+
+enum lm3630a_ledb_ctrl {
+	LM3630A_LEDB_DISABLE = 0x00,
+	LM3630A_LEDB_ON_A = 0x01,	/* no separate "lm3630a_ledb" device */
+	LM3630A_LEDB_ENABLE = 0x02,	/* also the REG_CTRL mask for LED B */
+	LM3630A_LEDB_ENABLE_LINEAR = 0x0A,	/* LEDB_ENABLE | 0x08 */
+};
+
+#define LM3630A_MAX_BRIGHTNESS 255
+/*
+ * @leda_init_brt : led a initial brightness, 4~255
+ * @leda_max_brt  : led a maximum brightness, 4~255
+ * @leda_ctrl     : led a disable, enable linear, enable exponential
+ * @ledb_init_brt : led b initial brightness, 4~255
+ * @ledb_max_brt  : led b maximum brightness, 4~255
+ * @ledb_ctrl     : led b disable, on a, enable linear, enable exponential
+ * @pwm_period    : pwm period, copied to the pwm device by the driver
+ * @pwm_ctrl      : pwm disable, bank a and/or b, active high or low
+ */
+struct lm3630a_platform_data {
+
+	/* led a config.  */
+	int leda_init_brt;
+	int leda_max_brt;
+	enum lm3630a_leda_ctrl leda_ctrl;
+	/* led b config. */
+	int ledb_init_brt;
+	int ledb_max_brt;
+	enum lm3630a_ledb_ctrl ledb_ctrl;
+	/* pwm config. */
+	unsigned int pwm_period;
+	enum lm3630a_pwm_ctrl pwm_ctrl;
+};
+
+#endif /* __LINUX_LM3630A_H */
-- 
cgit v1.2.3-59-g8ed1b


From ec778edf97dcaa517e5b0eea6f1a9ba9d476e4a8 Mon Sep 17 00:00:00 2001
From: Cody P Schafer <cody@linux.vnet.ibm.com>
Date: Tue, 12 Nov 2013 15:09:48 -0800
Subject: bitops/find: clarify and extend documentation

Add return value documentation and clarify the units of the @size
parameter.

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Cody P Schafer <cody@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bitops/find.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h
index 71c778033f57..998d4d544f18 100644
--- a/include/asm-generic/bitops/find.h
+++ b/include/asm-generic/bitops/find.h
@@ -7,6 +7,9 @@
  * @addr: The address to base the search on
  * @offset: The bitnumber to start searching at
  * @size: The bitmap size in bits
+ *
+ * Returns the bit number of the next set bit.
+ * If no bits are set, returns @size.
  */
 extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
 		size, unsigned long offset);
@@ -18,6 +21,9 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
  * @addr: The address to base the search on
  * @offset: The bitnumber to start searching at
  * @size: The bitmap size in bits
+ *
+ * Returns the bit number of the next zero bit.
+ * If no bits are zero, returns @size.
  */
 extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned
 		long size, unsigned long offset);
@@ -28,9 +34,10 @@ extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned
 /**
  * find_first_bit - find the first set bit in a memory region
  * @addr: The address to start the search at
- * @size: The maximum size to search
+ * @size: The maximum number of bits to search
  *
  * Returns the bit number of the first set bit.
+ * If no bits are set, returns @size.
  */
 extern unsigned long find_first_bit(const unsigned long *addr,
 				    unsigned long size);
@@ -38,9 +45,10 @@ extern unsigned long find_first_bit(const unsigned long *addr,
 /**
  * find_first_zero_bit - find the first cleared bit in a memory region
  * @addr: The address to start the search at
- * @size: The maximum size to search
+ * @size: The maximum number of bits to search
  *
  * Returns the bit number of the first cleared bit.
+ * If no bits are zero, returns @size.
  */
 extern unsigned long find_first_zero_bit(const unsigned long *addr,
 					 unsigned long size);
-- 
cgit v1.2.3-59-g8ed1b


From 684f0d3d14f2744ae7ad79063b21909e32bf444e Mon Sep 17 00:00:00 2001
From: Nicolin Chen <b42378@freescale.com>
Date: Tue, 12 Nov 2013 15:09:52 -0800
Subject: lib/genalloc: add a helper function for DMA buffer allocation

When pool space is used for DMA buffers, each implementation ends up repeating
the same pair of calls: gen_pool_alloc() followed by gen_pool_virt_to_phys().

Thus it's better to add a simple helper function, a compatible one to the
common dma_alloc_coherent(), to save some code.

Signed-off-by: Nicolin Chen <b42378@freescale.com>
Cc: "Hans J. Koch" <hjk@hansjkoch.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Eric Miao <eric.y.miao@gmail.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haojian Zhuang <haojian.zhuang@gmail.com>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Kevin Hilman <khilman@deeprootsystems.com>
Cc: Liam Girdwood <lgirdwood@gmail.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Mauro Carvalho Chehab <m.chehab@samsung.com>
Cc: Rob Herring <rob.herring@calxeda.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Sekhar Nori <nsekhar@ti.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/genalloc.h |  2 ++
 lib/genalloc.c           | 28 ++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index f8d41cb1cbe0..1eda33d7cb10 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -94,6 +94,8 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr,
 }
 extern void gen_pool_destroy(struct gen_pool *);
 extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
+extern void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size,
+		dma_addr_t *dma);
 extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
 extern void gen_pool_for_each_chunk(struct gen_pool *,
 	void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *);
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 26cf20be72b7..dda31168844f 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -312,6 +312,34 @@ retry:
 }
 EXPORT_SYMBOL(gen_pool_alloc);
 
+/**
+ * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ * @dma: where to store the dma-view physical address, or NULL to skip it
+ *
+ * Allocate the requested number of bytes from the specified pool using the
+ * pool allocation function (first-fit by default).  Returns the virtual
+ * address, or NULL if @pool is NULL or the allocation fails.  Can not be
+ * used in NMI handler on architectures without NMI-safe cmpxchg.
+ */
+void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
+{
+	unsigned long vaddr;
+
+	if (!pool)
+		return NULL;
+
+	vaddr = gen_pool_alloc(pool, size);
+	if (!vaddr)
+		return NULL;
+	if (dma)	/* callers that only need the mapping may pass NULL */
+		*dma = gen_pool_virt_to_phys(pool, vaddr);
+
+	return (void *)vaddr;
+}
+EXPORT_SYMBOL(gen_pool_dma_alloc);
+
 /**
  * gen_pool_free - free allocated special memory back to the pool
  * @pool: pool to free to
-- 
cgit v1.2.3-59-g8ed1b


From 008208c6b26f21c2648c250a09c55e737c02c5f8 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 12 Nov 2013 15:10:01 -0800
Subject: list: introduce list_next_entry() and list_prev_entry()

Add two trivial helpers list_next_entry() and list_prev_entry(), they
can have a lot of users including list.h itself.  In fact the 1st one is
already defined in events/core.c and bnx2x_sp.c, so the patch simply
moves the definition to list.h.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Eilon Greenstein <eilong@broadcom.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c |  3 ---
 include/linux/list.h                           | 16 ++++++++++++++++
 kernel/events/core.c                           |  3 ---
 3 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index 9fbeee522d2c..32c92abf5094 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -1217,9 +1217,6 @@ static void bnx2x_set_one_vlan_mac_e1h(struct bnx2x *bp,
 				     ETH_VLAN_FILTER_CLASSIFY, config);
 }
 
-#define list_next_entry(pos, member) \
-	list_entry((pos)->member.next, typeof(*(pos)), member)
-
 /**
  * bnx2x_vlan_mac_restore - reconfigure next MAC/VLAN/VLAN-MAC element
  *
diff --git a/include/linux/list.h b/include/linux/list.h
index f4d8a2f12a33..2ece63847001 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -372,6 +372,22 @@ static inline void list_splice_tail_init(struct list_head *list,
 #define list_first_entry_or_null(ptr, type, member) \
 	(!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
 
+/**
+ * list_next_entry - get the next element in list
+ * @pos:	the type * of the current entry (the cursor).
+ * @member:	the name of the list_head within the struct.
+ */
+#define list_next_entry(pos, member) \
+	list_entry((pos)->member.next, typeof(*(pos)), member)
+
+/**
+ * list_prev_entry - get the prev element in list
+ * @pos:	the type * of the current entry (the cursor).
+ * @member:	the name of the list_head within the struct.
+ */
+#define list_prev_entry(pos, member) \
+	list_entry((pos)->member.prev, typeof(*(pos)), member)
+
 /**
  * list_for_each	-	iterate over a list
  * @pos:	the &struct list_head to use as a loop cursor.
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8c875ef6e120..d724e7757cd1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2234,9 +2234,6 @@ static void __perf_event_sync_stat(struct perf_event *event,
 	perf_event_update_userpage(next_event);
 }
 
-#define list_next_entry(pos, member) \
-	list_entry(pos->member.next, typeof(*pos), member)
-
 static void perf_event_sync_stat(struct perf_event_context *ctx,
 				   struct perf_event_context *next_ctx)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 8120e2e5141a420edee725ff28f18aa264795f7a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 12 Nov 2013 15:10:02 -0800
Subject: list: change list_for_each_entry*() to use list_*_entry()

Now that we have list_{next,prev}_entry() we can change
list_for_each_entry*() and list_safe_reset_next() to use the new helpers
to improve the readability.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Eilon Greenstein <eilong@broadcom.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/list.h b/include/linux/list.h
index 2ece63847001..c88a591d1c02 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -433,8 +433,8 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_entry(pos, head, member)				\
 	for (pos = list_entry((head)->next, typeof(*pos), member);	\
-	     &pos->member != (head); 	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
+	     &pos->member != (head);					\
+	     pos = list_next_entry(pos, member))
 
 /**
  * list_for_each_entry_reverse - iterate backwards over list of given type.
@@ -444,8 +444,8 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_entry_reverse(pos, head, member)			\
 	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
-	     &pos->member != (head); 	\
-	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+	     &pos->member != (head); 					\
+	     pos = list_prev_entry(pos, member))
 
 /**
  * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue()
@@ -468,9 +468,9 @@ static inline void list_splice_tail_init(struct list_head *list,
  * the current position.
  */
 #define list_for_each_entry_continue(pos, head, member) 		\
-	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
-	     &pos->member != (head);	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
+	for (pos = list_next_entry(pos, member);			\
+	     &pos->member != (head);					\
+	     pos = list_next_entry(pos, member))
 
 /**
  * list_for_each_entry_continue_reverse - iterate backwards from the given point
@@ -482,9 +482,9 @@ static inline void list_splice_tail_init(struct list_head *list,
  * the current position.
  */
 #define list_for_each_entry_continue_reverse(pos, head, member)		\
-	for (pos = list_entry(pos->member.prev, typeof(*pos), member);	\
-	     &pos->member != (head);	\
-	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+	for (pos = list_prev_entry(pos, member);			\
+	     &pos->member != (head);					\
+	     pos = list_prev_entry(pos, member))
 
 /**
  * list_for_each_entry_from - iterate over list of given type from the current point
@@ -495,8 +495,8 @@ static inline void list_splice_tail_init(struct list_head *list,
  * Iterate over list of given type, continuing from current position.
  */
 #define list_for_each_entry_from(pos, head, member) 			\
-	for (; &pos->member != (head);	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
+	for (; &pos->member != (head);					\
+	     pos = list_next_entry(pos, member))
 
 /**
  * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
@@ -507,9 +507,9 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_entry_safe(pos, n, head, member)			\
 	for (pos = list_entry((head)->next, typeof(*pos), member),	\
-		n = list_entry(pos->member.next, typeof(*pos), member);	\
+		n = list_next_entry(pos, member);			\
 	     &pos->member != (head); 					\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+	     pos = n, n = list_next_entry(n, member))
 
 /**
  * list_for_each_entry_safe_continue - continue list iteration safe against removal
@@ -522,10 +522,10 @@ static inline void list_splice_tail_init(struct list_head *list,
  * safe against removal of list entry.
  */
 #define list_for_each_entry_safe_continue(pos, n, head, member) 		\
-	for (pos = list_entry(pos->member.next, typeof(*pos), member), 		\
-		n = list_entry(pos->member.next, typeof(*pos), member);		\
+	for (pos = list_next_entry(pos, member), 				\
+		n = list_next_entry(pos, member);				\
 	     &pos->member != (head);						\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+	     pos = n, n = list_next_entry(n, member))
 
 /**
  * list_for_each_entry_safe_from - iterate over list from current point safe against removal
@@ -538,9 +538,9 @@ static inline void list_splice_tail_init(struct list_head *list,
  * removal of list entry.
  */
 #define list_for_each_entry_safe_from(pos, n, head, member) 			\
-	for (n = list_entry(pos->member.next, typeof(*pos), member);		\
+	for (n = list_next_entry(pos, member);					\
 	     &pos->member != (head);						\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+	     pos = n, n = list_next_entry(n, member))
 
 /**
  * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
@@ -554,9 +554,9 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_entry_safe_reverse(pos, n, head, member)		\
 	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
-		n = list_entry(pos->member.prev, typeof(*pos), member);	\
+		n = list_prev_entry(pos, member);			\
 	     &pos->member != (head); 					\
-	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+	     pos = n, n = list_prev_entry(n, member))
 
 /**
  * list_safe_reset_next - reset a stale list_for_each_entry_safe loop
@@ -571,7 +571,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * completing the current iteration of the loop body.
  */
 #define list_safe_reset_next(pos, n, member)				\
-	n = list_entry(pos->member.next, typeof(*pos), member)
+	n = list_next_entry(pos, member)
 
 /*
  * Double linked lists with a single pointer list head.
-- 
cgit v1.2.3-59-g8ed1b


From 93be3c2eb3371f022ad88acf1ab6bee8e3c38378 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 12 Nov 2013 15:10:03 -0800
Subject: list: introduce list_last_entry(), use list_{first,last}_entry()

We already have list_first_entry(), it makes sense to also add
list_last_entry() for consistency.  And we use both helpers in
list_for_each_*().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Eilon Greenstein <eilong@broadcom.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/list.h b/include/linux/list.h
index c88a591d1c02..ef9594171062 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -361,6 +361,17 @@ static inline void list_splice_tail_init(struct list_head *list,
 #define list_first_entry(ptr, type, member) \
 	list_entry((ptr)->next, type, member)
 
+/**
+ * list_last_entry - get the last element from a list
+ * @ptr:	the list head to take the element from.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_head within the struct.
+ *
+ * Note that the list is expected to be non-empty.
+ */
+#define list_last_entry(ptr, type, member) \
+	list_entry((ptr)->prev, type, member)
+
 /**
  * list_first_entry_or_null - get the first element from a list
  * @ptr:	the list head to take the element from.
@@ -432,7 +443,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @member:	the name of the list_struct within the struct.
  */
 #define list_for_each_entry(pos, head, member)				\
-	for (pos = list_entry((head)->next, typeof(*pos), member);	\
+	for (pos = list_first_entry(head, typeof(*pos), member);	\
 	     &pos->member != (head);					\
 	     pos = list_next_entry(pos, member))
 
@@ -443,7 +454,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @member:	the name of the list_struct within the struct.
  */
 #define list_for_each_entry_reverse(pos, head, member)			\
-	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
+	for (pos = list_last_entry(head, typeof(*pos), member);		\
 	     &pos->member != (head); 					\
 	     pos = list_prev_entry(pos, member))
 
@@ -506,7 +517,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @member:	the name of the list_struct within the struct.
  */
 #define list_for_each_entry_safe(pos, n, head, member)			\
-	for (pos = list_entry((head)->next, typeof(*pos), member),	\
+	for (pos = list_first_entry(head, typeof(*pos), member),	\
 		n = list_next_entry(pos, member);			\
 	     &pos->member != (head); 					\
 	     pos = n, n = list_next_entry(n, member))
@@ -553,7 +564,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * of list entry.
  */
 #define list_for_each_entry_safe_reverse(pos, n, head, member)		\
-	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
+	for (pos = list_last_entry(head, typeof(*pos), member),		\
 		n = list_prev_entry(pos, member);			\
 	     &pos->member != (head); 					\
 	     pos = n, n = list_prev_entry(n, member))
-- 
cgit v1.2.3-59-g8ed1b


From 65321547c8be5b00427ac8de23fd15801b68de1f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 12 Nov 2013 15:10:19 -0800
Subject: init.h: document the existence of __initconst

Initdata has been allowed to be const for more than 5 years, by using the
__initconst annotation.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/init.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/init.h b/include/linux/init.h
index f1c27a71d03c..8e68a64bfe00 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -26,8 +26,8 @@
  * extern int initialize_foobar_device(int, int, int) __init;
  *
  * For initialized data:
- * You should insert __initdata between the variable name and equal
- * sign followed by value, e.g.:
+ * You should insert __initdata or __initconst between the variable name
+ * and equal sign followed by value, e.g.:
  *
  * static int init_variable __initdata = 0;
  * static const char linux_logo[] __initconst = { 0x32, 0x36, ... };
@@ -35,8 +35,6 @@
  * Don't forget to initialize data not at file scope, i.e. within a function,
  * as gcc otherwise puts the data into the bss section and not into the init
  * section.
- * 
- * Also note, that this data cannot be "const".
  */
 
 /* These are for everybody (although not all archs will actually
-- 
cgit v1.2.3-59-g8ed1b


From 5bccae6ec4587044779f0b8e6fcb8f87db4181f0 Mon Sep 17 00:00:00 2001
From: Sangbeom Kim <sbkim73@samsung.com>
Date: Tue, 12 Nov 2013 15:11:04 -0800
Subject: rtc: s5m-rtc: add real-time clock driver for s5m8767

Add real-time clock driver for s5m8767.

Signed-off-by: Sangbeom Kim <sbkim73@samsung.com>
Signed-off-by: Sachin Kamat <sachin.kamat@linaro.org>
Cc: Todd Broch <tbroch@chromium.org>
Cc: Mark Brown <broonie@kernel.org>
Acked-by: Lee Jones <lee.jones@linaro.org>	[mfd parts]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/Kconfig              |  10 +
 drivers/rtc/Makefile             |   1 +
 drivers/rtc/rtc-s5m.c            | 635 +++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/samsung/core.h |   1 +
 include/linux/mfd/samsung/rtc.h  |  11 +
 5 files changed, 658 insertions(+)
 create mode 100644 drivers/rtc/rtc-s5m.c

(limited to 'include')

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 9654aa3c05cb..4f48b9a26aa0 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -497,6 +497,16 @@ config RTC_DRV_RV3029C2
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-rv3029c2.
 
+config RTC_DRV_S5M
+	tristate "Samsung S5M series"
+	depends on MFD_SEC_CORE
+	help
+	  If you say yes here you will get support for the
+	  RTC of Samsung S5M PMIC series.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-s5m.
+
 endif # I2C
 
 comment "SPI RTC drivers"
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 2dff3d2009b5..9312e7965365 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -107,6 +107,7 @@ obj-$(CONFIG_RTC_DRV_RX8025)	+= rtc-rx8025.o
 obj-$(CONFIG_RTC_DRV_RX8581)	+= rtc-rx8581.o
 obj-$(CONFIG_RTC_DRV_S35390A)	+= rtc-s35390a.o
 obj-$(CONFIG_RTC_DRV_S3C)	+= rtc-s3c.o
+obj-$(CONFIG_RTC_DRV_S5M)	+= rtc-s5m.o
 obj-$(CONFIG_RTC_DRV_SA1100)	+= rtc-sa1100.o
 obj-$(CONFIG_RTC_DRV_SH)	+= rtc-sh.o
 obj-$(CONFIG_RTC_DRV_SNVS)	+= rtc-snvs.o
diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
new file mode 100644
index 000000000000..b7fd02bc0a14
--- /dev/null
+++ b/drivers/rtc/rtc-s5m.c
@@ -0,0 +1,635 @@
+/*
+ * Copyright (c) 2013 Samsung Electronics Co., Ltd
+ *	http://www.samsung.com
+ *
+ *  Copyright (C) 2013 Google, Inc
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/bcd.h>
+#include <linux/bitops.h>
+#include <linux/regmap.h>
+#include <linux/rtc.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/samsung/core.h>
+#include <linux/mfd/samsung/irq.h>
+#include <linux/mfd/samsung/rtc.h>
+
+/*
+ * Driver state for one S5M RTC instance.  Allocated in s5m_rtc_probe() and
+ * stored as the platform device's driver data.
+ */
+struct s5m_rtc_info {
+	/* The RTC platform device; used for logging and drvdata lookup. */
+	struct device *dev;
+	/* Parent PMIC core state, taken from the parent device's drvdata. */
+	struct sec_pmic_dev *s5m87xx;
+	/* Regmap (copied from the parent) used for every RTC register access. */
+	struct regmap *rtc;
+	/* Handle returned by devm_rtc_device_register(). */
+	struct rtc_device *rtc_dev;
+	/* Alarm interrupt number: parent irq_base plus the per-chip alarm IRQ. */
+	int irq;
+	/* Chip variant (S5M8763X or S5M8767X); selects the register encoding. */
+	int device_type;
+	/* Non-zero when hours are kept in 24-hour form (set at register init). */
+	int rtc_24hr_mode;
+	/* Copied from the parent; when true, probe enables WTSR and SMPL. */
+	bool wtsr_smpl;
+};
+
+/*
+ * Decode a raw S5M8767 time/alarm register dump into a struct rtc_time.
+ *
+ * @data:          register bytes, indexed by RTC_SEC .. RTC_YEAR1
+ * @tm:            output; every field is overwritten
+ * @rtc_24hr_mode: non-zero when the hour register holds a 24-hour value,
+ *                 zero when it holds a 12-hour value plus HOUR_PM_MASK
+ *
+ * Bit 7 of each register is masked off because the alarm code in this file
+ * uses it as ALARM_ENABLE_MASK.  The year register is an offset from 2000.
+ */
+static void s5m8767_data_to_tm(u8 *data, struct rtc_time *tm,
+			       int rtc_24hr_mode)
+{
+	int wday_bits = data[RTC_WEEKDAY] & 0x7f;
+
+	tm->tm_sec = data[RTC_SEC] & 0x7f;
+	tm->tm_min = data[RTC_MIN] & 0x7f;
+	if (rtc_24hr_mode) {
+		tm->tm_hour = data[RTC_HOUR] & 0x1f;
+	} else {
+		/* NOTE(review): 12 PM decodes to 24 here - confirm HW encoding */
+		tm->tm_hour = data[RTC_HOUR] & 0x0f;
+		if (data[RTC_HOUR] & HOUR_PM_MASK)
+			tm->tm_hour += 12;
+	}
+
+	/*
+	 * The weekday is stored one-hot: s5m8767_tm_to_data() writes
+	 * 1 << tm_wday.  ffs() numbers bits from 1, so subtract one to get
+	 * back the 0-based tm_wday; an empty bitmap is reported as day 0.
+	 */
+	tm->tm_wday = wday_bits ? ffs(wday_bits) - 1 : 0;
+	tm->tm_mday = data[RTC_DATE] & 0x1f;
+	tm->tm_mon = (data[RTC_MONTH] & 0x0f) - 1;
+	tm->tm_year = (data[RTC_YEAR1] & 0x7f) + 100;
+	tm->tm_yday = 0;
+	tm->tm_isdst = 0;
+}
+
+/*
+ * Encode @tm into the S5M8767 register layout, filling data[RTC_SEC]
+ * through data[RTC_YEAR1].
+ *
+ * The buffer is filled unconditionally.  Returns 0 on success, or -EINVAL
+ * (after logging) when the year is before 2000, in which case the year
+ * byte has been written as 0.
+ */
+static int s5m8767_tm_to_data(struct rtc_time *tm, u8 *data)
+{
+	int hour = tm->tm_hour;
+	int year = tm->tm_year;
+
+	/* The PM flag is set for any hour from noon onwards. */
+	if (hour >= 12)
+		hour |= HOUR_PM_MASK;
+	else
+		hour &= ~HOUR_PM_MASK;
+
+	data[RTC_SEC] = tm->tm_sec;
+	data[RTC_MIN] = tm->tm_min;
+	data[RTC_HOUR] = hour;
+	/* Weekday is stored as a one-hot bitmap. */
+	data[RTC_WEEKDAY] = 1 << tm->tm_wday;
+	data[RTC_DATE] = tm->tm_mday;
+	data[RTC_MONTH] = tm->tm_mon + 1;
+	/* The year register counts from 2000 (tm_year == 100). */
+	data[RTC_YEAR1] = year > 100 ? (year - 100) : 0;
+
+	if (year >= 100)
+		return 0;
+
+	pr_err("s5m8767 RTC cannot handle the year %d.\n",
+	       1900 + tm->tm_year);
+	return -EINVAL;
+}
+
+/*
+ * Request a time-register update: set RTC_TIME_EN_MASK and RTC_UDR_MASK in
+ * SEC_RTC_UDR_CON, then poll until RTC_UDR_MASK reads back as zero
+ * (presumably the hardware clears it once the new values are taken).
+ *
+ * The poll is bounded so that a device which never clears the bit cannot
+ * hang the caller.
+ *
+ * Returns 0 on success, a negative regmap error on bus failure, or
+ * -ETIMEDOUT if the update bit is still set after the last poll.
+ */
+static inline int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info)
+{
+	int ret;
+	/* NOTE(review): poll limit is a conservative guess - confirm with HW */
+	int retry = 100;
+	unsigned int data;
+
+	ret = regmap_read(info->rtc, SEC_RTC_UDR_CON, &data);
+	if (ret < 0) {
+		dev_err(info->dev, "failed to read update reg(%d)\n", ret);
+		return ret;
+	}
+
+	data |= RTC_TIME_EN_MASK;
+	data |= RTC_UDR_MASK;
+
+	ret = regmap_write(info->rtc, SEC_RTC_UDR_CON, data);
+	if (ret < 0) {
+		dev_err(info->dev, "failed to write update reg(%d)\n", ret);
+		return ret;
+	}
+
+	/* retry only reaches zero when the bit was still set on every poll. */
+	do {
+		ret = regmap_read(info->rtc, SEC_RTC_UDR_CON, &data);
+	} while (!ret && (data & RTC_UDR_MASK) && --retry);
+
+	if (!ret && !retry) {
+		dev_err(info->dev, "timed out waiting for time update\n");
+		ret = -ETIMEDOUT;
+	}
+
+	return ret;
+}
+
+/*
+ * Request an alarm-register update: clear RTC_TIME_EN_MASK and set
+ * RTC_UDR_MASK in SEC_RTC_UDR_CON, then poll until RTC_UDR_MASK reads back
+ * as zero (presumably the hardware clears it once the values are taken).
+ *
+ * The poll is bounded so that a device which never clears the bit cannot
+ * hang the caller.
+ *
+ * Returns 0 on success, a negative regmap error on bus failure, or
+ * -ETIMEDOUT if the update bit is still set after the last poll.
+ */
+static inline int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info)
+{
+	int ret;
+	/* NOTE(review): poll limit is a conservative guess - confirm with HW */
+	int retry = 100;
+	unsigned int data;
+
+	ret = regmap_read(info->rtc, SEC_RTC_UDR_CON, &data);
+	if (ret < 0) {
+		dev_err(info->dev, "%s: fail to read update reg(%d)\n",
+			__func__, ret);
+		return ret;
+	}
+
+	data &= ~RTC_TIME_EN_MASK;
+	data |= RTC_UDR_MASK;
+
+	ret = regmap_write(info->rtc, SEC_RTC_UDR_CON, data);
+	if (ret < 0) {
+		dev_err(info->dev, "%s: fail to write update reg(%d)\n",
+			__func__, ret);
+		return ret;
+	}
+
+	/* retry only reaches zero when the bit was still set on every poll. */
+	do {
+		ret = regmap_read(info->rtc, SEC_RTC_UDR_CON, &data);
+	} while (!ret && (data & RTC_UDR_MASK) && --retry);
+
+	if (!ret && !retry) {
+		dev_err(info->dev, "%s: timed out waiting for alarm update\n",
+			__func__);
+		ret = -ETIMEDOUT;
+	}
+
+	return ret;
+}
+
+/*
+ * Decode a raw S5M8763 register dump (BCD encoded) into @tm.
+ *
+ * Sets sec, min, hour, wday, mday, mon and year; tm_yday and tm_isdst are
+ * left untouched.  The month is passed through without any offset.
+ */
+static void s5m8763_data_to_tm(u8 *data, struct rtc_time *tm)
+{
+	u8 hour_reg = data[RTC_HOUR];
+	int hour;
+
+	if (hour_reg & HOUR_12) {
+		/* 12-hour format: five BCD bits plus a separate PM flag. */
+		hour = bcd2bin(hour_reg & 0x1f);
+		if (hour_reg & HOUR_PM)
+			hour += 12;
+	} else {
+		hour = bcd2bin(hour_reg & 0x3f);
+	}
+
+	tm->tm_sec = bcd2bin(data[RTC_SEC]);
+	tm->tm_min = bcd2bin(data[RTC_MIN]);
+	tm->tm_hour = hour;
+
+	tm->tm_wday = data[RTC_WEEKDAY] & 0x07;
+	tm->tm_mday = bcd2bin(data[RTC_DATE]);
+	tm->tm_mon = bcd2bin(data[RTC_MONTH]);
+	/* YEAR2 holds the century, YEAR1 the two low digits. */
+	tm->tm_year = bcd2bin(data[RTC_YEAR2]) * 100 +
+		      bcd2bin(data[RTC_YEAR1]) - 1900;
+}
+
+/*
+ * Encode @tm into the S5M8763 register layout (BCD), filling data[RTC_SEC]
+ * through data[RTC_YEAR2].  The weekday is stored as a plain number and the
+ * month is stored without any offset.
+ */
+static void s5m8763_tm_to_data(struct rtc_time *tm, u8 *data)
+{
+	const int century = (tm->tm_year + 1900) / 100;
+	const int year_in_century = tm->tm_year % 100;
+
+	/* Date fields */
+	data[RTC_YEAR2] = bin2bcd(century);
+	data[RTC_YEAR1] = bin2bcd(year_in_century);
+	data[RTC_MONTH] = bin2bcd(tm->tm_mon);
+	data[RTC_DATE] = bin2bcd(tm->tm_mday);
+	data[RTC_WEEKDAY] = tm->tm_wday;
+
+	/* Time-of-day fields */
+	data[RTC_HOUR] = bin2bcd(tm->tm_hour);
+	data[RTC_MIN] = bin2bcd(tm->tm_min);
+	data[RTC_SEC] = bin2bcd(tm->tm_sec);
+}
+
+/*
+ * rtc_class_ops.read_time: bulk-read the eight time registers starting at
+ * SEC_RTC_SEC and decode them according to the chip variant.
+ *
+ * Returns the result of rtc_valid_tm() on the decoded time, a negative
+ * regmap error if the read fails, or -EINVAL for an unknown device type.
+ */
+static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct s5m_rtc_info *info = dev_get_drvdata(dev);
+	u8 regs[8];
+	int err;
+
+	err = regmap_bulk_read(info->rtc, SEC_RTC_SEC, regs, 8);
+	if (err < 0)
+		return err;
+
+	if (info->device_type == S5M8763X)
+		s5m8763_data_to_tm(regs, tm);
+	else if (info->device_type == S5M8767X)
+		s5m8767_data_to_tm(regs, tm, info->rtc_24hr_mode);
+	else
+		return -EINVAL;
+
+	dev_dbg(dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+		1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday,
+		tm->tm_hour, tm->tm_min, tm->tm_sec, tm->tm_wday);
+
+	return rtc_valid_tm(tm);
+}
+
+/*
+ * rtc_class_ops.set_time: encode @tm for the chip variant, write the eight
+ * time registers starting at SEC_RTC_SEC, then request the update through
+ * s5m8767_rtc_set_time_reg().
+ *
+ * Returns 0 on success, -EINVAL for an unknown device type or a year the
+ * encoder rejects, or a negative regmap error.
+ */
+static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct s5m_rtc_info *info = dev_get_drvdata(dev);
+	/*
+	 * Zero-initialised: the S5M8767 encoder only fills seven bytes, but
+	 * all eight are written below, so the last one must not be stack
+	 * garbage.
+	 */
+	u8 data[8] = { 0 };
+	int ret = 0;
+
+	switch (info->device_type) {
+	case S5M8763X:
+		s5m8763_tm_to_data(tm, data);
+		break;
+	case S5M8767X:
+		ret = s5m8767_tm_to_data(tm, data);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (ret < 0)
+		return ret;
+
+	dev_dbg(dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+		1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday,
+		tm->tm_hour, tm->tm_min, tm->tm_sec, tm->tm_wday);
+
+	ret = regmap_raw_write(info->rtc, SEC_RTC_SEC, data, 8);
+	if (ret < 0)
+		return ret;
+
+	/* The update request is issued for both chip variants. */
+	ret = s5m8767_rtc_set_time_reg(info);
+
+	return ret;
+}
+
+/*
+ * rtc_class_ops.read_alarm: fetch the ALARM0 registers and report the alarm
+ * time together with its enabled and pending state.
+ *
+ * Returns 0 on success, a negative regmap error if any register read fails,
+ * or -EINVAL for an unrecognised device type.
+ */
+static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct s5m_rtc_info *info = dev_get_drvdata(dev);
+	unsigned int reg;
+	u8 regs[8];
+	int idx, err;
+
+	err = regmap_bulk_read(info->rtc, SEC_ALARM0_SEC, regs, 8);
+	if (err < 0)
+		return err;
+
+	if (info->device_type == S5M8763X) {
+		s5m8763_data_to_tm(regs, &alrm->time);
+
+		/* Any non-zero ALARM0_CONF value counts as enabled. */
+		err = regmap_read(info->rtc, SEC_ALARM0_CONF, &reg);
+		if (err < 0)
+			return err;
+		alrm->enabled = !!reg;
+	} else if (info->device_type == S5M8767X) {
+		s5m8767_data_to_tm(regs, &alrm->time, info->rtc_24hr_mode);
+		dev_dbg(dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+			1900 + alrm->time.tm_year, 1 + alrm->time.tm_mon,
+			alrm->time.tm_mday, alrm->time.tm_hour,
+			alrm->time.tm_min, alrm->time.tm_sec,
+			alrm->time.tm_wday);
+
+		/* Enabled when any of the seven fields has its enable bit. */
+		alrm->enabled = 0;
+		for (idx = 0; idx < 7 && !alrm->enabled; idx++) {
+			if (regs[idx] & ALARM_ENABLE_MASK)
+				alrm->enabled = 1;
+		}
+
+		alrm->pending = 0;
+	} else {
+		return -EINVAL;
+	}
+
+	/* Both variants report the pending state through SEC_RTC_STATUS. */
+	err = regmap_read(info->rtc, SEC_RTC_STATUS, &reg);
+	if (err < 0)
+		return err;
+
+	alrm->pending = (reg & ALARM0_STATUS) ? 1 : 0;
+
+	return 0;
+}
+
+/*
+ * Disable ALARM0.
+ *
+ * On S5M8763X this writes 0 to SEC_ALARM0_CONF.  On S5M8767X it clears
+ * ALARM_ENABLE_MASK in the first seven alarm registers, writes them back
+ * and requests the update through s5m8767_rtc_set_alarm_reg().
+ *
+ * Returns 0 on success, a negative regmap error, or -EINVAL for an unknown
+ * device type.
+ */
+static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
+{
+	u8 data[8];
+	int ret, i;
+	struct rtc_time tm;
+
+	ret = regmap_bulk_read(info->rtc, SEC_ALARM0_SEC, data, 8);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Decoded for the debug message only; note that the S5M8767 layout
+	 * is used here regardless of the actual device type.
+	 */
+	s5m8767_data_to_tm(data, &tm, info->rtc_24hr_mode);
+	dev_dbg(info->dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+		1900 + tm.tm_year, 1 + tm.tm_mon, tm.tm_mday,
+		tm.tm_hour, tm.tm_min, tm.tm_sec, tm.tm_wday);
+
+	switch (info->device_type) {
+	case S5M8763X:
+		ret = regmap_write(info->rtc, SEC_ALARM0_CONF, 0);
+		break;
+
+	case S5M8767X:
+		/* Strip the per-field enable bit, keeping the alarm time. */
+		for (i = 0; i < 7; i++)
+			data[i] &= ~ALARM_ENABLE_MASK;
+
+		ret = regmap_raw_write(info->rtc, SEC_ALARM0_SEC, data, 8);
+		if (ret < 0)
+			return ret;
+
+		ret = s5m8767_rtc_set_alarm_reg(info);
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+/*
+ * Enable ALARM0 using the alarm time already stored in the hardware.
+ *
+ * On S5M8763X this writes 0x77 to SEC_ALARM0_CONF.  On S5M8767X it sets
+ * ALARM_ENABLE_MASK on the individual alarm fields, writes them back and
+ * requests the update through s5m8767_rtc_set_alarm_reg().
+ *
+ * Returns 0 on success, a negative regmap error, or -EINVAL for an unknown
+ * device type.
+ */
+static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
+{
+	int ret;
+	u8 data[8];
+	u8 alarm0_conf;
+	struct rtc_time tm;
+
+	ret = regmap_bulk_read(info->rtc, SEC_ALARM0_SEC, data, 8);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Decoded for the debug message only; note that the S5M8767 layout
+	 * is used here regardless of the actual device type.
+	 */
+	s5m8767_data_to_tm(data, &tm, info->rtc_24hr_mode);
+	dev_dbg(info->dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+		1900 + tm.tm_year, 1 + tm.tm_mon, tm.tm_mday,
+		tm.tm_hour, tm.tm_min, tm.tm_sec, tm.tm_wday);
+
+	switch (info->device_type) {
+	case S5M8763X:
+		/* NOTE(review): meaning of the 0x77 bit pattern is not shown here */
+		alarm0_conf = 0x77;
+		ret = regmap_write(info->rtc, SEC_ALARM0_CONF, alarm0_conf);
+		break;
+
+	case S5M8767X:
+		/* Seconds, minutes and hours always take part in the match. */
+		data[RTC_SEC] |= ALARM_ENABLE_MASK;
+		data[RTC_MIN] |= ALARM_ENABLE_MASK;
+		data[RTC_HOUR] |= ALARM_ENABLE_MASK;
+		/* The weekday field is never enabled. */
+		data[RTC_WEEKDAY] &= ~ALARM_ENABLE_MASK;
+		/* Date, month and year are enabled only when non-zero. */
+		if (data[RTC_DATE] & 0x1f)
+			data[RTC_DATE] |= ALARM_ENABLE_MASK;
+		if (data[RTC_MONTH] & 0xf)
+			data[RTC_MONTH] |= ALARM_ENABLE_MASK;
+		if (data[RTC_YEAR1] & 0x7f)
+			data[RTC_YEAR1] |= ALARM_ENABLE_MASK;
+
+		ret = regmap_raw_write(info->rtc, SEC_ALARM0_SEC, data, 8);
+		if (ret < 0)
+			return ret;
+		ret = s5m8767_rtc_set_alarm_reg(info);
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+/*
+ * rtc_class_ops.set_alarm: program ALARM0 with @alrm->time and enable it
+ * when @alrm->enabled is set.
+ *
+ * The alarm is stopped first, the new time is written and latched through
+ * s5m8767_rtc_set_alarm_reg(), and only then is the alarm (re)started.
+ *
+ * Returns 0 on success, -EINVAL for an unknown device type or a year the
+ * encoder rejects, or a negative regmap error.
+ */
+static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct s5m_rtc_info *info = dev_get_drvdata(dev);
+	/*
+	 * Zero-initialised: the S5M8767 encoder only fills seven bytes, but
+	 * all eight are written below, so the last one must not be stack
+	 * garbage.
+	 */
+	u8 data[8] = { 0 };
+	int ret = 0;
+
+	switch (info->device_type) {
+	case S5M8763X:
+		s5m8763_tm_to_data(&alrm->time, data);
+		break;
+
+	case S5M8767X:
+		/* Reject unrepresentable years, as s5m_rtc_set_time() does. */
+		ret = s5m8767_tm_to_data(&alrm->time, data);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (ret < 0)
+		return ret;
+
+	dev_dbg(dev, "%s: %d/%d/%d %d:%d:%d(%d)\n", __func__,
+		1900 + alrm->time.tm_year, 1 + alrm->time.tm_mon,
+		alrm->time.tm_mday, alrm->time.tm_hour, alrm->time.tm_min,
+		alrm->time.tm_sec, alrm->time.tm_wday);
+
+	ret = s5m_rtc_stop_alarm(info);
+	if (ret < 0)
+		return ret;
+
+	ret = regmap_raw_write(info->rtc, SEC_ALARM0_SEC, data, 8);
+	if (ret < 0)
+		return ret;
+
+	ret = s5m8767_rtc_set_alarm_reg(info);
+	if (ret < 0)
+		return ret;
+
+	if (alrm->enabled)
+		ret = s5m_rtc_start_alarm(info);
+
+	return ret;
+}
+
+/*
+ * rtc_class_ops.alarm_irq_enable: start ALARM0 when @enabled is non-zero,
+ * stop it otherwise.  Returns whatever the start/stop helper returns.
+ */
+static int s5m_rtc_alarm_irq_enable(struct device *dev,
+				    unsigned int enabled)
+{
+	struct s5m_rtc_info *info = dev_get_drvdata(dev);
+
+	return enabled ? s5m_rtc_start_alarm(info) : s5m_rtc_stop_alarm(info);
+}
+
+/*
+ * Alarm interrupt handler (installed as the threaded handler in probe).
+ * Forwards a single alarm event to the RTC core; @data is the driver's
+ * struct s5m_rtc_info.
+ */
+static irqreturn_t s5m_rtc_alarm_irq(int irq, void *data)
+{
+	struct s5m_rtc_info *rtc_info = data;
+	struct rtc_device *rtc = rtc_info->rtc_dev;
+
+	rtc_update_irq(rtc, 1, RTC_IRQF | RTC_AF);
+
+	return IRQ_HANDLED;
+}
+
+/* RTC class callbacks handed to devm_rtc_device_register() in probe. */
+static const struct rtc_class_ops s5m_rtc_ops = {
+	.read_time = s5m_rtc_read_time,
+	.set_time = s5m_rtc_set_time,
+	.read_alarm = s5m_rtc_read_alarm,
+	.set_alarm = s5m_rtc_set_alarm,
+	.alarm_irq_enable = s5m_rtc_alarm_irq_enable,
+};
+
+/*
+ * Set or clear the WTSR enable bit in SEC_WTSR_SMPL_CNTL.  A failed
+ * register update is logged; nothing is returned to the caller.
+ */
+static void s5m_rtc_enable_wtsr(struct s5m_rtc_info *info, bool enable)
+{
+	unsigned int bits = 0;
+	int err;
+
+	if (enable)
+		bits = WTSR_ENABLE_MASK;
+
+	err = regmap_update_bits(info->rtc, SEC_WTSR_SMPL_CNTL,
+				 WTSR_ENABLE_MASK, bits);
+	if (err >= 0)
+		return;
+
+	dev_err(info->dev, "%s: fail to update WTSR reg(%d)\n",
+		__func__, err);
+}
+
+/*
+ * Set or clear the SMPL enable bit in SEC_WTSR_SMPL_CNTL.  A failed
+ * register update is logged; nothing is returned to the caller.
+ */
+static void s5m_rtc_enable_smpl(struct s5m_rtc_info *info, bool enable)
+{
+	unsigned int bits = 0;
+	int err;
+
+	if (enable)
+		bits = SMPL_ENABLE_MASK;
+
+	err = regmap_update_bits(info->rtc, SEC_WTSR_SMPL_CNTL,
+				 SMPL_ENABLE_MASK, bits);
+	if (err >= 0)
+		return;
+
+	dev_err(info->dev, "%s: fail to update SMPL reg(%d)\n",
+		__func__, err);
+}
+
+/*
+ * One-time register setup performed from probe.
+ *
+ * Writes two configuration bytes starting at SEC_ALARM0_CONF, records that
+ * the driver uses 24-hour mode, and - when RTC_TCON_MASK was not yet set in
+ * SEC_RTC_UDR_CON - programs a default time of 1/1/2012 00:00:00.  Finally
+ * RTC_TCON_MASK is set so the default time is not programmed again.
+ *
+ * Returns 0 on success or a negative error from the register accesses or
+ * from setting the default time.
+ */
+static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
+{
+	u8 data[2];
+	unsigned int tp_read;
+	int ret;
+	struct rtc_time tm;
+
+	ret = regmap_read(info->rtc, SEC_RTC_UDR_CON, &tp_read);
+	if (ret < 0) {
+		dev_err(info->dev, "%s: fail to read control reg(%d)\n",
+			__func__, ret);
+		return ret;
+	}
+
+	/*
+	 * Set RTC control register : Binary mode, 24hour mode
+	 * NOTE(review): data[0] sets BCD_EN while data[1] clears it - confirm
+	 * which byte is meant to select binary mode.
+	 */
+	data[0] = (1 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
+	data[1] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
+
+	info->rtc_24hr_mode = 1;
+	ret = regmap_raw_write(info->rtc, SEC_ALARM0_CONF, data, 2);
+	if (ret < 0) {
+		dev_err(info->dev, "%s: fail to write controlm reg(%d)\n",
+			__func__, ret);
+		return ret;
+	}
+
+	/* In first boot time, Set rtc time to 1/1/2012 00:00:00(SUN) */
+	if ((tp_read & RTC_TCON_MASK) == 0) {
+		dev_dbg(info->dev, "rtc init\n");
+		tm.tm_sec = 0;
+		tm.tm_min = 0;
+		tm.tm_hour = 0;
+		tm.tm_wday = 0;
+		tm.tm_mday = 1;
+		tm.tm_mon = 0;
+		tm.tm_year = 112;
+		tm.tm_yday = 0;
+		tm.tm_isdst = 0;
+		ret = s5m_rtc_set_time(info->dev, &tm);
+		if (ret < 0) {
+			/*
+			 * Leave TCON clear so the default time is retried on
+			 * the next initialisation instead of being skipped.
+			 */
+			dev_err(info->dev, "%s: fail to set default time(%d)\n",
+				__func__, ret);
+			return ret;
+		}
+	}
+
+	ret = regmap_update_bits(info->rtc, SEC_RTC_UDR_CON,
+				 RTC_TCON_MASK, tp_read | RTC_TCON_MASK);
+	if (ret < 0)
+		dev_err(info->dev, "%s: fail to update TCON reg(%d)\n",
+			__func__, ret);
+
+	return ret;
+}
+
+/*
+ * Platform driver probe.
+ *
+ * Allocates the driver state, picks the alarm IRQ for the chip variant,
+ * initialises the RTC registers, optionally enables WTSR/SMPL, registers
+ * the RTC class device and requests the alarm interrupt.  All resources
+ * are device-managed.
+ *
+ * Returns 0 on success, -ENODEV without platform data, -ENOMEM on
+ * allocation failure, -EINVAL for an unsupported device type, or the error
+ * from register initialisation, RTC registration or the IRQ request.
+ */
+static int s5m_rtc_probe(struct platform_device *pdev)
+{
+	struct sec_pmic_dev *s5m87xx = dev_get_drvdata(pdev->dev.parent);
+	struct sec_platform_data *pdata = s5m87xx->pdata;
+	struct s5m_rtc_info *info;
+	int ret;
+
+	if (!pdata) {
+		dev_err(pdev->dev.parent, "Platform data not supplied\n");
+		return -ENODEV;
+	}
+
+	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	info->dev = &pdev->dev;
+	info->s5m87xx = s5m87xx;
+	info->rtc = s5m87xx->rtc;
+	info->device_type = s5m87xx->device_type;
+	info->wtsr_smpl = s5m87xx->wtsr_smpl;
+
+	/*
+	 * NOTE(review): the IRQ is chosen from pdata->device_type while
+	 * info->device_type comes from the PMIC core - confirm they agree.
+	 */
+	switch (pdata->device_type) {
+	case S5M8763X:
+		info->irq = s5m87xx->irq_base + S5M8763_IRQ_ALARM0;
+		break;
+
+	case S5M8767X:
+		info->irq = s5m87xx->irq_base + S5M8767_IRQ_RTCA1;
+		break;
+
+	default:
+		/* Report the offending type, not the errno. */
+		dev_err(&pdev->dev, "Unsupported device type: %d\n",
+			pdata->device_type);
+		return -EINVAL;
+	}
+
+	/* Must precede register init: it looks the state up via drvdata. */
+	platform_set_drvdata(pdev, info);
+
+	ret = s5m8767_rtc_init_reg(info);
+	if (ret < 0)
+		return ret;
+
+	if (info->wtsr_smpl) {
+		s5m_rtc_enable_wtsr(info, true);
+		s5m_rtc_enable_smpl(info, true);
+	}
+
+	device_init_wakeup(&pdev->dev, 1);
+
+	info->rtc_dev = devm_rtc_device_register(&pdev->dev, "s5m-rtc",
+						 &s5m_rtc_ops, THIS_MODULE);
+
+	if (IS_ERR(info->rtc_dev))
+		return PTR_ERR(info->rtc_dev);
+
+	ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL,
+					s5m_rtc_alarm_irq, 0, "rtc-alarm0",
+					info);
+	if (ret < 0)
+		dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n",
+			info->irq, ret);
+
+	return ret;
+}
+
+/*
+ * Platform driver shutdown hook.
+ *
+ * When WTSR/SMPL was enabled at probe time, tries up to three times to
+ * clear the WTSR enable bit, reading the register back after each attempt
+ * to verify.  SMPL is then disabled unconditionally.
+ */
+static void s5m_rtc_shutdown(struct platform_device *pdev)
+{
+	struct s5m_rtc_info *info = platform_get_drvdata(pdev);
+	int i;
+	unsigned int val = 0;
+	if (info->wtsr_smpl) {
+		for (i = 0; i < 3; i++) {
+			s5m_rtc_enable_wtsr(info, false);
+			/*
+			 * NOTE(review): the read's return value is ignored;
+			 * if it fails on the first pass val is still 0 and
+			 * the attempt is reported as a success.
+			 */
+			regmap_read(info->rtc, SEC_WTSR_SMPL_CNTL, &val);
+			pr_debug("%s: WTSR_SMPL reg(0x%02x)\n", __func__, val);
+			if (val & WTSR_ENABLE_MASK)
+				pr_emerg("%s: fail to disable WTSR\n",
+					 __func__);
+			else {
+				pr_info("%s: success to disable WTSR\n",
+					__func__);
+				break;
+			}
+		}
+	}
+	/* Disable SMPL when power off */
+	s5m_rtc_enable_smpl(info, false);
+}
+
+/*
+ * Platform device names this driver binds to.  The table is walked until
+ * an entry with an empty name is found, so it must end with a zeroed
+ * sentinel element.
+ */
+static const struct platform_device_id s5m_rtc_id[] = {
+	{ "s5m-rtc", 0 },
+	{ /* sentinel */ },
+};
+
+/*
+ * Platform driver definition: binds by name or through s5m_rtc_id, and
+ * provides probe and shutdown hooks (no remove callback is set).
+ */
+static struct platform_driver s5m_rtc_driver = {
+	.driver		= {
+		.name	= "s5m-rtc",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= s5m_rtc_probe,
+	.shutdown	= s5m_rtc_shutdown,
+	.id_table	= s5m_rtc_id,
+};
+
+module_platform_driver(s5m_rtc_driver);
+
+/* Module information */
+MODULE_AUTHOR("Sangbeom Kim <sbkim73@samsung.com>");
+MODULE_DESCRIPTION("Samsung S5M RTC driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:s5m-rtc");
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index 378ae8a04c6a..2d0c9071bcfb 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -51,6 +51,7 @@ struct sec_pmic_dev {
 	int ono;
 	int type;
 	bool wakeup;
+	bool wtsr_smpl;
 };
 
 int sec_irq_init(struct sec_pmic_dev *sec_pmic);
diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h
index 71597e20cddb..94b7cd6d8891 100644
--- a/include/linux/mfd/samsung/rtc.h
+++ b/include/linux/mfd/samsung/rtc.h
@@ -62,6 +62,11 @@ enum sec_rtc_reg {
 /* RTC Update Register1 */
 #define RTC_UDR_SHIFT		0
 #define RTC_UDR_MASK		(1 << RTC_UDR_SHIFT)
+#define RTC_TCON_SHIFT		1
+#define RTC_TCON_MASK		(1 << RTC_TCON_SHIFT)
+#define RTC_TIME_EN_SHIFT	3
+#define RTC_TIME_EN_MASK	(1 << RTC_TIME_EN_SHIFT)
+
 /* RTC Hour register */
 #define HOUR_PM_SHIFT		6
 #define HOUR_PM_MASK		(1 << HOUR_PM_SHIFT)
@@ -69,6 +74,12 @@ enum sec_rtc_reg {
 #define ALARM_ENABLE_SHIFT	7
 #define ALARM_ENABLE_MASK	(1 << ALARM_ENABLE_SHIFT)
 
+#define SMPL_ENABLE_SHIFT	7
+#define SMPL_ENABLE_MASK	(1 << SMPL_ENABLE_SHIFT)
+
+#define WTSR_ENABLE_SHIFT	6
+#define WTSR_ENABLE_MASK	(1 << WTSR_ENABLE_SHIFT)
+
 enum {
 	RTC_SEC = 0,
 	RTC_MIN,
-- 
cgit v1.2.3-59-g8ed1b


From d049f74f2dbe71354d43d393ac3a188947811348 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 12 Nov 2013 15:11:17 -0800
Subject: exec/ptrace: fix get_dumpable() incorrect tests

The get_dumpable() return value is not boolean.  Most users of the
function actually want to be testing for non-SUID_DUMP_USER(1) rather than
SUID_DUMP_DISABLE(0).  The SUID_DUMP_ROOT(2) is also considered a
protected state.  Almost all places did this correctly, excepting the two
places fixed in this patch.

Wrong logic:
    if (dumpable == SUID_DUMP_DISABLE) { /* be protective */ }
        or
    if (dumpable == 0) { /* be protective */ }
        or
    if (!dumpable) { /* be protective */ }

Correct logic:
    if (dumpable != SUID_DUMP_USER) { /* be protective */ }
        or
    if (dumpable != 1) { /* be protective */ }

Without this patch, if the system had set the sysctl fs/suid_dumpable=2, a
user was able to ptrace attach to processes that had dropped privileges to
that user.  (This may have been partially mitigated if Yama was enabled.)

The macros have been moved into the file that declares get/set_dumpable(),
which means things like the ia64 code can see them too.

CVE-2013-2929

Reported-by: Vasily Kulikov <segoon@openwall.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/include/asm/processor.h | 2 +-
 fs/exec.c                         | 6 ++++++
 include/linux/binfmts.h           | 3 ---
 include/linux/sched.h             | 4 ++++
 kernel/ptrace.c                   | 3 ++-
 5 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index e0a899a1a8a6..5a84b3a50741 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -319,7 +319,7 @@ struct thread_struct {
 	regs->loadrs = 0;									\
 	regs->r8 = get_dumpable(current->mm);	/* set "don't zap registers" flag */		\
 	regs->r12 = new_sp - 16;	/* allocate 16 byte scratch area */			\
-	if (unlikely(!get_dumpable(current->mm))) {							\
+	if (unlikely(get_dumpable(current->mm) != SUID_DUMP_USER)) {	\
 		/*										\
 		 * Zap scratch regs to avoid leaking bits between processes with different	\
 		 * uid/privileges.								\
diff --git a/fs/exec.c b/fs/exec.c
index 2ea437e5acf4..12120620f040 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1669,6 +1669,12 @@ int __get_dumpable(unsigned long mm_flags)
 	return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
 }
 
+/*
+ * This returns the actual value of the suid_dumpable flag. For things
+ * that are using this for checking for privilege transitions, it must
+ * test against SUID_DUMP_USER rather than treating it as a boolean
+ * value.
+ */
 int get_dumpable(struct mm_struct *mm)
 {
 	return __get_dumpable(mm->flags);
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index e8112ae50531..7554fd410bcc 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -99,9 +99,6 @@ extern void setup_new_exec(struct linux_binprm * bprm);
 extern void would_dump(struct linux_binprm *, struct file *);
 
 extern int suid_dumpable;
-#define SUID_DUMP_DISABLE	0	/* No setuid dumping */
-#define SUID_DUMP_USER		1	/* Dump as user of process */
-#define SUID_DUMP_ROOT		2	/* Dump as root */
 
 /* Stack area protections */
 #define EXSTACK_DEFAULT   0	/* Whatever the arch defaults to */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5e226fe3e512..f7efc8604652 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -323,6 +323,10 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
 extern void set_dumpable(struct mm_struct *mm, int value);
 extern int get_dumpable(struct mm_struct *mm);
 
+#define SUID_DUMP_DISABLE	0	/* No setuid dumping */
+#define SUID_DUMP_USER		1	/* Dump as user of process */
+#define SUID_DUMP_ROOT		2	/* Dump as root */
+
 /* mm flags */
 /* dumpable bits */
 #define MMF_DUMPABLE      0  /* core dump is permitted */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index dd562e9aa2c8..1f4bcb3cc21c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -257,7 +257,8 @@ ok:
 	if (task->mm)
 		dumpable = get_dumpable(task->mm);
 	rcu_read_lock();
-	if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
+	if (dumpable != SUID_DUMP_USER &&
+	    !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
 		rcu_read_unlock();
 		return -EPERM;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 1310a5a99d900ee30b9f171146406bde0c6c2bd4 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 12 Nov 2013 15:11:19 -0800
Subject: rbtree: fix rbtree_postorder_for_each_entry_safe() iterator

The iterator rbtree_postorder_for_each_entry_safe() relies on pointer
underflow behavior when testing for loop termination.  In particular it
expects that

  &rb_entry(NULL, type, field)->field

is NULL.  But the result of this expression is not defined by the C standard
and some gcc versions (e.g.  4.3.4) assume the above expression can never
be equal to NULL.  The net result is an oops because the iteration is not
properly terminated.

Fix the problem by modifying the iterator to avoid pointer underflows.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Artem Bityutskiy <dedekind1@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: <stable@vger.kernel.org>		[3.12.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index aa870a4ddf54..57e75ae9910f 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -85,6 +85,11 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
 	*rb_link = node;
 }
 
+/*
+ * rb_entry_safe - like rb_entry(), but evaluates to NULL when @ptr is NULL
+ * instead of applying rb_entry() to a NULL pointer.  @ptr is evaluated
+ * exactly once.
+ */
+#define rb_entry_safe(ptr, type, member) \
+	({ typeof(ptr) ____ptr = (ptr); \
+	   ____ptr ? rb_entry(____ptr, type, member) : NULL; \
+	})
+
 /**
  * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of
  * given type safe against removal of rb_node entry
@@ -95,12 +100,9 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
  * @field:	the name of the rb_node field within 'type'.
  */
 #define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
-	for (pos = rb_entry(rb_first_postorder(root), typeof(*pos), field),\
-		n = rb_entry(rb_next_postorder(&pos->field), \
-			typeof(*pos), field); \
-	     &pos->field; \
-	     pos = n, \
-		n = rb_entry(rb_next_postorder(&pos->field), \
-			typeof(*pos), field))
+	for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
+	     pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \
+			typeof(*pos), field); 1; }); \
+	     pos = n)
 
 #endif	/* _LINUX_RBTREE_H */
-- 
cgit v1.2.3-59-g8ed1b


From 4e9b45a19241354daec281d7a785739829b52359 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Tue, 12 Nov 2013 15:11:47 -0800
Subject: ipc, msg: fix message length check for negative values

On 64 bit systems the test for negative message sizes is bogus as the
size, which may be positive when evaluated as a long, will get truncated
to an int when passed to load_msg().  So a long might very well contain a
positive value but when truncated to an int it would become negative.

That in combination with a small negative value of msg_ctlmax (which will
be promoted to an unsigned type for the comparison against msgsz, making
it a big positive value and therefore make it pass the check) will lead to
two problems: 1/ The kmalloc() call in alloc_msg() will allocate a too
small buffer as the addition of alen is effectively a subtraction.  2/ The
copy_from_user() call in load_msg() will first overflow the buffer with
userland data and then, when the userland access generates an access
violation, the fixup handler copy_user_handle_tail() will try to fill the
remainder with zeros -- roughly 4GB.  That almost instantly results in a
system crash or reset.

  ,-[ Reproducer (needs to be run as root) ]--
  | #include <sys/stat.h>
  | #include <sys/msg.h>
  | #include <unistd.h>
  | #include <fcntl.h>
  |
  | int main(void) {
  |     long msg = 1;
  |     int fd;
  |
  |     fd = open("/proc/sys/kernel/msgmax", O_WRONLY);
  |     write(fd, "-1", 2);
  |     close(fd);
  |
  |     msgsnd(0, &msg, 0xfffffff0, IPC_NOWAIT);
  |
  |     return 0;
  | }
  '---

Fix the issue by preventing msgsz from getting truncated by consistently
using size_t for the message length.  This way the size checks in
do_msgsnd() could still be passed with a negative value for msg_ctlmax but
we would fail on the buffer allocation in that case and error out.

Also change the type of m_ts from int to size_t to avoid similar nastiness
in other code paths -- it is used in similar constructs, i.e.  signed vs.
unsigned checks.  It should never become negative under normal
circumstances, though.

Setting msg_ctlmax to a negative value is an odd configuration and should
be prevented.  As that might break existing userland, it will be handled
in a separate commit so it could easily be reverted and reworked without
reintroducing the above described bug.

Hardening mechanisms for user copy operations would have caught that bug
early -- e.g.  checking slab object sizes on user copy operations as the
usercopy feature of the PaX patch does.  Or, for that matter, detect the
long vs.  int sign change due to truncation, as the size overflow plugin
of the very same patch does.

[akpm@linux-foundation.org: fix i386 min() warnings]
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Pax Team <pageexec@freemail.hu>
Cc: Davidlohr Bueso <davidlohr@hp.com>
Cc: Brad Spengler <spender@grsecurity.net>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: <stable@vger.kernel.org>	[ v2.3.27+ -- yes, that old ;) ]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msg.h |  6 +++---
 ipc/msgutil.c       | 20 ++++++++++----------
 ipc/util.h          |  4 ++--
 3 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/msg.h b/include/linux/msg.h
index 391af8d11cce..e21f9d44307f 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -6,9 +6,9 @@
 
 /* one msg_msg structure for each message */
 struct msg_msg {
-	struct list_head m_list; 
-	long  m_type;          
-	int m_ts;           /* message text size */
+	struct list_head m_list;
+	long m_type;
+	size_t m_ts;		/* message text size */
 	struct msg_msgseg* next;
 	void *security;
 	/* the actual message follows immediately */
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 491e71f2a1b8..7e7095974d54 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -41,15 +41,15 @@ struct msg_msgseg {
 	/* the next part of the message follows immediately */
 };
 
-#define DATALEN_MSG	(int)(PAGE_SIZE-sizeof(struct msg_msg))
-#define DATALEN_SEG	(int)(PAGE_SIZE-sizeof(struct msg_msgseg))
+#define DATALEN_MSG	((size_t)PAGE_SIZE-sizeof(struct msg_msg))
+#define DATALEN_SEG	((size_t)PAGE_SIZE-sizeof(struct msg_msgseg))
 
 
-static struct msg_msg *alloc_msg(int len)
+static struct msg_msg *alloc_msg(size_t len)
 {
 	struct msg_msg *msg;
 	struct msg_msgseg **pseg;
-	int alen;
+	size_t alen;
 
 	alen = min(len, DATALEN_MSG);
 	msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
@@ -80,12 +80,12 @@ out_err:
 	return NULL;
 }
 
-struct msg_msg *load_msg(const void __user *src, int len)
+struct msg_msg *load_msg(const void __user *src, size_t len)
 {
 	struct msg_msg *msg;
 	struct msg_msgseg *seg;
 	int err = -EFAULT;
-	int alen;
+	size_t alen;
 
 	msg = alloc_msg(len);
 	if (msg == NULL)
@@ -117,8 +117,8 @@ out_err:
 struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
 {
 	struct msg_msgseg *dst_pseg, *src_pseg;
-	int len = src->m_ts;
-	int alen;
+	size_t len = src->m_ts;
+	size_t alen;
 
 	BUG_ON(dst == NULL);
 	if (src->m_ts > dst->m_ts)
@@ -147,9 +147,9 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
 	return ERR_PTR(-ENOSYS);
 }
 #endif
-int store_msg(void __user *dest, struct msg_msg *msg, int len)
+int store_msg(void __user *dest, struct msg_msg *msg, size_t len)
 {
-	int alen;
+	size_t alen;
 	struct msg_msgseg *seg;
 
 	alen = min(len, DATALEN_MSG);
diff --git a/ipc/util.h b/ipc/util.h
index f2f5036f2eed..59d78aa94987 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -148,9 +148,9 @@ int ipc_parse_version (int *cmd);
 #endif
 
 extern void free_msg(struct msg_msg *msg);
-extern struct msg_msg *load_msg(const void __user *src, int len);
+extern struct msg_msg *load_msg(const void __user *src, size_t len);
 extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst);
-extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
+extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len);
 
 extern void recompute_msgmni(struct ipc_namespace *);
 
-- 
cgit v1.2.3-59-g8ed1b