From 402b27f9f2c22309d5bb285628765bc27b82fcf5 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Thu, 18 Apr 2013 16:06:54 +0200
Subject: xen-block: implement indirect descriptors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Indirect descriptors introduce a new block operation
(BLKIF_OP_INDIRECT) that passes grant references instead of segments
in the request. This grant references are filled with arrays of
blkif_request_segment_aligned, this way we can send more segments in a
request.

The proposed implementation sets the maximum number of indirect grefs
(frames filled with blkif_request_segment_aligned) to 256 in the
backend and 32 in the frontend. The value in the frontend has been
chosen experimentally, and the backend value has been set to a sane
value that allows expanding the maximum number of indirect descriptors
in the frontend if needed.

The migration code has changed from the previous implementation, in
which we simply remapped the segments on the shared ring. Now the
maximum number of segments allowed in a request can change depending
on the backend, so we have to requeue all the requests in the ring and
in the queue and split the bios in them if they are bigger than the
new maximum number of segments.

[v2: Fixed minor comments by Konrad.
[v1: Added padding to make the indirect request 64bit aligned.
 Added some BUGs, comments; fixed number of indirect pages in
 blkif_get_x86_{32/64}_req. Added description about the indirect operation
 in blkif.h]
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
[v3: Fixed spaces and tabs mix ups]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/xen/interface/io/blkif.h | 53 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

(limited to 'include')

diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index ffd4652de91c..65e12099ef89 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -102,6 +102,30 @@ typedef uint64_t blkif_sector_t;
  */
 #define BLKIF_OP_DISCARD           5
 
+/*
+ * Recognized if "feature-max-indirect-segments" in present in the backend
+ * xenbus info. The "feature-max-indirect-segments" node contains the maximum
+ * number of segments allowed by the backend per request. If the node is
+ * present, the frontend might use blkif_request_indirect structs in order to
+ * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
+ * maximum number of indirect segments is fixed by the backend, but the
+ * frontend can issue requests with any number of indirect segments as long as
+ * it's less than the number provided by the backend. The indirect_grefs field
+ * in blkif_request_indirect should be filled by the frontend with the
+ * grant references of the pages that are holding the indirect segments.
+ * This pages are filled with an array of blkif_request_segment_aligned
+ * that hold the information about the segments. The number of indirect
+ * pages to use is determined by the maximum number of segments
+ * a indirect request contains. Every indirect page can contain a maximum
+ * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)),
+ * so to calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments/512).
+ *
+ * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
+ * create the "feature-max-indirect-segments" node!
+ */
+#define BLKIF_OP_INDIRECT          6
+
 /*
  * Maximum scatter/gather segments per request.
  * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
@@ -109,6 +133,16 @@ typedef uint64_t blkif_sector_t;
  */
 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
 
+#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
+
+struct blkif_request_segment_aligned {
+	grant_ref_t gref;        /* reference to I/O buffer frame        */
+	/* @first_sect: first sector in frame to transfer (inclusive).   */
+	/* @last_sect: last sector in frame to transfer (inclusive).     */
+	uint8_t     first_sect, last_sect;
+	uint16_t    _pad; /* padding to make it 8 bytes, so it's cache-aligned */
+} __attribute__((__packed__));
+
 struct blkif_request_rw {
 	uint8_t        nr_segments;  /* number of segments                   */
 	blkif_vdev_t   handle;       /* only for read/write requests         */
@@ -147,12 +181,31 @@ struct blkif_request_other {
 	uint64_t     id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
+struct blkif_request_indirect {
+	uint8_t        indirect_op;
+	uint16_t       nr_segments;
+#ifdef CONFIG_X86_64
+	uint32_t       _pad1;        /* offsetof(blkif_...,u.indirect.id) == 8 */
+#endif
+	uint64_t       id;
+	blkif_sector_t sector_number;
+	blkif_vdev_t   handle;
+	uint16_t       _pad2;
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+#ifdef CONFIG_X86_64
+	uint32_t      _pad3;         /* make it 64 byte aligned */
+#else
+	uint64_t      _pad3;         /* make it 64 byte aligned */
+#endif
+} __attribute__((__packed__));
+
 struct blkif_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	union {
 		struct blkif_request_rw rw;
 		struct blkif_request_discard discard;
 		struct blkif_request_other other;
+		struct blkif_request_indirect indirect;
 	} u;
 } __attribute__((__packed__));
 
-- 
cgit v1.2.3-59-g8ed1b


From 8d9256906a97c24e97e016482b9be06ea2532b05 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Mon, 17 Jun 2013 15:16:33 -0400
Subject: xen/io/ring.h: new macro to detect whether there are too many
 requests on the ring

Backends may need to protect themselves against an insane number of
produced requests stored by a frontend, in case they iterate over
requests until reaching the req_prod value. There can't be more
requests on the ring than the difference between produced requests
and produced (but possibly not yet published) responses.

This is a more strict alternative to a patch previously posted by
Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/xen/interface/io/ring.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
index 75271b9a8f61..7d28aff605c7 100644
--- a/include/xen/interface/io/ring.h
+++ b/include/xen/interface/io/ring.h
@@ -188,6 +188,11 @@ struct __name##_back_ring {						\
 #define RING_REQUEST_CONS_OVERFLOW(_r, _cons)				\
     (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
 
+/* Ill-behaved frontend determination: Can there be this many requests? */
+#define RING_REQUEST_PROD_OVERFLOW(_r, _prod)               \
+    (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
+
+
 #define RING_PUSH_REQUESTS(_r) do {					\
     wmb(); /* back sees requests /before/ updated producer index */	\
     (_r)->sring->req_prod = (_r)->req_prod_pvt;				\
-- 
cgit v1.2.3-59-g8ed1b


From c37511b863f36c1cc6e18440717fd4cc0e881b8a Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Fri, 26 Apr 2013 15:39:55 -0700
Subject: bcache: Fix/revamp tracepoints

The tracepoints were reworked to be more sensible, and fixed a null
pointer deref in one of the tracepoints.

Converted some of the pr_debug()s to tracepoints - this is partly a
performance optimization; it used to be that with DEBUG or
CONFIG_DYNAMIC_DEBUG pr_debug() was an empty macro; but at some point it
was changed to an empty inline function.

Some of the pr_debug() statements had rather expensive function calls as
part of the arguments, so this code was getting run unnecessarily even
on non debug kernels - in some fast paths, too.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/alloc.c     |  10 +-
 drivers/md/bcache/bcache.h    |  20 ---
 drivers/md/bcache/bset.h      |   4 +
 drivers/md/bcache/btree.c     |  47 +++---
 drivers/md/bcache/io.c        |   2 +
 drivers/md/bcache/journal.c   |  14 +-
 drivers/md/bcache/movinggc.c  |  12 +-
 drivers/md/bcache/request.c   |  65 ++++----
 drivers/md/bcache/request.h   |   2 +-
 drivers/md/bcache/super.c     |   2 +-
 drivers/md/bcache/sysfs.c     |   1 +
 drivers/md/bcache/trace.c     |  45 ++++--
 drivers/md/bcache/util.h      |   2 -
 drivers/md/bcache/writeback.c |  10 +-
 include/trace/events/bcache.h | 352 +++++++++++++++++++++++++++++-------------
 15 files changed, 367 insertions(+), 221 deletions(-)

(limited to 'include')

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 38428f46ea74..b54b73b9b2b7 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -65,6 +65,7 @@
 
 #include <linux/kthread.h>
 #include <linux/random.h>
+#include <trace/events/bcache.h>
 
 #define MAX_IN_FLIGHT_DISCARDS		8U
 
@@ -351,10 +352,7 @@ static void invalidate_buckets(struct cache *ca)
 		break;
 	}
 
-	pr_debug("free %zu/%zu free_inc %zu/%zu unused %zu/%zu",
-		 fifo_used(&ca->free), ca->free.size,
-		 fifo_used(&ca->free_inc), ca->free_inc.size,
-		 fifo_used(&ca->unused), ca->unused.size);
+	trace_bcache_alloc_invalidate(ca);
 }
 
 #define allocator_wait(ca, cond)					\
@@ -473,9 +471,7 @@ again:
 		return r;
 	}
 
-	pr_debug("alloc failure: blocked %i free %zu free_inc %zu unused %zu",
-		 atomic_read(&ca->set->prio_blocked), fifo_used(&ca->free),
-		 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
+	trace_bcache_alloc_fail(ca);
 
 	if (cl) {
 		closure_wait(&ca->set->bucket_wait, cl);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index ad4957b52f10..59c15e09e4dd 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -178,7 +178,6 @@
 #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
 
 #include <linux/bio.h>
-#include <linux/blktrace_api.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
@@ -901,8 +900,6 @@ static inline unsigned local_clock_us(void)
 	return local_clock() >> 10;
 }
 
-#define MAX_BSETS		4U
-
 #define BTREE_PRIO		USHRT_MAX
 #define INITIAL_PRIO		32768
 
@@ -1107,23 +1104,6 @@ static inline void __bkey_put(struct cache_set *c, struct bkey *k)
 		atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin);
 }
 
-/* Blktrace macros */
-
-#define blktrace_msg(c, fmt, ...)					\
-do {									\
-	struct request_queue *q = bdev_get_queue(c->bdev);		\
-	if (q)								\
-		blk_add_trace_msg(q, fmt, ##__VA_ARGS__);		\
-} while (0)
-
-#define blktrace_msg_all(s, fmt, ...)					\
-do {									\
-	struct cache *_c;						\
-	unsigned i;							\
-	for_each_cache(_c, (s), i)					\
-		blktrace_msg(_c, fmt, ##__VA_ARGS__);			\
-} while (0)
-
 static inline void cached_dev_put(struct cached_dev *dc)
 {
 	if (atomic_dec_and_test(&dc->count))
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 57a9cff41546..ae115a253d73 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -1,6 +1,8 @@
 #ifndef _BCACHE_BSET_H
 #define _BCACHE_BSET_H
 
+#include <linux/slab.h>
+
 /*
  * BKEYS:
  *
@@ -142,6 +144,8 @@
 
 /* Btree key comparison/iteration */
 
+#define MAX_BSETS		4U
+
 struct btree_iter {
 	size_t size, used;
 	struct btree_iter_set {
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index aaec186f7ba6..218d486259a3 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -223,8 +223,9 @@ void bch_btree_node_read(struct btree *b)
 	struct closure cl;
 	struct bio *bio;
 
+	trace_bcache_btree_read(b);
+
 	closure_init_stack(&cl);
-	pr_debug("%s", pbtree(b));
 
 	bio = bch_bbio_alloc(b->c);
 	bio->bi_rw	= REQ_META|READ_SYNC;
@@ -234,7 +235,6 @@ void bch_btree_node_read(struct btree *b)
 
 	bch_bio_map(bio, b->sets[0].data);
 
-	trace_bcache_btree_read(bio);
 	bch_submit_bbio(bio, b->c, &b->key, 0);
 	closure_sync(&cl);
 
@@ -343,7 +343,6 @@ static void do_btree_node_write(struct btree *b)
 			memcpy(page_address(bv->bv_page),
 			       base + j * PAGE_SIZE, PAGE_SIZE);
 
-		trace_bcache_btree_write(b->bio);
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
 		continue_at(cl, btree_node_write_done, NULL);
@@ -351,7 +350,6 @@ static void do_btree_node_write(struct btree *b)
 		b->bio->bi_vcnt = 0;
 		bch_bio_map(b->bio, i);
 
-		trace_bcache_btree_write(b->bio);
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
 		closure_sync(cl);
@@ -363,10 +361,13 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
 	struct bset *i = b->sets[b->nsets].data;
 
+	trace_bcache_btree_write(b);
+
 	BUG_ON(current->bio_list);
 	BUG_ON(b->written >= btree_blocks(b));
 	BUG_ON(b->written && !i->keys);
 	BUG_ON(b->sets->data->seq != i->seq);
+	bch_check_key_order(b, i);
 
 	cancel_delayed_work(&b->work);
 
@@ -376,12 +377,8 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
 	clear_bit(BTREE_NODE_dirty,	 &b->flags);
 	change_bit(BTREE_NODE_write_idx, &b->flags);
 
-	bch_check_key_order(b, i);
-
 	do_btree_node_write(b);
 
-	pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys);
-
 	b->written += set_blocks(i, b->c);
 	atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
 			&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
@@ -752,6 +749,8 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
 	int ret = -ENOMEM;
 	struct btree *i;
 
+	trace_bcache_btree_cache_cannibalize(c);
+
 	if (!cl)
 		return ERR_PTR(-ENOMEM);
 
@@ -770,7 +769,6 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
 		return ERR_PTR(-EAGAIN);
 	}
 
-	/* XXX: tracepoint */
 	c->try_harder = cl;
 	c->try_harder_start = local_clock();
 retry:
@@ -956,13 +954,14 @@ static void btree_node_free(struct btree *b, struct btree_op *op)
 {
 	unsigned i;
 
+	trace_bcache_btree_node_free(b);
+
 	/*
 	 * The BUG_ON() in btree_node_get() implies that we must have a write
 	 * lock on parent to free or even invalidate a node
 	 */
 	BUG_ON(op->lock <= b->level);
 	BUG_ON(b == b->c->root);
-	pr_debug("bucket %s", pbtree(b));
 
 	if (btree_node_dirty(b))
 		btree_complete_write(b, btree_current_write(b));
@@ -1012,12 +1011,16 @@ retry:
 	bch_bset_init_next(b);
 
 	mutex_unlock(&c->bucket_lock);
+
+	trace_bcache_btree_node_alloc(b);
 	return b;
 err_free:
 	bch_bucket_free(c, &k.key);
 	__bkey_put(c, &k.key);
 err:
 	mutex_unlock(&c->bucket_lock);
+
+	trace_bcache_btree_node_alloc_fail(b);
 	return b;
 }
 
@@ -1254,7 +1257,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op,
 	btree_node_free(r->b, op);
 	up_write(&r->b->lock);
 
-	pr_debug("coalesced %u nodes", nodes);
+	trace_bcache_btree_gc_coalesce(nodes);
 
 	gc->nodes--;
 	nodes--;
@@ -1479,8 +1482,7 @@ static void bch_btree_gc(struct closure *cl)
 	struct btree_op op;
 	uint64_t start_time = local_clock();
 
-	trace_bcache_gc_start(c->sb.set_uuid);
-	blktrace_msg_all(c, "Starting gc");
+	trace_bcache_gc_start(c);
 
 	memset(&stats, 0, sizeof(struct gc_stat));
 	closure_init_stack(&writes);
@@ -1496,9 +1498,7 @@ static void bch_btree_gc(struct closure *cl)
 	closure_sync(&writes);
 
 	if (ret) {
-		blktrace_msg_all(c, "Stopped gc");
 		pr_warn("gc failed!");
-
 		continue_at(cl, bch_btree_gc, bch_gc_wq);
 	}
 
@@ -1519,8 +1519,7 @@ static void bch_btree_gc(struct closure *cl)
 	stats.in_use	= (c->nbuckets - available) * 100 / c->nbuckets;
 	memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
 
-	blktrace_msg_all(c, "Finished gc");
-	trace_bcache_gc_end(c->sb.set_uuid);
+	trace_bcache_gc_end(c);
 
 	continue_at(cl, bch_moving_gc, bch_gc_wq);
 }
@@ -1901,12 +1900,11 @@ static int btree_split(struct btree *b, struct btree_op *op)
 
 	split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5;
 
-	pr_debug("%ssplitting at %s keys %i", split ? "" : "not ",
-		 pbtree(b), n1->sets[0].data->keys);
-
 	if (split) {
 		unsigned keys = 0;
 
+		trace_bcache_btree_node_split(b, n1->sets[0].data->keys);
+
 		n2 = bch_btree_node_alloc(b->c, b->level, &op->cl);
 		if (IS_ERR(n2))
 			goto err_free1;
@@ -1941,8 +1939,11 @@ static int btree_split(struct btree *b, struct btree_op *op)
 		bch_keylist_add(&op->keys, &n2->key);
 		bch_btree_node_write(n2, &op->cl);
 		rw_unlock(true, n2);
-	} else
+	} else {
+		trace_bcache_btree_node_compact(b, n1->sets[0].data->keys);
+
 		bch_btree_insert_keys(n1, op);
+	}
 
 	bch_keylist_add(&op->keys, &n1->key);
 	bch_btree_node_write(n1, &op->cl);
@@ -2117,6 +2118,8 @@ void bch_btree_set_root(struct btree *b)
 {
 	unsigned i;
 
+	trace_bcache_btree_set_root(b);
+
 	BUG_ON(!b->written);
 
 	for (i = 0; i < KEY_PTRS(&b->key); i++)
@@ -2130,7 +2133,6 @@ void bch_btree_set_root(struct btree *b)
 	__bkey_put(b->c, &b->key);
 
 	bch_journal_meta(b->c, NULL);
-	pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0));
 }
 
 /* Cache lookup */
@@ -2216,7 +2218,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op,
 		n->bi_end_io	= bch_cache_read_endio;
 		n->bi_private	= &s->cl;
 
-		trace_bcache_cache_hit(n);
 		__bch_submit_bbio(n, b->c);
 	}
 
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index d285cd49104c..0f6d69658b61 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -9,6 +9,8 @@
 #include "bset.h"
 #include "debug.h"
 
+#include <linux/blkdev.h>
+
 static void bch_bi_idx_hack_endio(struct bio *bio, int error)
 {
 	struct bio *p = bio->bi_private;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 970d819d4350..5ca22149b749 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
 
+#include <trace/events/bcache.h>
+
 /*
  * Journal replay/recovery:
  *
@@ -300,7 +302,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list,
 		for (k = i->j.start;
 		     k < end(&i->j);
 		     k = bkey_next(k)) {
-			pr_debug("%s", pkey(k));
+			trace_bcache_journal_replay_key(k);
+
 			bkey_copy(op->keys.top, k);
 			bch_keylist_push(&op->keys);
 
@@ -712,7 +715,8 @@ void bch_journal(struct closure *cl)
 	spin_lock(&c->journal.lock);
 
 	if (journal_full(&c->journal)) {
-		/* XXX: tracepoint */
+		trace_bcache_journal_full(c);
+
 		closure_wait(&c->journal.wait, cl);
 
 		journal_reclaim(c);
@@ -728,13 +732,15 @@ void bch_journal(struct closure *cl)
 
 	if (b * c->sb.block_size > PAGE_SECTORS << JSET_BITS ||
 	    b > c->journal.blocks_free) {
-		/* XXX: If we were inserting so many keys that they won't fit in
+		trace_bcache_journal_entry_full(c);
+
+		/*
+		 * XXX: If we were inserting so many keys that they won't fit in
 		 * an _empty_ journal write, we'll deadlock. For now, handle
 		 * this in bch_keylist_realloc() - but something to think about.
 		 */
 		BUG_ON(!w->data->keys);
 
-		/* XXX: tracepoint */
 		BUG_ON(!closure_wait(&w->wait, cl));
 
 		closure_flush(&c->journal.io);
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 8589512c972e..04f6b97ffda6 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
 
+#include <trace/events/bcache.h>
+
 struct moving_io {
 	struct keybuf_key	*w;
 	struct search		s;
@@ -49,9 +51,8 @@ static void write_moving_finish(struct closure *cl)
 	while (bv-- != bio->bi_io_vec)
 		__free_page(bv->bv_page);
 
-	pr_debug("%s %s", io->s.op.insert_collision
-		 ? "collision moving" : "moved",
-		 pkey(&io->w->key));
+	if (io->s.op.insert_collision)
+		trace_bcache_gc_copy_collision(&io->w->key);
 
 	bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w);
 
@@ -94,8 +95,6 @@ static void write_moving(struct closure *cl)
 	struct moving_io *io = container_of(s, struct moving_io, s);
 
 	if (!s->error) {
-		trace_bcache_write_moving(&io->bio.bio);
-
 		moving_init(io);
 
 		io->bio.bio.bi_sector	= KEY_START(&io->w->key);
@@ -122,7 +121,6 @@ static void read_moving_submit(struct closure *cl)
 	struct moving_io *io = container_of(s, struct moving_io, s);
 	struct bio *bio = &io->bio.bio;
 
-	trace_bcache_read_moving(bio);
 	bch_submit_bbio(bio, s->op.c, &io->w->key, 0);
 
 	continue_at(cl, write_moving, bch_gc_wq);
@@ -162,7 +160,7 @@ static void read_moving(struct closure *cl)
 		if (bch_bio_alloc_pages(bio, GFP_KERNEL))
 			goto err;
 
-		pr_debug("%s", pkey(&w->key));
+		trace_bcache_gc_copy(&w->key);
 
 		closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl);
 
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index e5ff12e52d5b..695469958c1e 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -530,10 +530,9 @@ static void bch_insert_data_loop(struct closure *cl)
 		if (KEY_CSUM(k))
 			bio_csum(n, k);
 
-		pr_debug("%s", pkey(k));
+		trace_bcache_cache_insert(k);
 		bch_keylist_push(&op->keys);
 
-		trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev);
 		n->bi_rw |= REQ_WRITE;
 		bch_submit_bbio(n, op->c, k, 0);
 	} while (n != bio);
@@ -784,11 +783,8 @@ static void request_read_error(struct closure *cl)
 	int i;
 
 	if (s->recoverable) {
-		/* The cache read failed, but we can retry from the backing
-		 * device.
-		 */
-		pr_debug("recovering at sector %llu",
-			 (uint64_t) s->orig_bio->bi_sector);
+		/* Retry from the backing device: */
+		trace_bcache_read_retry(s->orig_bio);
 
 		s->error = 0;
 		bv = s->bio.bio.bi_io_vec;
@@ -806,7 +802,6 @@ static void request_read_error(struct closure *cl)
 
 		/* XXX: invalidate cache */
 
-		trace_bcache_read_retry(&s->bio.bio);
 		closure_bio_submit(&s->bio.bio, &s->cl, s->d);
 	}
 
@@ -899,6 +894,7 @@ static void request_read_done_bh(struct closure *cl)
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
 	bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip);
+	trace_bcache_read(s->orig_bio, !s->cache_miss, s->op.skip);
 
 	if (s->error)
 		continue_at_nobarrier(cl, request_read_error, bcache_wq);
@@ -969,7 +965,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 	s->cache_miss = miss;
 	bio_get(s->op.cache_bio);
 
-	trace_bcache_cache_miss(s->orig_bio);
 	closure_bio_submit(s->op.cache_bio, &s->cl, s->d);
 
 	return ret;
@@ -1040,15 +1035,15 @@ static void request_write(struct cached_dev *dc, struct search *s)
 	if (should_writeback(dc, s->orig_bio))
 		s->writeback = true;
 
+	trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
+
 	if (!s->writeback) {
 		s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
 						   dc->disk.bio_split);
 
-		trace_bcache_writethrough(s->orig_bio);
 		closure_bio_submit(bio, cl, s->d);
 	} else {
 		s->op.cache_bio = bio;
-		trace_bcache_writeback(s->orig_bio);
 		bch_writeback_add(dc, bio_sectors(bio));
 	}
 out:
@@ -1058,7 +1053,6 @@ skip:
 	s->op.skip = true;
 	s->op.cache_bio = s->orig_bio;
 	bio_get(s->op.cache_bio);
-	trace_bcache_write_skip(s->orig_bio);
 
 	if ((bio->bi_rw & REQ_DISCARD) &&
 	    !blk_queue_discard(bdev_get_queue(dc->bdev)))
@@ -1088,9 +1082,10 @@ static void request_nodata(struct cached_dev *dc, struct search *s)
 
 /* Cached devices - read & write stuff */
 
-int bch_get_congested(struct cache_set *c)
+unsigned bch_get_congested(struct cache_set *c)
 {
 	int i;
+	long rand;
 
 	if (!c->congested_read_threshold_us &&
 	    !c->congested_write_threshold_us)
@@ -1106,7 +1101,13 @@ int bch_get_congested(struct cache_set *c)
 
 	i += CONGESTED_MAX;
 
-	return i <= 0 ? 1 : fract_exp_two(i, 6);
+	if (i > 0)
+		i = fract_exp_two(i, 6);
+
+	rand = get_random_int();
+	i -= bitmap_weight(&rand, BITS_PER_LONG);
+
+	return i > 0 ? i : 1;
 }
 
 static void add_sequential(struct task_struct *t)
@@ -1126,10 +1127,8 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 {
 	struct cache_set *c = s->op.c;
 	struct bio *bio = &s->bio.bio;
-
-	long rand;
-	int cutoff = bch_get_congested(c);
 	unsigned mode = cache_mode(dc, bio);
+	unsigned sectors, congested = bch_get_congested(c);
 
 	if (atomic_read(&dc->disk.detaching) ||
 	    c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
@@ -1147,17 +1146,14 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 		goto skip;
 	}
 
-	if (!cutoff) {
-		cutoff = dc->sequential_cutoff >> 9;
+	if (!congested && !dc->sequential_cutoff)
+		goto rescale;
 
-		if (!cutoff)
-			goto rescale;
-
-		if (mode == CACHE_MODE_WRITEBACK &&
-		    (bio->bi_rw & REQ_WRITE) &&
-		    (bio->bi_rw & REQ_SYNC))
-			goto rescale;
-	}
+	if (!congested &&
+	    mode == CACHE_MODE_WRITEBACK &&
+	    (bio->bi_rw & REQ_WRITE) &&
+	    (bio->bi_rw & REQ_SYNC))
+		goto rescale;
 
 	if (dc->sequential_merge) {
 		struct io *i;
@@ -1192,12 +1188,19 @@ found:
 		add_sequential(s->task);
 	}
 
-	rand = get_random_int();
-	cutoff -= bitmap_weight(&rand, BITS_PER_LONG);
+	sectors = max(s->task->sequential_io,
+		      s->task->sequential_io_avg) >> 9;
 
-	if (cutoff <= (int) (max(s->task->sequential_io,
-				 s->task->sequential_io_avg) >> 9))
+	if (dc->sequential_cutoff &&
+	    sectors >= dc->sequential_cutoff >> 9) {
+		trace_bcache_bypass_sequential(s->orig_bio);
 		goto skip;
+	}
+
+	if (congested && sectors >= congested) {
+		trace_bcache_bypass_congested(s->orig_bio);
+		goto skip;
+	}
 
 rescale:
 	bch_rescale_priorities(c, bio_sectors(bio));
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 254d9ab5707c..57dc4784f4f4 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -30,7 +30,7 @@ struct search {
 };
 
 void bch_cache_read_endio(struct bio *, int);
-int bch_get_congested(struct cache_set *);
+unsigned bch_get_congested(struct cache_set *);
 void bch_insert_data(struct closure *cl);
 void bch_btree_insert_async(struct closure *);
 void bch_cache_read_endio(struct bio *, int);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e53f89988b08..47bc13745068 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -11,6 +11,7 @@
 #include "debug.h"
 #include "request.h"
 
+#include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/debugfs.h>
 #include <linux/genhd.h>
@@ -543,7 +544,6 @@ void bch_prio_write(struct cache *ca)
 
 	pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
 		 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
-	blktrace_msg(ca, "Starting priorities: " buckets_free(ca));
 
 	for (i = prio_buckets(ca) - 1; i >= 0; --i) {
 		long bucket;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 29228b8a6ffe..f5c2d8695230 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -10,6 +10,7 @@
 #include "btree.h"
 #include "request.h"
 
+#include <linux/blkdev.h>
 #include <linux/sort.h>
 
 static const char * const cache_replacement_policies[] = {
diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c
index 983f9bb411bc..7f4f38aa16ae 100644
--- a/drivers/md/bcache/trace.c
+++ b/drivers/md/bcache/trace.c
@@ -2,6 +2,7 @@
 #include "btree.h"
 #include "request.h"
 
+#include <linux/blktrace_api.h>
 #include <linux/module.h>
 
 #define CREATE_TRACE_POINTS
@@ -9,18 +10,42 @@
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_end);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_passthrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_hit);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_miss);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_sequential);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_congested);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_retry);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writethrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_skip);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_replay_key);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_full);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_entry_full);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_cache_cannibalize);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_read);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc_fail);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_free);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_gc_coalesce);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback_collision);
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 577393e38c3a..e02780545f12 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -15,8 +15,6 @@
 
 struct closure;
 
-#include <trace/events/bcache.h>
-
 #ifdef CONFIG_BCACHE_EDEBUG
 
 #define atomic_dec_bug(v)	BUG_ON(atomic_dec_return(v) < 0)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 2714ed3991d1..82f6d4577be2 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -10,6 +10,8 @@
 #include "btree.h"
 #include "debug.h"
 
+#include <trace/events/bcache.h>
+
 static struct workqueue_struct *dirty_wq;
 
 static void read_dirty(struct closure *);
@@ -236,10 +238,12 @@ static void write_dirty_finish(struct closure *cl)
 		for (i = 0; i < KEY_PTRS(&w->key); i++)
 			atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin);
 
-		pr_debug("clearing %s", pkey(&w->key));
 		bch_btree_insert(&op, dc->disk.c);
 		closure_sync(&op.cl);
 
+		if (op.insert_collision)
+			trace_bcache_writeback_collision(&w->key);
+
 		atomic_long_inc(op.insert_collision
 				? &dc->disk.c->writeback_keys_failed
 				: &dc->disk.c->writeback_keys_done);
@@ -275,7 +279,6 @@ static void write_dirty(struct closure *cl)
 	io->bio.bi_bdev		= io->dc->bdev;
 	io->bio.bi_end_io	= dirty_endio;
 
-	trace_bcache_write_dirty(&io->bio);
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
 	continue_at(cl, write_dirty_finish, dirty_wq);
@@ -296,7 +299,6 @@ static void read_dirty_submit(struct closure *cl)
 {
 	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
 
-	trace_bcache_read_dirty(&io->bio);
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
 	continue_at(cl, write_dirty, dirty_wq);
@@ -352,7 +354,7 @@ static void read_dirty(struct closure *cl)
 		if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
 			goto err_free;
 
-		pr_debug("%s", pkey(&w->key));
+		trace_bcache_writeback(&w->key);
 
 		closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
 
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index 3cc5a0b278c3..c9952b36fcea 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -9,9 +9,7 @@
 struct search;
 
 DECLARE_EVENT_CLASS(bcache_request,
-
 	TP_PROTO(struct search *s, struct bio *bio),
-
 	TP_ARGS(s, bio),
 
 	TP_STRUCT__entry(
@@ -22,7 +20,6 @@ DECLARE_EVENT_CLASS(bcache_request,
 		__field(dev_t,		orig_sector		)
 		__field(unsigned int,	nr_sector		)
 		__array(char,		rwbs,	6		)
-		__array(char,		comm,	TASK_COMM_LEN	)
 	),
 
 	TP_fast_assign(
@@ -33,36 +30,66 @@ DECLARE_EVENT_CLASS(bcache_request,
 		__entry->orig_sector	= bio->bi_sector - 16;
 		__entry->nr_sector	= bio->bi_size >> 9;
 		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
-	TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d @ %llu)",
+	TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm,
-		  __entry->orig_major, __entry->orig_minor,
+		  __entry->rwbs, (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->orig_major, __entry->orig_minor,
 		  (unsigned long long)__entry->orig_sector)
 );
 
-DEFINE_EVENT(bcache_request, bcache_request_start,
+DECLARE_EVENT_CLASS(bkey,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k),
 
-	TP_PROTO(struct search *s, struct bio *bio),
+	TP_STRUCT__entry(
+		__field(u32,	size				)
+		__field(u32,	inode				)
+		__field(u64,	offset				)
+		__field(bool,	dirty				)
+	),
 
-	TP_ARGS(s, bio)
+	TP_fast_assign(
+		__entry->inode	= KEY_INODE(k);
+		__entry->offset	= KEY_OFFSET(k);
+		__entry->size	= KEY_SIZE(k);
+		__entry->dirty	= KEY_DIRTY(k);
+	),
+
+	TP_printk("%u:%llu len %u dirty %u", __entry->inode,
+		  __entry->offset, __entry->size, __entry->dirty)
 );
 
-DEFINE_EVENT(bcache_request, bcache_request_end,
+DECLARE_EVENT_CLASS(btree_node,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b),
+
+	TP_STRUCT__entry(
+		__field(size_t,		bucket			)
+	),
+
+	TP_fast_assign(
+		__entry->bucket	= PTR_BUCKET_NR(b->c, &b->key, 0);
+	),
 
+	TP_printk("bucket %zu", __entry->bucket)
+);
+
+/* request.c */
+
+DEFINE_EVENT(bcache_request, bcache_request_start,
 	TP_PROTO(struct search *s, struct bio *bio),
+	TP_ARGS(s, bio)
+);
 
+DEFINE_EVENT(bcache_request, bcache_request_end,
+	TP_PROTO(struct search *s, struct bio *bio),
 	TP_ARGS(s, bio)
 );
 
 DECLARE_EVENT_CLASS(bcache_bio,
-
 	TP_PROTO(struct bio *bio),
-
 	TP_ARGS(bio),
 
 	TP_STRUCT__entry(
@@ -70,7 +97,6 @@ DECLARE_EVENT_CLASS(bcache_bio,
 		__field(sector_t,	sector			)
 		__field(unsigned int,	nr_sector		)
 		__array(char,		rwbs,	6		)
-		__array(char,		comm,	TASK_COMM_LEN	)
 	),
 
 	TP_fast_assign(
@@ -78,191 +104,295 @@ DECLARE_EVENT_CLASS(bcache_bio,
 		__entry->sector		= bio->bi_sector;
 		__entry->nr_sector	= bio->bi_size >> 9;
 		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
-	TP_printk("%d,%d  %s %llu + %u [%s]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm)
+	TP_printk("%d,%d  %s %llu + %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector, __entry->nr_sector)
 );
 
-
-DEFINE_EVENT(bcache_bio, bcache_passthrough,
-
+DEFINE_EVENT(bcache_bio, bcache_bypass_sequential,
 	TP_PROTO(struct bio *bio),
-
 	TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_cache_hit,
-
+DEFINE_EVENT(bcache_bio, bcache_bypass_congested,
 	TP_PROTO(struct bio *bio),
-
 	TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_cache_miss,
+TRACE_EVENT(bcache_read,
+	TP_PROTO(struct bio *bio, bool hit, bool bypass),
+	TP_ARGS(bio, hit, bypass),
 
-	TP_PROTO(struct bio *bio),
+	TP_STRUCT__entry(
+		__field(dev_t,		dev			)
+		__field(sector_t,	sector			)
+		__field(unsigned int,	nr_sector		)
+		__array(char,		rwbs,	6		)
+		__field(bool,		cache_hit		)
+		__field(bool,		bypass			)
+	),
 
-	TP_ARGS(bio)
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		__entry->cache_hit = hit;
+		__entry->bypass = bypass;
+	),
+
+	TP_printk("%d,%d  %s %llu + %u hit %u bypass %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->cache_hit, __entry->bypass)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_read_retry,
+TRACE_EVENT(bcache_write,
+	TP_PROTO(struct bio *bio, bool writeback, bool bypass),
+	TP_ARGS(bio, writeback, bypass),
 
-	TP_PROTO(struct bio *bio),
+	TP_STRUCT__entry(
+		__field(dev_t,		dev			)
+		__field(sector_t,	sector			)
+		__field(unsigned int,	nr_sector		)
+		__array(char,		rwbs,	6		)
+		__field(bool,		writeback		)
+		__field(bool,		bypass			)
+	),
 
-	TP_ARGS(bio)
-);
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		__entry->writeback = writeback;
+		__entry->bypass = bypass;
+	),
 
-DEFINE_EVENT(bcache_bio, bcache_writethrough,
+	TP_printk("%d,%d  %s %llu + %u hit %u bypass %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->writeback, __entry->bypass)
+);
 
+DEFINE_EVENT(bcache_bio, bcache_read_retry,
 	TP_PROTO(struct bio *bio),
-
 	TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_writeback,
+DEFINE_EVENT(bkey, bcache_cache_insert,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
+);
 
-	TP_PROTO(struct bio *bio),
+/* Journal */
 
-	TP_ARGS(bio)
-);
+DECLARE_EVENT_CLASS(cache_set,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c),
 
-DEFINE_EVENT(bcache_bio, bcache_write_skip,
+	TP_STRUCT__entry(
+		__array(char,		uuid,	16 )
+	),
 
-	TP_PROTO(struct bio *bio),
+	TP_fast_assign(
+		memcpy(__entry->uuid, c->sb.set_uuid, 16);
+	),
 
-	TP_ARGS(bio)
+	TP_printk("%pU", __entry->uuid)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_btree_read,
-
-	TP_PROTO(struct bio *bio),
+DEFINE_EVENT(bkey, bcache_journal_replay_key,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
+);
 
-	TP_ARGS(bio)
+DEFINE_EVENT(cache_set, bcache_journal_full,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_btree_write,
+DEFINE_EVENT(cache_set, bcache_journal_entry_full,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
+);
 
+DEFINE_EVENT(bcache_bio, bcache_journal_write,
 	TP_PROTO(struct bio *bio),
-
 	TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_write_dirty,
+/* Btree */
 
-	TP_PROTO(struct bio *bio),
+DEFINE_EVENT(cache_set, bcache_btree_cache_cannibalize,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
+);
 
-	TP_ARGS(bio)
+DEFINE_EVENT(btree_node, bcache_btree_read,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_read_dirty,
+TRACE_EVENT(bcache_btree_write,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b),
 
-	TP_PROTO(struct bio *bio),
+	TP_STRUCT__entry(
+		__field(size_t,		bucket			)
+		__field(unsigned,	block			)
+		__field(unsigned,	keys			)
+	),
 
-	TP_ARGS(bio)
+	TP_fast_assign(
+		__entry->bucket	= PTR_BUCKET_NR(b->c, &b->key, 0);
+		__entry->block	= b->written;
+		__entry->keys	= b->sets[b->nsets].data->keys;
+	),
+
+	TP_printk("bucket %zu", __entry->bucket)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_write_moving,
+DEFINE_EVENT(btree_node, bcache_btree_node_alloc,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
+);
 
-	TP_PROTO(struct bio *bio),
+DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
+);
 
-	TP_ARGS(bio)
+DEFINE_EVENT(btree_node, bcache_btree_node_free,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_read_moving,
+TRACE_EVENT(bcache_btree_gc_coalesce,
+	TP_PROTO(unsigned nodes),
+	TP_ARGS(nodes),
 
-	TP_PROTO(struct bio *bio),
+	TP_STRUCT__entry(
+		__field(unsigned,	nodes			)
+	),
 
-	TP_ARGS(bio)
-);
+	TP_fast_assign(
+		__entry->nodes	= nodes;
+	),
 
-DEFINE_EVENT(bcache_bio, bcache_journal_write,
+	TP_printk("coalesced %u nodes", __entry->nodes)
+);
 
-	TP_PROTO(struct bio *bio),
+DEFINE_EVENT(cache_set, bcache_gc_start,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
+);
 
-	TP_ARGS(bio)
+DEFINE_EVENT(cache_set, bcache_gc_end,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
 );
 
-DECLARE_EVENT_CLASS(bcache_cache_bio,
+DEFINE_EVENT(bkey, bcache_gc_copy,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
+);
 
-	TP_PROTO(struct bio *bio,
-		 sector_t orig_sector,
-		 struct block_device* orig_bdev),
+DEFINE_EVENT(bkey, bcache_gc_copy_collision,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
+);
 
-	TP_ARGS(bio, orig_sector, orig_bdev),
+DECLARE_EVENT_CLASS(btree_split,
+	TP_PROTO(struct btree *b, unsigned keys),
+	TP_ARGS(b, keys),
 
 	TP_STRUCT__entry(
-		__field(dev_t,		dev			)
-		__field(dev_t,		orig_dev		)
-		__field(sector_t,	sector			)
-		__field(sector_t,	orig_sector		)
-		__field(unsigned int,	nr_sector		)
-		__array(char,		rwbs,	6		)
-		__array(char,		comm,	TASK_COMM_LEN	)
+		__field(size_t,		bucket			)
+		__field(unsigned,	keys			)
 	),
 
 	TP_fast_assign(
-		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->orig_dev	= orig_bdev->bd_dev;
-		__entry->sector		= bio->bi_sector;
-		__entry->orig_sector	= orig_sector;
-		__entry->nr_sector	= bio->bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+		__entry->bucket	= PTR_BUCKET_NR(b->c, &b->key, 0);
+		__entry->keys	= keys;
 	),
 
-	TP_printk("%d,%d  %s %llu + %u [%s] (from %d,%d %llu)",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm,
-		  MAJOR(__entry->orig_dev), MINOR(__entry->orig_dev),
-		  (unsigned long long)__entry->orig_sector)
+	TP_printk("bucket %zu keys %u", __entry->bucket, __entry->keys)
 );
 
-DEFINE_EVENT(bcache_cache_bio, bcache_cache_insert,
-
-	TP_PROTO(struct bio *bio,
-		 sector_t orig_sector,
-		 struct block_device *orig_bdev),
+DEFINE_EVENT(btree_split, bcache_btree_node_split,
+	TP_PROTO(struct btree *b, unsigned keys),
+	TP_ARGS(b, keys)
+);
 
-	TP_ARGS(bio, orig_sector, orig_bdev)
+DEFINE_EVENT(btree_split, bcache_btree_node_compact,
+	TP_PROTO(struct btree *b, unsigned keys),
+	TP_ARGS(b, keys)
 );
 
-DECLARE_EVENT_CLASS(bcache_gc,
+DEFINE_EVENT(btree_node, bcache_btree_set_root,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
+);
 
-	TP_PROTO(uint8_t *uuid),
+/* Allocator */
 
-	TP_ARGS(uuid),
+TRACE_EVENT(bcache_alloc_invalidate,
+	TP_PROTO(struct cache *ca),
+	TP_ARGS(ca),
 
 	TP_STRUCT__entry(
-		__field(uint8_t *,	uuid)
+		__field(unsigned,	free			)
+		__field(unsigned,	free_inc		)
+		__field(unsigned,	free_inc_size		)
+		__field(unsigned,	unused			)
 	),
 
 	TP_fast_assign(
-		__entry->uuid		= uuid;
+		__entry->free		= fifo_used(&ca->free);
+		__entry->free_inc	= fifo_used(&ca->free_inc);
+		__entry->free_inc_size	= ca->free_inc.size;
+		__entry->unused		= fifo_used(&ca->unused);
 	),
 
-	TP_printk("%pU", __entry->uuid)
+	TP_printk("free %u free_inc %u/%u unused %u", __entry->free,
+		  __entry->free_inc, __entry->free_inc_size, __entry->unused)
 );
 
+TRACE_EVENT(bcache_alloc_fail,
+	TP_PROTO(struct cache *ca),
+	TP_ARGS(ca),
 
-DEFINE_EVENT(bcache_gc, bcache_gc_start,
+	TP_STRUCT__entry(
+		__field(unsigned,	free			)
+		__field(unsigned,	free_inc		)
+		__field(unsigned,	unused			)
+		__field(unsigned,	blocked			)
+	),
 
-	     TP_PROTO(uint8_t *uuid),
+	TP_fast_assign(
+		__entry->free		= fifo_used(&ca->free);
+		__entry->free_inc	= fifo_used(&ca->free_inc);
+		__entry->unused		= fifo_used(&ca->unused);
+		__entry->blocked	= atomic_read(&ca->set->prio_blocked);
+	),
 
-	     TP_ARGS(uuid)
+	TP_printk("free %u free_inc %u unused %u blocked %u", __entry->free,
+		  __entry->free_inc, __entry->unused, __entry->blocked)
 );
 
-DEFINE_EVENT(bcache_gc, bcache_gc_end,
+/* Background writeback */
 
-	     TP_PROTO(uint8_t *uuid),
+DEFINE_EVENT(bkey, bcache_writeback,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
+);
 
-	     TP_ARGS(uuid)
+DEFINE_EVENT(bkey, bcache_writeback_collision,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
 );
 
 #endif /* _TRACE_BCACHE_H */
-- 
cgit v1.2.3-59-g8ed1b


From 85b1492ee113486d871de7676a61f506a43ca475 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Tue, 14 May 2013 20:33:16 -0700
Subject: bcache: Rip out pkey()/pbtree()

Old gcc doesnt like the struct hack, and it is kind of ugly. So finish
off the work to convert pr_debug() statements to tracepoints, and delete
pkey()/pbtree().

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/bset.c      | 16 ++++++++++++----
 drivers/md/bcache/btree.c     | 21 ++++++---------------
 drivers/md/bcache/btree.h     |  7 +++++++
 drivers/md/bcache/debug.c     | 40 +++++++++++++++++++++++++---------------
 drivers/md/bcache/debug.h     | 11 ++---------
 drivers/md/bcache/super.c     |  5 +++--
 drivers/md/bcache/trace.c     |  2 ++
 include/trace/events/bcache.h | 33 +++++++++++++++++++++++++++++++++
 8 files changed, 90 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index cb4578a327b9..e9399ed7f688 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -78,6 +78,7 @@ struct bkey *bch_keylist_pop(struct keylist *l)
 bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
 {
 	unsigned i;
+	char buf[80];
 
 	if (level && (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k)))
 		goto bad;
@@ -102,7 +103,8 @@ bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
 
 	return false;
 bad:
-	cache_bug(c, "spotted bad key %s: %s", pkey(k), bch_ptr_status(c, k));
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	cache_bug(c, "spotted bad key %s: %s", buf, bch_ptr_status(c, k));
 	return true;
 }
 
@@ -162,10 +164,16 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k)
 #ifdef CONFIG_BCACHE_EDEBUG
 bug:
 	mutex_unlock(&b->c->bucket_lock);
-	btree_bug(b,
+
+	{
+		char buf[80];
+
+		bch_bkey_to_text(buf, sizeof(buf), k);
+		btree_bug(b,
 "inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
-		  pkey(k), PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
-		  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+			  buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
+			  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+	}
 	return true;
 #endif
 }
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 218d486259a3..53a0f4ef4e32 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1770,7 +1770,7 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
 {
 	struct bset *i = b->sets[b->nsets].data;
 	struct bkey *m, *prev;
-	const char *status = "insert";
+	unsigned status = BTREE_INSERT_STATUS_INSERT;
 
 	BUG_ON(bkey_cmp(k, &b->key) > 0);
 	BUG_ON(b->level && !KEY_PTRS(k));
@@ -1803,17 +1803,17 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
 			goto insert;
 
 		/* prev is in the tree, if we merge we're done */
-		status = "back merging";
+		status = BTREE_INSERT_STATUS_BACK_MERGE;
 		if (prev &&
 		    bch_bkey_try_merge(b, prev, k))
 			goto merged;
 
-		status = "overwrote front";
+		status = BTREE_INSERT_STATUS_OVERWROTE;
 		if (m != end(i) &&
 		    KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m))
 			goto copy;
 
-		status = "front merge";
+		status = BTREE_INSERT_STATUS_FRONT_MERGE;
 		if (m != end(i) &&
 		    bch_bkey_try_merge(b, k, m))
 			goto copy;
@@ -1823,16 +1823,12 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
 insert:	shift_keys(b, m, k);
 copy:	bkey_copy(m, k);
 merged:
-	bch_check_keys(b, "%s for %s at %s: %s", status,
-		       op_type(op), pbtree(b), pkey(k));
-	bch_check_key_order_msg(b, i, "%s for %s at %s: %s", status,
-				op_type(op), pbtree(b), pkey(k));
+	bch_check_keys(b, "%u for %s", status, op_type(op));
 
 	if (b->level && !KEY_OFFSET(k))
 		btree_current_write(b)->prio_blocked++;
 
-	pr_debug("%s for %s at %s: %s", status,
-		 op_type(op), pbtree(b), pkey(k));
+	trace_bcache_btree_insert_key(b, k, op->type, status);
 
 	return true;
 }
@@ -2234,9 +2230,6 @@ int bch_btree_search_recurse(struct btree *b, struct btree_op *op)
 	struct btree_iter iter;
 	bch_btree_iter_init(b, &iter, &KEY(op->inode, bio->bi_sector, 0));
 
-	pr_debug("at %s searching for %u:%llu", pbtree(b), op->inode,
-		 (uint64_t) bio->bi_sector);
-
 	do {
 		k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad);
 		if (!k) {
@@ -2302,8 +2295,6 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
 			if (buf->key_predicate(buf, k)) {
 				struct keybuf_key *w;
 
-				pr_debug("%s", pkey(k));
-
 				spin_lock(&buf->lock);
 
 				w = array_alloc(&buf->freelist);
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 809bd77847a2..2b016b93cad4 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -271,6 +271,13 @@ struct btree_op {
 	BKEY_PADDED(replace);
 };
 
+enum {
+	BTREE_INSERT_STATUS_INSERT,
+	BTREE_INSERT_STATUS_BACK_MERGE,
+	BTREE_INSERT_STATUS_OVERWROTE,
+	BTREE_INSERT_STATUS_FRONT_MERGE,
+};
+
 void bch_btree_op_init_stack(struct btree_op *);
 
 static inline void rw_lock(bool w, struct btree *b, int level)
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index ae6096c6845d..82e3a07771ec 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -47,11 +47,10 @@ const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
 	return "";
 }
 
-struct keyprint_hack bch_pkey(const struct bkey *k)
+int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k)
 {
 	unsigned i = 0;
-	struct keyprint_hack r;
-	char *out = r.s, *end = r.s + KEYHACK_SIZE;
+	char *out = buf, *end = buf + size;
 
 #define p(...)	(out += scnprintf(out, end - out, __VA_ARGS__))
 
@@ -75,16 +74,14 @@ struct keyprint_hack bch_pkey(const struct bkey *k)
 	if (KEY_CSUM(k))
 		p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
 #undef p
-	return r;
+	return out - buf;
 }
 
-struct keyprint_hack bch_pbtree(const struct btree *b)
+int bch_btree_to_text(char *buf, size_t size, const struct btree *b)
 {
-	struct keyprint_hack r;
-
-	snprintf(r.s, 40, "%zu level %i/%i", PTR_BUCKET_NR(b->c, &b->key, 0),
-		 b->level, b->c->root ? b->c->root->level : -1);
-	return r;
+	return scnprintf(buf, size, "%zu level %i/%i",
+			 PTR_BUCKET_NR(b->c, &b->key, 0),
+			 b->level, b->c->root ? b->c->root->level : -1);
 }
 
 #if defined(CONFIG_BCACHE_DEBUG) || defined(CONFIG_BCACHE_EDEBUG)
@@ -100,10 +97,12 @@ static void dump_bset(struct btree *b, struct bset *i)
 {
 	struct bkey *k;
 	unsigned j;
+	char buf[80];
 
 	for (k = i->start; k < end(i); k = bkey_next(k)) {
+		bch_bkey_to_text(buf, sizeof(buf), k);
 		printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b),
-		       (uint64_t *) k - i->d, i->keys, pkey(k));
+		       (uint64_t *) k - i->d, i->keys, buf);
 
 		for (j = 0; j < KEY_PTRS(k); j++) {
 			size_t n = PTR_BUCKET_NR(b->c, k, j);
@@ -252,6 +251,7 @@ static void vdump_bucket_and_panic(struct btree *b, const char *fmt,
 				   va_list args)
 {
 	unsigned i;
+	char buf[80];
 
 	console_lock();
 
@@ -262,7 +262,8 @@ static void vdump_bucket_and_panic(struct btree *b, const char *fmt,
 
 	console_unlock();
 
-	panic("at %s\n", pbtree(b));
+	bch_btree_to_text(buf, sizeof(buf), b);
+	panic("at %s\n", buf);
 }
 
 void bch_check_key_order_msg(struct btree *b, struct bset *i,
@@ -337,6 +338,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
 {
 	struct dump_iterator *i = file->private_data;
 	ssize_t ret = 0;
+	char kbuf[80];
 
 	while (size) {
 		struct keybuf_key *w;
@@ -359,7 +361,8 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
 		if (!w)
 			break;
 
-		i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", pkey(&w->key));
+		bch_bkey_to_text(kbuf, sizeof(kbuf), &w->key);
+		i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", kbuf);
 		bch_keybuf_del(&i->keys, w);
 	}
 
@@ -526,10 +529,17 @@ static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a,
 			     k < end(i);
 			     k = bkey_next(k), l = bkey_next(l))
 				if (bkey_cmp(k, l) ||
-				    KEY_SIZE(k) != KEY_SIZE(l))
+				    KEY_SIZE(k) != KEY_SIZE(l)) {
+					char buf1[80];
+					char buf2[80];
+
+					bch_bkey_to_text(buf1, sizeof(buf1), k);
+					bch_bkey_to_text(buf2, sizeof(buf2), l);
+
 					pr_err("key %zi differs: %s != %s",
 					       (uint64_t *) k - i->d,
-					       pkey(k), pkey(l));
+					       buf1, buf2);
+				}
 
 			for (j = 0; j < 3; j++) {
 				pr_err("**** Set %i ****", j);
diff --git a/drivers/md/bcache/debug.h b/drivers/md/bcache/debug.h
index f9378a218148..1c39b5a2489b 100644
--- a/drivers/md/bcache/debug.h
+++ b/drivers/md/bcache/debug.h
@@ -3,15 +3,8 @@
 
 /* Btree/bkey debug printing */
 
-#define KEYHACK_SIZE 80
-struct keyprint_hack {
-	char s[KEYHACK_SIZE];
-};
-
-struct keyprint_hack bch_pkey(const struct bkey *k);
-struct keyprint_hack bch_pbtree(const struct btree *b);
-#define pkey(k)		(&bch_pkey(k).s[0])
-#define pbtree(b)	(&bch_pbtree(b).s[0])
+int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k);
+int bch_btree_to_text(char *buf, size_t size, const struct btree *b);
 
 #ifdef CONFIG_BCACHE_EDEBUG
 
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 47bc13745068..f24c2e0cbb1c 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -343,6 +343,7 @@ static void uuid_io(struct cache_set *c, unsigned long rw,
 	struct closure *cl = &c->uuid_write.cl;
 	struct uuid_entry *u;
 	unsigned i;
+	char buf[80];
 
 	BUG_ON(!parent);
 	closure_lock(&c->uuid_write, parent);
@@ -363,8 +364,8 @@ static void uuid_io(struct cache_set *c, unsigned long rw,
 			break;
 	}
 
-	pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read",
-		 pkey(&c->uuid_bucket));
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", buf);
 
 	for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
 		if (!bch_is_zero(u->uuid, 16))
diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c
index 7f4f38aa16ae..f7b6c197f90f 100644
--- a/drivers/md/bcache/trace.c
+++ b/drivers/md/bcache/trace.c
@@ -40,6 +40,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision);
 
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_insert_key);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root);
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index c9952b36fcea..5ebda976ea93 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -305,6 +305,39 @@ DEFINE_EVENT(bkey, bcache_gc_copy_collision,
 	TP_ARGS(k)
 );
 
+TRACE_EVENT(bcache_btree_insert_key,
+	TP_PROTO(struct btree *b, struct bkey *k, unsigned op, unsigned status),
+	TP_ARGS(b, k, op, status),
+
+	TP_STRUCT__entry(
+		__field(u64,	btree_node			)
+		__field(u32,	btree_level			)
+		__field(u32,	inode				)
+		__field(u64,	offset				)
+		__field(u32,	size				)
+		__field(u8,	dirty				)
+		__field(u8,	op				)
+		__field(u8,	status				)
+	),
+
+	TP_fast_assign(
+		__entry->btree_node = PTR_BUCKET_NR(b->c, &b->key, 0);
+		__entry->btree_level = b->level;
+		__entry->inode	= KEY_INODE(k);
+		__entry->offset	= KEY_OFFSET(k);
+		__entry->size	= KEY_SIZE(k);
+		__entry->dirty	= KEY_DIRTY(k);
+		__entry->op = op;
+		__entry->status = status;
+	),
+
+	TP_printk("%u for %u at %llu(%u): %u:%llu len %u dirty %u",
+		  __entry->status, __entry->op,
+		  __entry->btree_node, __entry->btree_level,
+		  __entry->inode, __entry->offset,
+		  __entry->size, __entry->dirty)
+);
+
 DECLARE_EVENT_CLASS(btree_split,
 	TP_PROTO(struct btree *b, unsigned keys),
 	TP_ARGS(b, keys),
-- 
cgit v1.2.3-59-g8ed1b


From d752b2696072ed52fd5afab08b601e2220a3b87e Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 25 Jun 2013 16:50:08 +0200
Subject: drbd: Allow online change of al-stripes and al-stripe-size

Allow to change the AL layout with an resize operation. For that
the reisze command gets two new fields: al_stripes and al_stripe_size.

In order to make the operation crash save:
1) Lock out all IO and MD-IO
2) Write the super block with MDF_PRIMARY_IND clear
3) write the bitmap to the new location (all zeros, since
   we allow only while connected)
4) Initialize the new AL-area
5) Write the super block with the restored MDF_PRIMARY_IND.
6) Unfreeze all IO

Since the AL-layout has no influence on the protocol, this operation
needs to be beforemed on both sides of a resource (if intended).

Signed-off-by: Andreas Gruenbacher <agruen@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_actlog.c   |  21 ++++++
 drivers/block/drbd/drbd_int.h      |   7 +-
 drivers/block/drbd/drbd_main.c     |  57 ++++++++-------
 drivers/block/drbd/drbd_nl.c       | 137 ++++++++++++++++++++++++++++++-------
 drivers/block/drbd/drbd_receiver.c |   2 +-
 include/linux/drbd.h               |   6 +-
 include/linux/drbd_genl.h          |   2 +
 include/linux/drbd_limits.h        |   9 +++
 8 files changed, 188 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 6608076dc39e..28c73ca320a8 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -659,6 +659,27 @@ void drbd_al_shrink(struct drbd_conf *mdev)
 	wake_up(&mdev->al_wait);
 }
 
+int drbd_initialize_al(struct drbd_conf *mdev, void *buffer)
+{
+	struct al_transaction_on_disk *al = buffer;
+	struct drbd_md *md = &mdev->ldev->md;
+	sector_t al_base = md->md_offset + md->al_offset;
+	int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
+	int i;
+
+	memset(al, 0, 4096);
+	al->magic = cpu_to_be32(DRBD_AL_MAGIC);
+	al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
+	al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
+
+	for (i = 0; i < al_size_4k; i++) {
+		int err = drbd_md_sync_page_io(mdev, mdev->ldev, al_base + i * 8, WRITE);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 static int w_update_odbm(struct drbd_work *w, int unused)
 {
 	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 4519d63350fb..2d7f608d181c 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1133,6 +1133,7 @@ extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
 void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
 
 extern void conn_md_sync(struct drbd_tconn *tconn);
+extern void drbd_md_write(struct drbd_conf *mdev, void *buffer);
 extern void drbd_md_sync(struct drbd_conf *mdev);
 extern int  drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
 extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
@@ -1468,12 +1469,15 @@ extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int);
 enum determine_dev_size {
+	DS_ERROR_SHRINK = -3,
+	DS_ERROR_SPACE_MD = -2,
 	DS_ERROR = -1,
 	DS_UNCHANGED = 0,
 	DS_SHRUNK = 1,
 	DS_GREW = 2
 };
-extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
+extern enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
@@ -1639,6 +1643,7 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
 #define drbd_set_out_of_sync(mdev, sector, size) \
 	__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
 extern void drbd_al_shrink(struct drbd_conf *mdev);
+extern int drbd_initialize_al(struct drbd_conf *, void *);
 
 /* drbd_nl.c */
 /* state info broadcast */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 49040a336949..55635edf563b 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2879,34 +2879,14 @@ struct meta_data_on_disk {
 	u8 reserved_u8[4096 - (7*8 + 10*4)];
 } __packed;
 
-/**
- * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
- * @mdev:	DRBD device.
- */
-void drbd_md_sync(struct drbd_conf *mdev)
+
+
+void drbd_md_write(struct drbd_conf *mdev, void *b)
 {
-	struct meta_data_on_disk *buffer;
+	struct meta_data_on_disk *buffer = b;
 	sector_t sector;
 	int i;
 
-	/* Don't accidentally change the DRBD meta data layout. */
-	BUILD_BUG_ON(UI_SIZE != 4);
-	BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
-
-	del_timer(&mdev->md_sync_timer);
-	/* timer may be rearmed by drbd_md_mark_dirty() now. */
-	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
-		return;
-
-	/* We use here D_FAILED and not D_ATTACHING because we try to write
-	 * metadata even if we detach due to a disk failure! */
-	if (!get_ldev_if_state(mdev, D_FAILED))
-		return;
-
-	buffer = drbd_md_get_buffer(mdev);
-	if (!buffer)
-		goto out;
-
 	memset(buffer, 0, sizeof(*buffer));
 
 	buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
@@ -2935,6 +2915,35 @@ void drbd_md_sync(struct drbd_conf *mdev)
 		dev_err(DEV, "meta data update failed!\n");
 		drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
 	}
+}
+
+/**
+ * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
+ * @mdev:	DRBD device.
+ */
+void drbd_md_sync(struct drbd_conf *mdev)
+{
+	struct meta_data_on_disk *buffer;
+
+	/* Don't accidentally change the DRBD meta data layout. */
+	BUILD_BUG_ON(UI_SIZE != 4);
+	BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
+
+	del_timer(&mdev->md_sync_timer);
+	/* timer may be rearmed by drbd_md_mark_dirty() now. */
+	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
+		return;
+
+	/* We use here D_FAILED and not D_ATTACHING because we try to write
+	 * metadata even if we detach due to a disk failure! */
+	if (!get_ldev_if_state(mdev, D_FAILED))
+		return;
+
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		goto out;
+
+	drbd_md_write(mdev, buffer);
 
 	/* Update mdev->ldev->md.la_size_sect,
 	 * since we updated it on metadata. */
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 45d127522e0a..8cc1e640f485 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -827,12 +827,17 @@ void drbd_resume_io(struct drbd_conf *mdev)
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
+enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
 {
 	sector_t prev_first_sect, prev_size; /* previous meta location */
 	sector_t la_size_sect, u_size;
+	struct drbd_md *md = &mdev->ldev->md;
+	u32 prev_al_stripe_size_4k;
+	u32 prev_al_stripes;
 	sector_t size;
 	char ppb[10];
+	void *buffer;
 
 	int md_moved, la_size_changed;
 	enum determine_dev_size rv = DS_UNCHANGED;
@@ -847,6 +852,11 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	 * still lock the act_log to not trigger ASSERTs there.
 	 */
 	drbd_suspend_io(mdev);
+	buffer = drbd_md_get_buffer(mdev); /* Lock meta-data IO */
+	if (!buffer) {
+		drbd_resume_io(mdev);
+		return DS_ERROR;
+	}
 
 	/* no wait necessary anymore, actually we could assert that */
 	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
@@ -855,7 +865,17 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	prev_size = mdev->ldev->md.md_size_sect;
 	la_size_sect = mdev->ldev->md.la_size_sect;
 
-	/* TODO: should only be some assert here, not (re)init... */
+	if (rs) {
+		/* rs is non NULL if we should change the AL layout only */
+
+		prev_al_stripes = md->al_stripes;
+		prev_al_stripe_size_4k = md->al_stripe_size_4k;
+
+		md->al_stripes = rs->al_stripes;
+		md->al_stripe_size_4k = rs->al_stripe_size / 4;
+		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
+	}
+
 	drbd_md_set_sector_offsets(mdev, mdev->ldev);
 
 	rcu_read_lock();
@@ -863,6 +883,21 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	rcu_read_unlock();
 	size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED);
 
+	if (size < la_size_sect) {
+		if (rs && u_size == 0) {
+			/* Remove "rs &&" later. This check should always be active, but
+			   right now the receiver expects the permissive behavior */
+			dev_warn(DEV, "Implicit shrink not allowed. "
+				 "Use --size=%llus for explicit shrink.\n",
+				 (unsigned long long)size);
+			rv = DS_ERROR_SHRINK;
+		}
+		if (u_size > size)
+			rv = DS_ERROR_SPACE_MD;
+		if (rv != DS_UNCHANGED)
+			goto err_out;
+	}
+
 	if (drbd_get_capacity(mdev->this_bdev) != size ||
 	    drbd_bm_capacity(mdev) != size) {
 		int err;
@@ -886,38 +921,57 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 		dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
 		     (unsigned long long)size>>1);
 	}
-	if (rv == DS_ERROR)
-		goto out;
+	if (rv <= DS_ERROR)
+		goto err_out;
 
 	la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);
 
 	md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
 		|| prev_size	   != mdev->ldev->md.md_size_sect;
 
-	if (la_size_changed || md_moved) {
-		int err;
+	if (la_size_changed || md_moved || rs) {
+		u32 prev_flags;
 
 		drbd_al_shrink(mdev); /* All extents inactive. */
+
+		prev_flags = md->flags;
+		md->flags &= ~MDF_PRIMARY_IND;
+		drbd_md_write(mdev, buffer);
+
 		dev_info(DEV, "Writing the whole bitmap, %s\n",
 			 la_size_changed && md_moved ? "size changed and md moved" :
 			 la_size_changed ? "size changed" : "md moved");
 		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
-		err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
-				     "size changed", BM_LOCKED_MASK);
-		if (err) {
-			rv = DS_ERROR;
-			goto out;
-		}
-		drbd_md_mark_dirty(mdev);
+		drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
+			       "size changed", BM_LOCKED_MASK);
+		drbd_initialize_al(mdev, buffer);
+
+		md->flags = prev_flags;
+		drbd_md_write(mdev, buffer);
+
+		if (rs)
+			dev_info(DEV, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
+				 md->al_stripes, md->al_stripe_size_4k * 4);
 	}
 
 	if (size > la_size_sect)
 		rv = DS_GREW;
 	if (size < la_size_sect)
 		rv = DS_SHRUNK;
-out:
+
+	if (0) {
+	err_out:
+		if (rs) {
+			md->al_stripes = prev_al_stripes;
+			md->al_stripe_size_4k = prev_al_stripe_size_4k;
+			md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
+
+			drbd_md_set_sector_offsets(mdev, mdev->ldev);
+		}
+	}
 	lc_unlock(mdev->act_log);
 	wake_up(&mdev->al_wait);
+	drbd_md_put_buffer(mdev);
 	drbd_resume_io(mdev);
 
 	return rv;
@@ -1618,8 +1672,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
 		set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
-	dd = drbd_determine_dev_size(mdev, 0);
-	if (dd == DS_ERROR) {
+	dd = drbd_determine_dev_size(mdev, 0, NULL);
+	if (dd <= DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto force_diskless_dec;
 	} else if (dd == DS_GREW)
@@ -2316,6 +2370,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
+	bool change_al_layout = false;
 	enum dds_flags ddsf;
 	sector_t u_size;
 	int err;
@@ -2326,31 +2381,33 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto fail;
 
+	mdev = adm_ctx.mdev;
+	if (!get_ldev(mdev)) {
+		retcode = ERR_NO_DISK;
+		goto fail;
+	}
+
 	memset(&rs, 0, sizeof(struct resize_parms));
+	rs.al_stripes = mdev->ldev->md.al_stripes;
+	rs.al_stripe_size = mdev->ldev->md.al_stripe_size_4k * 4;
 	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
 		err = resize_parms_from_attrs(&rs, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
-			goto fail;
+			goto fail_ldev;
 		}
 	}
 
-	mdev = adm_ctx.mdev;
 	if (mdev->state.conn > C_CONNECTED) {
 		retcode = ERR_RESIZE_RESYNC;
-		goto fail;
+		goto fail_ldev;
 	}
 
 	if (mdev->state.role == R_SECONDARY &&
 	    mdev->state.peer == R_SECONDARY) {
 		retcode = ERR_NO_PRIMARY;
-		goto fail;
-	}
-
-	if (!get_ldev(mdev)) {
-		retcode = ERR_NO_DISK;
-		goto fail;
+		goto fail_ldev;
 	}
 
 	if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
@@ -2369,6 +2426,28 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
+	if (mdev->ldev->md.al_stripes != rs.al_stripes ||
+	    mdev->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
+		u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
+
+		if (al_size_k > (16 * 1024 * 1024)) {
+			retcode = ERR_MD_LAYOUT_TOO_BIG;
+			goto fail_ldev;
+		}
+
+		if (al_size_k < MD_32kB_SECT/2) {
+			retcode = ERR_MD_LAYOUT_TOO_SMALL;
+			goto fail_ldev;
+		}
+
+		if (mdev->state.conn != C_CONNECTED) {
+			retcode = ERR_MD_LAYOUT_CONNECTED;
+			goto fail_ldev;
+		}
+
+		change_al_layout = true;
+	}
+
 	if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
 		mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
 
@@ -2384,12 +2463,18 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
-	dd = drbd_determine_dev_size(mdev, ddsf);
+	dd = drbd_determine_dev_size(mdev, ddsf, change_al_layout ? &rs : NULL);
 	drbd_md_sync(mdev);
 	put_ldev(mdev);
 	if (dd == DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto fail;
+	} else if (dd == DS_ERROR_SPACE_MD) {
+		retcode = ERR_MD_LAYOUT_NO_FIT;
+		goto fail;
+	} else if (dd == DS_ERROR_SHRINK) {
+		retcode = ERR_IMPLICIT_SHRINK;
+		goto fail;
 	}
 
 	if (mdev->state.conn == C_CONNECTED) {
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 26852b87b034..cc29cd3bf78b 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3617,7 +3617,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 
 	ddsf = be16_to_cpu(p->dds_flags);
 	if (get_ldev(mdev)) {
-		dd = drbd_determine_dev_size(mdev, ddsf);
+		dd = drbd_determine_dev_size(mdev, ddsf, NULL);
 		put_ldev(mdev);
 		if (dd == DS_ERROR)
 			return -EIO;
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 1b4d4ee1168f..de7d74ab3de6 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -177,7 +177,11 @@ enum drbd_ret_code {
 	ERR_NEED_APV_100	= 163,
 	ERR_NEED_ALLOW_TWO_PRI  = 164,
 	ERR_MD_UNCLEAN          = 165,
-
+	ERR_MD_LAYOUT_CONNECTED = 166,
+	ERR_MD_LAYOUT_TOO_BIG   = 167,
+	ERR_MD_LAYOUT_TOO_SMALL = 168,
+	ERR_MD_LAYOUT_NO_FIT    = 169,
+	ERR_IMPLICIT_SHRINK     = 170,
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
 };
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index d0d8fac8a6e4..e8c44572b8cb 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -181,6 +181,8 @@ GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
 	__u64_field(1, DRBD_GENLA_F_MANDATORY,	resize_size)
 	__flg_field(2, DRBD_GENLA_F_MANDATORY,	resize_force)
 	__flg_field(3, DRBD_GENLA_F_MANDATORY,	no_resync)
+	__u32_field_def(4, 0 /* OPTIONAL */, al_stripes, DRBD_AL_STRIPES_DEF)
+	__u32_field_def(5, 0 /* OPTIONAL */, al_stripe_size, DRBD_AL_STRIPE_SIZE_DEF)
 )
 
 GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 1fedf2b17cc8..17e50bb00521 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -215,4 +215,13 @@
 #define DRBD_ALWAYS_ASBP_DEF	0
 #define DRBD_USE_RLE_DEF	1
 
+#define DRBD_AL_STRIPES_MIN     1
+#define DRBD_AL_STRIPES_MAX     1024
+#define DRBD_AL_STRIPES_DEF     1
+#define DRBD_AL_STRIPES_SCALE   '1'
+
+#define DRBD_AL_STRIPE_SIZE_MIN   4
+#define DRBD_AL_STRIPE_SIZE_MAX   16777216
+#define DRBD_AL_STRIPE_SIZE_DEF   32
+#define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 1c297a66654a3295ae87e2b7f3724d214eb2b5ec Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Mon, 1 Jul 2013 15:20:00 +0100
Subject: iio: Fix iio_channel_has_info

Since the info_mask split, iio_channel_has_info() is not working correctly.
info_mask_separate and info_mask_shared_by_type, it is not possible to compare
them directly with the iio_chan_info_enum enum. Correct that bit using the BIT()
macro.

Cc: <stable@vger.kernel.org> # 3.10.x
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/iio.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 8d171f427632..3d35b7023591 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -211,8 +211,8 @@ struct iio_chan_spec {
 static inline bool iio_channel_has_info(const struct iio_chan_spec *chan,
 	enum iio_chan_info_enum type)
 {
-	return (chan->info_mask_separate & type) |
-	       (chan->info_mask_shared_by_type & type);
+	return (chan->info_mask_separate & BIT(type)) |
+	       (chan->info_mask_shared_by_type & BIT(type));
 }
 
 #define IIO_ST(si, rb, sb, sh)						\
-- 
cgit v1.2.3-59-g8ed1b


From add0c59d802e6118e51e21244c3871be35164e4b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 9 Jul 2013 16:17:39 -0700
Subject: cgroup: remove bcache_subsys_id which got added stealthily

cafe563591 ("bcache: A block layer cache") added a new cgroup
subsystem bcache_subsys without proper review and ack.  bcache_subsys
seems to use cgroup for group stats and per-group cache_mode
configuration.  This is very much the type of usage that we don't want
to allow.

Fortunately, CONFIG_CGROUP_BCACHE which enables bcache_subsys is
currently commented out, so this shouldn't have any upstream users.
Let's nip in the bud.  While at it, clarify in cgroup_subsys.h that no
new subsystem should be added without explicit acks from cgroup
maintainers.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: cgroups@vger.kernel.org
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-bcache@vger.kernel.org
---
 include/linux/cgroup_subsys.h | 45 +++++++------------------------------------
 1 file changed, 7 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 6e7ec64b69ab..b613ffd402d1 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -1,86 +1,55 @@
-/* Add subsystem definitions of the form SUBSYS(<name>) in this
- * file. Surround each one by a line of comment markers so that
- * patches don't collide
+/*
+ * List of cgroup subsystems.
+ *
+ * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
  */
-
-/* */
-
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CPUSETS)
 SUBSYS(cpuset)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEBUG)
 SUBSYS(debug)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_SCHED)
 SUBSYS(cpu_cgroup)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_CPUACCT)
 SUBSYS(cpuacct)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_MEMCG)
 SUBSYS(mem_cgroup)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEVICE)
 SUBSYS(devices)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_FREEZER)
 SUBSYS(freezer)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_NET_CLS_CGROUP)
 SUBSYS(net_cls)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_BLK_CGROUP)
 SUBSYS(blkio)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_PERF)
 SUBSYS(perf)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_NETPRIO_CGROUP)
 SUBSYS(net_prio)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB)
 SUBSYS(hugetlb)
 #endif
-
-/* */
-
-#ifdef CONFIG_CGROUP_BCACHE
-SUBSYS(bcache)
-#endif
-
-/* */
+/*
+ * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
+ */
-- 
cgit v1.2.3-59-g8ed1b


From 913ffdb54366f94eec65c656cae8c6e00e1ab1b0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 11 Jul 2013 16:34:48 -0700
Subject: cgroup: replace task_cgroup_path_from_hierarchy() with
 task_cgroup_path()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

task_cgroup_path_from_hierarchy() was added for the planned new users
and none of the currently planned users wants to know about multiple
hierarchies.  This patch drops the multiple hierarchy part and makes
it always return the path in the first non-dummy hierarchy.

As unified hierarchy will always have id 1, this is guaranteed to
return the path for the unified hierarchy if mounted; otherwise, it
will return the path from the hierarchy which happens to occupy the
lowest hierarchy id, which will usually be the first hierarchy mounted
after boot.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Jan Kaluža <jkaluza@redhat.com>
---
 include/linux/cgroup.h |  3 +--
 kernel/cgroup.c        | 31 +++++++++++++++++++------------
 2 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index fd097ecfcd97..21cfaff7e002 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -540,8 +540,7 @@ int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
 
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
-int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id,
-				    char *buf, size_t buflen);
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
 
 int cgroup_task_count(const struct cgroup *cgrp);
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e5583d10a325..afb8d53ca6c7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1846,36 +1846,43 @@ out:
 EXPORT_SYMBOL_GPL(cgroup_path);
 
 /**
- * task_cgroup_path_from_hierarchy - cgroup path of a task on a hierarchy
+ * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
  * @task: target task
- * @hierarchy_id: the hierarchy to look up @task's cgroup from
  * @buf: the buffer to write the path into
  * @buflen: the length of the buffer
  *
- * Determine @task's cgroup on the hierarchy specified by @hierarchy_id and
- * copy its path into @buf.  This function grabs cgroup_mutex and shouldn't
- * be used inside locks used by cgroup controller callbacks.
+ * Determine @task's cgroup on the first (the one with the lowest non-zero
+ * hierarchy_id) cgroup hierarchy and copy its path into @buf.  This
+ * function grabs cgroup_mutex and shouldn't be used inside locks used by
+ * cgroup controller callbacks.
+ *
+ * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short.
  */
-int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id,
-				    char *buf, size_t buflen)
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 {
 	struct cgroupfs_root *root;
-	struct cgroup *cgrp = NULL;
-	int ret = -ENOENT;
+	struct cgroup *cgrp;
+	int hierarchy_id = 1, ret = 0;
+
+	if (buflen < 2)
+		return -ENAMETOOLONG;
 
 	mutex_lock(&cgroup_mutex);
 
-	root = idr_find(&cgroup_hierarchy_idr, hierarchy_id);
+	root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
+
 	if (root) {
 		cgrp = task_cgroup_from_root(task, root);
 		ret = cgroup_path(cgrp, buf, buflen);
+	} else {
+		/* if no hierarchy exists, everyone is in "/" */
+		memcpy(buf, "/", 2);
 	}
 
 	mutex_unlock(&cgroup_mutex);
-
 	return ret;
 }
-EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy);
+EXPORT_SYMBOL_GPL(task_cgroup_path);
 
 /*
  * Control Group taskset
-- 
cgit v1.2.3-59-g8ed1b


From 1258ca805f613025ec079d959d4a78acfb1f79d3 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 11 Jul 2013 13:55:58 +0900
Subject: PM / Sleep: Fix comment typo in pm_wakeup.h

Fix a comment typo (sorce -> source) in pm_wakeup.h.

[rjw: Changelog]
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_wakeup.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 569781faa504..a0f70808d7f4 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -36,8 +36,8 @@
  * @last_time: Monotonic clock when the wakeup source's was touched last time.
  * @prevent_sleep_time: Total time this source has been preventing autosleep.
  * @event_count: Number of signaled wakeup events.
- * @active_count: Number of times the wakeup sorce was activated.
- * @relax_count: Number of times the wakeup sorce was deactivated.
+ * @active_count: Number of times the wakeup source was activated.
+ * @relax_count: Number of times the wakeup source was deactivated.
  * @expire_count: Number of times the wakeup source's timeout has expired.
  * @wakeup_count: Number of times the wakeup source might abort suspend.
  * @active: Status of the wakeup source.
-- 
cgit v1.2.3-59-g8ed1b


From 0db0628d90125193280eabb501c94feaf48fa9ab Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Wed, 19 Jun 2013 14:53:51 -0400
Subject: kernel: delete __cpuinit usage from all core kernel files

The __cpuinit type of throwaway sections might have made sense
some time ago when RAM was more constrained, but now the savings
do not offset the cost and complications.  For example, the fix in
commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time")
is a good example of the nasty type of bugs that can be created
with improper use of the various __init prefixes.

After a discussion on LKML[1] it was decided that cpuinit should go
the way of devinit and be phased out.  Once all the users are gone,
we can then finally remove the macros themselves from linux/init.h.

This removes all the uses of the __cpuinit macros from C files in
the core kernel directories (kernel, init, lib, mm, and include)
that don't really have a specific maintainer.

[1] https://lkml.org/lkml/2013/5/20/589

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 Documentation/cpu-hotplug.txt |  6 +++---
 include/linux/cpu.h           |  2 +-
 include/linux/perf_event.h    |  2 +-
 init/calibrate.c              | 13 ++++++++-----
 kernel/cpu.c                  |  6 +++---
 kernel/events/core.c          |  4 ++--
 kernel/fork.c                 |  2 +-
 kernel/hrtimer.c              |  6 +++---
 kernel/printk.c               |  2 +-
 kernel/profile.c              |  2 +-
 kernel/relay.c                |  2 +-
 kernel/sched/core.c           | 12 ++++++------
 kernel/sched/fair.c           |  2 +-
 kernel/smp.c                  |  2 +-
 kernel/smpboot.c              |  2 +-
 kernel/softirq.c              |  8 ++++----
 kernel/time/tick-sched.c      |  2 +-
 kernel/timer.c                | 10 +++++-----
 kernel/workqueue.c            |  4 ++--
 lib/Kconfig.debug             |  2 +-
 lib/earlycpio.c               |  2 +-
 lib/percpu_counter.c          |  2 +-
 mm/memcontrol.c               |  2 +-
 mm/page-writeback.c           |  4 ++--
 mm/slab.c                     | 10 +++++-----
 mm/slub.c                     |  4 ++--
 mm/vmstat.c                   |  6 +++---
 27 files changed, 62 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index edd4b4df3932..786dc82f98ce 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -267,8 +267,8 @@ Q: If i have some kernel code that needs to be aware of CPU arrival and
 A: This is what you would need in your kernel code to receive notifications.
 
 	#include <linux/cpu.h>
-	static int __cpuinit foobar_cpu_callback(struct notifier_block *nfb,
-					    unsigned long action, void *hcpu)
+	static int foobar_cpu_callback(struct notifier_block *nfb,
+				       unsigned long action, void *hcpu)
 	{
 		unsigned int cpu = (unsigned long)hcpu;
 
@@ -285,7 +285,7 @@ A: This is what you would need in your kernel code to receive notifications.
 		return NOTIFY_OK;
 	}
 
-	static struct notifier_block __cpuinitdata foobar_cpu_notifer =
+	static struct notifier_block foobar_cpu_notifer =
 	{
 	   .notifier_call = foobar_cpu_callback,
 	};
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 944f283f01c4..ab0eade73039 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -114,7 +114,7 @@ enum {
 /* Need to know about CPUs going up/down? */
 #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
 #define cpu_notifier(fn, pri) {					\
-	static struct notifier_block fn##_nb __cpuinitdata =	\
+	static struct notifier_block fn##_nb =			\
 		{ .notifier_call = fn, .priority = pri };	\
 	register_cpu_notifier(&fn##_nb);			\
 }
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8873f82c7baa..c43f6eabad5b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -826,7 +826,7 @@ static inline void perf_restore_debug_store(void)			{ }
  */
 #define perf_cpu_notifier(fn)						\
 do {									\
-	static struct notifier_block fn##_nb __cpuinitdata =		\
+	static struct notifier_block fn##_nb =				\
 		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
 	unsigned long cpu = smp_processor_id();				\
 	unsigned long flags;						\
diff --git a/init/calibrate.c b/init/calibrate.c
index fda0a7b0f06c..520702db9acc 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -31,7 +31,7 @@ __setup("lpj=", lpj_setup);
 #define DELAY_CALIBRATION_TICKS			((HZ < 100) ? 1 : (HZ/100))
 #define MAX_DIRECT_CALIBRATION_RETRIES		5
 
-static unsigned long __cpuinit calibrate_delay_direct(void)
+static unsigned long calibrate_delay_direct(void)
 {
 	unsigned long pre_start, start, post_start;
 	unsigned long pre_end, end, post_end;
@@ -166,7 +166,10 @@ static unsigned long __cpuinit calibrate_delay_direct(void)
 	return 0;
 }
 #else
-static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;}
+static unsigned long calibrate_delay_direct(void)
+{
+	return 0;
+}
 #endif
 
 /*
@@ -180,7 +183,7 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;}
  */
 #define LPS_PREC 8
 
-static unsigned long __cpuinit calibrate_delay_converge(void)
+static unsigned long calibrate_delay_converge(void)
 {
 	/* First stage - slowly accelerate to find initial bounds */
 	unsigned long lpj, lpj_base, ticks, loopadd, loopadd_base, chop_limit;
@@ -254,12 +257,12 @@ static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 };
  * Architectures should override this function if a faster calibration
  * method is available.
  */
-unsigned long __attribute__((weak)) __cpuinit calibrate_delay_is_known(void)
+unsigned long __attribute__((weak)) calibrate_delay_is_known(void)
 {
 	return 0;
 }
 
-void __cpuinit calibrate_delay(void)
+void calibrate_delay(void)
 {
 	unsigned long lpj;
 	static bool printed;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 198a38883e64..b2b227b82123 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -366,7 +366,7 @@ EXPORT_SYMBOL(cpu_down);
 #endif /*CONFIG_HOTPLUG_CPU*/
 
 /* Requires cpu_add_remove_lock to be held */
-static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
+static int _cpu_up(unsigned int cpu, int tasks_frozen)
 {
 	int ret, nr_calls = 0;
 	void *hcpu = (void *)(long)cpu;
@@ -419,7 +419,7 @@ out:
 	return ret;
 }
 
-int __cpuinit cpu_up(unsigned int cpu)
+int cpu_up(unsigned int cpu)
 {
 	int err = 0;
 
@@ -618,7 +618,7 @@ core_initcall(cpu_hotplug_pm_sync_init);
  * It must be called by the arch code on the new cpu, before the new cpu
  * enables interrupts and before the "boot" cpu returns from __cpu_up().
  */
-void __cpuinit notify_cpu_starting(unsigned int cpu)
+void notify_cpu_starting(unsigned int cpu)
 {
 	unsigned long val = CPU_STARTING;
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index eba8fb5834ae..f3e9dce39bc9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7630,7 +7630,7 @@ static void __init perf_event_init_all_cpus(void)
 	}
 }
 
-static void __cpuinit perf_event_init_cpu(int cpu)
+static void perf_event_init_cpu(int cpu)
 {
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
@@ -7719,7 +7719,7 @@ static struct notifier_block perf_reboot_notifier = {
 	.priority = INT_MIN,
 };
 
-static int __cpuinit
+static int
 perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long)hcpu;
diff --git a/kernel/fork.c b/kernel/fork.c
index 66635c80a813..403d2bb8a968 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1546,7 +1546,7 @@ static inline void init_idle_pids(struct pid_link *links)
 	}
 }
 
-struct task_struct * __cpuinit fork_idle(int cpu)
+struct task_struct *fork_idle(int cpu)
 {
 	struct task_struct *task;
 	task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f0f4fe29cd21..383319bae3f7 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1659,7 +1659,7 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
 /*
  * Functions related to boot-time initialization:
  */
-static void __cpuinit init_hrtimers_cpu(int cpu)
+static void init_hrtimers_cpu(int cpu)
 {
 	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
 	int i;
@@ -1740,7 +1740,7 @@ static void migrate_hrtimers(int scpu)
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
+static int hrtimer_cpu_notify(struct notifier_block *self,
 					unsigned long action, void *hcpu)
 {
 	int scpu = (long)hcpu;
@@ -1773,7 +1773,7 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata hrtimers_nb = {
+static struct notifier_block hrtimers_nb = {
 	.notifier_call = hrtimer_cpu_notify,
 };
 
diff --git a/kernel/printk.c b/kernel/printk.c
index d37d45c90ae6..69b0890ed7e5 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1921,7 +1921,7 @@ void resume_console(void)
  * called when a new CPU comes online (or fails to come up), and ensures
  * that any such output gets printed.
  */
-static int __cpuinit console_cpu_notify(struct notifier_block *self,
+static int console_cpu_notify(struct notifier_block *self,
 	unsigned long action, void *hcpu)
 {
 	switch (action) {
diff --git a/kernel/profile.c b/kernel/profile.c
index 0bf400737660..6631e1ef55ab 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -331,7 +331,7 @@ out:
 	put_cpu();
 }
 
-static int __cpuinit profile_cpu_callback(struct notifier_block *info,
+static int profile_cpu_callback(struct notifier_block *info,
 					unsigned long action, void *__cpu)
 {
 	int node, cpu = (unsigned long)__cpu;
diff --git a/kernel/relay.c b/kernel/relay.c
index b91488ba2e5a..5001c9887db1 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -516,7 +516,7 @@ static void setup_callbacks(struct rchan *chan,
  *
  * 	Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD)
  */
-static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb,
+static int relay_hotcpu_callback(struct notifier_block *nb,
 				unsigned long action,
 				void *hcpu)
 {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0d8eb4525e76..b7c32cb7bfeb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4133,7 +4133,7 @@ void show_state_filter(unsigned long state_filter)
 		debug_show_all_locks();
 }
 
-void __cpuinit init_idle_bootup_task(struct task_struct *idle)
+void init_idle_bootup_task(struct task_struct *idle)
 {
 	idle->sched_class = &idle_sched_class;
 }
@@ -4146,7 +4146,7 @@ void __cpuinit init_idle_bootup_task(struct task_struct *idle)
  * NOTE: this function does not set the idle thread's NEED_RESCHED
  * flag, to make booting more robust.
  */
-void __cpuinit init_idle(struct task_struct *idle, int cpu)
+void init_idle(struct task_struct *idle, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
@@ -4630,7 +4630,7 @@ static void set_rq_offline(struct rq *rq)
  * migration_call - callback that gets triggered when a CPU is added.
  * Here we can start up the necessary migration thread for the new CPU.
  */
-static int __cpuinit
+static int
 migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	int cpu = (long)hcpu;
@@ -4684,12 +4684,12 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
  * happens before everything else.  This has to be lower priority than
  * the notifier in the perf_event subsystem, though.
  */
-static struct notifier_block __cpuinitdata migration_notifier = {
+static struct notifier_block migration_notifier = {
 	.notifier_call = migration_call,
 	.priority = CPU_PRI_MIGRATION,
 };
 
-static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
+static int sched_cpu_active(struct notifier_block *nfb,
 				      unsigned long action, void *hcpu)
 {
 	switch (action & ~CPU_TASKS_FROZEN) {
@@ -4702,7 +4702,7 @@ static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
 	}
 }
 
-static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
+static int sched_cpu_inactive(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
 	switch (action & ~CPU_TASKS_FROZEN) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f77f9c527449..bb456f44b7b1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5506,7 +5506,7 @@ void nohz_balance_enter_idle(int cpu)
 	set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
 
-static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
+static int sched_ilb_notifier(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
 	switch (action & ~CPU_TASKS_FROZEN) {
diff --git a/kernel/smp.c b/kernel/smp.c
index 4dba0f7b72ad..fe9f773d7114 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -73,7 +73,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
+static struct notifier_block hotplug_cfd_notifier = {
 	.notifier_call		= hotplug_cfd,
 };
 
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 02fc5c933673..eb89e1807408 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -24,7 +24,7 @@
  */
 static DEFINE_PER_CPU(struct task_struct *, idle_threads);
 
-struct task_struct * __cpuinit idle_thread_get(unsigned int cpu)
+struct task_struct *idle_thread_get(unsigned int cpu)
 {
 	struct task_struct *tsk = per_cpu(idle_threads, cpu);
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index ca25e6e704a2..be3d3514c325 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -699,7 +699,7 @@ void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
 }
 EXPORT_SYMBOL(send_remote_softirq);
 
-static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
+static int remote_softirq_cpu_notify(struct notifier_block *self,
 					       unsigned long action, void *hcpu)
 {
 	/*
@@ -728,7 +728,7 @@ static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
+static struct notifier_block remote_softirq_cpu_notifier = {
 	.notifier_call	= remote_softirq_cpu_notify,
 };
 
@@ -830,7 +830,7 @@ static void takeover_tasklets(unsigned int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int __cpuinit cpu_callback(struct notifier_block *nfb,
+static int cpu_callback(struct notifier_block *nfb,
 				  unsigned long action,
 				  void *hcpu)
 {
@@ -845,7 +845,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata cpu_nfb = {
+static struct notifier_block cpu_nfb = {
 	.notifier_call = cpu_callback
 };
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 69601726a745..e80183f4a6c4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -298,7 +298,7 @@ static int __init tick_nohz_full_setup(char *str)
 }
 __setup("nohz_full=", tick_nohz_full_setup);
 
-static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb,
+static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
 						 unsigned long action,
 						 void *hcpu)
 {
diff --git a/kernel/timer.c b/kernel/timer.c
index 15bc1b41021d..4296d13db3d1 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1505,11 +1505,11 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
 }
 EXPORT_SYMBOL(schedule_timeout_uninterruptible);
 
-static int __cpuinit init_timers_cpu(int cpu)
+static int init_timers_cpu(int cpu)
 {
 	int j;
 	struct tvec_base *base;
-	static char __cpuinitdata tvec_base_done[NR_CPUS];
+	static char tvec_base_done[NR_CPUS];
 
 	if (!tvec_base_done[cpu]) {
 		static char boot_done;
@@ -1577,7 +1577,7 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
 	}
 }
 
-static void __cpuinit migrate_timers(int cpu)
+static void migrate_timers(int cpu)
 {
 	struct tvec_base *old_base;
 	struct tvec_base *new_base;
@@ -1610,7 +1610,7 @@ static void __cpuinit migrate_timers(int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int __cpuinit timer_cpu_notify(struct notifier_block *self,
+static int timer_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -1635,7 +1635,7 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata timers_nb = {
+static struct notifier_block timers_nb = {
 	.notifier_call	= timer_cpu_notify,
 };
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f02c4a4a0c3c..0b72e816b8d0 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4644,7 +4644,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
  * Workqueues should be brought up before normal priority CPU notifiers.
  * This will be registered high priority CPU notifier.
  */
-static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
+static int workqueue_cpu_up_callback(struct notifier_block *nfb,
 					       unsigned long action,
 					       void *hcpu)
 {
@@ -4697,7 +4697,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
  * Workqueues should be brought down after normal priority CPU notifiers.
  * This will be registered as low priority CPU notifier.
  */
-static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
+static int workqueue_cpu_down_callback(struct notifier_block *nfb,
 						 unsigned long action,
 						 void *hcpu)
 {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 98ac17ed6222..1501aa553221 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -238,7 +238,7 @@ config DEBUG_SECTION_MISMATCH
 	  any use of code/data previously in these sections would
 	  most likely result in an oops.
 	  In the code, functions and variables are annotated with
-	  __init, __cpuinit, etc. (see the full list in include/linux/init.h),
+	  __init,, etc. (see the full list in include/linux/init.h),
 	  which results in the code/data being placed in specific sections.
 	  The section mismatch analysis is always performed after a full
 	  kernel build, and enabling this option causes the following
diff --git a/lib/earlycpio.c b/lib/earlycpio.c
index 8078ef49cb79..7aa7ce250c94 100644
--- a/lib/earlycpio.c
+++ b/lib/earlycpio.c
@@ -63,7 +63,7 @@ enum cpio_fields {
  *          the match returned an empty filename string.
  */
 
-struct cpio_data __cpuinit find_cpio_data(const char *path, void *data,
+struct cpio_data find_cpio_data(const char *path, void *data,
 					  size_t len,  long *offset)
 {
 	const size_t cpio_header_len = 8*C_NFIELDS - 2;
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 1fc23a3277e1..93c5d5ecff4e 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -158,7 +158,7 @@ static void compute_batch_value(void)
 	percpu_counter_batch = max(32, nr*2);
 }
 
-static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
+static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
 					unsigned long action, void *hcpu)
 {
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d12ca6f3c293..00a7a664b9c1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2522,7 +2522,7 @@ static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
 	spin_unlock(&memcg->pcp_counter_lock);
 }
 
-static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb,
+static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
 					unsigned long action,
 					void *hcpu)
 {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4514ad7415c3..3f0c895c71fe 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1619,7 +1619,7 @@ void writeback_set_ratelimit(void)
 		ratelimit_pages = 16;
 }
 
-static int __cpuinit
+static int
 ratelimit_handler(struct notifier_block *self, unsigned long action,
 		  void *hcpu)
 {
@@ -1634,7 +1634,7 @@ ratelimit_handler(struct notifier_block *self, unsigned long action,
 	}
 }
 
-static struct notifier_block __cpuinitdata ratelimit_nb = {
+static struct notifier_block ratelimit_nb = {
 	.notifier_call	= ratelimit_handler,
 	.next		= NULL,
 };
diff --git a/mm/slab.c b/mm/slab.c
index 35cb0c861508..2580db062df9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -787,7 +787,7 @@ static void next_reap_node(void)
  * the CPUs getting into lockstep and contending for the global cache chain
  * lock.
  */
-static void __cpuinit start_cpu_timer(int cpu)
+static void start_cpu_timer(int cpu)
 {
 	struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
 
@@ -1186,7 +1186,7 @@ static inline int slabs_tofree(struct kmem_cache *cachep,
 	return (n->free_objects + cachep->num - 1) / cachep->num;
 }
 
-static void __cpuinit cpuup_canceled(long cpu)
+static void cpuup_canceled(long cpu)
 {
 	struct kmem_cache *cachep;
 	struct kmem_cache_node *n = NULL;
@@ -1251,7 +1251,7 @@ free_array_cache:
 	}
 }
 
-static int __cpuinit cpuup_prepare(long cpu)
+static int cpuup_prepare(long cpu)
 {
 	struct kmem_cache *cachep;
 	struct kmem_cache_node *n = NULL;
@@ -1334,7 +1334,7 @@ bad:
 	return -ENOMEM;
 }
 
-static int __cpuinit cpuup_callback(struct notifier_block *nfb,
+static int cpuup_callback(struct notifier_block *nfb,
 				    unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -1390,7 +1390,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
 	return notifier_from_errno(err);
 }
 
-static struct notifier_block __cpuinitdata cpucache_notifier = {
+static struct notifier_block cpucache_notifier = {
 	&cpuup_callback, NULL, 0
 };
 
diff --git a/mm/slub.c b/mm/slub.c
index 3b482c863002..2b02d666bf63 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3773,7 +3773,7 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
  * Use the cpu notifier to insure that the cpu slabs are flushed when
  * necessary.
  */
-static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
+static int slab_cpuup_callback(struct notifier_block *nfb,
 		unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -3799,7 +3799,7 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata slab_notifier = {
+static struct notifier_block slab_notifier = {
 	.notifier_call = slab_cpuup_callback
 };
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f42745e65780..20c2ef4458fa 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1182,7 +1182,7 @@ static void vmstat_update(struct work_struct *w)
 		round_jiffies_relative(sysctl_stat_interval));
 }
 
-static void __cpuinit start_cpu_timer(int cpu)
+static void start_cpu_timer(int cpu)
 {
 	struct delayed_work *work = &per_cpu(vmstat_work, cpu);
 
@@ -1194,7 +1194,7 @@ static void __cpuinit start_cpu_timer(int cpu)
  * Use the cpu notifier to insure that the thresholds are recalculated
  * when necessary.
  */
-static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
+static int vmstat_cpuup_callback(struct notifier_block *nfb,
 		unsigned long action,
 		void *hcpu)
 {
@@ -1226,7 +1226,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata vmstat_notifier =
+static struct notifier_block vmstat_notifier =
 	{ &vmstat_cpuup_callback, NULL, 0 };
 #endif
 
-- 
cgit v1.2.3-59-g8ed1b


From ceac9b9214df539ca814a784c2af94f554bc78d4 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 26 Jun 2013 15:08:48 +0200
Subject: ARM i.MX6Q: Fix IOMUXC GPR1 defines for ENET_CLK_SEL and IPU1/2_MUX

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
---
 include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
index dab34a1deb2c..b1521e82fecf 100644
--- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
+++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
@@ -103,15 +103,15 @@
 #define IMX6Q_GPR1_EXC_MON_MASK			BIT(22)
 #define IMX6Q_GPR1_EXC_MON_OKAY			0x0
 #define IMX6Q_GPR1_EXC_MON_SLVE			BIT(22)
-#define IMX6Q_GPR1_MIPI_IPU2_SEL_MASK		BIT(21)
-#define IMX6Q_GPR1_MIPI_IPU2_SEL_GASKET		0x0
-#define IMX6Q_GPR1_MIPI_IPU2_SEL_IOMUX		BIT(21)
-#define IMX6Q_GPR1_MIPI_IPU1_MUX_MASK		BIT(20)
-#define IMX6Q_GPR1_MIPI_IPU1_MUX_GASKET		0x0
-#define IMX6Q_GPR1_MIPI_IPU1_MUX_IOMUX		BIT(20)
-#define IMX6Q_GPR1_MIPI_IPU2_MUX_MASK		BIT(19)
+#define IMX6Q_GPR1_ENET_CLK_SEL_MASK		BIT(21)
+#define IMX6Q_GPR1_ENET_CLK_SEL_PAD		0
+#define IMX6Q_GPR1_ENET_CLK_SEL_ANATOP		BIT(21)
+#define IMX6Q_GPR1_MIPI_IPU2_MUX_MASK		BIT(20)
 #define IMX6Q_GPR1_MIPI_IPU2_MUX_GASKET		0x0
-#define IMX6Q_GPR1_MIPI_IPU2_MUX_IOMUX		BIT(19)
+#define IMX6Q_GPR1_MIPI_IPU2_MUX_IOMUX		BIT(20)
+#define IMX6Q_GPR1_MIPI_IPU1_MUX_MASK		BIT(19)
+#define IMX6Q_GPR1_MIPI_IPU1_MUX_GASKET		0x0
+#define IMX6Q_GPR1_MIPI_IPU1_MUX_IOMUX		BIT(19)
 #define IMX6Q_GPR1_PCIE_TEST_PD			BIT(18)
 #define IMX6Q_GPR1_IPU_VPU_MUX_MASK		BIT(17)
 #define IMX6Q_GPR1_IPU_VPU_MUX_IPU1		0x0
-- 
cgit v1.2.3-59-g8ed1b


From 4f71612ee3a1b2d15c8246d926a40c4f7d21cc3b Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Wed, 10 Jul 2013 14:05:44 +0800
Subject: ARM: imx: fix vf610 enet module clock selection

The fec/enet driver calculates MDC rate with the formula below.

  ref_freq / ((MII_SPEED + 1) x 2)

The ref_freq here is the fec internal module clock, which is missing
from clk-vf610 clock driver right now.  And clk-vf610 driver mistakenly
supplies RMII clock (50 MHz) as the source to fec.  This results in the
situation that fec driver gets ref_freq as 50 MHz, while physically it
runs at 66 MHz (fec module clock physically sources from ipg which runs
at 66 MHz).  That's why software expects MDC runs at 2.5 MHz, while the
measurement tells it runs at 3.3 MHz.  And this causes the PHY KSZ8041
keeps swithing between Full and Half mode as below.

  libphy: 400d0000.etherne:00 - Link is Up - 100/Full
  libphy: 400d0000.etherne:00 - Link is Up - 100/Half
  libphy: 400d0000.etherne:00 - Link is Up - 100/Full
  libphy: 400d0000.etherne:00 - Link is Up - 100/Half
  libphy: 400d0000.etherne:00 - Link is Up - 100/Full
  libphy: 400d0000.etherne:00 - Link is Up - 100/Half

Add the missing module clock for ENET0 and ENET1, and correct the clock
supplying in device tree to fix above issue.

Thanks to Alison Wang <b18965@freescale.com> for debugging the issue.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
---
 arch/arm/boot/dts/vf610.dtsi            | 8 ++++----
 arch/arm/mach-imx/clk-vf610.c           | 2 ++
 include/dt-bindings/clock/vf610-clock.h | 4 +++-
 3 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/arm/boot/dts/vf610.dtsi b/arch/arm/boot/dts/vf610.dtsi
index e1eb7dadda80..67d929cf9804 100644
--- a/arch/arm/boot/dts/vf610.dtsi
+++ b/arch/arm/boot/dts/vf610.dtsi
@@ -442,8 +442,8 @@
 				compatible = "fsl,mvf600-fec";
 				reg = <0x400d0000 0x1000>;
 				interrupts = <0 78 0x04>;
-				clocks = <&clks VF610_CLK_ENET>,
-					<&clks VF610_CLK_ENET>,
+				clocks = <&clks VF610_CLK_ENET0>,
+					<&clks VF610_CLK_ENET0>,
 					<&clks VF610_CLK_ENET>;
 				clock-names = "ipg", "ahb", "ptp";
 				status = "disabled";
@@ -453,8 +453,8 @@
 				compatible = "fsl,mvf600-fec";
 				reg = <0x400d1000 0x1000>;
 				interrupts = <0 79 0x04>;
-				clocks = <&clks VF610_CLK_ENET>,
-					<&clks VF610_CLK_ENET>,
+				clocks = <&clks VF610_CLK_ENET1>,
+					<&clks VF610_CLK_ENET1>,
 					<&clks VF610_CLK_ENET>;
 				clock-names = "ipg", "ahb", "ptp";
 				status = "disabled";
diff --git a/arch/arm/mach-imx/clk-vf610.c b/arch/arm/mach-imx/clk-vf610.c
index d617c0b7c809..b169a396d93b 100644
--- a/arch/arm/mach-imx/clk-vf610.c
+++ b/arch/arm/mach-imx/clk-vf610.c
@@ -183,6 +183,8 @@ static void __init vf610_clocks_init(struct device_node *ccm_node)
 	clk[VF610_CLK_ENET_TS_SEL] = imx_clk_mux("enet_ts_sel", CCM_CSCMR2, 0, 3, enet_ts_sels, 7);
 	clk[VF610_CLK_ENET] = imx_clk_gate("enet", "enet_sel", CCM_CSCDR1, 24);
 	clk[VF610_CLK_ENET_TS] = imx_clk_gate("enet_ts", "enet_ts_sel", CCM_CSCDR1, 23);
+	clk[VF610_CLK_ENET0] = imx_clk_gate2("enet0", "ipg_bus", CCM_CCGR9, CCM_CCGRx_CGn(0));
+	clk[VF610_CLK_ENET1] = imx_clk_gate2("enet1", "ipg_bus", CCM_CCGR9, CCM_CCGRx_CGn(1));
 
 	clk[VF610_CLK_PIT] = imx_clk_gate2("pit", "ipg_bus", CCM_CCGR1, CCM_CCGRx_CGn(7));
 
diff --git a/include/dt-bindings/clock/vf610-clock.h b/include/dt-bindings/clock/vf610-clock.h
index 15e997fa78f2..4aa2b48cd151 100644
--- a/include/dt-bindings/clock/vf610-clock.h
+++ b/include/dt-bindings/clock/vf610-clock.h
@@ -158,6 +158,8 @@
 #define VF610_CLK_GPU_SEL		145
 #define VF610_CLK_GPU_EN		146
 #define VF610_CLK_GPU2D			147
-#define VF610_CLK_END			148
+#define VF610_CLK_ENET0			148
+#define VF610_CLK_ENET1			149
+#define VF610_CLK_END			150
 
 #endif /* __DT_BINDINGS_CLOCK_VF610_H */
-- 
cgit v1.2.3-59-g8ed1b


From b9b3259746d77f4fcb786e2a43c25bcc40773755 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 14 Jul 2013 16:05:51 -0700
Subject: sysfs.h: add __ATTR_RW() macro

A number of parts of the kernel created their own version of this, might
as well have the sysfs core provide it instead.

Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 2 ++
 kernel/events/core.c  | 2 --
 mm/backing-dev.c      | 2 --
 3 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index e2cee22f578a..9cd20c8404e5 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -79,6 +79,8 @@ struct attribute_group {
 	.show	= _name##_show,					\
 }
 
+#define __ATTR_RW(_name) __ATTR(_name, 0644, _name##_show, _name##_store)
+
 #define __ATTR_NULL { .attr = { .name = NULL } }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/kernel/events/core.c b/kernel/events/core.c
index eba8fb5834ae..dd9878029d1f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6234,8 +6234,6 @@ perf_event_mux_interval_ms_store(struct device *dev,
 	return count;
 }
 
-#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
-
 static struct device_attribute pmu_dev_attrs[] = {
 	__ATTR_RO(type),
 	__ATTR_RW(perf_event_mux_interval_ms),
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d014ee5fcbbd..e04454cdb33f 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -232,8 +232,6 @@ static ssize_t stable_pages_required_show(struct device *dev,
 			bdi_cap_stable_pages_required(bdi) ? 1 : 0);
 }
 
-#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
-
 static struct device_attribute bdi_dev_attrs[] = {
 	__ATTR_RW(read_ahead_kb),
 	__ATTR_RW(min_ratio),
-- 
cgit v1.2.3-59-g8ed1b


From f2f37f58b1b933b06d6d84e80a31a1b500fb0db2 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 14 Jul 2013 16:05:52 -0700
Subject: sysfs.h: add ATTRIBUTE_GROUPS() macro

To make it easier for driver subsystems to work with attribute groups,
create the ATTRIBUTE_GROUPS macro to remove some of the repetitive
typing for the most common use for attribute groups.

Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 9cd20c8404e5..f62ff01e5f59 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -94,6 +94,15 @@ struct attribute_group {
 #define __ATTR_IGNORE_LOCKDEP	__ATTR
 #endif
 
+#define ATTRIBUTE_GROUPS(name)					\
+static const struct attribute_group name##_group = {		\
+	.attrs = name##_attrs,					\
+};								\
+static const struct attribute_group *name##_groups[] = {	\
+	&name##_group,						\
+	NULL,							\
+}
+
 #define attr_name(_attr) (_attr).attr.name
 
 struct file;
-- 
cgit v1.2.3-59-g8ed1b


From e4b63603c2a1e2c4db3de11b0f2b17360a7695bb Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 14 Jul 2013 16:05:53 -0700
Subject: sysfs.h: add BIN_ATTR macro

This makes it easier to create static binary attributes, which is needed
in a number of drivers, instead of "open coding" them.

Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index f62ff01e5f59..d50a96b9bb6d 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -132,6 +132,15 @@ struct bin_attribute {
  */
 #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr)
 
+/* macro to create static binary attributes easier */
+#define BIN_ATTR(_name, _mode, _read, _write, _size)		\
+struct bin_attribute bin_attr_##_name = {			\
+	.attr = {.name = __stringify(_name), .mode = _mode },	\
+	.read	= _read,					\
+	.write	= _write,					\
+	.size	= _size,					\
+}
+
 struct sysfs_ops {
 	ssize_t	(*show)(struct kobject *, struct attribute *,char *);
 	ssize_t	(*store)(struct kobject *,struct attribute *,const char *, size_t);
-- 
cgit v1.2.3-59-g8ed1b


From ced321bf9151535f85779b0004c93529f860b2a4 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 14 Jul 2013 16:05:54 -0700
Subject: driver core: device.h: add RW and RO attribute macros

Make it easier to create attributes without having to always audit the
mode settings.

Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index bcf8c0d4cd98..f207a8f49f80 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -47,7 +47,11 @@ struct bus_attribute {
 };
 
 #define BUS_ATTR(_name, _mode, _show, _store)	\
-struct bus_attribute bus_attr_##_name = __ATTR(_name, _mode, _show, _store)
+	struct bus_attribute bus_attr_##_name = __ATTR(_name, _mode, _show, _store)
+#define BUS_ATTR_RW(_name) \
+	struct bus_attribute bus_attr_##_name = __ATTR_RW(_name)
+#define BUS_ATTR_RO(_name) \
+	struct bus_attribute bus_attr_##_name = __ATTR_RO(_name)
 
 extern int __must_check bus_create_file(struct bus_type *,
 					struct bus_attribute *);
@@ -261,9 +265,12 @@ struct driver_attribute {
 			 size_t count);
 };
 
-#define DRIVER_ATTR(_name, _mode, _show, _store)	\
-struct driver_attribute driver_attr_##_name =		\
-	__ATTR(_name, _mode, _show, _store)
+#define DRIVER_ATTR(_name, _mode, _show, _store) \
+	struct driver_attribute driver_attr_##_name = __ATTR(_name, _mode, _show, _store)
+#define DRIVER_ATTR_RW(_name) \
+	struct driver_attribute driver_attr_##_name = __ATTR_RW(_name)
+#define DRIVER_ATTR_RO(_name) \
+	struct driver_attribute driver_attr_##_name = __ATTR_RO(_name)
 
 extern int __must_check driver_create_file(struct device_driver *driver,
 					const struct driver_attribute *attr);
@@ -414,8 +421,12 @@ struct class_attribute {
 				 const struct class_attribute *attr);
 };
 
-#define CLASS_ATTR(_name, _mode, _show, _store)			\
-struct class_attribute class_attr_##_name = __ATTR(_name, _mode, _show, _store)
+#define CLASS_ATTR(_name, _mode, _show, _store) \
+	struct class_attribute class_attr_##_name = __ATTR(_name, _mode, _show, _store)
+#define CLASS_ATTR_RW(_name) \
+	struct class_attribute class_attr_##_name = __ATTR_RW(_name)
+#define CLASS_ATTR_RO(_name) \
+	struct class_attribute class_attr_##_name = __ATTR_RO(_name)
 
 extern int __must_check class_create_file(struct class *class,
 					  const struct class_attribute *attr);
@@ -423,7 +434,6 @@ extern void class_remove_file(struct class *class,
 			      const struct class_attribute *attr);
 
 /* Simple class attribute that is just a static string */
-
 struct class_attribute_string {
 	struct class_attribute attr;
 	char *str;
@@ -512,6 +522,10 @@ ssize_t device_store_bool(struct device *dev, struct device_attribute *attr,
 
 #define DEVICE_ATTR(_name, _mode, _show, _store) \
 	struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
+#define DEVICE_ATTR_RW(_name) \
+	struct device_attribute dev_attr_##_name = __ATTR_RW(_name)
+#define DEVICE_ATTR_RO(_name) \
+	struct device_attribute dev_attr_##_name = __ATTR_RO(_name)
 #define DEVICE_ULONG_ATTR(_name, _mode, _var) \
 	struct dev_ext_attribute dev_attr_##_name = \
 		{ __ATTR(_name, _mode, device_show_ulong, device_store_ulong), &(_var) }
-- 
cgit v1.2.3-59-g8ed1b


From 6ab9cea16075ea707022753395f340b67f64304c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 14 Jul 2013 16:05:55 -0700
Subject: sysfs: add support for binary attributes in groups

groups should be able to support binary attributes, just like it
supports "normal" attributes.  This lets us only handle one type of
structure, groups, throughout the driver core and subsystems, making
binary attributes a "full fledged" part of the driver model, and not
something just "tacked on".

Reported-by: Oliver Schinagl <oliver@schinagl.nl>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/group.c      | 66 +++++++++++++++++++++++++++++++++++----------------
 include/linux/sysfs.h |  4 ++--
 2 files changed, 48 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index aec3d5c98c94..e5719c6095c3 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -20,38 +20,64 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
 			 const struct attribute_group *grp)
 {
 	struct attribute *const* attr;
-	int i;
+	struct bin_attribute *const* bin_attr;
 
-	for (i = 0, attr = grp->attrs; *attr; i++, attr++)
-		sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
+	if (grp->attrs)
+		for (attr = grp->attrs; *attr; attr++)
+			sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
+	if (grp->bin_attrs)
+		for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++)
+			sysfs_remove_bin_file(kobj, *bin_attr);
 }
 
 static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
 			const struct attribute_group *grp, int update)
 {
 	struct attribute *const* attr;
+	struct bin_attribute *const* bin_attr;
 	int error = 0, i;
 
-	for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
-		umode_t mode = 0;
+	if (grp->attrs) {
+		for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
+			umode_t mode = 0;
+
+			/*
+			 * In update mode, we're changing the permissions or
+			 * visibility.  Do this by first removing then
+			 * re-adding (if required) the file.
+			 */
+			if (update)
+				sysfs_hash_and_remove(dir_sd, NULL,
+						      (*attr)->name);
+			if (grp->is_visible) {
+				mode = grp->is_visible(kobj, *attr, i);
+				if (!mode)
+					continue;
+			}
+			error = sysfs_add_file_mode(dir_sd, *attr,
+						    SYSFS_KOBJ_ATTR,
+						    (*attr)->mode | mode);
+			if (unlikely(error))
+				break;
+		}
+		if (error) {
+			remove_files(dir_sd, kobj, grp);
+			goto exit;
+		}
+	}
 
-		/* in update mode, we're changing the permissions or
-		 * visibility.  Do this by first removing then
-		 * re-adding (if required) the file */
-		if (update)
-			sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
-		if (grp->is_visible) {
-			mode = grp->is_visible(kobj, *attr, i);
-			if (!mode)
-				continue;
+	if (grp->bin_attrs) {
+		for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) {
+			if (update)
+				sysfs_remove_bin_file(kobj, *bin_attr);
+			error = sysfs_create_bin_file(kobj, *bin_attr);
+			if (error)
+				break;
 		}
-		error = sysfs_add_file_mode(dir_sd, *attr, SYSFS_KOBJ_ATTR,
-					    (*attr)->mode | mode);
-		if (unlikely(error))
-			break;
+		if (error)
+			remove_files(dir_sd, kobj, grp);
 	}
-	if (error)
-		remove_files(dir_sd, kobj, grp);
+exit:
 	return error;
 }
 
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index d50a96b9bb6d..2c3b6a30697d 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -21,6 +21,7 @@
 
 struct kobject;
 struct module;
+struct bin_attribute;
 enum kobj_ns_type;
 
 struct attribute {
@@ -59,10 +60,9 @@ struct attribute_group {
 	umode_t			(*is_visible)(struct kobject *,
 					      struct attribute *, int);
 	struct attribute	**attrs;
+	struct bin_attribute	**bin_attrs;
 };
 
-
-
 /**
  * Use these macros to make defining attributes easier. See include/linux/device.h
  * for examples..
-- 
cgit v1.2.3-59-g8ed1b


From 39ef311204941ddd01ea2950d6220c8ccc710d15 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 14 Jul 2013 16:05:57 -0700
Subject: driver core: Introduce device_create_groups

device_create_groups lets callers create devices as well as associated
sysfs attributes with a single call. This avoids race conditions seen
if sysfs attributes on new devices are created later.

[fixed up comment block placement and add checks for printk buffer
formats - gregkh]

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 111 ++++++++++++++++++++++++++++++++++++-------------
 include/linux/device.h |   5 +++
 2 files changed, 88 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index dc3ea237f086..a8aae1823f73 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1667,34 +1667,11 @@ static void device_create_release(struct device *dev)
 	kfree(dev);
 }
 
-/**
- * device_create_vargs - creates a device and registers it with sysfs
- * @class: pointer to the struct class that this device should be registered to
- * @parent: pointer to the parent struct device of this new device, if any
- * @devt: the dev_t for the char device to be added
- * @drvdata: the data to be added to the device for callbacks
- * @fmt: string for the device's name
- * @args: va_list for the device's name
- *
- * This function can be used by char device classes.  A struct device
- * will be created in sysfs, registered to the specified class.
- *
- * A "dev" file will be created, showing the dev_t for the device, if
- * the dev_t is not 0,0.
- * If a pointer to a parent struct device is passed in, the newly created
- * struct device will be a child of that device in sysfs.
- * The pointer to the struct device will be returned from the call.
- * Any further sysfs files that might be required can be created using this
- * pointer.
- *
- * Returns &struct device pointer on success, or ERR_PTR() on error.
- *
- * Note: the struct class passed to this function must have previously
- * been created with a call to class_create().
- */
-struct device *device_create_vargs(struct class *class, struct device *parent,
-				   dev_t devt, void *drvdata, const char *fmt,
-				   va_list args)
+static struct device *
+device_create_groups_vargs(struct class *class, struct device *parent,
+			   dev_t devt, void *drvdata,
+			   const struct attribute_group **groups,
+			   const char *fmt, va_list args)
 {
 	struct device *dev = NULL;
 	int retval = -ENODEV;
@@ -1711,6 +1688,7 @@ struct device *device_create_vargs(struct class *class, struct device *parent,
 	dev->devt = devt;
 	dev->class = class;
 	dev->parent = parent;
+	dev->groups = groups;
 	dev->release = device_create_release;
 	dev_set_drvdata(dev, drvdata);
 
@@ -1728,6 +1706,39 @@ error:
 	put_device(dev);
 	return ERR_PTR(retval);
 }
+
+/**
+ * device_create_vargs - creates a device and registers it with sysfs
+ * @class: pointer to the struct class that this device should be registered to
+ * @parent: pointer to the parent struct device of this new device, if any
+ * @devt: the dev_t for the char device to be added
+ * @drvdata: the data to be added to the device for callbacks
+ * @fmt: string for the device's name
+ * @args: va_list for the device's name
+ *
+ * This function can be used by char device classes.  A struct device
+ * will be created in sysfs, registered to the specified class.
+ *
+ * A "dev" file will be created, showing the dev_t for the device, if
+ * the dev_t is not 0,0.
+ * If a pointer to a parent struct device is passed in, the newly created
+ * struct device will be a child of that device in sysfs.
+ * The pointer to the struct device will be returned from the call.
+ * Any further sysfs files that might be required can be created using this
+ * pointer.
+ *
+ * Returns &struct device pointer on success, or ERR_PTR() on error.
+ *
+ * Note: the struct class passed to this function must have previously
+ * been created with a call to class_create().
+ */
+struct device *device_create_vargs(struct class *class, struct device *parent,
+				   dev_t devt, void *drvdata, const char *fmt,
+				   va_list args)
+{
+	return device_create_groups_vargs(class, parent, devt, drvdata, NULL,
+					  fmt, args);
+}
 EXPORT_SYMBOL_GPL(device_create_vargs);
 
 /**
@@ -1767,6 +1778,50 @@ struct device *device_create(struct class *class, struct device *parent,
 }
 EXPORT_SYMBOL_GPL(device_create);
 
+/**
+ * device_create_with_groups - creates a device and registers it with sysfs
+ * @class: pointer to the struct class that this device should be registered to
+ * @parent: pointer to the parent struct device of this new device, if any
+ * @devt: the dev_t for the char device to be added
+ * @drvdata: the data to be added to the device for callbacks
+ * @groups: NULL-terminated list of attribute groups to be created
+ * @fmt: string for the device's name
+ *
+ * This function can be used by char device classes.  A struct device
+ * will be created in sysfs, registered to the specified class.
+ * Additional attributes specified in the groups parameter will also
+ * be created automatically.
+ *
+ * A "dev" file will be created, showing the dev_t for the device, if
+ * the dev_t is not 0,0.
+ * If a pointer to a parent struct device is passed in, the newly created
+ * struct device will be a child of that device in sysfs.
+ * The pointer to the struct device will be returned from the call.
+ * Any further sysfs files that might be required can be created using this
+ * pointer.
+ *
+ * Returns &struct device pointer on success, or ERR_PTR() on error.
+ *
+ * Note: the struct class passed to this function must have previously
+ * been created with a call to class_create().
+ */
+struct device *device_create_with_groups(struct class *class,
+					 struct device *parent, dev_t devt,
+					 void *drvdata,
+					 const struct attribute_group **groups,
+					 const char *fmt, ...)
+{
+	va_list vargs;
+	struct device *dev;
+
+	va_start(vargs, fmt);
+	dev = device_create_groups_vargs(class, parent, devt, drvdata, groups,
+					 fmt, vargs);
+	va_end(vargs);
+	return dev;
+}
+EXPORT_SYMBOL_GPL(device_create_with_groups);
+
 static int __match_devt(struct device *dev, const void *data)
 {
 	const dev_t *devt = data;
diff --git a/include/linux/device.h b/include/linux/device.h
index f207a8f49f80..bd5931e89f74 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -938,6 +938,11 @@ extern __printf(5, 6)
 struct device *device_create(struct class *cls, struct device *parent,
 			     dev_t devt, void *drvdata,
 			     const char *fmt, ...);
+extern __printf(6, 7)
+struct device *device_create_with_groups(struct class *cls,
+			     struct device *parent, dev_t devt, void *drvdata,
+			     const struct attribute_group **groups,
+			     const char *fmt, ...);
 extern void device_destroy(struct class *cls, dev_t devt);
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From d05a6f96c76062b5f25858ac02cf677602076f7e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 14 Jul 2013 16:05:58 -0700
Subject: driver core: add default groups to struct class

We should be using groups, not attribute lists, for classes to allow
subdirectories, and soon, binary files.  Groups are just more flexible
overall, so add them.

The dev_attrs list will go away after all in-kernel users are converted
to use dev_groups.

Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 9 ++++++++-
 include/linux/device.h | 4 +++-
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index a8aae1823f73..8856d74545d9 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -528,9 +528,12 @@ static int device_add_attrs(struct device *dev)
 	int error;
 
 	if (class) {
-		error = device_add_attributes(dev, class->dev_attrs);
+		error = device_add_groups(dev, class->dev_groups);
 		if (error)
 			return error;
+		error = device_add_attributes(dev, class->dev_attrs);
+		if (error)
+			goto err_remove_class_groups;
 		error = device_add_bin_attributes(dev, class->dev_bin_attrs);
 		if (error)
 			goto err_remove_class_attrs;
@@ -563,6 +566,9 @@ static int device_add_attrs(struct device *dev)
  err_remove_class_attrs:
 	if (class)
 		device_remove_attributes(dev, class->dev_attrs);
+ err_remove_class_groups:
+	if (class)
+		device_remove_groups(dev, class->dev_groups);
 
 	return error;
 }
@@ -581,6 +587,7 @@ static void device_remove_attrs(struct device *dev)
 	if (class) {
 		device_remove_attributes(dev, class->dev_attrs);
 		device_remove_bin_attributes(dev, class->dev_bin_attrs);
+		device_remove_groups(dev, class->dev_groups);
 	}
 }
 
diff --git a/include/linux/device.h b/include/linux/device.h
index bd5931e89f74..22b546a58591 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -320,6 +320,7 @@ int subsys_virtual_register(struct bus_type *subsys,
  * @name:	Name of the class.
  * @owner:	The module owner.
  * @class_attrs: Default attributes of this class.
+ * @dev_groups:	Default attributes of the devices that belong to the class.
  * @dev_attrs:	Default attributes of the devices belong to the class.
  * @dev_bin_attrs: Default binary attributes of the devices belong to the class.
  * @dev_kobj:	The kobject that represents this class and links it into the hierarchy.
@@ -349,7 +350,8 @@ struct class {
 	struct module		*owner;
 
 	struct class_attribute		*class_attrs;
-	struct device_attribute		*dev_attrs;
+	struct device_attribute		*dev_attrs;	/* use dev_groups instead */
+	const struct attribute_group	**dev_groups;
 	struct bin_attribute		*dev_bin_attrs;
 	struct kobject			*dev_kobj;
 
-- 
cgit v1.2.3-59-g8ed1b


From 3493f69f4c4e8703961919a9a56c2d2e6a25b46f Mon Sep 17 00:00:00 2001
From: Oliver Schinagl <oliver@schinagl.nl>
Date: Sun, 14 Jul 2013 16:05:59 -0700
Subject: sysfs: add more helper macro's for (bin_)attribute(_groups)

With the recent changes to sysfs there's various helper macro's.
However there's no RW, RO BIN_ helper macro's. This patch adds them.

Signed-off-by: Oliver Schinagl <oliver@schinagl.nl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 51 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 38 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 2c3b6a30697d..d907a7328025 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -17,6 +17,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/kobject_ns.h>
+#include <linux/stat.h>
 #include <linux/atomic.h>
 
 struct kobject;
@@ -94,15 +95,18 @@ struct attribute_group {
 #define __ATTR_IGNORE_LOCKDEP	__ATTR
 #endif
 
-#define ATTRIBUTE_GROUPS(name)					\
-static const struct attribute_group name##_group = {		\
-	.attrs = name##_attrs,					\
-};								\
-static const struct attribute_group *name##_groups[] = {	\
-	&name##_group,						\
+#define __ATTRIBUTE_GROUPS(_name)				\
+static const struct attribute_group *_name##_groups[] = {	\
+	&_name##_group,						\
 	NULL,							\
 }
 
+#define ATTRIBUTE_GROUPS(_name)					\
+static const struct attribute_group _name##_group = {		\
+	.attrs = _name##_attrs,					\
+};								\
+__ATTRIBUTE_GROUPS(_name)
+
 #define attr_name(_attr) (_attr).attr.name
 
 struct file;
@@ -132,15 +136,36 @@ struct bin_attribute {
  */
 #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr)
 
-/* macro to create static binary attributes easier */
-#define BIN_ATTR(_name, _mode, _read, _write, _size)		\
-struct bin_attribute bin_attr_##_name = {			\
-	.attr = {.name = __stringify(_name), .mode = _mode },	\
-	.read	= _read,					\
-	.write	= _write,					\
-	.size	= _size,					\
+/* macros to create static binary attributes easier */
+#define __BIN_ATTR(_name, _mode, _read, _write, _size) {		\
+	.attr = { .name = __stringify(_name), .mode = _mode },		\
+	.read	= _read,						\
+	.write	= _write,						\
+	.size	= _size,						\
+}
+
+#define __BIN_ATTR_RO(_name, _size) {					\
+	.attr	= { .name = __stringify(_name), .mode = S_IRUGO },	\
+	.read	= _name##_read,						\
+	.size	= _size,						\
 }
 
+#define __BIN_ATTR_RW(_name, _size) __BIN_ATTR(_name,			\
+				   (S_IWUSR | S_IRUGO), _name##_read,	\
+				   _name##_write)
+
+#define __BIN_ATTR_NULL __ATTR_NULL
+
+#define BIN_ATTR(_name, _mode, _read, _write, _size)			\
+struct bin_attribute bin_attr_##_name = __BIN_ATTR(_name, _mode, _read,	\
+					_write, _size)
+
+#define BIN_ATTR_RO(_name, _size)					\
+struct bin_attribute bin_attr_##_name = __BIN_ATTR_RO(_name, _size)
+
+#define BIN_ATTR_RW(_name, _size)					\
+struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size)
+
 struct sysfs_ops {
 	ssize_t	(*show)(struct kobject *, struct attribute *,char *);
 	ssize_t	(*store)(struct kobject *,struct attribute *,const char *, size_t);
-- 
cgit v1.2.3-59-g8ed1b


From aa01aa3ca205ea04f44423a58bae38aec886fb96 Mon Sep 17 00:00:00 2001
From: Oliver Schinagl <oliver@schinagl.nl>
Date: Sun, 14 Jul 2013 16:06:00 -0700
Subject: sysfs: use file mode defines from stat.h

With the last patches stat.h was included to the header, and thus those
permission defines should be used.

Signed-off-by: Oliver Schinagl <oliver@schinagl.nl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index d907a7328025..9e8a9b555ad6 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -69,18 +69,19 @@ struct attribute_group {
  * for examples..
  */
 
-#define __ATTR(_name,_mode,_show,_store) { \
-	.attr = {.name = __stringify(_name), .mode = _mode },	\
-	.show	= _show,					\
-	.store	= _store,					\
+#define __ATTR(_name,_mode,_show,_store) { 				\
+	.attr = {.name = __stringify(_name), .mode = _mode },		\
+	.show	= _show,						\
+	.store	= _store,						\
 }
 
-#define __ATTR_RO(_name) { \
-	.attr	= { .name = __stringify(_name), .mode = 0444 },	\
-	.show	= _name##_show,					\
+#define __ATTR_RO(_name) {						\
+	.attr	= { .name = __stringify(_name), .mode = S_IRUGO },	\
+	.show	= _name##_show,						\
 }
 
-#define __ATTR_RW(_name) __ATTR(_name, 0644, _name##_show, _name##_store)
+#define __ATTR_RW(_name) __ATTR(_name, (S_IWUSR | S_IRUGO),		\
+			 _name##_show, _name##_store)
 
 #define __ATTR_NULL { .attr = { .name = NULL } }
 
-- 
cgit v1.2.3-59-g8ed1b


From 36ff66db3fb5642906e46e73ca9cf92f1c5974ff Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 27 Jun 2013 15:27:07 -0400
Subject: USB: move the definition of USB_MAXCHILDREN

The USB_MAXCHILDREN symbol is used in include/uapi/linux/usb/ch11.h, a
user-mode header, even though it is defined in include/linux/usb.h,
which is kernel-only.  This causes compile-time errors when user
programs try to #include linux/usb/ch11.h.

This patch fixes the problem by moving the definition of USB_MAXCHILDREN
into ch11.h.  It also gets rid of unneeded parentheses.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h           | 11 -----------
 include/uapi/linux/usb/ch11.h | 11 +++++++++++
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index a232b7ece1f6..0eec2689b955 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -367,17 +367,6 @@ struct usb_bus {
 
 /* ----------------------------------------------------------------------- */
 
-/* This is arbitrary.
- * From USB 2.0 spec Table 11-13, offset 7, a hub can
- * have up to 255 ports. The most yet reported is 10.
- *
- * Current Wireless USB host hardware (Intel i1480 for example) allows
- * up to 22 devices to connect. Upcoming hardware might raise that
- * limit. Because the arrays need to add a bit for hub status data, we
- * do 31, so plus one evens out to four bytes.
- */
-#define USB_MAXCHILDREN		(31)
-
 struct usb_tt;
 
 enum usb_device_removable {
diff --git a/include/uapi/linux/usb/ch11.h b/include/uapi/linux/usb/ch11.h
index 7692dc69ccf7..331499d597fa 100644
--- a/include/uapi/linux/usb/ch11.h
+++ b/include/uapi/linux/usb/ch11.h
@@ -11,6 +11,17 @@
 
 #include <linux/types.h>	/* __u8 etc */
 
+/* This is arbitrary.
+ * From USB 2.0 spec Table 11-13, offset 7, a hub can
+ * have up to 255 ports. The most yet reported is 10.
+ *
+ * Current Wireless USB host hardware (Intel i1480 for example) allows
+ * up to 22 devices to connect. Upcoming hardware might raise that
+ * limit. Because the arrays need to add a bit for hub status data, we
+ * use 31, so plus one evens out to four bytes.
+ */
+#define USB_MAXCHILDREN		31
+
 /*
  * Hub request types
  */
-- 
cgit v1.2.3-59-g8ed1b


From c0d15cc7ee8c0d1970197d9eb1727503bcdd2471 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Tue, 16 Jul 2013 22:44:08 -0400
Subject: linked-list: Remove __list_for_each

__list_for_each used to be the non prefetch() aware list walking
primitive.  When we removed the prefetch macros from the list routines,
it became redundant.  Given it does exactly the same thing as
list_for_each now, we might as well remove it and call list_for_each
directly.

All users of __list_for_each have been converted to list_for_each calls
in the current merge window.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/list.h b/include/linux/list.h
index b83e5657365a..f4d8a2f12a33 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -380,17 +380,6 @@ static inline void list_splice_tail_init(struct list_head *list,
 #define list_for_each(pos, head) \
 	for (pos = (head)->next; pos != (head); pos = pos->next)
 
-/**
- * __list_for_each	-	iterate over a list
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- *
- * This variant doesn't differ from list_for_each() any more.
- * We don't do prefetching in either case.
- */
-#define __list_for_each(pos, head) \
-	for (pos = (head)->next; pos != (head); pos = pos->next)
-
 /**
  * list_for_each_prev	-	iterate over a list backwards
  * @pos:	the &struct list_head to use as a loop cursor.
-- 
cgit v1.2.3-59-g8ed1b


From 242b2287cd7f27521c8b54a4101d569e53e7a0ca Mon Sep 17 00:00:00 2001
From: Aaron Lu <aaron.lu@intel.com>
Date: Tue, 2 Jul 2013 21:59:10 +0800
Subject: ACPICA: expose OSI version

Expose acpi_gbl_osi_data so that code outside of ACPICA can check
the value of the last successfull _OSI call.  The definitions for
OSI versions are moved to actypes.h so that other components can
access them too.

Based on a patch from Matthew Garrett which in turn was based on
an earlier patch from Seth Forshee.

[rjw: Changelog]
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/aclocal.h | 13 -------------
 include/acpi/acpixf.h         |  1 +
 include/acpi/actypes.h        | 15 +++++++++++++++
 3 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index dfed26545ba2..d4a4901637cd 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -931,19 +931,6 @@ struct acpi_bit_register_info {
 
 /* Structs and definitions for _OSI support and I/O port validation */
 
-#define ACPI_OSI_WIN_2000               0x01
-#define ACPI_OSI_WIN_XP                 0x02
-#define ACPI_OSI_WIN_XP_SP1             0x03
-#define ACPI_OSI_WINSRV_2003            0x04
-#define ACPI_OSI_WIN_XP_SP2             0x05
-#define ACPI_OSI_WINSRV_2003_SP1        0x06
-#define ACPI_OSI_WIN_VISTA              0x07
-#define ACPI_OSI_WINSRV_2008            0x08
-#define ACPI_OSI_WIN_VISTA_SP1          0x09
-#define ACPI_OSI_WIN_VISTA_SP2          0x0A
-#define ACPI_OSI_WIN_7                  0x0B
-#define ACPI_OSI_WIN_8                  0x0C
-
 #define ACPI_ALWAYS_ILLEGAL             0x00
 
 struct acpi_interface_info {
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 1b09300810e6..22d497ee6ef9 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -62,6 +62,7 @@ extern u32 acpi_current_gpe_count;
 extern struct acpi_table_fadt acpi_gbl_FADT;
 extern u8 acpi_gbl_system_awake_and_running;
 extern u8 acpi_gbl_reduced_hardware;	/* ACPI 5.0 */
+extern u8 acpi_gbl_osi_data;
 
 /* Runtime configuration of debug print levels */
 
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index a64adcc29ae5..22b03c9286e9 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -1144,4 +1144,19 @@ struct acpi_memory_list {
 #endif
 };
 
+/* Definitions for _OSI support */
+
+#define ACPI_OSI_WIN_2000               0x01
+#define ACPI_OSI_WIN_XP                 0x02
+#define ACPI_OSI_WIN_XP_SP1             0x03
+#define ACPI_OSI_WINSRV_2003            0x04
+#define ACPI_OSI_WIN_XP_SP2             0x05
+#define ACPI_OSI_WINSRV_2003_SP1        0x06
+#define ACPI_OSI_WIN_VISTA              0x07
+#define ACPI_OSI_WINSRV_2008            0x08
+#define ACPI_OSI_WIN_VISTA_SP1          0x09
+#define ACPI_OSI_WIN_VISTA_SP2          0x0A
+#define ACPI_OSI_WIN_7                  0x0B
+#define ACPI_OSI_WIN_8                  0x0C
+
 #endif				/* __ACTYPES_H__ */
-- 
cgit v1.2.3-59-g8ed1b


From 8c5bd7adb2ce47e6aa39d17b2375f69b0c0aa255 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 18 Jul 2013 02:08:06 +0200
Subject: ACPI / video / i915: No ACPI backlight if firmware expects Windows 8

According to Matthew Garrett, "Windows 8 leaves backlight control up
to individual graphics drivers rather than making ACPI calls itself.
There's plenty of evidence to suggest that the Intel driver for
Windows [8] doesn't use the ACPI interface, including the fact that
it's broken on a bunch of machines when the OS claims to support
Windows 8.  The simplest thing to do appears to be to disable the
ACPI backlight interface on these systems".

There's a problem with that approach, however, because simply
avoiding to register the ACPI backlight interface if the firmware
calls _OSI for Windows 8 may not work in the following situations:
 (1) The ACPI backlight interface actually works on the given system
     and the i915 driver is not loaded (e.g. another graphics driver
     is used).
 (2) The ACPI backlight interface doesn't work on the given system,
     but there is a vendor platform driver that will register its
     own, equally broken, backlight interface if not prevented from
     doing so by the ACPI subsystem.
Therefore we need to allow the ACPI backlight interface to be
registered until the i915 driver is loaded which then will unregister
it if the firmware has called _OSI for Windows 8 (or will register
the ACPI video driver without backlight support if not already
present).

For this reason, introduce an alternative function for registering
ACPI video, acpi_video_register_with_quirks(), that will check
whether or not the ACPI video driver has already been registered
and whether or not the backlight Windows 8 quirk has to be applied.
If the quirk has to be applied, it will block the ACPI backlight
support and either unregister the backlight interface if the ACPI
video driver has already been registered, or register the ACPI
video driver without the backlight interface otherwise.  Make
the i915 driver use acpi_video_register_with_quirks() instead of
acpi_video_register() in i915_driver_load().

This change is based on earlier patches from Matthew Garrett,
Chun-Yi Lee and Seth Forshee and includes a fix from Aaron Lu's.

References: https://bugzilla.kernel.org/show_bug.cgi?id=51231
Tested-by: Aaron Lu <aaron.lu@intel.com>
Tested-by: Igor Gnatenko <i.gnatenko.brain@gmail.com>
Tested-by: Yves-Alexis Perez <corsac@debian.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Aaron Lu <aaron.lu@intel.com>
Acked-by: Matthew Garrett <matthew.garrett@nebula.com>
---
 drivers/acpi/internal.h         | 11 +++++++
 drivers/acpi/video.c            | 69 ++++++++++++++++++++++++++++++++++++-----
 drivers/acpi/video_detect.c     | 21 +++++++++++++
 drivers/gpu/drm/i915/i915_dma.c |  2 +-
 include/acpi/video.h            | 11 ++++++-
 include/linux/acpi.h            |  1 +
 6 files changed, 105 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 3a50a34fe176..227aca77ee1e 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -164,4 +164,15 @@ struct platform_device;
 int acpi_create_platform_device(struct acpi_device *adev,
 				const struct acpi_device_id *id);
 
+/*--------------------------------------------------------------------------
+					Video
+  -------------------------------------------------------------------------- */
+#if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
+bool acpi_video_backlight_quirks(void);
+bool acpi_video_verify_backlight_support(void);
+#else
+static inline bool acpi_video_backlight_quirks(void) { return false; }
+static inline bool acpi_video_verify_backlight_support(void) { return false; }
+#endif
+
 #endif /* _ACPI_INTERNAL_H_ */
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index f236e172d948..e9d4bb60c35c 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -44,6 +44,8 @@
 #include <linux/suspend.h>
 #include <acpi/video.h>
 
+#include "internal.h"
+
 #define PREFIX "ACPI: "
 
 #define ACPI_VIDEO_BUS_NAME		"Video Bus"
@@ -901,7 +903,7 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 	if (acpi_video_init_brightness(device))
 		return;
 
-	if (acpi_video_backlight_support()) {
+	if (acpi_video_verify_backlight_support()) {
 		struct backlight_properties props;
 		struct pci_dev *pdev;
 		acpi_handle acpi_parent;
@@ -1356,8 +1358,8 @@ acpi_video_switch_brightness(struct acpi_video_device *device, int event)
 	unsigned long long level_current, level_next;
 	int result = -EINVAL;
 
-	/* no warning message if acpi_backlight=vendor is used */
-	if (!acpi_video_backlight_support())
+	/* no warning message if acpi_backlight=vendor or a quirk is used */
+	if (!acpi_video_verify_backlight_support())
 		return 0;
 
 	if (!device->brightness)
@@ -1859,6 +1861,46 @@ static int acpi_video_bus_remove(struct acpi_device *device)
 	return 0;
 }
 
+static acpi_status video_unregister_backlight(acpi_handle handle, u32 lvl,
+					      void *context, void **rv)
+{
+	struct acpi_device *acpi_dev;
+	struct acpi_video_bus *video;
+	struct acpi_video_device *dev, *next;
+
+	if (acpi_bus_get_device(handle, &acpi_dev))
+		return AE_OK;
+
+	if (acpi_match_device_ids(acpi_dev, video_device_ids))
+		return AE_OK;
+
+	video = acpi_driver_data(acpi_dev);
+	if (!video)
+		return AE_OK;
+
+	acpi_video_bus_stop_devices(video);
+	mutex_lock(&video->device_list_lock);
+	list_for_each_entry_safe(dev, next, &video->video_device_list, entry) {
+		if (dev->backlight) {
+			backlight_device_unregister(dev->backlight);
+			dev->backlight = NULL;
+			kfree(dev->brightness->levels);
+			kfree(dev->brightness);
+		}
+		if (dev->cooling_dev) {
+			sysfs_remove_link(&dev->dev->dev.kobj,
+					  "thermal_cooling");
+			sysfs_remove_link(&dev->cooling_dev->device.kobj,
+					  "device");
+			thermal_cooling_device_unregister(dev->cooling_dev);
+			dev->cooling_dev = NULL;
+		}
+	}
+	mutex_unlock(&video->device_list_lock);
+	acpi_video_bus_start_devices(video);
+	return AE_OK;
+}
+
 static int __init is_i740(struct pci_dev *dev)
 {
 	if (dev->device == 0x00D1)
@@ -1890,14 +1932,25 @@ static int __init intel_opregion_present(void)
 	return opregion;
 }
 
-int acpi_video_register(void)
+int __acpi_video_register(bool backlight_quirks)
 {
-	int result = 0;
+	bool no_backlight;
+	int result;
+
+	no_backlight = backlight_quirks ? acpi_video_backlight_quirks() : false;
+
 	if (register_count) {
 		/*
-		 * if the function of acpi_video_register is already called,
-		 * don't register the acpi_vide_bus again and return no error.
+		 * If acpi_video_register() has been called already, don't try
+		 * to register acpi_video_bus, but unregister backlight devices
+		 * if no backlight support is requested.
 		 */
+		if (no_backlight)
+			acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+					    ACPI_UINT32_MAX,
+					    video_unregister_backlight,
+					    NULL, NULL, NULL);
+
 		return 0;
 	}
 
@@ -1913,7 +1966,7 @@ int acpi_video_register(void)
 
 	return 0;
 }
-EXPORT_SYMBOL(acpi_video_register);
+EXPORT_SYMBOL(__acpi_video_register);
 
 void acpi_video_unregister(void)
 {
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index e6bd910bc6ed..826e52def080 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -38,6 +38,8 @@
 #include <linux/dmi.h>
 #include <linux/pci.h>
 
+#include "internal.h"
+
 #define PREFIX "ACPI: "
 
 ACPI_MODULE_NAME("video");
@@ -234,6 +236,17 @@ static void acpi_video_caps_check(void)
 		acpi_video_get_capabilities(NULL);
 }
 
+bool acpi_video_backlight_quirks(void)
+{
+	if (acpi_gbl_osi_data >= ACPI_OSI_WIN_8) {
+		acpi_video_caps_check();
+		acpi_video_support |= ACPI_VIDEO_SKIP_BACKLIGHT;
+		return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL(acpi_video_backlight_quirks);
+
 /* Promote the vendor interface instead of the generic video module.
  * This function allow DMI blacklists to be implemented by externals
  * platform drivers instead of putting a big blacklist in video_detect.c
@@ -278,6 +291,14 @@ int acpi_video_backlight_support(void)
 }
 EXPORT_SYMBOL(acpi_video_backlight_support);
 
+/* For the ACPI video driver use only. */
+bool acpi_video_verify_backlight_support(void)
+{
+	return (acpi_video_support & ACPI_VIDEO_SKIP_BACKLIGHT) ?
+		false : acpi_video_backlight_support();
+}
+EXPORT_SYMBOL(acpi_video_verify_backlight_support);
+
 /*
  * Use acpi_backlight=vendor/video to force that backlight switching
  * is processed by vendor specific acpi drivers or video.ko driver.
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index adb319b53ecd..cf188ab7051a 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1648,7 +1648,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	if (INTEL_INFO(dev)->num_pipes) {
 		/* Must be done after probing outputs */
 		intel_opregion_init(dev);
-		acpi_video_register();
+		acpi_video_register_with_quirks();
 	}
 
 	if (IS_GEN5(dev))
diff --git a/include/acpi/video.h b/include/acpi/video.h
index 61109f2609fc..b26dc4fb7ba8 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -17,12 +17,21 @@ struct acpi_device;
 #define ACPI_VIDEO_DISPLAY_LEGACY_TV      0x0200
 
 #if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE)
-extern int acpi_video_register(void);
+extern int __acpi_video_register(bool backlight_quirks);
+static inline int acpi_video_register(void)
+{
+	return __acpi_video_register(false);
+}
+static inline int acpi_video_register_with_quirks(void)
+{
+	return __acpi_video_register(true);
+}
 extern void acpi_video_unregister(void);
 extern int acpi_video_get_edid(struct acpi_device *device, int type,
 			       int device_id, void **edid);
 #else
 static inline int acpi_video_register(void) { return 0; }
+static inline int acpi_video_register_with_quirks(void) { return 0; }
 static inline void acpi_video_unregister(void) { return; }
 static inline int acpi_video_get_edid(struct acpi_device *device, int type,
 				      int device_id, void **edid)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 353ba256f368..6ad72f92469c 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -191,6 +191,7 @@ extern bool wmi_has_guid(const char *guid);
 #define ACPI_VIDEO_BACKLIGHT_DMI_VIDEO			0x0200
 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR		0x0400
 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO		0x0800
+#define ACPI_VIDEO_SKIP_BACKLIGHT			0x1000
 
 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
 
-- 
cgit v1.2.3-59-g8ed1b


From d4b812dea4a236f729526facf97df1a9d18e191c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 18 Jul 2013 07:19:26 -0700
Subject: vlan: mask vlan prio bits

In commit 48cc32d38a52d0b68f91a171a8d00531edc6a46e
("vlan: don't deliver frames for unknown vlans to protocols")
Florian made sure we set pkt_type to PACKET_OTHERHOST
if the vlan id is set and we could find a vlan device for this
particular id.

But we also have a problem if prio bits are set.

Steinar reported an issue on a router receiving IPv6 frames with a
vlan tag of 4000 (id 0, prio 2), and tunneled into a sit device,
because skb->vlan_tci is set.

Forwarded frame is completely corrupted : We can see (8100:4000)
being inserted in the middle of IPv6 source address :

16:48:00.780413 IP6 2001:16d8:8100:4000:ee1c:0:9d9:bc87 >
9f94:4d95:2001:67c:29f4::: ICMP6, unknown icmp6 type (0), length 64
       0x0000:  0000 0029 8000 c7c3 7103 0001 a0ae e651
       0x0010:  0000 0000 ccce 0b00 0000 0000 1011 1213
       0x0020:  1415 1617 1819 1a1b 1c1d 1e1f 2021 2223
       0x0030:  2425 2627 2829 2a2b 2c2d 2e2f 3031 3233

It seems we are not really ready to properly cope with this right now.

We can probably do better in future kernels :
vlan_get_ingress_priority() should be a netdev property instead of
a per vlan_dev one.

For stable kernels, lets clear vlan_tci to fix the bugs.

Reported-by: Steinar H. Gunderson <sesse@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h |  3 +--
 net/8021q/vlan_core.c   |  2 +-
 net/core/dev.c          | 11 +++++++++--
 3 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index cdcbafa9b39a..715c343f7c00 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -79,9 +79,8 @@ static inline int is_vlan_dev(struct net_device *dev)
 }
 
 #define vlan_tx_tag_present(__skb)	((__skb)->vlan_tci & VLAN_TAG_PRESENT)
-#define vlan_tx_nonzero_tag_present(__skb) \
-	(vlan_tx_tag_present(__skb) && ((__skb)->vlan_tci & VLAN_VID_MASK))
 #define vlan_tx_tag_get(__skb)		((__skb)->vlan_tci & ~VLAN_TAG_PRESENT)
+#define vlan_tx_tag_get_id(__skb)	((__skb)->vlan_tci & VLAN_VID_MASK)
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 8a15eaadc4bd..4a78c4de9f20 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 	__be16 vlan_proto = skb->vlan_proto;
-	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
+	u16 vlan_id = vlan_tx_tag_get_id(skb);
 	struct net_device *vlan_dev;
 	struct vlan_pcpu_stats *rx_stats;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index a3d8d44cb7f4..26755dd40daa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3580,8 +3580,15 @@ ncls:
 		}
 	}
 
-	if (vlan_tx_nonzero_tag_present(skb))
-		skb->pkt_type = PACKET_OTHERHOST;
+	if (unlikely(vlan_tx_tag_present(skb))) {
+		if (vlan_tx_tag_get_id(skb))
+			skb->pkt_type = PACKET_OTHERHOST;
+		/* Note: we might in the future use prio bits
+		 * and set skb->priority like in vlan_do_receive()
+		 * For the time being, just ignore Priority Code Point
+		 */
+		skb->vlan_tci = 0;
+	}
 
 	/* deliver only exact match when indicated */
 	null_or_dev = deliver_exact ? skb->dev : NULL;
-- 
cgit v1.2.3-59-g8ed1b


From b01a60be7a4a161ac0a11df30569d21a20795aef Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 5 Jul 2013 17:43:56 +0200
Subject: ssb: fix alignment of struct bcma_device_id

The ARM OABI and EABI disagree on the alignment of structures
with small members, so module init tools may interpret the
ssb device table incorrectly, as shown  by this warning when
building the b43 device driver in an OABI kernel:

FATAL: drivers/net/wireless/b43/b43: sizeof(struct ssb_device_id)=6 is
not a modulo of the size of section __mod_ssb_device_table=88.

Forcing the default (EABI) alignment on the structure makes this
problem go away. Since the ssb_device_id may have the same problem,
better fix both structures.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: John W. Linville <linville@tuxdriver.com>
Cc: Michael Buesch <mb@bu3sch.de>
Cc: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/mod_devicetable.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index b62d4af6c667..45e921401b06 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -361,7 +361,8 @@ struct ssb_device_id {
 	__u16	vendor;
 	__u16	coreid;
 	__u8	revision;
-};
+	__u8	__pad;
+} __attribute__((packed, aligned(2)));
 #define SSB_DEVICE(_vendor, _coreid, _revision)  \
 	{ .vendor = _vendor, .coreid = _coreid, .revision = _revision, }
 #define SSB_DEVTABLE_END  \
@@ -377,7 +378,7 @@ struct bcma_device_id {
 	__u16	id;
 	__u8	rev;
 	__u8	class;
-};
+} __attribute__((packed,aligned(2)));
 #define BCMA_CORE(_manuf, _id, _rev, _class)  \
 	{ .manuf = _manuf, .id = _id, .rev = _rev, .class = _class, }
 #define BCMA_CORETABLE_END  \
-- 
cgit v1.2.3-59-g8ed1b


From cd92bf61d6d70bd3eb33b46d600e3f3eb9c5778a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 17 Jun 2013 19:02:11 +0200
Subject: tracing/perf: Move the PERF_MAX_TRACE_SIZE check into
 perf_trace_buf_prepare()

Every perf_trace_buf_prepare() caller does
WARN_ONCE(size > PERF_MAX_TRACE_SIZE, message) and "message" is
almost the same.

Shift this WARN_ONCE() into perf_trace_buf_prepare(). This changes
the meaning of _ONCE, but I think this is fine.

	- 4947014 2932448 10104832  17984294  1126b26 vmlinux
	+ 4948422 2932448 10104832  17985702  11270a6 vmlinux

on my build.

Link: http://lkml.kernel.org/r/20130617170211.GA19813@redhat.com

Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/ftrace.h          |  4 ----
 kernel/trace/trace_event_perf.c |  4 ++++
 kernel/trace/trace_kprobe.c     |  6 ------
 kernel/trace/trace_syscalls.c   | 12 ------------
 kernel/trace/trace_uprobe.c     |  2 --
 5 files changed, 4 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index d615f78cc6b6..41a6643e2136 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -670,10 +670,6 @@ perf_trace_##call(void *__data, proto)					\
 			     sizeof(u64));				\
 	__entry_size -= sizeof(u32);					\
 									\
-	if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE,		\
-		      "profile buffer not large enough"))		\
-		return;							\
-									\
 	entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare(	\
 		__entry_size, event_call->event.type, &__regs, &rctx);	\
 	if (!entry)							\
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 12df5573086e..80c36bcf66e8 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -236,6 +236,10 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
 
 	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
 
+	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
+			"perf buffer not large enough"))
+		return NULL;
+
 	pc = preempt_count();
 
 	*rctxp = perf_swevent_get_recursion_context();
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7ed6976493c8..ae6ce835b023 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1087,9 +1087,6 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
 	__size = sizeof(*entry) + tp->size + dsize;
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-		     "profile buffer not large enough"))
-		return;
 
 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
 	if (!entry)
@@ -1120,9 +1117,6 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
 	__size = sizeof(*entry) + tp->size + dsize;
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-		     "profile buffer not large enough"))
-		return;
 
 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
 	if (!entry)
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index ac0085777fbd..8fd03657bc7d 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -575,10 +575,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 	size = ALIGN(size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-		      "perf buffer not large enough"))
-		return;
-
 	rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
 				sys_data->enter_event->event.type, regs, &rctx);
 	if (!rec)
@@ -652,14 +648,6 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 
-	/*
-	 * Impossible, but be paranoid with the future
-	 * How to put this check outside runtime?
-	 */
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-		"exit event has grown above perf buffer size"))
-		return;
-
 	rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
 				sys_data->exit_event->event.type, regs, &rctx);
 	if (!rec)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index d5d0cd368a56..a23d2d71188e 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -818,8 +818,6 @@ static void uprobe_perf_print(struct trace_uprobe *tu,
 
 	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
 	size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
-		return;
 
 	preempt_disable();
 	head = this_cpu_ptr(call->perf_events);
-- 
cgit v1.2.3-59-g8ed1b


From ba57ea64cb1820deb37637de0fdb107f0dc90089 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 20 Jul 2013 03:11:32 +0400
Subject: allow O_TMPFILE to work with O_WRONLY

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c                        | 2 ++
 include/uapi/asm-generic/fcntl.h | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/open.c b/fs/open.c
index 9156cb050d08..d53e29895082 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -844,6 +844,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
 		if ((flags & O_TMPFILE_MASK) != O_TMPFILE)
 			return -EINVAL;
 		acc_mode = MAY_OPEN | ACC_MODE(flags);
+		if (!(acc_mode & MAY_WRITE))
+			return -EINVAL;
 	} else if (flags & O_PATH) {
 		/*
 		 * If we have O_PATH in the open flag. Then we
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 05ac354e124d..95e46c8e05f9 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -89,8 +89,8 @@
 #endif
 
 /* a horrid kludge trying to make sure that this will fail on old kernels */
-#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY | O_RDWR)
-#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT | O_ACCMODE)      
+#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
+#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)      
 
 #ifndef O_NDELAY
 #define O_NDELAY	O_NONBLOCK
-- 
cgit v1.2.3-59-g8ed1b


From 24924a20dab603089011f9d3eb7622f0f6ef93c0 Mon Sep 17 00:00:00 2001
From: Peng Tao <bergwolf@gmail.com>
Date: Thu, 18 Jul 2013 22:09:08 +0800
Subject: vfs: constify dentry parameter in d_count()

so that it can be used in places like d_compare/d_hash
without causing a compiler warning.

Signed-off-by: Peng Tao <tao.peng@emc.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/dcache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 3092df3614ae..b90337c9d468 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -324,7 +324,7 @@ static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq)
 	return ret;
 }
 
-static inline unsigned d_count(struct dentry *dentry)
+static inline unsigned d_count(const struct dentry *dentry)
 {
 	return dentry->d_count;
 }
-- 
cgit v1.2.3-59-g8ed1b


From a1a8e1dc111d6f05e7164e851e58219d428359e1 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Tue, 16 Jul 2013 15:28:00 +0100
Subject: iio:trigger: Fix use_count race condition

When using more than one trigger consumer it can happen that multiple threads
perform a read-modify-update cycle on 'use_count' concurrently. This can cause
updates to be lost and use_count can get stuck at non-zero value, in which case
the IIO core assumes that at least one thread is still running and will wait for
it to finish before running any trigger handlers again. This effectively renders
the trigger disabled and a reboot is necessary before it can be used again. To
fix this make use_count an atomic variable. Also set it to the number of
consumers before starting the first consumer, otherwise it might happen that
use_count drops to 0 even though not all consumers have been run yet.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Tested-by: Denis Ciocca <denis.ciocca@st.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/industrialio-trigger.c | 34 ++++++++++++++++++++++------------
 include/linux/iio/trigger.h        |  3 ++-
 2 files changed, 24 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
index ea8a4146620d..0dd9bb873130 100644
--- a/drivers/iio/industrialio-trigger.c
+++ b/drivers/iio/industrialio-trigger.c
@@ -127,12 +127,17 @@ static struct iio_trigger *iio_trigger_find_by_name(const char *name,
 void iio_trigger_poll(struct iio_trigger *trig, s64 time)
 {
 	int i;
-	if (!trig->use_count)
-		for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++)
-			if (trig->subirqs[i].enabled) {
-				trig->use_count++;
+
+	if (!atomic_read(&trig->use_count)) {
+		atomic_set(&trig->use_count, CONFIG_IIO_CONSUMERS_PER_TRIGGER);
+
+		for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) {
+			if (trig->subirqs[i].enabled)
 				generic_handle_irq(trig->subirq_base + i);
-			}
+			else
+				iio_trigger_notify_done(trig);
+		}
+	}
 }
 EXPORT_SYMBOL(iio_trigger_poll);
 
@@ -146,19 +151,24 @@ EXPORT_SYMBOL(iio_trigger_generic_data_rdy_poll);
 void iio_trigger_poll_chained(struct iio_trigger *trig, s64 time)
 {
 	int i;
-	if (!trig->use_count)
-		for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++)
-			if (trig->subirqs[i].enabled) {
-				trig->use_count++;
+
+	if (!atomic_read(&trig->use_count)) {
+		atomic_set(&trig->use_count, CONFIG_IIO_CONSUMERS_PER_TRIGGER);
+
+		for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) {
+			if (trig->subirqs[i].enabled)
 				handle_nested_irq(trig->subirq_base + i);
-			}
+			else
+				iio_trigger_notify_done(trig);
+		}
+	}
 }
 EXPORT_SYMBOL(iio_trigger_poll_chained);
 
 void iio_trigger_notify_done(struct iio_trigger *trig)
 {
-	trig->use_count--;
-	if (trig->use_count == 0 && trig->ops && trig->ops->try_reenable)
+	if (atomic_dec_and_test(&trig->use_count) && trig->ops &&
+		trig->ops->try_reenable)
 		if (trig->ops->try_reenable(trig))
 			/* Missed an interrupt so launch new poll now */
 			iio_trigger_poll(trig, 0);
diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h
index 3869c525b052..369cf2cd5144 100644
--- a/include/linux/iio/trigger.h
+++ b/include/linux/iio/trigger.h
@@ -8,6 +8,7 @@
  */
 #include <linux/irq.h>
 #include <linux/module.h>
+#include <linux/atomic.h>
 
 #ifndef _IIO_TRIGGER_H_
 #define _IIO_TRIGGER_H_
@@ -61,7 +62,7 @@ struct iio_trigger {
 
 	struct list_head		list;
 	struct list_head		alloc_list;
-	int use_count;
+	atomic_t			use_count;
 
 	struct irq_chip			subirq_chip;
 	int				subirq_base;
-- 
cgit v1.2.3-59-g8ed1b


From b1451e546899bc8f450773b2af02e0cd000cf1fa Mon Sep 17 00:00:00 2001
From: "Patil, Rachna" <rachna@ti.com>
Date: Sat, 20 Jul 2013 17:27:00 +0100
Subject: iio: ti_am335x_adc: Fix wrong samples received on 1st read

Previously we tried to read data form ADC even before ADC sequencer
finished sampling. This led to wrong samples.
We now wait on ADC status register idle bit to be set.

Signed-off-by: Patil, Rachna <rachna@ti.com>
Signed-off-by: Zubair Lutfullah <zubair.lutfullah@gmail.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/ti_am335x_adc.c      | 30 ++++++++++++++++++++++--------
 include/linux/mfd/ti_am335x_tscadc.h | 16 ++++++++++++++++
 2 files changed, 38 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index 0ad208a69c29..3ceac3e91dde 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -60,7 +60,6 @@ static void tiadc_step_config(struct tiadc_device *adc_dev)
 {
 	unsigned int stepconfig;
 	int i, steps;
-	u32 step_en;
 
 	/*
 	 * There are 16 configurable steps and 8 analog input
@@ -86,8 +85,7 @@ static void tiadc_step_config(struct tiadc_device *adc_dev)
 		adc_dev->channel_step[i] = steps;
 		steps++;
 	}
-	step_en = get_adc_step_mask(adc_dev);
-	am335x_tsc_se_set(adc_dev->mfd_tscadc, step_en);
+
 }
 
 static const char * const chan_name_ain[] = {
@@ -142,10 +140,22 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
 		int *val, int *val2, long mask)
 {
 	struct tiadc_device *adc_dev = iio_priv(indio_dev);
-	int i;
-	unsigned int fifo1count, read;
+	int i, map_val;
+	unsigned int fifo1count, read, stepid;
 	u32 step = UINT_MAX;
 	bool found = false;
+	u32 step_en;
+	unsigned long timeout = jiffies + usecs_to_jiffies
+				(IDLE_TIMEOUT * adc_dev->channels);
+	step_en = get_adc_step_mask(adc_dev);
+	am335x_tsc_se_set(adc_dev->mfd_tscadc, step_en);
+
+	/* Wait for ADC sequencer to complete sampling */
+	while (tiadc_readl(adc_dev, REG_ADCFSM) & SEQ_STATUS) {
+		if (time_after(jiffies, timeout))
+			return -EAGAIN;
+		}
+	map_val = chan->channel + TOTAL_CHANNELS;
 
 	/*
 	 * When the sub-system is first enabled,
@@ -170,12 +180,16 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
 	fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
 	for (i = 0; i < fifo1count; i++) {
 		read = tiadc_readl(adc_dev, REG_FIFO1);
-		if (read >> 16 == step) {
-			*val = read & 0xfff;
+		stepid = read & FIFOREAD_CHNLID_MASK;
+		stepid = stepid >> 0x10;
+
+		if (stepid == map_val) {
+			read = read & FIFOREAD_DATA_MASK;
 			found = true;
+			*val = read;
 		}
 	}
-	am335x_tsc_se_update(adc_dev->mfd_tscadc);
+
 	if (found == false)
 		return -EBUSY;
 	return IIO_VAL_INT;
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 8d73fe29796a..db1791bb997a 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -113,11 +113,27 @@
 #define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
 #define CNTRLREG_TSCENB		BIT(7)
 
+/* FIFO READ Register */
+#define FIFOREAD_DATA_MASK (0xfff << 0)
+#define FIFOREAD_CHNLID_MASK (0xf << 16)
+
+/* Sequencer Status */
+#define SEQ_STATUS BIT(5)
+
 #define ADC_CLK			3000000
 #define	MAX_CLK_DIV		7
 #define TOTAL_STEPS		16
 #define TOTAL_CHANNELS		8
 
+/*
+* ADC runs at 3MHz, and it takes
+* 15 cycles to latch one data output.
+* Hence the idle time for ADC to
+* process one sample data would be
+* around 5 micro seconds.
+*/
+#define IDLE_TIMEOUT 5 /* microsec */
+
 #define TSCADC_CELLS		2
 
 struct ti_tscadc_dev {
-- 
cgit v1.2.3-59-g8ed1b


From c5f167d31530b9b1aa7beb469aeca10f869eff5a Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Wed, 3 Jul 2013 09:44:50 +0100
Subject: pinctrl: am33xx dt binding: correct include path

Using #include <include/...> is a bit odd. It happens to work because the DTC
flags include -Iarch/FOO/boot/dts as well as arch/FOO/boot/dts/include and
arch/FOO/boot/dts/include/dt-bindings is a symlink to include/dt-bindings.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/dt-bindings/pinctrl/am33xx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/pinctrl/am33xx.h b/include/dt-bindings/pinctrl/am33xx.h
index 469e0325e6f4..2fbc804e1a45 100644
--- a/include/dt-bindings/pinctrl/am33xx.h
+++ b/include/dt-bindings/pinctrl/am33xx.h
@@ -5,7 +5,7 @@
 #ifndef _DT_BINDINGS_PINCTRL_AM33XX_H
 #define _DT_BINDINGS_PINCTRL_AM33XX_H
 
-#include <include/dt-bindings/pinctrl/omap.h>
+#include <dt-bindings/pinctrl/omap.h>
 
 /* am33xx specific mux bit defines */
 #undef PULL_ENA
-- 
cgit v1.2.3-59-g8ed1b


From ab116a4df4942c78c189d9b0744dd940ab9e00b9 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 10 Jul 2013 11:09:12 +0900
Subject: dmaengine: shdma: fix a build failure on platforms with no DMA
 support

On platforms with no support for the shdma dmaengine driver build is
currently failing with

drivers/built-in.o: In function `sh_mobile_sdhi_probe':
drivers/mmc/host/sh_mobile_sdhi.c:170: undefined reference to`shdma_chan_filter'

Fix the breakage by defining shdma_chan_filter to NULL in such
configurations.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski+renesas@gmail.com>
[horms+renesas@verge.net.au: Apply change to shdma-base.h instead of sh_dma.h]
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 include/linux/shdma-base.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h
index 382cf710ca9a..5b1c9848124c 100644
--- a/include/linux/shdma-base.h
+++ b/include/linux/shdma-base.h
@@ -124,6 +124,10 @@ void shdma_chan_remove(struct shdma_chan *schan);
 int shdma_init(struct device *dev, struct shdma_dev *sdev,
 		    int chan_num);
 void shdma_cleanup(struct shdma_dev *sdev);
+#if IS_ENABLED(CONFIG_SH_DMAE_BASE)
 bool shdma_chan_filter(struct dma_chan *chan, void *arg);
+#else
+#define shdma_chan_filter NULL
+#endif
 
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From a829abf8daa2dcf8223a9284b76d221e61130e13 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 5 Jul 2013 17:51:20 +0200
Subject: ARM: pxa: propagate errors from regulator_enable() to pxamci

The em_x270_mci_setpower() and em_x270_usb_hub_init() functions
call regulator_enable(), which may return an error that must
be checked.

This changes the em_x270_usb_hub_init() function to bail out
if it fails, and changes the pxamci_platform_data->setpower
callback so that the a failed em_x270_mci_setpower call
can be propagated by the pxamci driver into the mmc core.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Mike Rapoport <mike@compulab.co.il>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: Haojian Zhuang <haojian.zhuang@gmail.com>
Acked-by: Chris Ball <cjb@laptop.org>
[olof: fixed order of regulator_enable() and test in em_x270_usb_hub_init]
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/mach-pxa/em-x270.c              | 17 +++++++++++++----
 arch/arm/mach-pxa/mainstone.c            |  3 ++-
 arch/arm/mach-pxa/pcm990-baseboard.c     |  3 ++-
 arch/arm/mach-pxa/poodle.c               |  4 +++-
 arch/arm/mach-pxa/spitz.c                |  4 +++-
 arch/arm/mach-pxa/stargate2.c            |  3 ++-
 drivers/mmc/host/pxamci.c                |  2 +-
 include/linux/platform_data/mmc-pxamci.h |  2 +-
 8 files changed, 27 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index f6726bb4eb95..3a3362fa793e 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -477,16 +477,24 @@ static int em_x270_usb_hub_init(void)
 	/* USB Hub power-on and reset */
 	gpio_direction_output(usb_hub_reset, 1);
 	gpio_direction_output(GPIO9_USB_VBUS_EN, 0);
-	regulator_enable(em_x270_usb_ldo);
+	err = regulator_enable(em_x270_usb_ldo);
+	if (err)
+		goto err_free_rst_gpio;
+
 	gpio_set_value(usb_hub_reset, 0);
 	gpio_set_value(usb_hub_reset, 1);
 	regulator_disable(em_x270_usb_ldo);
-	regulator_enable(em_x270_usb_ldo);
+	err = regulator_enable(em_x270_usb_ldo);
+	if (err)
+		goto err_free_rst_gpio;
+
 	gpio_set_value(usb_hub_reset, 0);
 	gpio_set_value(GPIO9_USB_VBUS_EN, 1);
 
 	return 0;
 
+err_free_rst_gpio:
+	gpio_free(usb_hub_reset);
 err_free_vbus_gpio:
 	gpio_free(GPIO9_USB_VBUS_EN);
 err_free_usb_ldo:
@@ -592,7 +600,7 @@ err_irq:
 	return err;
 }
 
-static void em_x270_mci_setpower(struct device *dev, unsigned int vdd)
+static int em_x270_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data* p_d = dev->platform_data;
 
@@ -600,10 +608,11 @@ static void em_x270_mci_setpower(struct device *dev, unsigned int vdd)
 		int vdd_uV = (2000 + (vdd - __ffs(MMC_VDD_20_21)) * 100) * 1000;
 
 		regulator_set_voltage(em_x270_sdio_ldo, vdd_uV, vdd_uV);
-		regulator_enable(em_x270_sdio_ldo);
+		return regulator_enable(em_x270_sdio_ldo);
 	} else {
 		regulator_disable(em_x270_sdio_ldo);
 	}
+	return 0;
 }
 
 static void em_x270_mci_exit(struct device *dev, void *data)
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index d2c652318376..dd70343c8708 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -408,7 +408,7 @@ static int mainstone_mci_init(struct device *dev, irq_handler_t mstone_detect_in
 	return err;
 }
 
-static void mainstone_mci_setpower(struct device *dev, unsigned int vdd)
+static int mainstone_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data* p_d = dev->platform_data;
 
@@ -420,6 +420,7 @@ static void mainstone_mci_setpower(struct device *dev, unsigned int vdd)
 		printk(KERN_DEBUG "%s: off\n", __func__);
 		MST_MSCWR1 &= ~MST_MSCWR1_MMC_ON;
 	}
+	return 0;
 }
 
 static void mainstone_mci_exit(struct device *dev, void *data)
diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c
index fb7f1d1627dc..13e5b00eae90 100644
--- a/arch/arm/mach-pxa/pcm990-baseboard.c
+++ b/arch/arm/mach-pxa/pcm990-baseboard.c
@@ -335,7 +335,7 @@ static int pcm990_mci_init(struct device *dev, irq_handler_t mci_detect_int,
 	return err;
 }
 
-static void pcm990_mci_setpower(struct device *dev, unsigned int vdd)
+static int pcm990_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data *p_d = dev->platform_data;
 	u8 val;
@@ -348,6 +348,7 @@ static void pcm990_mci_setpower(struct device *dev, unsigned int vdd)
 		val &= ~PCM990_CTRL_MMC2PWR;
 
 	pcm990_cpld_writeb(PCM990_CTRL_MMC2PWR, PCM990_CTRL_REG5);
+	return 0;
 }
 
 static void pcm990_mci_exit(struct device *dev, void *data)
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index 711d37e26bd8..aedf053a1de5 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -258,7 +258,7 @@ err_free_2:
 	return err;
 }
 
-static void poodle_mci_setpower(struct device *dev, unsigned int vdd)
+static int poodle_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data* p_d = dev->platform_data;
 
@@ -270,6 +270,8 @@ static void poodle_mci_setpower(struct device *dev, unsigned int vdd)
 		gpio_set_value(POODLE_GPIO_SD_PWR1, 0);
 		gpio_set_value(POODLE_GPIO_SD_PWR, 0);
 	}
+
+	return 0;
 }
 
 static void poodle_mci_exit(struct device *dev, void *data)
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 2125df0444e7..4c29173026e8 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -598,7 +598,7 @@ static inline void spitz_spi_init(void) {}
  * NOTE: The card detect interrupt isn't debounced so we delay it by 250ms to
  * give the card a chance to fully insert/eject.
  */
-static void spitz_mci_setpower(struct device *dev, unsigned int vdd)
+static int spitz_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data* p_d = dev->platform_data;
 
@@ -606,6 +606,8 @@ static void spitz_mci_setpower(struct device *dev, unsigned int vdd)
 		spitz_card_pwr_ctrl(SCOOP_CPR_SD_3V, SCOOP_CPR_SD_3V);
 	else
 		spitz_card_pwr_ctrl(SCOOP_CPR_SD_3V, 0x0);
+
+	return 0;
 }
 
 static struct pxamci_platform_data spitz_mci_platform_data = {
diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c
index 88fde43c948c..62aea3e835f3 100644
--- a/arch/arm/mach-pxa/stargate2.c
+++ b/arch/arm/mach-pxa/stargate2.c
@@ -734,9 +734,10 @@ static int stargate2_mci_init(struct device *dev,
  *
  * Very simple control. Either it is on or off and is controlled by
  * a gpio pin */
-static void stargate2_mci_setpower(struct device *dev, unsigned int vdd)
+static int stargate2_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	gpio_set_value(SG2_SD_POWER_ENABLE, !!vdd);
+	return 0;
 }
 
 static void stargate2_mci_exit(struct device *dev, void *data)
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 847b1996ce8e..2c5a91bb8ec3 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -128,7 +128,7 @@ static inline int pxamci_set_power(struct pxamci_host *host,
 			       !!on ^ host->pdata->gpio_power_invert);
 	}
 	if (!host->vcc && host->pdata && host->pdata->setpower)
-		host->pdata->setpower(mmc_dev(host->mmc), vdd);
+		return host->pdata->setpower(mmc_dev(host->mmc), vdd);
 
 	return 0;
 }
diff --git a/include/linux/platform_data/mmc-pxamci.h b/include/linux/platform_data/mmc-pxamci.h
index 9eb515bb799d..1706b3597ce0 100644
--- a/include/linux/platform_data/mmc-pxamci.h
+++ b/include/linux/platform_data/mmc-pxamci.h
@@ -12,7 +12,7 @@ struct pxamci_platform_data {
 	unsigned long detect_delay_ms;		/* delay in millisecond before detecting cards after interrupt */
 	int (*init)(struct device *, irq_handler_t , void *);
 	int (*get_ro)(struct device *);
-	void (*setpower)(struct device *, unsigned int);
+	int (*setpower)(struct device *, unsigned int);
 	void (*exit)(struct device *, void *);
 	int gpio_card_detect;			/* gpio detecting card insertion */
 	int gpio_card_ro;			/* gpio detecting read only toggle */
-- 
cgit v1.2.3-59-g8ed1b


From 88d84ac97378c2f1d5fec9af1e8b7d9a662d6b00 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Fri, 19 Jul 2013 12:28:25 +0200
Subject: EDAC: Fix lockdep splat

Fix the following:

BUG: key ffff88043bdd0330 not in .data!
------------[ cut here ]------------
WARNING: at kernel/lockdep.c:2987 lockdep_init_map+0x565/0x5a0()
DEBUG_LOCKS_WARN_ON(1)
Modules linked in: glue_helper sb_edac(+) edac_core snd acpi_cpufreq lrw gf128mul ablk_helper iTCO_wdt evdev i2c_i801 dcdbas button cryptd pcspkr iTCO_vendor_support usb_common lpc_ich mfd_core soundcore mperf processor microcode
CPU: 2 PID: 599 Comm: modprobe Not tainted 3.10.0 #1
Hardware name: Dell Inc. Precision T3600/0PTTT9, BIOS A08 01/24/2013
 0000000000000009 ffff880439a1d920 ffffffff8160a9a9 ffff880439a1d958
 ffffffff8103d9e0 ffff88043af4a510 ffffffff81a16e11 0000000000000000
 ffff88043bdd0330 0000000000000000 ffff880439a1d9b8 ffffffff8103dacc
Call Trace:
  dump_stack
  warn_slowpath_common
  warn_slowpath_fmt
  lockdep_init_map
  ? trace_hardirqs_on_caller
  ? trace_hardirqs_on
  debug_mutex_init
  __mutex_init
  bus_register
  edac_create_sysfs_mci_device
  edac_mc_add_mc
  sbridge_probe
  pci_device_probe
  driver_probe_device
  __driver_attach
  ? driver_probe_device
  bus_for_each_dev
  driver_attach
  bus_add_driver
  driver_register
  __pci_register_driver
  ? 0xffffffffa0010fff
  sbridge_init
  ? 0xffffffffa0010fff
  do_one_initcall
  load_module
  ? unset_module_init_ro_nx
  SyS_init_module
  tracesys
---[ end trace d24a70b0d3ddf733 ]---
EDAC MC0: Giving out device to 'sbridge_edac.c' 'Sandy Bridge Socket#0': DEV 0000:3f:0e.0
EDAC sbridge: Driver loaded.

What happens is that bus_register needs a statically allocated lock_key
because the last is handed in to lockdep. However, struct mem_ctl_info
embeds struct bus_type (the whole struct, not a pointer to it) and the
whole thing gets dynamically allocated.

Fix this by using a statically allocated struct bus_type for the MC bus.

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@infradead.org>
Cc: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: stable@kernel.org # v3.10
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/edac/edac_mc.c       |  9 +++++++++
 drivers/edac/edac_mc_sysfs.c | 28 +++++++++++++++-------------
 drivers/edac/i5100_edac.c    |  2 +-
 include/linux/edac.h         |  7 ++++++-
 4 files changed, 31 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 27e86d938262..89e109022d78 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -48,6 +48,8 @@ static LIST_HEAD(mc_devices);
  */
 static void const *edac_mc_owner;
 
+static struct bus_type mc_bus[EDAC_MAX_MCS];
+
 unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
 			         unsigned len)
 {
@@ -723,6 +725,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 	int ret = -EINVAL;
 	edac_dbg(0, "\n");
 
+	if (mci->mc_idx >= EDAC_MAX_MCS) {
+		pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx);
+		return -ENODEV;
+	}
+
 #ifdef CONFIG_EDAC_DEBUG
 	if (edac_debug_level >= 3)
 		edac_mc_dump_mci(mci);
@@ -762,6 +769,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 	/* set load time so that error rate can be tracked */
 	mci->start_time = jiffies;
 
+	mci->bus = &mc_bus[mci->mc_idx];
+
 	if (edac_create_sysfs_mci_device(mci)) {
 		edac_mc_printk(mci, KERN_WARNING,
 			"failed to create sysfs device\n");
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index ef15a7e613bc..e7c32c4f7837 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -370,7 +370,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
 		return -ENODEV;
 
 	csrow->dev.type = &csrow_attr_type;
-	csrow->dev.bus = &mci->bus;
+	csrow->dev.bus = mci->bus;
 	device_initialize(&csrow->dev);
 	csrow->dev.parent = &mci->dev;
 	csrow->mci = mci;
@@ -605,7 +605,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci,
 	dimm->mci = mci;
 
 	dimm->dev.type = &dimm_attr_type;
-	dimm->dev.bus = &mci->bus;
+	dimm->dev.bus = mci->bus;
 	device_initialize(&dimm->dev);
 
 	dimm->dev.parent = &mci->dev;
@@ -975,11 +975,13 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 	 * The memory controller needs its own bus, in order to avoid
 	 * namespace conflicts at /sys/bus/edac.
 	 */
-	mci->bus.name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx);
-	if (!mci->bus.name)
+	mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx);
+	if (!mci->bus->name)
 		return -ENOMEM;
-	edac_dbg(0, "creating bus %s\n", mci->bus.name);
-	err = bus_register(&mci->bus);
+
+	edac_dbg(0, "creating bus %s\n", mci->bus->name);
+
+	err = bus_register(mci->bus);
 	if (err < 0)
 		return err;
 
@@ -988,7 +990,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 	device_initialize(&mci->dev);
 
 	mci->dev.parent = mci_pdev;
-	mci->dev.bus = &mci->bus;
+	mci->dev.bus = mci->bus;
 	dev_set_name(&mci->dev, "mc%d", mci->mc_idx);
 	dev_set_drvdata(&mci->dev, mci);
 	pm_runtime_forbid(&mci->dev);
@@ -997,8 +999,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 	err = device_add(&mci->dev);
 	if (err < 0) {
 		edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
-		bus_unregister(&mci->bus);
-		kfree(mci->bus.name);
+		bus_unregister(mci->bus);
+		kfree(mci->bus->name);
 		return err;
 	}
 
@@ -1064,8 +1066,8 @@ fail:
 	}
 fail2:
 	device_unregister(&mci->dev);
-	bus_unregister(&mci->bus);
-	kfree(mci->bus.name);
+	bus_unregister(mci->bus);
+	kfree(mci->bus->name);
 	return err;
 }
 
@@ -1098,8 +1100,8 @@ void edac_unregister_sysfs(struct mem_ctl_info *mci)
 {
 	edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev));
 	device_unregister(&mci->dev);
-	bus_unregister(&mci->bus);
-	kfree(mci->bus.name);
+	bus_unregister(mci->bus);
+	kfree(mci->bus->name);
 }
 
 static void mc_attr_release(struct device *dev)
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 1b635178cc44..157b934e8ce3 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -974,7 +974,7 @@ static int i5100_setup_debugfs(struct mem_ctl_info *mci)
 	if (!i5100_debugfs)
 		return -ENODEV;
 
-	priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs);
+	priv->debugfs = debugfs_create_dir(mci->bus->name, i5100_debugfs);
 
 	if (!priv->debugfs)
 		return -ENOMEM;
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 0b763276f619..5c6d7fbaf89e 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -622,7 +622,7 @@ struct edac_raw_error_desc {
  */
 struct mem_ctl_info {
 	struct device			dev;
-	struct bus_type			bus;
+	struct bus_type			*bus;
 
 	struct list_head link;	/* for global list of mem_ctl_info structs */
 
@@ -742,4 +742,9 @@ struct mem_ctl_info {
 #endif
 };
 
+/*
+ * Maximum number of memory controllers in the coherent fabric.
+ */
+#define EDAC_MAX_MCS	16
+
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From e70308ec0e4bff344fcfdf160de40e1150552c5f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 24 Jul 2013 17:04:16 +1000
Subject: Revert "crypto: crct10dif - Wrap crc_t10dif function all to use
 crypto transform framework"

This reverts commits
    67822649d7305caf3dd50ed46c27b99c94eff996
    39761214eefc6b070f29402aa1165f24d789b3f7
    0b95a7f85718adcbba36407ef88bba0a7379ed03
    31d939625a9a20b1badd2d4e6bf6fd39fa523405
    2d31e518a42828df7877bca23a958627d60408bc

Unfortunately this change broke boot on some systems that used an
initrd which does not include the newly created crct10dif modules.
As these modules are required by sd_mod under certain configurations
this is a serious problem.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/Makefile                |   2 -
 arch/x86/crypto/crct10dif-pcl-asm_64.S  | 643 --------------------------------
 arch/x86/crypto/crct10dif-pclmul_glue.c | 151 --------
 crypto/Kconfig                          |  19 -
 crypto/Makefile                         |   1 -
 crypto/crct10dif.c                      | 178 ---------
 crypto/tcrypt.c                         |   8 -
 crypto/testmgr.c                        |  10 -
 crypto/testmgr.h                        |  33 --
 include/linux/crc-t10dif.h              |   4 -
 lib/Kconfig                             |   2 -
 lib/crc-t10dif.c                        |  73 ++--
 12 files changed, 43 insertions(+), 1081 deletions(-)
 delete mode 100644 arch/x86/crypto/crct10dif-pcl-asm_64.S
 delete mode 100644 arch/x86/crypto/crct10dif-pclmul_glue.c
 delete mode 100644 crypto/crct10dif.c

(limited to 'include')

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 7d6ba9db1be9..6c63c358a7e6 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -27,7 +27,6 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
 obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
 obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
 obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
-obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
 
 # These modules require assembler to support AVX.
 ifeq ($(avx_supported),yes)
@@ -82,4 +81,3 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
 crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
 sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
 sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
-crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
deleted file mode 100644
index 35e97569d05f..000000000000
--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
+++ /dev/null
@@ -1,643 +0,0 @@
-########################################################################
-# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
-#
-# Copyright (c) 2013, Intel Corporation
-#
-# Authors:
-#     Erdinc Ozturk <erdinc.ozturk@intel.com>
-#     Vinodh Gopal <vinodh.gopal@intel.com>
-#     James Guilford <james.guilford@intel.com>
-#     Tim Chen <tim.c.chen@linux.intel.com>
-#
-# This software is available to you under a choice of one of two
-# licenses.  You may choose to be licensed under the terms of the GNU
-# General Public License (GPL) Version 2, available from the file
-# COPYING in the main directory of this source tree, or the
-# OpenIB.org BSD license below:
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright
-#   notice, this list of conditions and the following disclaimer in the
-#   documentation and/or other materials provided with the
-#   distribution.
-#
-# * Neither the name of the Intel Corporation nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-#
-# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-########################################################################
-#       Function API:
-#       UINT16 crc_t10dif_pcl(
-#               UINT16 init_crc, //initial CRC value, 16 bits
-#               const unsigned char *buf, //buffer pointer to calculate CRC on
-#               UINT64 len //buffer length in bytes (64-bit data)
-#       );
-#
-#       Reference paper titled "Fast CRC Computation for Generic
-#	Polynomials Using PCLMULQDQ Instruction"
-#       URL: http://www.intel.com/content/dam/www/public/us/en/documents
-#  /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
-#
-#
-
-#include <linux/linkage.h>
-
-.text
-
-#define        arg1 %rdi
-#define        arg2 %rsi
-#define        arg3 %rdx
-
-#define        arg1_low32 %edi
-
-ENTRY(crc_t10dif_pcl)
-.align 16
-
-	# adjust the 16-bit initial_crc value, scale it to 32 bits
-	shl	$16, arg1_low32
-
-	# Allocate Stack Space
-	mov     %rsp, %rcx
-	sub	$16*2, %rsp
-	# align stack to 16 byte boundary
-	and     $~(0x10 - 1), %rsp
-
-	# check if smaller than 256
-	cmp	$256, arg3
-
-	# for sizes less than 128, we can't fold 64B at a time...
-	jl	_less_than_128
-
-
-	# load the initial crc value
-	movd	arg1_low32, %xmm10	# initial crc
-
-	# crc value does not need to be byte-reflected, but it needs
-	# to be moved to the high part of the register.
-	# because data will be byte-reflected and will align with
-	# initial crc at correct place.
-	pslldq	$12, %xmm10
-
-	movdqa  SHUF_MASK(%rip), %xmm11
-	# receive the initial 64B data, xor the initial crc value
-	movdqu	16*0(arg2), %xmm0
-	movdqu	16*1(arg2), %xmm1
-	movdqu	16*2(arg2), %xmm2
-	movdqu	16*3(arg2), %xmm3
-	movdqu	16*4(arg2), %xmm4
-	movdqu	16*5(arg2), %xmm5
-	movdqu	16*6(arg2), %xmm6
-	movdqu	16*7(arg2), %xmm7
-
-	pshufb	%xmm11, %xmm0
-	# XOR the initial_crc value
-	pxor	%xmm10, %xmm0
-	pshufb	%xmm11, %xmm1
-	pshufb	%xmm11, %xmm2
-	pshufb	%xmm11, %xmm3
-	pshufb	%xmm11, %xmm4
-	pshufb	%xmm11, %xmm5
-	pshufb	%xmm11, %xmm6
-	pshufb	%xmm11, %xmm7
-
-	movdqa	rk3(%rip), %xmm10	#xmm10 has rk3 and rk4
-					#imm value of pclmulqdq instruction
-					#will determine which constant to use
-
-	#################################################################
-	# we subtract 256 instead of 128 to save one instruction from the loop
-	sub	$256, arg3
-
-	# at this section of the code, there is 64*x+y (0<=y<64) bytes of
-	# buffer. The _fold_64_B_loop will fold 64B at a time
-	# until we have 64+y Bytes of buffer
-
-
-	# fold 64B at a time. This section of the code folds 4 xmm
-	# registers in parallel
-_fold_64_B_loop:
-
-	# update the buffer pointer
-	add	$128, arg2		#    buf += 64#
-
-	movdqu	16*0(arg2), %xmm9
-	movdqu	16*1(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm0, %xmm8
-	movdqa	%xmm1, %xmm13
-	pclmulqdq	$0x0 , %xmm10, %xmm0
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0 , %xmm10, %xmm1
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 , %xmm0
-	xorps	%xmm8 , %xmm0
-	pxor	%xmm12, %xmm1
-	xorps	%xmm13, %xmm1
-
-	movdqu	16*2(arg2), %xmm9
-	movdqu	16*3(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm2, %xmm8
-	movdqa	%xmm3, %xmm13
-	pclmulqdq	$0x0, %xmm10, %xmm2
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0, %xmm10, %xmm3
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 , %xmm2
-	xorps	%xmm8 , %xmm2
-	pxor	%xmm12, %xmm3
-	xorps	%xmm13, %xmm3
-
-	movdqu	16*4(arg2), %xmm9
-	movdqu	16*5(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm4, %xmm8
-	movdqa	%xmm5, %xmm13
-	pclmulqdq	$0x0,  %xmm10, %xmm4
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0,  %xmm10, %xmm5
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 ,  %xmm4
-	xorps	%xmm8 ,  %xmm4
-	pxor	%xmm12,  %xmm5
-	xorps	%xmm13,  %xmm5
-
-	movdqu	16*6(arg2), %xmm9
-	movdqu	16*7(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm6 , %xmm8
-	movdqa	%xmm7 , %xmm13
-	pclmulqdq	$0x0 , %xmm10, %xmm6
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0 , %xmm10, %xmm7
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 , %xmm6
-	xorps	%xmm8 , %xmm6
-	pxor	%xmm12, %xmm7
-	xorps	%xmm13, %xmm7
-
-	sub	$128, arg3
-
-	# check if there is another 64B in the buffer to be able to fold
-	jge	_fold_64_B_loop
-	##################################################################
-
-
-	add	$128, arg2
-	# at this point, the buffer pointer is pointing at the last y Bytes
-	# of the buffer the 64B of folded data is in 4 of the xmm
-	# registers: xmm0, xmm1, xmm2, xmm3
-
-
-	# fold the 8 xmm registers to 1 xmm register with different constants
-
-	movdqa	rk9(%rip), %xmm10
-	movdqa	%xmm0, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm0
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm0, %xmm7
-
-	movdqa	rk11(%rip), %xmm10
-	movdqa	%xmm1, %xmm8
-	pclmulqdq	 $0x11, %xmm10, %xmm1
-	pclmulqdq	 $0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm1, %xmm7
-
-	movdqa	rk13(%rip), %xmm10
-	movdqa	%xmm2, %xmm8
-	pclmulqdq	 $0x11, %xmm10, %xmm2
-	pclmulqdq	 $0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm2, %xmm7
-
-	movdqa	rk15(%rip), %xmm10
-	movdqa	%xmm3, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm3
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm3, %xmm7
-
-	movdqa	rk17(%rip), %xmm10
-	movdqa	%xmm4, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm4
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm4, %xmm7
-
-	movdqa	rk19(%rip), %xmm10
-	movdqa	%xmm5, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm5
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm5, %xmm7
-
-	movdqa	rk1(%rip), %xmm10	#xmm10 has rk1 and rk2
-					#imm value of pclmulqdq instruction
-					#will determine which constant to use
-	movdqa	%xmm6, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm6
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm6, %xmm7
-
-
-	# instead of 64, we add 48 to the loop counter to save 1 instruction
-	# from the loop instead of a cmp instruction, we use the negative
-	# flag with the jl instruction
-	add	$128-16, arg3
-	jl	_final_reduction_for_128
-
-	# now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
-	# and the rest is in memory. We can fold 16 bytes at a time if y>=16
-	# continue folding 16B at a time
-
-_16B_reduction_loop:
-	movdqa	%xmm7, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm7
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	movdqu	(arg2), %xmm0
-	pshufb	%xmm11, %xmm0
-	pxor	%xmm0 , %xmm7
-	add	$16, arg2
-	sub	$16, arg3
-	# instead of a cmp instruction, we utilize the flags with the
-	# jge instruction equivalent of: cmp arg3, 16-16
-	# check if there is any more 16B in the buffer to be able to fold
-	jge	_16B_reduction_loop
-
-	#now we have 16+z bytes left to reduce, where 0<= z < 16.
-	#first, we reduce the data in the xmm7 register
-
-
-_final_reduction_for_128:
-	# check if any more data to fold. If not, compute the CRC of
-	# the final 128 bits
-	add	$16, arg3
-	je	_128_done
-
-	# here we are getting data that is less than 16 bytes.
-	# since we know that there was data before the pointer, we can
-	# offset the input pointer before the actual point, to receive
-	# exactly 16 bytes. after that the registers need to be adjusted.
-_get_last_two_xmms:
-	movdqa	%xmm7, %xmm2
-
-	movdqu	-16(arg2, arg3), %xmm1
-	pshufb	%xmm11, %xmm1
-
-	# get rid of the extra data that was loaded before
-	# load the shift constant
-	lea	pshufb_shf_table+16(%rip), %rax
-	sub	arg3, %rax
-	movdqu	(%rax), %xmm0
-
-	# shift xmm2 to the left by arg3 bytes
-	pshufb	%xmm0, %xmm2
-
-	# shift xmm7 to the right by 16-arg3 bytes
-	pxor	mask1(%rip), %xmm0
-	pshufb	%xmm0, %xmm7
-	pblendvb	%xmm2, %xmm1	#xmm0 is implicit
-
-	# fold 16 Bytes
-	movdqa	%xmm1, %xmm2
-	movdqa	%xmm7, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm7
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm2, %xmm7
-
-_128_done:
-	# compute crc of a 128-bit value
-	movdqa	rk5(%rip), %xmm10	# rk5 and rk6 in xmm10
-	movdqa	%xmm7, %xmm0
-
-	#64b fold
-	pclmulqdq	$0x1, %xmm10, %xmm7
-	pslldq	$8   ,  %xmm0
-	pxor	%xmm0,  %xmm7
-
-	#32b fold
-	movdqa	%xmm7, %xmm0
-
-	pand	mask2(%rip), %xmm0
-
-	psrldq	$12, %xmm7
-	pclmulqdq	$0x10, %xmm10, %xmm7
-	pxor	%xmm0, %xmm7
-
-	#barrett reduction
-_barrett:
-	movdqa	rk7(%rip), %xmm10	# rk7 and rk8 in xmm10
-	movdqa	%xmm7, %xmm0
-	pclmulqdq	$0x01, %xmm10, %xmm7
-	pslldq	$4, %xmm7
-	pclmulqdq	$0x11, %xmm10, %xmm7
-
-	pslldq	$4, %xmm7
-	pxor	%xmm0, %xmm7
-	pextrd	$1, %xmm7, %eax
-
-_cleanup:
-	# scale the result back to 16 bits
-	shr	$16, %eax
-	mov     %rcx, %rsp
-	ret
-
-########################################################################
-
-.align 16
-_less_than_128:
-
-	# check if there is enough buffer to be able to fold 16B at a time
-	cmp	$32, arg3
-	jl	_less_than_32
-	movdqa  SHUF_MASK(%rip), %xmm11
-
-	# now if there is, load the constants
-	movdqa	rk1(%rip), %xmm10	# rk1 and rk2 in xmm10
-
-	movd	arg1_low32, %xmm0	# get the initial crc value
-	pslldq	$12, %xmm0	# align it to its correct place
-	movdqu	(arg2), %xmm7	# load the plaintext
-	pshufb	%xmm11, %xmm7	# byte-reflect the plaintext
-	pxor	%xmm0, %xmm7
-
-
-	# update the buffer pointer
-	add	$16, arg2
-
-	# update the counter. subtract 32 instead of 16 to save one
-	# instruction from the loop
-	sub	$32, arg3
-
-	jmp	_16B_reduction_loop
-
-
-.align 16
-_less_than_32:
-	# mov initial crc to the return value. this is necessary for
-	# zero-length buffers.
-	mov	arg1_low32, %eax
-	test	arg3, arg3
-	je	_cleanup
-
-	movdqa  SHUF_MASK(%rip), %xmm11
-
-	movd	arg1_low32, %xmm0	# get the initial crc value
-	pslldq	$12, %xmm0	# align it to its correct place
-
-	cmp	$16, arg3
-	je	_exact_16_left
-	jl	_less_than_16_left
-
-	movdqu	(arg2), %xmm7	# load the plaintext
-	pshufb	%xmm11, %xmm7	# byte-reflect the plaintext
-	pxor	%xmm0 , %xmm7	# xor the initial crc value
-	add	$16, arg2
-	sub	$16, arg3
-	movdqa	rk1(%rip), %xmm10	# rk1 and rk2 in xmm10
-	jmp	_get_last_two_xmms
-
-
-.align 16
-_less_than_16_left:
-	# use stack space to load data less than 16 bytes, zero-out
-	# the 16B in memory first.
-
-	pxor	%xmm1, %xmm1
-	mov	%rsp, %r11
-	movdqa	%xmm1, (%r11)
-
-	cmp	$4, arg3
-	jl	_only_less_than_4
-
-	# backup the counter value
-	mov	arg3, %r9
-	cmp	$8, arg3
-	jl	_less_than_8_left
-
-	# load 8 Bytes
-	mov	(arg2), %rax
-	mov	%rax, (%r11)
-	add	$8, %r11
-	sub	$8, arg3
-	add	$8, arg2
-_less_than_8_left:
-
-	cmp	$4, arg3
-	jl	_less_than_4_left
-
-	# load 4 Bytes
-	mov	(arg2), %eax
-	mov	%eax, (%r11)
-	add	$4, %r11
-	sub	$4, arg3
-	add	$4, arg2
-_less_than_4_left:
-
-	cmp	$2, arg3
-	jl	_less_than_2_left
-
-	# load 2 Bytes
-	mov	(arg2), %ax
-	mov	%ax, (%r11)
-	add	$2, %r11
-	sub	$2, arg3
-	add	$2, arg2
-_less_than_2_left:
-	cmp     $1, arg3
-        jl      _zero_left
-
-	# load 1 Byte
-	mov	(arg2), %al
-	mov	%al, (%r11)
-_zero_left:
-	movdqa	(%rsp), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7	# xor the initial crc value
-
-	# shl r9, 4
-	lea	pshufb_shf_table+16(%rip), %rax
-	sub	%r9, %rax
-	movdqu	(%rax), %xmm0
-	pxor	mask1(%rip), %xmm0
-
-	pshufb	%xmm0, %xmm7
-	jmp	_128_done
-
-.align 16
-_exact_16_left:
-	movdqu	(arg2), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7   # xor the initial crc value
-
-	jmp	_128_done
-
-_only_less_than_4:
-	cmp	$3, arg3
-	jl	_only_less_than_3
-
-	# load 3 Bytes
-	mov	(arg2), %al
-	mov	%al, (%r11)
-
-	mov	1(arg2), %al
-	mov	%al, 1(%r11)
-
-	mov	2(arg2), %al
-	mov	%al, 2(%r11)
-
-	movdqa	 (%rsp), %xmm7
-	pshufb	 %xmm11, %xmm7
-	pxor	 %xmm0 , %xmm7  # xor the initial crc value
-
-	psrldq	$5, %xmm7
-
-	jmp	_barrett
-_only_less_than_3:
-	cmp	$2, arg3
-	jl	_only_less_than_2
-
-	# load 2 Bytes
-	mov	(arg2), %al
-	mov	%al, (%r11)
-
-	mov	1(arg2), %al
-	mov	%al, 1(%r11)
-
-	movdqa	(%rsp), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7   # xor the initial crc value
-
-	psrldq	$6, %xmm7
-
-	jmp	_barrett
-_only_less_than_2:
-
-	# load 1 Byte
-	mov	(arg2), %al
-	mov	%al, (%r11)
-
-	movdqa	(%rsp), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7   # xor the initial crc value
-
-	psrldq	$7, %xmm7
-
-	jmp	_barrett
-
-ENDPROC(crc_t10dif_pcl)
-
-.data
-
-# precomputed constants
-# these constants are precomputed from the poly:
-# 0x8bb70000 (0x8bb7 scaled to 32 bits)
-.align 16
-# Q = 0x18BB70000
-# rk1 = 2^(32*3) mod Q << 32
-# rk2 = 2^(32*5) mod Q << 32
-# rk3 = 2^(32*15) mod Q << 32
-# rk4 = 2^(32*17) mod Q << 32
-# rk5 = 2^(32*3) mod Q << 32
-# rk6 = 2^(32*2) mod Q << 32
-# rk7 = floor(2^64/Q)
-# rk8 = Q
-rk1:
-.quad 0x2d56000000000000
-rk2:
-.quad 0x06df000000000000
-rk3:
-.quad 0x9d9d000000000000
-rk4:
-.quad 0x7cf5000000000000
-rk5:
-.quad 0x2d56000000000000
-rk6:
-.quad 0x1368000000000000
-rk7:
-.quad 0x00000001f65a57f8
-rk8:
-.quad 0x000000018bb70000
-
-rk9:
-.quad 0xceae000000000000
-rk10:
-.quad 0xbfd6000000000000
-rk11:
-.quad 0x1e16000000000000
-rk12:
-.quad 0x713c000000000000
-rk13:
-.quad 0xf7f9000000000000
-rk14:
-.quad 0x80a6000000000000
-rk15:
-.quad 0x044c000000000000
-rk16:
-.quad 0xe658000000000000
-rk17:
-.quad 0xad18000000000000
-rk18:
-.quad 0xa497000000000000
-rk19:
-.quad 0x6ee3000000000000
-rk20:
-.quad 0xe7b5000000000000
-
-
-
-mask1:
-.octa 0x80808080808080808080808080808080
-mask2:
-.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
-
-SHUF_MASK:
-.octa 0x000102030405060708090A0B0C0D0E0F
-
-pshufb_shf_table:
-# use these values for shift constants for the pshufb instruction
-# different alignments result in values as shown:
-#	DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
-#	DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2
-#	DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3
-#	DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
-#	DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
-#	DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
-#	DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9  (16-7) / shr7
-#	DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8  (16-8) / shr8
-#	DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7  (16-9) / shr9
-#	DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6  (16-10) / shr10
-#	DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5  (16-11) / shr11
-#	DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4  (16-12) / shr12
-#	DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3  (16-13) / shr13
-#	DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2  (16-14) / shr14
-#	DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1  (16-15) / shr15
-.octa 0x8f8e8d8c8b8a89888786858483828100
-.octa 0x000e0d0c0b0a09080706050403020100
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
deleted file mode 100644
index 7845d7fd54c0..000000000000
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Cryptographic API.
- *
- * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions
- *
- * Copyright (C) 2013 Intel Corporation
- * Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/crc-t10dif.h>
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <asm/i387.h>
-#include <asm/cpufeature.h>
-#include <asm/cpu_device_id.h>
-
-asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
-				size_t len);
-
-struct chksum_desc_ctx {
-	__u16 crc;
-};
-
-/*
- * Steps through buffer one byte at at time, calculates reflected
- * crc using table.
- */
-
-static int chksum_init(struct shash_desc *desc)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = 0;
-
-	return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	if (irq_fpu_usable()) {
-		kernel_fpu_begin();
-		ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
-		kernel_fpu_end();
-	} else
-		ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
-	return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	*(__u16 *)out = ctx->crc;
-	return 0;
-}
-
-static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
-			u8 *out)
-{
-	if (irq_fpu_usable()) {
-		kernel_fpu_begin();
-		*(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
-		kernel_fpu_end();
-	} else
-		*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
-	return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-			 unsigned int length, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, length, out);
-}
-
-static struct shash_alg alg = {
-	.digestsize		=	CRC_T10DIF_DIGEST_SIZE,
-	.init		=	chksum_init,
-	.update		=	chksum_update,
-	.final		=	chksum_final,
-	.finup		=	chksum_finup,
-	.digest		=	chksum_digest,
-	.descsize		=	sizeof(struct chksum_desc_ctx),
-	.base			=	{
-		.cra_name		=	"crct10dif",
-		.cra_driver_name	=	"crct10dif-pclmul",
-		.cra_priority		=	200,
-		.cra_blocksize		=	CRC_T10DIF_BLOCK_SIZE,
-		.cra_module		=	THIS_MODULE,
-	}
-};
-
-static const struct x86_cpu_id crct10dif_cpu_id[] = {
-	X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
-	{}
-};
-MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
-
-static int __init crct10dif_intel_mod_init(void)
-{
-	if (!x86_match_cpu(crct10dif_cpu_id))
-		return -ENODEV;
-
-	return crypto_register_shash(&alg);
-}
-
-static void __exit crct10dif_intel_mod_fini(void)
-{
-	crypto_unregister_shash(&alg);
-}
-
-module_init(crct10dif_intel_mod_init);
-module_exit(crct10dif_intel_mod_fini);
-
-MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
-MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("crct10dif");
-MODULE_ALIAS("crct10dif-pclmul");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 904ffe838567..2754f2bf5d91 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -376,25 +376,6 @@ config CRYPTO_CRC32_PCLMUL
 	  which will enable any routine to use the CRC-32-IEEE 802.3 checksum
 	  and gain better performance as compared with the table implementation.
 
-config CRYPTO_CRCT10DIF
-	tristate "CRCT10DIF algorithm"
-	select CRYPTO_HASH
-	help
-	  CRC T10 Data Integrity Field computation is being cast as
-	  a crypto transform.  This allows for faster crc t10 diff
-	  transforms to be used if they are available.
-
-config CRYPTO_CRCT10DIF_PCLMUL
-	tristate "CRCT10DIF PCLMULQDQ hardware acceleration"
-	depends on X86 && 64BIT && CRC_T10DIF
-	select CRYPTO_HASH
-	help
-	  For x86_64 processors with SSE4.2 and PCLMULQDQ supported,
-	  CRC T10 DIF PCLMULQDQ computation can be hardware
-	  accelerated PCLMULQDQ instruction. This option will create
-	  'crct10dif-plcmul' module, which is faster when computing the
-	  crct10dif checksum as compared with the generic table implementation.
-
 config CRYPTO_GHASH
 	tristate "GHASH digest algorithm"
 	select CRYPTO_GF128MUL
diff --git a/crypto/Makefile b/crypto/Makefile
index 62af87df8729..a8e9b0fefbe9 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -83,7 +83,6 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
 obj-$(CONFIG_CRYPTO_CRC32) += crc32.o
-obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_842) += 842.o
diff --git a/crypto/crct10dif.c b/crypto/crct10dif.c
deleted file mode 100644
index 92aca96d6b98..000000000000
--- a/crypto/crct10dif.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Cryptographic API.
- *
- * T10 Data Integrity Field CRC16 Crypto Transform
- *
- * Copyright (c) 2007 Oracle Corporation.  All rights reserved.
- * Written by Martin K. Petersen <martin.petersen@oracle.com>
- * Copyright (C) 2013 Intel Corporation
- * Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/crc-t10dif.h>
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-
-struct chksum_desc_ctx {
-	__u16 crc;
-};
-
-/* Table generated using the following polynomium:
- * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
- * gt: 0x8bb7
- */
-static const __u16 t10_dif_crc_table[256] = {
-	0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
-	0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
-	0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
-	0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
-	0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
-	0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
-	0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
-	0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
-	0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
-	0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
-	0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
-	0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
-	0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
-	0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
-	0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
-	0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
-	0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
-	0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
-	0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
-	0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
-	0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
-	0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
-	0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
-	0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
-	0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
-	0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
-	0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
-	0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
-	0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
-	0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
-	0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
-	0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
-};
-
-__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len)
-{
-	unsigned int i;
-
-	for (i = 0 ; i < len ; i++)
-		crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
-
-	return crc;
-}
-EXPORT_SYMBOL(crc_t10dif_generic);
-
-/*
- * Steps through buffer one byte at at time, calculates reflected
- * crc using table.
- */
-
-static int chksum_init(struct shash_desc *desc)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = 0;
-
-	return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
-	return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	*(__u16 *)out = ctx->crc;
-	return 0;
-}
-
-static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
-			u8 *out)
-{
-	*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
-	return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-			 unsigned int length, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, length, out);
-}
-
-static struct shash_alg alg = {
-	.digestsize		=	CRC_T10DIF_DIGEST_SIZE,
-	.init		=	chksum_init,
-	.update		=	chksum_update,
-	.final		=	chksum_final,
-	.finup		=	chksum_finup,
-	.digest		=	chksum_digest,
-	.descsize		=	sizeof(struct chksum_desc_ctx),
-	.base			=	{
-		.cra_name		=	"crct10dif",
-		.cra_driver_name	=	"crct10dif-generic",
-		.cra_priority		=	100,
-		.cra_blocksize		=	CRC_T10DIF_BLOCK_SIZE,
-		.cra_module		=	THIS_MODULE,
-	}
-};
-
-static int __init crct10dif_mod_init(void)
-{
-	int ret;
-
-	ret = crypto_register_shash(&alg);
-	return ret;
-}
-
-static void __exit crct10dif_mod_fini(void)
-{
-	crypto_unregister_shash(&alg);
-}
-
-module_init(crct10dif_mod_init);
-module_exit(crct10dif_mod_fini);
-
-MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
-MODULE_DESCRIPTION("T10 DIF CRC calculation.");
-MODULE_LICENSE("GPL");
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 25a5934f0e50..66d254ce0d11 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1174,10 +1174,6 @@ static int do_test(int m)
 		ret += tcrypt_test("ghash");
 		break;
 
-	case 47:
-		ret += tcrypt_test("crct10dif");
-		break;
-
 	case 100:
 		ret += tcrypt_test("hmac(md5)");
 		break;
@@ -1502,10 +1498,6 @@ static int do_test(int m)
 		test_hash_speed("crc32c", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
-	case 320:
-		test_hash_speed("crct10dif", sec, generic_hash_speed_template);
-		if (mode > 300 && mode < 400) break;
-
 	case 399:
 		break;
 
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 2f00607039e2..ecddf921a9db 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -2045,16 +2045,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 				.count = CRC32C_TEST_VECTORS
 			}
 		}
-	}, {
-		.alg = "crct10dif",
-		.test = alg_test_hash,
-		.fips_allowed = 1,
-		.suite = {
-			.hash = {
-				.vecs = crct10dif_tv_template,
-				.count = CRCT10DIF_TEST_VECTORS
-			}
-		}
 	}, {
 		.alg = "cryptd(__driver-cbc-aes-aesni)",
 		.test = alg_test_null,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 7d44aa3d6b44..1e701bc075b9 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -450,39 +450,6 @@ static struct hash_testvec rmd320_tv_template[] = {
 	}
 };
 
-#define CRCT10DIF_TEST_VECTORS	3
-static struct hash_testvec crct10dif_tv_template[] = {
-	{
-		.plaintext = "abc",
-		.psize  = 3,
-#ifdef __LITTLE_ENDIAN
-		.digest = "\x3b\x44",
-#else
-		.digest = "\x44\x3b",
-#endif
-	}, {
-		.plaintext = "1234567890123456789012345678901234567890"
-			     "123456789012345678901234567890123456789",
-		.psize	= 79,
-#ifdef __LITTLE_ENDIAN
-		.digest	= "\x70\x4b",
-#else
-		.digest	= "\x4b\x70",
-#endif
-	}, {
-		.plaintext =
-		"abcddddddddddddddddddddddddddddddddddddddddddddddddddddd",
-		.psize  = 56,
-#ifdef __LITTLE_ENDIAN
-		.digest = "\xe3\x9c",
-#else
-		.digest = "\x9c\xe3",
-#endif
-		.np     = 2,
-		.tap    = { 28, 28 }
-	}
-};
-
 /*
  * SHA1 test vectors  from from FIPS PUB 180-1
  * Long vector from CAVS 5.0
diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h
index b3cb71f0d3b0..a9c96d865ee7 100644
--- a/include/linux/crc-t10dif.h
+++ b/include/linux/crc-t10dif.h
@@ -3,10 +3,6 @@
 
 #include <linux/types.h>
 
-#define CRC_T10DIF_DIGEST_SIZE 2
-#define CRC_T10DIF_BLOCK_SIZE 1
-
-__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len);
 __u16 crc_t10dif(unsigned char const *, size_t);
 
 #endif
diff --git a/lib/Kconfig b/lib/Kconfig
index 339d5a4292f8..fe01d418b09a 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -63,8 +63,6 @@ config CRC16
 
 config CRC_T10DIF
 	tristate "CRC calculation for the T10 Data Integrity Field"
-	select CRYPTO
-	select CRYPTO_CRCT10DIF
 	help
 	  This option is only needed if a module that's not in the
 	  kernel tree needs to calculate CRC checks for use with the
diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
index fe3428c07b47..fbbd66ed86cd 100644
--- a/lib/crc-t10dif.c
+++ b/lib/crc-t10dif.c
@@ -11,44 +11,57 @@
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/crc-t10dif.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <crypto/hash.h>
 
-static struct crypto_shash *crct10dif_tfm;
+/* Table generated using the following polynomium:
+ * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
+ * gt: 0x8bb7
+ */
+static const __u16 t10_dif_crc_table[256] = {
+	0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
+	0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
+	0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
+	0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
+	0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
+	0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
+	0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
+	0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
+	0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
+	0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
+	0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
+	0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
+	0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
+	0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
+	0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
+	0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
+	0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
+	0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
+	0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
+	0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
+	0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
+	0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
+	0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
+	0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
+	0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
+	0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
+	0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
+	0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
+	0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
+	0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
+	0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
+	0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
+};
 
 __u16 crc_t10dif(const unsigned char *buffer, size_t len)
 {
-	struct {
-		struct shash_desc shash;
-		char ctx[2];
-	} desc;
-	int err;
-
-	desc.shash.tfm = crct10dif_tfm;
-	desc.shash.flags = 0;
-	*(__u16 *)desc.ctx = 0;
+	__u16 crc = 0;
+	unsigned int i;
 
-	err = crypto_shash_update(&desc.shash, buffer, len);
-	BUG_ON(err);
+	for (i = 0 ; i < len ; i++)
+		crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
 
-	return *(__u16 *)desc.ctx;
+	return crc;
 }
 EXPORT_SYMBOL(crc_t10dif);
 
-static int __init crc_t10dif_mod_init(void)
-{
-	crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
-	return PTR_RET(crct10dif_tfm);
-}
-
-static void __exit crc_t10dif_mod_fini(void)
-{
-	crypto_free_shash(crct10dif_tfm);
-}
-
-module_init(crc_t10dif_mod_init);
-module_exit(crc_t10dif_mod_fini);
-
 MODULE_DESCRIPTION("T10 DIF CRC calculation");
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-59-g8ed1b


From 6a6c21ef487be47b300a0b24cd6afeb69d8b9a1a Mon Sep 17 00:00:00 2001
From: Richard Zhu <r65037@freescale.com>
Date: Wed, 24 Jul 2013 14:15:28 +0800
Subject: ARM: imx6q: update the sata bits definitions of gpr13

Replace the SATA_PHY_# by the more readable definitons.

tj: Being routed through libata branch to enable implementation of
    ahci_imx.

Signed-off-by: Richard Zhu <r65037@freescale.com>
Acked-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 121 +++++++++++++++++++---------
 1 file changed, 84 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
index dab34a1deb2c..e235251f1ba9 100644
--- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
+++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
@@ -279,41 +279,88 @@
 #define IMX6Q_GPR13_CAN2_STOP_REQ		BIT(29)
 #define IMX6Q_GPR13_CAN1_STOP_REQ		BIT(28)
 #define IMX6Q_GPR13_ENET_STOP_REQ		BIT(27)
-#define IMX6Q_GPR13_SATA_PHY_8_MASK		(0x7 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_0_5_DB		(0x0 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_1_0_DB		(0x1 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_1_5_DB		(0x2 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_2_0_DB		(0x3 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_2_5_DB		(0x4 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_3_0_DB		(0x5 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_3_5_DB		(0x6 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_4_0_DB		(0x7 << 24)
-#define IMX6Q_GPR13_SATA_PHY_7_MASK		(0x1f << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA1I		(0x10 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA1M		(0x10 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA1X		(0x1a << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA2I		(0x12 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA2M		(0x12 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA2X		(0x1a << 19)
-#define IMX6Q_GPR13_SATA_PHY_6_MASK		(0x7 << 16)
-#define IMX6Q_GPR13_SATA_SPEED_MASK		BIT(15)
-#define IMX6Q_GPR13_SATA_SPEED_1P5G		0x0
-#define IMX6Q_GPR13_SATA_SPEED_3P0G		BIT(15)
-#define IMX6Q_GPR13_SATA_PHY_5			BIT(14)
-#define IMX6Q_GPR13_SATA_PHY_4_MASK		(0x7 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_16_16		(0x0 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_14_16		(0x1 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_12_16		(0x2 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_10_16		(0x3 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_9_16		(0x4 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_8_16		(0x5 << 11)
-#define IMX6Q_GPR13_SATA_PHY_3_MASK		(0xf << 7)
-#define IMX6Q_GPR13_SATA_PHY_3_OFF		0x7
-#define IMX6Q_GPR13_SATA_PHY_2_MASK		(0x1f << 2)
-#define IMX6Q_GPR13_SATA_PHY_2_OFF		0x2
-#define IMX6Q_GPR13_SATA_PHY_1_MASK		(0x3 << 0)
-#define IMX6Q_GPR13_SATA_PHY_1_FAST		(0x0 << 0)
-#define IMX6Q_GPR13_SATA_PHY_1_MED		(0x1 << 0)
-#define IMX6Q_GPR13_SATA_PHY_1_SLOW		(0x2 << 0)
-
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK		(0x7 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_0_5_DB	(0x0 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_1_0_DB	(0x1 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_1_5_DB	(0x2 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_2_0_DB	(0x3 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_2_5_DB	(0x4 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB	(0x5 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_3_5_DB	(0x6 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_4_0_DB	(0x7 << 24)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK	(0x1f << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1I	(0x10 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1M	(0x10 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1X	(0x1a << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2I	(0x12 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M	(0x12 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2X	(0x1a << 19)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK	(0x7 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_1P_1F	(0x0 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_2F	(0x1 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_1P_4F	(0x2 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F	(0x3 << 16)
+#define IMX6Q_GPR13_SATA_SPD_MODE_MASK		BIT(15)
+#define IMX6Q_GPR13_SATA_SPD_MODE_1P5G		0x0
+#define IMX6Q_GPR13_SATA_SPD_MODE_3P0G		BIT(15)
+#define IMX6Q_GPR13_SATA_MPLL_SS_EN		BIT(14)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_MASK		(0x7 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_16_16		(0x0 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_14_16		(0x1 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_12_16		(0x2 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_10_16		(0x3 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_9_16		(0x4 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_8_16		(0x5 << 11)
+#define IMX6Q_GPR13_SATA_TX_BOOST_MASK		(0xf << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_0_00_DB	(0x0 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_0_37_DB	(0x1 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_0_74_DB	(0x2 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_1_11_DB	(0x3 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_1_48_DB	(0x4 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_1_85_DB	(0x5 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_2_22_DB	(0x6 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_2_59_DB	(0x7 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_2_96_DB	(0x8 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB	(0x9 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_3_70_DB	(0xa << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_4_07_DB	(0xb << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_4_44_DB	(0xc << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_4_81_DB	(0xd << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_5_28_DB	(0xe << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_5_75_DB	(0xf << 7)
+#define IMX6Q_GPR13_SATA_TX_LVL_MASK		(0x1f << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_937_V		(0x00 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_947_V		(0x01 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_957_V		(0x02 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_966_V		(0x03 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_976_V		(0x04 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_986_V		(0x05 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_996_V		(0x06 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_005_V		(0x07 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_015_V		(0x08 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_025_V		(0x09 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_035_V		(0x0a << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_045_V		(0x0b << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_054_V		(0x0c << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_064_V		(0x0d << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_074_V		(0x0e << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_084_V		(0x0f << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_094_V		(0x10 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_104_V		(0x11 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_113_V		(0x12 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_123_V		(0x13 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_133_V		(0x14 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_143_V		(0x15 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_152_V		(0x16 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_162_V		(0x17 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_172_V		(0x18 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_182_V		(0x19 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_191_V		(0x1a << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_201_V		(0x1b << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_211_V		(0x1c << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_221_V		(0x1d << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_230_V		(0x1e << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_240_V		(0x1f << 2)
+#define IMX6Q_GPR13_SATA_MPLL_CLK_EN		BIT(1)
+#define IMX6Q_GPR13_SATA_TX_EDGE_RATE		BIT(0)
 #endif /* __LINUX_IMX6Q_IOMUXC_GPR_H */
-- 
cgit v1.2.3-59-g8ed1b


From 8e5c2b776ae4c35f54547c017e0a943429f5748a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 25 Jul 2013 21:43:39 +0200
Subject: Revert "ACPI / video / i915: No ACPI backlight if firmware expects
 Windows 8"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We attempted to address a regression introduced by commit a57f7f9
(ACPICA: Add Windows8/Server2012 string for _OSI method.) after which
ACPI video backlight support doesn't work on a number of systems,
because the relevant AML methods in the ACPI tables in their BIOSes
become useless after the BIOS has been told that the OS is compatible
with Windows 8.  That problem is tracked by the bug entry at:

https://bugzilla.kernel.org/show_bug.cgi?id=51231

Commit 8c5bd7a (ACPI / video / i915: No ACPI backlight if firmware
expects Windows 8) introduced for this purpose essentially prevented
the ACPI backlight support from being used if the BIOS had been told
that the OS was compatible with Windows 8 and the i915 driver was
loaded, in which case the backlight would always be handled by i915.
Unfortunately, however, that turned out to cause problems with
backlight to appear on multiple systems with symptoms indicating that
i915 was unable to control the backlight on those systems as
expected.

For this reason, revert commit 8c5bd7a, but leave the function
acpi_video_backlight_quirks() introduced by it, because another
commit on top of it uses that function.

References: https://lkml.org/lkml/2013/7/21/119
References: https://lkml.org/lkml/2013/7/22/261
References: https://lkml.org/lkml/2013/7/23/429
References: https://lkml.org/lkml/2013/7/23/459
References: https://lkml.org/lkml/2013/7/23/81
References: https://lkml.org/lkml/2013/7/24/27
Reported-and-tested-by: James Hogan <james@albanarts.com>
Reported-and-tested-by: Kamal Mostafa <kamal@canonical.com>
Reported-and-tested-by: Jörg Otte <jrg.otte@gmail.com>
Reported-and-tested-by: Steven Newbury <steve@snewbury.org.uk>
Reported-by: Martin Steigerwald <Martin@lichtvoll.de>
Reported-by: Kalle Valo <kvalo@adurom.com>
Tested-by: Joerg Platte <jplatte@naasa.net>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/internal.h         |  2 --
 drivers/acpi/video.c            | 67 +++++------------------------------------
 drivers/acpi/video_detect.c     | 15 +--------
 drivers/gpu/drm/i915/i915_dma.c |  2 +-
 include/acpi/video.h            | 11 +------
 include/linux/acpi.h            |  1 -
 6 files changed, 11 insertions(+), 87 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 227aca77ee1e..5da44e81dd4d 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -169,10 +169,8 @@ int acpi_create_platform_device(struct acpi_device *adev,
   -------------------------------------------------------------------------- */
 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
 bool acpi_video_backlight_quirks(void);
-bool acpi_video_verify_backlight_support(void);
 #else
 static inline bool acpi_video_backlight_quirks(void) { return false; }
-static inline bool acpi_video_verify_backlight_support(void) { return false; }
 #endif
 
 #endif /* _ACPI_INTERNAL_H_ */
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 6dd237e79b4f..0ec434d2586d 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -911,7 +911,7 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 	if (acpi_video_init_brightness(device))
 		return;
 
-	if (acpi_video_verify_backlight_support()) {
+	if (acpi_video_backlight_support()) {
 		struct backlight_properties props;
 		struct pci_dev *pdev;
 		acpi_handle acpi_parent;
@@ -1366,8 +1366,8 @@ acpi_video_switch_brightness(struct acpi_video_device *device, int event)
 	unsigned long long level_current, level_next;
 	int result = -EINVAL;
 
-	/* no warning message if acpi_backlight=vendor or a quirk is used */
-	if (!acpi_video_verify_backlight_support())
+	/* no warning message if acpi_backlight=vendor is used */
+	if (!acpi_video_backlight_support())
 		return 0;
 
 	if (!device->brightness)
@@ -1875,46 +1875,6 @@ static int acpi_video_bus_remove(struct acpi_device *device)
 	return 0;
 }
 
-static acpi_status video_unregister_backlight(acpi_handle handle, u32 lvl,
-					      void *context, void **rv)
-{
-	struct acpi_device *acpi_dev;
-	struct acpi_video_bus *video;
-	struct acpi_video_device *dev, *next;
-
-	if (acpi_bus_get_device(handle, &acpi_dev))
-		return AE_OK;
-
-	if (acpi_match_device_ids(acpi_dev, video_device_ids))
-		return AE_OK;
-
-	video = acpi_driver_data(acpi_dev);
-	if (!video)
-		return AE_OK;
-
-	acpi_video_bus_stop_devices(video);
-	mutex_lock(&video->device_list_lock);
-	list_for_each_entry_safe(dev, next, &video->video_device_list, entry) {
-		if (dev->backlight) {
-			backlight_device_unregister(dev->backlight);
-			dev->backlight = NULL;
-			kfree(dev->brightness->levels);
-			kfree(dev->brightness);
-		}
-		if (dev->cooling_dev) {
-			sysfs_remove_link(&dev->dev->dev.kobj,
-					  "thermal_cooling");
-			sysfs_remove_link(&dev->cooling_dev->device.kobj,
-					  "device");
-			thermal_cooling_device_unregister(dev->cooling_dev);
-			dev->cooling_dev = NULL;
-		}
-	}
-	mutex_unlock(&video->device_list_lock);
-	acpi_video_bus_start_devices(video);
-	return AE_OK;
-}
-
 static int __init is_i740(struct pci_dev *dev)
 {
 	if (dev->device == 0x00D1)
@@ -1946,25 +1906,14 @@ static int __init intel_opregion_present(void)
 	return opregion;
 }
 
-int __acpi_video_register(bool backlight_quirks)
+int acpi_video_register(void)
 {
-	bool no_backlight;
-	int result;
-
-	no_backlight = backlight_quirks ? acpi_video_backlight_quirks() : false;
-
+	int result = 0;
 	if (register_count) {
 		/*
-		 * If acpi_video_register() has been called already, don't try
-		 * to register acpi_video_bus, but unregister backlight devices
-		 * if no backlight support is requested.
+		 * if the function of acpi_video_register is already called,
+		 * don't register the acpi_vide_bus again and return no error.
 		 */
-		if (no_backlight)
-			acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
-					    ACPI_UINT32_MAX,
-					    video_unregister_backlight,
-					    NULL, NULL, NULL);
-
 		return 0;
 	}
 
@@ -1980,7 +1929,7 @@ int __acpi_video_register(bool backlight_quirks)
 
 	return 0;
 }
-EXPORT_SYMBOL(__acpi_video_register);
+EXPORT_SYMBOL(acpi_video_register);
 
 void acpi_video_unregister(void)
 {
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 826e52def080..c3397748ba46 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -238,12 +238,7 @@ static void acpi_video_caps_check(void)
 
 bool acpi_video_backlight_quirks(void)
 {
-	if (acpi_gbl_osi_data >= ACPI_OSI_WIN_8) {
-		acpi_video_caps_check();
-		acpi_video_support |= ACPI_VIDEO_SKIP_BACKLIGHT;
-		return true;
-	}
-	return false;
+	return acpi_gbl_osi_data >= ACPI_OSI_WIN_8;
 }
 EXPORT_SYMBOL(acpi_video_backlight_quirks);
 
@@ -291,14 +286,6 @@ int acpi_video_backlight_support(void)
 }
 EXPORT_SYMBOL(acpi_video_backlight_support);
 
-/* For the ACPI video driver use only. */
-bool acpi_video_verify_backlight_support(void)
-{
-	return (acpi_video_support & ACPI_VIDEO_SKIP_BACKLIGHT) ?
-		false : acpi_video_backlight_support();
-}
-EXPORT_SYMBOL(acpi_video_verify_backlight_support);
-
 /*
  * Use acpi_backlight=vendor/video to force that backlight switching
  * is processed by vendor specific acpi drivers or video.ko driver.
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index cf188ab7051a..adb319b53ecd 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1648,7 +1648,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	if (INTEL_INFO(dev)->num_pipes) {
 		/* Must be done after probing outputs */
 		intel_opregion_init(dev);
-		acpi_video_register_with_quirks();
+		acpi_video_register();
 	}
 
 	if (IS_GEN5(dev))
diff --git a/include/acpi/video.h b/include/acpi/video.h
index b26dc4fb7ba8..61109f2609fc 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -17,21 +17,12 @@ struct acpi_device;
 #define ACPI_VIDEO_DISPLAY_LEGACY_TV      0x0200
 
 #if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE)
-extern int __acpi_video_register(bool backlight_quirks);
-static inline int acpi_video_register(void)
-{
-	return __acpi_video_register(false);
-}
-static inline int acpi_video_register_with_quirks(void)
-{
-	return __acpi_video_register(true);
-}
+extern int acpi_video_register(void);
 extern void acpi_video_unregister(void);
 extern int acpi_video_get_edid(struct acpi_device *device, int type,
 			       int device_id, void **edid);
 #else
 static inline int acpi_video_register(void) { return 0; }
-static inline int acpi_video_register_with_quirks(void) { return 0; }
 static inline void acpi_video_unregister(void) { return; }
 static inline int acpi_video_get_edid(struct acpi_device *device, int type,
 				      int device_id, void **edid)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6ad72f92469c..353ba256f368 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -191,7 +191,6 @@ extern bool wmi_has_guid(const char *guid);
 #define ACPI_VIDEO_BACKLIGHT_DMI_VIDEO			0x0200
 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR		0x0400
 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO		0x0800
-#define ACPI_VIDEO_SKIP_BACKLIGHT			0x1000
 
 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
 
-- 
cgit v1.2.3-59-g8ed1b


From 81913283c80be8c0b7e038c26e2a611ab38394f1 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Fri, 28 Jun 2013 05:44:22 -0300
Subject: [media] v4l2: added missing mutex.h include to v4l2-ctrls.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes following error:
include/media/v4l2-ctrls.h:193:15: error: field ‘_lock’ has incomplete type
include/media/v4l2-ctrls.h: In function ‘v4l2_ctrl_lock’:
include/media/v4l2-ctrls.h:570:2: error: implicit declaration of
	function ‘mutex_lock’ [-Werror=implicit-function-declaration]
include/media/v4l2-ctrls.h: In function ‘v4l2_ctrl_unlock’:
include/media/v4l2-ctrls.h:579:2: error: implicit declaration of
	function ‘mutex_unlock’ [-Werror=implicit-function-declaration]

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/media/v4l2-ctrls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index 7343a27fe819..47ada23345a1 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -22,6 +22,7 @@
 #define _V4L2_CTRLS_H
 
 #include <linux/list.h>
+#include <linux/mutex.h>
 #include <linux/videodev2.h>
 
 /* forward references */
-- 
cgit v1.2.3-59-g8ed1b


From 0699a73af3811b66b1ab5650575acee5eea841ab Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Mon, 22 Jul 2013 21:32:09 +0200
Subject: firewire: fix libdc1394/FlyCap2 iso event regression

Commit 18d627113b83 (firewire: prevent dropping of completed iso packet
header data) was intended to be an obvious bug fix, but libdc1394 and
FlyCap2 depend on the old behaviour by ignoring all returned information
and thus not noticing that not all packets have been received yet.  The
result was that the video frame buffers would be saved before they
contained the correct data.

Reintroduce the old behaviour for old clients.

Tested-by: Stepan Salenikovich <stepan.salenikovich@gmail.com>
Tested-by: Josep Bosch <jep250@gmail.com>
Cc: <stable@vger.kernel.org> # 3.4+
Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-cdev.c       |  3 +++
 drivers/firewire/ohci.c            | 10 ++++++++--
 include/linux/firewire.h           |  1 +
 include/uapi/linux/firewire-cdev.h |  4 ++--
 4 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 7ef316fdc4d9..ac1b43a04285 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -54,6 +54,7 @@
 #define FW_CDEV_KERNEL_VERSION			5
 #define FW_CDEV_VERSION_EVENT_REQUEST2		4
 #define FW_CDEV_VERSION_ALLOCATE_REGION_END	4
+#define FW_CDEV_VERSION_AUTO_FLUSH_ISO_OVERFLOW	5
 
 struct client {
 	u32 version;
@@ -1005,6 +1006,8 @@ static int ioctl_create_iso_context(struct client *client, union ioctl_arg *arg)
 			a->channel, a->speed, a->header_size, cb, client);
 	if (IS_ERR(context))
 		return PTR_ERR(context);
+	if (client->version < FW_CDEV_VERSION_AUTO_FLUSH_ISO_OVERFLOW)
+		context->drop_overflow_headers = true;
 
 	/* We only support one context at this time. */
 	spin_lock_irq(&client->lock);
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 9e1db6490b9a..afb701ec90ca 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -2749,8 +2749,11 @@ static void copy_iso_headers(struct iso_context *ctx, const u32 *dma_hdr)
 {
 	u32 *ctx_hdr;
 
-	if (ctx->header_length + ctx->base.header_size > PAGE_SIZE)
+	if (ctx->header_length + ctx->base.header_size > PAGE_SIZE) {
+		if (ctx->base.drop_overflow_headers)
+			return;
 		flush_iso_completions(ctx);
+	}
 
 	ctx_hdr = ctx->header + ctx->header_length;
 	ctx->last_timestamp = (u16)le32_to_cpu((__force __le32)dma_hdr[0]);
@@ -2910,8 +2913,11 @@ static int handle_it_packet(struct context *context,
 
 	sync_it_packet_for_cpu(context, d);
 
-	if (ctx->header_length + 4 > PAGE_SIZE)
+	if (ctx->header_length + 4 > PAGE_SIZE) {
+		if (ctx->base.drop_overflow_headers)
+			return 1;
 		flush_iso_completions(ctx);
+	}
 
 	ctx_hdr = ctx->header + ctx->header_length;
 	ctx->last_timestamp = le16_to_cpu(last->res_count);
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 3b0e820375ab..5d7782e42b8f 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -436,6 +436,7 @@ struct fw_iso_context {
 	int type;
 	int channel;
 	int speed;
+	bool drop_overflow_headers;
 	size_t header_size;
 	union {
 		fw_iso_callback_t sc;
diff --git a/include/uapi/linux/firewire-cdev.h b/include/uapi/linux/firewire-cdev.h
index d50036953497..1db453e4b550 100644
--- a/include/uapi/linux/firewire-cdev.h
+++ b/include/uapi/linux/firewire-cdev.h
@@ -215,8 +215,8 @@ struct fw_cdev_event_request2 {
  * with the %FW_CDEV_ISO_INTERRUPT bit set, when explicitly requested with
  * %FW_CDEV_IOC_FLUSH_ISO, or when there have been so many completed packets
  * without the interrupt bit set that the kernel's internal buffer for @header
- * is about to overflow.  (In the last case, kernels with ABI version < 5 drop
- * header data up to the next interrupt packet.)
+ * is about to overflow.  (In the last case, ABI versions < 5 drop header data
+ * up to the next interrupt packet.)
  *
  * Isochronous transmit events (context type %FW_CDEV_ISO_CONTEXT_TRANSMIT):
  *
-- 
cgit v1.2.3-59-g8ed1b


From 148519120c6d1f19ad53349683aeae9f228b0b8d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 27 Jul 2013 01:41:34 +0200
Subject: Revert "cpuidle: Quickly notice prediction failure for repeat mode"

Revert commit 69a37bea (cpuidle: Quickly notice prediction failure for
repeat mode), because it has been identified as the source of a
significant performance regression in v3.8 and later as explained by
Jeremy Eder:

  We believe we've identified a particular commit to the cpuidle code
  that seems to be impacting performance of variety of workloads.
  The simplest way to reproduce is using netperf TCP_RR test, so
  we're using that, on a pair of Sandy Bridge based servers.  We also
  have data from a large database setup where performance is also
  measurably/positively impacted, though that test data isn't easily
  share-able.

  Included below are test results from 3 test kernels:

  kernel       reverts
  -----------------------------------------------------------
  1) vanilla   upstream (no reverts)

  2) perfteam2 reverts e11538d1f03914eb92af5a1a378375c05ae8520c

  3) test      reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4
                       e11538d1f03914eb92af5a1a378375c05ae8520c

  In summary, netperf TCP_RR numbers improve by approximately 4%
  after reverting 69a37beabf1f0a6705c08e879bdd5d82ff6486c4.  When
  69a37beabf1f0a6705c08e879bdd5d82ff6486c4 is included, C0 residency
  never seems to get above 40%.  Taking that patch out gets C0 near
  100% quite often, and performance increases.

  The below data are histograms representing the %c0 residency @
  1-second sample rates (using turbostat), while under netperf test.

  - If you look at the first 4 histograms, you can see %c0 residency
    almost entirely in the 30,40% bin.
  - The last pair, which reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4,
    shows %c0 in the 80,90,100% bins.

  Below each kernel name are netperf TCP_RR trans/s numbers for the
  particular kernel that can be disclosed publicly, comparing the 3
  test kernels.  We ran a 4th test with the vanilla kernel where
  we've also set /dev/cpu_dma_latency=0 to show overall impact
  boosting single-threaded TCP_RR performance over 11% above
  baseline.

  3.10-rc2 vanilla RX + c0 lock (/dev/cpu_dma_latency=0):
  TCP_RR trans/s 54323.78

  -----------------------------------------------------------
  3.10-rc2 vanilla RX (no reverts)
  TCP_RR trans/s 48192.47

  Receiver %c0
      0.0000 -    10.0000 [     1]: *
     10.0000 -    20.0000 [     0]:
     20.0000 -    30.0000 [     0]:
     30.0000 -    40.0000 [    59]:
  ***********************************************************
     40.0000 -    50.0000 [     1]: *
     50.0000 -    60.0000 [     0]:
     60.0000 -    70.0000 [     0]:
     70.0000 -    80.0000 [     0]:
     80.0000 -    90.0000 [     0]:
     90.0000 -   100.0000 [     0]:

  Sender %c0
      0.0000 -    10.0000 [     1]: *
     10.0000 -    20.0000 [     0]:
     20.0000 -    30.0000 [     0]:
     30.0000 -    40.0000 [    11]: ***********
     40.0000 -    50.0000 [    49]:
  *************************************************
     50.0000 -    60.0000 [     0]:
     60.0000 -    70.0000 [     0]:
     70.0000 -    80.0000 [     0]:
     80.0000 -    90.0000 [     0]:
     90.0000 -   100.0000 [     0]:

  -----------------------------------------------------------
  3.10-rc2 perfteam2 RX (reverts commit
  e11538d1f03914eb92af5a1a378375c05ae8520c)
  TCP_RR trans/s 49698.69

  Receiver %c0
      0.0000 -    10.0000 [     1]: *
     10.0000 -    20.0000 [     1]: *
     20.0000 -    30.0000 [     0]:
     30.0000 -    40.0000 [    59]:
  ***********************************************************
     40.0000 -    50.0000 [     0]:
     50.0000 -    60.0000 [     0]:
     60.0000 -    70.0000 [     0]:
     70.0000 -    80.0000 [     0]:
     80.0000 -    90.0000 [     0]:
     90.0000 -   100.0000 [     0]:

  Sender %c0
      0.0000 -    10.0000 [     1]: *
     10.0000 -    20.0000 [     0]:
     20.0000 -    30.0000 [     0]:
     30.0000 -    40.0000 [     2]: **
     40.0000 -    50.0000 [    58]:
  **********************************************************
     50.0000 -    60.0000 [     0]:
     60.0000 -    70.0000 [     0]:
     70.0000 -    80.0000 [     0]:
     80.0000 -    90.0000 [     0]:
     90.0000 -   100.0000 [     0]:

  -----------------------------------------------------------
  3.10-rc2 test RX (reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4
  and e11538d1f03914eb92af5a1a378375c05ae8520c)
  TCP_RR trans/s 47766.95

  Receiver %c0
      0.0000 -    10.0000 [     1]: *
     10.0000 -    20.0000 [     1]: *
     20.0000 -    30.0000 [     0]:
     30.0000 -    40.0000 [    27]: ***************************
     40.0000 -    50.0000 [     2]: **
     50.0000 -    60.0000 [     0]:
     60.0000 -    70.0000 [     2]: **
     70.0000 -    80.0000 [     0]:
     80.0000 -    90.0000 [     0]:
     90.0000 -   100.0000 [    28]: ****************************

  Sender:
      0.0000 -    10.0000 [     1]: *
     10.0000 -    20.0000 [     0]:
     20.0000 -    30.0000 [     0]:
     30.0000 -    40.0000 [    11]: ***********
     40.0000 -    50.0000 [     0]:
     50.0000 -    60.0000 [     1]: *
     60.0000 -    70.0000 [     0]:
     70.0000 -    80.0000 [     3]: ***
     80.0000 -    90.0000 [     7]: *******
     90.0000 -   100.0000 [    38]: **************************************

  These results demonstrate gaining back the tendency of the CPU to
  stay in more responsive, performant C-states (and thus yield
  measurably better performance), by reverting commit
  69a37beabf1f0a6705c08e879bdd5d82ff6486c4.

Requested-by: Jeremy Eder <jeder@redhat.com>
Tested-by: Len Brown <len.brown@intel.com>
Cc: 3.8+ <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/menu.c | 73 +++-------------------------------------
 include/linux/tick.h             |  6 ----
 kernel/time/tick-sched.c         |  9 ++---
 3 files changed, 6 insertions(+), 82 deletions(-)

(limited to 'include')

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index b69a87e22155..bc580b67a652 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -28,13 +28,6 @@
 #define MAX_INTERESTING 50000
 #define STDDEV_THRESH 400
 
-/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */
-#define MAX_DEVIATION 60
-
-static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer);
-static DEFINE_PER_CPU(int, hrtimer_status);
-/* menu hrtimer mode */
-enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT};
 
 /*
  * Concepts and ideas behind the menu governor
@@ -198,42 +191,17 @@ static u64 div_round64(u64 dividend, u32 divisor)
 	return div_u64(dividend + (divisor / 2), divisor);
 }
 
-/* Cancel the hrtimer if it is not triggered yet */
-void menu_hrtimer_cancel(void)
-{
-	int cpu = smp_processor_id();
-	struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
-
-	/* The timer is still not time out*/
-	if (per_cpu(hrtimer_status, cpu)) {
-		hrtimer_cancel(hrtmr);
-		per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
-	}
-}
-EXPORT_SYMBOL_GPL(menu_hrtimer_cancel);
-
-/* Call back for hrtimer is triggered */
-static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer)
-{
-	int cpu = smp_processor_id();
-
-	per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
-
-	return HRTIMER_NORESTART;
-}
-
 /*
  * Try detecting repeating patterns by keeping track of the last 8
  * intervals, and checking if the standard deviation of that set
  * of points is below a threshold. If it is... then use the
  * average of these 8 points as the estimated value.
  */
-static u32 get_typical_interval(struct menu_device *data)
+static void get_typical_interval(struct menu_device *data)
 {
 	int i = 0, divisor = 0;
 	uint64_t max = 0, avg = 0, stddev = 0;
 	int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */
-	unsigned int ret = 0;
 
 again:
 
@@ -274,16 +242,13 @@ again:
 	if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3))
 							|| stddev <= 20) {
 		data->predicted_us = avg;
-		ret = 1;
-		return ret;
+		return;
 
 	} else if ((divisor * 4) > INTERVALS * 3) {
 		/* Exclude the max interval */
 		thresh = max - 1;
 		goto again;
 	}
-
-	return ret;
 }
 
 /**
@@ -298,9 +263,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	int i;
 	int multiplier;
 	struct timespec t;
-	int repeat = 0, low_predicted = 0;
-	int cpu = smp_processor_id();
-	struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
 
 	if (data->needs_update) {
 		menu_update(drv, dev);
@@ -335,7 +297,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket],
 					 RESOLUTION * DECAY);
 
-	repeat = get_typical_interval(data);
+	get_typical_interval(data);
 
 	/*
 	 * We want to default to C1 (hlt), not to busy polling
@@ -356,10 +318,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
 		if (s->disabled || su->disable)
 			continue;
-		if (s->target_residency > data->predicted_us) {
-			low_predicted = 1;
+		if (s->target_residency > data->predicted_us)
 			continue;
-		}
 		if (s->exit_latency > latency_req)
 			continue;
 		if (s->exit_latency * multiplier > data->predicted_us)
@@ -369,28 +329,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		data->exit_us = s->exit_latency;
 	}
 
-	/* not deepest C-state chosen for low predicted residency */
-	if (low_predicted) {
-		unsigned int timer_us = 0;
-
-		/*
-		 * Set a timer to detect whether this sleep is much
-		 * longer than repeat mode predicted.  If the timer
-		 * triggers, the code will evaluate whether to put
-		 * the CPU into a deeper C-state.
-		 * The timer is cancelled on CPU wakeup.
-		 */
-		timer_us = 2 * (data->predicted_us + MAX_DEVIATION);
-
-		if (repeat && (4 * timer_us < data->expected_us)) {
-			RCU_NONIDLE(hrtimer_start(hrtmr,
-				ns_to_ktime(1000 * timer_us),
-				HRTIMER_MODE_REL_PINNED));
-			/* In repeat case, menu hrtimer is started */
-			per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT;
-		}
-	}
-
 	return data->last_state_idx;
 }
 
@@ -481,9 +419,6 @@ static int menu_enable_device(struct cpuidle_driver *drv,
 				struct cpuidle_device *dev)
 {
 	struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
-	struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu);
-	hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	t->function = menu_hrtimer_notify;
 
 	memset(data, 0, sizeof(struct menu_device));
 
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 9180f4b85e6d..62bd8b72873c 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -174,10 +174,4 @@ static inline void tick_nohz_task_switch(struct task_struct *tsk) { }
 #endif
 
 
-# ifdef CONFIG_CPU_IDLE_GOV_MENU
-extern void menu_hrtimer_cancel(void);
-# else
-static inline void menu_hrtimer_cancel(void) {}
-# endif /* CONFIG_CPU_IDLE_GOV_MENU */
-
 #endif
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e80183f4a6c4..e77edc97e036 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -827,13 +827,10 @@ void tick_nohz_irq_exit(void)
 {
 	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 
-	if (ts->inidle) {
-		/* Cancel the timer because CPU already waken up from the C-states*/
-		menu_hrtimer_cancel();
+	if (ts->inidle)
 		__tick_nohz_idle_enter(ts);
-	} else {
+	else
 		tick_nohz_full_stop_tick(ts);
-	}
 }
 
 /**
@@ -931,8 +928,6 @@ void tick_nohz_idle_exit(void)
 
 	ts->inidle = 0;
 
-	/* Cancel the timer because CPU already waken up from the C-states*/
-	menu_hrtimer_cancel();
 	if (ts->idle_active || ts->tick_stopped)
 		now = ktime_get();
 
-- 
cgit v1.2.3-59-g8ed1b


From 2b44c4db2e2f1765d35163a861d301038e0c8a75 Mon Sep 17 00:00:00 2001
From: Colin Cross <ccross@android.com>
Date: Wed, 24 Jul 2013 17:41:33 -0700
Subject: freezer: set PF_SUSPEND_TASK flag on tasks that call freeze_processes

Calling freeze_processes sets a global flag that will cause any
process that calls try_to_freeze to enter the refrigerator.  It
skips sending a signal to the current task, but if the current
task ever hits try_to_freeze, all threads will be frozen and the
system will deadlock.

Set a new flag, PF_SUSPEND_TASK, on the task that calls
freeze_processes.  The flag notifies the freezer that the thread
is involved in suspend and should not be frozen.  Also add a
WARN_ON in thaw_processes if the caller does not have the
PF_SUSPEND_TASK flag set to catch if a different task calls
thaw_processes than the one that called freeze_processes, leaving
a task with PF_SUSPEND_TASK permanently set on it.

Threads that spawn off a task with PF_SUSPEND_TASK set (which
swsusp does) will also have PF_SUSPEND_TASK set, preventing them
from freezing while they are helping with suspend, but they need
to be dead by the time suspend is triggered, otherwise they may
run when userspace is expected to be frozen.  Add a WARN_ON in
thaw_processes if more than one thread has the PF_SUSPEND_TASK
flag set.

Reported-and-tested-by: Michael Leun <lkml20130126@newton.leun.net>
Signed-off-by: Colin Cross <ccross@android.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/sched.h  |  1 +
 kernel/freezer.c       |  2 +-
 kernel/power/process.c | 11 +++++++++++
 3 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50d04b92ceda..d722490da030 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1628,6 +1628,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP	0x40000000	/* Freezer should not count it as freezable */
+#define PF_SUSPEND_TASK 0x80000000      /* this thread called freeze_processes and should not be frozen */
 
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 8b2afc1c9df0..b462fa197517 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -33,7 +33,7 @@ static DEFINE_SPINLOCK(freezer_lock);
  */
 bool freezing_slow_path(struct task_struct *p)
 {
-	if (p->flags & PF_NOFREEZE)
+	if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
 		return false;
 
 	if (pm_nosig_freezing || cgroup_freezing(p))
diff --git a/kernel/power/process.c b/kernel/power/process.c
index fc0df8486449..06ec8869dbf1 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -109,6 +109,8 @@ static int try_to_freeze_tasks(bool user_only)
 
 /**
  * freeze_processes - Signal user space processes to enter the refrigerator.
+ * The current thread will not be frozen.  The same process that calls
+ * freeze_processes must later call thaw_processes.
  *
  * On success, returns 0.  On failure, -errno and system is fully thawed.
  */
@@ -120,6 +122,9 @@ int freeze_processes(void)
 	if (error)
 		return error;
 
+	/* Make sure this task doesn't get frozen */
+	current->flags |= PF_SUSPEND_TASK;
+
 	if (!pm_freezing)
 		atomic_inc(&system_freezing_cnt);
 
@@ -168,6 +173,7 @@ int freeze_kernel_threads(void)
 void thaw_processes(void)
 {
 	struct task_struct *g, *p;
+	struct task_struct *curr = current;
 
 	if (pm_freezing)
 		atomic_dec(&system_freezing_cnt);
@@ -182,10 +188,15 @@ void thaw_processes(void)
 
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
+		/* No other threads should have PF_SUSPEND_TASK set */
+		WARN_ON((p != curr) && (p->flags & PF_SUSPEND_TASK));
 		__thaw_task(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
 
+	WARN_ON(!(curr->flags & PF_SUSPEND_TASK));
+	curr->flags &= ~PF_SUSPEND_TASK;
+
 	usermodehelper_enable();
 
 	schedule();
-- 
cgit v1.2.3-59-g8ed1b


From a838834b2f7cbc09b6319a1fc332c03e4d665b20 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 30 Jul 2013 16:43:55 -0400
Subject: drm: fix 64 bit drm fixed point helpers

Sign bit wasn't handled properly and a small typo.

Thanks to Christian for helping me sort this out.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/drm/drm_fixed.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h
index f5e1168c7647..d639049a613d 100644
--- a/include/drm/drm_fixed.h
+++ b/include/drm/drm_fixed.h
@@ -84,12 +84,12 @@ static inline int drm_fixp2int(int64_t a)
 	return ((s64)a) >> DRM_FIXED_POINT;
 }
 
-static inline s64 drm_fixp_msbset(int64_t a)
+static inline unsigned drm_fixp_msbset(int64_t a)
 {
 	unsigned shift, sign = (a >> 63) & 1;
 
 	for (shift = 62; shift > 0; --shift)
-		if ((a >> shift) != sign)
+		if (((a >> shift) & 1) != sign)
 			return shift;
 
 	return 0;
@@ -100,9 +100,9 @@ static inline s64 drm_fixp_mul(s64 a, s64 b)
 	unsigned shift = drm_fixp_msbset(a) + drm_fixp_msbset(b);
 	s64 result;
 
-	if (shift > 63) {
-		shift = shift - 63;
-		a >>= shift >> 1;
+	if (shift > 61) {
+		shift = shift - 61;
+		a >>= (shift >> 1) + (shift & 1);
 		b >>= shift >> 1;
 	} else
 		shift = 0;
@@ -120,7 +120,7 @@ static inline s64 drm_fixp_mul(s64 a, s64 b)
 
 static inline s64 drm_fixp_div(s64 a, s64 b)
 {
-	unsigned shift = 63 - drm_fixp_msbset(a);
+	unsigned shift = 62 - drm_fixp_msbset(a);
 	s64 result;
 
 	a <<= shift;
@@ -154,7 +154,7 @@ static inline s64 drm_fixp_exp(s64 x)
 	}
 
 	if (x < 0)
-		sum = drm_fixp_div(1, sum);
+		sum = drm_fixp_div(DRM_FIXED_ONE, sum);
 
 	return sum;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 9ea7187c53f63e31f2d1b2b1e474e31808565009 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Wed, 31 Jul 2013 01:19:43 +0200
Subject: NFC: netlink: Rename CMD_FW_UPLOAD to CMD_FW_DOWNLOAD

Loading a firmware into a target is typically called firmware
download, not firmware upload. So we rename the netlink API to
NFC_CMD_FW_DOWNLOAD in order to avoid any terminology confusion from
userspace.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/hci.h    |  2 +-
 include/net/nfc/nfc.h    |  4 ++--
 include/uapi/linux/nfc.h |  6 +++---
 net/nfc/core.c           | 20 ++++++++++----------
 net/nfc/hci/core.c       |  8 ++++----
 net/nfc/netlink.c        | 12 ++++++------
 net/nfc/nfc.h            |  6 +++---
 7 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index 0af851c3b038..b64b7bce4b94 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h
@@ -59,7 +59,7 @@ struct nfc_hci_ops {
 			      struct nfc_target *target);
 	int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event,
 			      struct sk_buff *skb);
-	int (*fw_upload)(struct nfc_hci_dev *hdev, const char *firmware_name);
+	int (*fw_download)(struct nfc_hci_dev *hdev, const char *firmware_name);
 	int (*discover_se)(struct nfc_hci_dev *dev);
 	int (*enable_se)(struct nfc_hci_dev *dev, u32 se_idx);
 	int (*disable_se)(struct nfc_hci_dev *dev, u32 se_idx);
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 0e353f1658bb..5f286b726bb6 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -68,7 +68,7 @@ struct nfc_ops {
 			     void *cb_context);
 	int (*tm_send)(struct nfc_dev *dev, struct sk_buff *skb);
 	int (*check_presence)(struct nfc_dev *dev, struct nfc_target *target);
-	int (*fw_upload)(struct nfc_dev *dev, const char *firmware_name);
+	int (*fw_download)(struct nfc_dev *dev, const char *firmware_name);
 
 	/* Secure Element API */
 	int (*discover_se)(struct nfc_dev *dev);
@@ -127,7 +127,7 @@ struct nfc_dev {
 	int targets_generation;
 	struct device dev;
 	bool dev_up;
-	bool fw_upload_in_progress;
+	bool fw_download_in_progress;
 	u8 rf_mode;
 	bool polling;
 	struct nfc_target *active_target;
diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index caed0f324d5f..8137dd8d2adf 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -69,8 +69,8 @@
  *	starting a poll from a device which has a secure element enabled means
  *	we want to do SE based card emulation.
  * @NFC_CMD_DISABLE_SE: Disable the physical link to a specific secure element.
- * @NFC_CMD_FW_UPLOAD: Request to Load/flash firmware, or event to inform that
- *	some firmware was loaded
+ * @NFC_CMD_FW_DOWNLOAD: Request to Load/flash firmware, or event to inform
+ *	that some firmware was loaded
  */
 enum nfc_commands {
 	NFC_CMD_UNSPEC,
@@ -94,7 +94,7 @@ enum nfc_commands {
 	NFC_CMD_DISABLE_SE,
 	NFC_CMD_LLC_SDREQ,
 	NFC_EVENT_LLC_SDRES,
-	NFC_CMD_FW_UPLOAD,
+	NFC_CMD_FW_DOWNLOAD,
 	NFC_EVENT_SE_ADDED,
 	NFC_EVENT_SE_REMOVED,
 /* private: internal use only */
diff --git a/net/nfc/core.c b/net/nfc/core.c
index dc96a83aa6ab..1d074dd1650f 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -44,7 +44,7 @@ DEFINE_MUTEX(nfc_devlist_mutex);
 /* NFC device ID bitmap */
 static DEFINE_IDA(nfc_index_ida);
 
-int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name)
+int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name)
 {
 	int rc = 0;
 
@@ -62,28 +62,28 @@ int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name)
 		goto error;
 	}
 
-	if (!dev->ops->fw_upload) {
+	if (!dev->ops->fw_download) {
 		rc = -EOPNOTSUPP;
 		goto error;
 	}
 
-	dev->fw_upload_in_progress = true;
-	rc = dev->ops->fw_upload(dev, firmware_name);
+	dev->fw_download_in_progress = true;
+	rc = dev->ops->fw_download(dev, firmware_name);
 	if (rc)
-		dev->fw_upload_in_progress = false;
+		dev->fw_download_in_progress = false;
 
 error:
 	device_unlock(&dev->dev);
 	return rc;
 }
 
-int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name)
+int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name)
 {
-	dev->fw_upload_in_progress = false;
+	dev->fw_download_in_progress = false;
 
-	return nfc_genl_fw_upload_done(dev, firmware_name);
+	return nfc_genl_fw_download_done(dev, firmware_name);
 }
-EXPORT_SYMBOL(nfc_fw_upload_done);
+EXPORT_SYMBOL(nfc_fw_download_done);
 
 /**
  * nfc_dev_up - turn on the NFC device
@@ -110,7 +110,7 @@ int nfc_dev_up(struct nfc_dev *dev)
 		goto error;
 	}
 
-	if (dev->fw_upload_in_progress) {
+	if (dev->fw_download_in_progress) {
 		rc = -EBUSY;
 		goto error;
 	}
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 7b1c186736eb..fe66908401f5 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -809,14 +809,14 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 	}
 }
 
-static int hci_fw_upload(struct nfc_dev *nfc_dev, const char *firmware_name)
+static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name)
 {
 	struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
 
-	if (!hdev->ops->fw_upload)
+	if (!hdev->ops->fw_download)
 		return -ENOTSUPP;
 
-	return hdev->ops->fw_upload(hdev, firmware_name);
+	return hdev->ops->fw_download(hdev, firmware_name);
 }
 
 static struct nfc_ops hci_nfc_ops = {
@@ -831,7 +831,7 @@ static struct nfc_ops hci_nfc_ops = {
 	.im_transceive = hci_transceive,
 	.tm_send = hci_tm_send,
 	.check_presence = hci_check_presence,
-	.fw_upload = hci_fw_upload,
+	.fw_download = hci_fw_download,
 	.discover_se = hci_discover_se,
 	.enable_se = hci_enable_se,
 	.disable_se = hci_disable_se,
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index b05ad909778f..f16fd59d4160 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1089,7 +1089,7 @@ exit:
 	return rc;
 }
 
-static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info)
+static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nfc_dev *dev;
 	int rc;
@@ -1108,13 +1108,13 @@ static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info)
 	nla_strlcpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME],
 		    sizeof(firmware_name));
 
-	rc = nfc_fw_upload(dev, firmware_name);
+	rc = nfc_fw_download(dev, firmware_name);
 
 	nfc_put_device(dev);
 	return rc;
 }
 
-int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name)
+int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name)
 {
 	struct sk_buff *msg;
 	void *hdr;
@@ -1124,7 +1124,7 @@ int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name)
 		return -ENOMEM;
 
 	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
-			  NFC_CMD_FW_UPLOAD);
+			  NFC_CMD_FW_DOWNLOAD);
 	if (!hdr)
 		goto free_msg;
 
@@ -1251,8 +1251,8 @@ static struct genl_ops nfc_genl_ops[] = {
 		.policy = nfc_genl_policy,
 	},
 	{
-		.cmd = NFC_CMD_FW_UPLOAD,
-		.doit = nfc_genl_fw_upload,
+		.cmd = NFC_CMD_FW_DOWNLOAD,
+		.doit = nfc_genl_fw_download,
 		.policy = nfc_genl_policy,
 	},
 	{
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index ee85a1fc1b24..820a7850c36a 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -123,10 +123,10 @@ static inline void nfc_device_iter_exit(struct class_dev_iter *iter)
 	class_dev_iter_exit(iter);
 }
 
-int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name);
-int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name);
+int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name);
+int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name);
 
-int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name);
+int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name);
 
 int nfc_dev_up(struct nfc_dev *dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From 2816c551c796ec14620325b2c9ed75b9979d3125 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 29 Jul 2013 19:50:33 +0200
Subject: tracing: trace_remove_event_call() should fail if call/file is in use

Change trace_remove_event_call(call) to return the error if this
call is active. This is what the callers assume but can't verify
outside of the tracing locks. Both trace_kprobe.c/trace_uprobe.c
need the additional changes, unregister_trace_probe() should abort
if trace_remove_event_call() fails.

The caller is going to free this call/file so we must ensure that
nobody can use them after trace_remove_event_call() succeeds.
debugfs should be fine after the previous changes and event_remove()
does TRACE_REG_UNREGISTER, but still there are 2 reasons why we need
the additional checks:

- There could be a perf_event(s) attached to this tp_event, so the
  patch checks ->perf_refcount.

- TRACE_REG_UNREGISTER can be suppressed by FTRACE_EVENT_FL_SOFT_MODE,
  so we simply check FTRACE_EVENT_FL_ENABLED protected by event_mutex.

Link: http://lkml.kernel.org/r/20130729175033.GB26284@redhat.com

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |  2 +-
 kernel/trace/trace_events.c  | 35 +++++++++++++++++++++++++++++++++--
 2 files changed, 34 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4372658c73ae..f98ab063e95e 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -332,7 +332,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 			      const char *name, int offset, int size,
 			      int is_signed, int filter_type);
 extern int trace_add_event_call(struct ftrace_event_call *call);
-extern void trace_remove_event_call(struct ftrace_event_call *call);
+extern int trace_remove_event_call(struct ftrace_event_call *call);
 
 #define is_signed_type(type)	(((type)(-1)) < (type)1)
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index a67c913e2f9f..ec04836273c0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1713,16 +1713,47 @@ static void __trace_remove_event_call(struct ftrace_event_call *call)
 	destroy_preds(call);
 }
 
+static int probe_remove_event_call(struct ftrace_event_call *call)
+{
+	struct trace_array *tr;
+	struct ftrace_event_file *file;
+
+#ifdef CONFIG_PERF_EVENTS
+	if (call->perf_refcount)
+		return -EBUSY;
+#endif
+	do_for_each_event_file(tr, file) {
+		if (file->event_call != call)
+			continue;
+		/*
+		 * We can't rely on ftrace_event_enable_disable(enable => 0)
+		 * we are going to do, FTRACE_EVENT_FL_SOFT_MODE can suppress
+		 * TRACE_REG_UNREGISTER.
+		 */
+		if (file->flags & FTRACE_EVENT_FL_ENABLED)
+			return -EBUSY;
+		break;
+	} while_for_each_event_file();
+
+	__trace_remove_event_call(call);
+
+	return 0;
+}
+
 /* Remove an event_call */
-void trace_remove_event_call(struct ftrace_event_call *call)
+int trace_remove_event_call(struct ftrace_event_call *call)
 {
+	int ret;
+
 	mutex_lock(&trace_types_lock);
 	mutex_lock(&event_mutex);
 	down_write(&trace_event_sem);
-	__trace_remove_event_call(call);
+	ret = probe_remove_event_call(call);
 	up_write(&trace_event_sem);
 	mutex_unlock(&event_mutex);
 	mutex_unlock(&trace_types_lock);
+
+	return ret;
 }
 
 #define for_each_event(event, start, end)			\
-- 
cgit v1.2.3-59-g8ed1b


From cd23b14b654769db83c9684ae1ba32c0e066670f Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@dev.mellanox.co.il>
Date: Thu, 18 Jul 2013 15:31:08 +0300
Subject: mlx5_core: Implement new initialization sequence

Introduce enbale_hca and disable_hca commands to signify when the
driver starts or ceases to operate on the device.

In addition the driver will use boot and init pages count; boot pages
is required to allow firmware to complete boot commands and the other
to complete init hca.  Command interface revision is bumped to 4 to
enforce using supported firmware.

This patch breaks compatibility with old versions of firmware (< 4);
however, the first GA firmware we will publish will support version 4
so this should not be a problem.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c      |  8 ++-
 drivers/net/ethernet/mellanox/mlx5/core/main.c     | 69 +++++++++++++++++++---
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c    | 20 +++++--
 include/linux/mlx5/device.h                        | 20 +++++++
 include/linux/mlx5/driver.h                        |  4 +-
 5 files changed, 106 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 40374063c01e..c571de85d0f9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -46,7 +46,7 @@
 #include "mlx5_core.h"
 
 enum {
-	CMD_IF_REV = 3,
+	CMD_IF_REV = 4,
 };
 
 enum {
@@ -282,6 +282,12 @@ const char *mlx5_command_str(int command)
 	case MLX5_CMD_OP_TEARDOWN_HCA:
 		return "TEARDOWN_HCA";
 
+	case MLX5_CMD_OP_ENABLE_HCA:
+		return "MLX5_CMD_OP_ENABLE_HCA";
+
+	case MLX5_CMD_OP_DISABLE_HCA:
+		return "MLX5_CMD_OP_DISABLE_HCA";
+
 	case MLX5_CMD_OP_QUERY_PAGES:
 		return "QUERY_PAGES";
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 12242de2b0e3..b47739b0b5f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -249,6 +249,44 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev)
 	return err;
 }
 
+static int mlx5_core_enable_hca(struct mlx5_core_dev *dev)
+{
+	int err;
+	struct mlx5_enable_hca_mbox_in in;
+	struct mlx5_enable_hca_mbox_out out;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ENABLE_HCA);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	return 0;
+}
+
+static int mlx5_core_disable_hca(struct mlx5_core_dev *dev)
+{
+	int err;
+	struct mlx5_disable_hca_mbox_in in;
+	struct mlx5_disable_hca_mbox_out out;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DISABLE_HCA);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	return 0;
+}
+
 int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
 {
 	struct mlx5_priv *priv = &dev->priv;
@@ -304,28 +342,41 @@ int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
 	}
 
 	mlx5_pagealloc_init(dev);
+
+	err = mlx5_core_enable_hca(dev);
+	if (err) {
+		dev_err(&pdev->dev, "enable hca failed\n");
+		goto err_pagealloc_cleanup;
+	}
+
+	err = mlx5_satisfy_startup_pages(dev, 1);
+	if (err) {
+		dev_err(&pdev->dev, "failed to allocate boot pages\n");
+		goto err_disable_hca;
+	}
+
 	err = set_hca_ctrl(dev);
 	if (err) {
 		dev_err(&pdev->dev, "set_hca_ctrl failed\n");
-		goto err_pagealloc_cleanup;
+		goto reclaim_boot_pages;
 	}
 
 	err = handle_hca_cap(dev);
 	if (err) {
 		dev_err(&pdev->dev, "handle_hca_cap failed\n");
-		goto err_pagealloc_cleanup;
+		goto reclaim_boot_pages;
 	}
 
-	err = mlx5_satisfy_startup_pages(dev);
+	err = mlx5_satisfy_startup_pages(dev, 0);
 	if (err) {
-		dev_err(&pdev->dev, "failed to allocate startup pages\n");
-		goto err_pagealloc_cleanup;
+		dev_err(&pdev->dev, "failed to allocate init pages\n");
+		goto reclaim_boot_pages;
 	}
 
 	err = mlx5_pagealloc_start(dev);
 	if (err) {
 		dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
-		goto err_reclaim_pages;
+		goto reclaim_boot_pages;
 	}
 
 	err = mlx5_cmd_init_hca(dev);
@@ -396,9 +447,12 @@ err_stop_poll:
 err_pagealloc_stop:
 	mlx5_pagealloc_stop(dev);
 
-err_reclaim_pages:
+reclaim_boot_pages:
 	mlx5_reclaim_startup_pages(dev);
 
+err_disable_hca:
+	mlx5_core_disable_hca(dev);
+
 err_pagealloc_cleanup:
 	mlx5_pagealloc_cleanup(dev);
 	mlx5_cmd_cleanup(dev);
@@ -434,6 +488,7 @@ void mlx5_dev_cleanup(struct mlx5_core_dev *dev)
 	mlx5_cmd_teardown_hca(dev);
 	mlx5_pagealloc_stop(dev);
 	mlx5_reclaim_startup_pages(dev);
+	mlx5_core_disable_hca(dev);
 	mlx5_pagealloc_cleanup(dev);
 	mlx5_cmd_cleanup(dev);
 	iounmap(dev->iseg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index f0bf46339b28..4a3e137931a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -64,7 +64,7 @@ struct mlx5_query_pages_inbox {
 
 struct mlx5_query_pages_outbox {
 	struct mlx5_outbox_hdr	hdr;
-	u8			reserved[2];
+	__be16			num_boot_pages;
 	__be16			func_id;
 	__be16			init_pages;
 	__be16			num_pages;
@@ -146,7 +146,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
 }
 
 static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
-				s16 *pages, s16 *init_pages)
+				s16 *pages, s16 *init_pages, u16 *boot_pages)
 {
 	struct mlx5_query_pages_inbox	in;
 	struct mlx5_query_pages_outbox	out;
@@ -164,8 +164,13 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
 
 	if (pages)
 		*pages = be16_to_cpu(out.num_pages);
+
 	if (init_pages)
 		*init_pages = be16_to_cpu(out.init_pages);
+
+	if (boot_pages)
+		*boot_pages = be16_to_cpu(out.num_boot_pages);
+
 	*func_id = be16_to_cpu(out.func_id);
 
 	return err;
@@ -357,19 +362,22 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
 	queue_work(dev->priv.pg_wq, &req->work);
 }
 
-int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev)
+int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
 {
+	u16 uninitialized_var(boot_pages);
 	s16 uninitialized_var(init_pages);
 	u16 uninitialized_var(func_id);
 	int err;
 
-	err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages);
+	err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages,
+				   &boot_pages);
 	if (err)
 		return err;
 
-	mlx5_core_dbg(dev, "requested %d init pages for func_id 0x%x\n", init_pages, func_id);
 
-	return give_pages(dev, func_id, init_pages, 0);
+	mlx5_core_dbg(dev, "requested %d init pages and %d boot pages for func_id 0x%x\n",
+		      init_pages, boot_pages, func_id);
+	return give_pages(dev, func_id, boot ? boot_pages : init_pages, 0);
 }
 
 static int optimal_reclaimed_pages(void)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 8de8d8f22384..737685e9e852 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -690,6 +690,26 @@ struct mlx5_query_cq_mbox_out {
 	__be64			pas[0];
 };
 
+struct mlx5_enable_hca_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
+struct mlx5_enable_hca_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
+struct mlx5_disable_hca_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
+struct mlx5_disable_hca_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
 struct mlx5_eq_context {
 	u8			status;
 	u8			ec_oi;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f22e4419839b..2aa258b0ced1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -101,6 +101,8 @@ enum {
 	MLX5_CMD_OP_QUERY_ADAPTER		= 0x101,
 	MLX5_CMD_OP_INIT_HCA			= 0x102,
 	MLX5_CMD_OP_TEARDOWN_HCA		= 0x103,
+	MLX5_CMD_OP_ENABLE_HCA			= 0x104,
+	MLX5_CMD_OP_DISABLE_HCA			= 0x105,
 	MLX5_CMD_OP_QUERY_PAGES			= 0x107,
 	MLX5_CMD_OP_MANAGE_PAGES		= 0x108,
 	MLX5_CMD_OP_SET_HCA_CAP			= 0x109,
@@ -690,7 +692,7 @@ int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
 				 s16 npages);
-int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev);
+int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
 int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
 void mlx5_register_debugfs(void);
 void mlx5_unregister_debugfs(void);
-- 
cgit v1.2.3-59-g8ed1b


From 22f2020f84c6da2dd0acb2dce12e39e59ff7c8be Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.cz>
Date: Wed, 31 Jul 2013 13:53:48 -0700
Subject: vmpressure: change vmpressure::sr_lock to spinlock

There is nothing that can sleep inside critical sections protected by
this lock and those sections are really small so there doesn't make much
sense to use mutex for them.  Change the log to a spinlock

Signed-off-by: Michal Hocko <mhocko@suse.cz>
Reported-by: Tejun Heo <tj@kernel.org>
Cc: Anton Vorontsov <anton.vorontsov@linaro.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Li Zefan <lizefan@huawei.com>
Reviewed-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmpressure.h |  2 +-
 mm/vmpressure.c            | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 76be077340ea..2081680e015d 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -12,7 +12,7 @@ struct vmpressure {
 	unsigned long scanned;
 	unsigned long reclaimed;
 	/* The lock is used to keep the scanned/reclaimed above in sync. */
-	struct mutex sr_lock;
+	struct spinlock sr_lock;
 
 	/* The list of vmpressure_event structs. */
 	struct list_head events;
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 736a6011c2c8..f4ee6a190a4d 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -180,12 +180,12 @@ static void vmpressure_work_fn(struct work_struct *work)
 	if (!vmpr->scanned)
 		return;
 
-	mutex_lock(&vmpr->sr_lock);
+	spin_lock(&vmpr->sr_lock);
 	scanned = vmpr->scanned;
 	reclaimed = vmpr->reclaimed;
 	vmpr->scanned = 0;
 	vmpr->reclaimed = 0;
-	mutex_unlock(&vmpr->sr_lock);
+	spin_unlock(&vmpr->sr_lock);
 
 	do {
 		if (vmpressure_event(vmpr, scanned, reclaimed))
@@ -240,11 +240,11 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
 	if (!scanned)
 		return;
 
-	mutex_lock(&vmpr->sr_lock);
+	spin_lock(&vmpr->sr_lock);
 	vmpr->scanned += scanned;
 	vmpr->reclaimed += reclaimed;
 	scanned = vmpr->scanned;
-	mutex_unlock(&vmpr->sr_lock);
+	spin_unlock(&vmpr->sr_lock);
 
 	if (scanned < vmpressure_win || work_pending(&vmpr->work))
 		return;
@@ -367,7 +367,7 @@ void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft,
  */
 void vmpressure_init(struct vmpressure *vmpr)
 {
-	mutex_init(&vmpr->sr_lock);
+	spin_lock_init(&vmpr->sr_lock);
 	mutex_init(&vmpr->events_lock);
 	INIT_LIST_HEAD(&vmpr->events);
 	INIT_WORK(&vmpr->work, vmpressure_work_fn);
-- 
cgit v1.2.3-59-g8ed1b


From 33cb876e947b9ddda8dca3fb99234b743a597ef9 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.cz>
Date: Wed, 31 Jul 2013 13:53:51 -0700
Subject: vmpressure: make sure there are no events queued after memcg is
 offlined

vmpressure is called synchronously from reclaim where the target_memcg
is guaranteed to be alive but the eventfd is signaled from the work
queue context.  This means that memcg (along with vmpressure structure
which is embedded into it) might go away while the work item is pending
which would result in use-after-release bug.

We have two possible ways how to fix this.  Either vmpressure pins memcg
before it schedules vmpr->work and unpin it in vmpressure_work_fn or
explicitely flush the work item from the css_offline context (as
suggested by Tejun).

This patch implements the later one and it introduces vmpressure_cleanup
which flushes the vmpressure work queue item item.  It hooks into
mem_cgroup_css_offline after the memcg itself is cleaned up.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Reported-by: Tejun Heo <tj@kernel.org>
Cc: Anton Vorontsov <anton.vorontsov@linaro.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Li Zefan <lizefan@huawei.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmpressure.h |  1 +
 mm/memcontrol.c            |  1 +
 mm/vmpressure.c            | 16 ++++++++++++++++
 3 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 2081680e015d..7dc17e2456de 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -30,6 +30,7 @@ extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
 extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio);
 
 extern void vmpressure_init(struct vmpressure *vmpr);
+extern void vmpressure_cleanup(struct vmpressure *vmpr);
 extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
 extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
 extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 00a7a664b9c1..c290a1cf3862 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6335,6 +6335,7 @@ static void mem_cgroup_css_offline(struct cgroup *cont)
 	mem_cgroup_invalidate_reclaim_iterators(memcg);
 	mem_cgroup_reparent_charges(memcg);
 	mem_cgroup_destroy_all_caches(memcg);
+	vmpressure_cleanup(&memcg->vmpressure);
 }
 
 static void mem_cgroup_css_free(struct cgroup *cont)
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 192f9731931d..0c1e37d829fa 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -372,3 +372,19 @@ void vmpressure_init(struct vmpressure *vmpr)
 	INIT_LIST_HEAD(&vmpr->events);
 	INIT_WORK(&vmpr->work, vmpressure_work_fn);
 }
+
+/**
+ * vmpressure_cleanup() - shuts down vmpressure control structure
+ * @vmpr:	Structure to be cleaned up
+ *
+ * This function should be called before the structure in which it is
+ * embedded is cleaned up.
+ */
+void vmpressure_cleanup(struct vmpressure *vmpr)
+{
+	/*
+	 * Make sure there is no pending work before eventfd infrastructure
+	 * goes away.
+	 */
+	flush_work(&vmpr->work);
+}
-- 
cgit v1.2.3-59-g8ed1b


From d9d10a30964504af834d8d250a0c76d4ae91eb1e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 30 Jul 2013 10:31:00 -0700
Subject: ndisc: Add missing inline to ndisc_addr_option_pad

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ndisc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 949d77528f2f..6fea32340ae8 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -119,7 +119,7 @@ extern struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
  * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
  * also need a pad of 2.
  */
-static int ndisc_addr_option_pad(unsigned short type)
+static inline int ndisc_addr_option_pad(unsigned short type)
 {
 	switch (type) {
 	case ARPHRD_INFINIBAND: return 2;
-- 
cgit v1.2.3-59-g8ed1b


From 2ac3ac8f86f2fe065d746d9a9abaca867adec577 Mon Sep 17 00:00:00 2001
From: Michal Kubeček <mkubecek@suse.cz>
Date: Thu, 1 Aug 2013 10:04:14 +0200
Subject: ipv6: prevent fib6_run_gc() contention

On a high-traffic router with many processors and many IPv6 dst
entries, soft lockup in fib6_run_gc() can occur when number of
entries reaches gc_thresh.

This happens because fib6_run_gc() uses fib6_gc_lock to allow
only one thread to run the garbage collector but ip6_dst_gc()
doesn't update net->ipv6.ip6_rt_last_gc until fib6_run_gc()
returns. On a system with many entries, this can take some time
so that in the meantime, other threads pass the tests in
ip6_dst_gc() (ip6_rt_last_gc is still not updated) and wait for
the lock. They then have to run the garbage collector one after
another which blocks them for quite long.

Resolve this by replacing special value ~0UL of expire parameter
to fib6_run_gc() by explicit "force" parameter to choose between
spin_lock_bh() and spin_trylock_bh() and call fib6_run_gc() with
force=false if gc_thresh is reached but not max_size.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h |  2 +-
 net/ipv6/ip6_fib.c    | 19 ++++++++-----------
 net/ipv6/ndisc.c      |  4 ++--
 net/ipv6/route.c      |  4 ++--
 4 files changed, 13 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 2a601e7da1bf..48ec25a7fcb6 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -300,7 +300,7 @@ extern void			inet6_rt_notify(int event, struct rt6_info *rt,
 						struct nl_info *info);
 
 extern void			fib6_run_gc(unsigned long expires,
-					    struct net *net);
+					    struct net *net, bool force);
 
 extern void			fib6_gc_cleanup(void);
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5fc9c7a68d8d..d872553ca933 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1632,19 +1632,16 @@ static int fib6_age(struct rt6_info *rt, void *arg)
 
 static DEFINE_SPINLOCK(fib6_gc_lock);
 
-void fib6_run_gc(unsigned long expires, struct net *net)
+void fib6_run_gc(unsigned long expires, struct net *net, bool force)
 {
-	if (expires != ~0UL) {
+	if (force) {
 		spin_lock_bh(&fib6_gc_lock);
-		gc_args.timeout = expires ? (int)expires :
-			net->ipv6.sysctl.ip6_rt_gc_interval;
-	} else {
-		if (!spin_trylock_bh(&fib6_gc_lock)) {
-			mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
-			return;
-		}
-		gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
+	} else if (!spin_trylock_bh(&fib6_gc_lock)) {
+		mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
+		return;
 	}
+	gc_args.timeout = expires ? (int)expires :
+			  net->ipv6.sysctl.ip6_rt_gc_interval;
 
 	gc_args.more = icmp6_dst_gc();
 
@@ -1661,7 +1658,7 @@ void fib6_run_gc(unsigned long expires, struct net *net)
 
 static void fib6_gc_timer_cb(unsigned long arg)
 {
-	fib6_run_gc(0, (struct net *)arg);
+	fib6_run_gc(0, (struct net *)arg, true);
 }
 
 static int __net_init fib6_net_init(struct net *net)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 24c03396e008..79aa9652ed86 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1576,7 +1576,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGEADDR:
 		neigh_changeaddr(&nd_tbl, dev);
-		fib6_run_gc(~0UL, net);
+		fib6_run_gc(0, net, false);
 		idev = in6_dev_get(dev);
 		if (!idev)
 			break;
@@ -1586,7 +1586,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
 		break;
 	case NETDEV_DOWN:
 		neigh_ifdown(&nd_tbl, dev);
-		fib6_run_gc(~0UL, net);
+		fib6_run_gc(0, net, false);
 		break;
 	case NETDEV_NOTIFY_PEERS:
 		ndisc_send_unsol_na(dev);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a8c891aa2464..824c424f9648 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1326,7 +1326,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
 		goto out;
 
 	net->ipv6.ip6_rt_gc_expire++;
-	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
+	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
 	net->ipv6.ip6_rt_last_gc = now;
 	entries = dst_entries_get_slow(ops);
 	if (entries < ops->gc_thresh)
@@ -2827,7 +2827,7 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
 	net = (struct net *)ctl->extra1;
 	delay = net->ipv6.sysctl.flush_delay;
 	proc_dointvec(ctl, write, buffer, lenp, ppos);
-	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
+	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From dfcefb0be1231982784df2152213103ad33c1cfd Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Thu, 1 Aug 2013 11:10:24 +0800
Subject: net: fix a compile error when CONFIG_NET_LL_RX_POLL is not set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When CONFIG_NET_LL_RX_POLL is not set, I got:

net/socket.c: In function ‘sock_poll’:
net/socket.c:1165:4: error: implicit declaration of function ‘sk_busy_loop’ [-Werror=implicit-function-declaration]

Fix this by adding a nop when !CONFIG_NET_LL_RX_POLL.

Cc: Eliezer Tamir <eliezer.tamir@linux.intel.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/busy_poll.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index a14339c2985f..6cd8848fec68 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -181,5 +181,10 @@ static inline bool busy_loop_timeout(unsigned long end_time)
 	return true;
 }
 
+static inline bool sk_busy_loop(struct sock *sk, int nonblock)
+{
+	return false;
+}
+
 #endif /* CONFIG_NET_LL_RX_POLL */
 #endif /* _LINUX_NET_BUSY_POLL_H */
-- 
cgit v1.2.3-59-g8ed1b


From e0d1095ae3405404d247afb00233ef837d58da83 Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Thu, 1 Aug 2013 11:10:25 +0800
Subject: net: rename CONFIG_NET_LL_RX_POLL to CONFIG_NET_RX_BUSY_POLL

Eliezer renames several *ll_poll to *busy_poll, but forgets
CONFIG_NET_LL_RX_POLL, so in case of confusion, rename it too.

Cc: Eliezer Tamir <eliezer.tamir@linux.intel.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/sysctl/net.txt                     |  4 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h      |  8 ++++----
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  |  2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe.h         | 12 ++++++------
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |  6 +++---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c  |  6 +++---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c   |  6 +++---
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h     | 10 +++++-----
 include/linux/netdevice.h                        |  2 +-
 include/linux/skbuff.h                           |  2 +-
 include/net/busy_poll.h                          |  6 +++---
 include/net/sock.h                               |  2 +-
 net/Kconfig                                      |  2 +-
 net/core/skbuff.c                                |  2 +-
 net/core/sock.c                                  |  6 +++---
 net/core/sysctl_net_core.c                       |  2 +-
 net/socket.c                                     |  2 +-
 18 files changed, 41 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 1c15043aaee4..d569f2a424d5 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -52,7 +52,7 @@ Default: 64
 
 busy_read
 ----------------
-Low latency busy poll timeout for socket reads. (needs CONFIG_NET_LL_RX_POLL)
+Low latency busy poll timeout for socket reads. (needs CONFIG_NET_RX_BUSY_POLL)
 Approximate time in us to busy loop waiting for packets on the device queue.
 This sets the default value of the SO_BUSY_POLL socket option.
 Can be set or overridden per socket by setting socket option SO_BUSY_POLL,
@@ -63,7 +63,7 @@ Default: 0 (off)
 
 busy_poll
 ----------------
-Low latency busy poll timeout for poll and select. (needs CONFIG_NET_LL_RX_POLL)
+Low latency busy poll timeout for poll and select. (needs CONFIG_NET_RX_BUSY_POLL)
 Approximate time in us to busy loop waiting for events.
 Recommended value depends on the number of sockets you poll on.
 For several sockets 50, for several hundreds 100.
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index dedbd76c033e..d80e34b8285f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -486,7 +486,7 @@ struct bnx2x_fastpath {
 
 	struct napi_struct	napi;
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	unsigned int state;
 #define BNX2X_FP_STATE_IDLE		      0
 #define BNX2X_FP_STATE_NAPI		(1 << 0)    /* NAPI owns this FP */
@@ -498,7 +498,7 @@ struct bnx2x_fastpath {
 #define BNX2X_FP_USER_PEND (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_POLL_YIELD)
 	/* protect state */
 	spinlock_t lock;
-#endif /* CONFIG_NET_LL_RX_POLL */
+#endif /* CONFIG_NET_RX_BUSY_POLL */
 
 	union host_hc_status_block	status_blk;
 	/* chip independent shortcuts into sb structure */
@@ -572,7 +572,7 @@ struct bnx2x_fastpath {
 #define bnx2x_fp_stats(bp, fp)	(&((bp)->fp_stats[(fp)->index]))
 #define bnx2x_fp_qstats(bp, fp)	(&((bp)->fp_stats[(fp)->index].eth_q_stats))
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
 {
 	spin_lock_init(&fp->lock);
@@ -680,7 +680,7 @@ static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
 {
 	return false;
 }
-#endif /* CONFIG_NET_LL_RX_POLL */
+#endif /* CONFIG_NET_RX_BUSY_POLL */
 
 /* Use 2500 as a mini-jumbo MTU for FCoE */
 #define BNX2X_FCOE_MINI_JUMBO_MTU	2500
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index ee350bde1818..f2d1ff10054b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3117,7 +3117,7 @@ int bnx2x_poll(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 /* must be called with local_bh_disable()d */
 int bnx2x_low_latency_recv(struct napi_struct *napi)
 {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index e5da07858a2f..e06186c305d8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12026,7 +12026,7 @@ static const struct net_device_ops bnx2x_netdev_ops = {
 	.ndo_fcoe_get_wwn	= bnx2x_fcoe_get_wwn,
 #endif
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	.ndo_busy_poll		= bnx2x_low_latency_recv,
 #endif
 };
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 7be725cdfea8..a6494e5daffe 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -54,7 +54,7 @@
 
 #include <net/busy_poll.h>
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 #define LL_EXTENDED_STATS
 #endif
 /* common prefix used by pr_<> macros */
@@ -366,7 +366,7 @@ struct ixgbe_q_vector {
 	struct rcu_head rcu;	/* to avoid race with update stats on free */
 	char name[IFNAMSIZ + 9];
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	unsigned int state;
 #define IXGBE_QV_STATE_IDLE        0
 #define IXGBE_QV_STATE_NAPI	   1    /* NAPI owns this QV */
@@ -377,12 +377,12 @@ struct ixgbe_q_vector {
 #define IXGBE_QV_YIELD (IXGBE_QV_STATE_NAPI_YIELD | IXGBE_QV_STATE_POLL_YIELD)
 #define IXGBE_QV_USER_PEND (IXGBE_QV_STATE_POLL | IXGBE_QV_STATE_POLL_YIELD)
 	spinlock_t lock;
-#endif  /* CONFIG_NET_LL_RX_POLL */
+#endif  /* CONFIG_NET_RX_BUSY_POLL */
 
 	/* for dynamic allocation of rings associated with this q_vector */
 	struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp;
 };
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 static inline void ixgbe_qv_init_lock(struct ixgbe_q_vector *q_vector)
 {
 
@@ -462,7 +462,7 @@ static inline bool ixgbe_qv_ll_polling(struct ixgbe_q_vector *q_vector)
 	WARN_ON(!(q_vector->state & IXGBE_QV_LOCKED));
 	return q_vector->state & IXGBE_QV_USER_PEND;
 }
-#else /* CONFIG_NET_LL_RX_POLL */
+#else /* CONFIG_NET_RX_BUSY_POLL */
 static inline void ixgbe_qv_init_lock(struct ixgbe_q_vector *q_vector)
 {
 }
@@ -491,7 +491,7 @@ static inline bool ixgbe_qv_ll_polling(struct ixgbe_q_vector *q_vector)
 {
 	return false;
 }
-#endif /* CONFIG_NET_LL_RX_POLL */
+#endif /* CONFIG_NET_RX_BUSY_POLL */
 
 #ifdef CONFIG_IXGBE_HWMON
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index bad8f14b1941..be4b1fb3d0d2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1998,7 +1998,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 	return total_rx_packets;
 }
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 /* must be called with local_bh_disable()d */
 static int ixgbe_low_latency_recv(struct napi_struct *napi)
 {
@@ -2030,7 +2030,7 @@ static int ixgbe_low_latency_recv(struct napi_struct *napi)
 
 	return found;
 }
-#endif	/* CONFIG_NET_LL_RX_POLL */
+#endif	/* CONFIG_NET_RX_BUSY_POLL */
 
 /**
  * ixgbe_configure_msix - Configure MSI-X hardware
@@ -7227,7 +7227,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= ixgbe_netpoll,
 #endif
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	.ndo_busy_poll		= ixgbe_low_latency_recv,
 #endif
 #ifdef IXGBE_FCOE
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 727874f575ce..a28cd801a236 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -223,7 +223,7 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset)
 	case ETH_SS_STATS:
 		return (priv->stats_bitmap ? bit_count : NUM_ALL_STATS) +
 			(priv->tx_ring_num * 2) +
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 			(priv->rx_ring_num * 5);
 #else
 			(priv->rx_ring_num * 2);
@@ -276,7 +276,7 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
 	for (i = 0; i < priv->rx_ring_num; i++) {
 		data[index++] = priv->rx_ring[i].packets;
 		data[index++] = priv->rx_ring[i].bytes;
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 		data[index++] = priv->rx_ring[i].yields;
 		data[index++] = priv->rx_ring[i].misses;
 		data[index++] = priv->rx_ring[i].cleaned;
@@ -344,7 +344,7 @@ static void mlx4_en_get_strings(struct net_device *dev,
 				"rx%d_packets", i);
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
 				"rx%d_bytes", i);
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
 				"rx%d_napi_yield", i);
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 5eac871399d8..fa37b7a61213 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -68,7 +68,7 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up)
 	return 0;
 }
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 /* must be called with local_bh_disable()d */
 static int mlx4_en_low_latency_recv(struct napi_struct *napi)
 {
@@ -94,7 +94,7 @@ static int mlx4_en_low_latency_recv(struct napi_struct *napi)
 
 	return done;
 }
-#endif	/* CONFIG_NET_LL_RX_POLL */
+#endif	/* CONFIG_NET_RX_BUSY_POLL */
 
 #ifdef CONFIG_RFS_ACCEL
 
@@ -2140,7 +2140,7 @@ static const struct net_device_ops mlx4_netdev_ops = {
 #ifdef CONFIG_RFS_ACCEL
 	.ndo_rx_flow_steer	= mlx4_en_filter_rfs,
 #endif
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	.ndo_busy_poll		= mlx4_en_low_latency_recv,
 #endif
 };
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 35fb60e2320c..5e0aa569306a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -292,7 +292,7 @@ struct mlx4_en_rx_ring {
 	void *rx_info;
 	unsigned long bytes;
 	unsigned long packets;
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	unsigned long yields;
 	unsigned long misses;
 	unsigned long cleaned;
@@ -318,7 +318,7 @@ struct mlx4_en_cq {
 	struct mlx4_cqe *buf;
 #define MLX4_EN_OPCODE_ERROR	0x1e
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	unsigned int state;
 #define MLX4_EN_CQ_STATE_IDLE        0
 #define MLX4_EN_CQ_STATE_NAPI     1    /* NAPI owns this CQ */
@@ -329,7 +329,7 @@ struct mlx4_en_cq {
 #define CQ_YIELD (MLX4_EN_CQ_STATE_NAPI_YIELD | MLX4_EN_CQ_STATE_POLL_YIELD)
 #define CQ_USER_PEND (MLX4_EN_CQ_STATE_POLL | MLX4_EN_CQ_STATE_POLL_YIELD)
 	spinlock_t poll_lock; /* protects from LLS/napi conflicts */
-#endif  /* CONFIG_NET_LL_RX_POLL */
+#endif  /* CONFIG_NET_RX_BUSY_POLL */
 };
 
 struct mlx4_en_port_profile {
@@ -580,7 +580,7 @@ struct mlx4_mac_entry {
 	struct rcu_head rcu;
 };
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq)
 {
 	spin_lock_init(&cq->poll_lock);
@@ -687,7 +687,7 @@ static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq)
 {
 	return false;
 }
-#endif /* CONFIG_NET_LL_RX_POLL */
+#endif /* CONFIG_NET_RX_BUSY_POLL */
 
 #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63)
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0741a1e919a5..9a4156845e93 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -973,7 +973,7 @@ struct net_device_ops {
 						     gfp_t gfp);
 	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
 #endif
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	int			(*ndo_busy_poll)(struct napi_struct *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5afefa01a13c..3b71a4e83642 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -501,7 +501,7 @@ struct sk_buff {
 	/* 7/9 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
-#if defined CONFIG_NET_DMA || defined CONFIG_NET_LL_RX_POLL
+#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
 	union {
 		unsigned int	napi_id;
 		dma_cookie_t	dma_cookie;
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 6cd8848fec68..f18b91966d3d 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -27,7 +27,7 @@
 #include <linux/netdevice.h>
 #include <net/ip.h>
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 
 struct napi_struct;
 extern unsigned int sysctl_net_busy_read __read_mostly;
@@ -146,7 +146,7 @@ static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb)
 	sk->sk_napi_id = skb->napi_id;
 }
 
-#else /* CONFIG_NET_LL_RX_POLL */
+#else /* CONFIG_NET_RX_BUSY_POLL */
 static inline unsigned long net_busy_loop_on(void)
 {
 	return 0;
@@ -186,5 +186,5 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock)
 	return false;
 }
 
-#endif /* CONFIG_NET_LL_RX_POLL */
+#endif /* CONFIG_NET_RX_BUSY_POLL */
 #endif /* _LINUX_NET_BUSY_POLL_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 95a5a2c6925a..31d5cfbb51ec 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -327,7 +327,7 @@ struct sock {
 #ifdef CONFIG_RPS
 	__u32			sk_rxhash;
 #endif
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	unsigned int		sk_napi_id;
 	unsigned int		sk_ll_usec;
 #endif
diff --git a/net/Kconfig b/net/Kconfig
index 37702491abe9..2b406608a1a4 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -244,7 +244,7 @@ config NETPRIO_CGROUP
 	  Cgroup subsystem for use in assigning processes to network priorities on
 	  a per-interface basis
 
-config NET_LL_RX_POLL
+config NET_RX_BUSY_POLL
 	boolean
 	default y
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3df4d4ccf440..2c3d0f53d198 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -740,7 +740,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 
 	skb_copy_secmark(new, old);
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	new->napi_id	= old->napi_id;
 #endif
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 548d716c5f62..2c097c5a35dd 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -900,7 +900,7 @@ set_rcvbuf:
 		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
 		break;
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	case SO_BUSY_POLL:
 		/* allow unprivileged users to decrease the value */
 		if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
@@ -1170,7 +1170,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
 		break;
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	case SO_BUSY_POLL:
 		v.val = sk->sk_ll_usec;
 		break;
@@ -2292,7 +2292,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	sk->sk_stamp = ktime_set(-1L, 0);
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	sk->sk_napi_id		=	0;
 	sk->sk_ll_usec		=	sysctl_net_busy_read;
 #endif
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 660968616637..b59b6804fd98 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -298,7 +298,7 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= flow_limit_table_len_sysctl
 	},
 #endif /* CONFIG_NET_FLOW_LIMIT */
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	{
 		.procname	= "busy_poll",
 		.data		= &sysctl_net_busy_poll,
diff --git a/net/socket.c b/net/socket.c
index 829b460acb87..b2d7c629eeb9 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -106,7 +106,7 @@
 #include <linux/atalk.h>
 #include <net/busy_poll.h>
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 unsigned int sysctl_net_busy_read __read_mostly;
 unsigned int sysctl_net_busy_poll __read_mostly;
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From ed5467da0e369e65b247b99eb6403cb79172bcda Mon Sep 17 00:00:00 2001
From: Andrew Vagin <avagin@openvz.org>
Date: Fri, 2 Aug 2013 21:16:43 +0400
Subject: tracing: Fix fields of struct trace_iterator that are zeroed by
 mistake

tracing_read_pipe zeros all fields bellow "seq". The declaration contains
a comment about that, but it doesn't help.

The first field is "snapshot", it's true when current open file is
snapshot. Looks obvious, that it should not be zeroed.

The second field is "started". It was converted from cpumask_t to
cpumask_var_t (v2.6.28-4983-g4462344), in other words it was
converted from cpumask to pointer on cpumask.

Currently the reference on "started" memory is lost after the first read
from tracing_read_pipe and a proper object will never be freed.

The "started" is never dereferenced for trace_pipe, because trace_pipe
can't have the TRACE_FILE_ANNOTATE options.

Link: http://lkml.kernel.org/r/1375463803-3085183-1-git-send-email-avagin@openvz.org

Cc: stable@vger.kernel.org # 2.6.30
Signed-off-by: Andrew Vagin <avagin@openvz.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 10 ++++++----
 kernel/trace/trace.c         |  1 +
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index f98ab063e95e..120d57a1c3a5 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -78,6 +78,11 @@ struct trace_iterator {
 	/* trace_seq for __print_flags() and __print_symbolic() etc. */
 	struct trace_seq	tmp_seq;
 
+	cpumask_var_t		started;
+
+	/* it's true when current open file is snapshot */
+	bool			snapshot;
+
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
 	struct trace_entry	*ent;
@@ -90,10 +95,7 @@ struct trace_iterator {
 	loff_t			pos;
 	long			idx;
 
-	cpumask_var_t		started;
-
-	/* it's true when current open file is snapshot */
-	bool			snapshot;
+	/* All new field here will be zeroed out in pipe_read */
 };
 
 enum trace_iter_flags {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 882ec1dd1515..f5b35a5e852f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4151,6 +4151,7 @@ waitagain:
 	memset(&iter->seq, 0,
 	       sizeof(struct trace_iterator) -
 	       offsetof(struct trace_iterator, seq));
+	cpumask_clear(iter->started);
 	iter->pos = -1;
 
 	trace_event_read_lock();
-- 
cgit v1.2.3-59-g8ed1b


From e67bc51e574ffe3c4bc1e09cab7658b1e780b4ce Mon Sep 17 00:00:00 2001
From: Dhaval Giani <dhaval.giani@gmail.com>
Date: Fri, 2 Aug 2013 14:47:29 -0400
Subject: tracing: Fix trace_dump_stack() proto when CONFIG_TRACING is not set

When CONFIG_TRACING is not enabled, the stub prototype for trace_dump_stack()
is incorrect. It has (void) when it should be (int).

Link: http://lkml.kernel.org/r/CAPhKKr_H=ukFnBL4WgDOVT5ay2xeF-Ho+CA0DWZX0E2JW-=vSQ@mail.gmail.com

Signed-off-by: Dhaval Giani <dhaval.giani@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kernel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3bef14c6586b..482ad2d84a32 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -629,7 +629,7 @@ extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
 static inline void tracing_start(void) { }
 static inline void tracing_stop(void) { }
 static inline void ftrace_off_permanent(void) { }
-static inline void trace_dump_stack(void) { }
+static inline void trace_dump_stack(int skip) { }
 
 static inline void tracing_on(void) { }
 static inline void tracing_off(void) { }
-- 
cgit v1.2.3-59-g8ed1b


From 007ccfcf89401e764c33965b739310d86a94626d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 6 Aug 2013 14:32:54 +0200
Subject: ACPI: Drop physical_node_id_bitmap from struct acpi_device

The physical_node_id_bitmap in struct acpi_device is only used for
looking up the first currently unused dependent phyiscal node ID
by acpi_bind_one().  It is not really necessary, however, because
acpi_bind_one() walks the entire physical_node_list of the given
device object for sanity checking anyway and if that list is always
sorted by node_id, it is straightforward to find the first gap
between the currently used node IDs and use that number as the ID
of the new list node.

This also removes the artificial limit of the maximum number of
dependent physical devices per ACPI device object, which now depends
only on the capacity of unsigend int.  As a result, it fixes a
regression introduced by commit e2ff394 (ACPI / memhotplug: Bind
removable memory blocks to ACPI device nodes) that caused
acpi_memory_enable_device() to fail when the number of 128 MB blocks
within one removable memory module was greater than 32.

Reported-and-tested-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Toshi Kani <toshi.kani@hp.com>
Reviewed-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
---
 drivers/acpi/glue.c     | 34 +++++++++++++++++++---------------
 include/acpi/acpi_bus.h |  8 ++------
 2 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index f68095756fb7..17e15d11bd39 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -31,6 +31,7 @@ static LIST_HEAD(bus_type_list);
 static DECLARE_RWSEM(bus_type_sem);
 
 #define PHYSICAL_NODE_STRING "physical_node"
+#define PHYSICAL_NODE_NAME_SIZE (sizeof(PHYSICAL_NODE_STRING) + 10)
 
 int register_acpi_bus_type(struct acpi_bus_type *type)
 {
@@ -112,7 +113,9 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
 	struct acpi_device *acpi_dev;
 	acpi_status status;
 	struct acpi_device_physical_node *physical_node, *pn;
-	char physical_node_name[sizeof(PHYSICAL_NODE_STRING) + 2];
+	char physical_node_name[PHYSICAL_NODE_NAME_SIZE];
+	struct list_head *physnode_list;
+	unsigned int node_id;
 	int retval = -EINVAL;
 
 	if (ACPI_HANDLE(dev)) {
@@ -139,25 +142,27 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
 
 	mutex_lock(&acpi_dev->physical_node_lock);
 
-	/* Sanity check. */
-	list_for_each_entry(pn, &acpi_dev->physical_node_list, node)
+	/*
+	 * Keep the list sorted by node_id so that the IDs of removed nodes can
+	 * be recycled easily.
+	 */
+	physnode_list = &acpi_dev->physical_node_list;
+	node_id = 0;
+	list_for_each_entry(pn, &acpi_dev->physical_node_list, node) {
+		/* Sanity check. */
 		if (pn->dev == dev) {
 			dev_warn(dev, "Already associated with ACPI node\n");
 			goto err_free;
 		}
-
-	/* allocate physical node id according to physical_node_id_bitmap */
-	physical_node->node_id =
-		find_first_zero_bit(acpi_dev->physical_node_id_bitmap,
-		ACPI_MAX_PHYSICAL_NODE);
-	if (physical_node->node_id >= ACPI_MAX_PHYSICAL_NODE) {
-		retval = -ENOSPC;
-		goto err_free;
+		if (pn->node_id == node_id) {
+			physnode_list = &pn->node;
+			node_id++;
+		}
 	}
 
-	set_bit(physical_node->node_id, acpi_dev->physical_node_id_bitmap);
+	physical_node->node_id = node_id;
 	physical_node->dev = dev;
-	list_add_tail(&physical_node->node, &acpi_dev->physical_node_list);
+	list_add(&physical_node->node, physnode_list);
 	acpi_dev->physical_node_count++;
 
 	mutex_unlock(&acpi_dev->physical_node_lock);
@@ -208,7 +213,7 @@ int acpi_unbind_one(struct device *dev)
 
 	mutex_lock(&acpi_dev->physical_node_lock);
 	list_for_each_safe(node, next, &acpi_dev->physical_node_list) {
-		char physical_node_name[sizeof(PHYSICAL_NODE_STRING) + 2];
+		char physical_node_name[PHYSICAL_NODE_NAME_SIZE];
 
 		entry = list_entry(node, struct acpi_device_physical_node,
 			node);
@@ -216,7 +221,6 @@ int acpi_unbind_one(struct device *dev)
 			continue;
 
 		list_del(node);
-		clear_bit(entry->node_id, acpi_dev->physical_node_id_bitmap);
 
 		acpi_dev->physical_node_count--;
 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 56e6b68c8d2f..5026aaa35133 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -274,15 +274,12 @@ struct acpi_device_wakeup {
 };
 
 struct acpi_device_physical_node {
-	u8 node_id;
+	unsigned int node_id;
 	struct list_head node;
 	struct device *dev;
 	bool put_online:1;
 };
 
-/* set maximum of physical nodes to 32 for expansibility */
-#define ACPI_MAX_PHYSICAL_NODE	32
-
 /* Device */
 struct acpi_device {
 	int device_type;
@@ -302,10 +299,9 @@ struct acpi_device {
 	struct acpi_driver *driver;
 	void *driver_data;
 	struct device dev;
-	u8 physical_node_count;
+	unsigned int physical_node_count;
 	struct list_head physical_node_list;
 	struct mutex physical_node_lock;
-	DECLARE_BITMAP(physical_node_id_bitmap, ACPI_MAX_PHYSICAL_NODE);
 	struct list_head power_dependent;
 	void (*remove)(struct acpi_device *);
 };
-- 
cgit v1.2.3-59-g8ed1b


From 49ccc142f9cbc33fdda18e8fa90c1c5b4a79c0ad Mon Sep 17 00:00:00 2001
From: Mateusz Krawczuk <m.krawczuk@partner.samsung.com>
Date: Tue, 6 Aug 2013 18:34:40 +0200
Subject: regmap: Add missing header for !CONFIG_REGMAP stubs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

regmap.h requires linux/err.h if CONFIG_REGMAP is not defined. Without it I get
error.
CC      drivers/media/platform/exynos4-is/fimc-reg.o
In file included from drivers/media/platform/exynos4-is/fimc-reg.c:14:0:
include/linux/regmap.h: In function ‘regmap_write’:
include/linux/regmap.h:525:10: error: ‘EINVAL’ undeclared (first use in this function)
include/linux/regmap.h:525:10: note: each undeclared identifier is reported only once for each function it appears in

Signed-off-by: Mateusz Krawczuk <m.krawczuk@partner.samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
Cc: stable@kernel.org
---
 include/linux/regmap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 75981d0b57dc..580a5320cc96 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -15,6 +15,7 @@
 
 #include <linux/list.h>
 #include <linux/rbtree.h>
+#include <linux/err.h>
 
 struct module;
 struct device;
-- 
cgit v1.2.3-59-g8ed1b


From 60f75b8e97daf4a39790a20d962cb861b9220af5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 7 Aug 2013 22:55:00 +0200
Subject: ACPI: Try harder to resolve _ADR collisions for bridges

In theory, under a given ACPI namespace node there should be only
one child device object with _ADR whose value matches a given bus
address exactly.  In practice, however, there are systems in which
multiple child device objects under a given parent have _ADR matching
exactly the same address.  In those cases we use _STA to determine
which of the multiple matching devices is enabled, since some systems
are known to indicate which ACPI device object to associate with the
given physical (usually PCI) device this way.

Unfortunately, as it turns out, there are systems in which many
device objects under the same parent have _ADR matching exactly the
same bus address and none of them has _STA, in which case they all
should be regarded as enabled according to the spec.  Still, if
those device objects are supposed to represent bridges (e.g. this
is the case for device objects corresponding to PCIe ports), we can
try harder and skip the ones that have no child device objects in the
ACPI namespace.  With luck, we can avoid using device objects that we
are not expected to use this way.

Although this only works for bridges whose children also have ACPI
namespace representation, it is sufficient to address graphics
adapter detection issues on some systems, so rework the code finding
a matching device ACPI handle for a given bus address to implement
this idea.

Introduce a new function, acpi_find_child(), taking three arguments:
the ACPI handle of the device's parent, a bus address suitable for
the device's bus type and a bool indicating if the device is a
bridge and make it work as outlined above.  Reimplement the function
currently used for this purpose, acpi_get_child(), as a call to
acpi_find_child() with the last argument set to 'false' and make
the PCI subsystem use acpi_find_child() with the bridge information
passed as the last argument to it.  [Lan Tianyu notices that it is
not sufficient to use pci_is_bridge() for that, because the device's
subordinate pointer hasn't been set yet at this point, so use
hdr_type instead.]

This change fixes a regression introduced inadvertently by commit
33f767d (ACPI: Rework acpi_get_child() to be more efficient) which
overlooked the fact that for acpi_walk_namespace() "post-order" means
"after all children have been visited" rather than "on the way back",
so for device objects without children and for namespace walks of
depth 1, as in the acpi_get_child() case, the "post-order" callbacks
ordering is actually the same as the ordering of "pre-order" ones.
Since that commit changed the namespace walk in acpi_get_child() to
terminate after finding the first matching object instead of going
through all of them and returning the last one, it effectively
changed the result returned by that function in some rare cases and
that led to problems (the switch from a "pre-order" to a "post-order"
callback was supposed to prevent that from happening, but it was
ineffective).

As it turns out, the systems where the change made by commit
33f767d actually matters are those where there are multiple ACPI
device objects representing the same PCIe port (which effectively
is a bridge).  Moreover, only one of them, and the one we are
expected to use, has child device objects in the ACPI namespace,
so the regression can be addressed as described above.

References: https://bugzilla.kernel.org/show_bug.cgi?id=60561
Reported-by: Peter Wu <lekensteyn@gmail.com>
Tested-by: Vladimir Lalov <mail@vlalov.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: 3.9+ <stable@vger.kernel.org> # 3.9+
---
 drivers/acpi/glue.c     | 99 ++++++++++++++++++++++++++++++++++++++++---------
 drivers/pci/pci-acpi.c  | 15 ++++++--
 include/acpi/acpi_bus.h |  6 ++-
 3 files changed, 98 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 17e15d11bd39..408f6b2a5fa8 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -79,34 +79,99 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev)
 	return ret;
 }
 
-static acpi_status do_acpi_find_child(acpi_handle handle, u32 lvl_not_used,
-				      void *addr_p, void **ret_p)
+static acpi_status acpi_dev_present(acpi_handle handle, u32 lvl_not_used,
+				  void *not_used, void **ret_p)
 {
-	unsigned long long addr, sta;
-	acpi_status status;
+	struct acpi_device *adev = NULL;
 
-	status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &addr);
-	if (ACPI_SUCCESS(status) && addr == *((u64 *)addr_p)) {
+	acpi_bus_get_device(handle, &adev);
+	if (adev) {
 		*ret_p = handle;
-		status = acpi_bus_get_status_handle(handle, &sta);
-		if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_ENABLED))
-			return AE_CTRL_TERMINATE;
+		return AE_CTRL_TERMINATE;
 	}
 	return AE_OK;
 }
 
-acpi_handle acpi_get_child(acpi_handle parent, u64 address)
+static bool acpi_extra_checks_passed(acpi_handle handle, bool is_bridge)
 {
-	void *ret = NULL;
+	unsigned long long sta;
+	acpi_status status;
+
+	status = acpi_bus_get_status_handle(handle, &sta);
+	if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
+		return false;
+
+	if (is_bridge) {
+		void *test = NULL;
+
+		/* Check if this object has at least one child device. */
+		acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+				    acpi_dev_present, NULL, NULL, &test);
+		return !!test;
+	}
+	return true;
+}
+
+struct find_child_context {
+	u64 addr;
+	bool is_bridge;
+	acpi_handle ret;
+	bool ret_checked;
+};
+
+static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used,
+				 void *data, void **not_used)
+{
+	struct find_child_context *context = data;
+	unsigned long long addr;
+	acpi_status status;
 
-	if (!parent)
-		return NULL;
+	status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &addr);
+	if (ACPI_FAILURE(status) || addr != context->addr)
+		return AE_OK;
 
-	acpi_walk_namespace(ACPI_TYPE_DEVICE, parent, 1, NULL,
-			    do_acpi_find_child, &address, &ret);
-	return (acpi_handle)ret;
+	if (!context->ret) {
+		/* This is the first matching object.  Save its handle. */
+		context->ret = handle;
+		return AE_OK;
+	}
+	/*
+	 * There is more than one matching object with the same _ADR value.
+	 * That really is unexpected, so we are kind of beyond the scope of the
+	 * spec here.  We have to choose which one to return, though.
+	 *
+	 * First, check if the previously found object is good enough and return
+	 * its handle if so.  Second, check the same for the object that we've
+	 * just found.
+	 */
+	if (!context->ret_checked) {
+		if (acpi_extra_checks_passed(context->ret, context->is_bridge))
+			return AE_CTRL_TERMINATE;
+		else
+			context->ret_checked = true;
+	}
+	if (acpi_extra_checks_passed(handle, context->is_bridge)) {
+		context->ret = handle;
+		return AE_CTRL_TERMINATE;
+	}
+	return AE_OK;
+}
+
+acpi_handle acpi_find_child(acpi_handle parent, u64 addr, bool is_bridge)
+{
+	if (parent) {
+		struct find_child_context context = {
+			.addr = addr,
+			.is_bridge = is_bridge,
+		};
+
+		acpi_walk_namespace(ACPI_TYPE_DEVICE, parent, 1, do_find_child,
+				    NULL, &context, NULL);
+		return context.ret;
+	}
+	return NULL;
 }
-EXPORT_SYMBOL(acpi_get_child);
+EXPORT_SYMBOL_GPL(acpi_find_child);
 
 int acpi_bind_one(struct device *dev, acpi_handle handle)
 {
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index dbdc5f7e2b29..01e264fb50e0 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -317,13 +317,20 @@ void acpi_pci_remove_bus(struct pci_bus *bus)
 /* ACPI bus type */
 static int acpi_pci_find_device(struct device *dev, acpi_handle *handle)
 {
-	struct pci_dev * pci_dev;
-	u64	addr;
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	bool is_bridge;
+	u64 addr;
 
-	pci_dev = to_pci_dev(dev);
+	/*
+	 * pci_is_bridge() is not suitable here, because pci_dev->subordinate
+	 * is set only after acpi_pci_find_device() has been called for the
+	 * given device.
+	 */
+	is_bridge = pci_dev->hdr_type == PCI_HEADER_TYPE_BRIDGE
+			|| pci_dev->hdr_type == PCI_HEADER_TYPE_CARDBUS;
 	/* Please ref to ACPI spec for the syntax of _ADR */
 	addr = (PCI_SLOT(pci_dev->devfn) << 16) | PCI_FUNC(pci_dev->devfn);
-	*handle = acpi_get_child(DEVICE_ACPI_HANDLE(dev->parent), addr);
+	*handle = acpi_find_child(ACPI_HANDLE(dev->parent), addr, is_bridge);
 	if (!*handle)
 		return -ENODEV;
 	return 0;
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 5026aaa35133..94383a70c1a3 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -441,7 +441,11 @@ struct acpi_pci_root {
 };
 
 /* helper */
-acpi_handle acpi_get_child(acpi_handle, u64);
+acpi_handle acpi_find_child(acpi_handle, u64, bool);
+static inline acpi_handle acpi_get_child(acpi_handle handle, u64 addr)
+{
+	return acpi_find_child(handle, addr, false);
+}
 int acpi_is_root_bridge(acpi_handle);
 struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
 #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)ACPI_HANDLE(dev))
-- 
cgit v1.2.3-59-g8ed1b


From 786615bc1ce84150ded80daea6bd9f6297f48e73 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 5 Aug 2013 16:04:47 -0400
Subject: SUNRPC: If the rpcbind channel is disconnected, fail the call to
 unregister

If rpcbind causes our connection to the AF_LOCAL socket to close after
we've registered a service, then we want to be careful about reconnecting
since the mount namespace may have changed.

By simply refusing to reconnect the AF_LOCAL socket in the case of
unregister, we avoid the need to somehow save the mount namespace. While
this may lead to some services not unregistering properly, it should
be safe.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Nix <nix@esperi.org.uk>
Cc: Jeff Layton <jlayton@redhat.com>
Cc: stable@vger.kernel.org # 3.9.x
---
 include/linux/sunrpc/sched.h |  1 +
 net/sunrpc/clnt.c            |  4 ++++
 net/sunrpc/netns.h           |  1 +
 net/sunrpc/rpcb_clnt.c       | 40 +++++++++++++++++++++++++++-------------
 4 files changed, 33 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 6d870353674a..1821445708d6 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -121,6 +121,7 @@ struct rpc_task_setup {
 #define RPC_TASK_SOFTCONN	0x0400		/* Fail if can't connect */
 #define RPC_TASK_SENT		0x0800		/* message was sent */
 #define RPC_TASK_TIMEOUT	0x1000		/* fail with ETIMEDOUT on timeout */
+#define RPC_TASK_NOCONNECT	0x2000		/* return ENOTCONN if not connected */
 
 #define RPC_IS_ASYNC(t)		((t)->tk_flags & RPC_TASK_ASYNC)
 #define RPC_IS_SWAPPER(t)	((t)->tk_flags & RPC_TASK_SWAPPER)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 74f6a704e374..ecbc4e3d83ad 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1660,6 +1660,10 @@ call_connect(struct rpc_task *task)
 		task->tk_action = call_connect_status;
 		if (task->tk_status < 0)
 			return;
+		if (task->tk_flags & RPC_TASK_NOCONNECT) {
+			rpc_exit(task, -ENOTCONN);
+			return;
+		}
 		xprt_connect(task);
 	}
 }
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 74d948f5d5a1..779742cfc1ff 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -23,6 +23,7 @@ struct sunrpc_net {
 	struct rpc_clnt *rpcb_local_clnt4;
 	spinlock_t rpcb_clnt_lock;
 	unsigned int rpcb_users;
+	unsigned int rpcb_is_af_local : 1;
 
 	struct mutex gssp_lock;
 	wait_queue_head_t gssp_wq;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index b0f723227157..1891a1022c17 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -204,13 +204,15 @@ void rpcb_put_local(struct net *net)
 }
 
 static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
-			struct rpc_clnt *clnt4)
+			struct rpc_clnt *clnt4,
+			bool is_af_local)
 {
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
 	/* Protected by rpcb_create_local_mutex */
 	sn->rpcb_local_clnt = clnt;
 	sn->rpcb_local_clnt4 = clnt4;
+	sn->rpcb_is_af_local = is_af_local ? 1 : 0;
 	smp_wmb(); 
 	sn->rpcb_users = 1;
 	dprintk("RPC:       created new rpcb local clients (rpcb_local_clnt: "
@@ -271,7 +273,7 @@ static int rpcb_create_local_unix(struct net *net)
 		clnt4 = NULL;
 	}
 
-	rpcb_set_local(net, clnt, clnt4);
+	rpcb_set_local(net, clnt, clnt4, true);
 
 out:
 	return result;
@@ -323,7 +325,7 @@ static int rpcb_create_local_net(struct net *net)
 		clnt4 = NULL;
 	}
 
-	rpcb_set_local(net, clnt, clnt4);
+	rpcb_set_local(net, clnt, clnt4, false);
 
 out:
 	return result;
@@ -384,13 +386,16 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname,
 	return rpc_create(&args);
 }
 
-static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg)
+static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, struct rpc_message *msg, bool is_set)
 {
-	int result, error = 0;
+	int flags = RPC_TASK_NOCONNECT;
+	int error, result = 0;
 
+	if (is_set || !sn->rpcb_is_af_local)
+		flags = RPC_TASK_SOFTCONN;
 	msg->rpc_resp = &result;
 
-	error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN);
+	error = rpc_call_sync(clnt, msg, flags);
 	if (error < 0) {
 		dprintk("RPC:       failed to contact local rpcbind "
 				"server (errno %d).\n", -error);
@@ -447,16 +452,19 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short
 		.rpc_argp	= &map,
 	};
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+	bool is_set = false;
 
 	dprintk("RPC:       %sregistering (%u, %u, %d, %u) with local "
 			"rpcbind\n", (port ? "" : "un"),
 			prog, vers, prot, port);
 
 	msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET];
-	if (port)
+	if (port != 0) {
 		msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
+		is_set = true;
+	}
 
-	return rpcb_register_call(sn->rpcb_local_clnt, &msg);
+	return rpcb_register_call(sn, sn->rpcb_local_clnt, &msg, is_set);
 }
 
 /*
@@ -469,6 +477,7 @@ static int rpcb_register_inet4(struct sunrpc_net *sn,
 	const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
 	struct rpcbind_args *map = msg->rpc_argp;
 	unsigned short port = ntohs(sin->sin_port);
+	bool is_set = false;
 	int result;
 
 	map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
@@ -479,10 +488,12 @@ static int rpcb_register_inet4(struct sunrpc_net *sn,
 			map->r_addr, map->r_netid);
 
 	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
-	if (port)
+	if (port != 0) {
 		msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
+		is_set = true;
+	}
 
-	result = rpcb_register_call(sn->rpcb_local_clnt4, msg);
+	result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set);
 	kfree(map->r_addr);
 	return result;
 }
@@ -497,6 +508,7 @@ static int rpcb_register_inet6(struct sunrpc_net *sn,
 	const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
 	struct rpcbind_args *map = msg->rpc_argp;
 	unsigned short port = ntohs(sin6->sin6_port);
+	bool is_set = false;
 	int result;
 
 	map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
@@ -507,10 +519,12 @@ static int rpcb_register_inet6(struct sunrpc_net *sn,
 			map->r_addr, map->r_netid);
 
 	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
-	if (port)
+	if (port != 0) {
 		msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
+		is_set = true;
+	}
 
-	result = rpcb_register_call(sn->rpcb_local_clnt4, msg);
+	result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set);
 	kfree(map->r_addr);
 	return result;
 }
@@ -527,7 +541,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn,
 	map->r_addr = "";
 	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
 
-	return rpcb_register_call(sn->rpcb_local_clnt4, msg);
+	return rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, false);
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 8742f229b635bf1c1c84a3dfe5e47c814c20b5c8 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 8 Aug 2013 18:55:32 +0200
Subject: userns: limit the maximum depth of user_namespace->parent chain

Ensure that user_namespace->parent chain can't grow too much.
Currently we use the hardroded 32 as limit.

Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/user_namespace.h | 1 +
 kernel/user_namespace.c        | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index b6b215f13b45..14105c26a836 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -23,6 +23,7 @@ struct user_namespace {
 	struct uid_gid_map	projid_map;
 	atomic_t		count;
 	struct user_namespace	*parent;
+	int			level;
 	kuid_t			owner;
 	kgid_t			group;
 	unsigned int		proc_inum;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 6e50a44610ee..9064b919a406 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -62,6 +62,9 @@ int create_user_ns(struct cred *new)
 	kgid_t group = new->egid;
 	int ret;
 
+	if (parent_ns->level > 32)
+		return -EUSERS;
+
 	/*
 	 * Verify that we can not violate the policy of which files
 	 * may be accessed that is specified by the root directory,
@@ -92,6 +95,7 @@ int create_user_ns(struct cred *new)
 	atomic_set(&ns->count, 1);
 	/* Leave the new->user_ns reference with the new user namespace. */
 	ns->parent = parent_ns;
+	ns->level = parent_ns->level + 1;
 	ns->owner = owner;
 	ns->group = group;
 
-- 
cgit v1.2.3-59-g8ed1b