aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/aoe/aoedev.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/aoe/aoedev.c')
-rw-r--r--drivers/block/aoe/aoedev.c265
1 files changed, 204 insertions, 61 deletions
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 6b5110a47458..90e5b537f94b 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */
+/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */
/*
* aoedev.c
* AoE device utility functions; maintains device list.
@@ -9,6 +9,9 @@
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
+#include <linux/bitmap.h>
+#include <linux/kdev_t.h>
+#include <linux/moduleparam.h>
#include "aoe.h"
static void dummy_timer(ulong);
@@ -16,23 +19,121 @@ static void aoedev_freedev(struct aoedev *);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);
+static int aoe_dyndevs = 1;
+module_param(aoe_dyndevs, int, 0644);
+MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");
+
static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);
-struct aoedev *
-aoedev_by_aoeaddr(int maj, int min)
+/* Because some systems will have one, many, or no
+ * - partitions,
+ * - slots per shelf,
+ * - or shelves,
+ * we need some flexibility in the way the minor numbers
+ * are allocated. So they are dynamic.
+ */
+#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
+
+static DEFINE_SPINLOCK(used_minors_lock);
+static DECLARE_BITMAP(used_minors, N_DEVS);
+
+static int
+minor_get_dyn(ulong *sysminor)
{
- struct aoedev *d;
ulong flags;
+ ulong n;
+ int error = 0;
+
+ spin_lock_irqsave(&used_minors_lock, flags);
+ n = find_first_zero_bit(used_minors, N_DEVS);
+ if (n < N_DEVS)
+ set_bit(n, used_minors);
+ else
+ error = -1;
+ spin_unlock_irqrestore(&used_minors_lock, flags);
+
+ *sysminor = n * AOE_PARTITIONS;
+ return error;
+}
- spin_lock_irqsave(&devlist_lock, flags);
+static int
+minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
+{
+ ulong flags;
+ ulong n;
+ int error = 0;
+ enum {
+ /* for backwards compatibility when !aoe_dyndevs,
+ * a static number of supported slots per shelf */
+ NPERSHELF = 16,
+ };
+
+ n = aoemaj * NPERSHELF + aoemin;
+ if (aoemin >= NPERSHELF || n >= N_DEVS) {
+ pr_err("aoe: %s with e%ld.%d\n",
+ "cannot use static minor device numbers",
+ aoemaj, aoemin);
+ error = -1;
+ } else {
+ spin_lock_irqsave(&used_minors_lock, flags);
+ if (test_bit(n, used_minors)) {
+ pr_err("aoe: %s %lu\n",
+ "existing device already has static minor number",
+ n);
+ error = -1;
+ } else
+ set_bit(n, used_minors);
+ spin_unlock_irqrestore(&used_minors_lock, flags);
+ }
- for (d=devlist; d; d=d->next)
- if (d->aoemajor == maj && d->aoeminor == min)
- break;
+ *sysminor = n;
+ return error;
+}
+
+static int
+minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
+{
+ if (aoe_dyndevs)
+ return minor_get_dyn(sysminor);
+ else
+ return minor_get_static(sysminor, aoemaj, aoemin);
+}
+
+static void
+minor_free(ulong minor)
+{
+ ulong flags;
+
+ minor /= AOE_PARTITIONS;
+ BUG_ON(minor >= N_DEVS);
+
+ spin_lock_irqsave(&used_minors_lock, flags);
+ BUG_ON(!test_bit(minor, used_minors));
+ clear_bit(minor, used_minors);
+ spin_unlock_irqrestore(&used_minors_lock, flags);
+}
+
+/*
+ * Users who grab a pointer to the device with aoedev_by_aoeaddr
+ * automatically get a reference count and must be responsible
+ * for performing a aoedev_put. With the addition of async
+ * kthread processing I'm no longer confident that we can
+ * guarantee consistency in the face of device flushes.
+ *
+ * For the time being, we only bother to add extra references for
+ * frames sitting on the iocq. When the kthreads finish processing
+ * these frames, they will aoedev_put the device.
+ */
+
+void
+aoedev_put(struct aoedev *d)
+{
+ ulong flags;
+ spin_lock_irqsave(&devlist_lock, flags);
+ d->ref--;
spin_unlock_irqrestore(&devlist_lock, flags);
- return d;
}
static void
@@ -47,54 +148,74 @@ dummy_timer(ulong vp)
add_timer(&d->timer);
}
+static void
+aoe_failip(struct aoedev *d)
+{
+ struct request *rq;
+ struct bio *bio;
+ unsigned long n;
+
+ aoe_failbuf(d, d->ip.buf);
+
+ rq = d->ip.rq;
+ if (rq == NULL)
+ return;
+ while ((bio = d->ip.nxbio)) {
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ d->ip.nxbio = bio->bi_next;
+ n = (unsigned long) rq->special;
+ rq->special = (void *) --n;
+ }
+ if ((unsigned long) rq->special == 0)
+ aoe_end_request(d, rq, 0);
+}
+
void
aoedev_downdev(struct aoedev *d)
{
- struct aoetgt **t, **te;
- struct frame *f, *e;
- struct buf *buf;
- struct bio *bio;
+ struct aoetgt *t, **tt, **te;
+ struct frame *f;
+ struct list_head *head, *pos, *nx;
+ struct request *rq;
+ int i;
- t = d->targets;
- te = t + NTARGETS;
- for (; t < te && *t; t++) {
- f = (*t)->frames;
- e = f + (*t)->nframes;
- for (; f < e; f->tag = FREETAG, f->buf = NULL, f++) {
- if (f->tag == FREETAG || f->buf == NULL)
- continue;
- buf = f->buf;
- bio = buf->bio;
- if (--buf->nframesout == 0
- && buf != d->inprocess) {
- mempool_free(buf, d->bufpool);
- bio_endio(bio, -EIO);
+ d->flags &= ~DEVFL_UP;
+
+ /* clean out active buffers */
+ for (i = 0; i < NFACTIVE; i++) {
+ head = &d->factive[i];
+ list_for_each_safe(pos, nx, head) {
+ f = list_entry(pos, struct frame, head);
+ list_del(pos);
+ if (f->buf) {
+ f->buf->nframesout--;
+ aoe_failbuf(d, f->buf);
}
+ aoe_freetframe(f);
}
- (*t)->maxout = (*t)->nframes;
- (*t)->nout = 0;
}
- buf = d->inprocess;
- if (buf) {
- bio = buf->bio;
- mempool_free(buf, d->bufpool);
- bio_endio(bio, -EIO);
+ /* reset window dressings */
+ tt = d->targets;
+ te = tt + NTARGETS;
+ for (; tt < te && (t = *tt); tt++) {
+ t->maxout = t->nframes;
+ t->nout = 0;
}
- d->inprocess = NULL;
+
+ /* clean out the in-process request (if any) */
+ aoe_failip(d);
d->htgt = NULL;
- while (!list_empty(&d->bufq)) {
- buf = container_of(d->bufq.next, struct buf, bufs);
- list_del(d->bufq.next);
- bio = buf->bio;
- mempool_free(buf, d->bufpool);
- bio_endio(bio, -EIO);
+ /* fast fail all pending I/O */
+ if (d->blkq) {
+ while ((rq = blk_peek_request(d->blkq))) {
+ blk_start_request(rq);
+ aoe_end_request(d, rq, 1);
+ }
}
if (d->gd)
set_capacity(d->gd, 0);
-
- d->flags &= ~DEVFL_UP;
}
static void
@@ -107,6 +228,7 @@ aoedev_freedev(struct aoedev *d)
aoedisk_rm_sysfs(d);
del_gendisk(d->gd);
put_disk(d->gd);
+ blk_cleanup_queue(d->blkq);
}
t = d->targets;
e = t + NTARGETS;
@@ -115,7 +237,7 @@ aoedev_freedev(struct aoedev *d)
if (d->bufpool)
mempool_destroy(d->bufpool);
skbpoolfree(d);
- blk_cleanup_queue(d->blkq);
+ minor_free(d->sysminor);
kfree(d);
}
@@ -142,7 +264,8 @@ aoedev_flush(const char __user *str, size_t cnt)
spin_lock(&d->lock);
if ((!all && (d->flags & DEVFL_UP))
|| (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
- || d->nopen) {
+ || d->nopen
+ || d->ref) {
spin_unlock(&d->lock);
dd = &d->next;
continue;
@@ -163,12 +286,15 @@ aoedev_flush(const char __user *str, size_t cnt)
return 0;
}
-/* I'm not really sure that this is a realistic problem, but if the
-network driver goes gonzo let's just leak memory after complaining. */
+/* This has been confirmed to occur once with Tms=3*1000 due to the
+ * driver changing link and not processing its transmit ring. The
+ * problem is hard enough to solve by returning an error that I'm
+ * still punting on "solving" this.
+ */
static void
skbfree(struct sk_buff *skb)
{
- enum { Sms = 100, Tms = 3*1000};
+ enum { Sms = 250, Tms = 30 * 1000};
int i = Tms / Sms;
if (skb == NULL)
@@ -182,6 +308,7 @@ skbfree(struct sk_buff *skb)
"cannot free skb -- memory leaked.");
return;
}
+ skb->truesize -= skb->data_len;
skb_shinfo(skb)->nr_frags = skb->data_len = 0;
skb_trim(skb, 0);
dev_kfree_skb(skb);
@@ -198,26 +325,29 @@ skbpoolfree(struct aoedev *d)
__skb_queue_head_init(&d->skbpool);
}
-/* find it or malloc it */
+/* find it or allocate it */
struct aoedev *
-aoedev_by_sysminor_m(ulong sysminor)
+aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
struct aoedev *d;
+ int i;
ulong flags;
+ ulong sysminor;
spin_lock_irqsave(&devlist_lock, flags);
for (d=devlist; d; d=d->next)
- if (d->sysminor == sysminor)
+ if (d->aoemajor == maj && d->aoeminor == min) {
+ d->ref++;
break;
- if (d)
+ }
+ if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
goto out;
d = kcalloc(1, sizeof *d, GFP_ATOMIC);
if (!d)
goto out;
INIT_WORK(&d->work, aoecmd_sleepwork);
spin_lock_init(&d->lock);
- skb_queue_head_init(&d->sendq);
skb_queue_head_init(&d->skbpool);
init_timer(&d->timer);
d->timer.data = (ulong) d;
@@ -226,10 +356,12 @@ aoedev_by_sysminor_m(ulong sysminor)
add_timer(&d->timer);
d->bufpool = NULL; /* defer to aoeblk_gdalloc */
d->tgt = d->targets;
- INIT_LIST_HEAD(&d->bufq);
+ d->ref = 1;
+ for (i = 0; i < NFACTIVE; i++)
+ INIT_LIST_HEAD(&d->factive[i]);
d->sysminor = sysminor;
- d->aoemajor = AOEMAJOR(sysminor);
- d->aoeminor = AOEMINOR(sysminor);
+ d->aoemajor = maj;
+ d->aoeminor = min;
d->mintimer = MINTIMER;
d->next = devlist;
devlist = d;
@@ -241,13 +373,23 @@ aoedev_by_sysminor_m(ulong sysminor)
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
- struct frame *f, *e;
+ struct frame *f;
+ struct list_head *pos, *nx, *head;
+ struct aoeif *ifp;
- f = t->frames;
- e = f + t->nframes;
- for (; f < e; f++)
+ for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
+ if (!ifp->nd)
+ break;
+ dev_put(ifp->nd);
+ }
+
+ head = &t->ffree;
+ list_for_each_safe(pos, nx, head) {
+ list_del(pos);
+ f = list_entry(pos, struct frame, head);
skbfree(f->skb);
- kfree(t->frames);
+ kfree(f);
+ }
kfree(t);
}
@@ -257,6 +399,7 @@ aoedev_exit(void)
struct aoedev *d;
ulong flags;
+ aoe_flush_iocq();
while ((d = devlist)) {
devlist = d->next;