aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2017-11-02 15:27:51 +0000
committerDavid Howells <dhowells@redhat.com>2017-11-13 15:38:19 +0000
commitbf99a53ce22a29d64d3190093edf52f1d44d53b3 (patch)
treee7544749643200b92e9d53dd09b28ec0a73a6d07
parentafs: Overhaul volume and server record caching and fileserver rotation (diff)
downloadlinux-dev-bf99a53ce22a29d64d3190093edf52f1d44d53b3.tar.xz
linux-dev-bf99a53ce22a29d64d3190093edf52f1d44d53b3.zip
afs: Make use of the YFS service upgrade to fully support IPv6
YFS VL servers offer an upgraded Volume Location service that can return IPv6 addresses to fileservers and volume servers in addition to IPv4 addresses using the YFSVL.GetEndpoints operation which we should use if it's available. To this end: (1) Make rxrpc_kernel_recv_data() return the call's current service ID so that the caller can detect service upgrade and see what the service was upgraded to. (2) When we see a VL server address we haven't seen before, send a VL.GetCapabilities operation to it with the service upgrade bit set. If we get an upgrade to the YFS VL service, change the service ID in the address list for that address to use the upgraded service and set a flag to note that this appears to be a YFS-compatible server. (3) If, when a server's addresses are being looked up, we note that we previously detected a YFS-compatible server, then send the YFSVL.GetEndpoints operation rather than VL.GetAddrsU. (4) Build a fileserver address list from the reply of YFSVL.GetEndpoints, including both IPv4 and IPv6 addresses. Volume server addresses are discarded. (5) The address list is sorted by address and port now, instead of just address. This allows multiple servers on the same host sitting on different ports. Signed-off-by: David Howells <dhowells@redhat.com>
-rw-r--r--fs/afs/addr_list.c54
-rw-r--r--fs/afs/afs_vl.h16
-rw-r--r--fs/afs/internal.h10
-rw-r--r--fs/afs/server.c5
-rw-r--r--fs/afs/vlclient.c337
-rw-r--r--fs/afs/volume.c16
6 files changed, 428 insertions, 10 deletions
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index b91e59a77f0e..a537368ba0db 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -17,9 +17,10 @@
#include "internal.h"
#include "afs_fs.h"
-#define AFS_MAX_ADDRESSES \
- ((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) / \
- sizeof(struct sockaddr_rxrpc)))
+//#define AFS_MAX_ADDRESSES
+// ((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) /
+// sizeof(struct sockaddr_rxrpc)))
+#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
/*
* Release an address list.
@@ -230,15 +231,20 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
/*
* Merge an IPv4 entry into a fileserver address list.
*/
-void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr)
+void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
{
struct sockaddr_in6 *a;
+ __be16 xport = htons(port);
int i;
for (i = 0; i < alist->nr_ipv4; i++) {
a = &alist->addrs[i].transport.sin6;
- if (xdr == a->sin6_addr.s6_addr32[3])
+ if (xdr == a->sin6_addr.s6_addr32[3] &&
+ xport == a->sin6_port)
return;
+ if (xdr == a->sin6_addr.s6_addr32[3] &&
+ xport < a->sin6_port)
+ break;
if (xdr < a->sin6_addr.s6_addr32[3])
break;
}
@@ -249,7 +255,7 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr)
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
a = &alist->addrs[i].transport.sin6;
- a->sin6_port = htons(AFS_FS_PORT);
+ a->sin6_port = xport;
a->sin6_addr.s6_addr32[0] = 0;
a->sin6_addr.s6_addr32[1] = 0;
a->sin6_addr.s6_addr32[2] = htonl(0xffff);
@@ -259,6 +265,42 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr)
}
/*
+ * Merge an IPv6 entry into a fileserver address list.
+ */
+void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
+{
+ struct sockaddr_in6 *a;
+ __be16 xport = htons(port);
+ int i, diff;
+
+ for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
+ a = &alist->addrs[i].transport.sin6;
+ diff = memcmp(xdr, &a->sin6_addr, 16);
+ if (diff == 0 &&
+ xport == a->sin6_port)
+ return;
+ if (diff == 0 &&
+ xport < a->sin6_port)
+ break;
+ if (diff < 0)
+ break;
+ }
+
+ if (i < alist->nr_addrs)
+ memmove(alist->addrs + i + 1,
+ alist->addrs + i,
+ sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
+
+ a = &alist->addrs[i].transport.sin6;
+ a->sin6_port = xport;
+ a->sin6_addr.s6_addr32[0] = xdr[0];
+ a->sin6_addr.s6_addr32[1] = xdr[1];
+ a->sin6_addr.s6_addr32[2] = xdr[2];
+ a->sin6_addr.s6_addr32[3] = xdr[3];
+ alist->nr_addrs++;
+}
+
+/*
* Get an address to try.
*/
bool afs_iterate_addresses(struct afs_addr_cursor *ac)
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index 6350b417aee9..e3c4688f573b 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -16,6 +16,7 @@
#define AFS_VL_PORT 7003 /* volume location service port */
#define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */
+#define YFS_VL_SERVICE 2503 /* Service ID for AuriStor upgraded VL service */
enum AFSVL_Operations {
VLGETENTRYBYID = 503, /* AFS Get VLDB entry by ID */
@@ -24,6 +25,8 @@ enum AFSVL_Operations {
VLGETENTRYBYIDU = 526, /* AFS Get VLDB entry by ID (UUID-variant) */
VLGETENTRYBYNAMEU = 527, /* AFS Get VLDB entry by name (UUID-variant) */
VLGETADDRSU = 533, /* AFS Get addrs for fileserver */
+ YVLGETENDPOINTS = 64002, /* YFS Get endpoints for file/volume server */
+ VLGETCAPABILITIES = 65537, /* AFS Get server capabilities */
};
enum AFSVL_Errors {
@@ -57,6 +60,19 @@ enum AFSVL_Errors {
AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */
};
+enum {
+ YFS_SERVER_INDEX = 0,
+ YFS_SERVER_UUID = 1,
+ YFS_SERVER_ENDPOINT = 2,
+};
+
+enum {
+ YFS_ENDPOINT_IPV4 = 0,
+ YFS_ENDPOINT_IPV6 = 1,
+};
+
+#define YFS_MAXENDPOINTS 16
+
/*
* maps to "struct vldbentry" in vvl-spec.pdf
*/
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 1fadf40551fd..767317bf33db 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -70,6 +70,8 @@ struct afs_addr_list {
unsigned short nr_addrs;
unsigned short index; /* Address currently in use */
unsigned short nr_ipv4; /* Number of IPv4 addresses */
+ unsigned long probed; /* Mask of servers that have been probed */
+ unsigned long yfs; /* Mask of servers that are YFS */
struct sockaddr_rxrpc addrs[];
};
@@ -113,7 +115,7 @@ struct afs_call {
bool async; /* T if asynchronous */
bool ret_reply0; /* T if should return reply[0] on success */
bool upgrade; /* T to request service upgrade */
- u16 service_id; /* RxRPC service ID to call */
+ u16 service_id; /* Actual service ID (after upgrade) */
u32 operation_ID; /* operation ID for an incoming call */
u32 count; /* count for use in unmarshalling */
__be32 tmp; /* place to extract temporary data */
@@ -564,7 +566,8 @@ extern bool afs_iterate_addresses(struct afs_addr_cursor *);
extern int afs_end_cursor(struct afs_addr_cursor *);
extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);
-extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32);
+extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
+extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
/*
* cache.c
@@ -846,6 +849,9 @@ extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *,
struct key *, const char *, int);
extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *,
struct key *, const uuid_t *);
+extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
+extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *,
+ struct key *, const uuid_t *);
/*
* volume.c
diff --git a/fs/afs/server.c b/fs/afs/server.c
index a6c860bcf391..1880f1b6a9f1 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -266,7 +266,10 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
return ERR_PTR(ret);
while (afs_iterate_addresses(&ac)) {
- alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
+ if (test_bit(ac.index, &ac.alist->yfs))
+ alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
+ else
+ alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
switch (ac.error) {
case 0:
afs_end_cursor(&ac);
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 173c652fe875..1d38cbdf6cad 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -225,7 +225,7 @@ again:
bp = call->buffer;
for (i = 0; i < count; i++)
if (alist->nr_addrs < call->count2)
- afs_merge_fs_addr4(alist, *bp++);
+ afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT);
call->count -= count;
if (call->count > 0)
@@ -300,3 +300,338 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
}
+
+/*
+ * Deliver reply data to an VL.GetCapabilities operation.
+ */
+static int afs_deliver_vl_get_capabilities(struct afs_call *call)
+{
+ u32 count;
+ int ret;
+
+ _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+
+again:
+ switch (call->unmarshall) {
+ case 0:
+ call->offset = 0;
+ call->unmarshall++;
+
+ /* Extract the capabilities word count */
+ case 1:
+ ret = afs_extract_data(call, &call->tmp,
+ 1 * sizeof(__be32),
+ true);
+ if (ret < 0)
+ return ret;
+
+ count = ntohl(call->tmp);
+
+ call->count = count;
+ call->count2 = count;
+ call->offset = 0;
+ call->unmarshall++;
+
+ /* Extract capabilities words */
+ case 2:
+ count = min(call->count, 16U);
+ ret = afs_extract_data(call, call->buffer,
+ count * sizeof(__be32),
+ call->count > 16);
+ if (ret < 0)
+ return ret;
+
+ /* TODO: Examine capabilities */
+
+ call->count -= count;
+ if (call->count > 0)
+ goto again;
+ call->offset = 0;
+ call->unmarshall++;
+ break;
+ }
+
+ call->reply[0] = (void *)(unsigned long)call->service_id;
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * VL.GetCapabilities operation type
+ */
+static const struct afs_call_type afs_RXVLGetCapabilities = {
+ .name = "VL.GetCapabilities",
+ .deliver = afs_deliver_vl_get_capabilities,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Probe a fileserver for the capabilities that it supports. This can
+ * return up to 196 words.
+ *
+ * We use this to probe for service upgrade to determine what the server at the
+ * other end supports.
+ */
+int afs_vl_get_capabilities(struct afs_net *net,
+ struct afs_addr_cursor *ac,
+ struct key *key)
+{
+ struct afs_call *call;
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(net, &afs_RXVLGetCapabilities, 1 * 4, 16 * 4);
+ if (!call)
+ return -ENOMEM;
+
+ call->key = key;
+ call->upgrade = true; /* Let's see if this is a YFS server */
+ call->reply[0] = (void *)VLGETCAPABILITIES;
+ call->ret_reply0 = true;
+
+ /* marshall the parameters */
+ bp = call->request;
+ *bp++ = htonl(VLGETCAPABILITIES);
+
+ /* Can't take a ref on server */
+ return afs_make_call(ac, call, GFP_KERNEL, false);
+}
+
+/*
+ * Deliver reply data to a YFSVL.GetEndpoints call.
+ *
+ * GetEndpoints(IN yfsServerAttributes *attr,
+ * OUT opr_uuid *uuid,
+ * OUT afs_int32 *uniquifier,
+ * OUT endpoints *fsEndpoints,
+ * OUT endpoints *volEndpoints)
+ */
+static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
+{
+ struct afs_addr_list *alist;
+ __be32 *bp;
+ u32 uniquifier, size;
+ int ret;
+
+ _enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2);
+
+again:
+ switch (call->unmarshall) {
+ case 0:
+ call->offset = 0;
+ call->unmarshall = 1;
+
+ /* Extract the returned uuid, uniquifier, fsEndpoints count and
+ * either the first fsEndpoint type or the volEndpoints
+ * count if there are no fsEndpoints. */
+ case 1:
+ ret = afs_extract_data(call, call->buffer,
+ sizeof(uuid_t) +
+ 3 * sizeof(__be32),
+ true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer + sizeof(uuid_t);
+ uniquifier = ntohl(*bp++);
+ call->count = ntohl(*bp++);
+ call->count2 = ntohl(*bp); /* Type or next count */
+
+ if (call->count > YFS_MAXENDPOINTS)
+ return -EBADMSG;
+
+ alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT);
+ if (!alist)
+ return -ENOMEM;
+ alist->version = uniquifier;
+ call->reply[0] = alist;
+ call->offset = 0;
+
+ if (call->count == 0)
+ goto extract_volendpoints;
+
+ call->unmarshall = 2;
+
+ /* Extract fsEndpoints[] entries */
+ case 2:
+ switch (call->count2) {
+ case YFS_ENDPOINT_IPV4:
+ size = sizeof(__be32) * (1 + 1 + 1);
+ break;
+ case YFS_ENDPOINT_IPV6:
+ size = sizeof(__be32) * (1 + 4 + 1);
+ break;
+ default:
+ return -EBADMSG;
+ }
+
+ size += sizeof(__be32);
+ ret = afs_extract_data(call, call->buffer, size, true);
+ if (ret < 0)
+ return ret;
+
+ alist = call->reply[0];
+ bp = call->buffer;
+ switch (call->count2) {
+ case YFS_ENDPOINT_IPV4:
+ if (ntohl(bp[0]) != sizeof(__be32) * 2)
+ return -EBADMSG;
+ afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2]));
+ bp += 3;
+ break;
+ case YFS_ENDPOINT_IPV6:
+ if (ntohl(bp[0]) != sizeof(__be32) * 5)
+ return -EBADMSG;
+ afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5]));
+ bp += 6;
+ break;
+ default:
+ return -EBADMSG;
+ }
+
+ /* Got either the type of the next entry or the count of
+ * volEndpoints if no more fsEndpoints.
+ */
+ call->count2 = htonl(*bp++);
+
+ call->offset = 0;
+ call->count--;
+ if (call->count > 0)
+ goto again;
+
+ extract_volendpoints:
+ /* Extract the list of volEndpoints. */
+ call->count = call->count2;
+ if (!call->count)
+ goto end;
+ if (call->count > YFS_MAXENDPOINTS)
+ return -EBADMSG;
+
+ call->unmarshall = 3;
+
+ /* Extract the type of volEndpoints[0]. Normally we would
+ * extract the type of the next endpoint when we extract the
+ * data of the current one, but this is the first...
+ */
+ case 3:
+ ret = afs_extract_data(call, call->buffer, sizeof(__be32), true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ call->count2 = htonl(*bp++);
+ call->offset = 0;
+ call->unmarshall = 4;
+
+ /* Extract volEndpoints[] entries */
+ case 4:
+ switch (call->count2) {
+ case YFS_ENDPOINT_IPV4:
+ size = sizeof(__be32) * (1 + 1 + 1);
+ break;
+ case YFS_ENDPOINT_IPV6:
+ size = sizeof(__be32) * (1 + 4 + 1);
+ break;
+ default:
+ return -EBADMSG;
+ }
+
+ if (call->count > 1)
+ size += sizeof(__be32);
+ ret = afs_extract_data(call, call->buffer, size, true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ switch (call->count2) {
+ case YFS_ENDPOINT_IPV4:
+ if (ntohl(bp[0]) != sizeof(__be32) * 2)
+ return -EBADMSG;
+ bp += 3;
+ break;
+ case YFS_ENDPOINT_IPV6:
+ if (ntohl(bp[0]) != sizeof(__be32) * 5)
+ return -EBADMSG;
+ bp += 6;
+ break;
+ default:
+ return -EBADMSG;
+ }
+
+ /* Got either the type of the next entry or the count of
+ * volEndpoints if no more fsEndpoints.
+ */
+ call->offset = 0;
+ call->count--;
+ if (call->count > 0) {
+ call->count2 = htonl(*bp++);
+ goto again;
+ }
+
+ end:
+ call->unmarshall = 5;
+
+ /* Done */
+ case 5:
+ ret = afs_extract_data(call, call->buffer, 0, false);
+ if (ret < 0)
+ return ret;
+ call->unmarshall = 6;
+
+ case 6:
+ break;
+ }
+
+ alist = call->reply[0];
+
+ /* Start with IPv6 if available. */
+ if (alist->nr_ipv4 < alist->nr_addrs)
+ alist->index = alist->nr_ipv4;
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * YFSVL.GetEndpoints operation type.
+ */
+static const struct afs_call_type afs_YFSVLGetEndpoints = {
+ .name = "VL.GetEndpoints",
+ .deliver = afs_deliver_yfsvl_get_endpoints,
+ .destructor = afs_vl_get_addrs_u_destructor,
+};
+
+/*
+ * Dispatch an operation to get the addresses for a server, where the server is
+ * nominated by UUID.
+ */
+struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
+ struct afs_addr_cursor *ac,
+ struct key *key,
+ const uuid_t *uuid)
+{
+ struct afs_call *call;
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(net, &afs_YFSVLGetEndpoints,
+ sizeof(__be32) * 2 + sizeof(*uuid),
+ sizeof(struct in6_addr) + sizeof(__be32) * 3);
+ if (!call)
+ return ERR_PTR(-ENOMEM);
+
+ call->key = key;
+ call->reply[0] = NULL;
+ call->ret_reply0 = true;
+
+ /* Marshall the parameters */
+ bp = call->request;
+ *bp++ = htonl(YVLGETENDPOINTS);
+ *bp++ = htonl(YFS_SERVER_UUID);
+ memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
+
+ return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
+}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 2295dd4f9b15..684c48293353 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -124,6 +124,22 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
return ERR_PTR(ret);
while (afs_iterate_addresses(&ac)) {
+ if (!test_bit(ac.index, &ac.alist->probed)) {
+ ret = afs_vl_get_capabilities(cell->net, &ac, key);
+ switch (ret) {
+ case VL_SERVICE:
+ clear_bit(ac.index, &ac.alist->yfs);
+ set_bit(ac.index, &ac.alist->probed);
+ ac.addr->srx_service = ret;
+ break;
+ case YFS_VL_SERVICE:
+ set_bit(ac.index, &ac.alist->yfs);
+ set_bit(ac.index, &ac.alist->probed);
+ ac.addr->srx_service = ret;
+ break;
+ }
+ }
+
vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key,
volname, volnamesz);
switch (ac.error) {