From 0189197f441602acdca3f97750d392a895b778fd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 3 Mar 2015 19:10:47 -0600 Subject: mpls: Basic routing support This change adds a new Kconfig option MPLS_ROUTING. The core of this change is the code to look at an mpls packet received from another machine. Look that packet up in a routing table and forward the packet on. Support of MPLS over ATM is not considered or attempted here. This implemntation follows RFC3032 and implements the MPLS shim header that can pass over essentially any network. What RFC3021 refers to as the as the Incoming Label Map (ILM) I call net->mpls.platform_label[]. What RFC3031 refers to as the Next Label Hop Forwarding Entry (NHLFE) I call mpls_route. Though calling it the label fordwarding information base (lfib) might also be valid. Further the implemntation forwards packets as described in RFC3032. There is no need and given the original motivation for MPLS a strong discincentive to have a flexible label forwarding path. In essence the logic is the topmost label is read, looked up, removed, and replaced by 0 or more new lables and the sent out the specified interface to it's next hop. Quite a few optional features are not implemented here. Among them are generation of ICMP errors when the TTL is exceeded or the packet is larger than the next hop MTU (those conditions are detected and the packets are dropped instead of generating an icmp error). The traffic class field is always set to 0. The implementation focuses on IP over MPLS and does not handle egress of other kinds of protocols. Instead of implementing coordination with the neighbour table and sorting out how to input next hops in a different address family (for which there is value). I was lazy and implemented a next hop mac address instead. The code is simpler and there are flavor of MPLS such as MPLS-TP where neither an IPv4 nor an IPv6 next hop is appropriate so a next hop by mac address would need to be implemented at some point. Two new definitions AF_MPLS and PF_MPLS are exposed to userspace. Decoding the mpls header must be done by first byeswapping a 32bit bit endian word into the local cpu endian and then bit shifting to extract the pieces. There is no C bit-field that can represent a wire format mpls header on a little endian machine as the low bits of the 20bit label wind up in the wrong half of third byte. Therefore internally everything is deal with in cpu native byte order except when writing to and reading from a packet. For management simplicity if a label is configured to forward out an interface that is down the packet is dropped early. Similarly if an network interface is removed rt_dev is updated to NULL (so no reference is preserved) and any packets for that label are dropped. Keeping the label entries in the kernel allows the kernel label table to function as the definitive source of which labels are allocated and which are not. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/socket.h') diff --git a/include/linux/socket.h b/include/linux/socket.h index 5c19cba34dce..fab4d0ddf4ed 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -181,6 +181,7 @@ struct ucred { #define AF_WANPIPE 25 /* Wanpipe API Sockets */ #define AF_LLC 26 /* Linux LLC */ #define AF_IB 27 /* Native InfiniBand address */ +#define AF_MPLS 28 /* MPLS */ #define AF_CAN 29 /* Controller Area Network */ #define AF_TIPC 30 /* TIPC sockets */ #define AF_BLUETOOTH 31 /* Bluetooth sockets */ @@ -226,6 +227,7 @@ struct ucred { #define PF_WANPIPE AF_WANPIPE #define PF_LLC AF_LLC #define PF_IB AF_IB +#define PF_MPLS AF_MPLS #define PF_CAN AF_CAN #define PF_TIPC AF_TIPC #define PF_BLUETOOTH AF_BLUETOOTH -- cgit v1.3-6-gb490 From 0345f93138b2224e0d7ce91fcffdb3dd23f364d7 Mon Sep 17 00:00:00 2001 From: "tadeusz.struk@intel.com" Date: Thu, 19 Mar 2015 12:31:25 -0700 Subject: net: socket: add support for async operations Add support for async operations. Signed-off-by: Tadeusz Struk Signed-off-by: David S. Miller --- include/linux/socket.h | 1 + net/compat.c | 2 ++ net/socket.c | 8 ++++++-- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux/socket.h') diff --git a/include/linux/socket.h b/include/linux/socket.h index fab4d0ddf4ed..c9852ef7e317 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -51,6 +51,7 @@ struct msghdr { void *msg_control; /* ancillary data */ __kernel_size_t msg_controllen; /* ancillary data buffer length */ unsigned int msg_flags; /* flags on received message */ + struct kiocb *msg_iocb; /* ptr to iocb for async requests */ }; struct user_msghdr { diff --git a/net/compat.c b/net/compat.c index 13c0c9a25cd9..c4b6b0f43d5d 100644 --- a/net/compat.c +++ b/net/compat.c @@ -79,6 +79,8 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg, if (nr_segs > UIO_MAXIOV) return -EMSGSIZE; + kmsg->msg_iocb = NULL; + err = compat_rw_copy_check_uvector(save_addr ? READ : WRITE, compat_ptr(uiov), nr_segs, UIO_FASTIOV, *iov, iov); diff --git a/net/socket.c b/net/socket.c index 3e776776f42c..073809f4125f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -798,7 +798,8 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - struct msghdr msg = {.msg_iter = *to}; + struct msghdr msg = {.msg_iter = *to, + .msg_iocb = iocb}; ssize_t res; if (file->f_flags & O_NONBLOCK) @@ -819,7 +820,8 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - struct msghdr msg = {.msg_iter = *from}; + struct msghdr msg = {.msg_iter = *from, + .msg_iocb = iocb}; ssize_t res; if (iocb->ki_pos != 0) @@ -1894,6 +1896,8 @@ static ssize_t copy_msghdr_from_user(struct msghdr *kmsg, if (nr_segs > UIO_MAXIOV) return -EMSGSIZE; + kmsg->msg_iocb = NULL; + err = rw_copy_check_uvector(save_addr ? READ : WRITE, uiov, nr_segs, UIO_FASTIOV, *iov, iov); -- cgit v1.3-6-gb490 From 01e97e6517053d7c0b9af5248e944a9209909cf5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 15 Dec 2014 21:39:31 -0500 Subject: new helper: msg_data_left() convert open-coded instances Signed-off-by: Al Viro --- crypto/algif_hash.c | 4 ++-- crypto/algif_skcipher.c | 4 ++-- drivers/vhost/net.c | 4 ++-- include/linux/socket.h | 5 +++++ net/core/datagram.c | 2 +- net/ipv4/tcp.c | 8 ++++---- net/rxrpc/ar-output.c | 19 +++++++++---------- net/socket.c | 4 ++-- 8 files changed, 27 insertions(+), 23 deletions(-) (limited to 'include/linux/socket.h') diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 0a465e0f3012..1396ad0787fc 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -56,8 +56,8 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg, ctx->more = 0; - while (iov_iter_count(&msg->msg_iter)) { - int len = iov_iter_count(&msg->msg_iter); + while (msg_data_left(msg)) { + int len = msg_data_left(msg); if (len > limit) len = limit; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 8f903b6df299..945075292bc9 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -641,7 +641,7 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg, long copied = 0; lock_sock(sk); - while (iov_iter_count(&msg->msg_iter)) { + while (msg_data_left(msg)) { sgl = list_first_entry(&ctx->tsgl, struct skcipher_sg_list, list); sg = sgl->sg; @@ -655,7 +655,7 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg, goto unlock; } - used = min_t(unsigned long, ctx->used, iov_iter_count(&msg->msg_iter)); + used = min_t(unsigned long, ctx->used, msg_data_left(msg)); used = af_alg_make_sg(&ctx->rsgl, &msg->msg_iter, used); err = used; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 18f05bff8826..7d137a43cc86 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -357,13 +357,13 @@ static void handle_tx(struct vhost_net *net) iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len); iov_iter_advance(&msg.msg_iter, hdr_size); /* Sanity check */ - if (!iov_iter_count(&msg.msg_iter)) { + if (!msg_data_left(&msg)) { vq_err(vq, "Unexpected header len for TX: " "%zd expected %zd\n", len, hdr_size); break; } - len = iov_iter_count(&msg.msg_iter); + len = msg_data_left(&msg); zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN && (nvq->upend_idx + 1) % UIO_MAXIOV != diff --git a/include/linux/socket.h b/include/linux/socket.h index c9852ef7e317..5bf59c8493b7 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -139,6 +139,11 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } +static inline size_t msg_data_left(struct msghdr *msg) +{ + return iov_iter_count(&msg->msg_iter); +} + /* "Socket"-level control message types: */ #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */ diff --git a/net/core/datagram.c b/net/core/datagram.c index df493d68330c..b80fb91bb3f7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -673,7 +673,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, if (!chunk) return 0; - if (iov_iter_count(&msg->msg_iter) < chunk) { + if (msg_data_left(msg) < chunk) { if (__skb_checksum_complete(skb)) goto csum_error; if (skb_copy_datagram_msg(skb, hlen, msg, chunk)) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 094a6822c71d..18e3a12eb1b2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1119,7 +1119,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) sg = !!(sk->sk_route_caps & NETIF_F_SG); - while (iov_iter_count(&msg->msg_iter)) { + while (msg_data_left(msg)) { int copy = 0; int max = size_goal; @@ -1163,8 +1163,8 @@ new_segment: } /* Try to append data to the end of skb. */ - if (copy > iov_iter_count(&msg->msg_iter)) - copy = iov_iter_count(&msg->msg_iter); + if (copy > msg_data_left(msg)) + copy = msg_data_left(msg); /* Where to copy to? */ if (skb_availroom(skb) > 0) { @@ -1221,7 +1221,7 @@ new_segment: tcp_skb_pcount_set(skb, 0); copied += copy; - if (!iov_iter_count(&msg->msg_iter)) { + if (!msg_data_left(msg)) { tcp_tx_timestamp(sk, skb); goto out; } diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 7a31a3958364..c0042807bfc6 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -564,8 +564,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, max &= ~(call->conn->size_align - 1UL); chunk = max; - if (chunk > iov_iter_count(&msg->msg_iter) && !more) - chunk = iov_iter_count(&msg->msg_iter); + if (chunk > msg_data_left(msg) && !more) + chunk = msg_data_left(msg); space = chunk + call->conn->size_align; space &= ~(call->conn->size_align - 1UL); @@ -608,11 +608,11 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, sp = rxrpc_skb(skb); /* append next segment of data to the current buffer */ - if (iov_iter_count(&msg->msg_iter) > 0) { + if (msg_data_left(msg) > 0) { int copy = skb_tailroom(skb); ASSERTCMP(copy, >, 0); - if (copy > iov_iter_count(&msg->msg_iter)) - copy = iov_iter_count(&msg->msg_iter); + if (copy > msg_data_left(msg)) + copy = msg_data_left(msg); if (copy > sp->remain) copy = sp->remain; @@ -633,7 +633,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, /* add the packet to the send queue if it's now full */ if (sp->remain <= 0 || - (iov_iter_count(&msg->msg_iter) == 0 && !more)) { + (msg_data_left(msg) == 0 && !more)) { struct rxrpc_connection *conn = call->conn; uint32_t seq; size_t pad; @@ -663,7 +663,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, sp->hdr.serviceId = conn->service_id; sp->hdr.flags = conn->out_clientflag; - if (iov_iter_count(&msg->msg_iter) == 0 && !more) + if (msg_data_left(msg) == 0 && !more) sp->hdr.flags |= RXRPC_LAST_PACKET; else if (CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz) > 1) @@ -679,11 +679,10 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, memcpy(skb->head, &sp->hdr, sizeof(struct rxrpc_header)); - rxrpc_queue_packet(call, skb, - iov_iter_count(&msg->msg_iter) == 0 && !more); + rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); skb = NULL; } - } while (iov_iter_count(&msg->msg_iter) > 0); + } while (msg_data_left(msg) > 0); success: ret = copied; diff --git a/net/socket.c b/net/socket.c index 21676e469b13..5b0126234606 100644 --- a/net/socket.c +++ b/net/socket.c @@ -612,7 +612,7 @@ EXPORT_SYMBOL(__sock_tx_timestamp); static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { - int ret = sock->ops->sendmsg(sock, msg, iov_iter_count(&msg->msg_iter)); + int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg)); BUG_ON(ret == -EIOCBQUEUED); return ret; } @@ -620,7 +620,7 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) int sock_sendmsg(struct socket *sock, struct msghdr *msg) { int err = security_socket_sendmsg(sock, msg, - iov_iter_count(&msg->msg_iter)); + msg_data_left(msg)); return err ?: sock_sendmsg_nosec(sock, msg); } -- cgit v1.3-6-gb490