diff options
-rw-r--r-- | drivers/net/Kconfig | 4 | ||||
-rw-r--r-- | drivers/net/geneve.c | 736 | ||||
-rw-r--r-- | drivers/net/vxlan.c | 29 | ||||
-rw-r--r-- | include/net/dst_metadata.h | 61 | ||||
-rw-r--r-- | include/net/geneve.h | 35 | ||||
-rw-r--r-- | include/net/udp_tunnel.h | 4 | ||||
-rw-r--r-- | include/uapi/linux/if_link.h | 2 | ||||
-rw-r--r-- | net/ipv4/Kconfig | 14 | ||||
-rw-r--r-- | net/ipv4/Makefile | 1 | ||||
-rw-r--r-- | net/ipv4/geneve_core.c | 447 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 21 | ||||
-rw-r--r-- | net/ipv4/udp_tunnel.c | 25 | ||||
-rw-r--r-- | net/openvswitch/Kconfig | 2 | ||||
-rw-r--r-- | net/openvswitch/vport-geneve.c | 179 |
14 files changed, 767 insertions, 793 deletions
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 770483b31d62..d18eb607bee6 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -180,8 +180,8 @@ config VXLAN will be called vxlan. config GENEVE - tristate "Generic Network Virtualization Encapsulation netdev" - depends on INET && GENEVE_CORE + tristate "Generic Network Virtualization Encapsulation" + depends on INET && NET_UDP_TUNNEL select NET_IP_TUNNEL ---help--- This allows one to create geneve virtual interfaces that provide diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 897e1a3f035b..4357bae732d7 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -15,8 +15,10 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/hash.h> +#include <net/dst_metadata.h> #include <net/rtnetlink.h> #include <net/geneve.h> +#include <net/protocol.h> #define GENEVE_NETDEV_VER "0.6" @@ -32,12 +34,17 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); +#define GENEVE_VER 0 +#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) + /* per-network namespace private data for this module */ struct geneve_net { - struct list_head geneve_list; - struct hlist_head vni_list[VNI_HASH_SIZE]; + struct list_head geneve_list; + struct list_head sock_list; }; +static int geneve_net_id; + /* Pseudo network device */ struct geneve_dev { struct hlist_node hlist; /* vni hash table */ @@ -49,9 +56,19 @@ struct geneve_dev { u8 tos; /* TOS override */ struct sockaddr_in remote; /* IPv4 address for link partner */ struct list_head next; /* geneve's per namespace list */ + __be16 dst_port; + bool collect_md; }; -static int geneve_net_id; +struct geneve_sock { + bool collect_md; + struct list_head list; + struct socket *sock; + struct rcu_head rcu; + int refcnt; + struct udp_offload udp_offloads; + struct hlist_head vni_list[VNI_HASH_SIZE]; +}; static inline __u32 geneve_net_vni_hash(u8 vni[3]) { @@ -61,46 +78,101 @@ static inline __u32 geneve_net_vni_hash(u8 vni[3]) return hash_32(vnid, VNI_HASH_BITS); } +static __be64 vni_to_tunnel_id(const __u8 *vni) +{ +#ifdef __BIG_ENDIAN + return (vni[0] << 16) | (vni[1] << 8) | vni[2]; +#else + return (__force __be64)(((__force u64)vni[0] << 40) | + ((__force u64)vni[1] << 48) | + ((__force u64)vni[2] << 56)); +#endif +} + +static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, + __be32 addr, u8 vni[]) +{ + struct hlist_head *vni_list_head; + struct geneve_dev *geneve; + __u32 hash; + + /* Find the device for this VNI */ + hash = geneve_net_vni_hash(vni); + vni_list_head = &gs->vni_list[hash]; + hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { + if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && + addr == geneve->remote.sin_addr.s_addr) + return geneve; + } + return NULL; +} + +static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) +{ + return (struct genevehdr *)(udp_hdr(skb) + 1); +} + /* geneve receive/decap routine */ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) { struct genevehdr *gnvh = geneve_hdr(skb); - struct geneve_dev *dummy, *geneve = NULL; - struct geneve_net *gn; - struct iphdr *iph = NULL; + struct metadata_dst *tun_dst = NULL; + struct geneve_dev *geneve = NULL; struct pcpu_sw_netstats *stats; - struct hlist_head *vni_list_head; - int err = 0; - __u32 hash; - - iph = ip_hdr(skb); /* Still outer IP header... */ + struct iphdr *iph; + u8 *vni; + __be32 addr; + int err; - gn = gs->rcv_data; + if (gs->collect_md) { + static u8 zero_vni[3]; - /* Find the device for this VNI */ - hash = geneve_net_vni_hash(gnvh->vni); - vni_list_head = &gn->vni_list[hash]; - hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { - if (!memcmp(gnvh->vni, dummy->vni, sizeof(dummy->vni)) && - iph->saddr == dummy->remote.sin_addr.s_addr) { - geneve = dummy; - break; - } + vni = zero_vni; + addr = 0; + } else { + vni = gnvh->vni; + iph = ip_hdr(skb); /* Still outer IP header... */ + addr = iph->saddr; } + + geneve = geneve_lookup(gs, addr, vni); if (!geneve) goto drop; - /* Drop packets w/ critical options, - * since we don't support any... - */ - if (gnvh->critical) - goto drop; + if (ip_tunnel_collect_metadata() || gs->collect_md) { + __be16 flags; + void *opts; + + flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | + (gnvh->oam ? TUNNEL_OAM : 0) | + (gnvh->critical ? TUNNEL_CRIT_OPT : 0); + + tun_dst = udp_tun_rx_dst(skb, AF_INET, flags, + vni_to_tunnel_id(gnvh->vni), + gnvh->opt_len * 4); + if (!tun_dst) + goto drop; + + /* Update tunnel dst according to Geneve options. */ + opts = ip_tunnel_info_opts(&tun_dst->u.tun_info, + gnvh->opt_len * 4); + memcpy(opts, gnvh->options, gnvh->opt_len * 4); + } else { + /* Drop packets w/ critical options, + * since we don't support any... + */ + if (gnvh->critical) + goto drop; + } skb_reset_mac_header(skb); skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev))); skb->protocol = eth_type_trans(skb, geneve->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + if (tun_dst) + skb_dst_set(skb, &tun_dst->dst); + /* Ignore packet loops (and multicast echo) */ if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) goto drop; @@ -128,7 +200,6 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) u64_stats_update_end(&stats->syncp); netif_rx(skb); - return; drop: /* Consume bad packet */ @@ -141,7 +212,6 @@ static int geneve_init(struct net_device *dev) dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - return 0; } @@ -150,20 +220,281 @@ static void geneve_uninit(struct net_device *dev) free_percpu(dev->tstats); } +/* Callback from net/ipv4/udp.c to receive packets */ +static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) +{ + struct genevehdr *geneveh; + struct geneve_sock *gs; + int opts_len; + + /* Need Geneve and inner Ethernet header to be present */ + if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) + goto error; + + /* Return packets with reserved bits set */ + geneveh = geneve_hdr(skb); + if (unlikely(geneveh->ver != GENEVE_VER)) + goto error; + + if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) + goto error; + + opts_len = geneveh->opt_len * 4; + if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, + htons(ETH_P_TEB))) + goto drop; + + gs = rcu_dereference_sk_user_data(sk); + if (!gs) + goto drop; + + geneve_rx(gs, skb); + return 0; + +drop: + /* Consume bad packet */ + kfree_skb(skb); + return 0; + +error: + /* Let the UDP layer deal with the skb */ + return 1; +} + +static struct socket *geneve_create_sock(struct net *net, bool ipv6, + __be16 port) +{ + struct socket *sock; + struct udp_port_cfg udp_conf; + int err; + + memset(&udp_conf, 0, sizeof(udp_conf)); + + if (ipv6) { + udp_conf.family = AF_INET6; + } else { + udp_conf.family = AF_INET; + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + } + + udp_conf.local_udp_port = port; + + /* Open UDP socket */ + err = udp_sock_create(net, &udp_conf, &sock); + if (err < 0) + return ERR_PTR(err); + + return sock; +} + +static void geneve_notify_add_rx_port(struct geneve_sock *gs) +{ + struct sock *sk = gs->sock->sk; + sa_family_t sa_family = sk->sk_family; + int err; + + if (sa_family == AF_INET) { + err = udp_add_offload(&gs->udp_offloads); + if (err) + pr_warn("geneve: udp_add_offload failed with status %d\n", + err); + } +} + +static int geneve_hlen(struct genevehdr *gh) +{ + return sizeof(*gh) + gh->opt_len * 4; +} + +static struct sk_buff **geneve_gro_receive(struct sk_buff **head, + struct sk_buff *skb, + struct udp_offload *uoff) +{ + struct sk_buff *p, **pp = NULL; + struct genevehdr *gh, *gh2; + unsigned int hlen, gh_len, off_gnv; + const struct packet_offload *ptype; + __be16 type; + int flush = 1; + + off_gnv = skb_gro_offset(skb); + hlen = off_gnv + sizeof(*gh); + gh = skb_gro_header_fast(skb, off_gnv); + if (skb_gro_header_hard(skb, hlen)) { + gh = skb_gro_header_slow(skb, hlen, off_gnv); + if (unlikely(!gh)) + goto out; + } + + if (gh->ver != GENEVE_VER || gh->oam) + goto out; + gh_len = geneve_hlen(gh); + + hlen = off_gnv + gh_len; + if (skb_gro_header_hard(skb, hlen)) { + gh = skb_gro_header_slow(skb, hlen, off_gnv); + if (unlikely(!gh)) + goto out; + } + + flush = 0; + + for (p = *head; p; p = p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + gh2 = (struct genevehdr *)(p->data + off_gnv); + if (gh->opt_len != gh2->opt_len || + memcmp(gh, gh2, gh_len)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + type = gh->proto_type; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (!ptype) { + flush = 1; + goto out_unlock; + } + + skb_gro_pull(skb, gh_len); + skb_gro_postpull_rcsum(skb, gh, gh_len); + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int geneve_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) +{ + struct genevehdr *gh; + struct packet_offload *ptype; + __be16 type; + int gh_len; + int err = -ENOSYS; + + udp_tunnel_gro_complete(skb, nhoff); + + gh = (struct genevehdr *)(skb->data + nhoff); + gh_len = geneve_hlen(gh); + type = gh->proto_type; + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype) + err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); + + rcu_read_unlock(); + return err; +} + +/* Create new listen socket if needed */ +static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, + bool ipv6) +{ + struct geneve_net *gn = net_generic(net, geneve_net_id); + struct geneve_sock *gs; + struct socket *sock; + struct udp_tunnel_sock_cfg tunnel_cfg; + int h; + + gs = kzalloc(sizeof(*gs), GFP_KERNEL); + if (!gs) + return ERR_PTR(-ENOMEM); + + sock = geneve_create_sock(net, ipv6, port); + if (IS_ERR(sock)) { + kfree(gs); + return ERR_CAST(sock); + } + + gs->sock = sock; + gs->refcnt = 1; + for (h = 0; h < VNI_HASH_SIZE; ++h) + INIT_HLIST_HEAD(&gs->vni_list[h]); + + /* Initialize the geneve udp offloads structure */ + gs->udp_offloads.port = port; + gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; + gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; + geneve_notify_add_rx_port(gs); + + /* Mark socket as an encapsulation socket */ + tunnel_cfg.sk_user_data = gs; + tunnel_cfg.encap_type = 1; + tunnel_cfg.encap_rcv = geneve_udp_encap_recv; + tunnel_cfg.encap_destroy = NULL; + setup_udp_tunnel_sock(net, sock, &tunnel_cfg); + list_add(&gs->list, &gn->sock_list); + return gs; +} + +static void geneve_notify_del_rx_port(struct geneve_sock *gs) +{ + struct sock *sk = gs->sock->sk; + sa_family_t sa_family = sk->sk_family; + + if (sa_family == AF_INET) + udp_del_offload(&gs->udp_offloads); +} + +static void geneve_sock_release(struct geneve_sock *gs) +{ + if (--gs->refcnt) + return; + + list_del(&gs->list); + geneve_notify_del_rx_port(gs); + udp_tunnel_sock_release(gs->sock); + kfree_rcu(gs, rcu); +} + +static struct geneve_sock *geneve_find_sock(struct geneve_net *gn, + __be16 dst_port) +{ + struct geneve_sock *gs; + + list_for_each_entry(gs, &gn->sock_list, list) { + if (inet_sk(gs->sock->sk)->inet_sport == dst_port && + inet_sk(gs->sock->sk)->sk.sk_family == AF_INET) { + return gs; + } + } + return NULL; +} + static int geneve_open(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); struct net *net = geneve->net; - struct geneve_net *gn = net_generic(geneve->net, geneve_net_id); + struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; + __u32 hash; + + gs = geneve_find_sock(gn, geneve->dst_port); + if (gs) { + gs->refcnt++; + goto out; + } - gs = geneve_sock_add(net, htons(GENEVE_UDP_PORT), geneve_rx, gn, - false, false); + gs = geneve_socket_create(net, geneve->dst_port, false); if (IS_ERR(gs)) return PTR_ERR(gs); +out: + gs->collect_md = geneve->collect_md; geneve->sock = gs; + hash = geneve_net_vni_hash(geneve->vni); + hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]); return 0; } @@ -172,74 +503,189 @@ static int geneve_stop(struct net_device *dev) struct geneve_dev *geneve = netdev_priv(dev); struct geneve_sock *gs = geneve->sock; + if (!hlist_unhashed(&geneve->hlist)) + hlist_del_rcu(&geneve->hlist); geneve_sock_release(gs); - return 0; } -static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) +static int geneve_build_skb(struct rtable *rt, struct sk_buff *skb, + __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, + bool csum) { - struct geneve_dev *geneve = netdev_priv(dev); - struct geneve_sock *gs = geneve->sock; - struct rtable *rt = NULL; - const struct iphdr *iip; /* interior IP header */ - struct flowi4 fl4; + struct genevehdr *gnvh; + int min_headroom; int err; - __be16 sport; - __u8 tos, ttl; - iip = ip_hdr(skb); + min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr); + err = skb_cow_head(skb, min_headroom); + if (unlikely(err)) { + kfree_skb(skb); + goto free_rt; + } - skb_reset_mac_header(skb); + skb = udp_tunnel_handle_offloads(skb, csum); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto free_rt; + } + + gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); + gnvh->ver = GENEVE_VER; + gnvh->opt_len = opt_len / 4; + gnvh->oam = !!(tun_flags & TUNNEL_OAM); + gnvh->critical = !!(tun_flags & TUNNEL_CRIT_OPT); + gnvh->rsvd1 = 0; + memcpy(gnvh->vni, vni, 3); + gnvh->proto_type = htons(ETH_P_TEB); + gnvh->rsvd2 = 0; + memcpy(gnvh->options, opt, opt_len); + + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + return 0; - /* TODO: port min/max limits should be configurable */ - sport = udp_flow_src_port(dev_net(dev), skb, 0, 0, true); +free_rt: + ip_rt_put(rt); + return err; +} + +static struct rtable *geneve_get_rt(struct sk_buff *skb, + struct net_device *dev, + struct flowi4 *fl4, + struct ip_tunnel_info *info) +{ + struct geneve_dev *geneve = netdev_priv(dev); + struct rtable *rt = NULL; + __u8 tos; + + memset(fl4, 0, sizeof(*fl4)); + fl4->flowi4_mark = skb->mark; + fl4->flowi4_proto = IPPROTO_UDP; + + if (info) { + fl4->daddr = info->key.u.ipv4.dst; + fl4->saddr = info->key.u.ipv4.src; + fl4->flowi4_tos = RT_TOS(info->key.tos); + } else { + tos = geneve->tos; + if (tos == 1) { + const struct iphdr *iip = ip_hdr(skb); + + tos = ip_tunnel_get_dsfield(iip, skb); + } - tos = geneve->tos; - if (tos == 1) - tos = ip_tunnel_get_dsfield(iip, skb); + fl4->flowi4_tos = RT_TOS(tos); + fl4->daddr = geneve->remote.sin_addr.s_addr; + } - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_tos = RT_TOS(tos); - fl4.daddr = geneve->remote.sin_addr.s_addr; - rt = ip_route_output_key(geneve->net, &fl4); + rt = ip_route_output_key(geneve->net, fl4); if (IS_ERR(rt)) { - netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); + netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr); dev->stats.tx_carrier_errors++; - goto tx_error; + return rt; } if (rt->dst.dev == dev) { /* is this necessary? */ - netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); + netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr); dev->stats.collisions++; - goto rt_tx_error; + ip_rt_put(rt); + return ERR_PTR(-EINVAL); } + return rt; +} - tos = ip_tunnel_ecn_encap(tos, iip, skb); +/* Convert 64 bit tunnel ID to 24 bit VNI. */ +static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) +{ +#ifdef __BIG_ENDIAN + vni[0] = (__force __u8)(tun_id >> 16); + vni[1] = (__force __u8)(tun_id >> 8); + vni[2] = (__force __u8)tun_id; +#else + vni[0] = (__force __u8)((__force u64)tun_id >> 40); + vni[1] = (__force __u8)((__force u64)tun_id >> 48); + vni[2] = (__force __u8)((__force u64)tun_id >> 56); +#endif +} - ttl = geneve->ttl; - if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) - ttl = 1; +static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct geneve_dev *geneve = netdev_priv(dev); + struct geneve_sock *gs = geneve->sock; + struct ip_tunnel_info *info = NULL; + struct rtable *rt = NULL; + struct flowi4 fl4; + __u8 tos, ttl; + __be16 sport; + bool udp_csum; + __be16 df; + int err; - ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); + if (geneve->collect_md) { + info = skb_tunnel_info(skb); + if (unlikely(info && info->mode != IP_TUNNEL_INFO_TX)) { + netdev_dbg(dev, "no tunnel metadata\n"); + goto tx_error; + } + } - /* no need to handle local destination and encap bypass...yet... */ + rt = geneve_get_rt(skb, dev, &fl4, info); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); + dev->stats.tx_carrier_errors++; + goto tx_error; + } - err = geneve_xmit_skb(gs, rt, skb, fl4.saddr, fl4.daddr, - tos, ttl, 0, sport, htons(GENEVE_UDP_PORT), 0, - geneve->vni, 0, NULL, false, - !net_eq(geneve->net, dev_net(geneve->dev))); - if (err < 0) - ip_rt_put(rt); + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); + skb_reset_mac_header(skb); - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + if (info) { + const struct ip_tunnel_key *key = &info->key; + u8 *opts = NULL; + u8 vni[3]; + + tunnel_id_to_vni(key->tun_id, vni); + if (key->tun_flags & TUNNEL_GENEVE_OPT) + opts = ip_tunnel_info_opts(info, info->options_len); + + udp_csum = !!(key->tun_flags & TUNNEL_CSUM); + err = geneve_build_skb(rt, skb, key->tun_flags, vni, + info->options_len, opts, udp_csum); + if (unlikely(err)) + goto err; + + tos = key->tos; + ttl = key->ttl; + df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + } else { + const struct iphdr *iip; /* interior IP header */ + + udp_csum = false; + err = geneve_build_skb(rt, skb, 0, geneve->vni, + 0, NULL, udp_csum); + if (unlikely(err)) + goto err; + + iip = ip_hdr(skb); + tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb); + ttl = geneve->ttl; + if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) + ttl = 1; + ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); + df = 0; + } + err = udp_tunnel_xmit_skb(rt, gs->sock->sk, skb, fl4.saddr, fl4.daddr, + tos, ttl, df, sport, geneve->dst_port, + !net_eq(geneve->net, dev_net(geneve->dev)), + !udp_csum); + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; -rt_tx_error: - ip_rt_put(rt); tx_error: - dev->stats.tx_errors++; dev_kfree_skb(skb); +err: + dev->stats.tx_errors++; return NETDEV_TX_OK; } @@ -297,6 +743,7 @@ static void geneve_setup(struct net_device *dev) netif_keep_dst(dev); dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; + eth_hw_addr_random(dev); } static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { @@ -304,6 +751,8 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, + [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, + [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -329,68 +778,117 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int geneve_newlink(struct net *net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) +static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, + __be16 dst_port, + __be32 rem_addr, + u8 vni[], + bool *tun_on_same_port, + bool *tun_collect_md) +{ + struct geneve_dev *geneve, *t; + + *tun_on_same_port = false; + *tun_collect_md = false; + t = NULL; + list_for_each_entry(geneve, &gn->geneve_list, next) { + if (geneve->dst_port == dst_port) { + *tun_collect_md = geneve->collect_md; + *tun_on_same_port = true; + } + if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && + rem_addr == geneve->remote.sin_addr.s_addr && + dst_port == geneve->dst_port) + t = geneve; + } + return t; +} + +static int geneve_configure(struct net *net, struct net_device *dev, + __be32 rem_addr, __u32 vni, __u8 ttl, __u8 tos, + __u16 dst_port, bool metadata) { struct geneve_net *gn = net_generic(net, geneve_net_id); - struct geneve_dev *dummy, *geneve = netdev_priv(dev); - struct hlist_head *vni_list_head; - struct sockaddr_in remote; /* IPv4 address for link partner */ - __u32 vni, hash; + struct geneve_dev *t, *geneve = netdev_priv(dev); + bool tun_collect_md, tun_on_same_port; int err; - if (!data[IFLA_GENEVE_ID] || !data[IFLA_GENEVE_REMOTE]) - return -EINVAL; + if (metadata) { + if (rem_addr || vni || tos || ttl) + return -EINVAL; + } geneve->net = net; geneve->dev = dev; - vni = nla_get_u32(data[IFLA_GENEVE_ID]); geneve->vni[0] = (vni & 0x00ff0000) >> 16; geneve->vni[1] = (vni & 0x0000ff00) >> 8; geneve->vni[2] = vni & 0x000000ff; - geneve->remote.sin_addr.s_addr = - nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); + geneve->remote.sin_addr.s_addr = rem_addr; if (IN_MULTICAST(ntohl(geneve->remote.sin_addr.s_addr))) return -EINVAL; - remote = geneve->remote; - hash = geneve_net_vni_hash(geneve->vni); - vni_list_head = &gn->vni_list[hash]; - hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { - if (!memcmp(geneve->vni, dummy->vni, sizeof(dummy->vni)) && - !memcmp(&remote, &dummy->remote, sizeof(dummy->remote))) - return -EBUSY; + geneve->ttl = ttl; + geneve->tos = tos; + geneve->dst_port = htons(dst_port); + geneve->collect_md = metadata; + + t = geneve_find_dev(gn, htons(dst_port), rem_addr, geneve->vni, + &tun_on_same_port, &tun_collect_md); + if (t) + return -EBUSY; + + if (metadata) { + if (tun_on_same_port) + return -EPERM; + } else { + if (tun_collect_md) + return -EPERM; } - if (tb[IFLA_ADDRESS] == NULL) - eth_hw_addr_random(dev); - err = register_netdevice(dev); if (err) return err; + list_add(&geneve->next, &gn->geneve_list); + return 0; +} + +static int geneve_newlink(struct net *net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + __u16 dst_port = GENEVE_UDP_PORT; + __u8 ttl = 0, tos = 0; + bool metadata = false; + __be32 rem_addr; + __u32 vni; + + if (!data[IFLA_GENEVE_ID] || !data[IFLA_GENEVE_REMOTE]) + return -EINVAL; + + vni = nla_get_u32(data[IFLA_GENEVE_ID]); + rem_addr = nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); + if (data[IFLA_GENEVE_TTL]) - geneve->ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); + ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); if (data[IFLA_GENEVE_TOS]) - geneve->tos = nla_get_u8(data[IFLA_GENEVE_TOS]); + tos = nla_get_u8(data[IFLA_GENEVE_TOS]); - list_add(&geneve->next, &gn->geneve_list); + if (data[IFLA_GENEVE_PORT]) + dst_port = nla_get_u16(data[IFLA_GENEVE_PORT]); - hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]); + if (data[IFLA_GENEVE_COLLECT_METADATA]) + metadata = true; - return 0; + return geneve_configure(net, dev, rem_addr, vni, + ttl, tos, dst_port, metadata); } static void geneve_dellink(struct net_device *dev, struct list_head *head) { struct geneve_dev *geneve = netdev_priv(dev); - if (!hlist_unhashed(&geneve->hlist)) - hlist_del_rcu(&geneve->hlist); - list_del(&geneve->next); unregister_netdevice_queue(dev, head); } @@ -401,6 +899,8 @@ static size_t geneve_get_size(const struct net_device *dev) nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ + nla_total_size(sizeof(__u16)) + /* IFLA_GENEVE_PORT */ + nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ 0; } @@ -421,6 +921,14 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos)) goto nla_put_failure; + if (nla_put_u16(skb, IFLA_GENEVE_PORT, ntohs(geneve->dst_port))) + goto nla_put_failure; + + if (geneve->collect_md) { + if (nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) + goto nla_put_failure; + } + return 0; nla_put_failure: @@ -440,16 +948,34 @@ static struct rtnl_link_ops geneve_link_ops __read_mostly = { .fill_info = geneve_fill_info, }; +struct net_device *geneve_dev_create_fb(struct net *net, const char *name, + u8 name_assign_type, u16 dst_port) +{ + struct nlattr *tb[IFLA_MAX + 1]; + struct net_device *dev; + int err; + + memset(tb, 0, sizeof(tb)); + dev = rtnl_create_link(net, name, name_assign_type, + &geneve_link_ops, tb); + if (IS_ERR(dev)) + return dev; + + err = geneve_configure(net, dev, 0, 0, 0, 0, dst_port, true); + if (err) { + free_netdev(dev); + return ERR_PTR(err); + } + return dev; +} +EXPORT_SYMBOL_GPL(geneve_dev_create_fb); + static __net_init int geneve_init_net(struct net *net) { struct geneve_net *gn = net_generic(net, geneve_net_id); - unsigned int h; INIT_LIST_HEAD(&gn->geneve_list); - - for (h = 0; h < VNI_HASH_SIZE; ++h) - INIT_HLIST_HEAD(&gn->vni_list[h]); - + INIT_LIST_HEAD(&gn->sock_list); return 0; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 61b457b9ec00..5b4cf66e632e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1264,36 +1264,13 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) } if (vxlan_collect_metadata(vs)) { - tun_dst = metadata_dst_alloc(sizeof(*md), GFP_ATOMIC); + tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, + cpu_to_be64(vni >> 8), sizeof(*md)); + if (!tun_dst) goto drop; info = &tun_dst->u.tun_info; - if (vxlan_get_sk_family(vs) == AF_INET) { - const struct iphdr *iph = ip_hdr(skb); - - info->key.u.ipv4.src = iph->saddr; - info->key.u.ipv4.dst = iph->daddr; - info->key.tos = iph->tos; - info->key.ttl = iph->ttl; - } else { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); - - info->key.u.ipv6.src = ip6h->saddr; - info->key.u.ipv6.dst = ip6h->daddr; - info->key.tos = ipv6_get_dsfield(ip6h); - info->key.ttl = ip6h->hop_limit; - } - - info->key.tp_src = udp_hdr(skb)->source; - info->key.tp_dst = udp_hdr(skb)->dest; - - info->mode = IP_TUNNEL_INFO_RX; - info->key.tun_flags = TUNNEL_KEY; - info->key.tun_id = cpu_to_be64(vni >> 8); - if (udp_hdr(skb)->check != 0) - info->key.tun_flags |= TUNNEL_CSUM; - md = ip_tunnel_info_opts(info, sizeof(*md)); } else { memset(md, 0, sizeof(*md)); diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 2cb52d562272..60c03326c087 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -48,4 +48,65 @@ static inline bool skb_valid_dst(const struct sk_buff *skb) struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags); +static inline struct metadata_dst *tun_rx_dst(__be16 flags, + __be64 tunnel_id, int md_size) +{ + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + tun_dst = metadata_dst_alloc(md_size, GFP_ATOMIC); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->mode = IP_TUNNEL_INFO_RX; + info->key.tun_flags = flags; + info->key.tun_id = tunnel_id; + info->key.tp_src = 0; + info->key.tp_dst = 0; + return tun_dst; +} + +static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ + const struct iphdr *iph = ip_hdr(skb); + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + tun_dst = tun_rx_dst(flags, tunnel_id, md_size); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->key.u.ipv4.src = iph->saddr; + info->key.u.ipv4.dst = iph->daddr; + info->key.tos = iph->tos; + info->key.ttl = iph->ttl; + return tun_dst; +} + +static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + tun_dst = tun_rx_dst(flags, tunnel_id, md_size); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->key.u.ipv6.src = ip6h->saddr; + info->key.u.ipv6.dst = ip6h->daddr; + info->key.tos = ipv6_get_dsfield(ip6h); + info->key.ttl = ip6h->hop_limit; + return tun_dst; +} + #endif /* __NET_DST_METADATA_H */ diff --git a/include/net/geneve.h b/include/net/geneve.h index 2a0543a1899d..3106ed6eae0d 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -62,40 +62,9 @@ struct genevehdr { struct geneve_opt options[]; }; -static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) -{ - return (struct genevehdr *)(udp_hdr(skb) + 1); -} - #ifdef CONFIG_INET -struct geneve_sock; - -typedef void (geneve_rcv_t)(struct geneve_sock *gs, struct sk_buff *skb); - -struct geneve_sock { - struct list_head list; - geneve_rcv_t *rcv; - void *rcv_data; - struct socket *sock; - struct rcu_head rcu; - int refcnt; - struct udp_offload udp_offloads; -}; - -#define GENEVE_VER 0 -#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) - -struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, - geneve_rcv_t *rcv, void *data, - bool no_share, bool ipv6); - -void geneve_sock_release(struct geneve_sock *vs); - -int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, - struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, - __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, - __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, - bool csum, bool xnet); +struct net_device *geneve_dev_create_fb(struct net *net, const char *name, + u8 name_assign_type, u16 dst_port); #endif /*ifdef CONFIG_INET */ #endif /*ifdef__NET_GENEVE_H */ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index c491c1221606..35041d0fc21e 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -93,6 +93,10 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, void udp_tunnel_sock_release(struct socket *sock); +struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, + __be16 flags, __be64 tunnel_id, + int md_size); + static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) { diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2d13dd44ecaa..3a5f263cfc2f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -410,6 +410,8 @@ enum { IFLA_GENEVE_REMOTE, IFLA_GENEVE_TTL, IFLA_GENEVE_TOS, + IFLA_GENEVE_PORT, /* destination port */ + IFLA_GENEVE_COLLECT_METADATA, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 6fb3c90ad726..416dfa004cfb 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -331,20 +331,6 @@ config NET_FOU_IP_TUNNELS When this option is enabled IP tunnels can be configured to use FOU or GUE encapsulation. -config GENEVE_CORE - tristate "Generic Network Virtualization Encapsulation library" - depends on INET - select NET_UDP_TUNNEL - ---help--- - This allows one to create Geneve virtual interfaces that provide - Layer 2 Networks over Layer 3 Networks. Geneve is often used - to tunnel virtual network infrastructure in virtualized environments. - For more information see: - http://tools.ietf.org/html/draft-gross-geneve-01 - - To compile this driver as a module, choose M here: the module - - config INET_AH tristate "IP: AH transformation" select XFRM_ALGO diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index efc43f300b8c..89aacb630a53 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -57,7 +57,6 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o -obj-$(CONFIG_GENEVE_CORE) += geneve_core.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o xfrm4_protocol.o diff --git a/net/ipv4/geneve_core.c b/net/ipv4/geneve_core.c deleted file mode 100644 index 311a4ba6950a..000000000000 --- a/net/ipv4/geneve_core.c +++ /dev/null @@ -1,447 +0,0 @@ -/* - * Geneve: Generic Network Virtualization Encapsulation - * - * Copyright (c) 2014 Nicira, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/skbuff.h> -#include <linux/list.h> -#include <linux/netdevice.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/udp.h> -#include <linux/igmp.h> -#include <linux/etherdevice.h> -#include <linux/if_ether.h> -#include <linux/if_vlan.h> -#include <linux/ethtool.h> -#include <linux/mutex.h> -#include <net/arp.h> -#include <net/ndisc.h> -#include <net/ip.h> -#include <net/ip_tunnels.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/rtnetlink.h> -#include <net/route.h> -#include <net/dsfield.h> -#include <net/inet_ecn.h> -#include <net/net_namespace.h> -#include <net/netns/generic.h> -#include <net/geneve.h> -#include <net/protocol.h> -#include <net/udp_tunnel.h> -#if IS_ENABLED(CONFIG_IPV6) -#include <net/ipv6.h> -#include <net/addrconf.h> -#include <net/ip6_tunnel.h> -#include <net/ip6_checksum.h> -#endif - -/* Protects sock_list and refcounts. */ -static DEFINE_MUTEX(geneve_mutex); - -/* per-network namespace private data for this module */ -struct geneve_net { - struct list_head sock_list; -}; - -static int geneve_net_id; - -static struct geneve_sock *geneve_find_sock(struct net *net, - sa_family_t family, __be16 port) -{ - struct geneve_net *gn = net_generic(net, geneve_net_id); - struct geneve_sock *gs; - - list_for_each_entry(gs, &gn->sock_list, list) { - if (inet_sk(gs->sock->sk)->inet_sport == port && - inet_sk(gs->sock->sk)->sk.sk_family == family) - return gs; - } - - return NULL; -} - -static void geneve_build_header(struct genevehdr *geneveh, - __be16 tun_flags, u8 vni[3], - u8 options_len, u8 *options) -{ - geneveh->ver = GENEVE_VER; - geneveh->opt_len = options_len / 4; - geneveh->oam = !!(tun_flags & TUNNEL_OAM); - geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT); - geneveh->rsvd1 = 0; - memcpy(geneveh->vni, vni, 3); - geneveh->proto_type = htons(ETH_P_TEB); - geneveh->rsvd2 = 0; - - memcpy(geneveh->options, options, options_len); -} - -/* Transmit a fully formatted Geneve frame. - * - * When calling this function. The skb->data should point - * to the geneve header which is fully formed. - * - * This function will add other UDP tunnel headers. - */ -int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, - struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, - __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, - __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, - bool csum, bool xnet) -{ - struct genevehdr *gnvh; - int min_headroom; - int err; - - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len - + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) - + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); - - err = skb_cow_head(skb, min_headroom); - if (unlikely(err)) { - kfree_skb(skb); - return err; - } - - skb = vlan_hwaccel_push_inside(skb); - if (unlikely(!skb)) - return -ENOMEM; - - skb = udp_tunnel_handle_offloads(skb, csum); - if (IS_ERR(skb)) - return PTR_ERR(skb); - - gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); - geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); - - skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - - return udp_tunnel_xmit_skb(rt, gs->sock->sk, skb, src, dst, - tos, ttl, df, src_port, dst_port, xnet, - !csum); -} -EXPORT_SYMBOL_GPL(geneve_xmit_skb); - -static int geneve_hlen(struct genevehdr *gh) -{ - return sizeof(*gh) + gh->opt_len * 4; -} - -static struct sk_buff **geneve_gro_receive(struct sk_buff **head, - struct sk_buff *skb, - struct udp_offload *uoff) -{ - struct sk_buff *p, **pp = NULL; - struct genevehdr *gh, *gh2; - unsigned int hlen, gh_len, off_gnv; - const struct packet_offload *ptype; - __be16 type; - int flush = 1; - - off_gnv = skb_gro_offset(skb); - hlen = off_gnv + sizeof(*gh); - gh = skb_gro_header_fast(skb, off_gnv); - if (skb_gro_header_hard(skb, hlen)) { - gh = skb_gro_header_slow(skb, hlen, off_gnv); - if (unlikely(!gh)) - goto out; - } - - if (gh->ver != GENEVE_VER || gh->oam) - goto out; - gh_len = geneve_hlen(gh); - - hlen = off_gnv + gh_len; - if (skb_gro_header_hard(skb, hlen)) { - gh = skb_gro_header_slow(skb, hlen, off_gnv); - if (unlikely(!gh)) - goto out; - } - - flush = 0; - - for (p = *head; p; p = p->next) { - if (!NAPI_GRO_CB(p)->same_flow) - continue; - - gh2 = (struct genevehdr *)(p->data + off_gnv); - if (gh->opt_len != gh2->opt_len || - memcmp(gh, gh2, gh_len)) { - NAPI_GRO_CB(p)->same_flow = 0; - continue; - } - } - - type = gh->proto_type; - - rcu_read_lock(); - ptype = gro_find_receive_by_type(type); - if (!ptype) { - flush = 1; - goto out_unlock; - } - - skb_gro_pull(skb, gh_len); - skb_gro_postpull_rcsum(skb, gh, gh_len); - pp = ptype->callbacks.gro_receive(head, skb); - -out_unlock: - rcu_read_unlock(); -out: - NAPI_GRO_CB(skb)->flush |= flush; - - return pp; -} - -static int geneve_gro_complete(struct sk_buff *skb, int nhoff, - struct udp_offload *uoff) -{ - struct genevehdr *gh; - struct packet_offload *ptype; - __be16 type; - int gh_len; - int err = -ENOSYS; - - udp_tunnel_gro_complete(skb, nhoff); - - gh = (struct genevehdr *)(skb->data + nhoff); - gh_len = geneve_hlen(gh); - type = gh->proto_type; - - rcu_read_lock(); - ptype = gro_find_complete_by_type(type); - if (ptype) - err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); - - rcu_read_unlock(); - return err; -} - -static void geneve_notify_add_rx_port(struct geneve_sock *gs) -{ - struct sock *sk = gs->sock->sk; - sa_family_t sa_family = sk->sk_family; - int err; - - if (sa_family == AF_INET) { - err = udp_add_offload(&gs->udp_offloads); - if (err) - pr_warn("geneve: udp_add_offload failed with status %d\n", - err); - } -} - -static void geneve_notify_del_rx_port(struct geneve_sock *gs) -{ - struct sock *sk = gs->sock->sk; - sa_family_t sa_family = sk->sk_family; - - if (sa_family == AF_INET) - udp_del_offload(&gs->udp_offloads); -} - -/* Callback from net/ipv4/udp.c to receive packets */ -static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) -{ - struct genevehdr *geneveh; - struct geneve_sock *gs; - int opts_len; - - /* Need Geneve and inner Ethernet header to be present */ - if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) - goto error; - - /* Return packets with reserved bits set */ - geneveh = geneve_hdr(skb); - - if (unlikely(geneveh->ver != GENEVE_VER)) - goto error; - - if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) - goto error; - - opts_len = geneveh->opt_len * 4; - if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, - htons(ETH_P_TEB))) - goto drop; - - gs = rcu_dereference_sk_user_data(sk); - if (!gs) - goto drop; - - gs->rcv(gs, skb); - return 0; - -drop: - /* Consume bad packet */ - kfree_skb(skb); - return 0; - -error: - /* Let the UDP layer deal with the skb */ - return 1; -} - -static struct socket *geneve_create_sock(struct net *net, bool ipv6, - __be16 port) -{ - struct socket *sock; - struct udp_port_cfg udp_conf; - int err; - - memset(&udp_conf, 0, sizeof(udp_conf)); - - if (ipv6) { - udp_conf.family = AF_INET6; - } else { - udp_conf.family = AF_INET; - udp_conf.local_ip.s_addr = htonl(INADDR_ANY); - } - - udp_conf.local_udp_port = port; - - /* Open UDP socket */ - err = udp_sock_create(net, &udp_conf, &sock); - if (err < 0) - return ERR_PTR(err); - - return sock; -} - -/* Create new listen socket if needed */ -static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, - geneve_rcv_t *rcv, void *data, - bool ipv6) -{ - struct geneve_net *gn = net_generic(net, geneve_net_id); - struct geneve_sock *gs; - struct socket *sock; - struct udp_tunnel_sock_cfg tunnel_cfg; - - gs = kzalloc(sizeof(*gs), GFP_KERNEL); - if (!gs) - return ERR_PTR(-ENOMEM); - - sock = geneve_create_sock(net, ipv6, port); - if (IS_ERR(sock)) { - kfree(gs); - return ERR_CAST(sock); - } - - gs->sock = sock; - gs->refcnt = 1; - gs->rcv = rcv; - gs->rcv_data = data; - - /* Initialize the geneve udp offloads structure */ - gs->udp_offloads.port = port; - gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; - gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; - geneve_notify_add_rx_port(gs); - - /* Mark socket as an encapsulation socket */ - tunnel_cfg.sk_user_data = gs; - tunnel_cfg.encap_type = 1; - tunnel_cfg.encap_rcv = geneve_udp_encap_recv; - tunnel_cfg.encap_destroy = NULL; - setup_udp_tunnel_sock(net, sock, &tunnel_cfg); - - list_add(&gs->list, &gn->sock_list); - - return gs; -} - -struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, - geneve_rcv_t *rcv, void *data, - bool no_share, bool ipv6) -{ - struct geneve_sock *gs; - - mutex_lock(&geneve_mutex); - - gs = geneve_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); - if (gs) { - if (!no_share && gs->rcv == rcv) - gs->refcnt++; - else - gs = ERR_PTR(-EBUSY); - } else { - gs = geneve_socket_create(net, port, rcv, data, ipv6); - } - - mutex_unlock(&geneve_mutex); - - return gs; -} -EXPORT_SYMBOL_GPL(geneve_sock_add); - -void geneve_sock_release(struct geneve_sock *gs) -{ - mutex_lock(&geneve_mutex); - - if (--gs->refcnt) - goto unlock; - - list_del(&gs->list); - geneve_notify_del_rx_port(gs); - udp_tunnel_sock_release(gs->sock); - kfree_rcu(gs, rcu); - -unlock: - mutex_unlock(&geneve_mutex); -} -EXPORT_SYMBOL_GPL(geneve_sock_release); - -static __net_init int geneve_init_net(struct net *net) -{ - struct geneve_net *gn = net_generic(net, geneve_net_id); - - INIT_LIST_HEAD(&gn->sock_list); - - return 0; -} - -static struct pernet_operations geneve_net_ops = { - .init = geneve_init_net, - .id = &geneve_net_id, - .size = sizeof(struct geneve_net), -}; - -static int __init geneve_init_module(void) -{ - int rc; - - rc = register_pernet_subsys(&geneve_net_ops); - if (rc) - return rc; - - pr_info("Geneve core logic\n"); - - return 0; -} -module_init(geneve_init_module); - -static void __exit geneve_cleanup_module(void) -{ - unregister_pernet_subsys(&geneve_net_ops); -} -module_exit(geneve_cleanup_module); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jesse Gross <jesse@nicira.com>"); -MODULE_DESCRIPTION("Driver library for GENEVE encapsulated traffic"); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 1bf328182697..faf1cde6f8da 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -400,25 +400,14 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) if (tunnel) { skb_pop_mac_header(skb); if (tunnel->collect_md) { - struct ip_tunnel_info *info; + __be16 flags; + __be64 tun_id; - tun_dst = metadata_dst_alloc(0, GFP_ATOMIC); + flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY); + tun_id = key_to_tunnel_id(tpi->key); + tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); if (!tun_dst) return PACKET_REJECT; - - info = &tun_dst->u.tun_info; - info->key.u.ipv4.src = iph->saddr; - info->key.u.ipv4.dst = iph->daddr; - info->key.tos = iph->tos; - info->key.ttl = iph->ttl; - - info->mode = IP_TUNNEL_INFO_RX; - info->key.tun_flags = tpi->flags & - (TUNNEL_CSUM | TUNNEL_KEY); - info->key.tun_id = key_to_tunnel_id(tpi->key); - - info->key.tp_src = 0; - info->key.tp_dst = 0; } ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 933ea903f7b8..aba428626b52 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -4,9 +4,10 @@ #include <linux/udp.h> #include <linux/types.h> #include <linux/kernel.h> +#include <net/dst_metadata.h> +#include <net/net_namespace.h> #include <net/udp.h> #include <net/udp_tunnel.h> -#include <net/net_namespace.h> int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) @@ -103,4 +104,26 @@ void udp_tunnel_sock_release(struct socket *sock) } EXPORT_SYMBOL_GPL(udp_tunnel_sock_release); +struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, + __be16 flags, __be64 tunnel_id, int md_size) +{ + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + if (family == AF_INET) + tun_dst = ip_tun_rx_dst(skb, flags, tunnel_id, md_size); + else + tun_dst = ipv6_tun_rx_dst(skb, flags, tunnel_id, md_size); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->key.tp_src = udp_hdr(skb)->source; + info->key.tp_dst = udp_hdr(skb)->dest; + if (udp_hdr(skb)->check) + info->key.tun_flags |= TUNNEL_CSUM; + return tun_dst; +} +EXPORT_SYMBOL_GPL(udp_tun_rx_dst); + MODULE_LICENSE("GPL"); diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 98f343d0d6dd..af7cdef42066 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -70,7 +70,7 @@ config OPENVSWITCH_VXLAN config OPENVSWITCH_GENEVE tristate "Open vSwitch Geneve tunneling support" depends on OPENVSWITCH - depends on GENEVE_CORE + depends on GENEVE default OPENVSWITCH ---help--- If you say Y here, then the Open vSwitch will be able create geneve vport. diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index d01bd6360970..fa37c95f7339 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -26,95 +26,44 @@ #include "datapath.h" #include "vport.h" +#include "vport-netdev.h" static struct vport_ops ovs_geneve_vport_ops; - /** * struct geneve_port - Keeps track of open UDP ports - * @gs: The socket created for this port number. - * @name: vport name. + * @dst_port: destination port. */ struct geneve_port { - struct geneve_sock *gs; - char name[IFNAMSIZ]; + u16 port_no; }; -static LIST_HEAD(geneve_ports); - static inline struct geneve_port *geneve_vport(const struct vport *vport) { return vport_priv(vport); } -/* Convert 64 bit tunnel ID to 24 bit VNI. */ -static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) -{ -#ifdef __BIG_ENDIAN - vni[0] = (__force __u8)(tun_id >> 16); - vni[1] = (__force __u8)(tun_id >> 8); - vni[2] = (__force __u8)tun_id; -#else - vni[0] = (__force __u8)((__force u64)tun_id >> 40); - vni[1] = (__force __u8)((__force u64)tun_id >> 48); - vni[2] = (__force __u8)((__force u64)tun_id >> 56); -#endif -} - -/* Convert 24 bit VNI to 64 bit tunnel ID. */ -static __be64 vni_to_tunnel_id(const __u8 *vni) -{ -#ifdef __BIG_ENDIAN - return (vni[0] << 16) | (vni[1] << 8) | vni[2]; -#else - return (__force __be64)(((__force u64)vni[0] << 40) | - ((__force u64)vni[1] << 48) | - ((__force u64)vni[2] << 56)); -#endif -} - -static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) -{ - struct vport *vport = gs->rcv_data; - struct genevehdr *geneveh = geneve_hdr(skb); - int opts_len; - struct ip_tunnel_info tun_info; - __be64 key; - __be16 flags; - - opts_len = geneveh->opt_len * 4; - - flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | - (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | - (geneveh->oam ? TUNNEL_OAM : 0) | - (geneveh->critical ? TUNNEL_CRIT_OPT : 0); - - key = vni_to_tunnel_id(geneveh->vni); - - ip_tunnel_info_init(&tun_info, ip_hdr(skb), - udp_hdr(skb)->source, udp_hdr(skb)->dest, - key, flags, geneveh->options, opts_len); - - ovs_vport_receive(vport, skb, &tun_info); -} - static int geneve_get_options(const struct vport *vport, struct sk_buff *skb) { struct geneve_port *geneve_port = geneve_vport(vport); - struct inet_sock *sk = inet_sk(geneve_port->gs->sock->sk); - if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(sk->inet_sport))) + if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->port_no)) return -EMSGSIZE; return 0; } -static void geneve_tnl_destroy(struct vport *vport) +static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ip_tunnel_info *egress_tun_info) { struct geneve_port *geneve_port = geneve_vport(vport); + struct net *net = ovs_dp_get_net(vport->dp); + __be16 dport = htons(geneve_port->port_no); + __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); - geneve_sock_release(geneve_port->gs); - - ovs_vport_deferred_free(vport); + return ovs_tunnel_get_egress_info(egress_tun_info, + ovs_dp_get_net(vport->dp), + OVS_CB(skb)->egress_tun_info, + IPPROTO_UDP, skb->mark, sport, dport); } static struct vport *geneve_tnl_create(const struct vport_parms *parms) @@ -122,11 +71,11 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) struct net *net = ovs_dp_get_net(parms->dp); struct nlattr *options = parms->options; struct geneve_port *geneve_port; - struct geneve_sock *gs; + struct net_device *dev; struct vport *vport; struct nlattr *a; - int err; u16 dst_port; + int err; if (!options) { err = -EINVAL; @@ -148,104 +97,40 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) return vport; geneve_port = geneve_vport(vport); - strncpy(geneve_port->name, parms->name, IFNAMSIZ); + geneve_port->port_no = dst_port; - gs = geneve_sock_add(net, htons(dst_port), geneve_rcv, vport, true, 0); - if (IS_ERR(gs)) { + rtnl_lock(); + dev = geneve_dev_create_fb(net, parms->name, NET_NAME_USER, dst_port); + if (IS_ERR(dev)) { + rtnl_unlock(); ovs_vport_free(vport); - return (void *)gs; + return ERR_CAST(dev); } - geneve_port->gs = gs; + dev_change_flags(dev, dev->flags | IFF_UP); + rtnl_unlock(); return vport; error: return ERR_PTR(err); } -static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) +static struct vport *geneve_create(const struct vport_parms *parms) { - const struct ip_tunnel_key *tun_key; - struct ip_tunnel_info *tun_info; - struct net *net = ovs_dp_get_net(vport->dp); - struct geneve_port *geneve_port = geneve_vport(vport); - __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport; - __be16 sport; - struct rtable *rt; - struct flowi4 fl; - u8 vni[3], opts_len, *opts; - __be16 df; - int err; - - tun_info = OVS_CB(skb)->egress_tun_info; - if (unlikely(!tun_info)) { - err = -EINVAL; - goto error; - } - - tun_key = &tun_info->key; - rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - goto error; - } - - df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); - tunnel_id_to_vni(tun_key->tun_id, vni); - skb->ignore_df = 1; - - if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) { - opts = (u8 *)tun_info->options; - opts_len = tun_info->options_len; - } else { - opts = NULL; - opts_len = 0; - } - - err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr, - tun_key->u.ipv4.dst, tun_key->tos, - tun_key->ttl, df, sport, dport, - tun_key->tun_flags, vni, opts_len, opts, - !!(tun_key->tun_flags & TUNNEL_CSUM), false); - if (err < 0) - ip_rt_put(rt); - return err; - -error: - kfree_skb(skb); - return err; -} - -static const char *geneve_get_name(const struct vport *vport) -{ - struct geneve_port *geneve_port = geneve_vport(vport); - - return geneve_port->name; -} + struct vport *vport; -static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct ip_tunnel_info *egress_tun_info) -{ - struct geneve_port *geneve_port = geneve_vport(vport); - struct net *net = ovs_dp_get_net(vport->dp); - __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport; - __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); + vport = geneve_tnl_create(parms); + if (IS_ERR(vport)) + return vport; - /* Get tp_src and tp_dst, refert to geneve_build_header(). - */ - return ovs_tunnel_get_egress_info(egress_tun_info, - ovs_dp_get_net(vport->dp), - OVS_CB(skb)->egress_tun_info, - IPPROTO_UDP, skb->mark, sport, dport); + return ovs_netdev_link(vport, parms->name); } static struct vport_ops ovs_geneve_vport_ops = { .type = OVS_VPORT_TYPE_GENEVE, - .create = geneve_tnl_create, - .destroy = geneve_tnl_destroy, - .get_name = geneve_get_name, + .create = geneve_create, + .destroy = ovs_netdev_tunnel_destroy, .get_options = geneve_get_options, - .send = geneve_tnl_send, + .send = ovs_netdev_send, .owner = THIS_MODULE, .get_egress_tun_info = geneve_get_egress_tun_info, }; |