From 03f0d916aa0317592dda11bd17c7357858719b6c Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Wed, 7 Aug 2013 20:01:00 -0700 Subject: openvswitch: Mega flow implementation Add wildcarded flow support in kernel datapath. Wildcarded flow can improve OVS flow set up performance by avoid sending matching new flows to the user space program. The exact performance boost will largely dependent on wildcarded flow hit rate. In case all new flows hits wildcard flows, the flow set up rate is within 5% of that of linux bridge module. Pravin has made significant contributions to this patch. Including API clean ups and bug fixes. Signed-off-by: Pravin B Shelar Signed-off-by: Andy Zhou Signed-off-by: Jesse Gross --- net/openvswitch/flow.h | 96 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 77 insertions(+), 19 deletions(-) (limited to 'net/openvswitch/flow.h') diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 66ef7220293e..9674e45f6969 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011 Nicira, Inc. + * Copyright (c) 2007-2013 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -33,6 +33,8 @@ #include struct sk_buff; +struct sw_flow_mask; +struct flow_table; struct sw_flow_actions { struct rcu_head rcu; @@ -131,6 +133,8 @@ struct sw_flow { u32 hash; struct sw_flow_key key; + struct sw_flow_key unmasked_key; + struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; spinlock_t lock; /* Lock for values below. */ @@ -140,6 +144,25 @@ struct sw_flow { u8 tcp_flags; /* Union of seen TCP flags. */ }; +struct sw_flow_key_range { + size_t start; + size_t end; +}; + +static inline u16 ovs_sw_flow_key_range_actual_size(const struct sw_flow_key_range *range) +{ + return range->end - range->start; +} + +struct sw_flow_match { + struct sw_flow_key *key; + struct sw_flow_key_range range; + struct sw_flow_mask *mask; +}; + +void ovs_match_init(struct sw_flow_match *match, + struct sw_flow_key *key, struct sw_flow_mask *mask); + struct arp_eth_header { __be16 ar_hrd; /* format of hardware address */ __be16 ar_pro; /* format of protocol address */ @@ -159,21 +182,21 @@ void ovs_flow_exit(void); struct sw_flow *ovs_flow_alloc(void); void ovs_flow_deferred_free(struct sw_flow *); -void ovs_flow_free(struct sw_flow *flow); +void ovs_flow_free(struct sw_flow *, bool deferred); struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len); void ovs_flow_deferred_free_acts(struct sw_flow_actions *); -int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, - int *key_lenp); +int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); void ovs_flow_used(struct sw_flow *, struct sk_buff *); u64 ovs_flow_used_time(unsigned long flow_jiffies); - -int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); -int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, +int ovs_flow_to_nlattrs(const struct sw_flow_key *, + const struct sw_flow_key *, struct sk_buff *); +int ovs_match_from_nlattrs(struct sw_flow_match *match, + const struct nlattr *, const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, - const struct nlattr *attr); +int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, + const struct nlattr *attr); #define MAX_ACTIONS_BUFSIZE (32 * 1024) #define TBL_MIN_BUCKETS 1024 @@ -182,6 +205,7 @@ struct flow_table { struct flex_array *buckets; unsigned int count, n_buckets; struct rcu_head rcu; + struct list_head *mask_list; int node_ver; u32 hash_seed; bool keep_flows; @@ -197,22 +221,56 @@ static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table) return (table->count > table->n_buckets); } -struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, - struct sw_flow_key *key, int len); -void ovs_flow_tbl_destroy(struct flow_table *table); -void ovs_flow_tbl_deferred_destroy(struct flow_table *table); +struct sw_flow *ovs_flow_lookup(struct flow_table *, + const struct sw_flow_key *); +struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, + struct sw_flow_match *match); + +void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred); struct flow_table *ovs_flow_tbl_alloc(int new_size); struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); -void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, - struct sw_flow_key *key, int key_len); -void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); -struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); +void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow); +void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow); + +struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx); extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, - struct ovs_key_ipv4_tunnel *tun_key); + struct sw_flow_match *match, bool is_mask); int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ovs_key_ipv4_tunnel *tun_key); + const struct ovs_key_ipv4_tunnel *tun_key, + const struct ovs_key_ipv4_tunnel *output); + +bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + const struct sw_flow_key *key, int key_len); + +struct sw_flow_mask { + int ref_count; + struct rcu_head rcu; + struct list_head list; + struct sw_flow_key_range range; + struct sw_flow_key key; +}; + +static inline u16 +ovs_sw_flow_mask_actual_size(const struct sw_flow_mask *mask) +{ + return ovs_sw_flow_key_range_actual_size(&mask->range); +} + +static inline u16 +ovs_sw_flow_mask_size_roundup(const struct sw_flow_mask *mask) +{ + return roundup(ovs_sw_flow_mask_actual_size(mask), sizeof(u32)); +} +struct sw_flow_mask *ovs_sw_flow_mask_alloc(void); +void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *); +void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred); +void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *); +struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *, + const struct sw_flow_mask *); +void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src, + const struct sw_flow_mask *mask); #endif /* flow.h */ -- cgit v1.2.3-59-g8ed1b From a175a723301a8a4a9fedf9ce5b8ca586e7a97b40 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 22 Aug 2013 12:30:48 -0700 Subject: openvswitch: Add SCTP support This patch adds support for rewriting SCTP src,dst ports similar to the functionality already available for TCP/UDP. Rewriting SCTP ports is expensive due to double-recalculation of the SCTP checksums; this is performed to ensure that packets traversing OVS with invalid checksums will continue to the destination with any checksum corruption intact. Reviewed-by: Simon Horman Signed-off-by: Joe Stringer Signed-off-by: Ben Pfaff Signed-off-by: Jesse Gross --- include/uapi/linux/openvswitch.h | 6 ++++ net/openvswitch/Kconfig | 1 + net/openvswitch/actions.c | 39 ++++++++++++++++++++++++ net/openvswitch/datapath.c | 6 ++++ net/openvswitch/flow.c | 65 ++++++++++++++++++++++++++++++++++++++++ net/openvswitch/flow.h | 8 ++--- 6 files changed, 121 insertions(+), 4 deletions(-) (limited to 'net/openvswitch/flow.h') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index de1fa5d3780f..a74d375b439b 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -259,6 +259,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */ + OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */ #ifdef __KERNEL__ OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ @@ -333,6 +334,11 @@ struct ovs_key_udp { __be16 udp_dst; }; +struct ovs_key_sctp { + __be16 sctp_src; + __be16 sctp_dst; +}; + struct ovs_key_icmp { __u8 icmp_type; __u8 icmp_code; diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index bed30e69baa7..6ecf491ad509 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -4,6 +4,7 @@ config OPENVSWITCH tristate "Open vSwitch" + select LIBCRC32C ---help--- Open vSwitch is a multilayer Ethernet switch targeted at virtualized environments. In addition to supporting a variety of features diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 1f680222f4f7..65cfaa816075 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #include #include #include +#include #include "datapath.h" #include "vport.h" @@ -352,6 +354,39 @@ static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key) return 0; } +static int set_sctp(struct sk_buff *skb, + const struct ovs_key_sctp *sctp_port_key) +{ + struct sctphdr *sh; + int err; + unsigned int sctphoff = skb_transport_offset(skb); + + err = make_writable(skb, sctphoff + sizeof(struct sctphdr)); + if (unlikely(err)) + return err; + + sh = sctp_hdr(skb); + if (sctp_port_key->sctp_src != sh->source || + sctp_port_key->sctp_dst != sh->dest) { + __le32 old_correct_csum, new_csum, old_csum; + + old_csum = sh->checksum; + old_correct_csum = sctp_compute_cksum(skb, sctphoff); + + sh->source = sctp_port_key->sctp_src; + sh->dest = sctp_port_key->sctp_dst; + + new_csum = sctp_compute_cksum(skb, sctphoff); + + /* Carry any checksum errors through. */ + sh->checksum = old_csum ^ old_correct_csum ^ new_csum; + + skb->rxhash = 0; + } + + return 0; +} + static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) { struct vport *vport; @@ -461,6 +496,10 @@ static int execute_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_UDP: err = set_udp(skb, nla_data(nested_attr)); break; + + case OVS_KEY_ATTR_SCTP: + err = set_sctp(skb, nla_data(nested_attr)); + break; } return err; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d29cd9aa4a67..2aa13bd7f2b2 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -712,6 +712,12 @@ static int validate_set(const struct nlattr *a, return validate_tp_port(flow_key); + case OVS_KEY_ATTR_SCTP: + if (flow_key->ip.proto != IPPROTO_SCTP) + return -EINVAL; + + return validate_tp_port(flow_key); + default: return -EINVAL; } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 1fceb9653598..2b4785590b56 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,7 @@ static bool ovs_match_validate(const struct sw_flow_match *match, | (1 << OVS_KEY_ATTR_IPV6) | (1 << OVS_KEY_ATTR_TCP) | (1 << OVS_KEY_ATTR_UDP) + | (1 << OVS_KEY_ATTR_SCTP) | (1 << OVS_KEY_ATTR_ICMP) | (1 << OVS_KEY_ATTR_ICMPV6) | (1 << OVS_KEY_ATTR_ARP) @@ -159,6 +161,12 @@ static bool ovs_match_validate(const struct sw_flow_match *match, mask_allowed |= 1 << OVS_KEY_ATTR_UDP; } + if (match->key->ip.proto == IPPROTO_SCTP) { + key_expected |= 1 << OVS_KEY_ATTR_SCTP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; + } + if (match->key->ip.proto == IPPROTO_TCP) { key_expected |= 1 << OVS_KEY_ATTR_TCP; if (match->mask && (match->mask->key.ip.proto == 0xff)) @@ -185,6 +193,12 @@ static bool ovs_match_validate(const struct sw_flow_match *match, mask_allowed |= 1 << OVS_KEY_ATTR_UDP; } + if (match->key->ip.proto == IPPROTO_SCTP) { + key_expected |= 1 << OVS_KEY_ATTR_SCTP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; + } + if (match->key->ip.proto == IPPROTO_TCP) { key_expected |= 1 << OVS_KEY_ATTR_TCP; if (match->mask && (match->mask->key.ip.proto == 0xff)) @@ -280,6 +294,12 @@ static bool udphdr_ok(struct sk_buff *skb) sizeof(struct udphdr)); } +static bool sctphdr_ok(struct sk_buff *skb) +{ + return pskb_may_pull(skb, skb_transport_offset(skb) + + sizeof(struct sctphdr)); +} + static bool icmphdr_ok(struct sk_buff *skb) { return pskb_may_pull(skb, skb_transport_offset(skb) + @@ -891,6 +911,12 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) key->ipv4.tp.src = udp->source; key->ipv4.tp.dst = udp->dest; } + } else if (key->ip.proto == IPPROTO_SCTP) { + if (sctphdr_ok(skb)) { + struct sctphdr *sctp = sctp_hdr(skb); + key->ipv4.tp.src = sctp->source; + key->ipv4.tp.dst = sctp->dest; + } } else if (key->ip.proto == IPPROTO_ICMP) { if (icmphdr_ok(skb)) { struct icmphdr *icmp = icmp_hdr(skb); @@ -953,6 +979,12 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) key->ipv6.tp.src = udp->source; key->ipv6.tp.dst = udp->dest; } + } else if (key->ip.proto == NEXTHDR_SCTP) { + if (sctphdr_ok(skb)) { + struct sctphdr *sctp = sctp_hdr(skb); + key->ipv6.tp.src = sctp->source; + key->ipv6.tp.dst = sctp->dest; + } } else if (key->ip.proto == NEXTHDR_ICMP) { if (icmp6hdr_ok(skb)) { error = parse_icmpv6(skb, key, nh_len); @@ -1087,6 +1119,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), + [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), @@ -1500,6 +1533,24 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, attrs &= ~(1 << OVS_KEY_ATTR_UDP); } + if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { + const struct ovs_key_sctp *sctp_key; + + sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); + if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { + SW_FLOW_KEY_PUT(match, ipv4.tp.src, + sctp_key->sctp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + sctp_key->sctp_dst, is_mask); + } else { + SW_FLOW_KEY_PUT(match, ipv6.tp.src, + sctp_key->sctp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + sctp_key->sctp_dst, is_mask); + } + attrs &= ~(1 << OVS_KEY_ATTR_SCTP); + } + if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { const struct ovs_key_icmp *icmp_key; @@ -1843,6 +1894,20 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, udp_key->udp_src = output->ipv6.tp.src; udp_key->udp_dst = output->ipv6.tp.dst; } + } else if (swkey->ip.proto == IPPROTO_SCTP) { + struct ovs_key_sctp *sctp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); + if (!nla) + goto nla_put_failure; + sctp_key = nla_data(nla); + if (swkey->eth.type == htons(ETH_P_IP)) { + sctp_key->sctp_src = swkey->ipv4.tp.src; + sctp_key->sctp_dst = swkey->ipv4.tp.dst; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + sctp_key->sctp_src = swkey->ipv6.tp.src; + sctp_key->sctp_dst = swkey->ipv6.tp.dst; + } } else if (swkey->eth.type == htons(ETH_P_IP) && swkey->ip.proto == IPPROTO_ICMP) { struct ovs_key_icmp *icmp_key; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 9674e45f6969..d08dcf78dbf3 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -99,8 +99,8 @@ struct sw_flow_key { } addr; union { struct { - __be16 src; /* TCP/UDP source port. */ - __be16 dst; /* TCP/UDP destination port. */ + __be16 src; /* TCP/UDP/SCTP source port. */ + __be16 dst; /* TCP/UDP/SCTP destination port. */ } tp; struct { u8 sha[ETH_ALEN]; /* ARP source hardware address. */ @@ -115,8 +115,8 @@ struct sw_flow_key { } addr; __be32 label; /* IPv6 flow label. */ struct { - __be16 src; /* TCP/UDP source port. */ - __be16 dst; /* TCP/UDP destination port. */ + __be16 src; /* TCP/UDP/SCTP source port. */ + __be16 dst; /* TCP/UDP/SCTP destination port. */ } tp; struct { struct in6_addr target; /* ND target address. */ -- cgit v1.2.3-59-g8ed1b From 02237373b1c61a09a4db329545e39cffc48910d5 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Thu, 22 Aug 2013 12:12:57 -0700 Subject: openvswitch: Rename key_len to key_end Key_end is a better name describing the ending boundary than key_len. Rename those variables to make it less confusing. Signed-off-by: Andy Zhou Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 31 ++++++++++++++++--------------- net/openvswitch/flow.h | 2 +- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'net/openvswitch/flow.h') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 2b4785590b56..80bcb96467a5 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -997,10 +997,11 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) return 0; } -static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len) +static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, + int key_end) { return jhash2((u32 *)((u8 *)key + key_start), - DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0); + DIV_ROUND_UP(key_end - key_start, sizeof(u32)), 0); } static int flow_key_start(const struct sw_flow_key *key) @@ -1012,31 +1013,31 @@ static int flow_key_start(const struct sw_flow_key *key) } static bool __cmp_key(const struct sw_flow_key *key1, - const struct sw_flow_key *key2, int key_start, int key_len) + const struct sw_flow_key *key2, int key_start, int key_end) { return !memcmp((u8 *)key1 + key_start, - (u8 *)key2 + key_start, (key_len - key_start)); + (u8 *)key2 + key_start, (key_end - key_start)); } static bool __flow_cmp_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_start, int key_len) + const struct sw_flow_key *key, int key_start, int key_end) { - return __cmp_key(&flow->key, key, key_start, key_len); + return __cmp_key(&flow->key, key, key_start, key_end); } static bool __flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_start, int key_len) + const struct sw_flow_key *key, int key_start, int key_end) { - return __cmp_key(&flow->unmasked_key, key, key_start, key_len); + return __cmp_key(&flow->unmasked_key, key, key_start, key_end); } bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_len) + const struct sw_flow_key *key, int key_end) { int key_start; key_start = flow_key_start(key); - return __flow_cmp_unmasked_key(flow, key, key_start, key_len); + return __flow_cmp_unmasked_key(flow, key, key_start, key_end); } @@ -1044,11 +1045,11 @@ struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, struct sw_flow_match *match) { struct sw_flow_key *unmasked = match->key; - int key_len = match->range.end; + int key_end = match->range.end; struct sw_flow *flow; flow = ovs_flow_lookup(table, unmasked); - if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_len))) + if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end))) flow = NULL; return flow; @@ -1061,16 +1062,16 @@ static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table, struct sw_flow *flow; struct hlist_head *head; int key_start = mask->range.start; - int key_len = mask->range.end; + int key_end = mask->range.end; u32 hash; struct sw_flow_key masked_key; ovs_flow_key_mask(&masked_key, flow_key, mask); - hash = ovs_flow_hash(&masked_key, key_start, key_len); + hash = ovs_flow_hash(&masked_key, key_start, key_end); head = find_bucket(table, hash); hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { if (flow->mask == mask && - __flow_cmp_key(flow, &masked_key, key_start, key_len)) + __flow_cmp_key(flow, &masked_key, key_start, key_end)) return flow; } return NULL; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index d08dcf78dbf3..e79305184b79 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -243,7 +243,7 @@ int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output); bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_len); + const struct sw_flow_key *key, int key_end); struct sw_flow_mask { int ref_count; -- cgit v1.2.3-59-g8ed1b From 5828cd9a68873df1340b420371c02c47647878fb Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Tue, 27 Aug 2013 13:02:21 -0700 Subject: openvswitch: optimize flow compare and mask functions Make sure the sw_flow_key structure and valid mask boundaries are always machine word aligned. Optimize the flow compare and mask operations using machine word size operations. This patch improves throughput on average by 15% when CPU is the bottleneck of forwarding packets. This patch is inspired by ideas and code from a patch submitted by Peter Klausler titled "replace memcmp() with specialized comparator". However, The original patch only optimizes for architectures support unaligned machine word access. This patch optimizes for all architectures. Signed-off-by: Andy Zhou Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 64 +++++++++++++++++++++++++++++++++----------------- net/openvswitch/flow.h | 19 +-------------- 2 files changed, 44 insertions(+), 39 deletions(-) (limited to 'net/openvswitch/flow.h') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 80bcb96467a5..ad1aeeb4f373 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -54,8 +54,8 @@ static void update_range__(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) { struct sw_flow_key_range *range = NULL; - size_t start = offset; - size_t end = offset + size; + size_t start = rounddown(offset, sizeof(long)); + size_t end = roundup(offset + size, sizeof(long)); if (!is_mask) range = &match->range; @@ -102,6 +102,11 @@ static void update_range__(struct sw_flow_match *match, } \ } while (0) +static u16 range_n_bytes(const struct sw_flow_key_range *range) +{ + return range->end - range->start; +} + void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask) @@ -370,16 +375,17 @@ static bool icmp6hdr_ok(struct sk_buff *skb) void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask) { - u8 *m = (u8 *)&mask->key + mask->range.start; - u8 *s = (u8 *)src + mask->range.start; - u8 *d = (u8 *)dst + mask->range.start; + const long *m = (long *)((u8 *)&mask->key + mask->range.start); + const long *s = (long *)((u8 *)src + mask->range.start); + long *d = (long *)((u8 *)dst + mask->range.start); int i; - memset(dst, 0, sizeof(*dst)); - for (i = 0; i < ovs_sw_flow_mask_size_roundup(mask); i++) { - *d = *s & *m; - d++, s++, m++; - } + /* The memory outside of the 'mask->range' are not set since + * further operations on 'dst' only uses contents within + * 'mask->range'. + */ + for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) + *d++ = *s++ & *m++; } #define TCP_FLAGS_OFFSET 13 @@ -1000,8 +1006,13 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_end) { - return jhash2((u32 *)((u8 *)key + key_start), - DIV_ROUND_UP(key_end - key_start, sizeof(u32)), 0); + u32 *hash_key = (u32 *)((u8 *)key + key_start); + int hash_u32s = (key_end - key_start) >> 2; + + /* Make sure number of hash bytes are multiple of u32. */ + BUILD_BUG_ON(sizeof(long) % sizeof(u32)); + + return jhash2(hash_key, hash_u32s, 0); } static int flow_key_start(const struct sw_flow_key *key) @@ -1009,17 +1020,25 @@ static int flow_key_start(const struct sw_flow_key *key) if (key->tun_key.ipv4_dst) return 0; else - return offsetof(struct sw_flow_key, phy); + return rounddown(offsetof(struct sw_flow_key, phy), + sizeof(long)); } static bool __cmp_key(const struct sw_flow_key *key1, const struct sw_flow_key *key2, int key_start, int key_end) { - return !memcmp((u8 *)key1 + key_start, - (u8 *)key2 + key_start, (key_end - key_start)); + const long *cp1 = (long *)((u8 *)key1 + key_start); + const long *cp2 = (long *)((u8 *)key2 + key_start); + long diffs = 0; + int i; + + for (i = key_start; i < key_end; i += sizeof(long)) + diffs |= *cp1++ ^ *cp2++; + + return diffs == 0; } -static bool __flow_cmp_key(const struct sw_flow *flow, +static bool __flow_cmp_masked_key(const struct sw_flow *flow, const struct sw_flow_key *key, int key_start, int key_end) { return __cmp_key(&flow->key, key, key_start, key_end); @@ -1056,7 +1075,7 @@ struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, } static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table, - const struct sw_flow_key *flow_key, + const struct sw_flow_key *unmasked, struct sw_flow_mask *mask) { struct sw_flow *flow; @@ -1066,12 +1085,13 @@ static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table, u32 hash; struct sw_flow_key masked_key; - ovs_flow_key_mask(&masked_key, flow_key, mask); + ovs_flow_key_mask(&masked_key, unmasked, mask); hash = ovs_flow_hash(&masked_key, key_start, key_end); head = find_bucket(table, hash); hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { if (flow->mask == mask && - __flow_cmp_key(flow, &masked_key, key_start, key_end)) + __flow_cmp_masked_key(flow, &masked_key, + key_start, key_end)) return flow; } return NULL; @@ -1961,6 +1981,8 @@ nla_put_failure: * Returns zero if successful or a negative error code. */ int ovs_flow_init(void) { + BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, 0, NULL); if (flow_cache == NULL) @@ -2016,7 +2038,7 @@ static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a, return (a->range.end == b->range.end) && (a->range.start == b->range.start) - && (memcmp(a_, b_, ovs_sw_flow_mask_actual_size(a)) == 0); + && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); } struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, @@ -2053,5 +2075,5 @@ static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask, u8 *m = (u8 *)&mask->key + range->start; mask->range = *range; - memset(m, val, ovs_sw_flow_mask_size_roundup(mask)); + memset(m, val, range_n_bytes(range)); } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index e79305184b79..b65f885ac3dc 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -125,7 +125,7 @@ struct sw_flow_key { } nd; } ipv6; }; -}; +} __aligned(__alignof__(long)); struct sw_flow { struct rcu_head rcu; @@ -149,11 +149,6 @@ struct sw_flow_key_range { size_t end; }; -static inline u16 ovs_sw_flow_key_range_actual_size(const struct sw_flow_key_range *range) -{ - return range->end - range->start; -} - struct sw_flow_match { struct sw_flow_key *key; struct sw_flow_key_range range; @@ -253,18 +248,6 @@ struct sw_flow_mask { struct sw_flow_key key; }; -static inline u16 -ovs_sw_flow_mask_actual_size(const struct sw_flow_mask *mask) -{ - return ovs_sw_flow_key_range_actual_size(&mask->range); -} - -static inline u16 -ovs_sw_flow_mask_size_roundup(const struct sw_flow_mask *mask) -{ - return roundup(ovs_sw_flow_mask_actual_size(mask), sizeof(u32)); -} - struct sw_flow_mask *ovs_sw_flow_mask_alloc(void); void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *); void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred); -- cgit v1.2.3-59-g8ed1b From 0d40f75bdab241868c0eb6f97aef9f8b3a66f7b3 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Thu, 5 Sep 2013 12:17:05 -0700 Subject: openvswitch: Fix alignment of struct sw_flow_key. sw_flow_key alignment was declared as " __aligned(__alignof__(long))". However, this breaks on the m68k architecture where long is 32 bit in size but 16 bit aligned by default. This aligns to the size of a long to ensure that we can always do comparsions in full long-sized chunks. It also adds an additional build check to catch any reduction in alignment. CC: Andy Zhou Reported-by: Fengguang Wu Reported-by: Geert Uytterhoeven Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 1 + net/openvswitch/flow.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'net/openvswitch/flow.h') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index ad1aeeb4f373..fb36f8565161 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -1981,6 +1981,7 @@ nla_put_failure: * Returns zero if successful or a negative error code. */ int ovs_flow_init(void) { + BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index b65f885ac3dc..212fbf7510c4 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -125,7 +125,7 @@ struct sw_flow_key { } nd; } ipv6; }; -} __aligned(__alignof__(long)); +} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ struct sw_flow { struct rcu_head rcu; -- cgit v1.2.3-59-g8ed1b