Diffstat (limited to 'net/netfilter')
-rw-r--r--  net/netfilter/Makefile                        |   3
-rw-r--r--  net/netfilter/core.c                          |  20
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_gen.h       |   2
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_ip.c        |  26
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_ipmac.c     |  18
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_port.c      |  41
-rw-r--r--  net/netfilter/ipset/ip_set_core.c             | 212
-rw-r--r--  net/netfilter/ipset/ip_set_getport.c          |  28
-rw-r--r--  net/netfilter/ipset/ip_set_hash_gen.h         |   4
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ip.c          |  10
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipmac.c       |   8
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipmark.c      |   8
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipport.c      |   8
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipportip.c    |   8
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipportnet.c   |  24
-rw-r--r--  net/netfilter/ipset/ip_set_hash_mac.c         |   6
-rw-r--r--  net/netfilter/ipset/ip_set_hash_net.c         |  24
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netiface.c    |  47
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netnet.c      |  28
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netport.c     |  24
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netportnet.c  |  28
-rw-r--r--  net/netfilter/ipset/ip_set_list_set.c         |   4
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c               |  47
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c                |  12
-rw-r--r--  net/netfilter/ipvs/ip_vs_ovf.c                |   2
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c               |  18
-rw-r--r--  net/netfilter/nf_conntrack_core.c             |   2
-rw-r--r--  net/netfilter/nf_conntrack_ecache.c           |  23
-rw-r--r--  net/netfilter/nf_conntrack_extend.c           |  21
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c          |  76
-rw-r--r--  net/netfilter/nf_conntrack_proto_icmp.c       |   6
-rw-r--r--  net/netfilter/nf_flow_table_core.c            | 177
-rw-r--r--  net/netfilter/nf_flow_table_inet.c            |  25
-rw-r--r--  net/netfilter/nf_flow_table_offload.c         | 851
-rw-r--r--  net/netfilter/nf_tables_api.c                 | 614
-rw-r--r--  net/netfilter/nf_tables_offload.c             | 275
-rw-r--r--  net/netfilter/nft_chain_filter.c              |  45
-rw-r--r--  net/netfilter/nft_cmp.c                       |   6
-rw-r--r--  net/netfilter/nft_flow_offload.c              |   5
-rw-r--r--  net/netfilter/nft_meta.c                      |  18
-rw-r--r--  net/netfilter/nft_payload.c                   |  94
-rw-r--r--  net/netfilter/xt_HMARK.c                      |   6
-rw-r--r--  net/netfilter/xt_time.c                       |  19
43 files changed, 2298 insertions(+), 625 deletions(-)
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 4fc075b612fe..5e9b2eb24349 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
# flow table infrastructure
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
+ nf_flow_table_offload.o
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 5d5bdf450091..78f046ec506f 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -536,6 +536,26 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
}
EXPORT_SYMBOL(nf_hook_slow);
+void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
+ const struct nf_hook_entries *e)
+{
+ struct sk_buff *skb, *next;
+ struct list_head sublist;
+ int ret;
+
+ INIT_LIST_HEAD(&sublist);
+
+ list_for_each_entry_safe(skb, next, head, list) {
+ skb_list_del_init(skb);
+ ret = nf_hook_slow(skb, state, e, 0);
+ if (ret == 1)
+ list_add_tail(&skb->list, &sublist);
+ }
+ /* Put passed packets back on main list */
+ list_splice(&sublist, head);
+}
+EXPORT_SYMBOL(nf_hook_slow_list);
+
/* This needs to be compiled in any case to avoid dependencies between the
* nfnetlink_queue code and nf_conntrack.
*/
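A minimal caller sketch for the new helper (illustrative only, not part of the patch): the list variant runs the slow path per skb and keeps only the packets whose verdict lets them through.

/* Hypothetical caller: filter a whole skb list through one hook point.
 * Assumes `state` was filled in by nf_hook_state_init() and `e` is the
 * rcu-dereferenced hook entries, exactly as in the single-skb path.
 */
static void example_filter_list(struct list_head *head,
				struct nf_hook_state *state,
				const struct nf_hook_entries *e)
{
	if (!list_empty(head))
		nf_hook_slow_list(head, state, e);
	/* head now holds only the skbs that were accepted */
}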
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 063df74b4647..1abd6f0dc227 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -192,7 +192,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
#ifndef IP_SET_BITMAP_STORED_TIMEOUT
-static inline bool
+static bool
mtype_is_filled(const struct mtype_elem *x)
{
return true;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 11ff9d4a7006..abe8f77d7d23 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -55,7 +55,7 @@ struct bitmap_ip_adt_elem {
u16 id;
};
-static inline u32
+static u32
ip_to_id(const struct bitmap_ip *m, u32 ip)
{
return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts;
@@ -63,33 +63,33 @@ ip_to_id(const struct bitmap_ip *m, u32 ip)
/* Common functions */
-static inline int
+static int
bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e,
struct bitmap_ip *map, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize)
{
return !!test_bit(id, map->members);
}
-static inline int
+static int
bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
u32 flags, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
{
return !test_and_clear_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
size_t dsize)
{
@@ -97,7 +97,7 @@ bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
htonl(map->first_ip + id * map->hosts));
}
-static inline int
+static int
bitmap_ip_do_head(struct sk_buff *skb, const struct bitmap_ip *map)
{
return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
@@ -237,6 +237,18 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
return true;
}
+static u32
+range_to_mask(u32 from, u32 to, u8 *bits)
+{
+ u32 mask = 0xFFFFFFFE;
+
+ *bits = 32;
+ while (--(*bits) > 0 && mask && (to & mask) != from)
+ mask <<= 1;
+
+ return mask;
+}
+
static int
bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
u32 flags)
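A worked example for range_to_mask() above (values illustrative): an aligned block collapses onto its prefix, anything else falls through.

/* 192.168.0.0 - 192.168.0.255 is an aligned /24 block: */
u8 bits;
u32 mask = range_to_mask(0xC0A80000, 0xC0A800FF, &bits);
/* -> mask == 0xFFFFFF00, bits == 24, since (to & mask) == from.
 * For a range that is not an aligned power-of-two block the loop
 * exhausts the mask and returns mask == 0 with bits == 0.
 */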
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 1d4e63326e68..b618713297da 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -65,7 +65,7 @@ struct bitmap_ipmac_elem {
unsigned char filled;
} __aligned(__alignof__(u64));
-static inline u32
+static u32
ip_to_id(const struct bitmap_ipmac *m, u32 ip)
{
return ip - m->first_ip;
@@ -79,7 +79,7 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
/* Common functions */
-static inline int
+static int
bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
const struct bitmap_ipmac *map, size_t dsize)
{
@@ -94,7 +94,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
return -EAGAIN;
}
-static inline int
+static int
bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
{
const struct bitmap_ipmac_elem *elem;
@@ -106,13 +106,13 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
return elem->filled == MAC_FILLED;
}
-static inline int
+static int
bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem)
{
return elem->filled == MAC_FILLED;
}
-static inline int
+static int
bitmap_ipmac_add_timeout(unsigned long *timeout,
const struct bitmap_ipmac_adt_elem *e,
const struct ip_set_ext *ext, struct ip_set *set,
@@ -139,7 +139,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
return 0;
}
-static inline int
+static int
bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
struct bitmap_ipmac *map, u32 flags, size_t dsize)
{
@@ -177,14 +177,14 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
return IPSET_ADD_STORE_PLAIN_TIMEOUT;
}
-static inline int
+static int
bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
struct bitmap_ipmac *map)
{
return !test_and_clear_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
u32 id, size_t dsize)
{
@@ -197,7 +197,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether));
}
-static inline int
+static int
bitmap_ipmac_do_head(struct sk_buff *skb, const struct bitmap_ipmac *map)
{
return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 704a0dda1609..23d6095cb196 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -46,7 +46,7 @@ struct bitmap_port_adt_elem {
u16 id;
};
-static inline u16
+static u16
port_to_id(const struct bitmap_port *m, u16 port)
{
return port - m->first_port;
@@ -54,34 +54,34 @@ port_to_id(const struct bitmap_port *m, u16 port)
/* Common functions */
-static inline int
+static int
bitmap_port_do_test(const struct bitmap_port_adt_elem *e,
const struct bitmap_port *map, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize)
{
return !!test_bit(id, map->members);
}
-static inline int
+static int
bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
struct bitmap_port *map, u32 flags, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_port_do_del(const struct bitmap_port_adt_elem *e,
struct bitmap_port *map)
{
return !test_and_clear_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
size_t dsize)
{
@@ -89,13 +89,40 @@ bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
htons(map->first_port + id));
}
-static inline int
+static int
bitmap_port_do_head(struct sk_buff *skb, const struct bitmap_port *map)
{
return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) ||
nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
}
+static bool
+ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
+{
+ bool ret;
+ u8 proto;
+
+ switch (pf) {
+ case NFPROTO_IPV4:
+ ret = ip_set_get_ip4_port(skb, src, port, &proto);
+ break;
+ case NFPROTO_IPV6:
+ ret = ip_set_get_ip6_port(skb, src, port, &proto);
+ break;
+ default:
+ return false;
+ }
+ if (!ret)
+ return ret;
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ return true;
+ default:
+ return false;
+ }
+}
+
static int
bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
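The helper becomes file-local because bitmap:port is its only remaining user; a condensed sketch of the call pattern in the kadt path below (simplified from bitmap_port_kadt(), argument names as in the ipset tree):

	__be16 __port;
	u16 port;

	/* Only TCP/UDP ports pass; other protocols fail the lookup. */
	if (!ip_set_get_ip_port(skb, opt->family,
				opt->flags & IPSET_DIM_ONE_SRC, &__port))
		return -EINVAL;
	port = ntohs(__port);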
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index d73d1828216a..169e0a04f814 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -35,7 +35,7 @@ struct ip_set_net {
static unsigned int ip_set_net_id __read_mostly;
-static inline struct ip_set_net *ip_set_pernet(struct net *net)
+static struct ip_set_net *ip_set_pernet(struct net *net)
{
return net_generic(net, ip_set_net_id);
}
@@ -67,13 +67,13 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
* serialized by ip_set_type_mutex.
*/
-static inline void
+static void
ip_set_type_lock(void)
{
mutex_lock(&ip_set_type_mutex);
}
-static inline void
+static void
ip_set_type_unlock(void)
{
mutex_unlock(&ip_set_type_mutex);
@@ -277,7 +277,7 @@ ip_set_free(void *members)
}
EXPORT_SYMBOL_GPL(ip_set_free);
-static inline bool
+static bool
flag_nested(const struct nlattr *nla)
{
return nla->nla_type & NLA_F_NESTED;
@@ -327,6 +327,83 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
}
EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
+static u32
+ip_set_timeout_get(const unsigned long *timeout)
+{
+ u32 t;
+
+ if (*timeout == IPSET_ELEM_PERMANENT)
+ return 0;
+
+ t = jiffies_to_msecs(*timeout - jiffies) / MSEC_PER_SEC;
+ /* Zero value in userspace means no timeout */
+ return t == 0 ? 1 : t;
+}
+
+static char *
+ip_set_comment_uget(struct nlattr *tb)
+{
+ return nla_data(tb);
+}
+
+/* Called from uadd only, protected by the set spinlock.
+ * The kadt functions don't use the comment extensions in any way.
+ */
+void
+ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
+ const struct ip_set_ext *ext)
+{
+ struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
+ size_t len = ext->comment ? strlen(ext->comment) : 0;
+
+ if (unlikely(c)) {
+ set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+ kfree_rcu(c, rcu);
+ rcu_assign_pointer(comment->c, NULL);
+ }
+ if (!len)
+ return;
+ if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
+ len = IPSET_MAX_COMMENT_SIZE;
+ c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
+ if (unlikely(!c))
+ return;
+ strlcpy(c->str, ext->comment, len + 1);
+ set->ext_size += sizeof(*c) + strlen(c->str) + 1;
+ rcu_assign_pointer(comment->c, c);
+}
+EXPORT_SYMBOL_GPL(ip_set_init_comment);
+
+/* Used only when dumping a set, protected by rcu_read_lock() */
+static int
+ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
+{
+ struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
+
+ if (!c)
+ return 0;
+ return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
+}
+
+/* Called from uadd/udel, flush or the garbage collectors protected
+ * by the set spinlock.
+ * Called when the set is destroyed and when there can't be any user
+ * of the set data anymore.
+ */
+static void
+ip_set_comment_free(struct ip_set *set, void *ptr)
+{
+ struct ip_set_comment *comment = ptr;
+ struct ip_set_comment_rcu *c;
+
+ c = rcu_dereference_protected(comment->c, 1);
+ if (unlikely(!c))
+ return;
+ set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+ kfree_rcu(c, rcu);
+ rcu_assign_pointer(comment->c, NULL);
+}
+
typedef void (*destroyer)(struct ip_set *, void *);
/* ipset data extension types, in size order */
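A short illustration of the timeout encoding handled by the ip_set_timeout_get() helper added above (sketch, values arbitrary):

/* IPSET_ELEM_PERMANENT maps to 0 ("no timeout") towards userspace;
 * otherwise the remaining jiffies become whole seconds, clamped up
 * to 1 so an entry about to expire is not reported as permanent.
 */
unsigned long t = jiffies + msecs_to_jiffies(90 * MSEC_PER_SEC);
u32 secs = ip_set_timeout_get(&t);	/* ~90; never 0 for a live entry */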
@@ -353,12 +430,12 @@ const struct ip_set_ext_type ip_set_extensions[] = {
.flag = IPSET_FLAG_WITH_COMMENT,
.len = sizeof(struct ip_set_comment),
.align = __alignof__(struct ip_set_comment),
- .destroy = (destroyer) ip_set_comment_free,
+ .destroy = ip_set_comment_free,
},
};
EXPORT_SYMBOL_GPL(ip_set_extensions);
-static inline bool
+static bool
add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
{
return ip_set_extensions[id].flag ?
@@ -448,6 +525,46 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
}
EXPORT_SYMBOL_GPL(ip_set_get_extensions);
+static u64
+ip_set_get_bytes(const struct ip_set_counter *counter)
+{
+ return (u64)atomic64_read(&(counter)->bytes);
+}
+
+static u64
+ip_set_get_packets(const struct ip_set_counter *counter)
+{
+ return (u64)atomic64_read(&(counter)->packets);
+}
+
+static bool
+ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
+{
+ return nla_put_net64(skb, IPSET_ATTR_BYTES,
+ cpu_to_be64(ip_set_get_bytes(counter)),
+ IPSET_ATTR_PAD) ||
+ nla_put_net64(skb, IPSET_ATTR_PACKETS,
+ cpu_to_be64(ip_set_get_packets(counter)),
+ IPSET_ATTR_PAD);
+}
+
+static bool
+ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
+{
+ /* Send nonzero parameters only */
+ return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
+ nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+ cpu_to_be64((u64)skbinfo->skbmark << 32 |
+ skbinfo->skbmarkmask),
+ IPSET_ATTR_PAD)) ||
+ (skbinfo->skbprio &&
+ nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+ cpu_to_be32(skbinfo->skbprio))) ||
+ (skbinfo->skbqueue &&
+ nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+ cpu_to_be16(skbinfo->skbqueue)));
+}
+
int
ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
const void *e, bool active)
@@ -473,6 +590,55 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
}
EXPORT_SYMBOL_GPL(ip_set_put_extensions);
+static bool
+ip_set_match_counter(u64 counter, u64 match, u8 op)
+{
+ switch (op) {
+ case IPSET_COUNTER_NONE:
+ return true;
+ case IPSET_COUNTER_EQ:
+ return counter == match;
+ case IPSET_COUNTER_NE:
+ return counter != match;
+ case IPSET_COUNTER_LT:
+ return counter < match;
+ case IPSET_COUNTER_GT:
+ return counter > match;
+ }
+ return false;
+}
+
+static void
+ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
+{
+ atomic64_add((long long)bytes, &(counter)->bytes);
+}
+
+static void
+ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
+{
+ atomic64_add((long long)packets, &(counter)->packets);
+}
+
+static void
+ip_set_update_counter(struct ip_set_counter *counter,
+ const struct ip_set_ext *ext, u32 flags)
+{
+ if (ext->packets != ULLONG_MAX &&
+ !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
+ ip_set_add_bytes(ext->bytes, counter);
+ ip_set_add_packets(ext->packets, counter);
+ }
+}
+
+static void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+ const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ mext->skbinfo = *skbinfo;
+}
+
bool
ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags, void *data)
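Taken together, the counter helpers let a match such as "packets greater than N" be evaluated against an element's extension; a trivial sketch:

/* True when the element has seen more than n packets (sketch). */
static bool example_packets_gt(const struct ip_set_counter *counter, u64 n)
{
	return ip_set_match_counter(ip_set_get_packets(counter), n,
				    IPSET_COUNTER_GT);
}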
@@ -508,7 +674,7 @@ EXPORT_SYMBOL_GPL(ip_set_match_extensions);
* The set behind an index may change by swapping only, from userspace.
*/
-static inline void
+static void
__ip_set_get(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -516,7 +682,7 @@ __ip_set_get(struct ip_set *set)
write_unlock_bh(&ip_set_ref_lock);
}
-static inline void
+static void
__ip_set_put(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -528,7 +694,7 @@ __ip_set_put(struct ip_set *set)
/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
* a separate reference counter
*/
-static inline void
+static void
__ip_set_put_netlink(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -543,7 +709,7 @@ __ip_set_put_netlink(struct ip_set *set)
* so it can't be destroyed (or changed) under our foot.
*/
-static inline struct ip_set *
+static struct ip_set *
ip_set_rcu_get(struct net *net, ip_set_id_t index)
{
struct ip_set *set;
@@ -672,7 +838,7 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname);
*
*/
-static inline void
+static void
__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
{
struct ip_set *set;
@@ -1255,6 +1421,30 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
#define DUMP_TYPE(arg) (((u32)(arg)) & 0x0000FFFF)
#define DUMP_FLAGS(arg) (((u32)(arg)) >> 16)
+int
+ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
+{
+ u32 cadt_flags = 0;
+
+ if (SET_WITH_TIMEOUT(set))
+ if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+ htonl(set->timeout))))
+ return -EMSGSIZE;
+ if (SET_WITH_COUNTER(set))
+ cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
+ if (SET_WITH_COMMENT(set))
+ cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+ if (SET_WITH_SKBINFO(set))
+ cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
+ if (SET_WITH_FORCEADD(set))
+ cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
+
+ if (!cadt_flags)
+ return 0;
+ return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
+}
+EXPORT_SYMBOL_GPL(ip_set_put_flags);
+
static int
ip_set_dump_done(struct netlink_callback *cb)
{
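The newly exported ip_set_put_flags() is meant to be called from a set type's ->head() dump; a rough sketch of such a call site (structure assumed, names illustrative):

static int example_head(struct ip_set *set, struct sk_buff *skb)
{
	struct nlattr *nested = nla_nest_start(skb, IPSET_ATTR_DATA);

	if (!nested)
		return -EMSGSIZE;
	if (ip_set_put_flags(skb, set))	/* timeout + cadt flags */
		goto nla_put_failure;
	nla_nest_end(skb, nested);
	return 0;
nla_put_failure:
	nla_nest_cancel(skb, nested);
	return -EMSGSIZE;
}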
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 2b8f959574b4..36615eb3eae1 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -148,31 +148,3 @@ ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
}
EXPORT_SYMBOL_GPL(ip_set_get_ip6_port);
#endif
-
-bool
-ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
-{
- bool ret;
- u8 proto;
-
- switch (pf) {
- case NFPROTO_IPV4:
- ret = ip_set_get_ip4_port(skb, src, port, &proto);
- break;
- case NFPROTO_IPV6:
- ret = ip_set_get_ip6_port(skb, src, port, &proto);
- break;
- default:
- return false;
- }
- if (!ret)
- return ret;
- switch (proto) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- return true;
- default:
- return false;
- }
-}
-EXPORT_SYMBOL_GPL(ip_set_get_ip_port);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index d098d87bc331..7480ce55b5c8 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -39,7 +39,7 @@
#ifdef IP_SET_HASH_WITH_MULTI
#define AHASH_MAX(h) ((h)->ahash_max)
-static inline u8
+static u8
tune_ahash_max(u8 curr, u32 multi)
{
u32 n;
@@ -909,7 +909,7 @@ out:
return ret;
}
-static inline int
+static int
mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
struct ip_set_ext *mext, struct ip_set *set, u32 flags)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index f4432d9fcad0..5d6d68eaf6a9 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -44,7 +44,7 @@ struct hash_ip4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ip4_data_equal(const struct hash_ip4_elem *e1,
const struct hash_ip4_elem *e2,
u32 *multi)
@@ -63,7 +63,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e)
{
next->ip = e->ip;
@@ -171,7 +171,7 @@ struct hash_ip6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
const struct hash_ip6_elem *ip2,
u32 *multi)
@@ -179,7 +179,7 @@ hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6);
}
-static inline void
+static void
hash_ip6_netmask(union nf_inet_addr *ip, u8 prefix)
{
ip6_netmask(ip, prefix);
@@ -196,7 +196,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ip6_data_next(struct hash_ip6_elem *next, const struct hash_ip6_elem *e)
{
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
index 4ce563eb927d..eceb7bc4a93a 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmac.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -47,7 +47,7 @@ struct hash_ipmac4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipmac4_data_equal(const struct hash_ipmac4_elem *e1,
const struct hash_ipmac4_elem *e2,
u32 *multi)
@@ -67,7 +67,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipmac4_data_next(struct hash_ipmac4_elem *next,
const struct hash_ipmac4_elem *e)
{
@@ -154,7 +154,7 @@ struct hash_ipmac6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipmac6_data_equal(const struct hash_ipmac6_elem *e1,
const struct hash_ipmac6_elem *e2,
u32 *multi)
@@ -175,7 +175,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipmac6_data_next(struct hash_ipmac6_elem *next,
const struct hash_ipmac6_elem *e)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 7a1734aad0c5..aba1df617d6e 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -42,7 +42,7 @@ struct hash_ipmark4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1,
const struct hash_ipmark4_elem *ip2,
u32 *multi)
@@ -64,7 +64,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
const struct hash_ipmark4_elem *d)
{
@@ -165,7 +165,7 @@ struct hash_ipmark6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1,
const struct hash_ipmark6_elem *ip2,
u32 *multi)
@@ -187,7 +187,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipmark6_data_next(struct hash_ipmark6_elem *next,
const struct hash_ipmark6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 32e240658334..1ff228717e29 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -47,7 +47,7 @@ struct hash_ipport4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
const struct hash_ipport4_elem *ip2,
u32 *multi)
@@ -71,7 +71,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipport4_data_next(struct hash_ipport4_elem *next,
const struct hash_ipport4_elem *d)
{
@@ -202,7 +202,7 @@ struct hash_ipport6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
const struct hash_ipport6_elem *ip2,
u32 *multi)
@@ -226,7 +226,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipport6_data_next(struct hash_ipport6_elem *next,
const struct hash_ipport6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 15d419353179..fa88afd812fa 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -46,7 +46,7 @@ struct hash_ipportip4_elem {
u8 padding;
};
-static inline bool
+static bool
hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
const struct hash_ipportip4_elem *ip2,
u32 *multi)
@@ -72,7 +72,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
const struct hash_ipportip4_elem *d)
{
@@ -210,7 +210,7 @@ struct hash_ipportip6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
const struct hash_ipportip6_elem *ip2,
u32 *multi)
@@ -236,7 +236,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipportip6_data_next(struct hash_ipportip6_elem *next,
const struct hash_ipportip6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 7a4d7afd4121..eef6ecfcb409 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -59,7 +59,7 @@ struct hash_ipportnet4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
const struct hash_ipportnet4_elem *ip2,
u32 *multi)
@@ -71,25 +71,25 @@ hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline int
+static int
hash_ipportnet4_do_data_match(const struct hash_ipportnet4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_ipportnet4_data_set_flags(struct hash_ipportnet4_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr)
{
elem->ip2 &= ip_set_netmask(cidr);
@@ -116,7 +116,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next,
const struct hash_ipportnet4_elem *d)
{
@@ -308,7 +308,7 @@ struct hash_ipportnet6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
const struct hash_ipportnet6_elem *ip2,
u32 *multi)
@@ -320,25 +320,25 @@ hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline int
+static int
hash_ipportnet6_do_data_match(const struct hash_ipportnet6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_ipportnet6_data_set_flags(struct hash_ipportnet6_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip2, cidr);
@@ -365,7 +365,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipportnet6_data_next(struct hash_ipportnet6_elem *next,
const struct hash_ipportnet6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index d94c585d33c5..0b61593165ef 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -37,7 +37,7 @@ struct hash_mac4_elem {
/* Common functions */
-static inline bool
+static bool
hash_mac4_data_equal(const struct hash_mac4_elem *e1,
const struct hash_mac4_elem *e2,
u32 *multi)
@@ -45,7 +45,7 @@ hash_mac4_data_equal(const struct hash_mac4_elem *e1,
return ether_addr_equal(e1->ether, e2->ether);
}
-static inline bool
+static bool
hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
{
if (nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
@@ -56,7 +56,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_mac4_data_next(struct hash_mac4_elem *next,
const struct hash_mac4_elem *e)
{
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 3d932de0ad29..136cf0781d3a 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -47,7 +47,7 @@ struct hash_net4_elem {
/* Common functions */
-static inline bool
+static bool
hash_net4_data_equal(const struct hash_net4_elem *ip1,
const struct hash_net4_elem *ip2,
u32 *multi)
@@ -56,25 +56,25 @@ hash_net4_data_equal(const struct hash_net4_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline int
+static int
hash_net4_do_data_match(const struct hash_net4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_net4_data_set_flags(struct hash_net4_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_net4_data_reset_flags(struct hash_net4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr)
{
elem->ip &= ip_set_netmask(cidr);
@@ -97,7 +97,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_net4_data_next(struct hash_net4_elem *next,
const struct hash_net4_elem *d)
{
@@ -212,7 +212,7 @@ struct hash_net6_elem {
/* Common functions */
-static inline bool
+static bool
hash_net6_data_equal(const struct hash_net6_elem *ip1,
const struct hash_net6_elem *ip2,
u32 *multi)
@@ -221,25 +221,25 @@ hash_net6_data_equal(const struct hash_net6_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline int
+static int
hash_net6_do_data_match(const struct hash_net6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_net6_data_set_flags(struct hash_net6_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_net6_data_reset_flags(struct hash_net6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip, cidr);
@@ -262,7 +262,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_net6_data_next(struct hash_net6_elem *next,
const struct hash_net6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 87b29f971226..be5e95a0d876 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -25,7 +25,8 @@
/* 3 Counters support added */
/* 4 Comments support added */
/* 5 Forceadd support added */
-#define IPSET_TYPE_REV_MAX 6 /* skbinfo support added */
+/* 6 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 7 /* interface wildcard support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -57,12 +58,13 @@ struct hash_netiface4_elem {
u8 cidr;
u8 nomatch;
u8 elem;
+ u8 wildcard;
char iface[IFNAMSIZ];
};
/* Common functions */
-static inline bool
+static bool
hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
const struct hash_netiface4_elem *ip2,
u32 *multi)
@@ -71,28 +73,30 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
ip1->cidr == ip2->cidr &&
(++*multi) &&
ip1->physdev == ip2->physdev &&
- strcmp(ip1->iface, ip2->iface) == 0;
+ (ip1->wildcard ?
+ strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 :
+ strcmp(ip1->iface, ip2->iface) == 0);
}
-static inline int
+static int
hash_netiface4_do_data_match(const struct hash_netiface4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netiface4_data_set_flags(struct hash_netiface4_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_netiface4_data_reset_flags(struct hash_netiface4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr)
{
elem->ip &= ip_set_netmask(cidr);
@@ -103,7 +107,8 @@ static bool
hash_netiface4_data_list(struct sk_buff *skb,
const struct hash_netiface4_elem *data)
{
- u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0;
+ u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) |
+ (data->wildcard ? IPSET_FLAG_IFACE_WILDCARD : 0);
if (data->nomatch)
flags |= IPSET_FLAG_NOMATCH;
@@ -119,7 +124,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netiface4_data_next(struct hash_netiface4_elem *next,
const struct hash_netiface4_elem *d)
{
@@ -229,6 +234,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
e.physdev = 1;
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
+ if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD)
+ e.wildcard = 1;
}
if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
e.ip = htonl(ip & ip_set_hostmask(e.cidr));
@@ -280,12 +287,13 @@ struct hash_netiface6_elem {
u8 cidr;
u8 nomatch;
u8 elem;
+ u8 wildcard;
char iface[IFNAMSIZ];
};
/* Common functions */
-static inline bool
+static bool
hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
const struct hash_netiface6_elem *ip2,
u32 *multi)
@@ -294,28 +302,30 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
ip1->cidr == ip2->cidr &&
(++*multi) &&
ip1->physdev == ip2->physdev &&
- strcmp(ip1->iface, ip2->iface) == 0;
+ (ip1->wildcard ?
+ strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 :
+ strcmp(ip1->iface, ip2->iface) == 0);
}
-static inline int
+static int
hash_netiface6_do_data_match(const struct hash_netiface6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netiface6_data_set_flags(struct hash_netiface6_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_netiface6_data_reset_flags(struct hash_netiface6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netiface6_data_netmask(struct hash_netiface6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip, cidr);
@@ -326,7 +336,8 @@ static bool
hash_netiface6_data_list(struct sk_buff *skb,
const struct hash_netiface6_elem *data)
{
- u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0;
+ u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) |
+ (data->wildcard ? IPSET_FLAG_IFACE_WILDCARD : 0);
if (data->nomatch)
flags |= IPSET_FLAG_NOMATCH;
@@ -342,7 +353,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netiface6_data_next(struct hash_netiface6_elem *next,
const struct hash_netiface6_elem *d)
{
@@ -440,6 +451,8 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
e.physdev = 1;
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
+ if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD)
+ e.wildcard = 1;
}
ret = adtfn(set, &e, &ext, &ext, flags);
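Revision 7's wildcard flag turns the stored interface name into a prefix match; the comparison added above reduces to this (sketch):

/* stored "eth" + wildcard matches "eth0", "eth1", "eth10", ...;
 * without the flag only an exact name match counts.
 */
static bool iface_matches(const char *stored, const char *pkt, bool wildcard)
{
	return wildcard ? strncmp(stored, pkt, strlen(stored)) == 0
			: strcmp(stored, pkt) == 0;
}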
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 4398322fad59..da4ef910b12d 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -52,7 +52,7 @@ struct hash_netnet4_elem {
/* Common functions */
-static inline bool
+static bool
hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
const struct hash_netnet4_elem *ip2,
u32 *multi)
@@ -61,32 +61,32 @@ hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
ip1->ccmp == ip2->ccmp;
}
-static inline int
+static int
hash_netnet4_do_data_match(const struct hash_netnet4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netnet4_data_set_flags(struct hash_netnet4_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem,
struct hash_netnet4_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
-static inline void
+static void
hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner)
{
if (inner) {
@@ -117,7 +117,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netnet4_data_next(struct hash_netnet4_elem *next,
const struct hash_netnet4_elem *d)
{
@@ -282,7 +282,7 @@ struct hash_netnet6_elem {
/* Common functions */
-static inline bool
+static bool
hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
const struct hash_netnet6_elem *ip2,
u32 *multi)
@@ -292,32 +292,32 @@ hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
ip1->ccmp == ip2->ccmp;
}
-static inline int
+static int
hash_netnet6_do_data_match(const struct hash_netnet6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netnet6_data_set_flags(struct hash_netnet6_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem,
struct hash_netnet6_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
-static inline void
+static void
hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner)
{
if (inner) {
@@ -348,7 +348,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netnet6_data_next(struct hash_netnet6_elem *next,
const struct hash_netnet6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 799f2272cc65..34448df80fb9 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -57,7 +57,7 @@ struct hash_netport4_elem {
/* Common functions */
-static inline bool
+static bool
hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
const struct hash_netport4_elem *ip2,
u32 *multi)
@@ -68,25 +68,25 @@ hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline int
+static int
hash_netport4_do_data_match(const struct hash_netport4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netport4_data_set_flags(struct hash_netport4_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_netport4_data_reset_flags(struct hash_netport4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr)
{
elem->ip &= ip_set_netmask(cidr);
@@ -112,7 +112,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netport4_data_next(struct hash_netport4_elem *next,
const struct hash_netport4_elem *d)
{
@@ -270,7 +270,7 @@ struct hash_netport6_elem {
/* Common functions */
-static inline bool
+static bool
hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
const struct hash_netport6_elem *ip2,
u32 *multi)
@@ -281,25 +281,25 @@ hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline int
+static int
hash_netport6_do_data_match(const struct hash_netport6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netport6_data_set_flags(struct hash_netport6_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_netport6_data_reset_flags(struct hash_netport6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netport6_data_netmask(struct hash_netport6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip, cidr);
@@ -325,7 +325,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netport6_data_next(struct hash_netport6_elem *next,
const struct hash_netport6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index a82b70e8b9a6..934c1712cba8 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -56,7 +56,7 @@ struct hash_netportnet4_elem {
/* Common functions */
-static inline bool
+static bool
hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
const struct hash_netportnet4_elem *ip2,
u32 *multi)
@@ -67,32 +67,32 @@ hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline int
+static int
hash_netportnet4_do_data_match(const struct hash_netportnet4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netportnet4_data_set_flags(struct hash_netportnet4_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem,
struct hash_netportnet4_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
-static inline void
+static void
hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem,
u8 cidr, bool inner)
{
@@ -126,7 +126,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
const struct hash_netportnet4_elem *d)
{
@@ -331,7 +331,7 @@ struct hash_netportnet6_elem {
/* Common functions */
-static inline bool
+static bool
hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
const struct hash_netportnet6_elem *ip2,
u32 *multi)
@@ -343,32 +343,32 @@ hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline int
+static int
hash_netportnet6_do_data_match(const struct hash_netportnet6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netportnet6_data_set_flags(struct hash_netportnet6_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem,
struct hash_netportnet6_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
-static inline void
+static void
hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem,
u8 cidr, bool inner)
{
@@ -402,7 +402,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netportnet6_data_next(struct hash_netportnet6_elem *next,
const struct hash_netportnet6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 67ac50104e6f..cd747c0962fd 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -149,7 +149,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
kfree(e);
}
-static inline void
+static void
list_set_del(struct ip_set *set, struct set_elem *e)
{
struct list_set *map = set->data;
@@ -160,7 +160,7 @@ list_set_del(struct ip_set *set, struct set_elem *e)
call_rcu(&e->rcu, __list_set_del_rcu);
}
-static inline void
+static void
list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
{
struct list_set *map = set->data;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 8b80ab794a92..512259f579d7 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2402,18 +2402,22 @@ estimator_fail:
return -ENOMEM;
}
-static void __net_exit __ip_vs_cleanup(struct net *net)
+static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
- ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */
- ip_vs_conn_net_cleanup(ipvs);
- ip_vs_app_net_cleanup(ipvs);
- ip_vs_protocol_net_cleanup(ipvs);
- ip_vs_control_net_cleanup(ipvs);
- ip_vs_estimator_net_cleanup(ipvs);
- IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
- net->ipvs = NULL;
+ struct netns_ipvs *ipvs;
+ struct net *net;
+
+ ip_vs_service_nets_cleanup(net_list); /* ip_vs_flush() with locks */
+ list_for_each_entry(net, net_list, exit_list) {
+ ipvs = net_ipvs(net);
+ ip_vs_conn_net_cleanup(ipvs);
+ ip_vs_app_net_cleanup(ipvs);
+ ip_vs_protocol_net_cleanup(ipvs);
+ ip_vs_control_net_cleanup(ipvs);
+ ip_vs_estimator_net_cleanup(ipvs);
+ IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
+ net->ipvs = NULL;
+ }
}
static int __net_init __ip_vs_dev_init(struct net *net)
@@ -2429,27 +2433,32 @@ hook_fail:
return ret;
}
-static void __net_exit __ip_vs_dev_cleanup(struct net *net)
+static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct netns_ipvs *ipvs;
+ struct net *net;
+
EnterFunction(2);
- nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
- ipvs->enable = 0; /* Disable packet reception */
- smp_wmb();
- ip_vs_sync_net_cleanup(ipvs);
+ list_for_each_entry(net, net_list, exit_list) {
+ ipvs = net_ipvs(net);
+ nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+ ipvs->enable = 0; /* Disable packet reception */
+ smp_wmb();
+ ip_vs_sync_net_cleanup(ipvs);
+ }
LeaveFunction(2);
}
static struct pernet_operations ipvs_core_ops = {
.init = __ip_vs_init,
- .exit = __ip_vs_cleanup,
+ .exit_batch = __ip_vs_cleanup_batch,
.id = &ip_vs_net_id,
.size = sizeof(struct netns_ipvs),
};
static struct pernet_operations ipvs_core_dev_ops = {
.init = __ip_vs_dev_init,
- .exit = __ip_vs_dev_cleanup,
+ .exit_batch = __ip_vs_dev_cleanup_batch,
};
/*
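Switching from .exit to .exit_batch means cleanup_net() hands IPVS every dying netns in one list, so global work such as taking __ip_vs_mutex happens once per batch instead of once per namespace. The general shape of the pernet API (sketch, not IPVS-specific):

static void __net_exit example_exit_batch(struct list_head *net_list)
{
	struct net *net;

	/* take one expensive lock/sync here, then walk the batch */
	list_for_each_entry(net, net_list, exit_list)
		;	/* release this net's resources */
}

static struct pernet_operations example_ops = {
	.exit_batch = example_exit_batch,
};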
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 3cccc88ef817..3be7398901e0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1607,14 +1607,20 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
/*
* Delete service by {netns} in the service table.
- * Called by __ip_vs_cleanup()
+ * Called by __ip_vs_cleanup_batch()
*/
-void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
+void ip_vs_service_nets_cleanup(struct list_head *net_list)
{
+ struct netns_ipvs *ipvs;
+ struct net *net;
+
EnterFunction(2);
/* Check for "full" addressed entries */
mutex_lock(&__ip_vs_mutex);
- ip_vs_flush(ipvs, true);
+ list_for_each_entry(net, net_list, exit_list) {
+ ipvs = net_ipvs(net);
+ ip_vs_flush(ipvs, true);
+ }
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
}
diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c
index 78b074cd5464..c03066fdd5ca 100644
--- a/net/netfilter/ipvs/ip_vs_ovf.c
+++ b/net/netfilter/ipvs/ip_vs_ovf.c
@@ -5,7 +5,7 @@
* Authors: Raducu Deaconu <rhadoo_io@yahoo.com>
*
* Scheduler implements "overflow" loadbalancing according to number of active
- * connections , will keep all conections to the node with the highest weight
+ * connections , will keep all connections to the node with the highest weight
* and overflow to the next node if the number of connections exceeds the node's
* weight.
* Note that this scheduler might not be suitable for UDP because it only uses
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 888d3068a492..b1e300f8881b 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -407,12 +407,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
goto err_put;
skb_dst_drop(skb);
- if (noref) {
- if (!local)
- skb_dst_set_noref(skb, &rt->dst);
- else
- skb_dst_set(skb, dst_clone(&rt->dst));
- } else
+ if (noref)
+ skb_dst_set_noref(skb, &rt->dst);
+ else
skb_dst_set(skb, &rt->dst);
return local;
@@ -574,12 +571,9 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
goto err_put;
skb_dst_drop(skb);
- if (noref) {
- if (!local)
- skb_dst_set_noref(skb, &rt->dst);
- else
- skb_dst_set(skb, dst_clone(&rt->dst));
- } else
+ if (noref)
+ skb_dst_set_noref(skb, &rt->dst);
+ else
skb_dst_set(skb, &rt->dst);
return local;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5cd610b547e0..0af1898af2b8 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -573,7 +573,6 @@ EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
void nf_ct_tmpl_free(struct nf_conn *tmpl)
{
nf_ct_ext_destroy(tmpl);
- nf_ct_ext_free(tmpl);
if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK)
kfree((char *)tmpl - tmpl->proto.tmpl_padto);
@@ -1417,7 +1416,6 @@ void nf_conntrack_free(struct nf_conn *ct)
WARN_ON(atomic_read(&ct->ct_general.use) != 0);
nf_ct_ext_destroy(ct);
- nf_ct_ext_free(ct);
kmem_cache_free(nf_conntrack_cachep, ct);
smp_mb__before_atomic();
atomic_dec(&net->ct.count);
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 6fba74b5aaf7..7956c9f19899 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -30,6 +30,7 @@
static DEFINE_MUTEX(nf_ct_ecache_mutex);
#define ECACHE_RETRY_WAIT (HZ/10)
+#define ECACHE_STACK_ALLOC (256 / sizeof(void *))
enum retry_state {
STATE_CONGESTED,
@@ -39,11 +40,11 @@ enum retry_state {
static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
{
- struct nf_conn *refs[16];
+ struct nf_conn *refs[ECACHE_STACK_ALLOC];
+ enum retry_state ret = STATE_DONE;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
unsigned int evicted = 0;
- enum retry_state ret = STATE_DONE;
spin_lock(&pcpu->lock);
@@ -54,10 +55,22 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
if (!nf_ct_is_confirmed(ct))
continue;
+ /* This ecache access is safe because the ct is on the
+ * pcpu dying list and we hold the spinlock -- the entry
+ * cannot be free'd until after the lock is released.
+ *
+ * This is true even if ct has a refcount of 0: the
+ * cpu that is about to free the entry must remove it
+ * from the dying list and needs the lock to do so.
+ */
e = nf_ct_ecache_find(ct);
if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
continue;
+ /* ct is in NFCT_ECACHE_DESTROY_FAIL state, this means
+ * the worker owns this entry: the ct will remain valid
+ * until the worker puts its ct reference.
+ */
if (nf_conntrack_event(IPCT_DESTROY, ct)) {
ret = STATE_CONGESTED;
break;
@@ -189,15 +202,15 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
if (notify == NULL)
goto out_unlock;
+ if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
+ goto out_unlock;
+
e = nf_ct_ecache_find(ct);
if (e == NULL)
goto out_unlock;
events = xchg(&e->cache, 0);
- if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
- goto out_unlock;
-
/* We make a copy of the missed event cache without taking
* the lock, thus we may send missed events twice. However,
* this does not harm and it happens very rarely. */
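The new constant sizes the on-stack refs[] array by a byte budget instead of a magic entry count:

/* 256 / sizeof(void *) keeps refs[] at 256 bytes of stack:
 *   64-bit kernel: 32 entries;  32-bit kernel: 64 entries
 * (previously a fixed 16 entries regardless of pointer size)
 */
struct nf_conn *refs[ECACHE_STACK_ALLOC];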
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index d4ed1e197921..c24e5b64b00c 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -34,21 +34,24 @@ void nf_ct_ext_destroy(struct nf_conn *ct)
t->destroy(ct);
rcu_read_unlock();
}
+
+ kfree(ct->ext);
}
EXPORT_SYMBOL(nf_ct_ext_destroy);
void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
{
unsigned int newlen, newoff, oldlen, alloc;
- struct nf_ct_ext *old, *new;
struct nf_ct_ext_type *t;
+ struct nf_ct_ext *new;
/* Conntrack must not be confirmed to avoid races on reallocation. */
WARN_ON(nf_ct_is_confirmed(ct));
- old = ct->ext;
- if (old) {
+ if (ct->ext) {
+ const struct nf_ct_ext *old = ct->ext;
+
if (__nf_ct_ext_exist(old, id))
return NULL;
oldlen = old->len;
@@ -68,22 +71,18 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
rcu_read_unlock();
alloc = max(newlen, NF_CT_EXT_PREALLOC);
- kmemleak_not_leak(old);
- new = __krealloc(old, alloc, gfp);
+ new = krealloc(ct->ext, alloc, gfp);
if (!new)
return NULL;
- if (!old) {
+ if (!ct->ext)
memset(new->offset, 0, sizeof(new->offset));
- ct->ext = new;
- } else if (new != old) {
- kfree_rcu(old, rcu);
- rcu_assign_pointer(ct->ext, new);
- }
new->offset[id] = newoff;
new->len = newlen;
memset((void *)new + newoff, 0, newlen - newoff);
+
+ ct->ext = new;
return (void *)new + newoff;
}
EXPORT_SYMBOL(nf_ct_ext_add);
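With __krealloc() gone, the add path leans on the invariant asserted by the WARN_ON above; a condensed statement of the ownership rule (sketch, inferred from the patch):

/* ct->ext is only (re)allocated while the conntrack is unconfirmed,
 * i.e. owned by a single CPU, so plain krealloc() may free and move
 * the block without an RCU grace period. Once confirmed, ct->ext is
 * never resized again and is released by the kfree() now done in
 * nf_ct_ext_destroy().
 */
void *data = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, GFP_ATOMIC);
if (!data)
	return;	/* extension already present or allocation failed */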
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index e2d13cd18875..d8d33ef52ce0 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -506,9 +506,45 @@ nla_put_failure:
return -1;
}
+/* all these functions access ct->ext. Caller must either hold a reference
+ * on ct or prevent its deletion by holding either the bucket spinlock or
+ * pcpu dying list lock.
+ */
+static int ctnetlink_dump_extinfo(struct sk_buff *skb,
+ struct nf_conn *ct, u32 type)
+{
+ if (ctnetlink_dump_acct(skb, ct, type) < 0 ||
+ ctnetlink_dump_timestamp(skb, ct) < 0 ||
+ ctnetlink_dump_helpinfo(skb, ct) < 0 ||
+ ctnetlink_dump_labels(skb, ct) < 0 ||
+ ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
+ ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
+{
+ if (ctnetlink_dump_status(skb, ct) < 0 ||
+ ctnetlink_dump_mark(skb, ct) < 0 ||
+ ctnetlink_dump_secctx(skb, ct) < 0 ||
+ ctnetlink_dump_id(skb, ct) < 0 ||
+ ctnetlink_dump_use(skb, ct) < 0 ||
+ ctnetlink_dump_master(skb, ct) < 0)
+ return -1;
+
+ if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
+ (ctnetlink_dump_timeout(skb, ct) < 0 ||
+ ctnetlink_dump_protoinfo(skb, ct) < 0))
+ return -1;
+
+ return 0;
+}
+
static int
ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
- struct nf_conn *ct)
+ struct nf_conn *ct, bool extinfo)
{
const struct nf_conntrack_zone *zone;
struct nlmsghdr *nlh;
@@ -552,23 +588,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
NF_CT_DEFAULT_ZONE_DIR) < 0)
goto nla_put_failure;
- if (ctnetlink_dump_status(skb, ct) < 0 ||
- ctnetlink_dump_acct(skb, ct, type) < 0 ||
- ctnetlink_dump_timestamp(skb, ct) < 0 ||
- ctnetlink_dump_helpinfo(skb, ct) < 0 ||
- ctnetlink_dump_mark(skb, ct) < 0 ||
- ctnetlink_dump_secctx(skb, ct) < 0 ||
- ctnetlink_dump_labels(skb, ct) < 0 ||
- ctnetlink_dump_id(skb, ct) < 0 ||
- ctnetlink_dump_use(skb, ct) < 0 ||
- ctnetlink_dump_master(skb, ct) < 0 ||
- ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
- ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+ if (ctnetlink_dump_info(skb, ct) < 0)
goto nla_put_failure;
-
- if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
- (ctnetlink_dump_timeout(skb, ct) < 0 ||
- ctnetlink_dump_protoinfo(skb, ct) < 0))
+ if (extinfo && ctnetlink_dump_extinfo(skb, ct, type) < 0)
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -953,13 +975,11 @@ restart:
if (!ctnetlink_filter_match(ct, cb->data))
continue;
- rcu_read_lock();
res =
ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct);
- rcu_read_unlock();
+ ct, true);
if (res < 0) {
nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct;
@@ -1364,10 +1384,8 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
return -ENOMEM;
}
- rcu_read_lock();
err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
- NFNL_MSG_TYPE(nlh->nlmsg_type), ct);
- rcu_read_unlock();
+ NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true);
nf_ct_put(ct);
if (err <= 0)
goto free;
@@ -1429,12 +1447,18 @@ restart:
continue;
cb->args[1] = 0;
}
- rcu_read_lock();
+
+ /* We can't dump extension info for the unconfirmed
+ * list because unconfirmed conntracks can have
+ * ct->ext reallocated (and thus freed).
+ *
+ * In the dying list case ct->ext can't be free'd
+ * until after we drop pcpu->lock.
+ */
res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct);
- rcu_read_unlock();
+				    ct, dying);
if (res < 0) {
if (!atomic_inc_not_zero(&ct->ct_general.use))
continue;
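The dump path is now split so that attributes backed by ct->ext (acct, timestamp, helper, labels, seqadj, synproxy) are only emitted when the extension area is stable. A hypothetical predicate (not in the patch) summarizing the rule the comments above encode:

	static bool ctnetlink_ext_stable(const struct nf_conn *ct, bool on_dying_list)
	{
		/* Unconfirmed conntracks may still grow ct->ext via
		 * nf_ct_ext_add(), which can reallocate and free the old
		 * area; dying-list entries keep ct->ext alive until the
		 * pcpu lock is dropped.
		 */
		return nf_ct_is_confirmed(ct) || on_dying_list;
	}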
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 097deba7441a..c2e3dff773bc 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -235,11 +235,7 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
}
/* Need to track icmp error message? */
- if (icmph->type != ICMP_DEST_UNREACH &&
- icmph->type != ICMP_SOURCE_QUENCH &&
- icmph->type != ICMP_TIME_EXCEEDED &&
- icmph->type != ICMP_PARAMETERPROB &&
- icmph->type != ICMP_REDIRECT)
+ if (!icmp_is_err(icmph->type))
return NF_ACCEPT;
memset(&outer_daddr, 0, sizeof(outer_daddr));
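For reference, icmp_is_err() — introduced in include/linux/icmp.h by the same series — folds the open-coded type checks into a single helper; its definition is expected to be:

	static inline bool icmp_is_err(int type)
	{
		switch (type) {
		case ICMP_DEST_UNREACH:
		case ICMP_SOURCE_QUENCH:
		case ICMP_REDIRECT:
		case ICMP_TIME_EXCEEDED:
		case ICMP_PARAMETERPROB:
			return true;
		}

		return false;
	}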
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 128245efe84a..9889d52eda82 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -14,24 +14,15 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
-struct flow_offload_entry {
- struct flow_offload flow;
- struct nf_conn *ct;
- struct rcu_head rcu_head;
-};
-
static DEFINE_MUTEX(flowtable_lock);
static LIST_HEAD(flowtables);
static void
-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
- struct nf_flow_route *route,
+flow_offload_fill_dir(struct flow_offload *flow,
enum flow_offload_tuple_dir dir)
{
struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
- struct dst_entry *other_dst = route->tuple[!dir].dst;
- struct dst_entry *dst = route->tuple[dir].dst;
+ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
ft->dir = dir;
@@ -39,12 +30,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
case NFPROTO_IPV4:
ft->src_v4 = ctt->src.u3.in;
ft->dst_v4 = ctt->dst.u3.in;
- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
break;
case NFPROTO_IPV6:
ft->src_v6 = ctt->src.u3.in6;
ft->dst_v6 = ctt->dst.u3.in6;
- ft->mtu = ip6_dst_mtu_forward(dst);
break;
}
@@ -52,37 +41,24 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
ft->l4proto = ctt->dst.protonum;
ft->src_port = ctt->src.u.tcp.port;
ft->dst_port = ctt->dst.u.tcp.port;
-
- ft->iifidx = other_dst->dev->ifindex;
- ft->dst_cache = dst;
}
-struct flow_offload *
-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
+struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
{
- struct flow_offload_entry *entry;
struct flow_offload *flow;
if (unlikely(nf_ct_is_dying(ct) ||
!atomic_inc_not_zero(&ct->ct_general.use)))
return NULL;
- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
- if (!entry)
+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
+ if (!flow)
goto err_ct_refcnt;
- flow = &entry->flow;
+ flow->ct = ct;
- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
- goto err_dst_cache_original;
-
- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
- goto err_dst_cache_reply;
-
- entry->ct = ct;
-
- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
if (ct->status & IPS_SRC_NAT)
flow->flags |= FLOW_OFFLOAD_SNAT;
@@ -91,10 +67,6 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
return flow;
-err_dst_cache_reply:
- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
-err_dst_cache_original:
- kfree(entry);
err_ct_refcnt:
nf_ct_put(ct);
@@ -102,6 +74,56 @@ err_ct_refcnt:
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);
+static int flow_offload_fill_route(struct flow_offload *flow,
+ const struct nf_flow_route *route,
+ enum flow_offload_tuple_dir dir)
+{
+ struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
+ struct dst_entry *other_dst = route->tuple[!dir].dst;
+ struct dst_entry *dst = route->tuple[dir].dst;
+
+ if (!dst_hold_safe(route->tuple[dir].dst))
+ return -1;
+
+ switch (flow_tuple->l3proto) {
+ case NFPROTO_IPV4:
+ flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
+ break;
+ case NFPROTO_IPV6:
+ flow_tuple->mtu = ip6_dst_mtu_forward(dst);
+ break;
+ }
+
+ flow_tuple->iifidx = other_dst->dev->ifindex;
+ flow_tuple->dst_cache = dst;
+
+ return 0;
+}
+
+int flow_offload_route_init(struct flow_offload *flow,
+ const struct nf_flow_route *route)
+{
+ int err;
+
+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+ if (err < 0)
+ return err;
+
+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
+ if (err < 0)
+ goto err_route_reply;
+
+ flow->type = NF_FLOW_OFFLOAD_ROUTE;
+
+ return 0;
+
+err_route_reply:
+ dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(flow_offload_route_init);
+
static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{
tcp->state = TCP_CONNTRACK_ESTABLISHED;
@@ -150,17 +172,25 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
flow_offload_fixup_ct_timeout(ct);
}
-void flow_offload_free(struct flow_offload *flow)
+static void flow_offload_route_release(struct flow_offload *flow)
{
- struct flow_offload_entry *e;
-
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
- e = container_of(flow, struct flow_offload_entry, flow);
+}
+
+void flow_offload_free(struct flow_offload *flow)
+{
+ switch (flow->type) {
+ case NF_FLOW_OFFLOAD_ROUTE:
+ flow_offload_route_release(flow);
+ break;
+ default:
+ break;
+ }
if (flow->flags & FLOW_OFFLOAD_DYING)
- nf_ct_delete(e->ct, 0, 0);
- nf_ct_put(e->ct);
- kfree_rcu(e, rcu_head);
+ nf_ct_delete(flow->ct, 0, 0);
+ nf_ct_put(flow->ct);
+ kfree_rcu(flow, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);
@@ -220,6 +250,9 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
return err;
}
+ if (flow_table->flags & NF_FLOWTABLE_HW_OFFLOAD)
+ nf_flow_offload_add(flow_table, flow);
+
return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);
@@ -232,8 +265,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
static void flow_offload_del(struct nf_flowtable *flow_table,
struct flow_offload *flow)
{
- struct flow_offload_entry *e;
-
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
nf_flow_offload_rhash_params);
@@ -241,25 +272,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
nf_flow_offload_rhash_params);
- e = container_of(flow, struct flow_offload_entry, flow);
- clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
if (nf_flow_has_expired(flow))
- flow_offload_fixup_ct(e->ct);
+ flow_offload_fixup_ct(flow->ct);
else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
- flow_offload_fixup_ct_timeout(e->ct);
+ flow_offload_fixup_ct_timeout(flow->ct);
flow_offload_free(flow);
}
void flow_offload_teardown(struct flow_offload *flow)
{
- struct flow_offload_entry *e;
-
flow->flags |= FLOW_OFFLOAD_TEARDOWN;
- e = container_of(flow, struct flow_offload_entry, flow);
- flow_offload_fixup_ct_state(e->ct);
+ flow_offload_fixup_ct_state(flow->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);
@@ -269,7 +296,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
{
struct flow_offload_tuple_rhash *tuplehash;
struct flow_offload *flow;
- struct flow_offload_entry *e;
int dir;
tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
@@ -282,8 +308,7 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
return NULL;
- e = container_of(flow, struct flow_offload_entry, flow);
- if (unlikely(nf_ct_is_dying(e->ct)))
+ if (unlikely(nf_ct_is_dying(flow->ct)))
return NULL;
return tuplehash;
@@ -327,12 +352,21 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
{
struct nf_flowtable *flow_table = data;
- struct flow_offload_entry *e;
- e = container_of(flow, struct flow_offload_entry, flow);
- if (nf_flow_has_expired(flow) || nf_ct_is_dying(e->ct) ||
- (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)))
- flow_offload_del(flow_table, flow);
+ if (flow->flags & FLOW_OFFLOAD_HW)
+ nf_flow_offload_stats(flow_table, flow);
+
+ if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) ||
+ (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))) {
+ if (flow->flags & FLOW_OFFLOAD_HW) {
+ if (!(flow->flags & FLOW_OFFLOAD_HW_DYING))
+ nf_flow_offload_del(flow_table, flow);
+ else if (flow->flags & FLOW_OFFLOAD_HW_DEAD)
+ flow_offload_del(flow_table, flow);
+ } else {
+ flow_offload_del(flow_table, flow);
+ }
+ }
}
static void nf_flow_offload_work_gc(struct work_struct *work)
@@ -465,6 +499,7 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
int err;
INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
+ flow_block_init(&flowtable->flow_block);
err = rhashtable_init(&flowtable->rhashtable,
&nf_flow_offload_rhash_params);
@@ -485,15 +520,13 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
{
struct net_device *dev = data;
- struct flow_offload_entry *e;
-
- e = container_of(flow, struct flow_offload_entry, flow);
if (!dev) {
flow_offload_teardown(flow);
return;
}
- if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
+
+ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
(flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
flow->tuplehash[1].tuple.iifidx == dev->ifindex))
flow_offload_dead(flow);
@@ -502,6 +535,7 @@ static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
struct net_device *dev)
{
+ nf_flow_table_offload_flush(flowtable);
nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
flush_delayed_work(&flowtable->gc_work);
}
@@ -529,5 +563,18 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
+static int __init nf_flow_table_module_init(void)
+{
+ return nf_flow_table_offload_init();
+}
+
+static void __exit nf_flow_table_module_exit(void)
+{
+ nf_flow_table_offload_exit();
+}
+
+module_init(nf_flow_table_module_init);
+module_exit(nf_flow_table_module_exit);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
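With the constructor split, flow creation becomes a two-step sequence: flow_offload_alloc() takes the conntrack reference and fills the tuples, and flow_offload_route_init() grabs the dst references and tags the flow as NF_FLOW_OFFLOAD_ROUTE. A caller sketch, assuming a populated struct nf_flow_route (this mirrors what the nft_flow_offload expression is expected to do after this change):

	struct flow_offload *flow;

	flow = flow_offload_alloc(ct);
	if (!flow)
		return -ENOMEM;

	if (flow_offload_route_init(flow, &route) < 0)
		goto err_flow_free;

	if (flow_offload_add(flowtable, flow) < 0)
		goto err_flow_free;

	return 0;

err_flow_free:
	flow_offload_free(flow);	/* also drops the ct reference */
	return -ENOENT;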
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 593357aedb36..88bedf1ff1ae 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -21,9 +21,34 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
return NF_ACCEPT;
}
+static int nf_flow_rule_route_inet(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
+ int err;
+
+ switch (flow_tuple->l3proto) {
+ case NFPROTO_IPV4:
+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
+ break;
+ case NFPROTO_IPV6:
+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
+ break;
+ default:
+ err = -1;
+ break;
+ }
+
+ return err;
+}
+
static struct nf_flowtable_type flowtable_inet = {
.family = NFPROTO_INET,
.init = nf_flow_table_init,
+ .setup = nf_flow_table_offload_setup,
+ .action = nf_flow_rule_route_inet,
.free = nf_flow_table_free,
.hook = nf_flow_offload_inet_hook,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
new file mode 100644
index 000000000000..c54c9a6cc981
--- /dev/null
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -0,0 +1,851 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <linux/tc_act/tc_csum.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+static struct work_struct nf_flow_offload_work;
+static DEFINE_SPINLOCK(flow_offload_pending_list_lock);
+static LIST_HEAD(flow_offload_pending_list);
+
+struct flow_offload_work {
+ struct list_head list;
+ enum flow_cls_command cmd;
+ int priority;
+ struct nf_flowtable *flowtable;
+ struct flow_offload *flow;
+};
+
+struct nf_flow_key {
+ struct flow_dissector_key_control control;
+ struct flow_dissector_key_basic basic;
+ union {
+ struct flow_dissector_key_ipv4_addrs ipv4;
+ };
+ struct flow_dissector_key_tcp tcp;
+ struct flow_dissector_key_ports tp;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct nf_flow_match {
+ struct flow_dissector dissector;
+ struct nf_flow_key key;
+ struct nf_flow_key mask;
+};
+
+struct nf_flow_rule {
+ struct nf_flow_match match;
+ struct flow_rule *rule;
+};
+
+#define NF_FLOW_DISSECTOR(__match, __type, __field) \
+ (__match)->dissector.offset[__type] = \
+ offsetof(struct nf_flow_key, __field)
+
+static int nf_flow_rule_match(struct nf_flow_match *match,
+ const struct flow_offload_tuple *tuple)
+{
+ struct nf_flow_key *mask = &match->mask;
+ struct nf_flow_key *key = &match->key;
+
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
+
+ switch (tuple->l3proto) {
+ case AF_INET:
+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ key->basic.n_proto = htons(ETH_P_IP);
+ key->ipv4.src = tuple->src_v4.s_addr;
+ mask->ipv4.src = 0xffffffff;
+ key->ipv4.dst = tuple->dst_v4.s_addr;
+ mask->ipv4.dst = 0xffffffff;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ mask->basic.n_proto = 0xffff;
+
+ switch (tuple->l4proto) {
+ case IPPROTO_TCP:
+ key->tcp.flags = 0;
+ mask->tcp.flags = TCP_FLAG_RST | TCP_FLAG_FIN;
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
+ break;
+ case IPPROTO_UDP:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ key->basic.ip_proto = tuple->l4proto;
+ mask->basic.ip_proto = 0xff;
+
+ key->tp.src = tuple->src_port;
+ mask->tp.src = 0xffff;
+ key->tp.dst = tuple->dst_port;
+ mask->tp.dst = 0xffff;
+
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS);
+ return 0;
+}
+
+static void flow_offload_mangle(struct flow_action_entry *entry,
+ enum flow_action_mangle_base htype,
+ u32 offset, u8 *value, u8 *mask)
+{
+ entry->id = FLOW_ACTION_MANGLE;
+ entry->mangle.htype = htype;
+ entry->mangle.offset = offset;
+ memcpy(&entry->mangle.mask, mask, sizeof(u32));
+ memcpy(&entry->mangle.val, value, sizeof(u32));
+}
+
+static inline struct flow_action_entry *
+flow_action_entry_next(struct nf_flow_rule *flow_rule)
+{
+ int i = flow_rule->rule->action.num_entries++;
+
+ return &flow_rule->rule->action.entries[i];
+}
+
+static int flow_offload_eth_src(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
+ struct net_device *dev;
+ u32 mask, val;
+ u16 val16;
+
+ dev = dev_get_by_index(net, tuple->iifidx);
+ if (!dev)
+ return -ENOENT;
+
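+	/* The source MAC occupies bytes 6-11 of the Ethernet header; mangle
+	 * actions work on 32-bit words, so bytes 6-7 land in the upper half
+	 * of the word at offset 4 and bytes 8-11 in the word at offset 8.
+	 */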
+ mask = ~0xffff0000;
+ memcpy(&val16, dev->dev_addr, 2);
+ val = val16 << 16;
+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
+ (u8 *)&val, (u8 *)&mask);
+
+ mask = ~0xffffffff;
+ memcpy(&val, dev->dev_addr + 2, 4);
+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
+ (u8 *)&val, (u8 *)&mask);
+ dev_put(dev);
+
+ return 0;
+}
+
+static int flow_offload_eth_dst(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *tuple = &flow->tuplehash[dir].tuple;
+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
+ struct neighbour *n;
+ u32 mask, val;
+ u16 val16;
+
+ n = dst_neigh_lookup(tuple->dst_cache, &tuple->dst_v4);
+ if (!n)
+ return -ENOENT;
+
+ mask = ~0xffffffff;
+ memcpy(&val, n->ha, 4);
+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
+ (u8 *)&val, (u8 *)&mask);
+
+ mask = ~0x0000ffff;
+ memcpy(&val16, n->ha + 4, 2);
+ val = val16;
+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
+ (u8 *)&val, (u8 *)&mask);
+ neigh_release(n);
+
+ return 0;
+}
+
+static void flow_offload_ipv4_snat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ u32 mask = ~htonl(0xffffffff);
+ __be32 addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
+ offset = offsetof(struct iphdr, saddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+ offset = offsetof(struct iphdr, daddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
+ (u8 *)&addr, (u8 *)&mask);
+}
+
+static void flow_offload_ipv4_dnat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ u32 mask = ~htonl(0xffffffff);
+ __be32 addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
+ offset = offsetof(struct iphdr, daddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+ offset = offsetof(struct iphdr, saddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
+ (u8 *)&addr, (u8 *)&mask);
+}
+
+static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
+ unsigned int offset,
+ u8 *addr, u8 *mask)
+{
+ struct flow_action_entry *entry;
+ int i;
+
+	/* Rewrite the 16-byte address one 32-bit word at a time. */
+	for (i = 0; i < sizeof(struct in6_addr); i += sizeof(u32)) {
+ entry = flow_action_entry_next(flow_rule);
+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
+ offset + i,
+ &addr[i], mask);
+ }
+}
+
+static void flow_offload_ipv6_snat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ u32 mask = ~htonl(0xffffffff);
+ const u8 *addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr;
+ offset = offsetof(struct ipv6hdr, saddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr;
+ offset = offsetof(struct ipv6hdr, daddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_ipv6_mangle(flow_rule, offset, (u8 *)addr, (u8 *)&mask);
+}
+
+static void flow_offload_ipv6_dnat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ u32 mask = ~htonl(0xffffffff);
+ const u8 *addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr;
+ offset = offsetof(struct ipv6hdr, daddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr;
+ offset = offsetof(struct ipv6hdr, saddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_ipv6_mangle(flow_rule, offset, (u8 *)addr, (u8 *)&mask);
+}
+
+static int flow_offload_l4proto(const struct flow_offload *flow)
+{
+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
+ u8 type = 0;
+
+ switch (protonum) {
+ case IPPROTO_TCP:
+ type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
+ break;
+ case IPPROTO_UDP:
+ type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
+ break;
+ default:
+ break;
+ }
+
+ return type;
+}
+
+static void flow_offload_port_snat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ u32 mask = ~htonl(0xffff0000);
+ __be16 port;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
+ offset = 0; /* offsetof(struct tcphdr, source); */
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+ offset = 0; /* offsetof(struct tcphdr, dest); */
+ break;
+ default:
+		return;
+ }
+
+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
+ (u8 *)&port, (u8 *)&mask);
+}
+
+static void flow_offload_port_dnat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ u32 mask = ~htonl(0xffff);
+ __be16 port;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
+ offset = 0; /* offsetof(struct tcphdr, source); */
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+ offset = 0; /* offsetof(struct tcphdr, dest); */
+ break;
+ default:
+		return;
+ }
+
+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
+ (u8 *)&port, (u8 *)&mask);
+}
+
+static void flow_offload_ipv4_checksum(struct net *net,
+ const struct flow_offload *flow,
+ struct nf_flow_rule *flow_rule)
+{
+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+
+ entry->id = FLOW_ACTION_CSUM;
+ entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
+
+ switch (protonum) {
+ case IPPROTO_TCP:
+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
+ break;
+ case IPPROTO_UDP:
+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
+ break;
+ }
+}
+
+static void flow_offload_redirect(const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+ struct rtable *rt;
+
+ rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+ entry->id = FLOW_ACTION_REDIRECT;
+ entry->dev = rt->dst.dev;
+ dev_hold(rt->dst.dev);
+}
+
+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
+ return -1;
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT) {
+ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
+ flow_offload_port_snat(net, flow, dir, flow_rule);
+ }
+ if (flow->flags & FLOW_OFFLOAD_DNAT) {
+ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
+ flow_offload_port_dnat(net, flow, dir, flow_rule);
+ }
+ if (flow->flags & FLOW_OFFLOAD_SNAT ||
+ flow->flags & FLOW_OFFLOAD_DNAT)
+ flow_offload_ipv4_checksum(net, flow, flow_rule);
+
+ flow_offload_redirect(flow, dir, flow_rule);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
+
+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
+ return -1;
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT) {
+ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
+ flow_offload_port_snat(net, flow, dir, flow_rule);
+ }
+ if (flow->flags & FLOW_OFFLOAD_DNAT) {
+ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
+ flow_offload_port_dnat(net, flow, dir, flow_rule);
+ }
+
+ flow_offload_redirect(flow, dir, flow_rule);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
+
+#define NF_FLOW_RULE_ACTION_MAX 16
+
+static struct nf_flow_rule *
+nf_flow_offload_rule_alloc(struct net *net,
+ const struct flow_offload_work *offload,
+ enum flow_offload_tuple_dir dir)
+{
+ const struct nf_flowtable *flowtable = offload->flowtable;
+ const struct flow_offload *flow = offload->flow;
+ const struct flow_offload_tuple *tuple;
+ struct nf_flow_rule *flow_rule;
+ int err = -ENOMEM;
+
+ flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
+ if (!flow_rule)
+ goto err_flow;
+
+ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
+ if (!flow_rule->rule)
+ goto err_flow_rule;
+
+ flow_rule->rule->match.dissector = &flow_rule->match.dissector;
+ flow_rule->rule->match.mask = &flow_rule->match.mask;
+ flow_rule->rule->match.key = &flow_rule->match.key;
+
+ tuple = &flow->tuplehash[dir].tuple;
+ err = nf_flow_rule_match(&flow_rule->match, tuple);
+ if (err < 0)
+ goto err_flow_match;
+
+ flow_rule->rule->action.num_entries = 0;
+ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
+ goto err_flow_match;
+
+ return flow_rule;
+
+err_flow_match:
+ kfree(flow_rule->rule);
+err_flow_rule:
+ kfree(flow_rule);
+err_flow:
+ return NULL;
+}
+
+static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
+{
+ struct flow_action_entry *entry;
+ int i;
+
+ for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
+ entry = &flow_rule->rule->action.entries[i];
+ if (entry->id != FLOW_ACTION_REDIRECT)
+ continue;
+
+ dev_put(entry->dev);
+ }
+ kfree(flow_rule->rule);
+ kfree(flow_rule);
+}
+
+static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
+{
+ int i;
+
+ for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
+ __nf_flow_offload_destroy(flow_rule[i]);
+}
+
+static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
+ struct nf_flow_rule *flow_rule[])
+{
+ struct net *net = read_pnet(&offload->flowtable->net);
+
+ flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
+ FLOW_OFFLOAD_DIR_ORIGINAL);
+ if (!flow_rule[0])
+ return -ENOMEM;
+
+ flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
+ FLOW_OFFLOAD_DIR_REPLY);
+ if (!flow_rule[1]) {
+ __nf_flow_offload_destroy(flow_rule[0]);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
+ __be16 proto, int priority,
+ enum flow_cls_command cmd,
+ const struct flow_offload_tuple *tuple,
+ struct netlink_ext_ack *extack)
+{
+ cls_flow->common.protocol = proto;
+ cls_flow->common.prio = priority;
+ cls_flow->common.extack = extack;
+ cls_flow->command = cmd;
+ cls_flow->cookie = (unsigned long)tuple;
+}
+
+static int flow_offload_tuple_add(struct flow_offload_work *offload,
+ struct nf_flow_rule *flow_rule,
+ enum flow_offload_tuple_dir dir)
+{
+ struct nf_flowtable *flowtable = offload->flowtable;
+ struct flow_cls_offload cls_flow = {};
+ struct flow_block_cb *block_cb;
+ struct netlink_ext_ack extack;
+ __be16 proto = ETH_P_ALL;
+ int err, i = 0;
+
+ nf_flow_offload_init(&cls_flow, proto, offload->priority,
+ FLOW_CLS_REPLACE,
+ &offload->flow->tuplehash[dir].tuple, &extack);
+ cls_flow.rule = flow_rule->rule;
+
+ list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) {
+ err = block_cb->cb(TC_SETUP_FT, &cls_flow,
+ block_cb->cb_priv);
+ if (err < 0)
+ continue;
+
+ i++;
+ }
+
+ return i;
+}
+
+static void flow_offload_tuple_del(struct flow_offload_work *offload,
+ enum flow_offload_tuple_dir dir)
+{
+ struct nf_flowtable *flowtable = offload->flowtable;
+ struct flow_cls_offload cls_flow = {};
+ struct flow_block_cb *block_cb;
+ struct netlink_ext_ack extack;
+ __be16 proto = ETH_P_ALL;
+
+ nf_flow_offload_init(&cls_flow, proto, offload->priority,
+ FLOW_CLS_DESTROY,
+ &offload->flow->tuplehash[dir].tuple, &extack);
+
+ list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
+ block_cb->cb(TC_SETUP_FT, &cls_flow, block_cb->cb_priv);
+
+ offload->flow->flags |= FLOW_OFFLOAD_HW_DEAD;
+}
+
+static int flow_offload_rule_add(struct flow_offload_work *offload,
+ struct nf_flow_rule *flow_rule[])
+{
+ int ok_count = 0;
+
+ ok_count += flow_offload_tuple_add(offload, flow_rule[0],
+ FLOW_OFFLOAD_DIR_ORIGINAL);
+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
+ FLOW_OFFLOAD_DIR_REPLY);
+ if (ok_count == 0)
+ return -ENOENT;
+
+ return 0;
+}
+
+static int flow_offload_work_add(struct flow_offload_work *offload)
+{
+ struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
+ int err;
+
+ err = nf_flow_offload_alloc(offload, flow_rule);
+ if (err < 0)
+ return -ENOMEM;
+
+ err = flow_offload_rule_add(offload, flow_rule);
+
+ nf_flow_offload_destroy(flow_rule);
+
+ return err;
+}
+
+static void flow_offload_work_del(struct flow_offload_work *offload)
+{
+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
+}
+
+static void flow_offload_tuple_stats(struct flow_offload_work *offload,
+ enum flow_offload_tuple_dir dir,
+ struct flow_stats *stats)
+{
+ struct nf_flowtable *flowtable = offload->flowtable;
+ struct flow_cls_offload cls_flow = {};
+ struct flow_block_cb *block_cb;
+ struct netlink_ext_ack extack;
+ __be16 proto = ETH_P_ALL;
+
+ nf_flow_offload_init(&cls_flow, proto, offload->priority,
+ FLOW_CLS_STATS,
+ &offload->flow->tuplehash[dir].tuple, &extack);
+
+ list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
+ block_cb->cb(TC_SETUP_FT, &cls_flow, block_cb->cb_priv);
+ memcpy(stats, &cls_flow.stats, sizeof(*stats));
+}
+
+static void flow_offload_work_stats(struct flow_offload_work *offload)
+{
+ struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
+ u64 lastused;
+
+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
+
+ lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
+ offload->flow->timeout = max_t(u64, offload->flow->timeout,
+ lastused + NF_FLOW_TIMEOUT);
+}
+
+static void flow_offload_work_handler(struct work_struct *work)
+{
+ struct flow_offload_work *offload, *next;
+ LIST_HEAD(offload_pending_list);
+ int ret;
+
+ spin_lock_bh(&flow_offload_pending_list_lock);
+ list_replace_init(&flow_offload_pending_list, &offload_pending_list);
+ spin_unlock_bh(&flow_offload_pending_list_lock);
+
+ list_for_each_entry_safe(offload, next, &offload_pending_list, list) {
+ switch (offload->cmd) {
+ case FLOW_CLS_REPLACE:
+ ret = flow_offload_work_add(offload);
+ if (ret < 0)
+ offload->flow->flags &= ~FLOW_OFFLOAD_HW;
+ break;
+ case FLOW_CLS_DESTROY:
+ flow_offload_work_del(offload);
+ break;
+ case FLOW_CLS_STATS:
+ flow_offload_work_stats(offload);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ list_del(&offload->list);
+ kfree(offload);
+ }
+}
+
+static void flow_offload_queue_work(struct flow_offload_work *offload)
+{
+ spin_lock_bh(&flow_offload_pending_list_lock);
+ list_add_tail(&offload->list, &flow_offload_pending_list);
+ spin_unlock_bh(&flow_offload_pending_list_lock);
+
+ schedule_work(&nf_flow_offload_work);
+}
+
+void nf_flow_offload_add(struct nf_flowtable *flowtable,
+ struct flow_offload *flow)
+{
+ struct flow_offload_work *offload;
+
+ offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
+ if (!offload)
+ return;
+
+ offload->cmd = FLOW_CLS_REPLACE;
+ offload->flow = flow;
+ offload->priority = flowtable->priority;
+ offload->flowtable = flowtable;
+ flow->flags |= FLOW_OFFLOAD_HW;
+
+ flow_offload_queue_work(offload);
+}
+
+void nf_flow_offload_del(struct nf_flowtable *flowtable,
+ struct flow_offload *flow)
+{
+ struct flow_offload_work *offload;
+
+ offload = kzalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
+ if (!offload)
+ return;
+
+ offload->cmd = FLOW_CLS_DESTROY;
+ offload->flow = flow;
+ offload->flow->flags |= FLOW_OFFLOAD_HW_DYING;
+ offload->flowtable = flowtable;
+
+ flow_offload_queue_work(offload);
+}
+
+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
+ struct flow_offload *flow)
+{
+ struct flow_offload_work *offload;
+ s64 delta;
+
+ delta = flow->timeout - jiffies;
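+	/* Skip querying the driver while more than 90% of the flow timeout
+	 * still remains or a hardware removal is already in flight.
+	 */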
+ if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10) ||
+ flow->flags & FLOW_OFFLOAD_HW_DYING)
+ return;
+
+ offload = kzalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
+ if (!offload)
+ return;
+
+ offload->cmd = FLOW_CLS_STATS;
+ offload->flow = flow;
+ offload->flowtable = flowtable;
+
+ flow_offload_queue_work(offload);
+}
+
+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
+{
+ if (flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD)
+ flush_work(&nf_flow_offload_work);
+}
+
+static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
+ struct flow_block_offload *bo,
+ enum flow_block_command cmd)
+{
+ struct flow_block_cb *block_cb, *next;
+ int err = 0;
+
+ switch (cmd) {
+ case FLOW_BLOCK_BIND:
+ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
+ break;
+ case FLOW_BLOCK_UNBIND:
+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
+ list_del(&block_cb->list);
+ flow_block_cb_free(block_cb);
+ }
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo = {};
+ int err;
+
+ if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD))
+ return 0;
+
+ if (!dev->netdev_ops->ndo_setup_tc)
+ return -EOPNOTSUPP;
+
+ bo.net = dev_net(dev);
+ bo.block = &flowtable->flow_block;
+ bo.command = cmd;
+ bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo.extack = &extack;
+ INIT_LIST_HEAD(&bo.cb_list);
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+ if (err < 0)
+ return err;
+
+ return nf_flow_table_block_setup(flowtable, &bo, cmd);
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
+
+int nf_flow_table_offload_init(void)
+{
+ INIT_WORK(&nf_flow_offload_work, flow_offload_work_handler);
+
+ return 0;
+}
+
+void nf_flow_table_offload_exit(void)
+{
+ struct flow_offload_work *offload, *next;
+ LIST_HEAD(offload_pending_list);
+
+ cancel_work_sync(&nf_flow_offload_work);
+
+ list_for_each_entry_safe(offload, next, &offload_pending_list, list) {
+ list_del(&offload->list);
+ kfree(offload);
+ }
+}
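On the driver side, a NIC that wants to consume these rules binds a callback for the new TC_SETUP_FT type when the flowtable block is attached via ndo_setup_tc(). A hypothetical callback sketch (the my_* helpers are placeholders, not a real driver API):

	static int my_setup_ft_cb(enum tc_setup_type type, void *type_data,
				  void *cb_priv)
	{
		struct flow_cls_offload *f = type_data;

		if (type != TC_SETUP_FT)
			return -EOPNOTSUPP;

		switch (f->command) {
		case FLOW_CLS_REPLACE:
			return my_insert_flow(cb_priv, f->rule, f->cookie);
		case FLOW_CLS_DESTROY:
			return my_remove_flow(cb_priv, f->cookie);
		case FLOW_CLS_STATS:
			return my_flow_stats(cb_priv, f->cookie, &f->stats);
		default:
			return -EOPNOTSUPP;
		}
	}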
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 712a428509ad..ff04cdc87f76 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -151,11 +151,64 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
}
}
+static int nft_netdev_register_hooks(struct net *net,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+ int err, j;
+
+ j = 0;
+ list_for_each_entry(hook, hook_list, list) {
+ err = nf_register_net_hook(net, &hook->ops);
+ if (err < 0)
+ goto err_register;
+
+ j++;
+ }
+ return 0;
+
+err_register:
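+	/* Unwind: unregister only the first j hooks that were registered. */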
+ list_for_each_entry(hook, hook_list, list) {
+ if (j-- <= 0)
+ break;
+
+ nf_unregister_net_hook(net, &hook->ops);
+ }
+ return err;
+}
+
+static void nft_netdev_unregister_hooks(struct net *net,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+
+ list_for_each_entry(hook, hook_list, list)
+ nf_unregister_net_hook(net, &hook->ops);
+}
+
+static int nft_register_basechain_hooks(struct net *net, int family,
+ struct nft_base_chain *basechain)
+{
+ if (family == NFPROTO_NETDEV)
+ return nft_netdev_register_hooks(net, &basechain->hook_list);
+
+ return nf_register_net_hook(net, &basechain->ops);
+}
+
+static void nft_unregister_basechain_hooks(struct net *net, int family,
+ struct nft_base_chain *basechain)
+{
+ if (family == NFPROTO_NETDEV)
+ nft_netdev_unregister_hooks(net, &basechain->hook_list);
+ else
+ nf_unregister_net_hook(net, &basechain->ops);
+}
+
static int nf_tables_register_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
{
- const struct nft_base_chain *basechain;
+ struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
if (table->flags & NFT_TABLE_F_DORMANT ||
@@ -168,14 +221,14 @@ static int nf_tables_register_hook(struct net *net,
if (basechain->type->ops_register)
return basechain->type->ops_register(net, ops);
- return nf_register_net_hook(net, ops);
+ return nft_register_basechain_hooks(net, table->family, basechain);
}
static void nf_tables_unregister_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
{
- const struct nft_base_chain *basechain;
+ struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
if (table->flags & NFT_TABLE_F_DORMANT ||
@@ -187,7 +240,7 @@ static void nf_tables_unregister_hook(struct net *net,
if (basechain->type->ops_unregister)
return basechain->type->ops_unregister(net, ops);
- nf_unregister_net_hook(net, ops);
+ nft_unregister_basechain_hooks(net, table->family, basechain);
}
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
@@ -308,6 +361,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
{
+ struct nft_flow_rule *flow;
struct nft_trans *trans;
int err;
@@ -315,6 +369,16 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
if (trans == NULL)
return -ENOMEM;
+ if (ctx->chain->flags & NFT_CHAIN_HW_OFFLOAD) {
+ flow = nft_flow_rule_create(ctx->net, rule);
+ if (IS_ERR(flow)) {
+ nft_trans_destroy(trans);
+ return PTR_ERR(flow);
+ }
+
+ nft_trans_flow_rule(trans) = flow;
+ }
+
err = nf_tables_delrule_deactivate(ctx, rule);
if (err < 0) {
nft_trans_destroy(trans);
@@ -742,7 +806,8 @@ static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
if (cnt && i++ == cnt)
break;
- nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
+ nft_unregister_basechain_hooks(net, table->family,
+ nft_base_chain(chain));
}
}
@@ -757,14 +822,16 @@ static int nf_tables_table_enable(struct net *net, struct nft_table *table)
if (!nft_is_base_chain(chain))
continue;
- err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
+ err = nft_register_basechain_hooks(net, table->family,
+ nft_base_chain(chain));
if (err < 0)
- goto err;
+ goto err_register_hooks;
i++;
}
return 0;
-err:
+
+err_register_hooks:
if (i)
nft_table_disable(net, table, i);
return err;
@@ -1225,6 +1292,46 @@ nla_put_failure:
return -ENOSPC;
}
+static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
+ const struct nft_base_chain *basechain)
+{
+ const struct nf_hook_ops *ops = &basechain->ops;
+ struct nft_hook *hook, *first = NULL;
+ struct nlattr *nest, *nest_devs;
+ int n = 0;
+
+ nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+ goto nla_put_failure;
+
+ if (family == NFPROTO_NETDEV) {
+ nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS);
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (!first)
+ first = hook;
+
+ if (nla_put_string(skb, NFTA_DEVICE_NAME,
+ hook->ops.dev->name))
+ goto nla_put_failure;
+ n++;
+ }
+ nla_nest_end(skb, nest_devs);
+
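+		/* Keep the legacy NFTA_HOOK_DEV attribute for single-device
+		 * chains so that existing userspace keeps working.
+		 */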
+ if (n == 1 &&
+ nla_put_string(skb, NFTA_HOOK_DEV, first->ops.dev->name))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+
+ return 0;
+nla_put_failure:
+ return -1;
+}
+
static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq, int event, u32 flags,
int family, const struct nft_table *table,
@@ -1253,21 +1360,10 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
if (nft_is_base_chain(chain)) {
const struct nft_base_chain *basechain = nft_base_chain(chain);
- const struct nf_hook_ops *ops = &basechain->ops;
struct nft_stats __percpu *stats;
- struct nlattr *nest;
- nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
- if (nest == NULL)
- goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+ if (nft_dump_basechain_hook(skb, family, basechain))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
- goto nla_put_failure;
- if (basechain->dev_name[0] &&
- nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name))
- goto nla_put_failure;
- nla_nest_end(skb, nest);
if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
htonl(basechain->policy)))
@@ -1485,6 +1581,7 @@ static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
static void nf_tables_chain_destroy(struct nft_ctx *ctx)
{
struct nft_chain *chain = ctx->chain;
+ struct nft_hook *hook, *next;
if (WARN_ON(chain->use > 0))
return;
@@ -1495,6 +1592,13 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
if (nft_is_base_chain(chain)) {
struct nft_base_chain *basechain = nft_base_chain(chain);
+ if (ctx->family == NFPROTO_NETDEV) {
+ list_for_each_entry_safe(hook, next,
+ &basechain->hook_list, list) {
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+ }
module_put(basechain->type->owner);
if (rcu_access_pointer(basechain->stats)) {
static_branch_dec(&nft_counters_enabled);
@@ -1508,13 +1612,125 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
}
}
+static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
+ const struct nlattr *attr)
+{
+ struct net_device *dev;
+ char ifname[IFNAMSIZ];
+ struct nft_hook *hook;
+ int err;
+
+ hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL);
+ if (!hook) {
+ err = -ENOMEM;
+ goto err_hook_alloc;
+ }
+
+ nla_strlcpy(ifname, attr, IFNAMSIZ);
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev) {
+ err = -ENOENT;
+ goto err_hook_dev;
+ }
+ hook->ops.dev = dev;
+
+ return hook;
+
+err_hook_dev:
+ kfree(hook);
+err_hook_alloc:
+ return ERR_PTR(err);
+}
+
+static bool nft_hook_list_find(struct list_head *hook_list,
+ const struct nft_hook *this)
+{
+ struct nft_hook *hook;
+
+ list_for_each_entry(hook, hook_list, list) {
+ if (this->ops.dev == hook->ops.dev)
+ return true;
+ }
+
+ return false;
+}
+
+static int nf_tables_parse_netdev_hooks(struct net *net,
+ const struct nlattr *attr,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook, *next;
+ const struct nlattr *tmp;
+ int rem, n = 0, err;
+
+ nla_for_each_nested(tmp, attr, rem) {
+ if (nla_type(tmp) != NFTA_DEVICE_NAME) {
+ err = -EINVAL;
+ goto err_hook;
+ }
+
+ hook = nft_netdev_hook_alloc(net, tmp);
+ if (IS_ERR(hook)) {
+ err = PTR_ERR(hook);
+ goto err_hook;
+ }
+ if (nft_hook_list_find(hook_list, hook)) {
+ err = -EEXIST;
+ goto err_hook;
+ }
+ list_add_tail(&hook->list, hook_list);
+ n++;
+
+ if (n == NFT_NETDEVICE_MAX) {
+ err = -EFBIG;
+ goto err_hook;
+ }
+ }
+ if (!n)
+ return -EINVAL;
+
+ return 0;
+
+err_hook:
+ list_for_each_entry_safe(hook, next, hook_list, list) {
+ list_del(&hook->list);
+ kfree(hook);
+ }
+ return err;
+}
+
struct nft_chain_hook {
u32 num;
s32 priority;
const struct nft_chain_type *type;
- struct net_device *dev;
+ struct list_head list;
};
+static int nft_chain_parse_netdev(struct net *net,
+ struct nlattr *tb[],
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+ int err;
+
+ if (tb[NFTA_HOOK_DEV]) {
+ hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV]);
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ list_add_tail(&hook->list, hook_list);
+ } else if (tb[NFTA_HOOK_DEVS]) {
+ err = nf_tables_parse_netdev_hooks(net, tb[NFTA_HOOK_DEVS],
+ hook_list);
+ if (err < 0)
+ return err;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int nft_chain_parse_hook(struct net *net,
const struct nlattr * const nla[],
struct nft_chain_hook *hook, u8 family,
@@ -1522,7 +1738,6 @@ static int nft_chain_parse_hook(struct net *net,
{
struct nlattr *ha[NFTA_HOOK_MAX + 1];
const struct nft_chain_type *type;
- struct net_device *dev;
int err;
lockdep_assert_held(&net->nft.commit_mutex);
@@ -1560,23 +1775,14 @@ static int nft_chain_parse_hook(struct net *net,
hook->type = type;
- hook->dev = NULL;
+ INIT_LIST_HEAD(&hook->list);
if (family == NFPROTO_NETDEV) {
- char ifname[IFNAMSIZ];
-
- if (!ha[NFTA_HOOK_DEV]) {
- module_put(type->owner);
- return -EOPNOTSUPP;
- }
-
- nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
- dev = __dev_get_by_name(net, ifname);
- if (!dev) {
+ err = nft_chain_parse_netdev(net, ha, &hook->list);
+ if (err < 0) {
module_put(type->owner);
- return -ENOENT;
+ return err;
}
- hook->dev = dev;
- } else if (ha[NFTA_HOOK_DEV]) {
+ } else if (ha[NFTA_HOOK_DEV] || ha[NFTA_HOOK_DEVS]) {
module_put(type->owner);
return -EOPNOTSUPP;
}
@@ -1586,6 +1792,12 @@ static int nft_chain_parse_hook(struct net *net,
static void nft_chain_release_hook(struct nft_chain_hook *hook)
{
+ struct nft_hook *h, *next;
+
+ list_for_each_entry_safe(h, next, &hook->list, list) {
+ list_del(&h->list);
+ kfree(h);
+ }
module_put(hook->type->owner);
}
@@ -1610,6 +1822,49 @@ static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *cha
return kvmalloc(alloc, GFP_KERNEL);
}
+static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
+ const struct nft_chain_hook *hook,
+ struct nft_chain *chain)
+{
+ ops->pf = family;
+ ops->hooknum = hook->num;
+ ops->priority = hook->priority;
+ ops->priv = chain;
+ ops->hook = hook->type->hooks[ops->hooknum];
+}
+
+static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
+ struct nft_chain_hook *hook, u32 flags)
+{
+ struct nft_chain *chain;
+ struct nft_hook *h;
+
+ basechain->type = hook->type;
+ INIT_LIST_HEAD(&basechain->hook_list);
+ chain = &basechain->chain;
+
+ if (family == NFPROTO_NETDEV) {
+ list_splice_init(&hook->list, &basechain->hook_list);
+ list_for_each_entry(h, &basechain->hook_list, list)
+ nft_basechain_hook_init(&h->ops, family, hook, chain);
+
+ basechain->ops.hooknum = hook->num;
+ basechain->ops.priority = hook->priority;
+ } else {
+ nft_basechain_hook_init(&basechain->ops, family, hook, chain);
+ }
+
+ chain->flags |= NFT_BASE_CHAIN | flags;
+ basechain->policy = NF_ACCEPT;
+ if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+ nft_chain_offload_priority(basechain) < 0)
+ return -EOPNOTSUPP;
+
+ flow_block_init(&basechain->flow_block);
+
+ return 0;
+}
+
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
u8 policy, u32 flags)
{
@@ -1628,7 +1883,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_chain_hook hook;
- struct nf_hook_ops *ops;
err = nft_chain_parse_hook(net, nla, &hook, family, true);
if (err < 0)
@@ -1639,9 +1893,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
nft_chain_release_hook(&hook);
return -ENOMEM;
}
-
- if (hook.dev != NULL)
- strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);
+ chain = &basechain->chain;
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
@@ -1654,24 +1906,12 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
static_branch_inc(&nft_counters_enabled);
}
- basechain->type = hook.type;
- chain = &basechain->chain;
-
- ops = &basechain->ops;
- ops->pf = family;
- ops->hooknum = hook.num;
- ops->priority = hook.priority;
- ops->priv = chain;
- ops->hook = hook.type->hooks[ops->hooknum];
- ops->dev = hook.dev;
-
- chain->flags |= NFT_BASE_CHAIN | flags;
- basechain->policy = NF_ACCEPT;
- if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
- nft_chain_offload_priority(basechain) < 0)
- return -EOPNOTSUPP;
-
- flow_block_init(&basechain->flow_block);
+ err = nft_basechain_init(basechain, family, &hook, flags);
+ if (err < 0) {
+ nft_chain_release_hook(&hook);
+ kfree(basechain);
+ return err;
+ }
} else {
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (chain == NULL)
@@ -1731,6 +1971,25 @@ err1:
return err;
}
+static bool nft_hook_list_equal(struct list_head *hook_list1,
+ struct list_head *hook_list2)
+{
+ struct nft_hook *hook;
+ int n = 0, m = 0;
+
+ list_for_each_entry(hook, hook_list2, list) {
+ if (!nft_hook_list_find(hook_list1, hook))
+ return false;
+
+ n++;
+ }
+ list_for_each_entry(hook, hook_list1, list)
+ m++;
+
+ return n == m;
+}
+
static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
u32 flags)
{
@@ -1762,12 +2021,19 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
return -EBUSY;
}
- ops = &basechain->ops;
- if (ops->hooknum != hook.num ||
- ops->priority != hook.priority ||
- ops->dev != hook.dev) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
+ if (ctx->family == NFPROTO_NETDEV) {
+ if (!nft_hook_list_equal(&basechain->hook_list,
+ &hook.list)) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+ } else {
+ ops = &basechain->ops;
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
}
nft_chain_release_hook(&hook);
}
@@ -5580,6 +5846,7 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
.len = NFT_NAME_MAXLEN - 1 },
[NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED },
[NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 },
+ [NFTA_FLOWTABLE_FLAGS] = { .type = NLA_U32 },
};
struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
@@ -5626,43 +5893,6 @@ nft_flowtable_lookup_byhandle(const struct nft_table *table,
return ERR_PTR(-ENOENT);
}
-static int nf_tables_parse_devices(const struct nft_ctx *ctx,
- const struct nlattr *attr,
- struct net_device *dev_array[], int *len)
-{
- const struct nlattr *tmp;
- struct net_device *dev;
- char ifname[IFNAMSIZ];
- int rem, n = 0, err;
-
- nla_for_each_nested(tmp, attr, rem) {
- if (nla_type(tmp) != NFTA_DEVICE_NAME) {
- err = -EINVAL;
- goto err1;
- }
-
- nla_strlcpy(ifname, tmp, IFNAMSIZ);
- dev = __dev_get_by_name(ctx->net, ifname);
- if (!dev) {
- err = -ENOENT;
- goto err1;
- }
-
- dev_array[n++] = dev;
- if (n == NFT_FLOWTABLE_DEVICE_MAX) {
- err = -EFBIG;
- goto err1;
- }
- }
- if (!len)
- return -EINVAL;
-
- err = 0;
-err1:
- *len = n;
- return err;
-}
-
static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
[NFTA_FLOWTABLE_HOOK_NUM] = { .type = NLA_U32 },
[NFTA_FLOWTABLE_HOOK_PRIORITY] = { .type = NLA_U32 },
@@ -5673,11 +5903,10 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
const struct nlattr *attr,
struct nft_flowtable *flowtable)
{
- struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX];
struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
- struct nf_hook_ops *ops;
+ struct nft_hook *hook;
int hooknum, priority;
- int err, n = 0, i;
+ int err;
err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
nft_flowtable_hook_policy, NULL);
@@ -5695,27 +5924,21 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
- err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
- dev_array, &n);
+ err = nf_tables_parse_netdev_hooks(ctx->net,
+ tb[NFTA_FLOWTABLE_HOOK_DEVS],
+ &flowtable->hook_list);
if (err < 0)
return err;
- ops = kcalloc(n, sizeof(struct nf_hook_ops), GFP_KERNEL);
- if (!ops)
- return -ENOMEM;
-
- flowtable->hooknum = hooknum;
- flowtable->priority = priority;
- flowtable->ops = ops;
- flowtable->ops_len = n;
+ flowtable->hooknum = hooknum;
+ flowtable->data.priority = priority;
- for (i = 0; i < n; i++) {
- flowtable->ops[i].pf = NFPROTO_NETDEV;
- flowtable->ops[i].hooknum = hooknum;
- flowtable->ops[i].priority = priority;
- flowtable->ops[i].priv = &flowtable->data;
- flowtable->ops[i].hook = flowtable->data.type->hook;
- flowtable->ops[i].dev = dev_array[i];
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
+ hook->ops.pf = NFPROTO_NETDEV;
+ hook->ops.hooknum = hooknum;
+ hook->ops.priority = priority;
+ hook->ops.priv = &flowtable->data;
+ hook->ops.hook = flowtable->data.type->hook;
}
return err;
@@ -5752,17 +5975,73 @@ nft_flowtable_type_get(struct net *net, u8 family)
return ERR_PTR(-ENOENT);
}
+static void nft_unregister_flowtable_hook(struct net *net,
+ struct nft_flowtable *flowtable,
+ struct nft_hook *hook)
+{
+ nf_unregister_net_hook(net, &hook->ops);
+ flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
+}
+
static void nft_unregister_flowtable_net_hooks(struct net *net,
struct nft_flowtable *flowtable)
{
- int i;
+ struct nft_hook *hook;
- for (i = 0; i < flowtable->ops_len; i++) {
- if (!flowtable->ops[i].dev)
- continue;
+ list_for_each_entry(hook, &flowtable->hook_list, list)
+ nft_unregister_flowtable_hook(net, flowtable, hook);
+}
+
+static int nft_register_flowtable_net_hooks(struct net *net,
+ struct nft_table *table,
+ struct nft_flowtable *flowtable)
+{
+ struct nft_hook *hook, *hook2, *next;
+ struct nft_flowtable *ft;
+ int err, i = 0;
+
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
+ list_for_each_entry(ft, &table->flowtables, list) {
+ list_for_each_entry(hook2, &ft->hook_list, list) {
+ if (hook->ops.dev == hook2->ops.dev &&
+ hook->ops.pf == hook2->ops.pf) {
+ err = -EBUSY;
+ goto err_unregister_net_hooks;
+ }
+ }
+ }
+
+ err = flowtable->data.type->setup(&flowtable->data,
+ hook->ops.dev,
+ FLOW_BLOCK_BIND);
+ if (err < 0)
+ goto err_unregister_net_hooks;
+
+ err = nf_register_net_hook(net, &hook->ops);
+ if (err < 0) {
+ flowtable->data.type->setup(&flowtable->data,
+ hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
+ goto err_unregister_net_hooks;
+ }
+
+ i++;
+ }
+
+ return 0;
- nf_unregister_net_hook(net, &flowtable->ops[i]);
+err_unregister_net_hooks:
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ if (i-- <= 0)
+ break;
+
+ nft_unregister_flowtable_hook(net, flowtable, hook);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
}
+
+ return err;
}
static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
@@ -5773,12 +6052,13 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nf_flowtable_type *type;
- struct nft_flowtable *flowtable, *ft;
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ struct nft_hook *hook, *next;
struct nft_table *table;
struct nft_ctx ctx;
- int err, i, k;
+ int err;
if (!nla[NFTA_FLOWTABLE_TABLE] ||
!nla[NFTA_FLOWTABLE_NAME] ||
@@ -5817,6 +6097,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
flowtable->table = table;
flowtable->handle = nf_tables_alloc_handle(table);
+ INIT_LIST_HEAD(&flowtable->hook_list);
flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
if (!flowtable->name) {
@@ -5830,6 +6111,14 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
goto err2;
}
+ if (nla[NFTA_FLOWTABLE_FLAGS]) {
+ flowtable->data.flags =
+ ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
+		if (flowtable->data.flags & ~NF_FLOWTABLE_HW_OFFLOAD) {
+			err = -EOPNOTSUPP;
+			goto err3;
+		}
+ }
+
+ write_pnet(&flowtable->data.net, net);
flowtable->data.type = type;
err = type->init(&flowtable->data);
if (err < 0)
@@ -5840,43 +6129,24 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
if (err < 0)
goto err4;
- for (i = 0; i < flowtable->ops_len; i++) {
- if (!flowtable->ops[i].dev)
- continue;
-
- list_for_each_entry(ft, &table->flowtables, list) {
- for (k = 0; k < ft->ops_len; k++) {
- if (!ft->ops[k].dev)
- continue;
-
- if (flowtable->ops[i].dev == ft->ops[k].dev &&
- flowtable->ops[i].pf == ft->ops[k].pf) {
- err = -EBUSY;
- goto err5;
- }
- }
- }
-
- err = nf_register_net_hook(net, &flowtable->ops[i]);
- if (err < 0)
- goto err5;
- }
+ err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable);
+ if (err < 0)
+ goto err4;
err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
if (err < 0)
- goto err6;
+ goto err5;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
table->use++;
return 0;
-err6:
- i = flowtable->ops_len;
err5:
- for (k = i - 1; k >= 0; k--)
- nf_unregister_net_hook(net, &flowtable->ops[k]);
-
- kfree(flowtable->ops);
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ nft_unregister_flowtable_hook(net, flowtable, hook);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
err4:
flowtable->data.type->free(&flowtable->data);
err3:
@@ -5943,8 +6213,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
{
struct nlattr *nest, *nest_devs;
struct nfgenmsg *nfmsg;
+ struct nft_hook *hook;
struct nlmsghdr *nlh;
- int i;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
@@ -5960,25 +6230,23 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
- NFTA_FLOWTABLE_PAD))
+ NFTA_FLOWTABLE_PAD) ||
+ nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags)))
goto nla_put_failure;
nest = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK);
if (!nest)
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
- nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
+ nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->data.priority)))
goto nla_put_failure;
nest_devs = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK_DEVS);
if (!nest_devs)
goto nla_put_failure;
- for (i = 0; i < flowtable->ops_len; i++) {
- const struct net_device *dev = READ_ONCE(flowtable->ops[i].dev);
-
- if (dev &&
- nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
+ list_for_each_entry_rcu(hook, &flowtable->hook_list, list) {
+ if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest_devs);
@@ -6169,7 +6437,12 @@ err:
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
{
- kfree(flowtable->ops);
+ struct nft_hook *hook, *next;
+
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ list_del_rcu(&hook->list);
+ kfree(hook);
+ }
kfree(flowtable->name);
flowtable->data.type->free(&flowtable->data);
module_put(flowtable->data.type->owner);
@@ -6209,14 +6482,15 @@ nla_put_failure:
static void nft_flowtable_event(unsigned long event, struct net_device *dev,
struct nft_flowtable *flowtable)
{
- int i;
+ struct nft_hook *hook;
- for (i = 0; i < flowtable->ops_len; i++) {
- if (flowtable->ops[i].dev != dev)
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
+ if (hook->ops.dev != dev)
continue;
- nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
- flowtable->ops[i].dev = NULL;
+ nft_unregister_flowtable_hook(dev_net(dev), flowtable, hook);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
break;
}
}
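
For reference, the hook_list entries manipulated throughout these hunks are instances of the struct nft_hook introduced by this series; abridged, its shape is roughly:

	struct nft_hook {
		struct list_head	list;	/* linked into the flowtable's
						 * (or basechain's) hook_list */
		struct nf_hook_ops	ops;	/* one registration per device */
		struct rcu_head		rcu;	/* allows kfree_rcu() after
						 * list_del_rcu() */
	};
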
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index e25dab8128db..68f17a6921d8 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -132,13 +132,13 @@ static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
common->extack = extack;
}
-static int nft_setup_cb_call(struct nft_base_chain *basechain,
- enum tc_setup_type type, void *type_data)
+static int nft_setup_cb_call(enum tc_setup_type type, void *type_data,
+ struct list_head *cb_list)
{
struct flow_block_cb *block_cb;
int err;
- list_for_each_entry(block_cb, &basechain->flow_block.cb_list, list) {
+ list_for_each_entry(block_cb, cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err < 0)
return err;
@@ -155,32 +155,46 @@ int nft_chain_offload_priority(struct nft_base_chain *basechain)
return 0;
}
+static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
+ const struct nft_base_chain *basechain,
+ const struct nft_rule *rule,
+ const struct nft_flow_rule *flow,
+ struct netlink_ext_ack *extack,
+ enum flow_cls_command command)
+{
+ __be16 proto = ETH_P_ALL;
+
+ memset(cls_flow, 0, sizeof(*cls_flow));
+
+ if (flow)
+ proto = flow->proto;
+
+ nft_flow_offload_common_init(&cls_flow->common, proto,
+ basechain->ops.priority, extack);
+ cls_flow->command = command;
+ cls_flow->cookie = (unsigned long) rule;
+ if (flow)
+ cls_flow->rule = flow->rule;
+}
+
static int nft_flow_offload_rule(struct nft_chain *chain,
struct nft_rule *rule,
struct nft_flow_rule *flow,
enum flow_cls_command command)
{
- struct flow_cls_offload cls_flow = {};
+ struct netlink_ext_ack extack = {};
+ struct flow_cls_offload cls_flow;
struct nft_base_chain *basechain;
- struct netlink_ext_ack extack;
- __be16 proto = ETH_P_ALL;
if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
basechain = nft_base_chain(chain);
+ nft_flow_cls_offload_setup(&cls_flow, basechain, rule, flow, &extack,
+ command);
- if (flow)
- proto = flow->proto;
-
- nft_flow_offload_common_init(&cls_flow.common, proto,
- basechain->ops.priority, &extack);
- cls_flow.command = command;
- cls_flow.cookie = (unsigned long) rule;
- if (flow)
- cls_flow.rule = flow->rule;
-
- return nft_setup_cb_call(basechain, TC_SETUP_CLSFLOWER, &cls_flow);
+ return nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow,
+ &basechain->flow_block.cb_list);
}
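
Passing the cb_list explicitly lets nft_setup_cb_call() drive any flow block's callback list, not just the basechain's; the flowtable offload code added elsewhere in this series reuses it the same way. The walk stops at the first callback that fails, as this toy model shows:

	#include <stdio.h>

	typedef int (*block_cb_t)(int type, void *type_data, void *cb_priv);

	static int cb_ok(int type, void *type_data, void *cb_priv)   { return 0; }
	static int cb_fail(int type, void *type_data, void *cb_priv) { return -95; }

	/* Mirror of the loop in nft_setup_cb_call(): first error wins. */
	static int setup_cb_call(block_cb_t *cbs, int n, int type, void *type_data)
	{
		int i, err;

		for (i = 0; i < n; i++) {
			err = cbs[i](type, type_data, NULL);
			if (err < 0)
				return err;
		}
		return 0;
	}

	int main(void)
	{
		block_cb_t cbs[] = { cb_ok, cb_fail, cb_ok };

		printf("%d\n", setup_cb_call(cbs, 3, 0, NULL));	/* prints -95 */
		return 0;
	}
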
static int nft_flow_offload_bind(struct flow_block_offload *bo,
@@ -194,6 +208,18 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo,
struct nft_base_chain *basechain)
{
struct flow_block_cb *block_cb, *next;
+ struct flow_cls_offload cls_flow;
+ struct netlink_ext_ack extack;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
+ chain = &basechain->chain;
+ list_for_each_entry(rule, &chain->rules, list) {
+ memset(&extack, 0, sizeof(extack));
+ nft_flow_cls_offload_setup(&cls_flow, basechain, rule, NULL,
+ &extack, FLOW_CLS_DESTROY);
+ nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow, &bo->cb_list);
+ }
list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
list_del(&block_cb->list);
@@ -224,20 +250,30 @@ static int nft_block_setup(struct nft_base_chain *basechain,
return err;
}
+static void nft_flow_block_offload_init(struct flow_block_offload *bo,
+ struct net *net,
+ enum flow_block_command cmd,
+ struct nft_base_chain *basechain,
+ struct netlink_ext_ack *extack)
+{
+ memset(bo, 0, sizeof(*bo));
+ bo->net = net;
+ bo->block = &basechain->flow_block;
+ bo->command = cmd;
+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo->extack = extack;
+ INIT_LIST_HEAD(&bo->cb_list);
+}
+
static int nft_block_offload_cmd(struct nft_base_chain *chain,
struct net_device *dev,
enum flow_block_command cmd)
{
struct netlink_ext_ack extack = {};
- struct flow_block_offload bo = {};
+ struct flow_block_offload bo;
int err;
- bo.net = dev_net(dev);
- bo.block = &chain->flow_block;
- bo.command = cmd;
- bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
- bo.extack = &extack;
- INIT_LIST_HEAD(&bo.cb_list);
+ nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
if (err < 0)
@@ -253,17 +289,12 @@ static void nft_indr_block_ing_cmd(struct net_device *dev,
enum flow_block_command cmd)
{
struct netlink_ext_ack extack = {};
- struct flow_block_offload bo = {};
+ struct flow_block_offload bo;
if (!chain)
return;
- bo.net = dev_net(dev);
- bo.block = &chain->flow_block;
- bo.command = cmd;
- bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
- bo.extack = &extack;
- INIT_LIST_HEAD(&bo.cb_list);
+ nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
@@ -274,15 +305,10 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
struct net_device *dev,
enum flow_block_command cmd)
{
- struct flow_block_offload bo = {};
struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo;
- bo.net = dev_net(dev);
- bo.block = &chain->flow_block;
- bo.command = cmd;
- bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
- bo.extack = &extack;
- INIT_LIST_HEAD(&bo.cb_list);
+ nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
flow_indr_block_call(dev, &bo, cmd);
@@ -294,32 +320,122 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
-static int nft_flow_offload_chain(struct nft_chain *chain,
- u8 *ppolicy,
+static int nft_chain_offload_cmd(struct nft_base_chain *basechain,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ int err;
+
+ if (dev->netdev_ops->ndo_setup_tc)
+ err = nft_block_offload_cmd(basechain, dev, cmd);
+ else
+ err = nft_indr_block_offload_cmd(basechain, dev, cmd);
+
+ return err;
+}
+
+static int nft_flow_block_chain(struct nft_base_chain *basechain,
+ const struct net_device *this_dev,
+ enum flow_block_command cmd)
+{
+ struct net_device *dev;
+ struct nft_hook *hook;
+ int err, i = 0;
+
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ dev = hook->ops.dev;
+ if (this_dev && this_dev != dev)
+ continue;
+
+ err = nft_chain_offload_cmd(basechain, dev, cmd);
+ if (err < 0 && cmd == FLOW_BLOCK_BIND) {
+ if (!this_dev)
+ goto err_flow_block;
+
+ return err;
+ }
+ i++;
+ }
+
+ return 0;
+
+err_flow_block:
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (i-- <= 0)
+ break;
+
+ dev = hook->ops.dev;
+ nft_chain_offload_cmd(basechain, dev, FLOW_BLOCK_UNBIND);
+ }
+ return err;
+}
+
+static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy,
enum flow_block_command cmd)
{
struct nft_base_chain *basechain;
- struct net_device *dev;
u8 policy;
if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
basechain = nft_base_chain(chain);
- dev = basechain->ops.dev;
- if (!dev)
- return -EOPNOTSUPP;
-
policy = ppolicy ? *ppolicy : basechain->policy;
/* Only default policy to accept is supported for now. */
if (cmd == FLOW_BLOCK_BIND && policy == NF_DROP)
return -EOPNOTSUPP;
- if (dev->netdev_ops->ndo_setup_tc)
- return nft_block_offload_cmd(basechain, dev, cmd);
- else
- return nft_indr_block_offload_cmd(basechain, dev, cmd);
+ return nft_flow_block_chain(basechain, NULL, cmd);
+}
+
+static void nft_flow_rule_offload_abort(struct net *net,
+ struct nft_trans *trans)
+{
+ int err = 0;
+
+ list_for_each_entry_continue_reverse(trans, &net->nft.commit_list, list) {
+ if (trans->ctx.family != NFPROTO_NETDEV)
+ continue;
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWCHAIN:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) ||
+ nft_trans_chain_update(trans))
+ continue;
+
+ err = nft_flow_offload_chain(trans->ctx.chain, NULL,
+ FLOW_BLOCK_UNBIND);
+ break;
+ case NFT_MSG_DELCHAIN:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ err = nft_flow_offload_chain(trans->ctx.chain, NULL,
+ FLOW_BLOCK_BIND);
+ break;
+ case NFT_MSG_NEWRULE:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ err = nft_flow_offload_rule(trans->ctx.chain,
+ nft_trans_rule(trans),
+ NULL, FLOW_CLS_DESTROY);
+ break;
+ case NFT_MSG_DELRULE:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ err = nft_flow_offload_rule(trans->ctx.chain,
+ nft_trans_rule(trans),
+ nft_trans_flow_rule(trans),
+ FLOW_CLS_REPLACE);
+ break;
+ }
+
+ if (WARN_ON_ONCE(err))
+ break;
+ }
}
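
nft_flow_rule_offload_abort() walks the commit list backwards from the transaction that failed and issues the inverse operation for everything already pushed to hardware: UNBIND for a new chain, re-BIND for a deleted one, DESTROY for a new rule, REPLACE for a deleted one. If an undo itself fails there is nothing left to do but warn. The control flow is the classic reverse-unwind of an applied prefix:

	#include <stdio.h>

	static int apply(int op)
	{
		if (op == 4)			/* simulate a hardware failure */
			return -1;
		printf("apply %d\n", op);
		return 0;
	}

	static void undo(int op)
	{
		printf("undo %d\n", op);
	}

	int main(void)
	{
		int ops[] = { 1, 2, 3, 4, 5 };
		int i, err = 0;

		for (i = 0; i < 5; i++) {
			err = apply(ops[i]);
			if (err < 0)
				break;
		}

		if (err < 0) {
			while (--i >= 0)	/* unwind in reverse from the failure */
				undo(ops[i]);
		}
		return 0;
	}
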
int nft_flow_rule_offload_commit(struct net *net)
@@ -355,14 +471,14 @@ int nft_flow_rule_offload_commit(struct net *net)
continue;
if (trans->ctx.flags & NLM_F_REPLACE ||
- !(trans->ctx.flags & NLM_F_APPEND))
- return -EOPNOTSUPP;
-
+ !(trans->ctx.flags & NLM_F_APPEND)) {
+ err = -EOPNOTSUPP;
+ break;
+ }
err = nft_flow_offload_rule(trans->ctx.chain,
nft_trans_rule(trans),
nft_trans_flow_rule(trans),
FLOW_CLS_REPLACE);
- nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_DELRULE:
if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
@@ -370,13 +486,31 @@ int nft_flow_rule_offload_commit(struct net *net)
err = nft_flow_offload_rule(trans->ctx.chain,
nft_trans_rule(trans),
- nft_trans_flow_rule(trans),
- FLOW_CLS_DESTROY);
+ NULL, FLOW_CLS_DESTROY);
break;
}
- if (err)
- return err;
+ if (err) {
+ nft_flow_rule_offload_abort(net, trans);
+ break;
+ }
+ }
+
+ list_for_each_entry(trans, &net->nft.commit_list, list) {
+ if (trans->ctx.family != NFPROTO_NETDEV)
+ continue;
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWRULE:
+ case NFT_MSG_DELRULE:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+ break;
+ default:
+ break;
+ }
}
return err;
@@ -386,6 +520,7 @@ static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
{
struct nft_base_chain *basechain;
struct net *net = dev_net(dev);
+ struct nft_hook *hook, *found;
const struct nft_table *table;
struct nft_chain *chain;
@@ -398,8 +533,16 @@ static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
!(chain->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
+ found = NULL;
basechain = nft_base_chain(chain);
- if (strncmp(basechain->dev_name, dev->name, IFNAMSIZ))
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.dev != dev)
+ continue;
+
+ found = hook;
+ break;
+ }
+ if (!found)
continue;
return chain;
@@ -427,18 +570,6 @@ static void nft_indr_block_cb(struct net_device *dev,
mutex_unlock(&net->nft.commit_mutex);
}
-static void nft_offload_chain_clean(struct nft_chain *chain)
-{
- struct nft_rule *rule;
-
- list_for_each_entry(rule, &chain->rules, list) {
- nft_flow_offload_rule(chain, rule,
- NULL, FLOW_CLS_DESTROY);
- }
-
- nft_flow_offload_chain(chain, NULL, FLOW_BLOCK_UNBIND);
-}
-
static int nft_offload_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -449,7 +580,9 @@ static int nft_offload_netdev_event(struct notifier_block *this,
mutex_lock(&net->nft.commit_mutex);
chain = __nft_offload_get_chain(dev);
if (chain)
- nft_offload_chain_clean(chain);
+ nft_flow_block_chain(nft_base_chain(chain), dev,
+ FLOW_BLOCK_UNBIND);
+
mutex_unlock(&net->nft.commit_mutex);
return NOTIFY_DONE;
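
With the per-device unbind, the notifier no longer needs nft_offload_chain_clean(): the FLOW_CLS_DESTROY for each rule now happens inside nft_flow_offload_unbind() (see the hunk at line 194 above), and only the block on the departing device is torn down. A minimal sketch of the notifier mechanism itself, assuming a register_netdevice_notifier() call at init time that is not shown:

	#include <linux/netdevice.h>
	#include <linux/notifier.h>

	static int demo_netdev_event(struct notifier_block *this,
				     unsigned long event, void *ptr)
	{
		struct net_device *dev = netdev_notifier_info_to_dev(ptr);

		if (event != NETDEV_UNREGISTER)
			return NOTIFY_DONE;

		/* tear down per-device offload state for dev here */
		pr_info("%s is going away\n", dev->name);

		return NOTIFY_DONE;
	}

	static struct notifier_block demo_netdev_notifier = {
		.notifier_call	= demo_netdev_event,
		/* registered with register_netdevice_notifier() at init time */
	};
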
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index b5d5d071d765..c78d01bc02e9 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -287,28 +287,35 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
struct nft_ctx *ctx)
{
struct nft_base_chain *basechain = nft_base_chain(ctx->chain);
+ struct nft_hook *hook, *found = NULL;
+ int n = 0;
- switch (event) {
- case NETDEV_UNREGISTER:
- if (strcmp(basechain->dev_name, dev->name) != 0)
- return;
-
- /* UNREGISTER events are also happpening on netns exit.
- *
- * Altough nf_tables core releases all tables/chains, only
- * this event handler provides guarantee that
- * basechain.ops->dev is still accessible, so we cannot
- * skip exiting net namespaces.
- */
- __nft_release_basechain(ctx);
- break;
- case NETDEV_CHANGENAME:
- if (dev->ifindex != basechain->ops.dev->ifindex)
- return;
+ if (event != NETDEV_UNREGISTER)
+ return;
- strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
- break;
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.dev == dev)
+ found = hook;
+
+ n++;
}
+ if (!found)
+ return;
+
+ if (n > 1) {
+ nf_unregister_net_hook(ctx->net, &found->ops);
+ list_del_rcu(&found->list);
+ kfree_rcu(found, rcu);
+ return;
+ }
+
+ /* UNREGISTER events are also happening on netns exit.
+ *
+ * Although nf_tables core releases all tables/chains, only this event
+ * handler provides guarantee that hook->ops.dev is still accessible,
+ * so we cannot skip exiting net namespaces.
+ */
+ __nft_release_basechain(ctx);
}
static int nf_tables_netdev_event(struct notifier_block *this,
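
The counting in the handler above encodes the multi-device basechain semantics this series introduces: n counts every hook on the chain while the loop searches for the one bound to the vanishing device. If other devices remain (n > 1), only that hook is unregistered and freed; if it was the last, the whole basechain is released. The decision reduces to:

	#include <stdio.h>

	/* Toy decision mirroring nft_netdev_event(): one hook per device. */
	static const char *on_unregister(int hooks_on_chain, int dev_is_hooked)
	{
		if (!dev_is_hooked)
			return "not our device, ignore";
		if (hooks_on_chain > 1)
			return "drop this device's hook, chain survives";
		return "last device, release the basechain";
	}

	int main(void)
	{
		printf("%s\n", on_unregister(2, 1));
		printf("%s\n", on_unregister(1, 1));
		return 0;
	}
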
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 0744b2bb46da..b8092069f868 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
+#include <linux/if_arp.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables_offload.h>
@@ -125,6 +126,11 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
flow->match.dissector.used_keys |= BIT(reg->key);
flow->match.dissector.offset[reg->key] = reg->base_offset;
+ if (reg->key == FLOW_DISSECTOR_KEY_META &&
+ reg->offset == offsetof(struct nft_flow_key, meta.ingress_iftype) &&
+ nft_reg_load16(priv->data.data) != ARPHRD_ETHER)
+ return -EOPNOTSUPP;
+
nft_offload_update_dependency(ctx, &priv->data, priv->len);
return 0;
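
Flower offload can only express Ethernet rules, so a comparison against meta iiftype is let through only when the compared constant is ARPHRD_ETHER; anything else bails out with -EOPNOTSUPP before the dependency is recorded. nft_reg_load16() reads the 16-bit value out of the first two bytes of the 32-bit register word, roughly:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define ARPHRD_ETHER 1	/* from <linux/if_arp.h> */

	/* Userspace stand-in for nft_reg_load16(). */
	static uint16_t reg_load16(const uint32_t *sreg)
	{
		uint16_t val;

		memcpy(&val, sreg, sizeof(val));
		return val;
	}

	int main(void)
	{
		uint32_t reg = 0;
		uint16_t iftype = ARPHRD_ETHER;

		memcpy(&reg, &iftype, sizeof(iftype));
		printf("offloadable: %s\n",
		       reg_load16(&reg) == ARPHRD_ETHER ? "yes" : "no");
		return 0;
	}
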
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index f29bbc74c4bf..dd82ff2ee19f 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -115,10 +115,13 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (nft_flow_route(pkt, ct, &route, dir) < 0)
goto err_flow_route;
- flow = flow_offload_alloc(ct, &route);
+ flow = flow_offload_alloc(ct);
if (!flow)
goto err_flow_alloc;
+ if (flow_offload_route_init(flow, &route) < 0)
+ goto err_flow_add;
+
if (tcph) {
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
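
flow_offload_alloc() no longer takes the route: allocation and route binding are now two steps (the counterpart change lives in nf_flow_table_core.c earlier in this diff), so a route that cannot be initialised unwinds through the existing err_flow_add label instead of failing mid-constructor. The shape of the pattern, as a runnable toy:

	#include <stdio.h>
	#include <stdlib.h>

	/* Toy two-phase constructor: allocate first, then bind resources that
	 * can fail, giving each phase a matching cleanup step.
	 */
	struct flow { int route; };

	static struct flow *flow_alloc(void)
	{
		return calloc(1, sizeof(struct flow));
	}

	static int flow_route_init(struct flow *flow, int route)
	{
		if (route < 0)		/* e.g. no usable dst entry */
			return -1;
		flow->route = route;
		return 0;
	}

	int main(void)
	{
		struct flow *flow = flow_alloc();

		if (!flow)
			return 1;
		if (flow_route_init(flow, -1) < 0) {
			free(flow);	/* undo only what was set up */
			puts("route init failed, flow freed");
		}
		return 0;
	}
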
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 317e3a9e8c5b..9740b554fdb3 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -33,19 +33,19 @@
static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
-static u8 nft_meta_weekday(unsigned long secs)
+static u8 nft_meta_weekday(time64_t secs)
{
unsigned int dse;
u8 wday;
secs -= NFT_META_SECS_PER_MINUTE * sys_tz.tz_minuteswest;
- dse = secs / NFT_META_SECS_PER_DAY;
+ dse = div_u64(secs, NFT_META_SECS_PER_DAY);
wday = (4 + dse) % NFT_META_DAYS_PER_WEEK;
return wday;
}
-static u32 nft_meta_hour(unsigned long secs)
+static u32 nft_meta_hour(time64_t secs)
{
struct tm tm;
@@ -250,10 +250,10 @@ void nft_meta_get_eval(const struct nft_expr *expr,
nft_reg_store64(dest, ktime_get_real_ns());
break;
case NFT_META_TIME_DAY:
- nft_reg_store8(dest, nft_meta_weekday(get_seconds()));
+ nft_reg_store8(dest, nft_meta_weekday(ktime_get_real_seconds()));
break;
case NFT_META_TIME_HOUR:
- *dest = nft_meta_hour(get_seconds());
+ *dest = nft_meta_hour(ktime_get_real_seconds());
break;
default:
WARN_ON(1);
@@ -547,6 +547,14 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx,
sizeof(__u8), reg);
nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
break;
+ case NFT_META_IIF:
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta,
+ ingress_ifindex, sizeof(__u32), reg);
+ break;
+ case NFT_META_IIFTYPE:
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta,
+ ingress_iftype, sizeof(__u16), reg);
+ break;
default:
return -EOPNOTSUPP;
}
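
The two new cases let meta iif and meta iiftype rules be translated for hardware: they populate FLOW_DISSECTOR_KEY_META, whose key structure at this point carries exactly those two fields (abridged from include/net/flow_dissector.h):

	struct flow_dissector_key_meta {
		int	ingress_ifindex;	/* filled from NFT_META_IIF */
		u16	ingress_iftype;		/* filled from NFT_META_IIFTYPE */
	};
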
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 5cb2d8908d2a..1993af3a2979 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -23,50 +23,58 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
+static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,
+ struct vlan_ethhdr *veth)
+{
+ if (skb_copy_bits(skb, mac_off, veth, ETH_HLEN))
+ return false;
+
+ veth->h_vlan_proto = skb->vlan_proto;
+ veth->h_vlan_TCI = htons(skb_vlan_tag_get(skb));
+ veth->h_vlan_encapsulated_proto = skb->protocol;
+
+ return true;
+}
+
/* add vlan header into the user buffer if the tag was removed by offloads */
static bool
nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
{
int mac_off = skb_mac_header(skb) - skb->data;
- u8 vlan_len, *vlanh, *dst_u8 = (u8 *) d;
+ u8 *vlanh, *dst_u8 = (u8 *) d;
struct vlan_ethhdr veth;
+ u8 vlan_hlen = 0;
+
+ if ((skb->protocol == htons(ETH_P_8021AD) ||
+ skb->protocol == htons(ETH_P_8021Q)) &&
+ offset >= VLAN_ETH_HLEN && offset < VLAN_ETH_HLEN + VLAN_HLEN)
+ vlan_hlen += VLAN_HLEN;
vlanh = (u8 *) &veth;
- if (offset < ETH_HLEN) {
- u8 ethlen = min_t(u8, len, ETH_HLEN - offset);
+ if (offset < VLAN_ETH_HLEN + vlan_hlen) {
+ u8 ethlen = len;
- if (skb_copy_bits(skb, mac_off, &veth, ETH_HLEN))
+ if (vlan_hlen &&
+ skb_copy_bits(skb, mac_off, &veth, VLAN_ETH_HLEN) < 0)
+ return false;
+ else if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth))
return false;
- veth.h_vlan_proto = skb->vlan_proto;
+ if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
+ ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen;
- memcpy(dst_u8, vlanh + offset, ethlen);
+ memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen);
len -= ethlen;
if (len == 0)
return true;
dst_u8 += ethlen;
- offset = ETH_HLEN;
- } else if (offset >= VLAN_ETH_HLEN) {
- offset -= VLAN_HLEN;
- goto skip;
+ offset = ETH_HLEN + vlan_hlen;
+ } else {
+ offset -= VLAN_HLEN + vlan_hlen;
}
- veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
- veth.h_vlan_encapsulated_proto = skb->protocol;
-
- vlanh += offset;
-
- vlan_len = min_t(u8, len, VLAN_ETH_HLEN - offset);
- memcpy(dst_u8, vlanh, vlan_len);
-
- len -= vlan_len;
- if (!len)
- return true;
-
- dst_u8 += vlan_len;
- skip:
return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
}
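
A worked example of the clamping (using the corrected subtraction above): VLAN_ETH_HLEN is 18, so with no in-band tag (vlan_hlen == 0) a 2-byte read at offset 14 is served wholly from the rebuilt veth buffer, a 4-byte read at offset 16 gets only 2 bytes from it, and with an in-band tag (vlan_hlen == 4) the window extends to offset 21:

	#include <stdio.h>

	#define VLAN_ETH_HLEN	18
	#define VLAN_HLEN	4

	/* How many of the requested bytes come out of the header buffer. */
	static unsigned int hdr_bytes(unsigned int offset, unsigned int len,
				      unsigned int vlan_hlen)
	{
		unsigned int ethlen = len;

		if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
			ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen;
		return ethlen;
	}

	int main(void)
	{
		printf("%u\n", hdr_bytes(14, 2, 0));		/* 2 */
		printf("%u\n", hdr_bytes(16, 4, 0));		/* 2 */
		printf("%u\n", hdr_bytes(18, 4, VLAN_HLEN));	/* 4 */
		return 0;
	}
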
@@ -174,6 +182,44 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
dst, ETH_ALEN, reg);
break;
+ case offsetof(struct ethhdr, h_proto):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic,
+ n_proto, sizeof(__be16), reg);
+ nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_TCI):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan,
+ vlan_tci, sizeof(__be16), reg);
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan,
+ vlan_tpid, sizeof(__be16), reg);
+ nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_TCI) + sizeof(struct vlan_hdr):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan,
+ vlan_tci, sizeof(__be16), reg);
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto) +
+ sizeof(struct vlan_hdr):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan,
+ vlan_tpid, sizeof(__be16), reg);
+ break;
default:
return -EOPNOTSUPP;
}
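
The case labels above are byte offsets into struct vlan_ethhdr: the outer TCI sits at byte 14, and the inner (C-VLAN) fields one struct vlan_hdr (4 bytes) further in, which is how a payload expression against the second tag selects FLOW_DISSECTOR_KEY_CVLAN. A quick check (local struct definitions mirroring <linux/if_vlan.h>):

	#include <stddef.h>
	#include <stdio.h>

	struct vlan_hdr {
		unsigned short	h_vlan_TCI;
		unsigned short	h_vlan_encapsulated_proto;
	};

	struct vlan_ethhdr {
		unsigned char	h_dest[6];
		unsigned char	h_source[6];
		unsigned short	h_vlan_proto;
		unsigned short	h_vlan_TCI;
		unsigned short	h_vlan_encapsulated_proto;
	};

	int main(void)
	{
		printf("outer TCI at %zu, inner TCI at %zu\n",
		       offsetof(struct vlan_ethhdr, h_vlan_TCI),
		       offsetof(struct vlan_ethhdr, h_vlan_TCI) +
		       sizeof(struct vlan_hdr));
		/* prints: outer TCI at 14, inner TCI at 18 */
		return 0;
	}
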
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index be7798a50546..713fb38541df 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -239,11 +239,7 @@ static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
return 0;
/* Error message? */
- if (icmph->type != ICMP_DEST_UNREACH &&
- icmph->type != ICMP_SOURCE_QUENCH &&
- icmph->type != ICMP_TIME_EXCEEDED &&
- icmph->type != ICMP_PARAMETERPROB &&
- icmph->type != ICMP_REDIRECT)
+ if (!icmp_is_err(icmph->type))
return 0;
*nhoff += iphsz + sizeof(_ih);
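
icmp_is_err() folds the open-coded five-type test into one predicate; it was added to <linux/icmp.h> in the same development cycle (together with an ICMPv6 counterpart) and is shared with the flow table code. Its shape is roughly:

	static inline bool icmp_is_err(int type)
	{
		switch (type) {
		case ICMP_DEST_UNREACH:
		case ICMP_SOURCE_QUENCH:
		case ICMP_REDIRECT:
		case ICMP_TIME_EXCEEDED:
		case ICMP_PARAMETERPROB:
			return true;
		}

		return false;
	}
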
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 8dbb4d48f2ed..67cb98489415 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -77,12 +77,12 @@ static inline bool is_leap(unsigned int y)
* This is done in three separate functions so that the most expensive
* calculations are done last, in case a "simple match" can be found earlier.
*/
-static inline unsigned int localtime_1(struct xtm *r, time_t time)
+static inline unsigned int localtime_1(struct xtm *r, time64_t time)
{
unsigned int v, w;
/* Each day has 86400s, so finding the hour/minute is actually easy. */
- v = time % SECONDS_PER_DAY;
+ div_u64_rem(time, SECONDS_PER_DAY, &v);
r->second = v % 60;
w = v / 60;
r->minute = w % 60;
@@ -90,13 +90,13 @@ static inline unsigned int localtime_1(struct xtm *r, time_t time)
return v;
}
-static inline void localtime_2(struct xtm *r, time_t time)
+static inline void localtime_2(struct xtm *r, time64_t time)
{
/*
* Here comes the rest (weekday, monthday). First, divide the SSTE
* by seconds-per-day to get the number of _days_ since the epoch.
*/
- r->dse = time / 86400;
+ r->dse = div_u64(time, SECONDS_PER_DAY);
/*
* 1970-01-01 (w=0) was a Thursday (4).
@@ -105,7 +105,7 @@ static inline void localtime_2(struct xtm *r, time_t time)
r->weekday = (4 + r->dse - 1) % 7 + 1;
}
-static void localtime_3(struct xtm *r, time_t time)
+static void localtime_3(struct xtm *r, time64_t time)
{
unsigned int year, i, w = r->dse;
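
time64_t stays 64-bit on 32-bit architectures, where a plain '/' or '%' on it would make the compiler emit a 64-by-64 libgcc division call the kernel does not provide; div_u64() and div_u64_rem() from <linux/math64.h> perform a cheaper 64-by-32 division instead. A userspace approximation of the two conversions:

	#include <stdint.h>
	#include <stdio.h>

	#define SECONDS_PER_DAY 86400

	int main(void)
	{
		int64_t stamp = 4102444800LL;	/* 2100-01-01, past y2038 */
		uint32_t dse, secs_today;

		/* div_u64(stamp, SECONDS_PER_DAY) */
		dse = (uint32_t)((uint64_t)stamp / SECONDS_PER_DAY);
		/* div_u64_rem(stamp, SECONDS_PER_DAY, &secs_today) */
		secs_today = (uint32_t)((uint64_t)stamp % SECONDS_PER_DAY);

		printf("days since epoch: %u, seconds into day: %u\n",
		       dse, secs_today);
		return 0;
	}
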
@@ -160,7 +160,7 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct xt_time_info *info = par->matchinfo;
unsigned int packet_time;
struct xtm current_time;
- s64 stamp;
+ time64_t stamp;
/*
* We need real time here, but we can neither use skb->tstamp
@@ -173,14 +173,14 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
* 1. match before 13:00
* 2. match after 13:00
*
- * If you match against processing time (get_seconds) it
+ * If you match against processing time (ktime_get_real_seconds) it
* may happen that the same packet matches both rules if
* it arrived at the right moment before 13:00, so it would be
* better to check skb->tstamp and set it via __net_timestamp()
* if needed. This however breaks outgoing packets tx timestamp,
* and causes them to get delayed forever by fq packet scheduler.
*/
- stamp = get_seconds();
+ stamp = ktime_get_real_seconds();
if (info->flags & XT_TIME_LOCAL_TZ)
/* Adjust for local timezone */
@@ -193,6 +193,9 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
* - 'now' is in the weekday mask
* - 'now' is in the daytime range time_start..time_end
* (and by default, libxt_time will set these so as to match)
+ *
+ * note: info->date_start/stop are unsigned 32-bit values that
+ * can hold values beyond y2038, but not after y2106.
*/
if (stamp < info->date_start || stamp > info->date_stop)