diff options
Diffstat (limited to 'net/netfilter/nf_conntrack_netlink.c')
-rw-r--r-- | net/netfilter/nf_conntrack_netlink.c | 1189 |
1 files changed, 706 insertions, 483 deletions
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6a1c8f1f6171..7562b215b932 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -54,8 +54,16 @@ #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> +#include "nf_internals.h" + MODULE_LICENSE("GPL"); +struct ctnetlink_list_dump_ctx { + struct nf_conn *last; + unsigned int cpu; + bool done; +}; + static int ctnetlink_dump_tuples_proto(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l4proto *l4proto) @@ -165,10 +173,14 @@ nla_put_failure: return -1; } -static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct, + bool skip_zero) { long timeout = nf_ct_expires(ct) / HZ; + if (skip_zero && timeout == 0) + return 0; + if (nla_put_be32(skb, CTA_TIMEOUT, htonl(timeout))) goto nla_put_failure; return 0; @@ -177,7 +189,8 @@ nla_put_failure: return -1; } -static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) +static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct, + bool destroy) { const struct nf_conntrack_l4proto *l4proto; struct nlattr *nest_proto; @@ -191,7 +204,7 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) if (!nest_proto) goto nla_put_failure; - ret = l4proto->to_nlattr(skb, nest_proto, ct); + ret = l4proto->to_nlattr(skb, nest_proto, ct, destroy); nla_nest_end(skb, nest_proto); @@ -211,6 +224,7 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb, if (!help) return 0; + rcu_read_lock(); helper = rcu_dereference(help->helper); if (!helper) goto out; @@ -226,9 +240,11 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb, nla_nest_end(skb, nest_helper); out: + rcu_read_unlock(); return 0; nla_put_failure: + rcu_read_unlock(); return -1; } @@ -498,7 +514,7 @@ nla_put_failure: static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use)))) + if (nla_put_be32(skb, CTA_USE, htonl(refcount_read(&ct->ct_general.use)))) goto nla_put_failure; return 0; @@ -535,8 +551,8 @@ static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) return -1; if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) && - (ctnetlink_dump_timeout(skb, ct) < 0 || - ctnetlink_dump_protoinfo(skb, ct) < 0)) + (ctnetlink_dump_timeout(skb, ct, false) < 0 || + ctnetlink_dump_protoinfo(skb, ct, false) < 0)) return -1; return 0; @@ -544,24 +560,21 @@ static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) static int ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, - struct nf_conn *ct, bool extinfo) + struct nf_conn *ct, bool extinfo, unsigned int flags) { const struct nf_conntrack_zone *zone; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - unsigned int flags = portid ? NLM_F_MULTI : 0, event; + unsigned int event; + if (portid) + flags |= NLM_F_MULTI; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, nf_ct_l3num(ct), + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = nf_ct_l3num(ct); - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); @@ -699,12 +712,11 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct) } static int -ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) +ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) { const struct nf_conntrack_zone *zone; struct net *net; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct nlattr *nest_parms; struct nf_conn *ct = item->ct; struct sk_buff *skb; @@ -734,15 +746,11 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto errout; type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, type); - nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, item->portid, 0, type, flags, nf_ct_l3num(ct), + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = nf_ct_l3num(ct); - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); @@ -776,15 +784,19 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; if (events & (1 << IPCT_DESTROY)) { + if (ctnetlink_dump_timeout(skb, ct, true) < 0) + goto nla_put_failure; + if (ctnetlink_dump_acct(skb, ct, type) < 0 || - ctnetlink_dump_timestamp(skb, ct) < 0) + ctnetlink_dump_timestamp(skb, ct) < 0 || + ctnetlink_dump_protoinfo(skb, ct, true) < 0) goto nla_put_failure; } else { - if (ctnetlink_dump_timeout(skb, ct) < 0) + if (ctnetlink_dump_timeout(skb, ct, false) < 0) goto nla_put_failure; - if (events & (1 << IPCT_PROTOINFO) - && ctnetlink_dump_protoinfo(skb, ct) < 0) + if (events & (1 << IPCT_PROTOINFO) && + ctnetlink_dump_protoinfo(skb, ct, false) < 0) goto nla_put_failure; if ((events & (1 << IPCT_HELPER) || nfct_help(ct)) @@ -846,21 +858,112 @@ static int ctnetlink_done(struct netlink_callback *cb) return 0; } +struct ctnetlink_filter_u32 { + u32 val; + u32 mask; +}; + struct ctnetlink_filter { u8 family; - struct { - u_int32_t val; - u_int32_t mask; - } mark; + + u_int32_t orig_flags; + u_int32_t reply_flags; + + struct nf_conntrack_tuple orig; + struct nf_conntrack_tuple reply; + struct nf_conntrack_zone zone; + + struct ctnetlink_filter_u32 mark; + struct ctnetlink_filter_u32 status; +}; + +static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = { + [CTA_FILTER_ORIG_FLAGS] = { .type = NLA_U32 }, + [CTA_FILTER_REPLY_FLAGS] = { .type = NLA_U32 }, }; +static int ctnetlink_parse_filter(const struct nlattr *attr, + struct ctnetlink_filter *filter) +{ + struct nlattr *tb[CTA_FILTER_MAX + 1]; + int ret = 0; + + ret = nla_parse_nested(tb, CTA_FILTER_MAX, attr, cta_filter_nla_policy, + NULL); + if (ret) + return ret; + + if (tb[CTA_FILTER_ORIG_FLAGS]) { + filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]); + if (filter->orig_flags & ~CTA_FILTER_F_ALL) + return -EOPNOTSUPP; + } + + if (tb[CTA_FILTER_REPLY_FLAGS]) { + filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]); + if (filter->reply_flags & ~CTA_FILTER_F_ALL) + return -EOPNOTSUPP; + } + + return 0; +} + +static int ctnetlink_parse_zone(const struct nlattr *attr, + struct nf_conntrack_zone *zone); +static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], + struct nf_conntrack_tuple *tuple, + u32 type, u_int8_t l3num, + struct nf_conntrack_zone *zone, + u_int32_t flags); + +static int ctnetlink_filter_parse_mark(struct ctnetlink_filter_u32 *mark, + const struct nlattr * const cda[]) +{ +#ifdef CONFIG_NF_CONNTRACK_MARK + if (cda[CTA_MARK]) { + mark->val = ntohl(nla_get_be32(cda[CTA_MARK])); + + if (cda[CTA_MARK_MASK]) + mark->mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + else + mark->mask = 0xffffffff; + } else if (cda[CTA_MARK_MASK]) { + return -EINVAL; + } +#endif + return 0; +} + +static int ctnetlink_filter_parse_status(struct ctnetlink_filter_u32 *status, + const struct nlattr * const cda[]) +{ + if (cda[CTA_STATUS]) { + status->val = ntohl(nla_get_be32(cda[CTA_STATUS])); + if (cda[CTA_STATUS_MASK]) + status->mask = ntohl(nla_get_be32(cda[CTA_STATUS_MASK])); + else + status->mask = status->val; + + /* status->val == 0? always true, else always false. */ + if (status->mask == 0) + return -EINVAL; + } else if (cda[CTA_STATUS_MASK]) { + return -EINVAL; + } + + /* CTA_STATUS is NLA_U32, if this fires UAPI needs to be extended */ + BUILD_BUG_ON(__IPS_MAX_BIT >= 32); + return 0; +} + static struct ctnetlink_filter * ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) { struct ctnetlink_filter *filter; + int err; #ifndef CONFIG_NF_CONNTRACK_MARK - if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) + if (cda[CTA_MARK] || cda[CTA_MARK_MASK]) return ERR_PTR(-EOPNOTSUPP); #endif @@ -870,13 +973,66 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) filter->family = family; -#ifdef CONFIG_NF_CONNTRACK_MARK - if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { - filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); - filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + err = ctnetlink_filter_parse_mark(&filter->mark, cda); + if (err) + goto err_filter; + + err = ctnetlink_filter_parse_status(&filter->status, cda); + if (err) + goto err_filter; + + if (!cda[CTA_FILTER]) + return filter; + + err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone); + if (err < 0) + goto err_filter; + + err = ctnetlink_parse_filter(cda[CTA_FILTER], filter); + if (err < 0) + goto err_filter; + + if (filter->orig_flags) { + if (!cda[CTA_TUPLE_ORIG]) { + err = -EINVAL; + goto err_filter; + } + + err = ctnetlink_parse_tuple_filter(cda, &filter->orig, + CTA_TUPLE_ORIG, + filter->family, + &filter->zone, + filter->orig_flags); + if (err < 0) + goto err_filter; } -#endif + + if (filter->reply_flags) { + if (!cda[CTA_TUPLE_REPLY]) { + err = -EINVAL; + goto err_filter; + } + + err = ctnetlink_parse_tuple_filter(cda, &filter->reply, + CTA_TUPLE_REPLY, + filter->family, + &filter->zone, + filter->reply_flags); + if (err < 0) + goto err_filter; + } + return filter; + +err_filter: + kfree(filter); + + return ERR_PTR(err); +} + +static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda) +{ + return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS]; } static int ctnetlink_start(struct netlink_callback *cb) @@ -886,7 +1042,7 @@ static int ctnetlink_start(struct netlink_callback *cb) struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u8 family = nfmsg->nfgen_family; - if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) { + if (ctnetlink_needs_filter(family, cda)) { filter = ctnetlink_alloc_filter(cda, family); if (IS_ERR(filter)) return PTR_ERR(filter); @@ -896,9 +1052,80 @@ static int ctnetlink_start(struct netlink_callback *cb) return 0; } +static int ctnetlink_filter_match_tuple(struct nf_conntrack_tuple *filter_tuple, + struct nf_conntrack_tuple *ct_tuple, + u_int32_t flags, int family) +{ + switch (family) { + case NFPROTO_IPV4: + if ((flags & CTA_FILTER_FLAG(CTA_IP_SRC)) && + filter_tuple->src.u3.ip != ct_tuple->src.u3.ip) + return 0; + + if ((flags & CTA_FILTER_FLAG(CTA_IP_DST)) && + filter_tuple->dst.u3.ip != ct_tuple->dst.u3.ip) + return 0; + break; + case NFPROTO_IPV6: + if ((flags & CTA_FILTER_FLAG(CTA_IP_SRC)) && + !ipv6_addr_cmp(&filter_tuple->src.u3.in6, + &ct_tuple->src.u3.in6)) + return 0; + + if ((flags & CTA_FILTER_FLAG(CTA_IP_DST)) && + !ipv6_addr_cmp(&filter_tuple->dst.u3.in6, + &ct_tuple->dst.u3.in6)) + return 0; + break; + } + + if ((flags & CTA_FILTER_FLAG(CTA_PROTO_NUM)) && + filter_tuple->dst.protonum != ct_tuple->dst.protonum) + return 0; + + switch (ct_tuple->dst.protonum) { + case IPPROTO_TCP: + case IPPROTO_UDP: + if ((flags & CTA_FILTER_FLAG(CTA_PROTO_SRC_PORT)) && + filter_tuple->src.u.tcp.port != ct_tuple->src.u.tcp.port) + return 0; + + if ((flags & CTA_FILTER_FLAG(CTA_PROTO_DST_PORT)) && + filter_tuple->dst.u.tcp.port != ct_tuple->dst.u.tcp.port) + return 0; + break; + case IPPROTO_ICMP: + if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_TYPE)) && + filter_tuple->dst.u.icmp.type != ct_tuple->dst.u.icmp.type) + return 0; + if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_CODE)) && + filter_tuple->dst.u.icmp.code != ct_tuple->dst.u.icmp.code) + return 0; + if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_ID)) && + filter_tuple->src.u.icmp.id != ct_tuple->src.u.icmp.id) + return 0; + break; + case IPPROTO_ICMPV6: + if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_TYPE)) && + filter_tuple->dst.u.icmp.type != ct_tuple->dst.u.icmp.type) + return 0; + if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_CODE)) && + filter_tuple->dst.u.icmp.code != ct_tuple->dst.u.icmp.code) + return 0; + if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_ID)) && + filter_tuple->src.u.icmp.id != ct_tuple->src.u.icmp.id) + return 0; + break; + } + + return 1; +} + static int ctnetlink_filter_match(struct nf_conn *ct, void *data) { struct ctnetlink_filter *filter = data; + struct nf_conntrack_tuple *tuple; + u32 status; if (filter == NULL) goto out; @@ -910,10 +1137,29 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) if (filter->family && nf_ct_l3num(ct) != filter->family) goto ignore_entry; + if (filter->orig_flags) { + tuple = nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL); + if (!ctnetlink_filter_match_tuple(&filter->orig, tuple, + filter->orig_flags, + filter->family)) + goto ignore_entry; + } + + if (filter->reply_flags) { + tuple = nf_ct_tuple(ct, IP_CT_DIR_REPLY); + if (!ctnetlink_filter_match_tuple(&filter->reply, tuple, + filter->reply_flags, + filter->family)) + goto ignore_entry; + } + #ifdef CONFIG_NF_CONNTRACK_MARK if ((ct->mark & filter->mark.mask) != filter->mark.val) goto ignore_entry; #endif + status = (u32)READ_ONCE(ct->status); + if ((status & filter->status.mask) != filter->status.val) + goto ignore_entry; out: return 1; @@ -925,6 +1171,7 @@ ignore_entry: static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { + unsigned int flags = cb->data ? NLM_F_DUMP_FILTERED : 0; struct net *net = sock_net(skb->sk); struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; @@ -954,12 +1201,11 @@ restart: } hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]], hnnode) { - if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) - continue; ct = nf_ct_tuplehash_to_ctrack(h); if (nf_ct_is_expired(ct)) { + /* need to defer nf_ct_kill() until lock is released */ if (i < ARRAY_SIZE(nf_ct_evict) && - atomic_inc_not_zero(&ct->ct_general.use)) + refcount_inc_not_zero(&ct->ct_general.use)) nf_ct_evict[i++] = ct; continue; } @@ -967,6 +1213,9 @@ restart: if (!net_eq(net, nf_ct_net(ct))) continue; + if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + if (cb->args[1]) { if (ct != last) continue; @@ -979,7 +1228,7 @@ restart: ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), - ct, true); + ct, true, flags); if (res < 0) { nf_conntrack_get(&ct->ct_general); cb->args[1] = (unsigned long)ct; @@ -1014,31 +1263,50 @@ out: } static int ipv4_nlattr_to_tuple(struct nlattr *tb[], - struct nf_conntrack_tuple *t) + struct nf_conntrack_tuple *t, + u_int32_t flags) { - if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST]) - return -EINVAL; + if (flags & CTA_FILTER_FLAG(CTA_IP_SRC)) { + if (!tb[CTA_IP_V4_SRC]) + return -EINVAL; + + t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]); + } - t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]); - t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]); + if (flags & CTA_FILTER_FLAG(CTA_IP_DST)) { + if (!tb[CTA_IP_V4_DST]) + return -EINVAL; + + t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]); + } return 0; } static int ipv6_nlattr_to_tuple(struct nlattr *tb[], - struct nf_conntrack_tuple *t) + struct nf_conntrack_tuple *t, + u_int32_t flags) { - if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST]) - return -EINVAL; + if (flags & CTA_FILTER_FLAG(CTA_IP_SRC)) { + if (!tb[CTA_IP_V6_SRC]) + return -EINVAL; + + t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]); + } - t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]); - t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]); + if (flags & CTA_FILTER_FLAG(CTA_IP_DST)) { + if (!tb[CTA_IP_V6_DST]) + return -EINVAL; + + t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]); + } return 0; } static int ctnetlink_parse_tuple_ip(struct nlattr *attr, - struct nf_conntrack_tuple *tuple) + struct nf_conntrack_tuple *tuple, + u_int32_t flags) { struct nlattr *tb[CTA_IP_MAX+1]; int ret = 0; @@ -1054,10 +1322,10 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr, switch (tuple->src.l3num) { case NFPROTO_IPV4: - ret = ipv4_nlattr_to_tuple(tb, tuple); + ret = ipv4_nlattr_to_tuple(tb, tuple, flags); break; case NFPROTO_IPV6: - ret = ipv6_nlattr_to_tuple(tb, tuple); + ret = ipv6_nlattr_to_tuple(tb, tuple, flags); break; } @@ -1069,7 +1337,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = { }; static int ctnetlink_parse_tuple_proto(struct nlattr *attr, - struct nf_conntrack_tuple *tuple) + struct nf_conntrack_tuple *tuple, + u_int32_t flags) { const struct nf_conntrack_l4proto *l4proto; struct nlattr *tb[CTA_PROTO_MAX+1]; @@ -1080,8 +1349,12 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr, if (ret < 0) return ret; + if (!(flags & CTA_FILTER_FLAG(CTA_PROTO_NUM))) + return 0; + if (!tb[CTA_PROTO_NUM]) return -EINVAL; + tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]); rcu_read_lock(); @@ -1092,7 +1365,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr, l4proto->nla_policy, NULL); if (ret == 0) - ret = l4proto->nlattr_to_tuple(tb, tuple); + ret = l4proto->nlattr_to_tuple(tb, tuple, flags); } rcu_read_unlock(); @@ -1143,10 +1416,21 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { [CTA_TUPLE_ZONE] = { .type = NLA_U16 }, }; +#define CTA_FILTER_F_ALL_CTA_PROTO \ + (CTA_FILTER_F_CTA_PROTO_SRC_PORT | \ + CTA_FILTER_F_CTA_PROTO_DST_PORT | \ + CTA_FILTER_F_CTA_PROTO_ICMP_TYPE | \ + CTA_FILTER_F_CTA_PROTO_ICMP_CODE | \ + CTA_FILTER_F_CTA_PROTO_ICMP_ID | \ + CTA_FILTER_F_CTA_PROTO_ICMPV6_TYPE | \ + CTA_FILTER_F_CTA_PROTO_ICMPV6_CODE | \ + CTA_FILTER_F_CTA_PROTO_ICMPV6_ID) + static int -ctnetlink_parse_tuple(const struct nlattr * const cda[], - struct nf_conntrack_tuple *tuple, u32 type, - u_int8_t l3num, struct nf_conntrack_zone *zone) +ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], + struct nf_conntrack_tuple *tuple, u32 type, + u_int8_t l3num, struct nf_conntrack_zone *zone, + u_int32_t flags) { struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; @@ -1158,23 +1442,33 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], if (err < 0) return err; - if (!tb[CTA_TUPLE_IP]) - return -EINVAL; - + if (l3num != NFPROTO_IPV4 && l3num != NFPROTO_IPV6) + return -EOPNOTSUPP; tuple->src.l3num = l3num; - err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP], tuple); - if (err < 0) - return err; + if (flags & CTA_FILTER_FLAG(CTA_IP_DST) || + flags & CTA_FILTER_FLAG(CTA_IP_SRC)) { + if (!tb[CTA_TUPLE_IP]) + return -EINVAL; - if (!tb[CTA_TUPLE_PROTO]) - return -EINVAL; + err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP], tuple, flags); + if (err < 0) + return err; + } - err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO], tuple); - if (err < 0) - return err; + if (flags & CTA_FILTER_FLAG(CTA_PROTO_NUM)) { + if (!tb[CTA_TUPLE_PROTO]) + return -EINVAL; - if (tb[CTA_TUPLE_ZONE]) { + err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO], tuple, flags); + if (err < 0) + return err; + } else if (flags & CTA_FILTER_FLAG(ALL_CTA_PROTO)) { + /* Can't manage proto flags without a protonum */ + return -EINVAL; + } + + if ((flags & CTA_FILTER_FLAG(CTA_TUPLE_ZONE)) && tb[CTA_TUPLE_ZONE]) { if (!zone) return -EINVAL; @@ -1193,6 +1487,15 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], return 0; } +static int +ctnetlink_parse_tuple(const struct nlattr * const cda[], + struct nf_conntrack_tuple *tuple, u32 type, + u_int8_t l3num, struct nf_conntrack_zone *zone) +{ + return ctnetlink_parse_tuple_filter(cda, tuple, type, l3num, zone, + CTA_FILTER_FLAG(ALL)); +} + static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { [CTA_HELP_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN - 1 }, @@ -1240,6 +1543,8 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { .len = NF_CT_LABELS_MAX_SIZE }, [CTA_LABELS_MASK] = { .type = NLA_BINARY, .len = NF_CT_LABELS_MAX_SIZE }, + [CTA_FILTER] = { .type = NLA_NESTED }, + [CTA_STATUS_MASK] = { .type = NLA_U32 }, }; static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) @@ -1255,31 +1560,38 @@ static int ctnetlink_flush_conntrack(struct net *net, u32 portid, int report, u8 family) { struct ctnetlink_filter *filter = NULL; + struct nf_ct_iter_data iter = { + .net = net, + .portid = portid, + .report = report, + }; + + if (ctnetlink_needs_filter(family, cda)) { + if (cda[CTA_FILTER]) + return -EOPNOTSUPP; - if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) { filter = ctnetlink_alloc_filter(cda, family); if (IS_ERR(filter)) return PTR_ERR(filter); + + iter.data = filter; } - nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter, - portid, report); + nf_ct_iterate_cleanup_net(ctnetlink_flush_iterate, &iter); kfree(filter); return 0; } -static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, - struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack) +static int ctnetlink_del_conntrack(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const cda[]) { + u8 family = info->nfmsg->nfgen_family; struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; - struct nf_conn *ct; - struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nf_conntrack_zone zone; + struct nf_conn *ct; int err; err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); @@ -1288,22 +1600,22 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, - nfmsg->nfgen_family, &zone); + family, &zone); else if (cda[CTA_TUPLE_REPLY]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, - nfmsg->nfgen_family, &zone); + family, &zone); else { - u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC; + u_int8_t u3 = info->nfmsg->version ? family : AF_UNSPEC; - return ctnetlink_flush_conntrack(net, cda, + return ctnetlink_flush_conntrack(info->net, cda, NETLINK_CB(skb).portid, - nlmsg_report(nlh), u3); + nlmsg_report(info->nlh), u3); } if (err < 0) return err; - h = nf_conntrack_find_get(net, &zone, &tuple); + h = nf_conntrack_find_get(info->net, &zone, &tuple); if (!h) return -ENOENT; @@ -1323,28 +1635,25 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, } } - nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); + nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(info->nlh)); nf_ct_put(ct); return 0; } -static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl, - struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack) +static int ctnetlink_get_conntrack(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const cda[]) { + u_int8_t u3 = info->nfmsg->nfgen_family; struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; - struct nf_conn *ct; - struct sk_buff *skb2 = NULL; - struct nfgenmsg *nfmsg = nlmsg_data(nlh); - u_int8_t u3 = nfmsg->nfgen_family; struct nf_conntrack_zone zone; + struct sk_buff *skb2; + struct nf_conn *ct; int err; - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = ctnetlink_start, .dump = ctnetlink_dump_table, @@ -1352,7 +1661,7 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl, .data = (void *)cda, }; - return netlink_dump_start(ctnl, skb, nlh, &c); + return netlink_dump_start(info->sk, skb, info->nlh, &c); } err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); @@ -1371,158 +1680,163 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl, if (err < 0) return err; - h = nf_conntrack_find_get(net, &zone, &tuple); + h = nf_conntrack_find_get(info->net, &zone, &tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); - err = -ENOMEM; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) { + if (!skb2) { nf_ct_put(ct); return -ENOMEM; } - err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, - NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true); + err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, + info->nlh->nlmsg_seq, + NFNL_MSG_TYPE(info->nlh->nlmsg_type), ct, + true, 0); nf_ct_put(ct); - if (err <= 0) - goto free; + if (err <= 0) { + kfree_skb(skb2); + return -ENOMEM; + } - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); - if (err < 0) - goto out; + return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); +} + +static int ctnetlink_done_list(struct netlink_callback *cb) +{ + struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; + + if (ctx->last) + nf_ct_put(ctx->last); return 0; +} -free: - kfree_skb(skb2); -out: - /* this avoids a loop in nfnetlink. */ - return err == -EAGAIN ? -ENOBUFS : err; +#ifdef CONFIG_NF_CONNTRACK_EVENTS +static int ctnetlink_dump_one_entry(struct sk_buff *skb, + struct netlink_callback *cb, + struct nf_conn *ct, + bool dying) +{ + struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + u8 l3proto = nfmsg->nfgen_family; + int res; + + if (l3proto && nf_ct_l3num(ct) != l3proto) + return 0; + + if (ctx->last) { + if (ct != ctx->last) + return 0; + + ctx->last = NULL; + } + + /* We can't dump extension info for the unconfirmed + * list because unconfirmed conntracks can have + * ct->ext reallocated (and thus freed). + * + * In the dying list case ct->ext can't be free'd + * until after we drop pcpu->lock. + */ + res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + ct, dying, 0); + if (res < 0) { + if (!refcount_inc_not_zero(&ct->ct_general.use)) + return 0; + + ctx->last = ct; + } + + return res; } +#endif -static int ctnetlink_done_list(struct netlink_callback *cb) +static int +ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) { - if (cb->args[1]) - nf_ct_put((struct nf_conn *)cb->args[1]); return 0; } static int -ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying) +ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) { - struct nf_conn *ct, *last; + struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; + struct nf_conn *last = ctx->last; +#ifdef CONFIG_NF_CONNTRACK_EVENTS + const struct net *net = sock_net(skb->sk); + struct nf_conntrack_net_ecache *ecache_net; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - u_int8_t l3proto = nfmsg->nfgen_family; - int res; - int cpu; - struct hlist_nulls_head *list; - struct net *net = sock_net(skb->sk); +#endif - if (cb->args[2]) + if (ctx->done) return 0; - last = (struct nf_conn *)cb->args[1]; + ctx->last = NULL; - for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) { - struct ct_pcpu *pcpu; +#ifdef CONFIG_NF_CONNTRACK_EVENTS + ecache_net = nf_conn_pernet_ecache(net); + spin_lock_bh(&ecache_net->dying_lock); - if (!cpu_possible(cpu)) - continue; + hlist_nulls_for_each_entry(h, n, &ecache_net->dying_list, hnnode) { + struct nf_conn *ct; + int res; - pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); - spin_lock_bh(&pcpu->lock); - list = dying ? &pcpu->dying : &pcpu->unconfirmed; -restart: - hlist_nulls_for_each_entry(h, n, list, hnnode) { - ct = nf_ct_tuplehash_to_ctrack(h); - if (l3proto && nf_ct_l3num(ct) != l3proto) - continue; - if (cb->args[1]) { - if (ct != last) - continue; - cb->args[1] = 0; - } + ct = nf_ct_tuplehash_to_ctrack(h); + if (last && last != ct) + continue; - /* We can't dump extension info for the unconfirmed - * list because unconfirmed conntracks can have - * ct->ext reallocated (and thus freed). - * - * In the dying list case ct->ext can't be free'd - * until after we drop pcpu->lock. - */ - res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFNL_MSG_TYPE(cb->nlh->nlmsg_type), - ct, dying ? true : false); - if (res < 0) { - if (!atomic_inc_not_zero(&ct->ct_general.use)) - continue; - cb->args[0] = cpu; - cb->args[1] = (unsigned long)ct; - spin_unlock_bh(&pcpu->lock); - goto out; - } + res = ctnetlink_dump_one_entry(skb, cb, ct, true); + if (res < 0) { + spin_unlock_bh(&ecache_net->dying_lock); + nf_ct_put(last); + return skb->len; } - if (cb->args[1]) { - cb->args[1] = 0; - goto restart; - } - spin_unlock_bh(&pcpu->lock); - } - cb->args[2] = 1; -out: - if (last) + nf_ct_put(last); + last = NULL; + } - return skb->len; -} + spin_unlock_bh(&ecache_net->dying_lock); +#endif + ctx->done = true; + nf_ct_put(last); -static int -ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) -{ - return ctnetlink_dump_list(skb, cb, true); + return skb->len; } -static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl, - struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack) +static int ctnetlink_get_ct_dying(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const cda[]) { - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ctnetlink_dump_dying, .done = ctnetlink_done_list, }; - return netlink_dump_start(ctnl, skb, nlh, &c); + return netlink_dump_start(info->sk, skb, info->nlh, &c); } return -EOPNOTSUPP; } -static int -ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) +static int ctnetlink_get_ct_unconfirmed(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const cda[]) { - return ctnetlink_dump_list(skb, cb, false); -} - -static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl, - struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack) -{ - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ctnetlink_dump_unconfirmed, .done = ctnetlink_done_list, }; - return netlink_dump_start(ctnl, skb, nlh, &c); + return netlink_dump_start(info->sk, skb, info->nlh, &c); } return -EOPNOTSUPP; @@ -1533,8 +1847,9 @@ static int ctnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr) + __must_hold(RCU) { - struct nf_nat_hook *nat_hook; + const struct nf_nat_hook *nat_hook; int err; nat_hook = rcu_dereference(nf_nat_hook); @@ -1576,45 +1891,10 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, } #endif -static void -__ctnetlink_change_status(struct nf_conn *ct, unsigned long on, - unsigned long off) -{ - unsigned int bit; - - /* Ignore these unchangable bits */ - on &= ~IPS_UNCHANGEABLE_MASK; - off &= ~IPS_UNCHANGEABLE_MASK; - - for (bit = 0; bit < __IPS_MAX_BIT; bit++) { - if (on & (1 << bit)) - set_bit(bit, &ct->status); - else if (off & (1 << bit)) - clear_bit(bit, &ct->status); - } -} - static int ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) { - unsigned long d; - unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS])); - d = ct->status ^ status; - - if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) - /* unchangeable */ - return -EBUSY; - - if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) - /* SEEN_REPLY bit can only be set */ - return -EBUSY; - - if (d & IPS_ASSURED && !(status & IPS_ASSURED)) - /* ASSURED bit can only be set */ - return -EBUSY; - - __ctnetlink_change_status(ct, status, 0); - return 0; + return nf_ct_change_status_common(ct, ntohl(nla_get_be32(cda[CTA_STATUS]))); } static int @@ -1690,7 +1970,7 @@ static int ctnetlink_change_helper(struct nf_conn *ct, } if (help) { - if (help->helper == helper) { + if (rcu_access_pointer(help->helper) == helper) { /* update private helper data if allowed. */ if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); @@ -1709,16 +1989,7 @@ static int ctnetlink_change_helper(struct nf_conn *ct, static int ctnetlink_change_timeout(struct nf_conn *ct, const struct nlattr * const cda[]) { - u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; - - if (timeout > INT_MAX) - timeout = INT_MAX; - ct->timeout = nfct_time_stamp + (u32)timeout; - - if (test_bit(IPS_DYING_BIT, &ct->status)) - return -ETIME; - - return 0; + return __nf_ct_change_timeout(ct, (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ); } #if defined(CONFIG_NF_CONNTRACK_MARK) @@ -1978,9 +2249,7 @@ ctnetlink_create_conntrack(struct net *net, goto err1; timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; - if (timeout > INT_MAX) - timeout = INT_MAX; - ct->timeout = (u32)timeout + nfct_time_stamp; + __nf_ct_set_timeout(ct, timeout); rcu_read_lock(); if (cda[CTA_HELP]) { @@ -2025,14 +2294,10 @@ ctnetlink_create_conntrack(struct net *net, if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); - /* not in hash table yet so not strictly necessary */ + /* disable helper auto-assignment for this entry */ + ct->status |= IPS_HELPER; RCU_INIT_POINTER(help->helper, helper); } - } else { - /* try an implicit helper assignation */ - err = __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); - if (err < 0) - goto err2; } err = ctnetlink_setup_nat(ct, cda); @@ -2118,18 +2383,15 @@ err1: return ERR_PTR(err); } -static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, - struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack) +static int ctnetlink_new_conntrack(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; - struct nfgenmsg *nfmsg = nlmsg_data(nlh); - struct nf_conn *ct; - u_int8_t u3 = nfmsg->nfgen_family; + u_int8_t u3 = info->nfmsg->nfgen_family; struct nf_conntrack_zone zone; + struct nf_conn *ct; int err; err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); @@ -2151,13 +2413,13 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, } if (cda[CTA_TUPLE_ORIG]) - h = nf_conntrack_find_get(net, &zone, &otuple); + h = nf_conntrack_find_get(info->net, &zone, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = nf_conntrack_find_get(net, &zone, &rtuple); + h = nf_conntrack_find_get(info->net, &zone, &rtuple); if (h == NULL) { err = -ENOENT; - if (nlh->nlmsg_flags & NLM_F_CREATE) { + if (info->nlh->nlmsg_flags & NLM_F_CREATE) { enum ip_conntrack_events events; if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY]) @@ -2165,8 +2427,8 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, if (otuple.dst.protonum != rtuple.dst.protonum) return -EINVAL; - ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple, - &rtuple, u3); + ct = ctnetlink_create_conntrack(info->net, &zone, cda, + &otuple, &rtuple, u3); if (IS_ERR(ct)) return PTR_ERR(ct); @@ -2189,7 +2451,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, (1 << IPCT_SYNPROXY) | events, ct, NETLINK_CB(skb).portid, - nlmsg_report(nlh)); + nlmsg_report(info->nlh)); nf_ct_put(ct); } @@ -2199,7 +2461,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, err = -EEXIST; ct = nf_ct_tuplehash_to_ctrack(h); - if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { + if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) { err = ctnetlink_change_conntrack(ct, cda); if (err == 0) { nf_conntrack_eventmask_report((1 << IPCT_REPLY) | @@ -2211,7 +2473,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, (1 << IPCT_MARK) | (1 << IPCT_SYNPROXY), ct, NETLINK_CB(skb).portid, - nlmsg_report(nlh)); + nlmsg_report(info->nlh)); } } @@ -2224,23 +2486,17 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, __u16 cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS_CPU); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, htons(cpu)); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(cpu); - if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) || - nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) || nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) || nla_put_be32(skb, CTA_STATS_INSERT_FAILED, htonl(st->insert_failed)) || @@ -2248,7 +2504,11 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, nla_put_be32(skb, CTA_STATS_EARLY_DROP, htonl(st->early_drop)) || nla_put_be32(skb, CTA_STATS_ERROR, htonl(st->error)) || nla_put_be32(skb, CTA_STATS_SEARCH_RESTART, - htonl(st->search_restart))) + htonl(st->search_restart)) || + nla_put_be32(skb, CTA_STATS_CLASH_RESOLVE, + htonl(st->clash_resolve)) || + nla_put_be32(skb, CTA_STATS_CHAIN_TOOLONG, + htonl(st->chaintoolong))) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -2287,17 +2547,15 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl, - struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack) +static int ctnetlink_stat_ct_cpu(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const cda[]) { - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ctnetlink_ct_stat_cpu_dump, }; - return netlink_dump_start(ctnl, skb, nlh, &c); + return netlink_dump_start(info->sk, skb, info->nlh, &c); } return 0; @@ -2307,21 +2565,17 @@ static int ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct net *net) { - struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; - unsigned int nr_conntracks = atomic_read(&net->ct.count); + unsigned int nr_conntracks; + struct nlmsghdr *nlh; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - + nr_conntracks = nf_conntrack_count(net); if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks))) goto nla_put_failure; @@ -2337,10 +2591,8 @@ nlmsg_failure: return -1; } -static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl, - struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack) +static int ctnetlink_stat_ct(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const cda[]) { struct sk_buff *skb2; int err; @@ -2350,23 +2602,15 @@ static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl, return -ENOMEM; err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, - NFNL_MSG_TYPE(nlh->nlmsg_type), + info->nlh->nlmsg_seq, + NFNL_MSG_TYPE(info->nlh->nlmsg_type), sock_net(skb->sk)); - if (err <= 0) - goto free; - - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); - if (err < 0) - goto out; - - return 0; + if (err <= 0) { + kfree_skb(skb2); + return -ENOMEM; + } -free: - kfree_skb(skb2); -out: - /* this avoids a loop in nfnetlink. */ - return err == -EAGAIN ? -ENOBUFS : err; + return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); } static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { @@ -2405,6 +2649,8 @@ ctnetlink_glue_build_size(const struct nf_conn *ct) + nla_total_size(0) /* CTA_HELP */ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ + ctnetlink_secctx_size(ct) + + ctnetlink_acct_size(ct) + + ctnetlink_timestamp_size(ct) #if IS_ENABLED(CONFIG_NF_NAT) + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ @@ -2419,12 +2665,6 @@ ctnetlink_glue_build_size(const struct nf_conn *ct) ; } -static struct nf_conn *ctnetlink_glue_get_ct(const struct sk_buff *skb, - enum ip_conntrack_info *ctinfo) -{ - return nf_ct_get(skb, ctinfo); -} - static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) { const struct nf_conntrack_zone *zone; @@ -2462,10 +2702,14 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) if (ctnetlink_dump_status(skb, ct) < 0) goto nla_put_failure; - if (ctnetlink_dump_timeout(skb, ct) < 0) + if (ctnetlink_dump_timeout(skb, ct, false) < 0) goto nla_put_failure; - if (ctnetlink_dump_protoinfo(skb, ct) < 0) + if (ctnetlink_dump_protoinfo(skb, ct, false) < 0) + goto nla_put_failure; + + if (ctnetlink_dump_acct(skb, ct, IPCTNL_MSG_CT_GET) < 0 || + ctnetlink_dump_timestamp(skb, ct) < 0) goto nla_put_failure; if (ctnetlink_dump_helpinfo(skb, ct) < 0) @@ -2542,7 +2786,7 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[]) * unchangeable bits but do not error out. Also user programs * are allowed to clear the bits that they are allowed to change. */ - __ctnetlink_change_status(ct, status, ~status); + __nf_ct_change_status(ct, status, ~status); return 0; } @@ -2657,8 +2901,7 @@ static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct, nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff); } -static struct nfnl_ct_hook ctnetlink_glue_hook = { - .get_ct = ctnetlink_glue_get_ct, +static const struct nfnl_ct_hook ctnetlink_glue_hook = { .build_size = ctnetlink_glue_build_size, .build = ctnetlink_glue_build, .parse = ctnetlink_glue_parse, @@ -2702,6 +2945,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb, memset(&m, 0xFF, sizeof(m)); memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3)); m.src.u.all = mask->src.u.all; + m.src.l3num = tuple->src.l3num; m.dst.protonum = tuple->dst.protonum; nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK); @@ -2731,7 +2975,7 @@ static const union nf_inet_addr any_addr; static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) { - static __read_mostly siphash_key_t exp_id_seed; + static siphash_aligned_key_t exp_id_seed; unsigned long a, b, c, d; net_get_random_once(&exp_id_seed, sizeof(exp_id_seed)); @@ -2824,19 +3068,14 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, const struct nf_conntrack_expect *exp) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, + exp->tuple.src.l3num, NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = exp->tuple.src.l3num; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nla_put_failure; @@ -2851,12 +3090,11 @@ nla_put_failure: #ifdef CONFIG_NF_CONNTRACK_EVENTS static int -ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) +ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item) { struct nf_conntrack_expect *exp = item->exp; struct net *net = nf_ct_exp_net(exp); struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct sk_buff *skb; unsigned int type, group; int flags = 0; @@ -2879,15 +3117,11 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) goto errout; type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, type); - nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, item->portid, 0, type, flags, + exp->tuple.src.l3num, NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = exp->tuple.src.l3num; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nla_put_failure; @@ -3052,29 +3286,28 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, return err; } -static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, - struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack) +static int ctnetlink_get_expect(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const cda[]) { + u_int8_t u3 = info->nfmsg->nfgen_family; struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; - struct sk_buff *skb2; - struct nfgenmsg *nfmsg = nlmsg_data(nlh); - u_int8_t u3 = nfmsg->nfgen_family; struct nf_conntrack_zone zone; + struct sk_buff *skb2; int err; - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { if (cda[CTA_EXPECT_MASTER]) - return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda, - extack); + return ctnetlink_dump_exp_ct(info->net, info->sk, skb, + info->nlh, cda, + info->extack); else { struct netlink_dump_control c = { .dump = ctnetlink_exp_dump_table, .done = ctnetlink_exp_done, }; - return netlink_dump_start(ctnl, skb, nlh, &c); + return netlink_dump_start(info->sk, skb, info->nlh, &c); } } @@ -3094,7 +3327,7 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, if (err < 0) return err; - exp = nf_ct_expect_find_get(net, &zone, &tuple); + exp = nf_ct_expect_find_get(info->net, &zone, &tuple); if (!exp) return -ENOENT; @@ -3107,42 +3340,39 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, } } - err = -ENOMEM; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) { + if (!skb2) { nf_ct_expect_put(exp); - goto out; + return -ENOMEM; } rcu_read_lock(); err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); + info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, + exp); rcu_read_unlock(); nf_ct_expect_put(exp); - if (err <= 0) - goto free; - - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); - if (err < 0) - goto out; - - return 0; + if (err <= 0) { + kfree_skb(skb2); + return -ENOMEM; + } -free: - kfree_skb(skb2); -out: - /* this avoids a loop in nfnetlink. */ - return err == -EAGAIN ? -ENOBUFS : err; + return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); } static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data) { + struct nf_conntrack_helper *helper; const struct nf_conn_help *m_help; const char *name = data; m_help = nfct_help(exp->master); - return strcmp(m_help->helper->name, name) == 0; + helper = rcu_dereference(m_help->helper); + if (!helper) + return false; + + return strcmp(helper->name, name) == 0; } static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data) @@ -3150,15 +3380,13 @@ static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data) return true; } -static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, - struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack) +static int ctnetlink_del_expect(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const cda[]) { + u_int8_t u3 = info->nfmsg->nfgen_family; struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; - struct nfgenmsg *nfmsg = nlmsg_data(nlh); - u_int8_t u3 = nfmsg->nfgen_family; struct nf_conntrack_zone zone; int err; @@ -3174,7 +3402,7 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(net, &zone, &tuple); + exp = nf_ct_expect_find_get(info->net, &zone, &tuple); if (!exp) return -ENOENT; @@ -3190,7 +3418,7 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, spin_lock_bh(&nf_conntrack_expect_lock); if (del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, - nlmsg_report(nlh)); + nlmsg_report(info->nlh)); nf_ct_expect_put(exp); } spin_unlock_bh(&nf_conntrack_expect_lock); @@ -3200,14 +3428,14 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, } else if (cda[CTA_EXPECT_HELP_NAME]) { char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]); - nf_ct_expect_iterate_net(net, expect_iter_name, name, + nf_ct_expect_iterate_net(info->net, expect_iter_name, name, NETLINK_CB(skb).portid, - nlmsg_report(nlh)); + nlmsg_report(info->nlh)); } else { /* This basically means we have to flush everything*/ - nf_ct_expect_iterate_net(net, expect_iter_all, NULL, + nf_ct_expect_iterate_net(info->net, expect_iter_all, NULL, NETLINK_CB(skb).portid, - nlmsg_report(nlh)); + nlmsg_report(info->nlh)); } return 0; @@ -3403,15 +3631,13 @@ err_ct: return err; } -static int ctnetlink_new_expect(struct net *net, struct sock *ctnl, - struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack) +static int ctnetlink_new_expect(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const cda[]) { + u_int8_t u3 = info->nfmsg->nfgen_family; struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; - struct nfgenmsg *nfmsg = nlmsg_data(nlh); - u_int8_t u3 = nfmsg->nfgen_family; struct nf_conntrack_zone zone; int err; @@ -3430,20 +3656,20 @@ static int ctnetlink_new_expect(struct net *net, struct sock *ctnl, return err; spin_lock_bh(&nf_conntrack_expect_lock); - exp = __nf_ct_expect_find(net, &zone, &tuple); + exp = __nf_ct_expect_find(info->net, &zone, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_expect_lock); err = -ENOENT; - if (nlh->nlmsg_flags & NLM_F_CREATE) { - err = ctnetlink_create_expect(net, &zone, cda, u3, + if (info->nlh->nlmsg_flags & NLM_F_CREATE) { + err = ctnetlink_create_expect(info->net, &zone, cda, u3, NETLINK_CB(skb).portid, - nlmsg_report(nlh)); + nlmsg_report(info->nlh)); } return err; } err = -EEXIST; - if (!(nlh->nlmsg_flags & NLM_F_EXCL)) + if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) err = ctnetlink_change_expect(exp, cda); spin_unlock_bh(&nf_conntrack_expect_lock); @@ -3455,20 +3681,15 @@ ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_EXP_GET_STATS_CPU); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, htons(cpu)); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(cpu); - if (nla_put_be32(skb, CTA_STATS_EXP_NEW, htonl(st->expect_new)) || nla_put_be32(skb, CTA_STATS_EXP_CREATE, htonl(st->expect_create)) || nla_put_be32(skb, CTA_STATS_EXP_DELETE, htonl(st->expect_delete))) @@ -3509,17 +3730,15 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl, - struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack) +static int ctnetlink_stat_exp_cpu(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const cda[]) { - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ctnetlink_exp_stat_cpu_dump, }; - return netlink_dump_start(ctnl, skb, nlh, &c); + return netlink_dump_start(info->sk, skb, info->nlh, &c); } return 0; @@ -3527,44 +3746,77 @@ static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl, #ifdef CONFIG_NF_CONNTRACK_EVENTS static struct nf_ct_event_notifier ctnl_notifier = { - .fcn = ctnetlink_conntrack_event, -}; - -static struct nf_exp_event_notifier ctnl_notifier_exp = { - .fcn = ctnetlink_expect_event, + .ct_event = ctnetlink_conntrack_event, + .exp_event = ctnetlink_expect_event, }; #endif static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { - [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, - .attr_count = CTA_MAX, - .policy = ct_nla_policy }, - [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, - .attr_count = CTA_MAX, - .policy = ct_nla_policy }, - [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, - .attr_count = CTA_MAX, - .policy = ct_nla_policy }, - [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, - .attr_count = CTA_MAX, - .policy = ct_nla_policy }, - [IPCTNL_MSG_CT_GET_STATS_CPU] = { .call = ctnetlink_stat_ct_cpu }, - [IPCTNL_MSG_CT_GET_STATS] = { .call = ctnetlink_stat_ct }, - [IPCTNL_MSG_CT_GET_DYING] = { .call = ctnetlink_get_ct_dying }, - [IPCTNL_MSG_CT_GET_UNCONFIRMED] = { .call = ctnetlink_get_ct_unconfirmed }, + [IPCTNL_MSG_CT_NEW] = { + .call = ctnetlink_new_conntrack, + .type = NFNL_CB_MUTEX, + .attr_count = CTA_MAX, + .policy = ct_nla_policy + }, + [IPCTNL_MSG_CT_GET] = { + .call = ctnetlink_get_conntrack, + .type = NFNL_CB_MUTEX, + .attr_count = CTA_MAX, + .policy = ct_nla_policy + }, + [IPCTNL_MSG_CT_DELETE] = { + .call = ctnetlink_del_conntrack, + .type = NFNL_CB_MUTEX, + .attr_count = CTA_MAX, + .policy = ct_nla_policy + }, + [IPCTNL_MSG_CT_GET_CTRZERO] = { + .call = ctnetlink_get_conntrack, + .type = NFNL_CB_MUTEX, + .attr_count = CTA_MAX, + .policy = ct_nla_policy + }, + [IPCTNL_MSG_CT_GET_STATS_CPU] = { + .call = ctnetlink_stat_ct_cpu, + .type = NFNL_CB_MUTEX, + }, + [IPCTNL_MSG_CT_GET_STATS] = { + .call = ctnetlink_stat_ct, + .type = NFNL_CB_MUTEX, + }, + [IPCTNL_MSG_CT_GET_DYING] = { + .call = ctnetlink_get_ct_dying, + .type = NFNL_CB_MUTEX, + }, + [IPCTNL_MSG_CT_GET_UNCONFIRMED] = { + .call = ctnetlink_get_ct_unconfirmed, + .type = NFNL_CB_MUTEX, + }, }; static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { - [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, - .attr_count = CTA_EXPECT_MAX, - .policy = exp_nla_policy }, - [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, - .attr_count = CTA_EXPECT_MAX, - .policy = exp_nla_policy }, - [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, - .attr_count = CTA_EXPECT_MAX, - .policy = exp_nla_policy }, - [IPCTNL_MSG_EXP_GET_STATS_CPU] = { .call = ctnetlink_stat_exp_cpu }, + [IPCTNL_MSG_EXP_GET] = { + .call = ctnetlink_get_expect, + .type = NFNL_CB_MUTEX, + .attr_count = CTA_EXPECT_MAX, + .policy = exp_nla_policy + }, + [IPCTNL_MSG_EXP_NEW] = { + .call = ctnetlink_new_expect, + .type = NFNL_CB_MUTEX, + .attr_count = CTA_EXPECT_MAX, + .policy = exp_nla_policy + }, + [IPCTNL_MSG_EXP_DELETE] = { + .call = ctnetlink_del_expect, + .type = NFNL_CB_MUTEX, + .attr_count = CTA_EXPECT_MAX, + .policy = exp_nla_policy + }, + [IPCTNL_MSG_EXP_GET_STATS_CPU] = { + .call = ctnetlink_stat_exp_cpu, + .type = NFNL_CB_MUTEX, + }, }; static const struct nfnetlink_subsystem ctnl_subsys = { @@ -3588,58 +3840,29 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP); static int __net_init ctnetlink_net_init(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - int ret; - - ret = nf_conntrack_register_notifier(net, &ctnl_notifier); - if (ret < 0) { - pr_err("ctnetlink_init: cannot register notifier.\n"); - goto err_out; - } - - ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp); - if (ret < 0) { - pr_err("ctnetlink_init: cannot expect register notifier.\n"); - goto err_unreg_notifier; - } + nf_conntrack_register_notifier(net, &ctnl_notifier); #endif return 0; - -#ifdef CONFIG_NF_CONNTRACK_EVENTS -err_unreg_notifier: - nf_conntrack_unregister_notifier(net, &ctnl_notifier); -err_out: - return ret; -#endif } -static void ctnetlink_net_exit(struct net *net) +static void ctnetlink_net_pre_exit(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp); - nf_conntrack_unregister_notifier(net, &ctnl_notifier); + nf_conntrack_unregister_notifier(net); #endif } -static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) -{ - struct net *net; - - list_for_each_entry(net, net_exit_list, exit_list) - ctnetlink_net_exit(net); - - /* wait for other cpus until they are done with ctnl_notifiers */ - synchronize_rcu(); -} - static struct pernet_operations ctnetlink_net_ops = { .init = ctnetlink_net_init, - .exit_batch = ctnetlink_net_exit_batch, + .pre_exit = ctnetlink_net_pre_exit, }; static int __init ctnetlink_init(void) { int ret; + BUILD_BUG_ON(sizeof(struct ctnetlink_list_dump_ctx) > sizeof_field(struct netlink_callback, ctx)); + ret = nfnetlink_subsys_register(&ctnl_subsys); if (ret < 0) { pr_err("ctnetlink_init: cannot register with nfnetlink.\n"); |