diff options
Diffstat (limited to 'net/netfilter')
56 files changed, 1302 insertions, 1544 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 2ab870ef233a..beb3a69ce1d4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -403,21 +403,6 @@ config NF_NAT_NEEDED depends on NF_NAT default y -config NF_NAT_PROTO_DCCP - bool - depends on NF_NAT && NF_CT_PROTO_DCCP - default NF_NAT && NF_CT_PROTO_DCCP - -config NF_NAT_PROTO_UDPLITE - bool - depends on NF_NAT && NF_CT_PROTO_UDPLITE - default NF_NAT && NF_CT_PROTO_UDPLITE - -config NF_NAT_PROTO_SCTP - bool - default NF_NAT && NF_CT_PROTO_SCTP - depends on NF_NAT && NF_CT_PROTO_SCTP - config NF_NAT_AMANDA tristate depends on NF_CONNTRACK && NF_NAT diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 4ddf3ef51ece..1ae65a314d7a 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -47,12 +47,7 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o -nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ - nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o - -# NAT protocols (nf_nat) -nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o -nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o +nf_nat-y := nf_nat_core.o nf_nat_proto.o nf_nat_helper.o # generic transport layer logging obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index c00b6a2e8e3c..980000fc3b50 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -219,10 +219,6 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; - /* MAC can be src only */ - if (!(opt->flags & IPSET_DIM_TWO_SRC)) - return 0; - ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; @@ -233,7 +229,14 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, return -EINVAL; e.id = ip_to_id(map, ip); - memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + + if (opt->flags & IPSET_DIM_ONE_SRC) + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + else + ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); + + if (is_zero_ether_addr(e.ether)) + return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index bc4bd247bb7d..45a257695bef 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -55,11 +55,15 @@ MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); MODULE_DESCRIPTION("core IP set support"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); -/* When the nfnl mutex is held: */ +/* When the nfnl mutex or ip_set_ref_lock is held: */ #define ip_set_dereference(p) \ - rcu_dereference_protected(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) + rcu_dereference_protected(p, \ + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \ + lockdep_is_held(&ip_set_ref_lock)) #define ip_set(inst, id) \ ip_set_dereference((inst)->ip_set_list)[id] +#define ip_set_ref_netlink(inst,id) \ + rcu_dereference_raw((inst)->ip_set_list)[id] /* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is @@ -693,21 +697,20 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index) EXPORT_SYMBOL_GPL(ip_set_put_byindex); /* Get the name of a set behind a set index. - * We assume the set is referenced, so it does exist and - * can't be destroyed. The set cannot be renamed due to - * the referencing either. - * + * Set itself is protected by RCU, but its name isn't: to protect against + * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the + * name. */ -const char * -ip_set_name_byindex(struct net *net, ip_set_id_t index) +void +ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name) { - const struct ip_set *set = ip_set_rcu_get(net, index); + struct ip_set *set = ip_set_rcu_get(net, index); BUG_ON(!set); - BUG_ON(set->ref == 0); - /* Referenced, so it's safe */ - return set->name; + read_lock_bh(&ip_set_ref_lock); + strncpy(name, set->name, IPSET_MAXNAMELEN); + read_unlock_bh(&ip_set_ref_lock); } EXPORT_SYMBOL_GPL(ip_set_name_byindex); @@ -768,11 +771,21 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put); * The commands are serialized by the nfnl mutex. */ +static inline u8 protocol(const struct nlattr * const tb[]) +{ + return nla_get_u8(tb[IPSET_ATTR_PROTOCOL]); +} + static inline bool protocol_failed(const struct nlattr * const tb[]) { - return !tb[IPSET_ATTR_PROTOCOL] || - nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL; + return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) != IPSET_PROTOCOL; +} + +static inline bool +protocol_min_failed(const struct nlattr * const tb[]) +{ + return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) < IPSET_PROTOCOL_MIN; } static inline u32 @@ -886,7 +899,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl, u32 flags = flag_exist(nlh); int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_TYPENAME] || !attr[IPSET_ATTR_REVISION] || @@ -961,7 +974,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl, /* Wraparound */ goto cleanup; - list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL); + list = kvcalloc(i, sizeof(struct ip_set *), GFP_KERNEL); if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ @@ -973,7 +986,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl, /* Use new list */ index = inst->ip_set_max; inst->ip_set_max = i; - kfree(tmp); + kvfree(tmp); ret = 0; } else if (ret) { goto cleanup; @@ -1024,7 +1037,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, ip_set_id_t i; int ret = 0; - if (unlikely(protocol_failed(attr))) + if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; /* Must wait for flush to be really finished in list:set */ @@ -1102,7 +1115,7 @@ static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb, struct ip_set *s; ip_set_id_t i; - if (unlikely(protocol_failed(attr))) + if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; if (!attr[IPSET_ATTR_SETNAME]) { @@ -1144,7 +1157,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, ip_set_id_t i; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; @@ -1153,7 +1166,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, if (!set) return -ENOENT; - read_lock_bh(&ip_set_ref_lock); + write_lock_bh(&ip_set_ref_lock); if (set->ref != 0) { ret = -IPSET_ERR_REFERENCED; goto out; @@ -1170,7 +1183,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, strncpy(set->name, name2, IPSET_MAXNAMELEN); out: - read_unlock_bh(&ip_set_ref_lock); + write_unlock_bh(&ip_set_ref_lock); return ret; } @@ -1193,7 +1206,7 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; @@ -1252,7 +1265,7 @@ ip_set_dump_done(struct netlink_callback *cb) struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX]; - struct ip_set *set = ip_set(inst, index); + struct ip_set *set = ip_set_ref_netlink(inst, index); if (set->variant->uref) set->variant->uref(set, cb, false); @@ -1288,6 +1301,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_setname_policy, NULL); + cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]); if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; @@ -1389,7 +1403,8 @@ dump_last: ret = -EMSGSIZE; goto release_refcount; } - if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || + if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, + cb->args[IPSET_CB_PROTO]) || nla_put_string(skb, IPSET_ATTR_SETNAME, set->name)) goto nla_put_failure; if (dump_flags & IPSET_FLAG_LIST_SETNAME) @@ -1404,6 +1419,9 @@ dump_last: nla_put_u8(skb, IPSET_ATTR_REVISION, set->revision)) goto nla_put_failure; + if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN && + nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index))) + goto nla_put_failure; ret = set->variant->head(set, skb); if (ret < 0) goto release_refcount; @@ -1441,7 +1459,7 @@ next_set: release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[IPSET_CB_ARG0]) { - set = ip_set(inst, index); + set = ip_set_ref_netlink(inst, index); if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); @@ -1463,7 +1481,7 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlattr * const attr[], struct netlink_ext_ack *extack) { - if (unlikely(protocol_failed(attr))) + if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; { @@ -1557,7 +1575,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, bool use_lineno; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || @@ -1612,7 +1630,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, bool use_lineno; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || @@ -1664,7 +1682,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_DATA] || !flag_nested(attr[IPSET_ATTR_DATA]))) @@ -1701,7 +1719,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl, struct nlmsghdr *nlh2; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME])) return -IPSET_ERR_PROTOCOL; @@ -1717,7 +1735,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl, IPSET_CMD_HEADER); if (!nlh2) goto nlmsg_failure; - if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) || nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) || nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) || @@ -1758,7 +1776,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb, const char *typename; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_TYPENAME] || !attr[IPSET_ATTR_FAMILY])) return -IPSET_ERR_PROTOCOL; @@ -1777,7 +1795,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb, IPSET_CMD_TYPE); if (!nlh2) goto nlmsg_failure; - if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) || nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) || nla_put_u8(skb2, IPSET_ATTR_REVISION, max) || @@ -1828,6 +1846,111 @@ static int ip_set_protocol(struct net *net, struct sock *ctnl, goto nlmsg_failure; if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL)) goto nla_put_failure; + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL_MIN, IPSET_PROTOCOL_MIN)) + goto nla_put_failure; + nlmsg_end(skb2, nlh2); + + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); + if (ret < 0) + return ret; + + return 0; + +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EMSGSIZE; +} + +/* Get set by name or index, from userspace */ + +static int ip_set_byname(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) +{ + struct ip_set_net *inst = ip_set_pernet(net); + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + ip_set_id_t id = IPSET_INVALID_ID; + const struct ip_set *set; + int ret = 0; + + if (unlikely(protocol_failed(attr) || + !attr[IPSET_ATTR_SETNAME])) + return -IPSET_ERR_PROTOCOL; + + set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &id); + if (id == IPSET_INVALID_ID) + return -ENOENT; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + IPSET_CMD_GET_BYNAME); + if (!nlh2) + goto nlmsg_failure; + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || + nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) || + nla_put_net16(skb2, IPSET_ATTR_INDEX, htons(id))) + goto nla_put_failure; + nlmsg_end(skb2, nlh2); + + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); + if (ret < 0) + return ret; + + return 0; + +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EMSGSIZE; +} + +static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_INDEX] = { .type = NLA_U16 }, +}; + +static int ip_set_byindex(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) +{ + struct ip_set_net *inst = ip_set_pernet(net); + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + ip_set_id_t id = IPSET_INVALID_ID; + const struct ip_set *set; + int ret = 0; + + if (unlikely(protocol_failed(attr) || + !attr[IPSET_ATTR_INDEX])) + return -IPSET_ERR_PROTOCOL; + + id = ip_set_get_h16(attr[IPSET_ATTR_INDEX]); + if (id >= inst->ip_set_max) + return -ENOENT; + set = ip_set(inst, id); + if (set == NULL) + return -ENOENT; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + IPSET_CMD_GET_BYINDEX); + if (!nlh2) + goto nlmsg_failure; + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || + nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name)) + goto nla_put_failure; nlmsg_end(skb2, nlh2); ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); @@ -1913,6 +2036,16 @@ static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_protocol_policy, }, + [IPSET_CMD_GET_BYNAME] = { + .call = ip_set_byname, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_GET_BYINDEX] = { + .call = ip_set_byindex, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_index_policy, + }, }; static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = { @@ -1958,7 +2091,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) goto done; } - if (req_version->version != IPSET_PROTOCOL) { + if (req_version->version < IPSET_PROTOCOL_MIN) { ret = -EPROTO; goto done; } @@ -2021,9 +2154,11 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } nfnl_lock(NFNL_SUBSYS_IPSET); set = ip_set(inst, req_get->set.index); - strncpy(req_get->set.name, set ? set->name : "", - IPSET_MAXNAMELEN); + ret = strscpy(req_get->set.name, set ? set->name : "", + IPSET_MAXNAMELEN); nfnl_unlock(NFNL_SUBSYS_IPSET); + if (ret < 0) + goto done; goto copy; } default: @@ -2059,7 +2194,7 @@ ip_set_net_init(struct net *net) if (inst->ip_set_max >= IPSET_INVALID_ID) inst->ip_set_max = IPSET_INVALID_ID - 1; - list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL); + list = kvcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL); if (!list) return -ENOMEM; inst->is_deleted = false; @@ -2087,7 +2222,7 @@ ip_set_net_exit(struct net *net) } } nfnl_unlock(NFNL_SUBSYS_IPSET); - kfree(rcu_dereference_protected(inst->ip_set_list, 1)); + kvfree(rcu_dereference_protected(inst->ip_set_list, 1)); } static struct pernet_operations ip_set_net_ops = { diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index e287da68d5fa..2c9609929c71 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -67,7 +67,7 @@ tune_ahash_max(u8 curr, u32 multi) /* A hash bucket */ struct hbucket { - struct rcu_head rcu; /* for call_rcu_bh */ + struct rcu_head rcu; /* for call_rcu */ /* Which positions are used in the array */ DECLARE_BITMAP(used, AHASH_MAX_TUNED); u8 size; /* size of the array */ @@ -664,7 +664,7 @@ retry: spin_unlock_bh(&set->lock); /* Give time to other readers of the set */ - synchronize_rcu_bh(); + synchronize_rcu(); pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c index 1ab5ed2f6839..c830c68142ff 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmac.c +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -36,9 +36,6 @@ MODULE_ALIAS("ip_set_hash:ip,mac"); /* Type specific function prefix */ #define HTYPE hash_ipmac -/* Zero valued element is not supported */ -static const unsigned char invalid_ether[ETH_ALEN] = { 0 }; - /* IPv4 variant */ /* Member elements */ @@ -103,8 +100,12 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb, (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); - if (ether_addr_equal(e.ether, invalid_ether)) + if (opt->flags & IPSET_DIM_ONE_SRC) + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + else + ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); + + if (is_zero_ether_addr(e.ether)) return -EINVAL; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); @@ -140,7 +141,7 @@ hash_ipmac4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret) return ret; memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); - if (ether_addr_equal(e.ether, invalid_ether)) + if (is_zero_ether_addr(e.ether)) return -IPSET_ERR_HASH_ELEM; return adtfn(set, &e, &ext, &ext, flags); @@ -211,16 +212,16 @@ hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - /* MAC can be src only */ - if (!(opt->flags & IPSET_DIM_TWO_SRC)) - return 0; - if (skb_mac_header(skb) < skb->head || (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); - if (ether_addr_equal(e.ether, invalid_ether)) + if (opt->flags & IPSET_DIM_ONE_SRC) + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + else + ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); + + if (is_zero_ether_addr(e.ether)) return -EINVAL; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); @@ -260,7 +261,7 @@ hash_ipmac6_uadt(struct ip_set *set, struct nlattr *tb[], return ret; memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); - if (ether_addr_equal(e.ether, invalid_ether)) + if (is_zero_ether_addr(e.ether)) return -IPSET_ERR_HASH_ELEM; return adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index f9d5a2a1e3d0..4fe5f243d0a3 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -81,15 +81,15 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - /* MAC can be src only */ - if (!(opt->flags & IPSET_DIM_ONE_SRC)) - return 0; - if (skb_mac_header(skb) < skb->head || (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + if (opt->flags & IPSET_DIM_ONE_SRC) + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + else + ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); + if (is_zero_ether_addr(e.ether)) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index d391485a6acd..613e18e720a4 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -213,13 +213,13 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CIDR]) { e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!e.cidr[0] || e.cidr[0] > HOST_MASK) + if (e.cidr[0] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } if (tb[IPSET_ATTR_CIDR2]) { e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); - if (!e.cidr[1] || e.cidr[1] > HOST_MASK) + if (e.cidr[1] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } @@ -493,13 +493,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CIDR]) { e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!e.cidr[0] || e.cidr[0] > HOST_MASK) + if (e.cidr[0] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } if (tb[IPSET_ATTR_CIDR2]) { e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); - if (!e.cidr[1] || e.cidr[1] > HOST_MASK) + if (e.cidr[1] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 072a658fde04..8da228da53ae 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu) { struct set_elem *e = container_of(rcu, struct set_elem, rcu); struct ip_set *set = e->set; - struct list_set *map = set->data; - ip_set_put_byindex(map->net, e->id); ip_set_ext_destroy(set, e); kfree(e); } @@ -158,15 +156,21 @@ __list_set_del_rcu(struct rcu_head * rcu) static inline void list_set_del(struct ip_set *set, struct set_elem *e) { + struct list_set *map = set->data; + set->elements--; list_del_rcu(&e->list); + ip_set_put_byindex(map->net, e->id); call_rcu(&e->rcu, __list_set_del_rcu); } static inline void -list_set_replace(struct set_elem *e, struct set_elem *old) +list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old) { + struct list_set *map = set->data; + list_replace_rcu(&old->list, &e->list); + ip_set_put_byindex(map->net, old->id); call_rcu(&old->rcu, __list_set_del_rcu); } @@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, INIT_LIST_HEAD(&e->list); list_set_init_extensions(set, ext, e); if (n) - list_set_replace(e, n); + list_set_replace(set, e, n); else if (next) list_add_tail_rcu(&e->list, &next->list); else if (prev) @@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set, const struct list_set *map = set->data; struct nlattr *atd, *nested; u32 i = 0, first = cb->args[IPSET_CB_ARG0]; + char name[IPSET_MAXNAMELEN]; struct set_elem *e; int ret = 0; @@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set, nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; - if (nla_put_string(skb, IPSET_ATTR_NAME, - ip_set_name_byindex(map->net, e->id))) + ip_set_name_byindex(map->net, e->id, name); + if (nla_put_string(skb, IPSET_ATTR_NAME, name)) goto nla_put_failure; if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; @@ -526,8 +531,8 @@ nla_put_failure: ret = -EMSGSIZE; } else { cb->args[IPSET_CB_ARG0] = i; + ipset_nest_end(skb, atd); } - ipset_nest_end(skb, atd); out: rcu_read_unlock(); return ret; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 83395bf6dc35..432141f04af3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3980,6 +3980,9 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) static struct notifier_block ip_vs_dst_notifier = { .notifier_call = ip_vs_dst_event, +#ifdef CONFIG_IP_VS_IPV6 + .priority = ADDRCONF_NOTIFY_PRIORITY + 5, +#endif }; int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 02ca7df793f5..9cd180bda092 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -49,6 +49,7 @@ struct nf_conncount_tuple { struct nf_conntrack_zone zone; int cpu; u32 jiffies32; + bool dead; struct rcu_head rcu_head; }; @@ -106,15 +107,16 @@ nf_conncount_add(struct nf_conncount_list *list, conn->zone = *zone; conn->cpu = raw_smp_processor_id(); conn->jiffies32 = (u32)jiffies; - spin_lock(&list->list_lock); + conn->dead = false; + spin_lock_bh(&list->list_lock); if (list->dead == true) { kmem_cache_free(conncount_conn_cachep, conn); - spin_unlock(&list->list_lock); + spin_unlock_bh(&list->list_lock); return NF_CONNCOUNT_SKIP; } list_add_tail(&conn->node, &list->head); list->count++; - spin_unlock(&list->list_lock); + spin_unlock_bh(&list->list_lock); return NF_CONNCOUNT_ADDED; } EXPORT_SYMBOL_GPL(nf_conncount_add); @@ -132,19 +134,22 @@ static bool conn_free(struct nf_conncount_list *list, { bool free_entry = false; - spin_lock(&list->list_lock); + spin_lock_bh(&list->list_lock); - if (list->count == 0) { - spin_unlock(&list->list_lock); - return free_entry; + if (conn->dead) { + spin_unlock_bh(&list->list_lock); + return free_entry; } list->count--; + conn->dead = true; list_del_rcu(&conn->node); - if (list->count == 0) + if (list->count == 0) { + list->dead = true; free_entry = true; + } - spin_unlock(&list->list_lock); + spin_unlock_bh(&list->list_lock); call_rcu(&conn->rcu_head, __conn_free); return free_entry; } @@ -245,7 +250,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list) { spin_lock_init(&list->list_lock); INIT_LIST_HEAD(&list->head); - list->count = 1; + list->count = 0; list->dead = false; } EXPORT_SYMBOL_GPL(nf_conncount_list_init); @@ -259,6 +264,7 @@ bool nf_conncount_gc_list(struct net *net, struct nf_conn *found_ct; unsigned int collected = 0; bool free_entry = false; + bool ret = false; list_for_each_entry_safe(conn, conn_n, &list->head, node) { found = find_or_evict(net, list, conn, &free_entry); @@ -288,7 +294,15 @@ bool nf_conncount_gc_list(struct net *net, if (collected > CONNCOUNT_GC_MAX_NODES) return false; } - return false; + + spin_lock_bh(&list->list_lock); + if (!list->count) { + list->dead = true; + ret = true; + } + spin_unlock_bh(&list->list_lock); + + return ret; } EXPORT_SYMBOL_GPL(nf_conncount_gc_list); @@ -309,11 +323,8 @@ static void tree_nodes_free(struct rb_root *root, while (gc_count) { rbconn = gc_nodes[--gc_count]; spin_lock(&rbconn->list.list_lock); - if (rbconn->list.count == 0 && rbconn->list.dead == false) { - rbconn->list.dead = true; - rb_erase(&rbconn->node, root); - call_rcu(&rbconn->rcu_head, __tree_nodes_free); - } + rb_erase(&rbconn->node, root); + call_rcu(&rbconn->rcu_head, __tree_nodes_free); spin_unlock(&rbconn->list.list_lock); } } @@ -414,8 +425,9 @@ insert_tree(struct net *net, nf_conncount_list_init(&rbconn->list); list_add(&conn->node, &rbconn->list.head); count = 1; + rbconn->list.count = count; - rb_link_node(&rbconn->node, parent, rbnode); + rb_link_node_rcu(&rbconn->node, parent, rbnode); rb_insert_color(&rbconn->node, root); out_unlock: spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 1d66de5151b2..49e523cc49d0 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -25,102 +25,15 @@ static bool nf_ct_acct __read_mostly; module_param_named(acct, nf_ct_acct, bool, 0644); MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); -#ifdef CONFIG_SYSCTL -static struct ctl_table acct_sysctl_table[] = { - { - .procname = "nf_conntrack_acct", - .data = &init_net.ct.sysctl_acct, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - {} -}; -#endif /* CONFIG_SYSCTL */ - -unsigned int -seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) -{ - struct nf_conn_acct *acct; - struct nf_conn_counter *counter; - - acct = nf_conn_acct_find(ct); - if (!acct) - return 0; - - counter = acct->counter; - seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)atomic64_read(&counter[dir].packets), - (unsigned long long)atomic64_read(&counter[dir].bytes)); - - return 0; -}; -EXPORT_SYMBOL_GPL(seq_print_acct); - static const struct nf_ct_ext_type acct_extend = { .len = sizeof(struct nf_conn_acct), .align = __alignof__(struct nf_conn_acct), .id = NF_CT_EXT_ACCT, }; -#ifdef CONFIG_SYSCTL -static int nf_conntrack_acct_init_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table), - GFP_KERNEL); - if (!table) - goto out; - - table[0].data = &net->ct.sysctl_acct; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; - - net->ct.acct_sysctl_header = register_net_sysctl(net, "net/netfilter", - table); - if (!net->ct.acct_sysctl_header) { - pr_err("can't register to sysctl\n"); - goto out_register; - } - return 0; - -out_register: - kfree(table); -out: - return -ENOMEM; -} - -static void nf_conntrack_acct_fini_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = net->ct.acct_sysctl_header->ctl_table_arg; - unregister_net_sysctl_table(net->ct.acct_sysctl_header); - kfree(table); -} -#else -static int nf_conntrack_acct_init_sysctl(struct net *net) -{ - return 0; -} - -static void nf_conntrack_acct_fini_sysctl(struct net *net) -{ -} -#endif - -int nf_conntrack_acct_pernet_init(struct net *net) +void nf_conntrack_acct_pernet_init(struct net *net) { net->ct.sysctl_acct = nf_ct_acct; - return nf_conntrack_acct_init_sysctl(net); -} - -void nf_conntrack_acct_pernet_fini(struct net *net) -{ - nf_conntrack_acct_fini_sysctl(net); } int nf_conntrack_acct_init(void) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ca1168d67fac..e87c21e47efe 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1073,19 +1073,22 @@ static unsigned int early_drop_list(struct net *net, return drops; } -static noinline int early_drop(struct net *net, unsigned int _hash) +static noinline int early_drop(struct net *net, unsigned int hash) { - unsigned int i; + unsigned int i, bucket; for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { struct hlist_nulls_head *ct_hash; - unsigned int hash, hsize, drops; + unsigned int hsize, drops; rcu_read_lock(); nf_conntrack_get_ht(&ct_hash, &hsize); - hash = reciprocal_scale(_hash++, hsize); + if (!i) + bucket = reciprocal_scale(hash, hsize); + else + bucket = (bucket + 1) % hsize; - drops = early_drop_list(net, &ct_hash[hash]); + drops = early_drop_list(net, &ct_hash[bucket]); rcu_read_unlock(); if (drops) { @@ -2107,10 +2110,7 @@ i_see_dead_people: list_for_each_entry(net, net_exit_list, exit_list) { nf_conntrack_proto_pernet_fini(net); - nf_conntrack_helper_pernet_fini(net); nf_conntrack_ecache_pernet_fini(net); - nf_conntrack_tstamp_pernet_fini(net); - nf_conntrack_acct_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); free_percpu(net->ct.stat); free_percpu(net->ct.pcpu_lists); @@ -2407,32 +2407,19 @@ int nf_conntrack_init_net(struct net *net) ret = nf_conntrack_expect_pernet_init(net); if (ret < 0) goto err_expect; - ret = nf_conntrack_acct_pernet_init(net); - if (ret < 0) - goto err_acct; - ret = nf_conntrack_tstamp_pernet_init(net); - if (ret < 0) - goto err_tstamp; - ret = nf_conntrack_ecache_pernet_init(net); - if (ret < 0) - goto err_ecache; - ret = nf_conntrack_helper_pernet_init(net); - if (ret < 0) - goto err_helper; + + nf_conntrack_acct_pernet_init(net); + nf_conntrack_tstamp_pernet_init(net); + nf_conntrack_ecache_pernet_init(net); + nf_conntrack_helper_pernet_init(net); + ret = nf_conntrack_proto_pernet_init(net); if (ret < 0) goto err_proto; return 0; err_proto: - nf_conntrack_helper_pernet_fini(net); -err_helper: nf_conntrack_ecache_pernet_fini(net); -err_ecache: - nf_conntrack_tstamp_pernet_fini(net); -err_tstamp: - nf_conntrack_acct_pernet_fini(net); -err_acct: nf_conntrack_expect_pernet_fini(net); err_expect: free_percpu(net->ct.stat); diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index c11822a7d2bf..3d042f8ff183 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -336,85 +336,21 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); #define NF_CT_EVENTS_DEFAULT 1 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; -#ifdef CONFIG_SYSCTL -static struct ctl_table event_sysctl_table[] = { - { - .procname = "nf_conntrack_events", - .data = &init_net.ct.sysctl_events, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - {} -}; -#endif /* CONFIG_SYSCTL */ - static const struct nf_ct_ext_type event_extend = { .len = sizeof(struct nf_conntrack_ecache), .align = __alignof__(struct nf_conntrack_ecache), .id = NF_CT_EXT_ECACHE, }; -#ifdef CONFIG_SYSCTL -static int nf_conntrack_event_init_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table), - GFP_KERNEL); - if (!table) - goto out; - - table[0].data = &net->ct.sysctl_events; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; - - net->ct.event_sysctl_header = - register_net_sysctl(net, "net/netfilter", table); - if (!net->ct.event_sysctl_header) { - pr_err("can't register to sysctl\n"); - goto out_register; - } - return 0; - -out_register: - kfree(table); -out: - return -ENOMEM; -} - -static void nf_conntrack_event_fini_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = net->ct.event_sysctl_header->ctl_table_arg; - unregister_net_sysctl_table(net->ct.event_sysctl_header); - kfree(table); -} -#else -static int nf_conntrack_event_init_sysctl(struct net *net) -{ - return 0; -} - -static void nf_conntrack_event_fini_sysctl(struct net *net) -{ -} -#endif /* CONFIG_SYSCTL */ - -int nf_conntrack_ecache_pernet_init(struct net *net) +void nf_conntrack_ecache_pernet_init(struct net *net) { net->ct.sysctl_events = nf_ct_events; INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work); - return nf_conntrack_event_init_sysctl(net); } void nf_conntrack_ecache_pernet_fini(struct net *net) { cancel_delayed_work_sync(&net->ct.ecache_dwork); - nf_conntrack_event_fini_sysctl(net); } int nf_conntrack_ecache_init(void) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index e24b762ffa1d..274baf1dab87 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -42,67 +42,6 @@ module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644); MODULE_PARM_DESC(nf_conntrack_helper, "Enable automatic conntrack helper assignment (default 0)"); -#ifdef CONFIG_SYSCTL -static struct ctl_table helper_sysctl_table[] = { - { - .procname = "nf_conntrack_helper", - .data = &init_net.ct.sysctl_auto_assign_helper, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - {} -}; - -static int nf_conntrack_helper_init_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = kmemdup(helper_sysctl_table, sizeof(helper_sysctl_table), - GFP_KERNEL); - if (!table) - goto out; - - table[0].data = &net->ct.sysctl_auto_assign_helper; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; - - net->ct.helper_sysctl_header = - register_net_sysctl(net, "net/netfilter", table); - - if (!net->ct.helper_sysctl_header) { - pr_err("nf_conntrack_helper: can't register to sysctl.\n"); - goto out_register; - } - return 0; - -out_register: - kfree(table); -out: - return -ENOMEM; -} - -static void nf_conntrack_helper_fini_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = net->ct.helper_sysctl_header->ctl_table_arg; - unregister_net_sysctl_table(net->ct.helper_sysctl_header); - kfree(table); -} -#else -static int nf_conntrack_helper_init_sysctl(struct net *net) -{ - return 0; -} - -static void nf_conntrack_helper_fini_sysctl(struct net *net) -{ -} -#endif /* CONFIG_SYSCTL */ - /* Stupid hash, but collision free for the default registrations of the * helpers currently in the kernel. */ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) @@ -533,16 +472,10 @@ static const struct nf_ct_ext_type helper_extend = { .id = NF_CT_EXT_HELPER, }; -int nf_conntrack_helper_pernet_init(struct net *net) +void nf_conntrack_helper_pernet_init(struct net *net) { net->ct.auto_assign_helper_warned = false; net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper; - return nf_conntrack_helper_init_sysctl(net); -} - -void nf_conntrack_helper_pernet_fini(struct net *net) -{ - nf_conntrack_helper_fini_sysctl(net); } int nf_conntrack_helper_init(void) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4ae8e528943a..1213beb5a714 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -47,7 +47,6 @@ #include <net/netfilter/nf_conntrack_synproxy.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l4proto.h> #include <net/netfilter/nf_nat_helper.h> #endif @@ -1688,6 +1687,22 @@ static int ctnetlink_change_timeout(struct nf_conn *ct, return 0; } +#if defined(CONFIG_NF_CONNTRACK_MARK) +static void ctnetlink_change_mark(struct nf_conn *ct, + const struct nlattr * const cda[]) +{ + u32 mark, newmark, mask = 0; + + if (cda[CTA_MARK_MASK]) + mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + + mark = ntohl(nla_get_be32(cda[CTA_MARK])); + newmark = (ct->mark & mask) ^ mark; + if (newmark != ct->mark) + ct->mark = newmark; +} +#endif + static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = { [CTA_PROTOINFO_TCP] = { .type = NLA_NESTED }, [CTA_PROTOINFO_DCCP] = { .type = NLA_NESTED }, @@ -1883,7 +1898,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct, #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) - ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); + ctnetlink_change_mark(ct, cda); #endif if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) { @@ -2027,7 +2042,7 @@ ctnetlink_create_conntrack(struct net *net, #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) - ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); + ctnetlink_change_mark(ct, cda); #endif /* setup master conntrack: this is a confirmed expectation */ @@ -2524,14 +2539,7 @@ ctnetlink_glue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) } #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) { - u32 mask = 0, mark, newmark; - if (cda[CTA_MARK_MASK]) - mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); - - mark = ntohl(nla_get_be32(cda[CTA_MARK])); - newmark = (ct->mark & mask) ^ mark; - if (newmark != ct->mark) - ct->mark = newmark; + ctnetlink_change_mark(ct, cda); } #endif return 0; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 40643af7137e..859f5d07a915 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -175,8 +175,7 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, static int nf_ct_l4proto_register_sysctl(struct net *net, - struct nf_proto_net *pn, - const struct nf_conntrack_l4proto *l4proto) + struct nf_proto_net *pn) { int err = 0; @@ -198,9 +197,7 @@ int nf_ct_l4proto_register_sysctl(struct net *net, } static -void nf_ct_l4proto_unregister_sysctl(struct net *net, - struct nf_proto_net *pn, - const struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_unregister_sysctl(struct nf_proto_net *pn) { #ifdef CONFIG_SYSCTL if (pn->ctl_table_header != NULL) @@ -252,7 +249,7 @@ int nf_ct_l4proto_pernet_register_one(struct net *net, if (pn == NULL) goto out; - ret = nf_ct_l4proto_register_sysctl(net, pn, l4proto); + ret = nf_ct_l4proto_register_sysctl(net, pn); if (ret < 0) goto out; @@ -296,7 +293,7 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net, return; pn->users--; - nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); + nf_ct_l4proto_unregister_sysctl(pn); } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one); @@ -946,16 +943,14 @@ int nf_conntrack_proto_pernet_init(struct net *net) if (err < 0) return err; err = nf_ct_l4proto_register_sysctl(net, - pn, - &nf_conntrack_l4proto_generic); + pn); if (err < 0) return err; err = nf_ct_l4proto_pernet_register(net, builtin_l4proto, ARRAY_SIZE(builtin_l4proto)); if (err < 0) { - nf_ct_l4proto_unregister_sysctl(net, pn, - &nf_conntrack_l4proto_generic); + nf_ct_l4proto_unregister_sysctl(pn); return err; } @@ -971,9 +966,7 @@ void nf_conntrack_proto_pernet_fini(struct net *net) nf_ct_l4proto_pernet_unregister(net, builtin_l4proto, ARRAY_SIZE(builtin_l4proto)); pn->users--; - nf_ct_l4proto_unregister_sysctl(net, - pn, - &nf_conntrack_l4proto_generic); + nf_ct_l4proto_unregister_sysctl(pn); } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 171e9e122e5f..023c1445bc39 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -384,11 +384,6 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = }, }; -static inline struct nf_dccp_net *dccp_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.dccp; -} - static noinline bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, const struct dccp_hdr *dh) @@ -401,7 +396,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb, state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; switch (state) { default: - dn = dccp_pernet(net); + dn = nf_dccp_pernet(net); if (dn->dccp_loose == 0) { msg = "not picking up existing connection "; goto out_invalid; @@ -568,7 +563,7 @@ static int dccp_packet(struct nf_conn *ct, struct sk_buff *skb, timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) - timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout; + timeouts = nf_dccp_pernet(nf_ct_net(ct))->dccp_timeout; nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); return NF_ACCEPT; @@ -681,7 +676,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { - struct nf_dccp_net *dn = dccp_pernet(net); + struct nf_dccp_net *dn = nf_dccp_pernet(net); unsigned int *timeouts = data; int i; @@ -814,7 +809,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn, static int dccp_init_net(struct net *net) { - struct nf_dccp_net *dn = dccp_pernet(net); + struct nf_dccp_net *dn = nf_dccp_pernet(net); struct nf_proto_net *pn = &dn->pn; if (!pn->users) { diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index e10e867e0b55..5da19d5fbc76 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -27,11 +27,6 @@ static bool nf_generic_should_process(u8 proto) } } -static inline struct nf_generic_net *generic_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.generic; -} - static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct net *net, struct nf_conntrack_tuple *tuple) @@ -58,7 +53,7 @@ static int generic_packet(struct nf_conn *ct, } if (!timeout) - timeout = &generic_pernet(nf_ct_net(ct))->timeout; + timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout; nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; @@ -72,7 +67,7 @@ static int generic_packet(struct nf_conn *ct, static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { - struct nf_generic_net *gn = generic_pernet(net); + struct nf_generic_net *gn = nf_generic_pernet(net); unsigned int *timeout = data; if (!timeout) @@ -138,7 +133,7 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, static int generic_init_net(struct net *net) { - struct nf_generic_net *gn = generic_pernet(net); + struct nf_generic_net *gn = nf_generic_pernet(net); struct nf_proto_net *pn = &gn->pn; gn->timeout = nf_ct_generic_timeout; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 9b48dc8b4b88..8899b51aad44 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -43,24 +43,12 @@ #include <linux/netfilter/nf_conntrack_proto_gre.h> #include <linux/netfilter/nf_conntrack_pptp.h> -enum grep_conntrack { - GRE_CT_UNREPLIED, - GRE_CT_REPLIED, - GRE_CT_MAX -}; - static const unsigned int gre_timeouts[GRE_CT_MAX] = { [GRE_CT_UNREPLIED] = 30*HZ, [GRE_CT_REPLIED] = 180*HZ, }; static unsigned int proto_gre_net_id __read_mostly; -struct netns_proto_gre { - struct nf_proto_net nf; - rwlock_t keymap_lock; - struct list_head keymap_list; - unsigned int gre_timeouts[GRE_CT_MAX]; -}; static inline struct netns_proto_gre *gre_pernet(struct net *net) { @@ -332,9 +320,49 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ +#ifdef CONFIG_SYSCTL +static struct ctl_table gre_sysctl_table[] = { + { + .procname = "nf_conntrack_gre_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "nf_conntrack_gre_timeout_stream", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + {} +}; +#endif + +static int gre_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *nf, + struct netns_proto_gre *net_gre) +{ +#ifdef CONFIG_SYSCTL + int i; + + if (nf->ctl_table) + return 0; + + nf->ctl_table = kmemdup(gre_sysctl_table, + sizeof(gre_sysctl_table), + GFP_KERNEL); + if (!nf->ctl_table) + return -ENOMEM; + + for (i = 0; i < GRE_CT_MAX; i++) + nf->ctl_table[i].data = &net_gre->gre_timeouts[i]; +#endif + return 0; +} + static int gre_init_net(struct net *net) { struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_proto_net *nf = &net_gre->nf; int i; rwlock_init(&net_gre->keymap_lock); @@ -342,7 +370,7 @@ static int gre_init_net(struct net *net) for (i = 0; i < GRE_CT_MAX; i++) net_gre->gre_timeouts[i] = gre_timeouts[i]; - return 0; + return gre_kmemdup_sysctl_table(net, nf, net_gre); } /* protocol helper struct */ @@ -402,6 +430,8 @@ static int __init nf_ct_proto_gre_init(void) { int ret; + BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0); + ret = register_pernet_subsys(&proto_gre_net_ops); if (ret < 0) goto out_pernet; diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index 3598520bd19b..de64d8a5fdfd 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -25,11 +25,6 @@ static const unsigned int nf_ct_icmp_timeout = 30*HZ; -static inline struct nf_icmp_net *icmp_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.icmp; -} - static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct net *net, struct nf_conntrack_tuple *tuple) { @@ -103,7 +98,7 @@ static int icmp_packet(struct nf_conn *ct, } if (!timeout) - timeout = &icmp_pernet(nf_ct_net(ct))->timeout; + timeout = &nf_icmp_pernet(nf_ct_net(ct))->timeout; nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; @@ -275,7 +270,7 @@ static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeout = data; - struct nf_icmp_net *in = icmp_pernet(net); + struct nf_icmp_net *in = nf_icmp_pernet(net); if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { if (!timeout) @@ -337,7 +332,7 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, static int icmp_init_net(struct net *net) { - struct nf_icmp_net *in = icmp_pernet(net); + struct nf_icmp_net *in = nf_icmp_pernet(net); struct nf_proto_net *pn = &in->pn; in->timeout = nf_ct_icmp_timeout; diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index 378618feed5d..a15eefb8e317 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -30,11 +30,6 @@ static const unsigned int nf_ct_icmpv6_timeout = 30*HZ; -static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.icmpv6; -} - static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct net *net, @@ -87,7 +82,7 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, static unsigned int *icmpv6_get_timeouts(struct net *net) { - return &icmpv6_pernet(net)->timeout; + return &nf_icmpv6_pernet(net)->timeout; } /* Returns verdict for packet, or -1 for invalid. */ @@ -286,7 +281,7 @@ static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeout = data; - struct nf_icmp_net *in = icmpv6_pernet(net); + struct nf_icmp_net *in = nf_icmpv6_pernet(net); if (!timeout) timeout = icmpv6_get_timeouts(net); @@ -348,7 +343,7 @@ static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn, static int icmpv6_init_net(struct net *net) { - struct nf_icmp_net *in = icmpv6_pernet(net); + struct nf_icmp_net *in = nf_icmpv6_pernet(net); struct nf_proto_net *pn = &in->pn; in->timeout = nf_ct_icmpv6_timeout; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 3d719d3eb9a3..d53e3e78f605 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -146,11 +146,6 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { } }; -static inline struct nf_sctp_net *sctp_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.sctp; -} - #ifdef CONFIG_NF_CONNTRACK_PROCFS /* Print out the private part of the conntrack. */ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) @@ -480,7 +475,7 @@ static int sctp_packet(struct nf_conn *ct, timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) - timeouts = sctp_pernet(nf_ct_net(ct))->timeouts; + timeouts = nf_sctp_pernet(nf_ct_net(ct))->timeouts; nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); @@ -599,7 +594,7 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; - struct nf_sctp_net *sn = sctp_pernet(net); + struct nf_sctp_net *sn = nf_sctp_pernet(net); int i; /* set default SCTP timeouts. */ @@ -736,7 +731,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, static int sctp_init_net(struct net *net) { - struct nf_sctp_net *sn = sctp_pernet(net); + struct nf_sctp_net *sn = nf_sctp_pernet(net); struct nf_proto_net *pn = &sn->pn; if (!pn->users) { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 1bcf9984d45e..4dcbd51a8e97 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -272,11 +272,6 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { } }; -static inline struct nf_tcp_net *tcp_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.tcp; -} - #ifdef CONFIG_NF_CONNTRACK_PROCFS /* Print out the private part of the conntrack. */ static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) @@ -475,7 +470,7 @@ static bool tcp_in_window(const struct nf_conn *ct, const struct tcphdr *tcph) { struct net *net = nf_ct_net(ct); - struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_tcp_net *tn = nf_tcp_pernet(net); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; @@ -767,7 +762,7 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, { enum tcp_conntrack new_state; struct net *net = nf_ct_net(ct); - const struct nf_tcp_net *tn = tcp_pernet(net); + const struct nf_tcp_net *tn = nf_tcp_pernet(net); const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; @@ -841,7 +836,7 @@ static int tcp_packet(struct nf_conn *ct, const struct nf_hook_state *state) { struct net *net = nf_ct_net(ct); - struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_tcp_net *tn = nf_tcp_pernet(net); struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; unsigned int index, *timeouts; @@ -1283,7 +1278,7 @@ static unsigned int tcp_nlattr_tuple_size(void) static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { - struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_tcp_net *tn = nf_tcp_pernet(net); unsigned int *timeouts = data; int i; @@ -1508,7 +1503,7 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, static int tcp_init_net(struct net *net) { - struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_tcp_net *tn = nf_tcp_pernet(net); struct nf_proto_net *pn = &tn->pn; if (!pn->users) { diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index a7aa70370913..b4f5d5e82031 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -29,17 +29,12 @@ static const unsigned int udp_timeouts[UDP_CT_MAX] = { [UDP_CT_UNREPLIED] = 30*HZ, - [UDP_CT_REPLIED] = 180*HZ, + [UDP_CT_REPLIED] = 120*HZ, }; -static inline struct nf_udp_net *udp_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.udp; -} - static unsigned int *udp_get_timeouts(struct net *net) { - return udp_pernet(net)->timeouts; + return nf_udp_pernet(net)->timeouts; } static void udp_error_log(const struct sk_buff *skb, @@ -105,11 +100,21 @@ static int udp_packet(struct nf_conn *ct, if (!timeouts) timeouts = udp_get_timeouts(nf_ct_net(ct)); + if (!nf_ct_is_confirmed(ct)) + ct->proto.udp.stream_ts = 2 * HZ + jiffies; + /* If we've seen traffic both ways, this is some kind of UDP - stream. Extend timeout. */ + * stream. Set Assured. + */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDP_CT_REPLIED]); + unsigned long extra = timeouts[UDP_CT_UNREPLIED]; + + /* Still active after two seconds? Extend timeout. */ + if (time_after(jiffies, ct->proto.udp.stream_ts)) + extra = timeouts[UDP_CT_REPLIED]; + + nf_ct_refresh_acct(ct, ctinfo, skb, extra); + /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); @@ -212,7 +217,7 @@ static int udp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; - struct nf_udp_net *un = udp_pernet(net); + struct nf_udp_net *un = nf_udp_pernet(net); if (!timeouts) timeouts = un->timeouts; @@ -292,7 +297,7 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, static int udp_init_net(struct net *net) { - struct nf_udp_net *un = udp_pernet(net); + struct nf_udp_net *un = nf_udp_pernet(net); struct nf_proto_net *pn = &un->pn; if (!pn->users) { diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index a975efd6b8c3..9da303461069 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -115,12 +115,12 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb, /* TCP SACK sequence number adjustment */ static unsigned int nf_ct_sack_adjust(struct sk_buff *skb, unsigned int protoff, - struct tcphdr *tcph, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - unsigned int dir, optoff, optend; + struct tcphdr *tcph = (void *)skb->data + protoff; struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); + unsigned int dir, optoff, optend; optoff = protoff + sizeof(struct tcphdr); optend = protoff + tcph->doff * 4; @@ -128,6 +128,7 @@ static unsigned int nf_ct_sack_adjust(struct sk_buff *skb, if (!skb_make_writable(skb, optend)) return 0; + tcph = (void *)skb->data + protoff; dir = CTINFO2DIR(ctinfo); while (optoff < optend) { @@ -207,7 +208,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb, ntohl(newack)); tcph->ack_seq = newack; - res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo); + res = nf_ct_sack_adjust(skb, protoff, ct, ctinfo); out: spin_unlock_bh(&ct->lock); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 463d17d349c1..b6177fd73304 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -267,6 +267,24 @@ static const char* l4proto_name(u16 proto) return "unknown"; } +static unsigned int +seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) +{ + struct nf_conn_acct *acct; + struct nf_conn_counter *counter; + + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; + + counter = acct->counter; + seq_printf(s, "packets=%llu bytes=%llu ", + (unsigned long long)atomic64_read(&counter[dir].packets), + (unsigned long long)atomic64_read(&counter[dir].bytes)); + + return 0; +} + /* return 0 on success, 1 in case of error */ static int ct_seq_show(struct seq_file *s, void *v) { @@ -514,36 +532,53 @@ nf_conntrack_hash_sysctl(struct ctl_table *table, int write, static struct ctl_table_header *nf_ct_netfilter_header; +enum nf_ct_sysctl_index { + NF_SYSCTL_CT_MAX, + NF_SYSCTL_CT_COUNT, + NF_SYSCTL_CT_BUCKETS, + NF_SYSCTL_CT_CHECKSUM, + NF_SYSCTL_CT_LOG_INVALID, + NF_SYSCTL_CT_EXPECT_MAX, + NF_SYSCTL_CT_ACCT, + NF_SYSCTL_CT_HELPER, +#ifdef CONFIG_NF_CONNTRACK_EVENTS + NF_SYSCTL_CT_EVENTS, +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + NF_SYSCTL_CT_TIMESTAMP, +#endif +}; + static struct ctl_table nf_ct_sysctl_table[] = { - { + [NF_SYSCTL_CT_MAX] = { .procname = "nf_conntrack_max", .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { + [NF_SYSCTL_CT_COUNT] = { .procname = "nf_conntrack_count", .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, }, - { + [NF_SYSCTL_CT_BUCKETS] = { .procname = "nf_conntrack_buckets", .data = &nf_conntrack_htable_size_user, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = nf_conntrack_hash_sysctl, }, - { + [NF_SYSCTL_CT_CHECKSUM] = { .procname = "nf_conntrack_checksum", .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, - { + [NF_SYSCTL_CT_LOG_INVALID] = { .procname = "nf_conntrack_log_invalid", .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), @@ -552,13 +587,45 @@ static struct ctl_table nf_ct_sysctl_table[] = { .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, - { + [NF_SYSCTL_CT_EXPECT_MAX] = { .procname = "nf_conntrack_expect_max", .data = &nf_ct_expect_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, + [NF_SYSCTL_CT_ACCT] = { + .procname = "nf_conntrack_acct", + .data = &init_net.ct.sysctl_acct, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + [NF_SYSCTL_CT_HELPER] = { + .procname = "nf_conntrack_helper", + .data = &init_net.ct.sysctl_auto_assign_helper, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_NF_CONNTRACK_EVENTS + [NF_SYSCTL_CT_EVENTS] = { + .procname = "nf_conntrack_events", + .data = &init_net.ct.sysctl_events, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + [NF_SYSCTL_CT_TIMESTAMP] = { + .procname = "nf_conntrack_timestamp", + .data = &init_net.ct.sysctl_tstamp, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif { } }; @@ -582,16 +649,28 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) if (!table) goto out_kmemdup; - table[1].data = &net->ct.count; - table[3].data = &net->ct.sysctl_checksum; - table[4].data = &net->ct.sysctl_log_invalid; + table[NF_SYSCTL_CT_COUNT].data = &net->ct.count; + table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum; + table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid; +#ifdef CONFIG_NF_CONNTRACK_EVENTS + table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events; +#endif /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; + if (net->user_ns != &init_user_ns) { + table[NF_SYSCTL_CT_MAX].procname = NULL; + table[NF_SYSCTL_CT_ACCT].procname = NULL; + table[NF_SYSCTL_CT_HELPER].procname = NULL; +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + table[NF_SYSCTL_CT_TIMESTAMP].procname = NULL; +#endif +#ifdef CONFIG_NF_CONNTRACK_EVENTS + table[NF_SYSCTL_CT_EVENTS].procname = NULL; +#endif + } if (!net_eq(&init_net, net)) - table[2].mode = 0444; + table[NF_SYSCTL_CT_BUCKETS].mode = 0444; net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table); if (!net->ct.sysctl_header) diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index 56766cb26e40..705b912bd91f 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -22,83 +22,15 @@ static bool nf_ct_tstamp __read_mostly; module_param_named(tstamp, nf_ct_tstamp, bool, 0644); MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping."); -#ifdef CONFIG_SYSCTL -static struct ctl_table tstamp_sysctl_table[] = { - { - .procname = "nf_conntrack_timestamp", - .data = &init_net.ct.sysctl_tstamp, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - {} -}; -#endif /* CONFIG_SYSCTL */ - static const struct nf_ct_ext_type tstamp_extend = { .len = sizeof(struct nf_conn_tstamp), .align = __alignof__(struct nf_conn_tstamp), .id = NF_CT_EXT_TSTAMP, }; -#ifdef CONFIG_SYSCTL -static int nf_conntrack_tstamp_init_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table), - GFP_KERNEL); - if (!table) - goto out; - - table[0].data = &net->ct.sysctl_tstamp; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; - - net->ct.tstamp_sysctl_header = register_net_sysctl(net, "net/netfilter", - table); - if (!net->ct.tstamp_sysctl_header) { - pr_err("can't register to sysctl\n"); - goto out_register; - } - return 0; - -out_register: - kfree(table); -out: - return -ENOMEM; -} - -static void nf_conntrack_tstamp_fini_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = net->ct.tstamp_sysctl_header->ctl_table_arg; - unregister_net_sysctl_table(net->ct.tstamp_sysctl_header); - kfree(table); -} -#else -static int nf_conntrack_tstamp_init_sysctl(struct net *net) -{ - return 0; -} - -static void nf_conntrack_tstamp_fini_sysctl(struct net *net) -{ -} -#endif - -int nf_conntrack_tstamp_pernet_init(struct net *net) +void nf_conntrack_tstamp_pernet_init(struct net *net) { net->ct.sysctl_tstamp = nf_ct_tstamp; - return nf_conntrack_tstamp_init_sysctl(net); -} - -void nf_conntrack_tstamp_pernet_fini(struct net *net) -{ - nf_conntrack_tstamp_fini_sysctl(net); } int nf_conntrack_tstamp_init(void) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index b7a4816add76..fa0844e2a68d 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -247,9 +247,10 @@ flow_offload_lookup(struct nf_flowtable *flow_table, } EXPORT_SYMBOL_GPL(flow_offload_lookup); -int nf_flow_table_iterate(struct nf_flowtable *flow_table, - void (*iter)(struct flow_offload *flow, void *data), - void *data) +static int +nf_flow_table_iterate(struct nf_flowtable *flow_table, + void (*iter)(struct flow_offload *flow, void *data), + void *data) { struct flow_offload_tuple_rhash *tuplehash; struct rhashtable_iter hti; @@ -279,40 +280,19 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table, return err; } -EXPORT_SYMBOL_GPL(nf_flow_table_iterate); static inline bool nf_flow_has_expired(const struct flow_offload *flow) { return (__s32)(flow->timeout - (u32)jiffies) <= 0; } -static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table) +static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) { - struct flow_offload_tuple_rhash *tuplehash; - struct rhashtable_iter hti; - struct flow_offload *flow; - - rhashtable_walk_enter(&flow_table->rhashtable, &hti); - rhashtable_walk_start(&hti); + struct nf_flowtable *flow_table = data; - while ((tuplehash = rhashtable_walk_next(&hti))) { - if (IS_ERR(tuplehash)) { - if (PTR_ERR(tuplehash) != -EAGAIN) - break; - continue; - } - if (tuplehash->tuple.dir) - continue; - - flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); - - if (nf_flow_has_expired(flow) || - (flow->flags & (FLOW_OFFLOAD_DYING | - FLOW_OFFLOAD_TEARDOWN))) - flow_offload_del(flow_table, flow); - } - rhashtable_walk_stop(&hti); - rhashtable_walk_exit(&hti); + if (nf_flow_has_expired(flow) || + (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))) + flow_offload_del(flow_table, flow); } static void nf_flow_offload_work_gc(struct work_struct *work) @@ -320,7 +300,7 @@ static void nf_flow_offload_work_gc(struct work_struct *work) struct nf_flowtable *flow_table; flow_table = container_of(work, struct nf_flowtable, gc_work.work); - nf_flow_offload_gc_step(flow_table); + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } @@ -504,7 +484,7 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) mutex_unlock(&flowtable_lock); cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); - nf_flow_offload_gc_step(flow_table); + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index a8c5c846aec1..3a0d6880b7c9 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -156,22 +156,20 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) { + const struct net_device *physoutdev __maybe_unused; + const struct net_device *physindev __maybe_unused; + nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ", '0' + loginfo->u.log.level, prefix, in ? in->name : "", out ? out->name : ""); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (skb->nf_bridge) { - const struct net_device *physindev; - const struct net_device *physoutdev; - - physindev = nf_bridge_get_physindev(skb); - if (physindev && in != physindev) - nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); - physoutdev = nf_bridge_get_physoutdev(skb); - if (physoutdev && out != physoutdev) - nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name); - } + physindev = nf_bridge_get_physindev(skb); + if (physindev && in != physindev) + nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); + physoutdev = nf_bridge_get_physoutdev(skb); + if (physoutdev && out != physoutdev) + nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name); #endif } EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index e2b196054dfc..d159e9e7835b 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -23,7 +23,6 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_helper.h> #include <net/netfilter/nf_conntrack_helper.h> @@ -38,8 +37,6 @@ static spinlock_t nf_nat_locks[CONNTRACK_LOCKS]; static DEFINE_MUTEX(nf_nat_proto_mutex); static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] __read_mostly; -static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] - __read_mostly; static unsigned int nat_net_id __read_mostly; static struct hlist_head *nf_nat_bysource __read_mostly; @@ -67,13 +64,6 @@ __nf_nat_l3proto_find(u8 family) return rcu_dereference(nf_nat_l3protos[family]); } -inline const struct nf_nat_l4proto * -__nf_nat_l4proto_find(u8 family, u8 protonum) -{ - return rcu_dereference(nf_nat_l4protos[family][protonum]); -} -EXPORT_SYMBOL_GPL(__nf_nat_l4proto_find); - #ifdef CONFIG_XFRM static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl) { @@ -117,7 +107,8 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) dst = skb_dst(skb); if (dst->xfrm) dst = ((struct xfrm_dst *)dst)->route; - dst_hold(dst); + if (!dst_hold_safe(dst)) + return -EHOSTUNREACH; if (sk && !net_eq(net, sock_net(sk))) sk = NULL; @@ -172,27 +163,66 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, } EXPORT_SYMBOL(nf_nat_used_tuple); +static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t, + const struct nf_nat_range2 *range) +{ + if (t->src.l3num == NFPROTO_IPV4) + return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) && + ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip); + + return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 && + ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0; +} + +/* Is the manipable part of the tuple between min and max incl? */ +static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + __be16 port; + + switch (tuple->dst.protonum) { + case IPPROTO_ICMP: /* fallthrough */ + case IPPROTO_ICMPV6: + return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && + ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); + case IPPROTO_GRE: /* all fall though */ + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_DCCP: + case IPPROTO_SCTP: + if (maniptype == NF_NAT_MANIP_SRC) + port = tuple->src.u.all; + else + port = tuple->dst.u.all; + + return ntohs(port) >= ntohs(min->all) && + ntohs(port) <= ntohs(max->all); + default: + return true; + } +} + /* If we source map this tuple so reply looks like reply_tuple, will * that meet the constraints of range. */ -static int in_range(const struct nf_nat_l3proto *l3proto, - const struct nf_nat_l4proto *l4proto, - const struct nf_conntrack_tuple *tuple, +static int in_range(const struct nf_conntrack_tuple *tuple, const struct nf_nat_range2 *range) { /* If we are supposed to map IPs, then we must be in the * range specified, otherwise let this drag us onto a new src IP. */ if (range->flags & NF_NAT_RANGE_MAP_IPS && - !l3proto->in_range(tuple, range)) + !nf_nat_inet_in_range(tuple, range)) return 0; - if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) || - l4proto->in_range(tuple, NF_NAT_MANIP_SRC, - &range->min_proto, &range->max_proto)) + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) return 1; - return 0; + return l4proto_in_range(tuple, NF_NAT_MANIP_SRC, + &range->min_proto, &range->max_proto); } static inline int @@ -211,8 +241,6 @@ same_src(const struct nf_conn *ct, static int find_appropriate_src(struct net *net, const struct nf_conntrack_zone *zone, - const struct nf_nat_l3proto *l3proto, - const struct nf_nat_l4proto *l4proto, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range2 *range) @@ -229,7 +257,7 @@ find_appropriate_src(struct net *net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); result->dst = tuple->dst; - if (in_range(l3proto, l4proto, result, range)) + if (in_range(result, range)) return 1; } } @@ -310,6 +338,123 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone, } } +/* Alter the per-proto part of the tuple (depending on maniptype), to + * give a unique tuple in the given range if possible. + * + * Per-protocol part of tuple is initialized to the incoming packet. + */ +static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_nat_range2 *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + unsigned int range_size, min, max, i, attempts; + __be16 *keyptr; + u16 off; + static const unsigned int max_attempts = 128; + + switch (tuple->dst.protonum) { + case IPPROTO_ICMP: /* fallthrough */ + case IPPROTO_ICMPV6: + /* id is same for either direction... */ + keyptr = &tuple->src.u.icmp.id; + min = range->min_proto.icmp.id; + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; + goto find_free_id; +#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE) + case IPPROTO_GRE: + /* If there is no master conntrack we are not PPTP, + do not change tuples */ + if (!ct->master) + return; + + if (maniptype == NF_NAT_MANIP_SRC) + keyptr = &tuple->src.u.gre.key; + else + keyptr = &tuple->dst.u.gre.key; + + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { + min = 1; + range_size = 65535; + } else { + min = ntohs(range->min_proto.gre.key); + range_size = ntohs(range->max_proto.gre.key) - min + 1; + } + goto find_free_id; +#endif + case IPPROTO_UDP: /* fallthrough */ + case IPPROTO_UDPLITE: /* fallthrough */ + case IPPROTO_TCP: /* fallthrough */ + case IPPROTO_SCTP: /* fallthrough */ + case IPPROTO_DCCP: /* fallthrough */ + if (maniptype == NF_NAT_MANIP_SRC) + keyptr = &tuple->src.u.all; + else + keyptr = &tuple->dst.u.all; + + break; + default: + return; + } + + /* If no range specified... */ + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { + /* If it's dst rewrite, can't change port */ + if (maniptype == NF_NAT_MANIP_DST) + return; + + if (ntohs(*keyptr) < 1024) { + /* Loose convention: >> 512 is credential passing */ + if (ntohs(*keyptr) < 512) { + min = 1; + range_size = 511 - min + 1; + } else { + min = 600; + range_size = 1023 - min + 1; + } + } else { + min = 1024; + range_size = 65535 - 1024 + 1; + } + } else { + min = ntohs(range->min_proto.all); + max = ntohs(range->max_proto.all); + if (unlikely(max < min)) + swap(max, min); + range_size = max - min + 1; + } + +find_free_id: + if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) + off = (ntohs(*keyptr) - ntohs(range->base_proto.all)); + else + off = prandom_u32(); + + attempts = range_size; + if (attempts > max_attempts) + attempts = max_attempts; + + /* We are in softirq; doing a search of the entire range risks + * soft lockup when all tuples are already used. + * + * If we can't find any free port from first offset, pick a new + * one and try again, with ever smaller search window. + */ +another_round: + for (i = 0; i < attempts; i++, off++) { + *keyptr = htons(min + off % range_size); + if (!nf_nat_used_tuple(tuple, ct)) + return; + } + + if (attempts >= range_size || attempts < 16) + return; + attempts /= 2; + off = prandom_u32(); + goto another_round; +} + /* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, * we change the source to map into the range. For NF_INET_PRE_ROUTING * and NF_INET_LOCAL_OUT, we change the destination to map into the @@ -324,17 +469,10 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { const struct nf_conntrack_zone *zone; - const struct nf_nat_l3proto *l3proto; - const struct nf_nat_l4proto *l4proto; struct net *net = nf_ct_net(ct); zone = nf_ct_zone(ct); - rcu_read_lock(); - l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num); - l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num, - orig_tuple->dst.protonum); - /* 1) If this srcip/proto/src-proto-part is currently mapped, * and that same mapping gives a unique tuple within the given * range, use that. @@ -346,16 +484,16 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, if (maniptype == NF_NAT_MANIP_SRC && !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ - if (in_range(l3proto, l4proto, orig_tuple, range)) { + if (in_range(orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { *tuple = *orig_tuple; - goto out; + return; } - } else if (find_appropriate_src(net, zone, l3proto, l4proto, + } else if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) - goto out; + return; } } @@ -371,21 +509,19 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && - l4proto->in_range(tuple, maniptype, + l4proto_in_range(tuple, maniptype, &range->min_proto, &range->max_proto) && (range->min_proto.all == range->max_proto.all || !nf_nat_used_tuple(tuple, ct))) - goto out; + return; } else if (!nf_nat_used_tuple(tuple, ct)) { - goto out; + return; } } /* Last chance: get protocol to try to obtain unique tuple. */ - l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct); -out: - rcu_read_unlock(); + nf_nat_l4proto_unique_tuple(tuple, range, maniptype, ct); } struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct) @@ -501,16 +637,13 @@ static unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_dir dir) { const struct nf_nat_l3proto *l3proto; - const struct nf_nat_l4proto *l4proto; struct nf_conntrack_tuple target; /* We are aiming to look like inverse of other direction. */ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); l3proto = __nf_nat_l3proto_find(target.src.l3num); - l4proto = __nf_nat_l4proto_find(target.src.l3num, - target.dst.protonum); - if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype)) + if (!l3proto->manip_pkt(skb, 0, &target, mtype)) return NF_DROP; return NF_ACCEPT; @@ -666,16 +799,6 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) return 0; } -static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) -{ - struct nf_nat_proto_clean clean = { - .l3proto = l3proto, - .l4proto = l4proto, - }; - - nf_ct_iterate_destroy(nf_nat_proto_remove, &clean); -} - static void nf_nat_l3proto_clean(u8 l3proto) { struct nf_nat_proto_clean clean = { @@ -685,82 +808,8 @@ static void nf_nat_l3proto_clean(u8 l3proto) nf_ct_iterate_destroy(nf_nat_proto_remove, &clean); } -/* Protocol registration. */ -int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto) -{ - const struct nf_nat_l4proto **l4protos; - unsigned int i; - int ret = 0; - - mutex_lock(&nf_nat_proto_mutex); - if (nf_nat_l4protos[l3proto] == NULL) { - l4protos = kmalloc_array(IPPROTO_MAX, - sizeof(struct nf_nat_l4proto *), - GFP_KERNEL); - if (l4protos == NULL) { - ret = -ENOMEM; - goto out; - } - - for (i = 0; i < IPPROTO_MAX; i++) - RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown); - - /* Before making proto_array visible to lockless readers, - * we must make sure its content is committed to memory. - */ - smp_wmb(); - - nf_nat_l4protos[l3proto] = l4protos; - } - - if (rcu_dereference_protected( - nf_nat_l4protos[l3proto][l4proto->l4proto], - lockdep_is_held(&nf_nat_proto_mutex) - ) != &nf_nat_l4proto_unknown) { - ret = -EBUSY; - goto out; - } - RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto); - out: - mutex_unlock(&nf_nat_proto_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_nat_l4proto_register); - -/* No one stores the protocol anywhere; simply delete it. */ -void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto) -{ - mutex_lock(&nf_nat_proto_mutex); - RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], - &nf_nat_l4proto_unknown); - mutex_unlock(&nf_nat_proto_mutex); - synchronize_rcu(); - - nf_nat_l4proto_clean(l3proto, l4proto->l4proto); -} -EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister); - int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto) { - mutex_lock(&nf_nat_proto_mutex); - RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP], - &nf_nat_l4proto_tcp); - RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP], - &nf_nat_l4proto_udp); -#ifdef CONFIG_NF_NAT_PROTO_DCCP - RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP], - &nf_nat_l4proto_dccp); -#endif -#ifdef CONFIG_NF_NAT_PROTO_SCTP - RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP], - &nf_nat_l4proto_sctp); -#endif -#ifdef CONFIG_NF_NAT_PROTO_UDPLITE - RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDPLITE], - &nf_nat_l4proto_udplite); -#endif - mutex_unlock(&nf_nat_proto_mutex); - RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto); return 0; } @@ -801,12 +850,26 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, }; +static int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range2 *range) +{ + if (tb[CTA_PROTONAT_PORT_MIN]) { + range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); + range->max_proto.all = range->min_proto.all; + range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + if (tb[CTA_PROTONAT_PORT_MAX]) { + range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); + range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + return 0; +} + static int nfnetlink_parse_nat_proto(struct nlattr *attr, const struct nf_conn *ct, struct nf_nat_range2 *range) { struct nlattr *tb[CTA_PROTONAT_MAX+1]; - const struct nf_nat_l4proto *l4proto; int err; err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, @@ -814,11 +877,7 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr, if (err < 0) return err; - l4proto = __nf_nat_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); - if (l4proto->nlattr_to_range) - err = l4proto->nlattr_to_range(tb, range); - - return err; + return nf_nat_l4proto_nlattr_to_range(tb, range); } static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { @@ -1081,7 +1140,6 @@ static int __init nf_nat_init(void) static void __exit nf_nat_cleanup(void) { struct nf_nat_proto_clean clean = {}; - unsigned int i; nf_ct_iterate_destroy(nf_nat_proto_clean, &clean); @@ -1089,10 +1147,6 @@ static void __exit nf_nat_cleanup(void) nf_ct_helper_expectfn_unregister(&follow_master_nat); RCU_INIT_POINTER(nf_nat_hook, NULL); - synchronize_rcu(); - - for (i = 0; i < NFPROTO_NUMPROTO; i++) - kfree(nf_nat_l4protos[i]); synchronize_net(); kvfree(nf_nat_bysource); unregister_pernet_subsys(&nat_net_ops); diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c new file mode 100644 index 000000000000..f83bf9d8c9f5 --- /dev/null +++ b/net/netfilter/nf_nat_proto.c @@ -0,0 +1,343 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> + +#include <linux/dccp.h> +#include <linux/sctp.h> +#include <net/sctp/checksum.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat_l4proto.h> + +static void +__udp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, struct udphdr *hdr, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, bool do_csum) +{ + __be16 *portptr, newport; + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of src port */ + newport = tuple->src.u.udp.port; + portptr = &hdr->source; + } else { + /* Get rid of dst port */ + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } + if (do_csum) { + l3proto->csum_update(skb, iphdroff, &hdr->check, + tuple, maniptype); + inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, + false); + if (!hdr->check) + hdr->check = CSUM_MANGLED_0; + } + *portptr = newport; +} + +static bool udp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct udphdr *hdr; + bool do_csum; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct udphdr *)(skb->data + hdroff); + do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL; + + __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum); + return true; +} + +static bool udplite_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + struct udphdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct udphdr *)(skb->data + hdroff); + __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true); +#endif + return true; +} + +static bool +sctp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ +#ifdef CONFIG_NF_CT_PROTO_SCTP + struct sctphdr *hdr; + int hdrsize = 8; + + /* This could be an inner header returned in imcp packet; in such + * cases we cannot update the checksum field since it is outside + * of the 8 bytes of transport layer headers we are guaranteed. + */ + if (skb->len >= hdroff + sizeof(*hdr)) + hdrsize = sizeof(*hdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) + return false; + + hdr = (struct sctphdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of src port */ + hdr->source = tuple->src.u.sctp.port; + } else { + /* Get rid of dst port */ + hdr->dest = tuple->dst.u.sctp.port; + } + + if (hdrsize < sizeof(*hdr)) + return true; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + hdr->checksum = sctp_compute_cksum(skb, hdroff); + skb->ip_summed = CHECKSUM_NONE; + } + +#endif + return true; +} + +static bool +tcp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct tcphdr *hdr; + __be16 *portptr, newport, oldport; + int hdrsize = 8; /* TCP connection tracking guarantees this much */ + + /* this could be a inner header returned in icmp packet; in such + cases we cannot update the checksum field since it is outside of + the 8 bytes of transport layer headers we are guaranteed */ + if (skb->len >= hdroff + sizeof(struct tcphdr)) + hdrsize = sizeof(struct tcphdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) + return false; + + hdr = (struct tcphdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of src port */ + newport = tuple->src.u.tcp.port; + portptr = &hdr->source; + } else { + /* Get rid of dst port */ + newport = tuple->dst.u.tcp.port; + portptr = &hdr->dest; + } + + oldport = *portptr; + *portptr = newport; + + if (hdrsize < sizeof(*hdr)) + return true; + + l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); + inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false); + return true; +} + +static bool +dccp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ +#ifdef CONFIG_NF_CT_PROTO_DCCP + struct dccp_hdr *hdr; + __be16 *portptr, oldport, newport; + int hdrsize = 8; /* DCCP connection tracking guarantees this much */ + + if (skb->len >= hdroff + sizeof(struct dccp_hdr)) + hdrsize = sizeof(struct dccp_hdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) + return false; + + hdr = (struct dccp_hdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + newport = tuple->src.u.dccp.port; + portptr = &hdr->dccph_sport; + } else { + newport = tuple->dst.u.dccp.port; + portptr = &hdr->dccph_dport; + } + + oldport = *portptr; + *portptr = newport; + + if (hdrsize < sizeof(*hdr)) + return true; + + l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum, + tuple, maniptype); + inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, + false); +#endif + return true; +} + +static bool +icmp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct icmphdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct icmphdr *)(skb->data + hdroff); + inet_proto_csum_replace2(&hdr->checksum, skb, + hdr->un.echo.id, tuple->src.u.icmp.id, false); + hdr->un.echo.id = tuple->src.u.icmp.id; + return true; +} + +static bool +icmpv6_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct icmp6hdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct icmp6hdr *)(skb->data + hdroff); + l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum, + tuple, maniptype); + if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST || + hdr->icmp6_type == ICMPV6_ECHO_REPLY) { + inet_proto_csum_replace2(&hdr->icmp6_cksum, skb, + hdr->icmp6_identifier, + tuple->src.u.icmp.id, false); + hdr->icmp6_identifier = tuple->src.u.icmp.id; + } + return true; +} + +/* manipulate a GRE packet according to maniptype */ +static bool +gre_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ +#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE) + const struct gre_base_hdr *greh; + struct pptp_gre_header *pgreh; + + /* pgreh includes two optional 32bit fields which are not required + * to be there. That's where the magic '8' comes from */ + if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8)) + return false; + + greh = (void *)skb->data + hdroff; + pgreh = (struct pptp_gre_header *)greh; + + /* we only have destination manip of a packet, since 'source key' + * is not present in the packet itself */ + if (maniptype != NF_NAT_MANIP_DST) + return true; + + switch (greh->flags & GRE_VERSION) { + case GRE_VERSION_0: + /* We do not currently NAT any GREv0 packets. + * Try to behave like "nf_nat_proto_unknown" */ + break; + case GRE_VERSION_1: + pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); + pgreh->call_id = tuple->dst.u.gre.key; + break; + default: + pr_debug("can't nat unknown GRE version\n"); + return false; + } +#endif + return true; +} + +bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + switch (tuple->dst.protonum) { + case IPPROTO_TCP: + return tcp_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_UDP: + return udp_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_UDPLITE: + return udplite_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_SCTP: + return sctp_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_ICMP: + return icmp_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_ICMPV6: + return icmpv6_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_DCCP: + return dccp_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_GRE: + return gre_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + } + + /* If we don't know protocol -- no error, pass it unmodified. */ + return true; +} +EXPORT_SYMBOL_GPL(nf_nat_l4proto_manip_pkt); diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c deleted file mode 100644 index 5d849d835561..000000000000 --- a/net/netfilter/nf_nat_proto_common.c +++ /dev/null @@ -1,120 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * (C) 2008 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/random.h> -#include <linux/netfilter.h> -#include <linux/export.h> - -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> - -bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max) -{ - __be16 port; - - if (maniptype == NF_NAT_MANIP_SRC) - port = tuple->src.u.all; - else - port = tuple->dst.u.all; - - return ntohs(port) >= ntohs(min->all) && - ntohs(port) <= ntohs(max->all); -} -EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range); - -void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct, - u16 *rover) -{ - unsigned int range_size, min, max, i; - __be16 *portptr; - u_int16_t off; - - if (maniptype == NF_NAT_MANIP_SRC) - portptr = &tuple->src.u.all; - else - portptr = &tuple->dst.u.all; - - /* If no range specified... */ - if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { - /* If it's dst rewrite, can't change port */ - if (maniptype == NF_NAT_MANIP_DST) - return; - - if (ntohs(*portptr) < 1024) { - /* Loose convention: >> 512 is credential passing */ - if (ntohs(*portptr) < 512) { - min = 1; - range_size = 511 - min + 1; - } else { - min = 600; - range_size = 1023 - min + 1; - } - } else { - min = 1024; - range_size = 65535 - 1024 + 1; - } - } else { - min = ntohs(range->min_proto.all); - max = ntohs(range->max_proto.all); - if (unlikely(max < min)) - swap(max, min); - range_size = max - min + 1; - } - - if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) { - off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC - ? tuple->dst.u.all - : tuple->src.u.all); - } else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) { - off = prandom_u32(); - } else if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) { - off = (ntohs(*portptr) - ntohs(range->base_proto.all)); - } else { - off = *rover; - } - - for (i = 0; ; ++off) { - *portptr = htons(min + off % range_size); - if (++i != range_size && nf_nat_used_tuple(tuple, ct)) - continue; - if (!(range->flags & (NF_NAT_RANGE_PROTO_RANDOM_ALL| - NF_NAT_RANGE_PROTO_OFFSET))) - *rover = off; - return; - } -} -EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple); - -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) -int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_range2 *range) -{ - if (tb[CTA_PROTONAT_PORT_MIN]) { - range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); - range->max_proto.all = range->min_proto.all; - range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - if (tb[CTA_PROTONAT_PORT_MAX]) { - range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); - range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - return 0; -} -EXPORT_SYMBOL_GPL(nf_nat_l4proto_nlattr_to_range); -#endif diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c deleted file mode 100644 index 67ea0d83aa5a..000000000000 --- a/net/netfilter/nf_nat_proto_dccp.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * DCCP NAT protocol helper - * - * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include <linux/kernel.h> -#include <linux/skbuff.h> -#include <linux/dccp.h> - -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> - -static u_int16_t dccp_port_rover; - -static void -dccp_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, - &dccp_port_rover); -} - -static bool -dccp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct dccp_hdr *hdr; - __be16 *portptr, oldport, newport; - int hdrsize = 8; /* DCCP connection tracking guarantees this much */ - - if (skb->len >= hdroff + sizeof(struct dccp_hdr)) - hdrsize = sizeof(struct dccp_hdr); - - if (!skb_make_writable(skb, hdroff + hdrsize)) - return false; - - hdr = (struct dccp_hdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - newport = tuple->src.u.dccp.port; - portptr = &hdr->dccph_sport; - } else { - newport = tuple->dst.u.dccp.port; - portptr = &hdr->dccph_dport; - } - - oldport = *portptr; - *portptr = newport; - - if (hdrsize < sizeof(*hdr)) - return true; - - l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum, - tuple, maniptype); - inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, - false); - return true; -} - -const struct nf_nat_l4proto nf_nat_l4proto_dccp = { - .l4proto = IPPROTO_DCCP, - .manip_pkt = dccp_manip_pkt, - .in_range = nf_nat_l4proto_in_range, - .unique_tuple = dccp_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c deleted file mode 100644 index 1c5d9b65fbba..000000000000 --- a/net/netfilter/nf_nat_proto_sctp.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/sctp.h> -#include <net/sctp/checksum.h> - -#include <net/netfilter/nf_nat_l4proto.h> - -static u_int16_t nf_sctp_port_rover; - -static void -sctp_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, - &nf_sctp_port_rover); -} - -static bool -sctp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct sctphdr *hdr; - int hdrsize = 8; - - /* This could be an inner header returned in imcp packet; in such - * cases we cannot update the checksum field since it is outside - * of the 8 bytes of transport layer headers we are guaranteed. - */ - if (skb->len >= hdroff + sizeof(*hdr)) - hdrsize = sizeof(*hdr); - - if (!skb_make_writable(skb, hdroff + hdrsize)) - return false; - - hdr = (struct sctphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src port */ - hdr->source = tuple->src.u.sctp.port; - } else { - /* Get rid of dst port */ - hdr->dest = tuple->dst.u.sctp.port; - } - - if (hdrsize < sizeof(*hdr)) - return true; - - if (skb->ip_summed != CHECKSUM_PARTIAL) { - hdr->checksum = sctp_compute_cksum(skb, hdroff); - skb->ip_summed = CHECKSUM_NONE; - } - - return true; -} - -const struct nf_nat_l4proto nf_nat_l4proto_sctp = { - .l4proto = IPPROTO_SCTP, - .manip_pkt = sctp_manip_pkt, - .in_range = nf_nat_l4proto_in_range, - .unique_tuple = sctp_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c deleted file mode 100644 index f15fcd475f98..000000000000 --- a/net/netfilter/nf_nat_proto_tcp.c +++ /dev/null @@ -1,85 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/init.h> -#include <linux/export.h> -#include <linux/tcp.h> - -#include <linux/netfilter.h> -#include <linux/netfilter/nfnetlink_conntrack.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> -#include <net/netfilter/nf_nat_core.h> - -static u16 tcp_port_rover; - -static void -tcp_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, - &tcp_port_rover); -} - -static bool -tcp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct tcphdr *hdr; - __be16 *portptr, newport, oldport; - int hdrsize = 8; /* TCP connection tracking guarantees this much */ - - /* this could be a inner header returned in icmp packet; in such - cases we cannot update the checksum field since it is outside of - the 8 bytes of transport layer headers we are guaranteed */ - if (skb->len >= hdroff + sizeof(struct tcphdr)) - hdrsize = sizeof(struct tcphdr); - - if (!skb_make_writable(skb, hdroff + hdrsize)) - return false; - - hdr = (struct tcphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src port */ - newport = tuple->src.u.tcp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst port */ - newport = tuple->dst.u.tcp.port; - portptr = &hdr->dest; - } - - oldport = *portptr; - *portptr = newport; - - if (hdrsize < sizeof(*hdr)) - return true; - - l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false); - return true; -} - -const struct nf_nat_l4proto nf_nat_l4proto_tcp = { - .l4proto = IPPROTO_TCP, - .manip_pkt = tcp_manip_pkt, - .in_range = nf_nat_l4proto_in_range, - .unique_tuple = tcp_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c deleted file mode 100644 index 5790f70a83b2..000000000000 --- a/net/netfilter/nf_nat_proto_udp.c +++ /dev/null @@ -1,130 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/export.h> -#include <linux/init.h> -#include <linux/udp.h> - -#include <linux/netfilter.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> - -static u16 udp_port_rover; - -static void -udp_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, - &udp_port_rover); -} - -static void -__udp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, struct udphdr *hdr, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, bool do_csum) -{ - __be16 *portptr, newport; - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src port */ - newport = tuple->src.u.udp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst port */ - newport = tuple->dst.u.udp.port; - portptr = &hdr->dest; - } - if (do_csum) { - l3proto->csum_update(skb, iphdroff, &hdr->check, - tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, - false); - if (!hdr->check) - hdr->check = CSUM_MANGLED_0; - } - *portptr = newport; -} - -static bool udp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct udphdr *hdr; - bool do_csum; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - hdr = (struct udphdr *)(skb->data + hdroff); - do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL; - - __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum); - return true; -} - -#ifdef CONFIG_NF_NAT_PROTO_UDPLITE -static u16 udplite_port_rover; - -static bool udplite_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct udphdr *hdr; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - hdr = (struct udphdr *)(skb->data + hdroff); - __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true); - return true; -} - -static void -udplite_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, - &udplite_port_rover); -} - -const struct nf_nat_l4proto nf_nat_l4proto_udplite = { - .l4proto = IPPROTO_UDPLITE, - .manip_pkt = udplite_manip_pkt, - .in_range = nf_nat_l4proto_in_range, - .unique_tuple = udplite_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; -#endif /* CONFIG_NF_NAT_PROTO_UDPLITE */ - -const struct nf_nat_l4proto nf_nat_l4proto_udp = { - .l4proto = IPPROTO_UDP, - .manip_pkt = udp_manip_pkt, - .in_range = nf_nat_l4proto_in_range, - .unique_tuple = udp_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; diff --git a/net/netfilter/nf_nat_proto_unknown.c b/net/netfilter/nf_nat_proto_unknown.c deleted file mode 100644 index c5db3e251232..000000000000 --- a/net/netfilter/nf_nat_proto_unknown.c +++ /dev/null @@ -1,54 +0,0 @@ -/* The "unknown" protocol. This is what is used for protocols we - * don't understand. It's returned by ip_ct_find_proto(). - */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/init.h> - -#include <linux/netfilter.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_l4proto.h> - -static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type manip_type, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max) -{ - return true; -} - -static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - /* Sorry: we can't help you; if it's not unique, we can't frob - * anything. - */ - return; -} - -static bool -unknown_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - return true; -} - -const struct nf_nat_l4proto nf_nat_l4proto_unknown = { - .manip_pkt = unknown_manip_pkt, - .in_range = unknown_in_range, - .unique_tuple = unknown_unique_tuple, -}; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index 1f3086074981..aa1be643d7a0 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -18,6 +18,7 @@ #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_seqadj.h> @@ -316,6 +317,9 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff, static void nf_nat_sip_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { + struct nf_conn_help *help = nfct_help(ct->master); + struct nf_conntrack_expect *pair_exp; + int range_set_for_snat = 0; struct nf_nat_range2 range; /* This must be a fresh one. */ @@ -327,15 +331,42 @@ static void nf_nat_sip_expected(struct nf_conn *ct, range.min_addr = range.max_addr = exp->saved_addr; nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); - /* Change src to where master sends to, but only if the connection - * actually came from the same source. */ - if (nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, + /* Do media streams SRC manip according with the parameters + * found in the paired expectation. + */ + if (exp->class != SIP_EXPECT_SIGNALLING) { + spin_lock_bh(&nf_conntrack_expect_lock); + hlist_for_each_entry(pair_exp, &help->expectations, lnode) { + if (pair_exp->tuple.src.l3num == nf_ct_l3num(ct) && + pair_exp->tuple.dst.protonum == ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum && + nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, &pair_exp->saved_addr) && + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all == pair_exp->saved_proto.all) { + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); + range.min_proto.all = range.max_proto.all = pair_exp->tuple.dst.u.all; + range.min_addr = range.max_addr = pair_exp->tuple.dst.u3; + range_set_for_snat = 1; + break; + } + } + spin_unlock_bh(&nf_conntrack_expect_lock); + } + + /* When no paired expectation has been found, change src to + * where master sends to, but only if the connection actually came + * from the same source. + */ + if (!range_set_for_snat && + nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, &ct->master->tuplehash[exp->dir].tuple.src.u3)) { range.flags = NF_NAT_RANGE_MAP_IPS; range.min_addr = range.max_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3; - nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); + range_set_for_snat = 1; } + + /* Perform SRC manip. */ + if (range_set_for_snat) + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); } static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index d67a96a25a68..a36a77bae1d6 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -46,6 +46,24 @@ void nf_unregister_queue_handler(struct net *net) } EXPORT_SYMBOL(nf_unregister_queue_handler); +static void nf_queue_entry_release_br_nf_refs(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + if (nf_bridge) { + struct net_device *physdev; + + physdev = nf_bridge_get_physindev(skb); + if (physdev) + dev_put(physdev); + physdev = nf_bridge_get_physoutdev(skb); + if (physdev) + dev_put(physdev); + } +#endif +} + void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; @@ -57,20 +75,28 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry) dev_put(state->out); if (state->sk) sock_put(state->sk); + + nf_queue_entry_release_br_nf_refs(entry->skb); +} +EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); + +static void nf_queue_entry_get_br_nf_refs(struct sk_buff *skb) +{ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (entry->skb->nf_bridge) { + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + if (nf_bridge) { struct net_device *physdev; - physdev = nf_bridge_get_physindev(entry->skb); + physdev = nf_bridge_get_physindev(skb); if (physdev) - dev_put(physdev); - physdev = nf_bridge_get_physoutdev(entry->skb); + dev_hold(physdev); + physdev = nf_bridge_get_physoutdev(skb); if (physdev) - dev_put(physdev); + dev_hold(physdev); } #endif } -EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); /* Bump dev refs so they don't vanish while packet is out */ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) @@ -83,18 +109,8 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) dev_hold(state->out); if (state->sk) sock_hold(state->sk); -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (entry->skb->nf_bridge) { - struct net_device *physdev; - physdev = nf_bridge_get_physindev(entry->skb); - if (physdev) - dev_hold(physdev); - physdev = nf_bridge_get_physoutdev(entry->skb); - if (physdev) - dev_hold(physdev); - } -#endif + nf_queue_entry_get_br_nf_refs(entry->skb); } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 42487d01a3ed..fec814dace5a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1216,7 +1216,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name)) goto nla_put_failure; - if (basechain->stats && nft_dump_stats(skb, basechain->stats)) + if (rcu_access_pointer(basechain->stats) && + nft_dump_stats(skb, rcu_dereference(basechain->stats))) goto nla_put_failure; } @@ -1392,7 +1393,8 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) return newstats; } -static void nft_chain_stats_replace(struct nft_base_chain *chain, +static void nft_chain_stats_replace(struct net *net, + struct nft_base_chain *chain, struct nft_stats __percpu *newstats) { struct nft_stats __percpu *oldstats; @@ -1400,8 +1402,9 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain, if (newstats == NULL) return; - if (chain->stats) { - oldstats = nfnl_dereference(chain->stats, NFNL_SUBSYS_NFTABLES); + if (rcu_access_pointer(chain->stats)) { + oldstats = rcu_dereference_protected(chain->stats, + lockdep_commit_lock_is_held(net)); rcu_assign_pointer(chain->stats, newstats); synchronize_rcu(); free_percpu(oldstats); @@ -1439,9 +1442,10 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx) struct nft_base_chain *basechain = nft_base_chain(chain); module_put(basechain->type->owner); - free_percpu(basechain->stats); - if (basechain->stats) + if (rcu_access_pointer(basechain->stats)) { static_branch_dec(&nft_counters_enabled); + free_percpu(rcu_dereference_raw(basechain->stats)); + } kfree(chain->name); kfree(basechain); } else { @@ -1590,7 +1594,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, kfree(basechain); return PTR_ERR(stats); } - basechain->stats = stats; + rcu_assign_pointer(basechain->stats, stats); static_branch_inc(&nft_counters_enabled); } @@ -2291,15 +2295,52 @@ struct nft_rule_dump_ctx { char *chain; }; +static int __nf_tables_dump_rules(struct sk_buff *skb, + unsigned int *idx, + struct netlink_callback *cb, + const struct nft_table *table, + const struct nft_chain *chain) +{ + struct net *net = sock_net(skb->sk); + unsigned int s_idx = cb->args[0]; + const struct nft_rule *rule; + int rc = 1; + + list_for_each_entry_rcu(rule, &chain->rules, list) { + if (!nft_is_active(net, rule)) + goto cont; + if (*idx < s_idx) + goto cont; + if (*idx > s_idx) { + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + } + if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWRULE, + NLM_F_MULTI | NLM_F_APPEND, + table->family, + table, chain, rule) < 0) + goto out_unfinished; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + (*idx)++; + } + rc = 0; +out_unfinished: + cb->args[0] = *idx; + return rc; +} + static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); const struct nft_rule_dump_ctx *ctx = cb->data; - const struct nft_table *table; + struct nft_table *table; const struct nft_chain *chain; - const struct nft_rule *rule; - unsigned int idx = 0, s_idx = cb->args[0]; + unsigned int idx = 0; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; @@ -2313,37 +2354,34 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) continue; - list_for_each_entry_rcu(chain, &table->chains, list) { - if (ctx && ctx->chain && - strcmp(ctx->chain, chain->name) != 0) - continue; + if (ctx && ctx->chain) { + struct rhlist_head *list, *tmp; - list_for_each_entry_rcu(rule, &chain->rules, list) { - if (!nft_is_active(net, rule)) - goto cont; - if (idx < s_idx) - goto cont; - if (idx > s_idx) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFT_MSG_NEWRULE, - NLM_F_MULTI | NLM_F_APPEND, - table->family, - table, chain, rule) < 0) - goto done; - - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; + list = rhltable_lookup(&table->chains_ht, ctx->chain, + nft_chain_ht_params); + if (!list) + goto done; + + rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) { + if (!nft_is_active(net, chain)) + continue; + __nf_tables_dump_rules(skb, &idx, + cb, table, chain); + break; } + goto done; } + + list_for_each_entry_rcu(chain, &table->chains, list) { + if (__nf_tables_dump_rules(skb, &idx, cb, table, chain)) + goto done; + } + + if (ctx && ctx->table) + break; } done: rcu_read_unlock(); - - cb->args[0] = idx; return skb->len; } @@ -2457,7 +2495,7 @@ err: static void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) { - struct nft_expr *expr; + struct nft_expr *expr, *next; /* * Careful: some expressions might not be initialized in case this @@ -2465,8 +2503,9 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, */ expr = nft_expr_first(rule); while (expr != nft_expr_last(rule) && expr->ops) { + next = nft_expr_next(expr); nf_tables_expr_destroy(ctx, expr); - expr = nft_expr_next(expr); + expr = next; } kfree(rule); } @@ -2589,17 +2628,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, if (chain->use == UINT_MAX) return -EOVERFLOW; - } - - if (nla[NFTA_RULE_POSITION]) { - if (!(nlh->nlmsg_flags & NLM_F_CREATE)) - return -EOPNOTSUPP; - pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); - old_rule = __nft_rule_lookup(chain, pos_handle); - if (IS_ERR(old_rule)) { - NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]); - return PTR_ERR(old_rule); + if (nla[NFTA_RULE_POSITION]) { + pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); + old_rule = __nft_rule_lookup(chain, pos_handle); + if (IS_ERR(old_rule)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]); + return PTR_ERR(old_rule); + } } } @@ -2669,21 +2705,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - if (!nft_is_active_next(net, old_rule)) { - err = -ENOENT; - goto err2; - } - trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, - old_rule); + trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (trans == NULL) { err = -ENOMEM; goto err2; } - nft_deactivate_next(net, old_rule); - chain->use--; - - if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { - err = -ENOMEM; + err = nft_delrule(&ctx, old_rule); + if (err < 0) { + nft_trans_destroy(trans); goto err2; } @@ -6189,7 +6218,8 @@ static void nft_chain_commit_update(struct nft_trans *trans) return; basechain = nft_base_chain(trans->ctx.chain); - nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans)); + nft_chain_stats_replace(trans->ctx.net, basechain, + nft_trans_chain_stats(trans)); switch (nft_trans_chain_policy(trans)) { case NF_DROP: @@ -6324,7 +6354,7 @@ static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules) call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old); } -static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *chain) +static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain) { struct nft_rule **g0, **g1; bool next_genbit; @@ -6441,11 +6471,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) /* step 2. Make rules_gen_X visible to packet path */ list_for_each_entry(table, &net->nft.tables, list) { - list_for_each_entry(chain, &table->chains, list) { - if (!nft_is_active_next(net, chain)) - continue; - nf_tables_commit_chain_active(net, chain); - } + list_for_each_entry(chain, &table->chains, list) + nf_tables_commit_chain(net, chain); } /* diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 3fbce3b9c5ec..a50500232b0a 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -101,7 +101,7 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain, struct nft_stats *stats; base_chain = nft_base_chain(chain); - if (!base_chain->stats) + if (!rcu_access_pointer(base_chain->stats)) return; local_bh_disable(); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index e7a50af1b3d6..109b0d27345a 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -382,7 +382,8 @@ err: static int cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, u16 l3num, - const struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto, + const unsigned int *timeouts) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; @@ -408,7 +409,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, if (!nest_parms) goto nla_put_failure; - ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL); + ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts); if (ret < 0) goto nla_put_failure; @@ -430,6 +431,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, struct netlink_ext_ack *extack) { const struct nf_conntrack_l4proto *l4proto; + unsigned int *timeouts = NULL; struct sk_buff *skb2; int ret, err; __u16 l3num; @@ -442,12 +444,55 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); l4proto = nf_ct_l4proto_find_get(l4num); - /* This protocol is not supported, skip. */ - if (l4proto->l4proto != l4num) { - err = -EOPNOTSUPP; + err = -EOPNOTSUPP; + if (l4proto->l4proto != l4num) goto err; + + switch (l4proto->l4proto) { + case IPPROTO_ICMP: + timeouts = &nf_icmp_pernet(net)->timeout; + break; + case IPPROTO_TCP: + timeouts = nf_tcp_pernet(net)->timeouts; + break; + case IPPROTO_UDP: /* fallthrough */ + case IPPROTO_UDPLITE: + timeouts = nf_udp_pernet(net)->timeouts; + break; + case IPPROTO_DCCP: +#ifdef CONFIG_NF_CT_PROTO_DCCP + timeouts = nf_dccp_pernet(net)->dccp_timeout; +#endif + break; + case IPPROTO_ICMPV6: + timeouts = &nf_icmpv6_pernet(net)->timeout; + break; + case IPPROTO_SCTP: +#ifdef CONFIG_NF_CT_PROTO_SCTP + timeouts = nf_sctp_pernet(net)->timeouts; +#endif + break; + case IPPROTO_GRE: +#ifdef CONFIG_NF_CT_PROTO_GRE + if (l4proto->net_id) { + struct netns_proto_gre *net_gre; + + net_gre = net_generic(net, *l4proto->net_id); + timeouts = net_gre->gre_timeouts; + } +#endif + break; + case 255: + timeouts = &nf_generic_pernet(net)->timeout; + break; + default: + WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto); + break; } + if (!timeouts) + goto err; + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (skb2 == NULL) { err = -ENOMEM; @@ -458,8 +503,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_DEFAULT_SET, - l3num, - l4proto); + l3num, l4proto, timeouts); if (ret <= 0) { kfree_skb(skb2); err = -ENOMEM; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 332c69d27b47..b1f9c5303f02 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -148,7 +148,7 @@ static void instance_put(struct nfulnl_instance *inst) { if (inst && refcount_dec_and_test(&inst->use)) - call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu); + call_rcu(&inst->rcu, nfulnl_instance_free_rcu); } static void nfulnl_timer(struct timer_list *t); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 43041f087eb3..0dcc3592d053 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -727,13 +727,13 @@ nf_queue_entry_dup(struct nf_queue_entry *e) */ static void nf_bridge_adjust_skb_data(struct sk_buff *skb) { - if (skb->nf_bridge) + if (nf_bridge_info_get(skb)) __skb_push(skb, skb->network_header - skb->mac_header); } static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) { - if (skb->nf_bridge) + if (nf_bridge_info_get(skb)) __skb_pull(skb, skb->network_header - skb->mac_header); } #else @@ -904,23 +904,22 @@ nfqnl_set_mode(struct nfqnl_instance *queue, static int dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) { +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + int physinif, physoutif; + + physinif = nf_bridge_get_physinif(entry->skb); + physoutif = nf_bridge_get_physoutif(entry->skb); + + if (physinif == ifindex || physoutif == ifindex) + return 1; +#endif if (entry->state.in) if (entry->state.in->ifindex == ifindex) return 1; if (entry->state.out) if (entry->state.out->ifindex == ifindex) return 1; -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (entry->skb->nf_bridge) { - int physinif, physoutif; - physinif = nf_bridge_get_physinif(entry->skb); - physoutif = nf_bridge_get_physoutif(entry->skb); - - if (physinif == ifindex || physoutif == ifindex) - return 1; - } -#endif return 0; } @@ -1148,8 +1147,9 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry, if (!tb[NFQA_VLAN_TCI] || !tb[NFQA_VLAN_PROTO]) return -EINVAL; - entry->skb->vlan_tci = ntohs(nla_get_be16(tb[NFQA_VLAN_TCI])); - entry->skb->vlan_proto = nla_get_be16(tb[NFQA_VLAN_PROTO]); + __vlan_hwaccel_put_tag(entry->skb, + nla_get_be16(tb[NFQA_VLAN_PROTO]), + ntohs(nla_get_be16(tb[NFQA_VLAN_TCI]))); } if (nfqa[NFQA_L2HDR]) { diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 768292eac2a4..7334e0b80a5e 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -54,9 +54,11 @@ static bool nft_xt_put(struct nft_xt *xt) return false; } -static int nft_compat_chain_validate_dependency(const char *tablename, - const struct nft_chain *chain) +static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx, + const char *tablename) { + enum nft_chain_types type = NFT_CHAIN_T_DEFAULT; + const struct nft_chain *chain = ctx->chain; const struct nft_base_chain *basechain; if (!tablename || @@ -64,9 +66,12 @@ static int nft_compat_chain_validate_dependency(const char *tablename, return 0; basechain = nft_base_chain(chain); - if (strcmp(tablename, "nat") == 0 && - basechain->type->type != NFT_CHAIN_T_NAT) - return -EINVAL; + if (strcmp(tablename, "nat") == 0) { + if (ctx->family != NFPROTO_BRIDGE) + type = NFT_CHAIN_T_NAT; + if (basechain->type->type != type) + return -EINVAL; + } return 0; } @@ -342,8 +347,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, if (target->hooks && !(hook_mask & target->hooks)) return -EINVAL; - ret = nft_compat_chain_validate_dependency(target->table, - ctx->chain); + ret = nft_compat_chain_validate_dependency(ctx, target->table); if (ret < 0) return ret; } @@ -516,6 +520,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, void *info) { struct xt_match *match = expr->ops->data; + struct module *me = match->me; struct xt_mtdtor_param par; par.net = ctx->net; @@ -526,7 +531,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, par.match->destroy(&par); if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) - module_put(match->me); + module_put(me); } static void @@ -590,8 +595,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (match->hooks && !(hook_mask & match->hooks)) return -EINVAL; - ret = nft_compat_chain_validate_dependency(match->table, - ctx->chain); + ret = nft_compat_chain_validate_dependency(ctx, match->table); if (ret < 0) return ret; } diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index e82d9a966c45..974525eb92df 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -214,7 +214,9 @@ static int __init nft_flow_offload_module_init(void) { int err; - register_netdevice_notifier(&flow_offload_netdev_notifier); + err = register_netdevice_notifier(&flow_offload_netdev_notifier); + if (err) + goto err; err = nft_register_expr(&nft_flow_offload_type); if (err < 0) @@ -224,6 +226,7 @@ static int __init nft_flow_offload_module_init(void) register_expr: unregister_netdevice_notifier(&flow_offload_netdev_notifier); +err: return err; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 6180626c3f80..6df486c5ebd3 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -229,7 +229,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, } #ifdef CONFIG_XFRM case NFT_META_SECPATH: - nft_reg_store8(dest, !!skb->sp); + nft_reg_store8(dest, secpath_exists(skb)); break; #endif #ifdef CONFIG_NF_TABLES_BRIDGE diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 649d1700ec5b..3cc1b3dc3c3c 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -24,7 +24,6 @@ struct nft_ng_inc { u32 modulus; atomic_t counter; u32 offset; - struct nft_set *map; }; static u32 nft_ng_inc_gen(struct nft_ng_inc *priv) @@ -48,34 +47,11 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, regs->data[priv->dreg] = nft_ng_inc_gen(priv); } -static void nft_ng_inc_map_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_ng_inc *priv = nft_expr_priv(expr); - const struct nft_set *map = priv->map; - const struct nft_set_ext *ext; - u32 result; - bool found; - - result = nft_ng_inc_gen(priv); - found = map->ops->lookup(nft_net(pkt), map, &result, &ext); - - if (!found) - return; - - nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext), map->dlen); -} - static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { [NFTA_NG_DREG] = { .type = NLA_U32 }, [NFTA_NG_MODULUS] = { .type = NLA_U32 }, [NFTA_NG_TYPE] = { .type = NLA_U32 }, [NFTA_NG_OFFSET] = { .type = NLA_U32 }, - [NFTA_NG_SET_NAME] = { .type = NLA_STRING, - .len = NFT_SET_MAXNAMELEN - 1 }, - [NFTA_NG_SET_ID] = { .type = NLA_U32 }, }; static int nft_ng_inc_init(const struct nft_ctx *ctx, @@ -101,22 +77,6 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } -static int nft_ng_inc_map_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_ng_inc *priv = nft_expr_priv(expr); - u8 genmask = nft_genmask_next(ctx->net); - - nft_ng_inc_init(ctx, expr, tb); - - priv->map = nft_set_lookup_global(ctx->net, ctx->table, - tb[NFTA_NG_SET_NAME], - tb[NFTA_NG_SET_ID], genmask); - - return PTR_ERR_OR_ZERO(priv->map); -} - static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, u32 modulus, enum nft_ng_types type, u32 offset) { @@ -143,27 +103,10 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) priv->offset); } -static int nft_ng_inc_map_dump(struct sk_buff *skb, - const struct nft_expr *expr) -{ - const struct nft_ng_inc *priv = nft_expr_priv(expr); - - if (nft_ng_dump(skb, priv->dreg, priv->modulus, - NFT_NG_INCREMENTAL, priv->offset) || - nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name)) - goto nla_put_failure; - - return 0; - -nla_put_failure: - return -1; -} - struct nft_ng_random { enum nft_registers dreg:8; u32 modulus; u32 offset; - struct nft_set *map; }; static u32 nft_ng_random_gen(struct nft_ng_random *priv) @@ -183,25 +126,6 @@ static void nft_ng_random_eval(const struct nft_expr *expr, regs->data[priv->dreg] = nft_ng_random_gen(priv); } -static void nft_ng_random_map_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_ng_random *priv = nft_expr_priv(expr); - const struct nft_set *map = priv->map; - const struct nft_set_ext *ext; - u32 result; - bool found; - - result = nft_ng_random_gen(priv); - found = map->ops->lookup(nft_net(pkt), map, &result, &ext); - if (!found) - return; - - nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext), map->dlen); -} - static int nft_ng_random_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -226,21 +150,6 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } -static int nft_ng_random_map_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_ng_random *priv = nft_expr_priv(expr); - u8 genmask = nft_genmask_next(ctx->net); - - nft_ng_random_init(ctx, expr, tb); - priv->map = nft_set_lookup_global(ctx->net, ctx->table, - tb[NFTA_NG_SET_NAME], - tb[NFTA_NG_SET_ID], genmask); - - return PTR_ERR_OR_ZERO(priv->map); -} - static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_random *priv = nft_expr_priv(expr); @@ -249,22 +158,6 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) priv->offset); } -static int nft_ng_random_map_dump(struct sk_buff *skb, - const struct nft_expr *expr) -{ - const struct nft_ng_random *priv = nft_expr_priv(expr); - - if (nft_ng_dump(skb, priv->dreg, priv->modulus, - NFT_NG_RANDOM, priv->offset) || - nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name)) - goto nla_put_failure; - - return 0; - -nla_put_failure: - return -1; -} - static struct nft_expr_type nft_ng_type; static const struct nft_expr_ops nft_ng_inc_ops = { .type = &nft_ng_type, @@ -274,14 +167,6 @@ static const struct nft_expr_ops nft_ng_inc_ops = { .dump = nft_ng_inc_dump, }; -static const struct nft_expr_ops nft_ng_inc_map_ops = { - .type = &nft_ng_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)), - .eval = nft_ng_inc_map_eval, - .init = nft_ng_inc_map_init, - .dump = nft_ng_inc_map_dump, -}; - static const struct nft_expr_ops nft_ng_random_ops = { .type = &nft_ng_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)), @@ -290,14 +175,6 @@ static const struct nft_expr_ops nft_ng_random_ops = { .dump = nft_ng_random_dump, }; -static const struct nft_expr_ops nft_ng_random_map_ops = { - .type = &nft_ng_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)), - .eval = nft_ng_random_map_eval, - .init = nft_ng_random_map_init, - .dump = nft_ng_random_map_dump, -}; - static const struct nft_expr_ops * nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { @@ -312,12 +189,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) switch (type) { case NFT_NG_INCREMENTAL: - if (tb[NFTA_NG_SET_NAME]) - return &nft_ng_inc_map_ops; return &nft_ng_inc_ops; case NFT_NG_RANDOM: - if (tb[NFTA_NG_SET_NAME]) - return &nft_ng_random_map_ops; return &nft_ng_random_ops; } diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index ca5e5d8c5ef8..b13618c764ec 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -50,7 +50,7 @@ static int nft_osf_init(const struct nft_ctx *ctx, int err; u8 ttl; - if (nla_get_u8(tb[NFTA_OSF_TTL])) { + if (tb[NFTA_OSF_TTL]) { ttl = nla_get_u8(tb[NFTA_OSF_TTL]); if (ttl > 2) return -EINVAL; diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 5322609f7662..b08865ec5ed3 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -161,7 +161,7 @@ static void nft_xfrm_get_eval_in(const struct nft_xfrm *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - const struct sec_path *sp = pkt->skb->sp; + const struct sec_path *sp = skb_sec_path(pkt->skb); const struct xfrm_state *state; if (sp == NULL || sp->len <= priv->spnum) { diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index c6acfc2d9c84..eb4cbd244c3d 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -114,6 +114,22 @@ static void idletimer_tg_expired(struct timer_list *t) schedule_work(&timer->work); } +static int idletimer_check_sysfs_name(const char *name, unsigned int size) +{ + int ret; + + ret = xt_check_proc_name(name, size); + if (ret < 0) + return ret; + + if (!strcmp(name, "power") || + !strcmp(name, "subsystem") || + !strcmp(name, "uevent")) + return -EINVAL; + + return 0; +} + static int idletimer_tg_create(struct idletimer_tg_info *info) { int ret; @@ -124,6 +140,10 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) goto out; } + ret = idletimer_check_sysfs_name(info->label, sizeof(info->label)); + if (ret < 0) + goto out_free_timer; + sysfs_attr_init(&info->timer->attr.attr); info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); if (!info->timer->attr.attr.name) { diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index dec843cadf46..9e05c86ba5c4 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -201,18 +201,8 @@ static __net_init int xt_rateest_net_init(struct net *net) return 0; } -static void __net_exit xt_rateest_net_exit(struct net *net) -{ - struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); - int i; - - for (i = 0; i < ARRAY_SIZE(xn->hash); i++) - WARN_ON_ONCE(!hlist_empty(&xn->hash[i])); -} - static struct pernet_operations xt_rateest_net_ops = { .init = xt_rateest_net_init, - .exit = xt_rateest_net_exit, .id = &xt_rateest_id, .size = sizeof(struct xt_rateest_net), }; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 3e7d259e5d8d..28e27a32d9b9 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -260,7 +260,7 @@ static inline void dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) { hlist_del_rcu(&ent->node); - call_rcu_bh(&ent->rcu, dsthash_free_rcu); + call_rcu(&ent->rcu, dsthash_free_rcu); ht->count--; } static void htable_gc(struct work_struct *work); @@ -295,9 +295,10 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, /* copy match config into hashtable config */ ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3); - - if (ret) + if (ret) { + vfree(hinfo); return ret; + } hinfo->cfg.size = size; if (hinfo->cfg.max == 0) @@ -814,7 +815,6 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) int ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 1); - if (ret) return ret; @@ -830,7 +830,6 @@ hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) int ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 2); - if (ret) return ret; @@ -921,7 +920,6 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) return ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 1); - if (ret) return ret; @@ -940,7 +938,6 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par) return ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 2); - if (ret) return ret; @@ -1329,7 +1326,7 @@ static void __exit hashlimit_mt_exit(void) xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); unregister_pernet_subsys(&hashlimit_net_ops); - rcu_barrier_bh(); + rcu_barrier(); kmem_cache_destroy(hashlimit_cachep); } diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 9d6d67b953ac..4034d70bff39 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -33,7 +33,7 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par) /* Not a bridged IP packet or no info available yet: * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if * the destination device will be a bridge. */ - if (!skb->nf_bridge) { + if (!nf_bridge_info_exists(skb)) { /* Return MATCH if the invert flags of the used options are on */ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && !(info->invert & XT_PHYSDEV_OP_BRIDGED)) diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 13f8ccf946d6..aa84e8121c93 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -56,7 +56,7 @@ match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info, unsigned short family) { const struct xt_policy_elem *e; - const struct sec_path *sp = skb->sp; + const struct sec_path *sp = skb_sec_path(skb); int strict = info->flags & XT_POLICY_MATCH_STRICT; int i, pos; |