diff options
Diffstat (limited to 'net/netfilter')
25 files changed, 552 insertions, 163 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 25313c29d799..52370211e46b 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -441,6 +441,7 @@ endif # NF_CONNTRACK config NF_TABLES select NETFILTER_NETLINK + select LIBCRC32C tristate "Netfilter nf_tables support" help nftables is the new packet classification framework that intends to diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 3ac7c8c1548d..63d032191e62 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -282,6 +282,16 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum, return NULL; return net->nf.hooks_bridge + hooknum; #endif +#ifdef CONFIG_NETFILTER_INGRESS + case NFPROTO_INET: + if (WARN_ON_ONCE(hooknum != NF_INET_INGRESS)) + return NULL; + if (!dev || dev_net(dev) != net) { + WARN_ON_ONCE(1); + return NULL; + } + return &dev->nf_hooks_ingress; +#endif case NFPROTO_IPV4: if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum)) return NULL; @@ -311,20 +321,80 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum, return NULL; } +static int nf_ingress_check(struct net *net, const struct nf_hook_ops *reg, + int hooknum) +{ +#ifndef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == hooknum) + return -EOPNOTSUPP; +#endif + if (reg->hooknum != hooknum || + !reg->dev || dev_net(reg->dev) != net) + return -EINVAL; + + return 0; +} + +static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf) +{ + if ((pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) || + (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS)) + return true; + + return false; +} + +static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf) +{ +#ifdef CONFIG_JUMP_LABEL + int hooknum; + + if (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS) { + pf = NFPROTO_NETDEV; + hooknum = NF_NETDEV_INGRESS; + } else { + hooknum = reg->hooknum; + } + static_key_slow_inc(&nf_hooks_needed[pf][hooknum]); +#endif +} + +static void nf_static_key_dec(const struct nf_hook_ops *reg, int pf) +{ +#ifdef CONFIG_JUMP_LABEL + int hooknum; + + if (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS) { + pf = NFPROTO_NETDEV; + hooknum = NF_NETDEV_INGRESS; + } else { + hooknum = reg->hooknum; + } + static_key_slow_dec(&nf_hooks_needed[pf][hooknum]); +#endif +} + static int __nf_register_net_hook(struct net *net, int pf, const struct nf_hook_ops *reg) { struct nf_hook_entries *p, *new_hooks; struct nf_hook_entries __rcu **pp; + int err; - if (pf == NFPROTO_NETDEV) { -#ifndef CONFIG_NETFILTER_INGRESS - if (reg->hooknum == NF_NETDEV_INGRESS) - return -EOPNOTSUPP; -#endif - if (reg->hooknum != NF_NETDEV_INGRESS || - !reg->dev || dev_net(reg->dev) != net) - return -EINVAL; + switch (pf) { + case NFPROTO_NETDEV: + err = nf_ingress_check(net, reg, NF_NETDEV_INGRESS); + if (err < 0) + return err; + break; + case NFPROTO_INET: + if (reg->hooknum != NF_INET_INGRESS) + break; + + err = nf_ingress_check(net, reg, NF_INET_INGRESS); + if (err < 0) + return err; + break; } pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev); @@ -345,12 +415,11 @@ static int __nf_register_net_hook(struct net *net, int pf, hooks_validate(new_hooks); #ifdef CONFIG_NETFILTER_INGRESS - if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) + if (nf_ingress_hook(reg, pf)) net_inc_ingress_queue(); #endif -#ifdef CONFIG_JUMP_LABEL - static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]); -#endif + nf_static_key_inc(reg, pf); + BUG_ON(p == new_hooks); nf_hook_entries_free(p); return 0; @@ -403,12 +472,10 @@ static void __nf_unregister_net_hook(struct net *net, int pf, if (nf_remove_net_hook(p, reg)) { #ifdef CONFIG_NETFILTER_INGRESS - if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) + if (nf_ingress_hook(reg, pf)) net_dec_ingress_queue(); #endif -#ifdef CONFIG_JUMP_LABEL - static_key_slow_dec(&nf_hooks_needed[pf][reg->hooknum]); -#endif + nf_static_key_dec(reg, pf); } else { WARN_ONCE(1, "hook not found, pf %d num %d", pf, reg->hooknum); } @@ -425,8 +492,12 @@ static void __nf_unregister_net_hook(struct net *net, int pf, void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { if (reg->pf == NFPROTO_INET) { - __nf_unregister_net_hook(net, NFPROTO_IPV4, reg); - __nf_unregister_net_hook(net, NFPROTO_IPV6, reg); + if (reg->hooknum == NF_INET_INGRESS) { + __nf_unregister_net_hook(net, NFPROTO_INET, reg); + } else { + __nf_unregister_net_hook(net, NFPROTO_IPV4, reg); + __nf_unregister_net_hook(net, NFPROTO_IPV6, reg); + } } else { __nf_unregister_net_hook(net, reg->pf, reg); } @@ -451,14 +522,20 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) int err; if (reg->pf == NFPROTO_INET) { - err = __nf_register_net_hook(net, NFPROTO_IPV4, reg); - if (err < 0) - return err; - - err = __nf_register_net_hook(net, NFPROTO_IPV6, reg); - if (err < 0) { - __nf_unregister_net_hook(net, NFPROTO_IPV4, reg); - return err; + if (reg->hooknum == NF_INET_INGRESS) { + err = __nf_register_net_hook(net, NFPROTO_INET, reg); + if (err < 0) + return err; + } else { + err = __nf_register_net_hook(net, NFPROTO_IPV4, reg); + if (err < 0) + return err; + + err = __nf_register_net_hook(net, NFPROTO_IPV6, reg); + if (err < 0) { + __nf_unregister_net_hook(net, NFPROTO_IPV4, reg); + return err; + } } } else { err = __nf_register_net_hook(net, reg->pf, reg); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 920b7c4331f0..6f35832f0de3 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -250,22 +250,7 @@ EXPORT_SYMBOL_GPL(ip_set_type_unregister); void * ip_set_alloc(size_t size) { - void *members = NULL; - - if (size < KMALLOC_MAX_SIZE) - members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - - if (members) { - pr_debug("%p: allocated with kmalloc\n", members); - return members; - } - - members = vzalloc(size); - if (!members) - return NULL; - pr_debug("%p: allocated with vmalloc\n", members); - - return members; + return kvzalloc(size, GFP_KERNEL_ACCOUNT); } EXPORT_SYMBOL_GPL(ip_set_alloc); diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 2c1593089ede..eb0e329f9b8d 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -29,7 +29,6 @@ if IP_VS config IP_VS_IPV6 bool "IPv6 support for IPVS" depends on IPV6 = y || IP_VS = IPV6 - select IP6_NF_IPTABLES select NF_DEFRAG_IPV6 help Add IPv6 support to IPVS. diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a90b8eac16ac..c100c6b112c8 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -402,6 +402,8 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { unsigned int hash; struct ip_vs_conn *cp, *ret=NULL; + const union nf_inet_addr *saddr; + __be16 sport; /* * Check for "full" addressed entries @@ -411,10 +413,20 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (p->vport == cp->cport && p->cport == cp->dport && - cp->af == p->af && + if (p->vport != cp->cport) + continue; + + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { + sport = cp->vport; + saddr = &cp->vaddr; + } else { + sport = cp->dport; + saddr = &cp->daddr; + } + + if (p->cport == sport && cp->af == p->af && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && - ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && + ip_vs_addr_equal(p->af, p->caddr, saddr) && p->protocol == cp->protocol && cp->ipvs == p->ipvs) { if (!__ip_vs_conn_get(cp)) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e3668a6e54e4..cc3c275934f4 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -875,7 +875,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) - goto ignore_cp; + goto after_nat; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { @@ -901,6 +901,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto out; +after_nat: /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); @@ -909,8 +910,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); - -ignore_cp: verdict = NF_ACCEPT; out: @@ -1276,6 +1275,9 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, { struct ip_vs_protocol *pp = pd->pp; + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto after_nat; + IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); if (skb_ensure_writable(skb, iph->len)) @@ -1316,6 +1318,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT"); +after_nat: ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); skb->ipvs_property = 1; @@ -1412,11 +1415,8 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto, ipvs, af, skb, &iph); - if (likely(cp)) { - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) - goto ignore_cp; + if (likely(cp)) return handle_response(af, skb, pd, cp, &iph, hooknum); - } /* Check for real-server-started requests */ if (atomic_read(&ipvs->conn_out_counter)) { @@ -1475,14 +1475,9 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in } } -out: IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; - -ignore_cp: - __ip_vs_conn_put(cp); - goto out; } /* diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 678c5b14841c..e279ded4e306 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2508,6 +2508,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) /* Set timeout values for (tcp tcpfin udp) */ ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg); goto out_unlock; + } else if (!len) { + /* No more commands with len == 0 below */ + ret = -EINVAL; + goto out_unlock; } usvc_compat = (struct ip_vs_service_user *)arg; @@ -2584,9 +2588,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) break; case IP_VS_SO_SET_DELDEST: ret = ip_vs_del_dest(svc, &udest); - break; - default: - ret = -EINVAL; } out_unlock: @@ -3892,7 +3893,7 @@ out: } -static const struct genl_ops ip_vs_genl_ops[] = { +static const struct genl_small_ops ip_vs_genl_ops[] = { { .cmd = IPVS_CMD_NEW_SERVICE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, @@ -4000,8 +4001,8 @@ static struct genl_family ip_vs_genl_family __ro_after_init = { .policy = ip_vs_cmd_policy, .netnsok = true, /* Make ipvsadm to work on netns */ .module = THIS_MODULE, - .ops = ip_vs_genl_ops, - .n_ops = ARRAY_SIZE(ip_vs_genl_ops), + .small_ops = ip_vs_genl_ops, + .n_small_ops = ARRAY_SIZE(ip_vs_genl_ops), }; static int __init ip_vs_genl_register(void) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 2b8abbfe018c..16b48064f715 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -242,9 +242,6 @@ struct ip_vs_sync_thread_data { | IPVS Sync Connection (1) | */ -#define SYNC_MESG_HEADER_LEN 4 -#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ - /* Version 0 header */ struct ip_vs_sync_mesg_v0 { __u8 nr_conns; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index b00866d777fe..d2e5a8f644b8 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -609,6 +609,8 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, if (ret == NF_ACCEPT) { nf_reset_ct(skb); skb_forward_csum(skb); + if (skb->dev) + skb->tstamp = 0; } return ret; } @@ -649,6 +651,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, if (!local) { skb_forward_csum(skb); + if (skb->dev) + skb->tstamp = 0; NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else @@ -669,6 +673,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, if (!local) { ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); + if (skb->dev) + skb->tstamp = 0; NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5b97d233f89b..234b7cab37c3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -859,7 +859,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) out: nf_conntrack_double_unlock(hash, reply_hash); - NF_CT_STAT_INC(net, insert_failed); local_bh_enable(); return -EEXIST; } @@ -909,6 +908,7 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct) tstamp->start = ktime_get_real_ns(); } +/* caller must hold locks to prevent concurrent changes */ static int __nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h) { @@ -922,23 +922,21 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb, if (nf_ct_is_dying(ct)) return NF_DROP; - if (!atomic_inc_not_zero(&ct->ct_general.use)) - return NF_DROP; - if (((ct->status & IPS_NAT_DONE_MASK) == 0) || nf_ct_match(ct, loser_ct)) { struct net *net = nf_ct_net(ct); + nf_conntrack_get(&ct->ct_general); + nf_ct_acct_merge(ct, ctinfo, loser_ct); nf_ct_add_to_dying_list(loser_ct); nf_conntrack_put(&loser_ct->ct_general); nf_ct_set(skb, ct, ctinfo); - NF_CT_STAT_INC(net, insert_failed); + NF_CT_STAT_INC(net, clash_resolve); return NF_ACCEPT; } - nf_ct_put(ct); return NF_DROP; } @@ -998,6 +996,8 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &nf_conntrack_hash[repl_idx]); + + NF_CT_STAT_INC(net, clash_resolve); return NF_ACCEPT; } @@ -1027,10 +1027,10 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) * * Failing that, the new, unconfirmed conntrack is still added to the table * provided that the collision only occurs in the ORIGINAL direction. - * The new entry will be added after the existing one in the hash list, + * The new entry will be added only in the non-clashing REPLY direction, * so packets in the ORIGINAL direction will continue to match the existing * entry. The new entry will also have a fixed timeout so it expires -- - * due to the collision, it will not see bidirectional traffic. + * due to the collision, it will only see reply traffic. * * Returns NF_DROP if the clash could not be resolved. */ @@ -1725,10 +1725,8 @@ nf_conntrack_handle_icmp(struct nf_conn *tmpl, else return NF_ACCEPT; - if (ret <= 0) { + if (ret <= 0) NF_CT_STAT_INC_ATOMIC(state->net, error); - NF_CT_STAT_INC_ATOMIC(state->net, invalid); - } return ret; } @@ -1802,10 +1800,8 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) if (tmpl || ctinfo == IP_CT_UNTRACKED) { /* Previously seen (loopback or untracked)? Ignore. */ if ((tmpl && !nf_ct_is_template(tmpl)) || - ctinfo == IP_CT_UNTRACKED) { - NF_CT_STAT_INC_ATOMIC(state->net, ignore); + ctinfo == IP_CT_UNTRACKED) return NF_ACCEPT; - } skb->_nfct = 0; } @@ -1813,7 +1809,6 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) dataoff = get_l4proto(skb, skb_network_offset(skb), state->pf, &protonum); if (dataoff <= 0) { pr_debug("not prepared to track yet or error occurred\n"); - NF_CT_STAT_INC_ATOMIC(state->net, error); NF_CT_STAT_INC_ATOMIC(state->net, invalid); ret = NF_ACCEPT; goto out; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c3a4214dc958..3d0fd33be018 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2497,7 +2497,6 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) || - nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) || nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) || nla_put_be32(skb, CTA_STATS_INSERT_FAILED, htonl(st->insert_failed)) || @@ -2505,7 +2504,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, nla_put_be32(skb, CTA_STATS_EARLY_DROP, htonl(st->early_drop)) || nla_put_be32(skb, CTA_STATS_ERROR, htonl(st->error)) || nla_put_be32(skb, CTA_STATS_SEARCH_RESTART, - htonl(st->search_restart))) + htonl(st->search_restart)) || + nla_put_be32(skb, CTA_STATS_CLASH_RESOLVE, + htonl(st->clash_resolve))) goto nla_put_failure; nlmsg_end(skb, nlh); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index a604f43e3e6b..46c5557c1fec 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -428,18 +428,18 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { - seq_puts(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); + seq_puts(seq, "entries clashres found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); return 0; } seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", nr_conntracks, - 0, + st->clash_resolve, st->found, 0, st->invalid, - st->ignore, + 0, 0, 0, st->insert, diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 4f7a567c536e..513f78db3cb2 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -395,8 +395,7 @@ static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, { struct tcphdr *tcph; - if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || - skb_try_make_writable(skb, thoff + sizeof(*tcph))) + if (skb_try_make_writable(skb, thoff + sizeof(*tcph))) return -1; tcph = (void *)(skb_network_header(skb) + thoff); @@ -410,8 +409,7 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, { struct udphdr *udph; - if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || - skb_try_make_writable(skb, thoff + sizeof(*udph))) + if (skb_try_make_writable(skb, thoff + sizeof(*udph))) return -1; udph = (void *)(skb_network_header(skb) + thoff); @@ -449,8 +447,7 @@ int nf_flow_snat_port(const struct flow_offload *flow, struct flow_ports *hdr; __be16 port, new_port; - if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || - skb_try_make_writable(skb, thoff + sizeof(*hdr))) + if (skb_try_make_writable(skb, thoff + sizeof(*hdr))) return -1; hdr = (void *)(skb_network_header(skb) + thoff); @@ -481,8 +478,7 @@ int nf_flow_dnat_port(const struct flow_offload *flow, struct flow_ports *hdr; __be16 port, new_port; - if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || - skb_try_make_writable(skb, thoff + sizeof(*hdr))) + if (skb_try_make_writable(skb, thoff + sizeof(*hdr))) return -1; hdr = (void *)(skb_network_header(skb) + thoff); diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index a3bca758b849..a698dbe28ef5 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -25,9 +25,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto, if (proto != IPPROTO_TCP) return 0; - if (!pskb_may_pull(skb, thoff + sizeof(*tcph))) - return -1; - tcph = (void *)(skb_network_header(skb) + thoff); if (unlikely(tcph->fin || tcph->rst)) { flow_offload_teardown(flow); @@ -42,8 +39,7 @@ static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, { struct tcphdr *tcph; - if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || - skb_try_make_writable(skb, thoff + sizeof(*tcph))) + if (skb_try_make_writable(skb, thoff + sizeof(*tcph))) return -1; tcph = (void *)(skb_network_header(skb) + thoff); @@ -57,8 +53,7 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, { struct udphdr *udph; - if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || - skb_try_make_writable(skb, thoff + sizeof(*udph))) + if (skb_try_make_writable(skb, thoff + sizeof(*udph))) return -1; udph = (void *)(skb_network_header(skb) + thoff); @@ -167,8 +162,8 @@ static bool ip_has_options(unsigned int thoff) static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, struct flow_offload_tuple *tuple) { + unsigned int thoff, hdrsize; struct flow_ports *ports; - unsigned int thoff; struct iphdr *iph; if (!pskb_may_pull(skb, sizeof(*iph))) @@ -181,15 +176,22 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, unlikely(ip_has_options(thoff))) return -1; - if (iph->protocol != IPPROTO_TCP && - iph->protocol != IPPROTO_UDP) + switch (iph->protocol) { + case IPPROTO_TCP: + hdrsize = sizeof(struct tcphdr); + break; + case IPPROTO_UDP: + hdrsize = sizeof(struct udphdr); + break; + default: return -1; + } if (iph->ttl <= 1) return -1; thoff = iph->ihl * 4; - if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + if (!pskb_may_pull(skb, thoff + hdrsize)) return -1; iph = ip_hdr(skb); @@ -315,8 +317,7 @@ static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, { struct tcphdr *tcph; - if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || - skb_try_make_writable(skb, thoff + sizeof(*tcph))) + if (skb_try_make_writable(skb, thoff + sizeof(*tcph))) return -1; tcph = (void *)(skb_network_header(skb) + thoff); @@ -332,8 +333,7 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, { struct udphdr *udph; - if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || - skb_try_make_writable(skb, thoff + sizeof(*udph))) + if (skb_try_make_writable(skb, thoff + sizeof(*udph))) return -1; udph = (void *)(skb_network_header(skb) + thoff); @@ -439,24 +439,31 @@ static int nf_flow_nat_ipv6(const struct flow_offload *flow, static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, struct flow_offload_tuple *tuple) { + unsigned int thoff, hdrsize; struct flow_ports *ports; struct ipv6hdr *ip6h; - unsigned int thoff; if (!pskb_may_pull(skb, sizeof(*ip6h))) return -1; ip6h = ipv6_hdr(skb); - if (ip6h->nexthdr != IPPROTO_TCP && - ip6h->nexthdr != IPPROTO_UDP) + switch (ip6h->nexthdr) { + case IPPROTO_TCP: + hdrsize = sizeof(struct tcphdr); + break; + case IPPROTO_UDP: + hdrsize = sizeof(struct udphdr); + break; + default: return -1; + } if (ip6h->hop_limit <= 1) return -1; thoff = sizeof(*ip6h); - if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + if (!pskb_may_pull(skb, thoff + hdrsize)) return -1; ip6h = ipv6_hdr(skb); diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index ae5628ddbe6d..fd7c5f0f5c25 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -171,6 +171,18 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, } EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); +void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb) +{ + u16 vid; + + if (!skb_vlan_tag_present(skb)) + return; + + vid = skb_vlan_tag_get(skb); + nf_log_buf_add(m, "VPROTO=%04x VID=%u ", ntohs(skb->vlan_proto), vid); +} +EXPORT_SYMBOL_GPL(nf_log_dump_vlan); + /* bridge and netdev logging families share this code. */ void nf_log_l2packet(struct net *net, u_int8_t pf, __be16 protocol, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4603b667973a..9957e0ed8658 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -206,7 +206,7 @@ static int nf_tables_register_hook(struct net *net, if (basechain->type->ops_register) return basechain->type->ops_register(net, ops); - if (table->family == NFPROTO_NETDEV) + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) return nft_netdev_register_hooks(net, &basechain->hook_list); return nf_register_net_hook(net, &basechain->ops); @@ -228,7 +228,7 @@ static void nf_tables_unregister_hook(struct net *net, if (basechain->type->ops_unregister) return basechain->type->ops_unregister(net, ops); - if (table->family == NFPROTO_NETDEV) + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) nft_netdev_unregister_hooks(net, &basechain->hook_list); else nf_unregister_net_hook(net, &basechain->ops); @@ -650,6 +650,8 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, [NFTA_TABLE_HANDLE] = { .type = NLA_U64 }, + [NFTA_TABLE_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN } }; static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, @@ -676,6 +678,11 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, NFTA_TABLE_PAD)) goto nla_put_failure; + if (table->udata) { + if (nla_put(skb, NFTA_TABLE_USERDATA, table->udlen, table->udata)) + goto nla_put_failure; + } + nlmsg_end(skb, nlh); return 0; @@ -988,8 +995,8 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, int family = nfmsg->nfgen_family; const struct nlattr *attr; struct nft_table *table; - u32 flags = 0; struct nft_ctx ctx; + u32 flags = 0; int err; lockdep_assert_held(&net->nft.commit_mutex); @@ -1025,6 +1032,14 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (table->name == NULL) goto err_strdup; + if (nla[NFTA_TABLE_USERDATA]) { + table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL); + if (table->udata == NULL) + goto err_table_udata; + + table->udlen = nla_len(nla[NFTA_TABLE_USERDATA]); + } + err = rhltable_init(&table->chains_ht, &nft_chain_ht_params); if (err) goto err_chain_ht; @@ -1047,6 +1062,8 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, err_trans: rhltable_destroy(&table->chains_ht); err_chain_ht: + kfree(table->udata); +err_table_udata: kfree(table->name); err_strdup: kfree(table); @@ -1202,6 +1219,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) rhltable_destroy(&ctx->table->chains_ht); kfree(ctx->table->name); + kfree(ctx->table->udata); kfree(ctx->table); } @@ -1297,6 +1315,8 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, [NFTA_CHAIN_FLAGS] = { .type = NLA_U32 }, [NFTA_CHAIN_ID] = { .type = NLA_U32 }, + [NFTA_CHAIN_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN }, }; static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { @@ -1361,7 +1381,7 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family, if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) goto nla_put_failure; - if (family == NFPROTO_NETDEV) { + if (nft_base_chain_netdev(family, ops->hooknum)) { nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS); list_for_each_entry(hook, &basechain->hook_list, list) { if (!first) @@ -1438,6 +1458,10 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) goto nla_put_failure; + if (chain->udata && + nla_put(skb, NFTA_CHAIN_USERDATA, chain->udlen, chain->udata)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; @@ -1661,7 +1685,7 @@ void nf_tables_chain_destroy(struct nft_ctx *ctx) if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain); - if (ctx->family == NFPROTO_NETDEV) { + if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) { list_for_each_entry_safe(hook, next, &basechain->hook_list, list) { list_del_rcu(&hook->list); @@ -1674,9 +1698,11 @@ void nf_tables_chain_destroy(struct nft_ctx *ctx) free_percpu(rcu_dereference_raw(basechain->stats)); } kfree(chain->name); + kfree(chain->udata); kfree(basechain); } else { kfree(chain->name); + kfree(chain->udata); kfree(chain); } } @@ -1838,7 +1864,7 @@ static int nft_chain_parse_hook(struct net *net, if (IS_ERR(type)) return PTR_ERR(type); } - if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num))) + if (hook->num >= NFT_MAX_HOOKS || !(type->hook_mask & (1 << hook->num))) return -EOPNOTSUPP; if (type->type == NFT_CHAIN_T_NAT && @@ -1851,7 +1877,7 @@ static int nft_chain_parse_hook(struct net *net, hook->type = type; INIT_LIST_HEAD(&hook->list); - if (family == NFPROTO_NETDEV) { + if (nft_base_chain_netdev(family, hook->num)) { err = nft_chain_parse_netdev(net, ha, &hook->list); if (err < 0) { module_put(type->owner); @@ -1918,7 +1944,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, INIT_LIST_HEAD(&basechain->hook_list); chain = &basechain->chain; - if (family == NFPROTO_NETDEV) { + if (nft_base_chain_netdev(family, hook->num)) { list_splice_init(&hook->list, &basechain->hook_list); list_for_each_entry(h, &basechain->hook_list, list) nft_basechain_hook_init(&h->ops, family, hook, chain); @@ -2030,7 +2056,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, } else { if (!(flags & NFT_CHAIN_BINDING)) { err = -EINVAL; - goto err1; + goto err_destroy_chain; } snprintf(name, sizeof(name), "__chain%llu", ++chain_id); @@ -2039,13 +2065,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (!chain->name) { err = -ENOMEM; - goto err1; + goto err_destroy_chain; + } + + if (nla[NFTA_CHAIN_USERDATA]) { + chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL); + if (chain->udata == NULL) { + err = -ENOMEM; + goto err_destroy_chain; + } + chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]); } rules = nf_tables_chain_alloc_rules(chain, 0); if (!rules) { err = -ENOMEM; - goto err1; + goto err_destroy_chain; } *rules = NULL; @@ -2054,12 +2089,12 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, err = nf_tables_register_hook(net, table, chain); if (err < 0) - goto err1; + goto err_destroy_chain; trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); - goto err2; + goto err_unregister_hook; } nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET; @@ -2069,15 +2104,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, err = nft_chain_add(table, chain); if (err < 0) { nft_trans_destroy(trans); - goto err2; + goto err_unregister_hook; } table->use++; return 0; -err2: +err_unregister_hook: nf_tables_unregister_hook(net, table, chain); -err1: +err_destroy_chain: nf_tables_chain_destroy(ctx); return err; @@ -2103,7 +2138,8 @@ static bool nft_hook_list_equal(struct list_head *hook_list1, } static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, - u32 flags) + u32 flags, const struct nlattr *attr, + struct netlink_ext_ack *extack) { const struct nlattr * const *nla = ctx->nla; struct nft_table *table = ctx->table; @@ -2119,9 +2155,10 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, return -EOPNOTSUPP; if (nla[NFTA_CHAIN_HOOK]) { - if (!nft_is_base_chain(chain)) + if (!nft_is_base_chain(chain)) { + NL_SET_BAD_ATTR(extack, attr); return -EEXIST; - + } err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family, false); if (err < 0) @@ -2130,13 +2167,15 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, basechain = nft_base_chain(chain); if (basechain->type != hook.type) { nft_chain_release_hook(&hook); + NL_SET_BAD_ATTR(extack, attr); return -EEXIST; } - if (ctx->family == NFPROTO_NETDEV) { + if (nft_base_chain_netdev(ctx->family, hook.num)) { if (!nft_hook_list_equal(&basechain->hook_list, &hook.list)) { nft_chain_release_hook(&hook); + NL_SET_BAD_ATTR(extack, attr); return -EEXIST; } } else { @@ -2144,6 +2183,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, if (ops->hooknum != hook.num || ops->priority != hook.priority) { nft_chain_release_hook(&hook); + NL_SET_BAD_ATTR(extack, attr); return -EEXIST; } } @@ -2156,8 +2196,10 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, chain2 = nft_chain_lookup(ctx->net, table, nla[NFTA_CHAIN_NAME], genmask); - if (!IS_ERR(chain2)) + if (!IS_ERR(chain2)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]); return -EEXIST; + } } if (nla[NFTA_CHAIN_COUNTERS]) { @@ -2200,6 +2242,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, nft_trans_chain_update(tmp) && nft_trans_chain_name(tmp) && strcmp(name, nft_trans_chain_name(tmp)) == 0) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]); kfree(name); goto err; } @@ -2322,7 +2365,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, return -EOPNOTSUPP; flags |= chain->flags & NFT_CHAIN_BASE; - return nf_tables_updchain(&ctx, genmask, policy, flags); + return nf_tables_updchain(&ctx, genmask, policy, flags, attr, + extack); } return nf_tables_addchain(&ctx, family, genmask, policy, flags); @@ -5737,6 +5781,8 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { [NFTA_OBJ_TYPE] = { .type = NLA_U32 }, [NFTA_OBJ_DATA] = { .type = NLA_NESTED }, [NFTA_OBJ_HANDLE] = { .type = NLA_U64}, + [NFTA_OBJ_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN }, }; static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, @@ -5928,7 +5974,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto err1; + goto err_init; } obj->key.table = table; obj->handle = nf_tables_alloc_handle(table); @@ -5936,32 +5982,42 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); if (!obj->key.name) { err = -ENOMEM; - goto err2; + goto err_strdup; + } + + if (nla[NFTA_OBJ_USERDATA]) { + obj->udata = nla_memdup(nla[NFTA_OBJ_USERDATA], GFP_KERNEL); + if (obj->udata == NULL) + goto err_userdata; + + obj->udlen = nla_len(nla[NFTA_OBJ_USERDATA]); } err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj); if (err < 0) - goto err3; + goto err_trans; err = rhltable_insert(&nft_objname_ht, &obj->rhlhead, nft_objname_ht_params); if (err < 0) - goto err4; + goto err_obj_ht; list_add_tail_rcu(&obj->list, &table->objects); table->use++; return 0; -err4: +err_obj_ht: /* queued in transaction log */ INIT_LIST_HEAD(&obj->list); return err; -err3: +err_trans: kfree(obj->key.name); -err2: +err_userdata: + kfree(obj->udata); +err_strdup: if (obj->ops->destroy) obj->ops->destroy(&ctx, obj); kfree(obj); -err1: +err_init: module_put(type->owner); return err; } @@ -5993,6 +6049,10 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, NFTA_OBJ_PAD)) goto nla_put_failure; + if (obj->udata && + nla_put(skb, NFTA_OBJ_USERDATA, obj->udlen, obj->udata)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; @@ -6199,6 +6259,7 @@ static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) module_put(obj->ops->type->owner); kfree(obj->key.name); + kfree(obj->udata); kfree(obj); } diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 587897a2498b..dbc2e945c98e 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -47,13 +47,22 @@ static inline void nft_trace_packet(struct nft_traceinfo *info, } } +static void nft_bitwise_fast_eval(const struct nft_expr *expr, + struct nft_regs *regs) +{ + const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); + u32 *src = ®s->data[priv->sreg]; + u32 *dst = ®s->data[priv->dreg]; + + *dst = (*src & priv->mask) ^ priv->xor; +} + static void nft_cmp_fast_eval(const struct nft_expr *expr, struct nft_regs *regs) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); - u32 mask = nft_cmp_fast_mask(priv->len); - if ((regs->data[priv->sreg] & mask) == priv->data) + if (((regs->data[priv->sreg] & priv->mask) == priv->data) ^ priv->inv) return; regs->verdict.code = NFT_BREAK; } @@ -176,6 +185,8 @@ next_rule: nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, ®s); + else if (expr->ops == &nft_bitwise_fast_ops) + nft_bitwise_fast_eval(expr, ®s); else if (expr->ops != &nft_payload_fast_ops || !nft_payload_fast_eval(expr, ®s, pkt)) expr_call_ops_eval(expr, ®s, pkt); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 9ef37c1b7b3b..7c7e06624dc3 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -323,8 +323,6 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *basechain, return nft_block_setup(basechain, &bo, cmd); } -#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK - static int nft_chain_offload_cmd(struct nft_base_chain *basechain, struct net_device *dev, enum flow_block_command cmd) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 3a2e64e13b22..2daa1f6ae344 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -46,6 +46,23 @@ static struct { const struct nfnetlink_subsystem __rcu *subsys; } table[NFNL_SUBSYS_COUNT]; +static struct lock_class_key nfnl_lockdep_keys[NFNL_SUBSYS_COUNT]; + +static const char *const nfnl_lockdep_names[NFNL_SUBSYS_COUNT] = { + [NFNL_SUBSYS_NONE] = "nfnl_subsys_none", + [NFNL_SUBSYS_CTNETLINK] = "nfnl_subsys_ctnetlink", + [NFNL_SUBSYS_CTNETLINK_EXP] = "nfnl_subsys_ctnetlink_exp", + [NFNL_SUBSYS_QUEUE] = "nfnl_subsys_queue", + [NFNL_SUBSYS_ULOG] = "nfnl_subsys_ulog", + [NFNL_SUBSYS_OSF] = "nfnl_subsys_osf", + [NFNL_SUBSYS_IPSET] = "nfnl_subsys_ipset", + [NFNL_SUBSYS_ACCT] = "nfnl_subsys_acct", + [NFNL_SUBSYS_CTNETLINK_TIMEOUT] = "nfnl_subsys_cttimeout", + [NFNL_SUBSYS_CTHELPER] = "nfnl_subsys_cthelper", + [NFNL_SUBSYS_NFTABLES] = "nfnl_subsys_nftables", + [NFNL_SUBSYS_NFT_COMPAT] = "nfnl_subsys_nftcompat", +}; + static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_NEW] = NFNL_SUBSYS_CTNETLINK, [NFNLGRP_CONNTRACK_UPDATE] = NFNL_SUBSYS_CTNETLINK, @@ -632,7 +649,7 @@ static int __init nfnetlink_init(void) BUG_ON(nfnl_group2type[i] == NFNL_SUBSYS_NONE); for (i=0; i<NFNL_SUBSYS_COUNT; i++) - mutex_init(&table[i].mutex); + __mutex_init(&table[i].mutex, nfnl_lockdep_names[i], &nfnl_lockdep_keys[i]); return register_pernet_subsys(&nfnetlink_net_ops); } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index bc37d6c59db4..bbd773d74377 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -163,11 +163,6 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, u32 len; int err; - if (!tb[NFTA_BITWISE_SREG] || - !tb[NFTA_BITWISE_DREG] || - !tb[NFTA_BITWISE_LEN]) - return -EINVAL; - err = nft_parse_u32_check(tb[NFTA_BITWISE_LEN], U8_MAX, &len); if (err < 0) return err; @@ -292,9 +287,143 @@ static const struct nft_expr_ops nft_bitwise_ops = { .offload = nft_bitwise_offload, }; +static int +nft_bitwise_extract_u32_data(const struct nlattr * const tb, u32 *out) +{ + struct nft_data_desc desc; + struct nft_data data; + int err = 0; + + err = nft_data_init(NULL, &data, sizeof(data), &desc, tb); + if (err < 0) + return err; + + if (desc.type != NFT_DATA_VALUE || desc.len != sizeof(u32)) { + err = -EINVAL; + goto err; + } + *out = data.data[0]; +err: + nft_data_release(&data, desc.type); + return err; +} + +static int nft_bitwise_fast_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); + int err; + + priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); + err = nft_validate_register_load(priv->sreg, sizeof(u32)); + if (err < 0) + return err; + + priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]); + err = nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); + if (err < 0) + return err; + + if (tb[NFTA_BITWISE_DATA]) + return -EINVAL; + + if (!tb[NFTA_BITWISE_MASK] || + !tb[NFTA_BITWISE_XOR]) + return -EINVAL; + + err = nft_bitwise_extract_u32_data(tb[NFTA_BITWISE_MASK], &priv->mask); + if (err < 0) + return err; + + err = nft_bitwise_extract_u32_data(tb[NFTA_BITWISE_XOR], &priv->xor); + if (err < 0) + return err; + + return 0; +} + +static int +nft_bitwise_fast_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); + struct nft_data data; + + if (nft_dump_register(skb, NFTA_BITWISE_SREG, priv->sreg)) + return -1; + if (nft_dump_register(skb, NFTA_BITWISE_DREG, priv->dreg)) + return -1; + if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(sizeof(u32)))) + return -1; + if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_BOOL))) + return -1; + + data.data[0] = priv->mask; + if (nft_data_dump(skb, NFTA_BITWISE_MASK, &data, + NFT_DATA_VALUE, sizeof(u32)) < 0) + return -1; + + data.data[0] = priv->xor; + if (nft_data_dump(skb, NFTA_BITWISE_XOR, &data, + NFT_DATA_VALUE, sizeof(u32)) < 0) + return -1; + + return 0; +} + +static int nft_bitwise_fast_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); + struct nft_offload_reg *reg = &ctx->regs[priv->dreg]; + + if (priv->xor || priv->sreg != priv->dreg || reg->len != sizeof(u32)) + return -EOPNOTSUPP; + + reg->mask.data[0] = priv->mask; + return 0; +} + +const struct nft_expr_ops nft_bitwise_fast_ops = { + .type = &nft_bitwise_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise_fast_expr)), + .eval = NULL, /* inlined */ + .init = nft_bitwise_fast_init, + .dump = nft_bitwise_fast_dump, + .offload = nft_bitwise_fast_offload, +}; + +static const struct nft_expr_ops * +nft_bitwise_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + int err; + u32 len; + + if (!tb[NFTA_BITWISE_LEN] || + !tb[NFTA_BITWISE_SREG] || + !tb[NFTA_BITWISE_DREG]) + return ERR_PTR(-EINVAL); + + err = nft_parse_u32_check(tb[NFTA_BITWISE_LEN], U8_MAX, &len); + if (err < 0) + return ERR_PTR(err); + + if (len != sizeof(u32)) + return &nft_bitwise_ops; + + if (tb[NFTA_BITWISE_OP] && + ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_BOOL) + return &nft_bitwise_ops; + + return &nft_bitwise_fast_ops; +} + struct nft_expr_type nft_bitwise_type __read_mostly = { .name = "bitwise", - .ops = &nft_bitwise_ops, + .select_ops = nft_bitwise_select_ops, .policy = nft_bitwise_policy, .maxattr = NFTA_BITWISE_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index c78d01bc02e9..ff8528ad3dc6 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -161,16 +161,49 @@ static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb, return nft_do_chain(&pkt, priv); } +static unsigned int nft_do_chain_inet_ingress(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_hook_state ingress_state = *state; + struct nft_pktinfo pkt; + + switch (skb->protocol) { + case htons(ETH_P_IP): + /* Original hook is NFPROTO_NETDEV and NF_NETDEV_INGRESS. */ + ingress_state.pf = NFPROTO_IPV4; + ingress_state.hook = NF_INET_INGRESS; + nft_set_pktinfo(&pkt, skb, &ingress_state); + + if (nft_set_pktinfo_ipv4_ingress(&pkt, skb) < 0) + return NF_DROP; + break; + case htons(ETH_P_IPV6): + ingress_state.pf = NFPROTO_IPV6; + ingress_state.hook = NF_INET_INGRESS; + nft_set_pktinfo(&pkt, skb, &ingress_state); + + if (nft_set_pktinfo_ipv6_ingress(&pkt, skb) < 0) + return NF_DROP; + break; + default: + return NF_ACCEPT; + } + + return nft_do_chain(&pkt, priv); +} + static const struct nft_chain_type nft_chain_filter_inet = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_INET, - .hook_mask = (1 << NF_INET_LOCAL_IN) | + .hook_mask = (1 << NF_INET_INGRESS) | + (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), .hooks = { + [NF_INET_INGRESS] = nft_do_chain_inet_ingress, [NF_INET_LOCAL_IN] = nft_do_chain_inet, [NF_INET_LOCAL_OUT] = nft_do_chain_inet, [NF_INET_FORWARD] = nft_do_chain_inet, diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 16f4d84599ac..bc079d68a536 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -167,7 +167,6 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); struct nft_data_desc desc; struct nft_data data; - u32 mask; int err; err = nft_data_init(NULL, &data, sizeof(data), &desc, @@ -181,10 +180,11 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, return err; desc.len *= BITS_PER_BYTE; - mask = nft_cmp_fast_mask(desc.len); - priv->data = data.data[0] & mask; + priv->mask = nft_cmp_fast_mask(desc.len); + priv->data = data.data[0] & priv->mask; priv->len = desc.len; + priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ; return 0; } @@ -201,7 +201,7 @@ static int nft_cmp_fast_offload(struct nft_offload_ctx *ctx, }, .sreg = priv->sreg, .len = priv->len / BITS_PER_BYTE, - .op = NFT_CMP_EQ, + .op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ, }; return __nft_cmp_offload(ctx, flow, &cmp); @@ -210,11 +210,12 @@ static int nft_cmp_fast_offload(struct nft_offload_ctx *ctx, static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ; struct nft_data data; if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg)) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ))) + if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op))) goto nla_put_failure; data.data[0] = priv->data; @@ -272,7 +273,7 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) goto err1; } - if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ) + if (desc.len <= sizeof(u32) && (op == NFT_CMP_EQ || op == NFT_CMP_NEQ)) return &nft_cmp_fast_ops; return &nft_cmp_ops; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 7a2e59638499..dcd3c7b8a367 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -22,6 +22,7 @@ #include <linux/icmpv6.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <net/sctp/checksum.h> static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off, struct vlan_ethhdr *veth) @@ -484,6 +485,19 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, return 0; } +static int nft_payload_csum_sctp(struct sk_buff *skb, int offset) +{ + struct sctphdr *sh; + + if (skb_ensure_writable(skb, offset + sizeof(*sh))) + return -1; + + sh = (struct sctphdr *)(skb->data + offset); + sh->checksum = sctp_compute_cksum(skb, offset); + skb->ip_summed = CHECKSUM_UNNECESSARY; + return 0; +} + static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt, struct sk_buff *skb, __wsum fsum, __wsum tsum) @@ -587,6 +601,13 @@ static void nft_payload_set_eval(const struct nft_expr *expr, skb_store_bits(skb, offset, src, priv->len) < 0) goto err; + if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP && + pkt->tprot == IPPROTO_SCTP && + skb->ip_summed != CHECKSUM_PARTIAL) { + if (nft_payload_csum_sctp(skb, pkt->xt.thoff)) + goto err; + } + return; err: regs->verdict.code = NFT_BREAK; @@ -623,6 +644,13 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, case NFT_PAYLOAD_CSUM_NONE: case NFT_PAYLOAD_CSUM_INET: break; + case NFT_PAYLOAD_CSUM_SCTP: + if (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER) + return -EINVAL; + + if (priv->csum_offset != offsetof(struct sctphdr, checksum)) + return -EINVAL; + break; default: return -EOPNOTSUPP; } diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 637ce3e8c575..a28aca5124ce 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -14,6 +14,25 @@ struct nft_socket { }; }; +static void nft_socket_wildcard(const struct nft_pktinfo *pkt, + struct nft_regs *regs, struct sock *sk, + u32 *dest) +{ + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + nft_reg_store8(dest, inet_sk(sk)->inet_rcv_saddr == 0); + break; +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + case NFPROTO_IPV6: + nft_reg_store8(dest, ipv6_addr_any(&sk->sk_v6_rcv_saddr)); + break; +#endif + default: + regs->verdict.code = NFT_BREAK; + return; + } +} + static void nft_socket_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -59,6 +78,13 @@ static void nft_socket_eval(const struct nft_expr *expr, return; } break; + case NFT_SOCKET_WILDCARD: + if (!sk_fullsock(sk)) { + regs->verdict.code = NFT_BREAK; + return; + } + nft_socket_wildcard(pkt, regs, sk, dest); + break; default: WARN_ON(1); regs->verdict.code = NFT_BREAK; @@ -97,6 +123,7 @@ static int nft_socket_init(const struct nft_ctx *ctx, priv->key = ntohl(nla_get_u32(tb[NFTA_SOCKET_KEY])); switch(priv->key) { case NFT_SOCKET_TRANSPARENT: + case NFT_SOCKET_WILDCARD: len = sizeof(u8); break; case NFT_SOCKET_MARK: diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index 713fb38541df..8928ec56c388 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -276,7 +276,7 @@ hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t, return 0; /* follow-up fragments don't contain ports, skip all fragments */ - if (ip->frag_off & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(ip)) return 0; hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info); |